### Logistic Regression :
is a supervised machine learning algorithm used for classification tasks. Unlike linear regression, which predicts continuous values, logistic regression predicts probabilities and maps these probabilities to discrete classes using the sigmoid function.

### Key Characteristics:


## Example 1

In [6]:
 # import librarys
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score,confusion_matrix,classification_report

In [8]:
# Sample data
data = {
    'MonthlyCharges': [70, 100, 60, 90, 50, 85, 65, 75, 95, 55],
    'Tenure': [12, 24, 6, 18, 5, 20, 8, 10, 22, 4],
    'Churn': [1, 0, 1, 0, 1, 0, 1, 0, 0, 1]  # 1 = churn, 0 = not churn
}
df=pd.DataFrame(data)
df

Unnamed: 0,MonthlyCharges,Tenure,Churn
0,70,12,1
1,100,24,0
2,60,6,1
3,90,18,0
4,50,5,1
5,85,20,0
6,65,8,1
7,75,10,0
8,95,22,0
9,55,4,1


In [12]:
# features(X) and target(y)
X=df.drop(['Churn'],axis=1)
y=df['Churn']


In [26]:
#  split data into train and test sets
X_train,X_test,y_trian,y_test=train_test_split(X,y,test_size=0.3,random_state=42)


In [28]:
# train logistic regresion model
model=LogisticRegression()
model.fit(X_train,y_trian)
# Display coefficients
print("Coefficients:", model.coef_)
print("Intercept:", model.intercept_)

Coefficients: [[-0.69733286  0.21570868]]
Intercept: [48.23056326]


In [30]:
# make prediction on test data
y_pred=model.predict(X_test)
# probablity prediction
y_prob=model.predict_proba(X_test)[:,1]

print("Predicted classes:", y_pred)
print("Predicted probabilities:", y_prob)

Predicted classes: [0 0 0]
Predicted probabilities: [1.72468029e-06 8.12516955e-08 1.19482986e-03]


In [32]:
# evaluate the model

# Accuracy
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

# Confusion Matrix
conf_matrix = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:\n", conf_matrix)

# Classification Report
report = classification_report(y_test, y_pred)
print("Classification Report:\n", report)


Accuracy: 1.0
Confusion Matrix:
 [[3]]
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00         3

    accuracy                           1.00         3
   macro avg       1.00      1.00      1.00         3
weighted avg       1.00      1.00      1.00         3





## Example 2

In [36]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.pipeline import make_pipeline


In [44]:
df=pd.read_csv('Telco-Customer-Churn.csv')
df.head()


Unnamed: 0,customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,...,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
0,7590-VHVEG,Female,0,Yes,No,1,No,No phone service,DSL,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,29.85,29.85,No
1,5575-GNVDE,Male,0,No,No,34,Yes,No,DSL,Yes,...,Yes,No,No,No,One year,No,Mailed check,56.95,1889.5,No
2,3668-QPYBK,Male,0,No,No,2,Yes,No,DSL,Yes,...,No,No,No,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes
3,7795-CFOCW,Male,0,No,No,45,No,No phone service,DSL,Yes,...,Yes,Yes,No,No,One year,No,Bank transfer (automatic),42.3,1840.75,No
4,9237-HQITU,Female,0,No,No,2,Yes,No,Fiber optic,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,70.7,151.65,Yes


### data preprocessing

In [49]:
# Checking for missing values
print(df.isnull().sum())

# Filling missing values (e.g., replacing with mode for categorical columns)
df['TotalCharges'] = df['TotalCharges'].replace(" ", np.nan)  # Convert blanks to NaN
df['TotalCharges'] = pd.to_numeric(df['TotalCharges'], errors='coerce')  # Convert to numeric
df['TotalCharges'].fillna(df['TotalCharges'].mean(), inplace=True)

# Label encoding categorical columns
label_encoder = LabelEncoder()
df['Gender'] = label_encoder.fit_transform(df['gender'])
df['Partner'] = label_encoder.fit_transform(df['Partner'])
df['Dependents'] = label_encoder.fit_transform(df['Dependents'])
df['PhoneService'] = label_encoder.fit_transform(df['PhoneService'])
df['MultipleLines'] = label_encoder.fit_transform(df['MultipleLines'])
df['InternetService'] = label_encoder.fit_transform(df['InternetService'])
df['Contract'] = label_encoder.fit_transform(df['Contract'])
df['PaymentMethod'] = label_encoder.fit_transform(df['PaymentMethod'])
df['Churn'] = label_encoder.fit_transform(df['Churn'])  # Target variable

# Select features and target variable
X = df.drop('Churn', axis=1)  # Drop the target column
y = df['Churn']  # Churn column as target variable


customerID          0
gender              0
SeniorCitizen       0
Partner             0
Dependents          0
tenure              0
PhoneService        0
MultipleLines       0
InternetService     0
OnlineSecurity      0
OnlineBackup        0
DeviceProtection    0
TechSupport         0
StreamingTV         0
StreamingMovies     0
Contract            0
PaperlessBilling    0
PaymentMethod       0
MonthlyCharges      0
TotalCharges        0
Churn               0
dtype: int64


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['TotalCharges'].fillna(df['TotalCharges'].mean(), inplace=True)


### Train-test split

In [52]:
# Split the data into training and test sets (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Display the shape of train-test sets
print(f"Training data shape: {X_train.shape}")
print(f"Testing data shape: {X_test.shape}")


Training data shape: (5634, 21)
Testing data shape: (1409, 21)


### Model Training with Logistic Regression (Including Hyperparameter Tuning)

In [None]:
# Create a Logistic Regression model with hyperparameter tuning using GridSearchCV
model = LogisticRegression(max_iter=10000)

# Define the parameter grid for tuning
param_grid = {
    'C': [0.1, 1, 10],  # Regularization parameter
    'solver': ['liblinear', 'saga']  # Solvers for optimization
}

# Using GridSearchCV for hyperparameter tuning
grid_search = GridSearchCV(estimator=model, param_grid=param_grid, cv=5, n_jobs=-1, scoring='accuracy')
grid_search.fit(X_train, y_train)

# Get the best model after tuning
best_model = grid_search.best_estimator_

# Display the best hyperparameters found
print("Best Hyperparameters:", grid_search.best_params_)


### prediction evalution

In [None]:
# Predict the churn values for test data
y_pred = best_model.predict(X_test)

# Evaluate the model's performance
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

# Confusion Matrix
conf_matrix = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:\n", conf_matrix)

# Classification Report
report = classification_report(y_test, y_pred)
print("Classification Report:\n", report)


### Feature Importance

In [None]:
# Get the feature importance (coefficients)
feature_importance = pd.DataFrame({
    'Feature': X.columns,
    'Coefficient': best_model.coef_[0]
})

# Sort the features by importance
feature_importance = feature_importance.sort_values(by='Coefficient', ascending=False)
print("Feature Importance:\n", feature_importance)
