### Application of Logistic Regression as a predictive model 

#### Logistic Regression without hyperparameters tuning

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

# Data Preprocessing
data = pd.read_csv('Customer_Churn.csv')

# Handling missing values
data.fillna(0, inplace=True)

# Convert categorical variables into numerical representations
label_encoders = {}
for column in data.columns:
    if data[column].dtype == 'object':
        label_encoders[column] = LabelEncoder()
        data[column] = label_encoders[column].fit_transform(data[column])

# Feature Engineering
X_numerical = data[['MonthlyCharges', 'TotalCharges']] # Concatenate numerical features (MonthlyCharges and TotalCharges) with existing features
X_categorical = data.drop(['Churn', 'MonthlyCharges', 'TotalCharges'], axis=1)
X = pd.concat([X_categorical, X_numerical], axis=1)

# Split the Data
y = data['Churn']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Model Selection and Training
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

model = LogisticRegression(random_state=42)
model.fit(X_train_scaled, y_train)

# Model Evaluation
y_pred = model.predict(X_test_scaled)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)
print("Classification Report:")
print(classification_report(y_test, y_pred))

Accuracy: 0.8147622427253371
Classification Report:
              precision    recall  f1-score   support

           0       0.85      0.91      0.88      1036
           1       0.68      0.56      0.61       373

    accuracy                           0.81      1409
   macro avg       0.77      0.73      0.75      1409
weighted avg       0.81      0.81      0.81      1409



#### Logistic Regression with hyperparameters tuning

In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

# Data Preprocessing
data = pd.read_csv('Customer_Churn.csv')

# Handling missing values
data.fillna(0, inplace=True)

# Convert categorical variables into numerical representations
label_encoders = {}
for column in data.columns:
    if data[column].dtype == 'object':
        label_encoders[column] = LabelEncoder()
        data[column] = label_encoders[column].fit_transform(data[column])

# Feature Engineering
# Concatenate numerical features (MonthlyCharges and TotalCharges) with existing features
X_numerical = data[['MonthlyCharges', 'TotalCharges']]
X_categorical = data.drop(['Churn', 'MonthlyCharges', 'TotalCharges'], axis=1)
X = pd.concat([X_categorical, X_numerical], axis=1)

# Split the Data
y = data['Churn']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Model Selection and Hyperparameter Tuning
param_grid = {
    'C': [0.001, 0.01, 0.1, 1, 10, 100],
    'penalty': ['l1', 'l2'],
    'solver': ['liblinear', 'saga']
}

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

logistic_regression = LogisticRegression(random_state=42)
grid_search = GridSearchCV(estimator=logistic_regression, param_grid=param_grid, cv=5, scoring='accuracy')
grid_search.fit(X_train_scaled, y_train)

# Best hyperparameters
best_params = grid_search.best_params_
print("Best Hyperparameters:", best_params)

# Model Evaluation with Best Hyperparameters
best_logistic_regression = grid_search.best_estimator_
y_pred = best_logistic_regression.predict(X_test_scaled)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)
print("Classification Report:")
print(classification_report(y_test, y_pred))

Best Hyperparameters: {'C': 1, 'penalty': 'l2', 'solver': 'saga'}
Accuracy: 0.8147622427253371
Classification Report:
              precision    recall  f1-score   support

           0       0.85      0.91      0.88      1036
           1       0.68      0.56      0.61       373

    accuracy                           0.81      1409
   macro avg       0.77      0.73      0.75      1409
weighted avg       0.81      0.81      0.81      1409

