In [77]:
import numpy as np
import pandas as pd
import seaborn as sns

In [78]:
df=pd.read_csv('cleaned_data.csv')

In [79]:
df.head()

Unnamed: 0,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,OnlineBackup,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
0,1,0,0,1,23,0,0,0,0,0,0,0,0,0,0,1,0,49.85,1146.55,0
1,0,0,1,0,43,0,0,0,1,0,1,0,1,0,0,0,0,100.7,4330.1,1
2,1,1,0,0,51,1,0,0,0,1,1,1,0,0,0,0,1,97.33,4963.83,1
3,1,1,0,0,72,1,1,0,1,0,1,0,0,0,0,0,0,101.38,7299.36,0
4,1,1,0,0,25,1,1,0,0,0,0,1,0,1,0,0,1,52.22,1305.5,1


Separate the Data

In [80]:
X=df.drop('Churn',axis=1)
y=df['Churn']

Model Preparation

In [81]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import  StandardScaler

In [82]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [83]:
scaler=StandardScaler()

In [84]:
X_train_scaled=scaler.fit_transform(X_train)

In [85]:
X_test_scaled=scaler.fit_transform(X_test)

In [86]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score,classification_report,confusion_matrix

In [87]:
clf = DecisionTreeClassifier(max_depth=14, criterion='gini', class_weight='balanced', random_state=42)


In [88]:
clf.fit(X_train_scaled,y_train)

In [89]:
y_pred=clf.predict(X_test_scaled)

In [90]:
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))


Accuracy: 0.514172335600907
Confusion Matrix:
 [[247 627]
 [230 660]]
Classification Report:
               precision    recall  f1-score   support

           0       0.52      0.28      0.37       874
           1       0.51      0.74      0.61       890

    accuracy                           0.51      1764
   macro avg       0.52      0.51      0.49      1764
weighted avg       0.52      0.51      0.49      1764



In [91]:
from sklearn.model_selection import GridSearchCV

param_grid = {
    'max_depth': range(2, 20),
    'criterion': ['gini', 'entropy']
}

grid = GridSearchCV(DecisionTreeClassifier(random_state=42), param_grid, cv=5)
grid.fit(X_train, y_train)

print("Best Depth:", grid.best_params_['max_depth'])
print("Best Score:", grid.best_score_)
print("Best Criterion",grid.best_params_['criterion'])

Best Depth: 14
Best Score: 0.5094725076384056
Best Criterion gini


In [92]:
# Check class distribution
df['Churn'].value_counts(normalize=True)


Churn
0    0.507143
1    0.492857
Name: proportion, dtype: float64

Logictics Regression

In [93]:
from sklearn.linear_model import LogisticRegression

In [94]:
logreg = LogisticRegression(max_iter=1000, random_state=42)

In [95]:
logreg.fit(X_train_scaled,y_train)

In [96]:
lr_pred=logreg.predict(X_test_scaled)

In [97]:
print("Accuracy:", accuracy_score(y_test, lr_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, lr_pred))
print("Classification Report:\n", classification_report(y_test, lr_pred))

Accuracy: 0.49263038548752835
Confusion Matrix:
 [[549 325]
 [570 320]]
Classification Report:
               precision    recall  f1-score   support

           0       0.49      0.63      0.55       874
           1       0.50      0.36      0.42       890

    accuracy                           0.49      1764
   macro avg       0.49      0.49      0.48      1764
weighted avg       0.49      0.49      0.48      1764



In [98]:
from sklearn.model_selection import GridSearchCV

param_grid = {'C': [0.001, 0.01, 0.1, 1, 10, 100]}
grid = GridSearchCV(LogisticRegression(max_iter=1000), param_grid, cv=5)
grid.fit(X_train_scaled, y_train)

print("Best C:", grid.best_params_['C'])


Best C: 0.001


In [99]:
predictions_grid=grid.predict(X_test_scaled)

In [100]:
print("Accuracy:", accuracy_score(y_test, predictions_grid))
print("Confusion Matrix:\n", confusion_matrix(y_test, predictions_grid))
print("Classification Report:\n", classification_report(y_test, predictions_grid))

Accuracy: 0.4988662131519274
Confusion Matrix:
 [[685 189]
 [695 195]]
Classification Report:
               precision    recall  f1-score   support

           0       0.50      0.78      0.61       874
           1       0.51      0.22      0.31       890

    accuracy                           0.50      1764
   macro avg       0.50      0.50      0.46      1764
weighted avg       0.50      0.50      0.46      1764

