In [1]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import classification_report, accuracy_score
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

Dataset Loading

In [2]:
# Locations of the pre-split churn CSV files: one is loaded as the training
# frame, the other as the held-out test frame.
# NOTE(review): these are absolute, machine-specific paths; the notebook will
# only run where this directory layout exists.
TRAIN_CSV = r"D:\Final_tasks\classification\churn-bigml-80.csv"
TEST_CSV = r"D:\Final_tasks\classification\churn-bigml-20.csv"

train_df, test_df = (pd.read_csv(csv_path) for csv_path in (TRAIN_CSV, TEST_CSV))

Convert text columns to integer codes, then split features from the target

In [None]:
# Encode every text (object-dtype) column as integer codes.
#
# Each column gets its own LabelEncoder, fitted on the training data only and
# stored in `encoders`, so the category <-> code mapping of every column stays
# available for inspection or `inverse_transform`.
cat_cols = train_df.select_dtypes(include=["object"]).columns

# Encode into copies and rebind `train_df` / `test_df` only once every column
# has succeeded. A failure part-way through therefore cannot leave the frames
# half-encoded, which would make a re-run of this cell silently skip columns.
train_encoded = train_df.copy()
test_encoded = test_df.copy()
encoders = {}

for col in cat_cols:
    le = LabelEncoder()
    train_encoded[col] = le.fit_transform(train_df[col])

    # LabelEncoder.transform rejects categories it did not see during fit;
    # check up front so the error names the column and the offending values.
    unseen_mask = ~test_df[col].isin(le.classes_)
    if unseen_mask.any():
        raise ValueError(
            f"Column {col!r}: test set contains categories not present in "
            f"the training set: {test_df.loc[unseen_mask, col].unique().tolist()}"
        )

    test_encoded[col] = le.transform(test_df[col])
    encoders[col] = le

# Publish the encoded frames under the original names (same end state as the
# previous in-place version of this cell).
train_df, test_df = train_encoded, test_encoded

# Separate the features from the 'Churn' target column.
X_train = train_df.drop('Churn', axis=1)
y_train = train_df['Churn']

X_test = test_df.drop('Churn', axis=1)
y_test = test_df['Churn']

Standardize the features (zero mean, unit variance) so they are on a common scale

In [4]:
# Learn the scaling statistics from the training features only, then apply
# that same fitted transformation to both the training and the test features.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

Logistic Regression tuning with grid search

In [9]:
# Search space for logistic regression: three values of C, with the solver
# fixed to lbfgs.
log_params = dict(C=[0.1, 1, 10], solver=["lbfgs"])

# 5-fold cross-validated grid search. No `scoring` is passed, so candidates
# are ranked by the estimator's default score.
# NOTE(review): the saved report below shows low recall for the True class;
# consider whether the default score is the right selection criterion here.
log_grid = GridSearchCV(
    estimator=LogisticRegression(max_iter=1000),
    param_grid=log_params,
    cv=5,
).fit(X_train, y_train)

# Evaluate the best configuration found by the search on the held-out test set.
log_pred = log_grid.predict(X_test)

print("\n ***Logistic Regression*** ")
print("Best Params:", log_grid.best_params_)
print(classification_report(y_test, log_pred))



 ***Logistic Regression*** 
Best Params: {'C': 0.1, 'solver': 'lbfgs'}
              precision    recall  f1-score   support

       False       0.88      0.97      0.92       572
        True       0.49      0.18      0.26        95

    accuracy                           0.86       667
   macro avg       0.68      0.57      0.59       667
weighted avg       0.82      0.86      0.83       667



Decision Tree tuning with grid search

In [None]:
# Search space for the decision tree: split criterion, maximum depth
# (None = no depth limit) and the minimum number of samples needed to split.
tree_params = {
    "criterion": ["gini", "entropy"],
    "max_depth": [3, 5, 10, None],
    "min_samples_split": [2, 5]
}

# A fixed random_state makes tree construction deterministic, so the selected
# parameters and the report below are reproducible under Restart & Run All.
tree_grid = GridSearchCV(DecisionTreeClassifier(random_state=42), tree_params, cv=5)
tree_grid.fit(X_train, y_train)

# Evaluate the best configuration found by the search on the held-out test set.
tree_pred = tree_grid.predict(X_test)

print("\n ***Decision Tree***")
print("Best Params:", tree_grid.best_params_)
print(classification_report(y_test, tree_pred))



 ***Decision Tree ***
Best Params: {'criterion': 'gini', 'max_depth': 10, 'min_samples_split': 5}
              precision    recall  f1-score   support

       False       0.95      0.98      0.97       572
        True       0.84      0.72      0.77        95

    accuracy                           0.94       667
   macro avg       0.90      0.85      0.87       667
weighted avg       0.94      0.94      0.94       667



Random Forest tuning with grid search

In [11]:
# Search space for the random forest: number of trees, maximum tree depth
# (None = no depth limit) and the minimum number of samples needed to split.
rf_params = {
    "n_estimators": [50, 100, 200],
    "max_depth": [5, 10, None],
    "min_samples_split": [2, 5]
}

# A random forest is stochastic (bootstrap samples and random feature subsets).
# A fixed random_state makes the search outcome and the report below
# reproducible under Restart & Run All.
rf_grid = GridSearchCV(RandomForestClassifier(random_state=42), rf_params, cv=5)
rf_grid.fit(X_train, y_train)

# Evaluate the best configuration found by the search on the held-out test set.
rf_pred = rf_grid.predict(X_test)

print("\n ***Random Forest*** ")
print("Best Params:", rf_grid.best_params_)
print(classification_report(y_test, rf_pred))


 ***Random Forest*** 
Best Params: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 200}
              precision    recall  f1-score   support

       False       0.95      0.99      0.97       572
        True       0.96      0.71      0.81        95

    accuracy                           0.95       667
   macro avg       0.96      0.85      0.89       667
weighted avg       0.95      0.95      0.95       667

