In [3]:
import os
import pickle

import pandas as pd
from imblearn.combine import SMOTEENN
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier

In [4]:
# Location of the churn dataset. Set the CHURN_DATA_PATH environment variable
# to run the notebook on another machine; the fallback is the path that was
# previously hard-coded here, so existing setups keep working unchanged.
DATA_PATH = os.environ.get(
    "CHURN_DATA_PATH",
    r"C:\Users\imam9\Project_ChurnPrediction\Dataset\tel_churn.csv",
)
df = pd.read_csv(DATA_PATH)

# Features are every column except the target; the target is the "Churn" column.
X = df.drop(columns="Churn")
y = df["Churn"]

In [5]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, )

In [6]:
smote = SMOTEENN(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X, y)

In [7]:
X_train, X_test, y_train, y_test = train_test_split(
    X_resampled, y_resampled, test_size=0.2, random_state=42
)

In [9]:
# Learn the standardisation statistics from the training features only, then
# apply that same fitted transform to both partitions.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Logistic Regression

In [10]:
# ---------------- Logistic Regression ----------------
# max_iter is raised to 1000 iterations; fit() returns the estimator itself,
# so construction and training can be chained.
model_lg = LogisticRegression(max_iter=1000).fit(X_train_scaled, y_train)

# Class predictions for the scaled hold-out features.
y_pred_lg = model_lg.predict(X_test_scaled)

In [11]:
# Compute the hold-out accuracy and the per-class precision/recall/F1 table
# first, then print them in the same order as before.
acc_lg = accuracy_score(y_test, y_pred_lg)
report_lg = classification_report(y_test, y_pred_lg)

print("Accuracy", acc_lg)
print("\nLogistic Regression Report:")
print(report_lg)


Accuracy 0.9566694987255735

Logistic Regression Report:
              precision    recall  f1-score   support

           0       0.95      0.96      0.95       546
           1       0.96      0.96      0.96       631

    accuracy                           0.96      1177
   macro avg       0.96      0.96      0.96      1177
weighted avg       0.96      0.96      0.96      1177



# Decision Tree

In [22]:
# Hyper-parameters for the decision tree: gini impurity, depth capped at 6,
# at least 8 samples per leaf, fixed seed for repeatable fits.
dt_params = dict(
    criterion="gini",
    random_state=100,
    max_depth=6,
    min_samples_leaf=8,
)
model_dt = DecisionTreeClassifier(**dt_params)
model_dt.fit(X_train_scaled, y_train)

# Class predictions for the scaled hold-out features.
y_pred_dt = model_dt.predict(X_test_scaled)

In [23]:
# Compute the hold-out accuracy and the per-class precision/recall/F1 table
# first, then print them in the same order as before.
acc_dt = accuracy_score(y_test, y_pred_dt)
report_dt = classification_report(y_test, y_pred_dt)

print("Accuracy", acc_dt)
print("\nDecision Tree Report:")
print(report_dt)


Accuracy 0.9311809685641461

Decision Tree Report:
              precision    recall  f1-score   support

           0       0.94      0.91      0.92       546
           1       0.92      0.95      0.94       631

    accuracy                           0.93      1177
   macro avg       0.93      0.93      0.93      1177
weighted avg       0.93      0.93      0.93      1177



# Random Forest

In [24]:
# ---------------- Random Forest ----------------
# Same tree settings as the single decision tree (gini, depth 6, 8 samples per
# leaf, seed 100); fit() returns the estimator, so training is chained.
model_rf = RandomForestClassifier(
    criterion="gini",
    random_state=100,
    max_depth=6,
    min_samples_leaf=8,
).fit(X_train_scaled, y_train)

# Class predictions for the scaled hold-out features.
y_pred_rf = model_rf.predict(X_test_scaled)

In [25]:
# Compute the hold-out accuracy and the per-class precision/recall/F1 table
# first, then print them in the same order as before.
acc_rf = accuracy_score(y_test, y_pred_rf)
report_rf = classification_report(y_test, y_pred_rf)

print("Accuracy", acc_rf)
print("\nRandom Forest Report:")
print(report_rf)


Accuracy 0.9473237043330501

Random Forest Report:
              precision    recall  f1-score   support

           0       0.96      0.93      0.94       546
           1       0.94      0.97      0.95       631

    accuracy                           0.95      1177
   macro avg       0.95      0.95      0.95      1177
weighted avg       0.95      0.95      0.95      1177



# Pickling

In [16]:
# Bundle the fitted scaler with the three trained classifiers so they are
# persisted together; the keys are the names used to look them up after loading.
models = dict(
    scaler=scaler,
    logistic=model_lg,
    decision_tree=model_dt,
    random_forest=model_rf,
)

In [17]:
# Serialise the whole bundle (scaler + models) into a single pickle file in
# the current working directory.
with open("models_pipeline.pkl", mode="wb") as bundle_file:
    pickle.dump(models, bundle_file)

In [18]:
# Read the bundle back from disk.
# NOTE(security): pickle.load can execute arbitrary code; only load files that
# this notebook (or another trusted source) produced.
with open("models_pipeline.pkl", mode="rb") as bundle_file:
    loaded_objects = pickle.load(bundle_file)

In [19]:
# Pull each persisted object out of the reloaded bundle by its key.
logistic_loaded, tree_loaded, rf_loaded, scaler_loaded = (
    loaded_objects[key]
    for key in ("logistic", "decision_tree", "random_forest", "scaler")
)

In [28]:
# The forest was fitted on standardised features, so the raw hold-out frame
# must go through the reloaded scaler before scoring. Scoring the unscaled
# X_test would feed the model values on a different scale than it was trained on.
X_test_scaled_loaded = scaler_loaded.transform(X_test)
rf_score = rf_loaded.score(X_test_scaled_loaded, y_test)
print("Random Forest Accuracy:", rf_score)

Random Forest Accuracy: 0.9473237043330501
