In [9]:
# Load and explore the customer-churn dataset
import pandas as pd
import os
import joblib

# Backend artifact folder, resolved relative to the notebook exactly like the
# "../data" path below. It is the same "../backend" location the training cell
# writes the model and encoders to, so no stray nested
# "Customer_Churn_Prediction/backend" directory gets created next to the notebook.
backend_folder = "../backend"
os.makedirs(backend_folder, exist_ok=True)

df = pd.read_csv("../data/Customer_Churn.csv")

# Show the first rows of the raw table.
print(df.head())

# DataFrame.info() prints its own report and returns None, so it is called
# directly; wrapping it in print() would append a spurious "None" line.
df.info()

  CustomerID  Age  Gender          State ServiceType  MonthlyCharges  \
0   CUST1000   56    Male      Karnataka    Postpaid          723.34   
1   CUST1001   46  Female    West Bengal    Postpaid         1204.32   
2   CUST1002   32  Female      Karnataka    Postpaid          995.45   
3   CUST1003   60  Female      Karnataka    Postpaid         1047.00   
4   CUST1004   25    Male  Uttar Pradesh     Prepaid          284.37   

   TenureMonths  InternetUsageGB  CallDropsPerMonth  ComplaintsLast6Months  \
0            59            57.97                  9                      6   
1             3            36.24                 10                      4   
2            15            82.44                 16                      1   
3            29            82.16                  6                      5   
4            53            48.00                 11                      5   

  IsActive PaymentMethod Churn  
0      Yes          Cash    No  
1      Yes   Net Banking   Yes  

In [10]:
import json  # needed by json.dump below; without it a fresh kernel raises NameError

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score
import joblib


# Encode categorical features on a copy so the raw `df` is left untouched.
# Encoding `df` in place made this cell non-idempotent: on a second run no
# object columns (other than CustomerID) remain, `label_encoders` ends up empty,
# and that empty dict would overwrite the saved encoders file.
df_encoded = df.copy()
label_encoders = {}
for column in df_encoded.select_dtypes(include=['object']).columns:
    # CustomerID is skipped here and dropped from the features below.
    if column != 'CustomerID':
        le = LabelEncoder()
        df_encoded[column] = le.fit_transform(df_encoded[column])
        # Keep the fitted encoder (the 'Churn' target's encoder included) so the
        # same string -> integer mapping can be reused when loading the model.
        label_encoders[column] = le

# Features & Target
X = df_encoded.drop(['CustomerID', 'Churn'], axis=1)
y = df_encoded['Churn']

# Train/Test Split (80/20, fixed seed for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Model
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Evaluation
# NOTE(review): the recorded run of this cell reports 0.00 precision/recall for
# class 1, i.e. the high accuracy comes almost entirely from the majority class.
# Consider `stratify=y` in the split and/or `class_weight="balanced"` in the
# model -- TODO confirm with the project owner before changing results.
y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

# Define correct backend path (relative to notebook)
backend_dir = "../backend"
os.makedirs(backend_dir, exist_ok=True)

# Save model
joblib.dump(model, os.path.join(backend_dir, "customer_churn_model.pkl"))

# Save label encoders
joblib.dump(label_encoders, os.path.join(backend_dir, "label_encoders.pkl"))

# Optional: Save column names for frontend dynamic form generation
with open(os.path.join(backend_dir, "input_columns.json"), "w", encoding="utf-8") as f:
    json.dump({"columns": list(X.columns)}, f)

print("Model and encoders saved to existing backend folder ✅")

Accuracy: 0.81
              precision    recall  f1-score   support

           0       0.82      0.98      0.90       165
           1       0.00      0.00      0.00        35

    accuracy                           0.81       200
   macro avg       0.41      0.49      0.45       200
weighted avg       0.68      0.81      0.74       200

Model and encoders saved to existing backend folder ✅
