In [None]:
# Import Liabraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import pickle

In [None]:
# Load and preprocess data
df = pd.read_csv(r"Telco_Customer_Churn_Dataset.csv")

In [None]:
# Convert TotalCharges to numeric, coercing errors to NaN
df['TotalCharges'] = pd.to_numeric(df['TotalCharges'], errors='coerce')

In [None]:
# Drop rows with missing values that were created
df = df.dropna()

In [None]:
# --- Encoding Categorical Variables ---
label_encoders = {}
for col in df.select_dtypes(include=['object']).columns:
    if col != 'Churn': # We don't encode the target variable yet
        le = LabelEncoder()
        df[col] = le.fit_transform(df[col])
        label_encoders[col] = le

# Encode the target variable separately
churn_le = LabelEncoder()
df['Churn'] = churn_le.fit_transform(df['Churn'])
label_encoders['Churn'] = churn_le 

In [None]:
# --- Prepare Features and Target ---
x = df.drop(columns=['Churn'])
y = df['Churn']

In [None]:
# Save the column order
model_columns = x.columns
with open('model_columns.pkl', 'wb') as file:
    pickle.dump(model_columns, file)

In [None]:
# --- Split Data ---
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=42)

In [None]:
# --- Train Model ---
model = RandomForestClassifier(
    random_state=0,
    n_estimators=150,
    min_samples_split=5,
    min_samples_leaf=1,
    max_features='log2',
    max_depth=None,
    criterion='gini',
    bootstrap=True
)

model.fit(x_train, y_train)

In [None]:
# --- Metrics ---
y_pred = model.predict(x_test)

In [5]:
print('Accuracy :', accuracy_score(y_test, y_pred)*100)

Accuracy : 90.8030803080308


In [6]:
print('Classification Report :\n', classification_report(y_test, y_pred))

Classification Report :
               precision    recall  f1-score   support

           0       0.92      0.96      0.94      3314
           1       0.88      0.76      0.82      1231

    accuracy                           0.91      4545
   macro avg       0.90      0.86      0.88      4545
weighted avg       0.91      0.91      0.91      4545



In [7]:
print('Confusion Matrix :\n', confusion_matrix(y_test, y_pred))

Confusion Matrix :
 [[3192  122]
 [ 296  935]]


In [None]:
# --- Save Model and Encoders ---
with open('churn_model.pkl', 'wb') as file:
    pickle.dump(model, file)

with open('label_encoders.pkl', 'wb') as file:
    pickle.dump(label_encoders, file)

print("\n✅ Model, encoders, and column list have been saved successfully!")
print("You can now run the Flask app using 'app.py'.")

Starting the training process...
Data preprocessing and encoding complete.
Training the RandomForest model...
Model training complete.

✅ Model, encoders, and column list have been saved successfully!
You can now run the Flask app using 'app.py'.
