In [7]:
# notebooks/02_modeling.ipynb
# Trains a random-forest churn classifier on the preprocessed Telco dataset and
# prints hold-out accuracy, a classification report and a confusion matrix.
import sys
import os

# The project root is resolved relative to the kernel's working directory,
# which is assumed to be notebooks/ (per the path in the header comment).
PROJECT_ROOT = os.path.abspath(os.path.join('..'))
if PROJECT_ROOT not in sys.path:
    # Guarded so that re-running this cell does not pile up duplicate entries.
    sys.path.append(PROJECT_ROOT)
from src.preprocessing import preprocess_data

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

# Configuration: everything a reader might want to tune lives here.
# The dataset is located relative to the project root instead of a
# machine-specific absolute path, so the notebook runs on any checkout.
DATA_PATH = os.path.join(PROJECT_ROOT, 'data', 'WA_Fn-UseC_-Telco-Customer-Churn.csv')
SEED = 42          # shared by the split and the forest for reproducible runs
TEST_SIZE = 0.2    # fraction of rows held out for evaluation

# Load and preprocess
# NOTE(review): the pandas FutureWarning recorded in this cell's output quotes a
# chained `fillna(..., inplace=True)` on 'TotalCharges'. No such call exists in
# this cell, so it presumably originates inside preprocess_data — fix it there.
df = pd.read_csv(DATA_PATH)
df_processed = preprocess_data(df)

# Separate the features from the 'Churn' target column.
X = df_processed.drop('Churn', axis=1)
y = df_processed['Churn']

# Train/test split
# NOTE(review): the split is not stratified; the recorded output shows an
# imbalanced target (373 of 1409 test rows are class 1), so consider passing
# stratify=y. Left unchanged here to keep the reported metrics comparable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=TEST_SIZE, random_state=SEED
)

# Train model
model = RandomForestClassifier(n_estimators=100, random_state=SEED)
model.fit(X_train, y_train)

# Evaluate on the held-out rows only.
y_pred = model.predict(X_test)

print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['TotalCharges'].fillna(df['TotalCharges'].median(), inplace=True)


Accuracy: 0.7920511000709723
Classification Report:
               precision    recall  f1-score   support

           0       0.82      0.91      0.87      1036
           1       0.65      0.46      0.54       373

    accuracy                           0.79      1409
   macro avg       0.74      0.68      0.70      1409
weighted avg       0.78      0.79      0.78      1409

Confusion Matrix:
 [[946  90]
 [203 170]]


In [8]:
import pickle

# Persist the fitted classifier so it can be reloaded without retraining.
# "model.pkl" is a relative path, so the file is written to the kernel's
# current working directory; binary mode is required because pickle emits bytes.
with open("model.pkl", "wb") as model_file:
    pickle.dump(model, model_file)
