In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score
from imblearn.over_sampling import SMOTE
import pickle


In [None]:
# Read the Telco customer-churn CSV (path is relative to the notebook's
# working directory) and preview the first five rows as the cell output.
df = pd.read_csv(filepath_or_buffer='WA_Fn-UseC_-Telco-Customer-Churn.csv')
df.head(n=5)


In [None]:
# Structural overview: info() prints dtypes and non-null counts directly.
df.info()

# Only the LAST expression of a cell is rendered automatically, so the summary
# statistics must be displayed explicitly or they are silently discarded.
# `display` is the builtin that the IPython/Jupyter kernel injects.
display(df.describe())

# Each chart gets its own figure/axes. Without an explicit `ax`, both the
# pandas Series plot and seaborn fall back to the current axes, so the heatmap
# would be painted on top of the bar chart.
churn_fig, churn_ax = plt.subplots()
df['Churn'].value_counts().plot(kind='bar', title='Churn Distribution', ax=churn_ax)

# Pairwise correlation restricted to the numeric columns of the frame.
corr_fig, corr_ax = plt.subplots()
sns.heatmap(df.corr(numeric_only=True), annot=True, ax=corr_ax)
corr_ax.set_title('Correlation of Numeric Features')

plt.show()


In [None]:
# --- Cleaning -----------------------------------------------------------------
# Parse TotalCharges as numbers; anything that cannot be parsed becomes NaN
# (errors='coerce') and the rows containing NaN are then removed.
df['TotalCharges'] = pd.to_numeric(df['TotalCharges'], errors='coerce')
df.dropna(inplace=True)

# The customer identifier is dropped before encoding/modelling.
df.drop(columns='customerID', inplace=True)

# --- Encoding -----------------------------------------------------------------
# Every remaining object-dtype column except the target gets its own
# LabelEncoder, fitted on (and replacing) that column's values.
categorical_columns = [
    name
    for name in df.select_dtypes(include='object').columns
    if name != 'Churn'
]

encoders = {}
for name in categorical_columns:
    encoder = LabelEncoder()
    df[name] = encoder.fit_transform(df[name])
    # Keyed by the column's positional index in `df`, not by its name.
    position = df.columns.get_loc(name)
    encoders[position] = encoder


In [None]:
# Features are every column except the target; the target is encoded as
# 1 for 'Yes' and 0 for anything else.
X = df.drop('Churn', axis=1)
y = df['Churn'].apply(lambda x: 1 if x == 'Yes' else 0)

# Hold out the test set BEFORE oversampling. SMOTE synthesises minority rows by
# interpolating between existing ones; if it is fitted on the full dataset,
# information from rows that later land in the test split leaks into training
# and the test split itself contains synthetic samples, inflating the metrics.
# `stratify=y` keeps the original class ratio in both splits.
X_train_raw, X_test, y_train_raw, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Oversample the minority class in the TRAINING fold only; the test fold keeps
# only real rows. The seed makes the synthetic rows reproducible.
smote = SMOTE(random_state=42)
X_train, y_train = smote.fit_resample(X_train_raw, y_train_raw)


In [None]:
# Fit a random forest on the training split. The forest is randomised
# (bootstrap samples, feature subsets), so the seed is fixed to make a
# Restart & Run All reproduce the same model; 42 matches the seed already
# used for train_test_split.
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)


In [None]:
# Predict on the test split and report overall accuracy followed by the
# per-class precision / recall / F1 table.
y_pred = model.predict(X_test)

test_accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", test_accuracy)

report_text = classification_report(y_test, y_pred)
print(report_text)


In [None]:
# Persist the trained classifier. `pickle` is already imported in the imports
# cell at the top of the notebook, so it is not re-imported here.
# NOTE: unpickling can execute arbitrary code; only load these files from a
# trusted source.
with open('customer_churn_model.pkl', 'wb') as f:
    pickle.dump(model, f)

# Persist the fitted LabelEncoders so the same category-to-integer mapping can
# be re-applied later. The dict is keyed by each encoded column's positional
# index in the dataframe (as built in the encoding cell), not by column name.
with open('encoders.pkl', 'wb') as f:
    pickle.dump(encoders, f)



In [None]:
# Pair each importance score from the fitted model with its feature name,
# then draw them as a horizontal bar chart in ascending order.
feature_names = X.columns
importances = pd.Series(model.feature_importances_, index=feature_names)
ranked_importances = importances.sort_values()
ranked_importances.plot(kind='barh', title='Feature Importances')
