# In [5]:
# 1. Import Libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import accuracy_score
import joblib

# 2. Load Dataset
df = pd.read_csv("WA_Fn-UseC_-Telco-Customer-Churn.csv")

# 3. Clean Data
# Parse 'TotalCharges' as numeric; entries that cannot be parsed become NaN
# (errors='coerce'), and dropna() then removes every row containing a NaN.
df['TotalCharges'] = pd.to_numeric(df['TotalCharges'], errors='coerce')
df.dropna(inplace=True)

# 4. Encode Target Variable
# 'Yes' -> 1, 'No' -> 0 so the target is numeric before one-hot encoding.
df['Churn'] = df['Churn'].map({'Yes': 1, 'No': 0})

# 5. Drop 'customerID'
# Removed before encoding so it is not expanded into dummy columns.
df.drop('customerID', axis=1, inplace=True)

# 6. Convert Categorical to Dummies
df_encoded = pd.get_dummies(df)

# 7. Split Features and Target
X = df_encoded.drop('Churn', axis=1)
y = df_encoded['Churn']

# 8. Train-Test Split
# 80/20 split with a fixed seed so the same rows land in each partition.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 9. Scaling
# The scaler is fitted on the training rows only and then applied to the test
# rows, so no test-set statistics leak into the fitted scaler.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# 10. Model Training
# Seed the forest as well: without random_state every run would train (and
# save) a different model even though the data split above is fixed.
model = RandomForestClassifier(random_state=42)
model.fit(X_train_scaled, y_train)

# 11. Evaluate on the Held-Out Split
# Report test accuracy before persisting, so the saved artifact has a known
# quality figure attached to the run that produced it.
test_accuracy = accuracy_score(y_test, model.predict(X_test_scaled))
print(f"Test accuracy: {test_accuracy:.4f}")

# 12. Save Model and Scaler
# The feature column list is saved alongside the model so that inference code
# can rebuild the dummy-encoded columns in the same order used for training.
joblib.dump(model, 'churn_model.pkl')
joblib.dump(scaler, 'scaler.pkl')
joblib.dump(X.columns.tolist(), 'feature_columns.pkl')  # Save feature order


# Cell output: ['feature_columns.pkl']