# In [3]:
from pathlib import Path

import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Load data
# Read the raw export, coerce TotalCharges to a numeric dtype (values that
# cannot be parsed become NaN), then discard every row that has a missing
# value in any column.
df = (
    pd.read_csv("C:/Users/Admin/Desktop/Customer-Churn-Prediction/data/raw/churn.csv")
    .assign(
        TotalCharges=lambda frame: pd.to_numeric(
            frame["TotalCharges"], errors="coerce"
        )
    )
    .dropna()
)

# Preserve original for BI
# Independent copy of the cleaned rows; the model outputs are attached to this
# frame later so the human-readable columns stay untouched by ML encoding.
df_bi = df.copy()

# ML preparation
# Features are every column except the row identifier and the target; the
# target is encoded as 1 for "Yes" and 0 for "No".
X = df.drop(["customerID", "Churn"], axis=1)
y = df["Churn"].map({"Yes": 1, "No": 0})

# One-hot encode the categorical columns, dropping the first level of each.
X = pd.get_dummies(X, drop_first=True)

# Split BEFORE scaling so the held-out rows never influence the statistics
# the scaler learns (fitting on the full matrix leaks test information).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Learn the per-column mean and variance from the training split only.
scaler = StandardScaler()
scaler.fit(X_train)


def _scale_frame(frame):
    """Return *frame* standardized by the train-fitted scaler.

    The result is a new DataFrame that keeps the original column names and
    row index, so it lines up with ``y`` and with ``df_bi``.
    """
    return pd.DataFrame(
        scaler.transform(frame), columns=frame.columns, index=frame.index
    )


X_train = _scale_frame(X_train)
X_test = _scale_frame(X_test)
# The full matrix is scaled with the same parameters because every row is
# scored with `X` further down.
X = _scale_frame(X)

# Train final model
# Fit a random forest on the training split; the fixed random_state keeps the
# fitted model reproducible between runs. `fit` returns the estimator itself.
model = RandomForestClassifier(random_state=42).fit(X_train, y_train)

# Predict churn probability
# Score every row of the full feature matrix. Rows that were part of the
# training split are included, so their scores are in-sample.
# Column 1 of predict_proba is the probability of class 1 (churn == "Yes").
df_bi["Churn_Probability"] = model.predict_proba(X)[:, 1]

# Label a customer "Yes" when the churn probability exceeds 0.5, else "No".
df_bi["Churn_Prediction"] = (df_bi["Churn_Probability"] > 0.5).map(
    {True: "Yes", False: "No"}
)

# Export for Power BI. Create the target folder first so the write does not
# fail when the processed-data directory has not been created yet.
output_path = Path("C:/Users/Admin/Desktop/Customer-Churn-Prediction/data/processed/churn_powerbi.csv")
output_path.parent.mkdir(parents=True, exist_ok=True)
df_bi.to_csv(output_path, index=False)

