In [4]:


import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score
import matplotlib.pyplot as plt

# Telco customer-churn baseline: load and clean the CSV, one-hot encode the
# categorical columns, fit a logistic regression, and report the confusion
# matrix and ROC AUC on a 20% hold-out split.

# 1-5. Load and clean the data in one non-mutating chain, so re-running the
#      cell always starts again from the file on disk.
# NOTE(review): absolute, machine-specific path — consider a configurable data
# directory so the notebook can run on another machine.
df = (
    pd.read_csv("C:\\Users\\KShah1\\Downloads\\Customer Churn\\WA_Fn-UseC_-Telco-Customer-Churn.csv")
    # 2. Drop unused column
    .drop(columns="customerID")
    # 3. Convert TotalCharges to numeric; unparseable values become NaN
    .assign(TotalCharges=lambda frame: pd.to_numeric(frame["TotalCharges"], errors="coerce"))
    # 4. Drop missing rows (including the NaNs produced by the coercion above)
    .dropna()
    # 5. Encode Target Variable
    .assign(Churn=lambda frame: frame["Churn"].map({"No": 0, "Yes": 1}))
)

# 6. One-hot encode categorical variables
#    (Churn is already numeric at this point, so it is not in this list.)
categorical_cols = df.select_dtypes(include="object").columns.tolist()

df = pd.get_dummies(df, columns=categorical_cols, drop_first=True)

# 7. Split Features and Target
X = df.drop(columns="Churn")
y = df["Churn"]

# 8. Train-test split
#    Split BEFORE scaling so the scaler never sees the hold-out rows.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# 9. Standardize features
#    Fit on the training rows only and reuse those statistics for the test
#    rows; fitting on the full dataset leaks test-set mean/variance into
#    training. Metrics may differ marginally from a run that scaled first.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)
# Full feature matrix scaled with the training statistics; kept under its
# original name in case other cells reference it.
X_scaled = scaler.transform(X)

# 10. Train Logistic Regression model
model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)

# 11. Predictions
y_pred = model.predict(X_test)
y_prob = model.predict_proba(X_test)[:, 1]  # probability of the positive class (Churn == 1)

# 12. Evaluation
#     Compute the matrix before printing it; printing first raises NameError
#     on a fresh kernel.
cm = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:", cm)

print("\nROC AUC Score:", round(roc_auc_score(y_test, y_prob), 3))


Confusion Matrix: [[915 118]
 [181 193]]

ROC AUC Score: 0.832
