In [6]:
import pandas as pd
import joblib

# Score a small random sample of customers with the persisted churn model and
# list them from highest to lowest predicted churn probability.

# Load the raw customer table from the workbook in the working directory.
df = pd.read_excel("Telco_customer_churn.xlsx")

# "Total Charges" is coerced to numeric (unparseable entries become NaN) and
# the gaps are then filled with the median of the coerced column.
df["Total Charges"] = pd.to_numeric(df["Total Charges"], errors="coerce").pipe(
    lambda charges: charges.fillna(charges.median())
)

# Columns removed before modelling; any that are absent from the workbook are
# skipped silently.
drop_cols = [
    "CustomerID",
    "Count",
    "Lat Long",
    "Latitude",
    "Longitude",
    "Churn Reason",
]
df_model = df.drop(columns=drop_cols, errors="ignore")

# Feature matrix for the full table (target column removed).
# NOTE(review): X is not used below in this cell; presumably kept for later
# cells -- confirm.
X = df_model.drop(columns=["Churn Label"])

# Restore the fitted estimator from disk.
# joblib.load unpickles the file -- only load artifacts from a trusted source.
model = joblib.load("logistic_regression_churn_model.joblib")

# Draw 5 customers reproducibly (fixed seed). `sample` keeps every original
# column, including CustomerID, so results can be reported per customer.
sample = df.sample(n=5, random_state=42)

# Model input: same column removal as above, except CustomerID is retained,
# and the target column is removed when present.
# NOTE(review): CustomerID is therefore passed to the model; presumably the
# fitted pipeline ignores it -- confirm against the training code.
test_data = sample.drop(
    columns=[c for c in drop_cols if c != "CustomerID"] + ["Churn Label"],
    errors="ignore",
)

# Attach the predicted class and the probability taken from column 1 of
# predict_proba.
# NOTE(review): the probabilities in the recorded output are very close to
# 0 or 1; verify that no label-derived column remains among the features.
sample = sample.assign(
    Predicted_Churn=model.predict(test_data),
    Churn_Probability=model.predict_proba(test_data)[:, 1],
)

# Keep only the reporting columns, highest churn probability first.
sample_sorted = (
    sample.loc[:, ["CustomerID", "Predicted_Churn", "Churn_Probability"]]
    .sort_values("Churn_Probability", ascending=False)
)

print(sample_sorted)


      CustomerID  Predicted_Churn  Churn_Probability
185   2189-WWOEW                1           0.997929
132   9412-GHEEC                1           0.995180
1807  5868-YWPDW                1           0.995029
2715  2446-ZKVAF                0           0.008677
3825  4986-MXSFP                0           0.002512


In [10]:
import pandas as pd
import joblib

# Score a random sample of customers with the persisted churn model and list
# them from highest to lowest predicted churn probability.

# Tunable settings for this cell. The earlier comment said "5 customers"
# while the code drew 100; the size is now stated once, here.
N_CUSTOMERS = 100   # how many customers to draw
SAMPLE_SEED = 42    # fixed seed so the same customers are drawn on every run
ID_COL = "CustomerID"
TARGET_COL = "Churn Label"


def score_customers(customers, estimator, excluded_cols, id_col=ID_COL, target_col=TARGET_COL):
    """Predict churn for every row of ``customers``.

    Parameters
    ----------
    customers : pandas.DataFrame
        Rows to score. Not modified.
    estimator : object
        Fitted model exposing ``predict`` and ``predict_proba``.
    excluded_cols : list of str
        Columns to remove before prediction. Missing ones are skipped.
    id_col : str
        Column kept in the model input even if listed in ``excluded_cols``.
        NOTE(review): this mirrors the original cell, which passed the ID to
        the model; presumably the fitted pipeline ignores it -- confirm
        against the training code.
    target_col : str
        Label column, removed from the model input when present.

    Returns
    -------
    (scored, features) : tuple of pandas.DataFrame
        ``scored`` is a copy of ``customers`` with ``Predicted_Churn`` and
        ``Churn_Probability`` appended; ``features`` is the exact frame that
        was passed to the estimator.
    """
    to_remove = [c for c in excluded_cols if c != id_col] + [target_col]
    features = customers.drop(columns=to_remove, errors="ignore")
    scored = customers.assign(
        Predicted_Churn=estimator.predict(features),
        # Column 1 of predict_proba is taken as the churn probability.
        Churn_Probability=estimator.predict_proba(features)[:, 1],
    )
    return scored, features


# Reload dataset
df = pd.read_excel("Telco_customer_churn.xlsx")

# Preprocessing: coerce "Total Charges" to numeric (unparseable entries become
# NaN), then fill the gaps with the column median.
df["Total Charges"] = pd.to_numeric(df["Total Charges"], errors="coerce")
df["Total Charges"] = df["Total Charges"].fillna(df["Total Charges"].median())

drop_cols = ["CustomerID", "Count", "Lat Long", "Latitude", "Longitude", "Churn Reason"]
df_model = df.drop(columns=[c for c in drop_cols if c in df.columns])

# Feature matrix for the full table (target column removed).
# NOTE(review): X is not used below in this cell; presumably kept for later
# cells -- confirm.
X = df_model.drop(columns=[TARGET_COL])

# Load model
# joblib.load unpickles the file -- only load artifacts from a trusted source.
model = joblib.load("logistic_regression_churn_model.joblib")

# Pick N_CUSTOMERS random customers (all original columns, including the ID,
# are kept). The size is capped at the table length so a small workbook does
# not make DataFrame.sample raise.
sample = df.sample(min(N_CUSTOMERS, len(df)), random_state=SAMPLE_SEED)

# Predictions
# NOTE(review): the probabilities in the recorded output are very close to
# 0 or 1; verify that no label-derived column remains among the features.
sample, test_data = score_customers(sample, model, drop_cols)

# Select the reporting columns and sort by risk (highest probability first).
sample_sorted = sample[[ID_COL, "Predicted_Churn", "Churn_Probability"]].sort_values(
    by="Churn_Probability", ascending=False
)

print(sample_sorted)


      CustomerID  Predicted_Churn  Churn_Probability
233   6513-EECDB                1           0.998622
1345  8020-BWHYL                1           0.998142
185   2189-WWOEW                1           0.997929
1188  4910-AQFFX                1           0.997924
1090  9821-POOTN                1           0.997578
...          ...              ...                ...
2622  7649-SIJJF                0           0.000909
6006  5286-YHCVC                0           0.000726
5194  5329-KRDTM                0           0.000667
6185  5093-FEGLU                0           0.000518
6685  4891-NLUBA                0           0.000395

[100 rows x 3 columns]
