In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from xgboost import XGBClassifier
from sklearn.metrics import classification_report, accuracy_score

In [2]:
df_encoded_balanced_fixed = pd.read_csv("df_encoded_balanced_fixed.csv")

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from xgboost import XGBClassifier

columns_to_drop = ["customer_id", "churn_reason"]  
df_encoded_clean = df_encoded_balanced_fixed.drop(columns=columns_to_drop, errors="ignore")

df_encoded_clean["churn_probability"] = df_encoded_clean["churn_probability"].map({"Low": 0, "Medium": 1, "High": 1})

df_encoded_strategy = df_encoded_clean.copy()

X_churn = df_encoded_clean.drop(columns=["churn_probability", "recommended_retention_strategy"])
y_churn = df_encoded_clean["churn_probability"]

X_train_churn, X_test_churn, y_train_churn, y_test_churn = train_test_split(
    X_churn, y_churn, test_size=0.2, random_state=42, stratify=y_churn
)

xgb_model = XGBClassifier(
    n_estimators=500,        
    learning_rate=0.01,      
    max_depth=8,             
    subsample=0.8,           
    colsample_bytree=0.8,    
    gamma=0.1,               
    min_child_weight=3,      
    random_state=42,
    use_label_encoder=False,
    eval_metric="logloss"
)
xgb_model.fit(X_train_churn, y_train_churn)

y_pred_xgb = xgb_model.predict(X_test_churn)

accuracy_xgb = accuracy_score(y_test_churn, y_pred_xgb)
classification_rep_xgb = classification_report(y_test_churn, y_pred_xgb)

print(f"Accuracy: {accuracy_xgb:.4f}")
print("Classification Report:")
print(classification_rep_xgb)

Parameters: { "use_label_encoder" } are not used.



Accuracy: 0.8019
Classification Report:
              precision    recall  f1-score   support

           0       0.81      0.83      0.82       578
           1       0.79      0.77      0.78       482

    accuracy                           0.80      1060
   macro avg       0.80      0.80      0.80      1060
weighted avg       0.80      0.80      0.80      1060



In [None]:
import random
import pandas as pd

random_client = pd.DataFrame([{
    "tariff_plan": random.choice(["Fiber Only (Basic)", "Mobile - Unlimited", "Fiber + TV + Mobile (Premium)", "Fiber + TV (Basic)"]),
    "contract_type": random.choice(["Monthly", "Annual"]),
    "customer_type": random.choice(["Single", "Family", "Couple"]),
    "income_level": random.choice(["Low", "Medium", "High"]),
    "contract_satisfaction": random.choice(["Low", "Medium", "High"]),
    "monthly_price": round(random.uniform(20, 120), 2),
    "years_in_company": random.randint(0, 10),
    "monthly_usage_hours": random.randint(10, 500),
    "num_family_lines": random.randint(0, 5),
    "age": random.randint(18, 80)
}])

random_client_encoded = pd.get_dummies(random_client, dtype=int)

random_client_encoded = random_client_encoded.reindex(columns=X_train_churn.columns, fill_value=0)

predicted_proba = xgb_model.predict_proba(random_client_encoded)[0][1]

def classify_churn(probability):
    if probability <= 0.33:
        return "Low"
    elif 0.34 <= probability <= 0.65:
        return "Medium"
    else:
        return "High"

churn_label = classify_churn(predicted_proba)

random_client_info = random_client.to_dict(orient="records")[0]
print("\n🎯 **Generated Client:**")
for key, value in random_client_info.items():
    print(f"   - {key}: {value}")

print("\n📊 **Churn Prediction:**")
print(f"   - Final Classification: {churn_label}")


🎯 **Generated Client:**
   - tariff_plan: Fiber Only (Basic)
   - contract_type: Monthly
   - customer_type: Couple
   - income_level: High
   - contract_satisfaction: Low
   - monthly_price: 42.67
   - years_in_company: 6
   - monthly_usage_hours: 184
   - num_family_lines: 5
   - age: 31

📊 **Churn Prediction:**
   - Final Classification: High


In [None]:
import pandas as pd
import numpy as np
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

file_path = "df_encoded_balanced_fixed.csv"
df_encoded = pd.read_csv(file_path)

columns_to_drop = ["customer_id", "churn_reason"]
df_encoded = df_encoded.drop(columns=columns_to_drop, errors="ignore")

df_encoded["churn_probability"] = pd.to_numeric(df_encoded["churn_probability"], errors="coerce")

X_churn = df_encoded.drop(columns=["churn_probability", "recommended_retention_strategy"])
df_encoded["churn_proba"] = xgb_model.predict_proba(X_churn)[:, 1] 

df_high_churn = df_encoded[df_encoded["churn_proba"] > 0.66].copy() 

strategies = [
    "10% Discount",
    "Contract Freeze",
    "Dedicated Customer Support",
    "Extra Data Package",
    "Loyalty Bonus",
    "Upgrade to Better Plan",
    "iPhone Discount or Free Device"
]

strategy_models = {}

for strategy in strategies:
    print(f"📌 Training model for strategy: {strategy}...")

    df_high_churn["strategy_applied"] = (df_high_churn["recommended_retention_strategy"] == strategy).astype(int)

    X_strategy = df_high_churn.drop(columns=["recommended_retention_strategy", "strategy_applied", "churn_proba"])
    y_strategy = df_high_churn["strategy_applied"]

    if len(y_strategy.unique()) < 2:
        print(f"⚠️ Skipping {strategy}: Not enough data for both classes.")
        continue

    X_train_strat, X_test_strat, y_train_strat, y_test_strat = train_test_split(
        X_strategy, y_strategy, test_size=0.2, random_state=42, stratify=y_strategy
    )

    model = XGBClassifier(
        n_estimators=300,
        learning_rate=0.05,
        max_depth=6,
        objective="binary:logistic",
        eval_metric="logloss",
        random_state=42
    )
    model.fit(X_train_strat, y_train_strat)

    accuracy = accuracy_score(y_test_strat, model.predict(X_test_strat))
    print(f"✅ Accuracy for {strategy}: {accuracy:.4f}")

    strategy_models[strategy] = model

print("\n✅ All strategy models trained successfully!")

📌 Training model for strategy: 10% Discount...
✅ Accuracy for 10% Discount: 0.9268
📌 Training model for strategy: Contract Freeze...
✅ Accuracy for Contract Freeze: 0.9024
📌 Training model for strategy: Dedicated Customer Support...
✅ Accuracy for Dedicated Customer Support: 0.9329
📌 Training model for strategy: Extra Data Package...
✅ Accuracy for Extra Data Package: 0.9329
📌 Training model for strategy: Loyalty Bonus...
✅ Accuracy for Loyalty Bonus: 0.9268
📌 Training model for strategy: Upgrade to Better Plan...
✅ Accuracy for Upgrade to Better Plan: 0.9329
📌 Training model for strategy: iPhone Discount or Free Device...
✅ Accuracy for iPhone Discount or Free Device: 0.9146

✅ All strategy models trained successfully!


In [None]:
import random
import pandas as pd

random_client = pd.DataFrame([{
    "tariff_plan": random.choice(["Fiber Only (Basic)", "Mobile - Unlimited", "Fiber + TV + Mobile (Premium)", "Fiber + TV (Basic)"]),
    "contract_type": random.choice(["Monthly", "Annual"]),
    "customer_type": random.choice(["Single", "Family", "Couple"]),
    "income_level": random.choice(["Low", "Medium", "High"]),
    "contract_satisfaction": random.choice(["Low", "Medium", "High"]),
    "monthly_price": round(random.uniform(20, 120), 2),
    "years_in_company": random.randint(0, 10),
    "monthly_usage_hours": random.randint(10, 500),
    "num_family_lines": random.randint(0, 5),
    "age": random.randint(18, 80)
}])

random_client_encoded = pd.get_dummies(random_client, dtype=int)

random_client_encoded = random_client_encoded.reindex(columns=X_train_churn.columns, fill_value=0)

predicted_proba = xgb_model.predict_proba(random_client_encoded)[0][1]

def classify_churn(probability):
    if probability <= 0.33:
        return "Low"
    elif 0.34 <= probability <= 0.65:
        return "Medium"
    else:
        return "High"

churn_label = classify_churn(predicted_proba)

random_client_info = random_client.to_dict(orient="records")[0]
print("\n🎯 **Generated Client:**")
for key, value in random_client_info.items():
    print(f"   - {key}: {value}")

print("\n📊 **Churn Prediction:**")
print(f"   - Final Classification: {churn_label}")

if churn_label == "High":
    print("\n🚨 **High churn risk detected! Applying retention strategy...**")

    random_client_encoded = random_client_encoded.reindex(columns=X_strategy.columns, fill_value=0)

    def recommend_best_strategy(client_data, strategy_models):
        """
        Predicts the best retention strategy for a high-churn client by testing all strategies.
        """
        if not strategy_models:
            print("⚠️ No strategy models available.")
            return None, 0.0, {}

        best_strategy = None
        best_success_probability = 0.0
        strategy_probabilities = {}

        for strategy, model in strategy_models.items():
            success_proba = model.predict_proba(client_data)[0][1]  
            strategy_probabilities[strategy] = success_proba

            if success_proba > best_success_probability:
                best_success_probability = success_proba
                best_strategy = strategy

        return best_strategy, best_success_probability, strategy_probabilities

    best_strategy, best_success_probability, all_probabilities = recommend_best_strategy(random_client_encoded, strategy_models)

    if best_strategy:
        print("📉 **Success probabilities per strategy:**")
        for strategy, prob in all_probabilities.items():
            print(f"   - {strategy}: {round(prob, 2)}")

        print(f"\n🎯 **Best recommended strategy:** {best_strategy} (Success Probability: {round(best_success_probability, 2)})")
    else:
        print("⚠️ No valid strategy was found.")



🎯 **Generated Client:**
   - tariff_plan: Fiber + TV + Mobile (Premium)
   - contract_type: Annual
   - customer_type: Couple
   - income_level: Medium
   - contract_satisfaction: Low
   - monthly_price: 32.72
   - years_in_company: 2
   - monthly_usage_hours: 141
   - num_family_lines: 5
   - age: 68

📊 **Churn Prediction:**
   - Final Classification: High

🚨 **High churn risk detected! Applying retention strategy...**
📉 **Success probabilities per strategy:**
   - 10% Discount: 0.019999999552965164
   - Contract Freeze: 0.019999999552965164
   - Dedicated Customer Support: 0.0
   - Extra Data Package: 0.0
   - Loyalty Bonus: 0.019999999552965164
   - Upgrade to Better Plan: 0.10000000149011612
   - iPhone Discount or Free Device: 0.03999999910593033

🎯 **Best recommended strategy:** Upgrade to Better Plan (Success Probability: 0.10000000149011612)


In [None]:
import pandas as pd

def input_client_data():
    """
    Allows user to input customer details manually and converts it into a DataFrame.
    """
    client = pd.DataFrame([{
        "tariff_plan": input("Enter Tariff Plan (e.g., Fiber + TV, Mobile - Unlimited): "),
        "contract_type": input("Enter Contract Type (Monthly/Annual): "),
        "customer_type": input("Enter Customer Type (Single/Family/Couple): "),
        "income_level": input("Enter Income Level (Low/Medium/High): "),
        "contract_satisfaction": input("Enter Contract Satisfaction (Low/Medium/High): "),
        "monthly_price": float(input("Enter Monthly Price ($): ")),
        "years_in_company": int(input("Enter Years in Company: ")),
        "monthly_usage_hours": int(input("Enter Monthly Usage Hours: ")),
        "num_family_lines": int(input("Enter Number of Family Lines: ")),
        "age": int(input("Enter Age: "))
    }])

    return client

client_df = input_client_data()

client_encoded = pd.get_dummies(client_df, dtype=int)

client_encoded = client_encoded.reindex(columns=X_train_churn.columns, fill_value=0)

predicted_proba = xgb_model.predict_proba(client_encoded)[0][1] 

def classify_churn(probability):
    if probability <= 0.33:
        return "Low"
    elif 0.34 <= probability <= 0.65:
        return "Medium"
    else:
        return "High"

churn_label = classify_churn(predicted_proba)

print("\n🎯 **Client Data:**")
for key, value in client_df.to_dict(orient="records")[0].items():
    print(f"   - {key}: {value}")

print("\n📊 **Churn Prediction:**")
print(f"   - Final Classification: {churn_label}")

if churn_label == "High":
    print("\n🚨 **High churn risk detected! Applying retention strategy...**")

    client_encoded = client_encoded.reindex(columns=X_train_strategy.columns, fill_value=0)

    def recommend_best_strategy(client_data, strategy_models):
        """
        Predicts the best retention strategy for a high-churn client by testing all strategies.
        """
        if not strategy_models:
            print("⚠️ No strategy models available.")
            return None, 0.0, {}

        best_strategy = None
        best_success_probability = 0.0
        strategy_probabilities = {}

        for strategy, model in strategy_models.items():
            success_proba = model.predict_proba(client_data)[0][1]  
            strategy_probabilities[strategy] = success_proba

            if success_proba > best_success_probability:
                best_success_probability = success_proba
                best_strategy = strategy

        return best_strategy, best_success_probability, strategy_probabilities

    best_strategy, best_success_probability, all_probabilities = recommend_best_strategy(client_encoded, strategy_models)

    if best_strategy:
        print("📉 **Success probabilities per strategy:**")
        for strategy, prob in all_probabilities.items():
            print(f"   - {strategy}: {round(prob, 2)}")

        print(f"\n🎯 **Best recommended strategy:** {best_strategy} (Success Probability: {round(best_success_probability, 2)})")
    else:
        print("⚠️ No valid strategy was found.")



🎯 **Client Data:**
   - tariff_plan: Fiber + TV
   - contract_type: annual
   - customer_type: Couple
   - income_level: low
   - contract_satisfaction: low
   - monthly_price: 45.5
   - years_in_company: 4
   - monthly_usage_hours: 44
   - num_family_lines: 2
   - age: 27

📊 **Churn Prediction:**
   - Final Classification: Medium


In [None]:
import pickle

with open("xgb_model.pkl", "wb") as churn_file:
    pickle.dump(xgb_model, churn_file)
print("✅ Churn model saved as 'xgb_model.pkl'.")

for strategy, model in strategy_models.items():
    model_filename = f"strategy_model_{strategy.replace(' ', '_').replace('%', 'percent')}.pkl"
    with open(model_filename, "wb") as strategy_file:
        pickle.dump(model, strategy_file)
    print(f"✅ Strategy model for '{strategy}' saved as '{model_filename}'.")

✅ Churn model saved as 'xgb_model.pkl'.
✅ Strategy model for '10% Discount' saved as 'strategy_model_10percent_Discount.pkl'.
✅ Strategy model for 'Contract Freeze' saved as 'strategy_model_Contract_Freeze.pkl'.
✅ Strategy model for 'Dedicated Customer Support' saved as 'strategy_model_Dedicated_Customer_Support.pkl'.
✅ Strategy model for 'Extra Data Package' saved as 'strategy_model_Extra_Data_Package.pkl'.
✅ Strategy model for 'Loyalty Bonus' saved as 'strategy_model_Loyalty_Bonus.pkl'.
✅ Strategy model for 'Upgrade to Better Plan' saved as 'strategy_model_Upgrade_to_Better_Plan.pkl'.
✅ Strategy model for 'iPhone Discount or Free Device' saved as 'strategy_model_iPhone_Discount_or_Free_Device.pkl'.
