In [10]:
from joblib import load
import numpy as np
import pandas as pd

In [11]:
# Load the trained model encoders, scalers, and label encoders

pipeline_path = r"D:\Portfolio Github\customer-churn-prediction\pipeline"
scaler = {
 'Tenure Months': load(f"{pipeline_path}\Tenure Months_scaler.joblib"),
 'Monthly Charges': load(f"{pipeline_path}\Monthly Charges_scaler.joblib"),
 'CLTV': load(f"{pipeline_path}\CLTV_scaler.joblib"),

}
label_encoders = {
    'City': load(f"{pipeline_path}\City_label_encoder.joblib"),
    'Zip Code': load(f"{pipeline_path}\Zip Code_label_encoder.joblib"),
    'Internet Service': load(f"{pipeline_path}\Internet Service_label_encoder.joblib"),
    'Online Security': load(f"{pipeline_path}\Online Security_label_encoder.joblib"),
    'Tech Support': load(f"{pipeline_path}\Tech Support_label_encoder.joblib"),
    'Contract': load(f"{pipeline_path}\Contract_label_encoder.joblib"),
    'Dependents': load(f"{pipeline_path}\Dependents_label_encoder.joblib"),
}

In [12]:
def preprocess_data(df):
    # Select relevant features
    features = ['Zip Code', 'Dependents', 'Tenure Months', 'Internet Service', 'Online Security', 'Tech Support', 'Contract', 'Monthly Charges', 'CLTV']
    df = df[features]

    # Handle missing values
    df.fillna(method='ffill', inplace=True)

    # Encode categorical features using LabelEncoders
    categorical_features = ['Zip Code','Internet Service', 'Online Security', 'Tech Support', 'Contract', 'Dependents']
    for feature in categorical_features:
        try:
            df[feature] = label_encoders[feature].transform(df[feature])
        except ValueError as e:
            unseen_label = str(e).split(': ')[1]
            print(f"Warning: Unseen label '{unseen_label}' encountered. Assigning default value.")
            df[feature] = df[feature].apply(lambda x: label_encoders[feature].transform([x])[0] if x in label_encoders[feature].classes_ else np.nan)
            df[feature].fillna(label_encoders[feature].transform([label_encoders[feature].classes_[0]])[0], inplace=True)  # Assign a default value
    
    # Scale numerical features
    numerical_features = ['Tenure Months', 'Monthly Charges', 'CLTV']
    for feature in numerical_features:
        df[[feature]] = scaler[feature].transform(df[[feature]])

    return df

In [13]:
def load_model():
    model = load(f"{pipeline_path}/xgb.pkl")
    return model

model = load_model()

In [14]:
df = pd.read_csv(r"D:/customer-churn-prediction/sample_customer_churn.csv")

In [15]:
df.columns

Index(['CustomerID', 'Count', 'Zip Code', 'City', 'Gender', 'Senior Citizen',
       'Partner', 'Dependents', 'Tenure Months', 'Phone Service',
       'Multiple Lines', 'Internet Service', 'Online Security',
       'Online Backup', 'Device Protection', 'Tech Support', 'Streaming TV',
       'Streaming Movies', 'Contract', 'Paperless Billing', 'Payment Method',
       'Monthly Charges', 'Total Charges', 'CLTV'],
      dtype='object')

In [16]:
# Preprocess the data
preprocessed_data = preprocess_data(df)



  df.fillna(method='ffill', inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.fillna(method='ffill', inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[feature] = df[feature].apply(lambda x: label_encoders[feature].transform([x])[0] if x in label_encoders[feature].classes_ else np.nan)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(

In [19]:


# Make predictions
df['Predicted Churn Status'] = model.predict(preprocessed_data)
df['Churn Probability'] = model.predict_proba(preprocessed_data)[:, 1]

# Convert predictions to human-readable format
df['Predicted Churn Status'] = df['Predicted Churn Status'].map({1: 'Churn', 0: 'No Churn'})
df['Churn Probability'] = df['Churn Probability'].apply(lambda x: f"{x:.2f}")

def generate_insights(churn_status):
    if churn_status == "Churn":
        risk_level = "High"
        next_steps = "Immediate Action Required: Engage the customer with personalized offers or support."
        retention_strategy = "Consider offering discounts or loyalty programs."
        revenue_impact = "The potential loss of revenue is significant. Immediate action is required to prevent churn."
    else:
        risk_level = "Low"
        next_steps = "Monitor: Continue to monitor the customer’s activity and engagement levels."
        retention_strategy = "Consider offering a small incentive to maintain the positive relationship."
        revenue_impact = "The immediate risk to revenue is minimal, but proactive engagement can further reduce this risk."

    return risk_level, next_steps, retention_strategy, revenue_impact


# Save the results to a new CSV file
output_file = r"D:\Portfolio Github\customer-churn-prediction\churn_predictions.csv"
df.to_csv(output_file, index=False)


In [18]:
input_data = pd.DataFrame([
    [12345, "Fiber optic", "Yes", "Yes", "Month-to-month", "No", 12, 75.3, 1890.50]
])

# Preprocess the input data
preprocessed_data = preprocess_data(df)
    
# Make prediction
prediction = model.predict(preprocessed_data)
churn_prob = model.predict_proba(preprocessed_data)[0][1]
print(f'The predicted churn status is: {"Churn" if prediction[0] == 1 else "No Churn"}')
print(f'The probability of churn is: {churn_prob:.2f}')       

  df.fillna(method='ffill', inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.fillna(method='ffill', inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[feature] = df[feature].apply(lambda x: label_encoders[feature].transform([x])[0] if x in label_encoders[feature].classes_ else np.nan)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(

The predicted churn status is: No Churn
The probability of churn is: 0.34
