In [1]:
import pandas as pd
import pickle
from sklearn.preprocessing import MinMaxScaler


def preprocess_data(input_data, scaler, numerical_features, label_encoded_features, binary_features):
    """
    Preprocess the input data so it is compatible with the trained model.

    The numerical and label encoded columns are passed through
    ``scaler.transform``; the binary columns are copied through unchanged.
    The returned frame holds the scaled columns first (numerical, then label
    encoded) followed by the binary columns, on a fresh 0..n-1 index.

    Args:
        input_data (dict or pd.DataFrame): Input data for prediction. A dict
            may map column names to sequences (several rows) or to scalar
            values (a single record).
        scaler: Fitted scaler object exposing ``transform`` (for example a
            fitted MinMaxScaler).
        numerical_features (list): List of numerical feature names.
        label_encoded_features (list): List of label encoded feature names.
        binary_features (list): List of binary feature names.

    Returns:
        pd.DataFrame: Preprocessed input data.

    Raises:
        KeyError: If any listed feature is missing from the input data.
    """
    if isinstance(input_data, pd.DataFrame):
        input_df = input_data
    elif (
        isinstance(input_data, dict)
        and input_data
        and all(pd.api.types.is_scalar(value) for value in input_data.values())
    ):
        # A dict of plain scalars describes one record; pd.DataFrame(dict)
        # would raise "If using all scalar values, you must pass an index".
        input_df = pd.DataFrame([input_data])
    else:
        input_df = pd.DataFrame(input_data)

    # Accept any sequence type for the feature names, not only lists.
    scaled_columns = list(numerical_features) + list(label_encoded_features)
    binary_columns = list(binary_features)

    # Fail early with a message that names every missing column.
    missing = [name for name in scaled_columns + binary_columns if name not in input_df.columns]
    if missing:
        raise KeyError(f"Input data is missing required feature(s): {missing}")

    # Scale the numerical and label encoded features
    scaled_values = scaler.transform(input_df[scaled_columns])
    scaled_df = pd.DataFrame(scaled_values, columns=scaled_columns, index=input_df.index)

    # Both parts get a fresh positional index so concat pairs rows by
    # position; the index of the input is intentionally not preserved.
    return pd.concat(
        [scaled_df.reset_index(drop=True), input_df[binary_columns].reset_index(drop=True)],
        axis=1,
    )


# Load the trained model (pipeline)
model_path = 'xgboost_model.sav'  # Replace with the actual path to your .sav file
try:
    # NOTE: unpickling can execute arbitrary code, so only load files that
    # come from a trusted source.
    with open(model_path, 'rb') as file:
        pipeline = pickle.load(file)
except Exception as e:
    # Loading can fail in many ways (missing file, corrupt data, missing
    # class definitions), so this script-level boundary catches broadly.
    print(f"Error loading model pipeline: {e}")
    # Stop with a non-zero status. A bare exit() is the interactive `site`
    # helper and would end the process with exit status 0.
    raise SystemExit(1) from e
else:
    print("Model pipeline successfully loaded.")

# Load the scaler
scaler_path = 'minmax_scaler.sav'  # Replace with the actual path to your .sav file
try:
    # NOTE: unpickling can execute arbitrary code, so only load files that
    # come from a trusted source.
    with open(scaler_path, 'rb') as file:
        scaler = pickle.load(file)
except Exception as e:
    # Loading can fail in many ways (missing file, corrupt data, missing
    # class definitions), so this script-level boundary catches broadly.
    print(f"Error loading scaler: {e}")
    # Stop with a non-zero status. A bare exit() is the interactive `site`
    # helper and would end the process with exit status 0.
    raise SystemExit(1) from e
else:
    print(f"Scaler successfully loaded from {scaler_path}")


# Sample input for a prediction run: every key is a feature column and every
# list holds one value per record (four records in total).
new_data = {
    'total_unsuccessful_calls': [1, 5, 0, 2],
    'CustomerServiceInteractionRatio': [0.1, 0.5, 0.0, 0.2],
    'MinutesOverUsage': [0.2, 1.0, 0.0, 0.3],
    'TotalRevenueGenerated': [70, 150, 40, 90],
    'TotalCallFeaturesUsed': [3, 1, 2, 0],
    'RetentionCalls': [0, 1, 0, 0],
    'RetentionOffersAccepted': [0, 0, 0, 0],
    'MadeCallToRetentionTeam': [0, 1, 0, 0],
    'AdjustmentsToCreditRating': [0, -1, 0, 1],
    'MonthlyRevenue': [60, 120, 35, 80],
    'TotalRecurringCharge': [10, 30, 5, 10],
    'OverageMinutes': [10, 50, 0, 15],
    'MonthsInService': [12, 30, 5, 10],
    'PercChangeMinutes': [0.05, -0.2, 0.1, 0.0],
    'PercChangeRevenues': [-0.1, 0.1, 0.0, -0.05],
    'HandsetPrice': [200, 600, 100, 300],
    'CreditRating': [2, 5, 1, 3],
    'IncomeGroup': [3, 7, 2, 4],
    'AgeHH1': [35, 50, 25, 40],
    'AgeHH2': [30, 45, 20, 35],
    'ChildrenInHH': [1, 2, 0, 1],
}

# Define the features
# Columns that are scaled as plain numeric values.
numerical_features = [
    'total_unsuccessful_calls',
    'CustomerServiceInteractionRatio',
    'MinutesOverUsage',
    'TotalRevenueGenerated',
    'TotalCallFeaturesUsed',
    'MonthlyRevenue',
    'TotalRecurringCharge',
    'OverageMinutes',
    'MonthsInService',
    'PercChangeMinutes',
    'PercChangeRevenues',
    'HandsetPrice',
]
# Label encoded columns; these are scaled together with the numerical ones.
label_encoded_features = [
    'CreditRating',
    'IncomeGroup',
    'AgeHH1',
    'AgeHH2',
    'ChildrenInHH',
]
# Columns passed to the model without scaling.
# NOTE(review): 'AdjustmentsToCreditRating' is listed as binary, but the sample
# data in this file holds -1 and 1 for it - confirm this matches training.
binary_features = [
    'RetentionCalls',
    'RetentionOffersAccepted',
    'MadeCallToRetentionTeam',
    'AdjustmentsToCreditRating',
]

# Preprocess the data
try:
    preprocessed_data = preprocess_data(new_data, scaler, numerical_features, label_encoded_features, binary_features)
except Exception as e:
    print(f"Error preprocessing data: {e}")
    # Stop with a non-zero status. A bare exit() is the interactive `site`
    # helper and would end the process with exit status 0.
    raise SystemExit(1) from e

# Make predictions
try:
    predictions = pipeline.predict(preprocessed_data)
    # Column 1 of predict_proba is the probability for class 1 (churn)
    probabilities = pipeline.predict_proba(preprocessed_data)[:, 1]
except Exception as e:
    print(f"Error during prediction: {e}")
    # Stop with a non-zero status. A bare exit() is the interactive `site`
    # helper and would end the process with exit status 0.
    raise SystemExit(1) from e

# Gather the predicted labels and the churn probabilities into one table,
# one row per input record, and print it.
result_df = pd.DataFrame({'predicted_churn': predictions})
result_df['churn_probability'] = probabilities
print(result_df)

Model pipeline successfully loaded.
Scaler successfully loaded from minmax_scaler.sav
   predicted_churn  churn_probability
0                1           0.511046
1                1           0.510281
2                0           0.456997
3                1           0.504011
