In [1]:
import os
from getpass import getpass
from urllib.parse import quote_plus

import joblib
import numpy as np
import pandas as pd
from sqlalchemy import create_engine

# Load the persisted artifacts in one pass: two classifiers, the per-column
# label encoders, and the list of feature columns used for prediction.
# NOTE: joblib.load unpickles arbitrary Python objects, so these files must
# come from a trusted source.
model_1, model_2, label_encoders, features = (
    joblib.load(artifact_path)
    for artifact_path in (
        "RanCat_model_1.pkl",
        "RanCat_model_2.pkl",
        "labelRanCat_encoders.pkl",
        "modelRanCat_features.pkl",
    )
)

# Database connection setup
# Settings are read from the standard libpq environment variables so that no
# credential is stored in the notebook. Non-secret settings fall back to the
# local defaults this notebook previously hard-coded. The password has no
# default: it comes from PGPASSWORD, or is prompted for when that is unset
# (so unattended runs must export PGPASSWORD).
# NOTE(review): a password was previously committed here in plaintext and
# should be rotated.
db_config = {
    'host': os.environ.get('PGHOST', 'localhost'),
    'database': os.environ.get('PGDATABASE', 'postgres'),
    'user': os.environ.get('PGUSER', 'postgres'),
    'password': os.environ.get('PGPASSWORD') or getpass('PostgreSQL password: '),
    'port': os.environ.get('PGPORT', '5432')
}

# Percent-encode the user and password: characters such as '@', ':' or '/'
# would otherwise be parsed as URL delimiters and corrupt the connection URL.
connection_string = (
    f"postgresql://{quote_plus(db_config['user'])}:{quote_plus(db_config['password'])}"
    f"@{db_config['host']}:{db_config['port']}/{db_config['database']}"
)
engine = create_engine(connection_string)

# Columns kept from the source table, in the order they appear downstream
selected_columns = [
    'policy no',
    'renewal type',
    'product name',
    'product name 2',
    'biztype',
    'policy end date',
    'policy start date',
    'age',
    'manufacturer/make',
    'model',
    'variant',
    'vehicle segment',
    'fuel type',
    'rto location',
    'vehicle idv',
    'ncb amount',
    'Cleaned Reg no',
    'before gst add-on gwp',
    'total od premium',
    'total tp premium',
    'gst',
    'total premium payable',
    'ncb % previous year',
    'applicable discount with ncb',
    'Cleaned Branch Name 2',
    'Cleaned State2',
    'Cleaned Zone 2',
    'tie up',
    'Number of claims',
    'approved',
    'denied',
    'corrected_name',
    'customerid',
    'Policy Status',
    'Policy Tenure',
    'Customer Tenure',
    'New Customers',
    'Claim Happaned/Not',
    'Renewal Rate Status',
    'withdrawn',
    'chassis_engine_key',
    'policy_wise_purchase',
]

# Read the whole policy table (every status; open policies are filtered later),
# then narrow it to the selected columns
query = 'SELECT * FROM public.overall_cleaned_base_and_pr_ef_policyef;'
data = pd.read_sql(sql=query, con=engine)
data = data[selected_columns]

# Convert Policy End Date to datetime. Unparseable values become NaT, which
# never matches the year/month comparison below and is therefore excluded.
data['policy end date'] = pd.to_datetime(data['policy end date'], errors='coerce')

# Scoring window: policies whose end date falls in January-June 2025
# (the saved predictions file is suffixed "JFMAMJ" accordingly).
TARGET_END_YEAR = 2025
TARGET_END_MONTHS = [1, 2, 3, 4, 5, 6]

# Filter open customers (Jan - June 2025); .copy() detaches the subset so that
# later column assignments do not touch `data`.
open_customers = data[
    (data['Policy Status'] == 'Open') &
    (data['policy end date'].dt.year == TARGET_END_YEAR) &
    (data['policy end date'].dt.month.isin(TARGET_END_MONTHS))
].copy()

# Extract date features
date_columns = ['policy start date', 'policy end date']
for col in date_columns:
    open_customers[col] = pd.to_datetime(open_customers[col], errors='coerce')

# Components are added grouped by part (YEAR, then MONTH, then DAY), with the
# start date before the end date inside each group; this fixes the order of
# the appended columns.
open_customers_new_date_cols = {}
for suffix, component in (('YEAR', 'year'), ('MONTH', 'month'), ('DAY', 'day')):
    for col in date_columns:
        open_customers_new_date_cols[f'{col}_{suffix}'] = getattr(open_customers[col].dt, component)

# Append the numeric parts and drop the raw datetime columns
date_parts = pd.DataFrame(open_customers_new_date_cols)
open_customers = pd.concat([open_customers, date_parts], axis=1).drop(columns=date_columns)

# Handle missing values: object-dtype columns get the sentinel 'missing',
# every other dtype gets 0
for column in open_customers.columns:
    is_object_column = open_customers[column].dtype == 'object'
    open_customers[column] = open_customers[column].fillna('missing' if is_object_column else 0)

# Label Encoding for open customers using dynamic mapping
def build_value_encoder(known_classes):
    """Return a function mapping a raw value to its integer code.

    Values present in ``known_classes`` get their position in that sequence.
    Any other value is given the next unused integer (starting one past the
    largest existing code) the first time it is seen, and keeps that code on
    later occurrences.

    NOTE(review): codes handed out to previously unseen values were not part
    of the saved encoder, so the models presumably never saw them - confirm
    this is acceptable.
    """
    code_of = {label: position for position, label in enumerate(known_classes)}
    next_code = max(code_of.values()) + 1

    def encode(value):
        nonlocal next_code
        if value not in code_of:
            code_of[value] = next_code
            next_code += 1
        return code_of[value]

    return encode


# Work on a copy so the unencoded frame stays available for the final report
open_customers_encoded = open_customers.copy()

for column in open_customers_encoded.columns:
    # Columns without a saved encoder are passed through unchanged
    if column not in label_encoders:
        continue
    encode_value = build_value_encoder(label_encoders[column].classes_)
    open_customers_encoded[column] = open_customers_encoded[column].apply(encode_value)


# Ensemble weights (adjustable); the two weights sum to 1
weight_1 = 0.45  # Model 1 works better for class 0
weight_2 = 0.55  # Model 2 works better for class 1

# Select the columns listed in `features`, in that order
X_open_customers = open_customers_encoded[features]

# Class-probability predictions from each model for the open customers
probs_1_open, probs_2_open = (
    model.predict_proba(X_open_customers) for model in (model_1, model_2)
)

# Blend the two probability matrices with the weights above
weighted_probs_open = weight_1 * probs_1_open + weight_2 * probs_2_open

# Final label = column with the highest blended probability; column 1 is kept
# as the score.
# NOTE(review): this assumes both models order their classes as [0, 1] - confirm.
y_open_pred = weighted_probs_open.argmax(axis=1)
y_open_pred_proba = weighted_probs_open[:, 1]

# Attach the predictions to the unencoded frame: label 1 is reported as
# 'Not Renewed', anything else as 'Renewed'
is_not_renewed = y_open_pred == 1
open_customers['Predicted Status'] = np.where(is_not_renewed, 'Not Renewed', 'Renewed')
open_customers['Churn Probability'] = y_open_pred_proba

# Report how many customers fall into each predicted label
renewed_count = np.count_nonzero(y_open_pred == 0)
not_renewed_count = np.count_nonzero(is_not_renewed)
print(f"Predicted Renewed: {renewed_count}")
print(f"Predicted Not Renewed: {not_renewed_count}")

Predicted Renewed: 120024
Predicted Not Renewed: 238849


In [2]:
# Write the open-customer frame, including the prediction columns, to CSV
# without the index column
predictions_csv_path = "RanCat_predictions_JFMAMJ(Final).csv"
open_customers.to_csv(predictions_csv_path, index=False)