In [1]:
import pandas as pd
import numpy as np
import joblib
from sqlalchemy import create_engine

# Restore the three persisted scoring artifacts in one pass: the trained model,
# the per-column label encoders, and the list of feature columns the model is
# fed at prediction time.
# NOTE(review): joblib files are pickles - only load artifacts from a trusted source.
model, label_encoders, features = (
    joblib.load(artifact_path)
    for artifact_path in (
        "gbm_model.pkl",
        "label_encoders_gbm.pkl",
        "model_features_gbm.pkl",
    )
)

# Load Data from PostgreSQL
# Connection settings are read from the standard libpq environment variables
# (PGHOST, PGDATABASE, PGUSER, PGPASSWORD, PGPORT) when they are set, and fall
# back to the values this notebook previously hardcoded so existing runs are
# unaffected.
# NOTE(review): the literal fallback password is kept only for backward
# compatibility - export PGPASSWORD and delete it before sharing this notebook.
# Imported here so the cell stays runnable on its own.
import os
from urllib.parse import quote_plus

db_config = {
    'host': os.environ.get('PGHOST', 'localhost'),
    'database': os.environ.get('PGDATABASE', 'Liberty'),
    'user': os.environ.get('PGUSER', 'postgres'),
    'password': os.environ.get('PGPASSWORD', 'abc'),
    'port': os.environ.get('PGPORT', '5432'),
}

# Percent-encode the credentials: a '@', ':' or '/' inside the user name or
# password would otherwise be parsed as URL structure and break the connection.
connection_string = (
    f"postgresql://{quote_plus(db_config['user'])}:{quote_plus(db_config['password'])}"
    f"@{db_config['host']}:{db_config['port']}/{db_config['database']}"
)
engine = create_engine(connection_string)

# Pull the whole policy table into memory; the columns actually needed are
# selected from the resulting frame afterwards.
query = 'SELECT * FROM public.policydata_with_fb_cc_pc_newfea_opti_correct;'
data = pd.read_sql(query, con=engine)

# Columns carried forward from the raw table. The list order becomes the
# column order of `data`, so it is kept exactly as it was.
selected_columns = [
    'rto_risk_factor', 'ncb % previous year', 'state_risk_score', 'retention_rate_pct',
    'total od premium_max', 'applicable discount with ncb', 'policy_wise_purchase',
    'manufacturer_risk_rate', 'days_between_renewals', 'retention_streak',
    'total od premium_mean', 'total od premium', 'firstpolicyyear', 'lag_1_tp_premium',
    'total od premium_min', 'avg_premium_hist', 'lag_1_ncb', 'age',
    'total tp premium_max', 'total tp premium_mean', 'total tp premium',
    'total tp premium_min', 'lag_1_premium', 'previous_year_premium_ratio',
    'total premium payable', 'total_revenue', 'gst', 'fuel_type_risk_factor',
    'lag_1_od_premium', 'Customer_APV', 'segment_risk_score', 'vehicle idv',
    'Policy Tenure', 'Number of claims', 'approved', 'claim_approval_rate',
    'Customer Tenure', 'before gst add-on gwp', 'od_tp_ratio', 'add_on_adoption',
    'CLV', 'idv_premium_ratio', 'Customer_APF', 'days_gap_prev_end_to_curr_start',
    'customerid', 'Claim Happaned/Not', 'Cleaned Branch Name 2',
    'Cleaned Chassis Number', 'Cleaned Engine Number', 'Cleaned Reg no',
    'Cleaned State2', 'Cleaned Zone 2', 'biztype', 'corrected_name',
    'make_clean', 'model_clean', 'product name', 'policy no',
    'policy end date', 'policy start date', 'decline', 'tie up',
    'variant', 'Policy Status',
]

# Narrow the frame to those columns (raises KeyError if any is missing).
data = data[selected_columns]

# Parse the policy end date; values that cannot be parsed become NaT and
# therefore never satisfy the year/month conditions below.
data['policy end date'] = pd.to_datetime(data['policy end date'], errors='coerce')

# Keep policies that are still open and whose end date falls in
# January-June 2025 (months 1 through 6).
policy_end_date = data['policy end date']
target_months = [1, 2, 3, 4, 5, 6]

status_is_open = data['Policy Status'] == 'Open'
ends_in_target_year = policy_end_date.dt.year == 2025
ends_in_target_months = policy_end_date.dt.month.isin(target_months)

# .copy() detaches the subset so later column assignments do not touch `data`.
open_customers = data[status_is_open & ends_in_target_year & ends_in_target_months].copy()

# Extract date features
# Both policy dates are parsed (unparseable values become NaT), expanded into
# year / month / day columns, and then the raw date columns are dropped.
date_columns = ['policy start date', 'policy end date']
for date_column in date_columns:
    open_customers[date_column] = pd.to_datetime(open_customers[date_column], errors='coerce')

# Insertion order fixes the final column order: both *_YEAR columns first,
# then both *_MONTH columns, then both *_DAY columns.
open_customers_new_date_cols = {}
for suffix, date_part in (('YEAR', 'year'), ('MONTH', 'month'), ('DAY', 'day')):
    for date_column in date_columns:
        open_customers_new_date_cols[f'{date_column}_{suffix}'] = getattr(
            open_customers[date_column].dt, date_part
        )

derived_date_frame = pd.DataFrame(open_customers_new_date_cols)
open_customers = pd.concat([open_customers, derived_date_frame], axis=1)
open_customers = open_customers.drop(columns=date_columns)

# Handle missing values
# Object-dtype columns get the placeholder string 'none'; every other column
# gets 0. The dtypes are snapshotted once up front, which is safe because
# filling one column does not change the dtype of any column not yet visited.
for column_name, column_dtype in open_customers.dtypes.items():
    fill_value = 'none' if column_dtype == 'object' else 0
    open_customers[column_name] = open_customers[column_name].fillna(fill_value)

# Label Encoding for open customers using dynamic mapping
# Work on a copy so `open_customers` keeps its original, human-readable values.
open_customers_encoded = open_customers.copy()

for column in open_customers_encoded.columns:
    # Only columns that have a stored encoder are transformed.
    if column not in label_encoders:
        continue

    # Start from the codes of the stored encoder: position in classes_ -> code.
    code_by_label = {label: code for code, label in enumerate(label_encoders[column].classes_)}
    next_code = max(code_by_label.values()) + 1

    # Register values the encoder has no class for. unique() yields values in
    # order of first appearance, so each new value receives the next free code
    # in the same order a row-by-row pass would assign it.
    # NOTE(review): these codes lie outside the range covered by the stored
    # encoder classes - confirm the model handles them sensibly.
    for value in open_customers_encoded[column].unique():
        if value not in code_by_label:
            code_by_label[value] = next_code
            next_code += 1

    # Every value in the column is now a key, so a plain lookup is enough.
    open_customers_encoded[column] = open_customers_encoded[column].map(code_by_label.__getitem__)

# Predict
# Feed the model exactly the columns listed in `features`, in that order.
X_open_customers = open_customers_encoded[features]
y_open_pred = model.predict(X_open_customers)
# Column 1 of predict_proba is reported as the churn probability.
# NOTE(review): assumes a binary model whose second class is label 1
# ("Not Renewed") - confirm against model.classes_.
y_open_pred_proba = model.predict_proba(X_open_customers)[:, 1]

# Results are attached to the un-encoded frame so the CSV keeps the original
# category values rather than their integer codes.
open_customers['Predicted Status'] = ['Not Renewed' if pred == 1 else 'Renewed' for pred in y_open_pred]
open_customers['Churn Probability'] = y_open_pred_proba

# Save predictions
# The path is defined once so the status message always names the file that
# was actually written (it previously reported a different file name).
output_path = "GBM1_predictions_JFMAMJ(Final).csv"
open_customers.to_csv(output_path, index=False)
print(f"Predictions saved in {output_path}")

# Class counts as a quick sanity check of the prediction split.
print(f"Predicted Renewed: {(y_open_pred == 0).sum()}")
print(f"Predicted Not Renewed: {(y_open_pred == 1).sum()}")

Predictions saved in Open_predictions.csv
Predicted Renewed: 124973
Predicted Not Renewed: 233900


In [2]:
import pandas as pd
import numpy as np
import joblib
from sqlalchemy import create_engine

# Restore the three persisted scoring artifacts in one pass: the trained model,
# the per-column label encoders, and the list of feature columns the model is
# fed at prediction time.
# NOTE(review): joblib files are pickles - only load artifacts from a trusted source.
model, label_encoders, features = (
    joblib.load(artifact_path)
    for artifact_path in (
        "gbm_model.pkl",
        "label_encoders_gbm.pkl",
        "model_features_gbm.pkl",
    )
)

# Load Data from PostgreSQL
# Connection settings are read from the standard libpq environment variables
# (PGHOST, PGDATABASE, PGUSER, PGPASSWORD, PGPORT) when they are set, and fall
# back to the values this notebook previously hardcoded so existing runs are
# unaffected.
# NOTE(review): the literal fallback password is kept only for backward
# compatibility - export PGPASSWORD and delete it before sharing this notebook.
# Imported here so the cell stays runnable on its own.
import os
from urllib.parse import quote_plus

db_config = {
    'host': os.environ.get('PGHOST', 'localhost'),
    'database': os.environ.get('PGDATABASE', 'Liberty'),
    'user': os.environ.get('PGUSER', 'postgres'),
    'password': os.environ.get('PGPASSWORD', 'abc'),
    'port': os.environ.get('PGPORT', '5432'),
}

# Percent-encode the credentials: a '@', ':' or '/' inside the user name or
# password would otherwise be parsed as URL structure and break the connection.
connection_string = (
    f"postgresql://{quote_plus(db_config['user'])}:{quote_plus(db_config['password'])}"
    f"@{db_config['host']}:{db_config['port']}/{db_config['database']}"
)
engine = create_engine(connection_string)

# Pull the whole policy table into memory; the columns actually needed are
# selected from the resulting frame afterwards.
query = 'SELECT * FROM public.policydata_with_fb_cc_pc_newfea_opti_correct;'
data = pd.read_sql(query, con=engine)

# Columns carried forward from the raw table. The list order becomes the
# column order of `data`, so it is kept exactly as it was.
selected_columns = [
    'rto_risk_factor', 'ncb % previous year', 'state_risk_score', 'retention_rate_pct',
    'total od premium_max', 'applicable discount with ncb', 'policy_wise_purchase',
    'manufacturer_risk_rate', 'days_between_renewals', 'retention_streak',
    'total od premium_mean', 'total od premium', 'firstpolicyyear', 'lag_1_tp_premium',
    'total od premium_min', 'avg_premium_hist', 'lag_1_ncb', 'age',
    'total tp premium_max', 'total tp premium_mean', 'total tp premium',
    'total tp premium_min', 'lag_1_premium', 'previous_year_premium_ratio',
    'total premium payable', 'total_revenue', 'gst', 'fuel_type_risk_factor',
    'lag_1_od_premium', 'Customer_APV', 'segment_risk_score', 'vehicle idv',
    'Policy Tenure', 'Number of claims', 'approved', 'claim_approval_rate',
    'Customer Tenure', 'before gst add-on gwp', 'od_tp_ratio', 'add_on_adoption',
    'CLV', 'idv_premium_ratio', 'Customer_APF', 'days_gap_prev_end_to_curr_start',
    'customerid', 'Claim Happaned/Not', 'Cleaned Branch Name 2',
    'Cleaned Chassis Number', 'Cleaned Engine Number', 'Cleaned Reg no',
    'Cleaned State2', 'Cleaned Zone 2', 'biztype', 'corrected_name',
    'make_clean', 'model_clean', 'product name', 'policy no',
    'policy end date', 'policy start date', 'decline', 'tie up',
    'variant', 'Policy Status',
]

# Narrow the frame to those columns (raises KeyError if any is missing).
data = data[selected_columns]

# Parse the policy end date; values that cannot be parsed become NaT and
# therefore never satisfy the year/month conditions below.
data['policy end date'] = pd.to_datetime(data['policy end date'], errors='coerce')

# Keep policies that are still open and whose end date falls in
# July-December 2025 (months 7 through 12).
policy_end_date = data['policy end date']
target_months = [7, 8, 9, 10, 11, 12]

status_is_open = data['Policy Status'] == 'Open'
ends_in_target_year = policy_end_date.dt.year == 2025
ends_in_target_months = policy_end_date.dt.month.isin(target_months)

# .copy() detaches the subset so later column assignments do not touch `data`.
open_customers = data[status_is_open & ends_in_target_year & ends_in_target_months].copy()

# Extract date features
# Both policy dates are parsed (unparseable values become NaT), expanded into
# year / month / day columns, and then the raw date columns are dropped.
date_columns = ['policy start date', 'policy end date']
for date_column in date_columns:
    open_customers[date_column] = pd.to_datetime(open_customers[date_column], errors='coerce')

# Insertion order fixes the final column order: both *_YEAR columns first,
# then both *_MONTH columns, then both *_DAY columns.
open_customers_new_date_cols = {}
for suffix, date_part in (('YEAR', 'year'), ('MONTH', 'month'), ('DAY', 'day')):
    for date_column in date_columns:
        open_customers_new_date_cols[f'{date_column}_{suffix}'] = getattr(
            open_customers[date_column].dt, date_part
        )

derived_date_frame = pd.DataFrame(open_customers_new_date_cols)
open_customers = pd.concat([open_customers, derived_date_frame], axis=1)
open_customers = open_customers.drop(columns=date_columns)

# Handle missing values
# Object-dtype columns get the placeholder string 'none'; every other column
# gets 0. The dtypes are snapshotted once up front, which is safe because
# filling one column does not change the dtype of any column not yet visited.
for column_name, column_dtype in open_customers.dtypes.items():
    fill_value = 'none' if column_dtype == 'object' else 0
    open_customers[column_name] = open_customers[column_name].fillna(fill_value)

# Label Encoding for open customers using dynamic mapping
# Work on a copy so `open_customers` keeps its original, human-readable values.
open_customers_encoded = open_customers.copy()

for column in open_customers_encoded.columns:
    # Only columns that have a stored encoder are transformed.
    if column not in label_encoders:
        continue

    # Start from the codes of the stored encoder: position in classes_ -> code.
    code_by_label = {label: code for code, label in enumerate(label_encoders[column].classes_)}
    next_code = max(code_by_label.values()) + 1

    # Register values the encoder has no class for. unique() yields values in
    # order of first appearance, so each new value receives the next free code
    # in the same order a row-by-row pass would assign it.
    # NOTE(review): these codes lie outside the range covered by the stored
    # encoder classes - confirm the model handles them sensibly.
    for value in open_customers_encoded[column].unique():
        if value not in code_by_label:
            code_by_label[value] = next_code
            next_code += 1

    # Every value in the column is now a key, so a plain lookup is enough.
    open_customers_encoded[column] = open_customers_encoded[column].map(code_by_label.__getitem__)

# Predict
# Feed the model exactly the columns listed in `features`, in that order.
X_open_customers = open_customers_encoded[features]
y_open_pred = model.predict(X_open_customers)
# Column 1 of predict_proba is reported as the churn probability.
# NOTE(review): assumes a binary model whose second class is label 1
# ("Not Renewed") - confirm against model.classes_.
y_open_pred_proba = model.predict_proba(X_open_customers)[:, 1]

# Results are attached to the un-encoded frame so the CSV keeps the original
# category values rather than their integer codes.
open_customers['Predicted Status'] = ['Not Renewed' if pred == 1 else 'Renewed' for pred in y_open_pred]
open_customers['Churn Probability'] = y_open_pred_proba

# Save predictions
# The path is defined once so the status message always names the file that
# was actually written (it previously reported a different file name).
output_path = "GBM1_predictions_JASOND(Final).csv"
open_customers.to_csv(output_path, index=False)
print(f"Predictions saved in {output_path}")

# Class counts as a quick sanity check of the prediction split.
print(f"Predicted Renewed: {(y_open_pred == 0).sum()}")
print(f"Predicted Not Renewed: {(y_open_pred == 1).sum()}")

Predictions saved in Open_predictions.csv
Predicted Renewed: 101376
Predicted Not Renewed: 215032
