In [None]:
import os
from urllib.parse import quote_plus

import joblib
import numpy as np
import pandas as pd
from sqlalchemy import create_engine

# Load the persisted artifacts: the fitted model, the per-column label
# encoders, and the list of feature columns used to select model inputs.
# NOTE: joblib.load unpickles arbitrary Python objects, so these files must
# only ever come from a trusted location.
model = joblib.load("xgboost_model.pkl")
label_encoders = joblib.load("label_encoders.pkl")
features = joblib.load("model_features.pkl")

# Database connection setup.
# Every setting can be overridden through the standard libpq environment
# variables so credentials do not have to live in the source file. The
# literals are kept only as backward-compatible fallbacks.
# TODO: drop the password fallback once PGPASSWORD is configured wherever
# this script runs.
db_config = {
    'host': os.environ.get('PGHOST', 'localhost'),
    'database': os.environ.get('PGDATABASE', 'postgres'),
    'user': os.environ.get('PGUSER', 'postgres'),
    'password': os.environ.get('PGPASSWORD', 'kaviyam123'),
    'port': os.environ.get('PGPORT', '5432'),
}

# Percent-encode the user and password so characters such as '@', ':' or '/'
# cannot break the structure of the connection URL.
connection_string = (
    f"postgresql://{quote_plus(db_config['user'])}:{quote_plus(db_config['password'])}"
    f"@{db_config['host']}:{db_config['port']}/{db_config['database']}"
)
engine = create_engine(connection_string)

# Read the whole merged claim table into memory; the rows and columns needed
# for scoring are selected from this frame afterwards.
query = 'SELECT * FROM "corrected_merged_claim_data_EF";'
data = pd.read_sql(query, con=engine)

# Columns kept for scoring. The names are used verbatim to index `data`, so
# the trailing and doubled spaces inside some of them are significant.
selected_columns = [
    'Policy No',
    'Renewal Type',
    'Product name ',
    'Product name  2',
    'biztype',
    'Policy End Date',
    'Policy Start Date',
    'Reg no ',
    'age',
    'MANUFACTURER/Make',
    'model',
    'variant',
    'Fuel Type',
    'RTO Location ',
    'Vehicle IDV',
    'NCB Amount',
    'Before GST Add-on GWP',
    'Total OD Premium',
    'Total TP Premium',
    'gst',
    'Total Premium Payable ',
    'NCB % Previous Year',
    'Vehicle Segment',
    'Applicable Discount with NCB',
    'Tie Up',
    'Cleaned_Insured name',
    'Cleaned_New Branch Name 2',
    'Cleaned_state2',
    'Zone 2',
    'Number of claims',
    'Approved',
    'Denied',
    'CustomerID',
    'Policy Status',
    'Policy Tenure',
    'Customer Tenure',
    'New Customers',
    'Claim Happaned/Not',
    'Renewal Rate Status',
    'WITHDRAWN',
]

data = data[selected_columns]

# Parse the policy end date; values that cannot be parsed become NaT and are
# therefore excluded by the year/month comparisons below.
data['Policy End Date'] = pd.to_datetime(data['Policy End Date'], errors='coerce')

# Open customers are policies with status 'Open' ending in Jan - March 2025.
is_open = data['Policy Status'] == 'Open'
ends_in_2025 = data['Policy End Date'].dt.year == 2025
ends_in_first_quarter = data['Policy End Date'].dt.month.isin([1, 2, 3])

# Copy so later column assignments do not touch `data`.
open_customers = data.loc[is_open & ends_in_2025 & ends_in_first_quarter].copy()

# Replace the two policy date columns with numeric YEAR / MONTH / DAY parts.
date_columns = ['Policy Start Date', 'Policy End Date']
for col in date_columns:
    open_customers[col] = pd.to_datetime(open_customers[col], errors='coerce')

# Fill the mapping part by part (all YEAR columns, then MONTH, then DAY) so
# the derived columns are appended to the frame in that order.
open_customers_new_date_cols = {}
for suffix, part in (('YEAR', 'year'), ('MONTH', 'month'), ('DAY', 'day')):
    for col in date_columns:
        open_customers_new_date_cols[f'{col}_{suffix}'] = getattr(open_customers[col].dt, part)

# Append the derived columns, then discard the raw datetime columns.
open_customers = pd.concat(
    [open_customers, pd.DataFrame(open_customers_new_date_cols)],
    axis=1,
).drop(columns=date_columns)

# Handle missing values: object (text) columns receive the placeholder
# 'missing', every other column receives 0.
for column, column_dtype in open_customers.dtypes.items():
    placeholder = 'missing' if column_dtype == 'object' else 0
    open_customers[column] = open_customers[column].fillna(placeholder)

# Label-encode every column that has a saved encoder. Values the encoder has
# never seen receive fresh integer codes, handed out in order of first
# appearance and starting right after the largest known code.
open_customers_encoded = open_customers.copy()
row_count = len(open_customers_encoded)

for column in open_customers_encoded.columns:
    if column not in label_encoders:
        continue

    # Known codes are the positions of the labels in the encoder's classes_.
    known_codes = {
        label: code for code, label in enumerate(label_encoders[column].classes_)
    }
    first_new_code = max(known_codes.values()) + 1
    # A column can never need more fresh codes than it has rows.
    fresh_codes = iter(range(first_new_code, first_new_code + row_count))

    def encode_test_value(value, codes=known_codes, fresh=fresh_codes):
        """Return the integer code for `value`, allocating a new one if unseen."""
        if value not in codes:
            codes[value] = next(fresh)
        return codes[value]

    open_customers_encoded[column] = open_customers_encoded[column].apply(encode_test_value)

# Predict on the encoded frame, restricted to the saved feature columns.
X_open_customers = open_customers_encoded[features]
y_open_pred = model.predict(X_open_customers)
# Column 1 of predict_proba is stored as the churn (not renewed) probability.
y_open_pred_proba = model.predict_proba(X_open_customers)[:, 1]

# Class 1 is reported as 'Not Renewed'; any other prediction as 'Renewed'.
# The results are attached to the un-encoded frame so the export stays
# human-readable.
open_customers['Predicted Status'] = np.where(y_open_pred == 1, 'Not Renewed', 'Renewed')
open_customers['Churn Probability'] = y_open_pred_proba

# Save predictions. The path is held in one variable so the confirmation
# message always names the file that was actually written.
output_path = "XGB_predictions_JFM(Final).csv"
open_customers.to_csv(output_path, index=False)
print(f"Predictions saved in {output_path}")

print(f"Predicted Renewed: {(y_open_pred == 0).sum()}")
print(f"Predicted Not Renewed: {(y_open_pred == 1).sum()}")
