In [1]:
import pandas as pd

# Load the exported XGBoost predictions from the working directory.
df = pd.read_csv(filepath_or_buffer='XGB_predictions_JFM(Final).csv')

In [2]:
import pandas as pd

# Values of 'Tie Up' that count as a churn reason (Top 3 from XGBoost Analysis).
_TOP_TIE_UPS = ('MARUTI', 'MIBL OEM', 'TATA PV')

# Ordered churn rules: (column, predicate on the cell value, reason text).
# The order of this table is the order in which reasons appear in the output.
# '{value}' inside a reason text is replaced by the cell value that matched.
# NOTE(review): the numeric cut-offs presumably come from the same XGBoost
# analysis as the tie-up list -- confirm before changing them.
_CHURN_RULES = (
    # Business Type
    ('biztype', lambda v: v == 'Renewal Business', 'Renewal Business Type'),
    # Own-Damage Premium
    ('Total OD Premium', lambda v: v >= 10804.0, 'High Own-Damage Premium'),
    # Discounts
    ('Applicable Discount with NCB', lambda v: v < 71.0, 'Low Discount Offered'),
    # Tie-Ups
    ('Tie Up', lambda v: v in _TOP_TIE_UPS, 'Tie Up with {value}'),
    # Premium Payable
    ('Total Premium Payable ', lambda v: v >= 7823.0, 'High Total Premium Payable'),
    ('Vehicle IDV', lambda v: v < 87336.0, 'Low Vehicle IDV'),
    ('age', lambda v: v < 3.7048192, 'Young Vehicle Age'),
    ('Before GST Add-on GWP', lambda v: v >= 699.152527, 'High Add-On Premium'),
    ('Total TP Premium', lambda v: v >= 3816.0, 'High Third-Party Premium'),
    # Claim and Policy Related Factors
    ('Renewal Type', lambda v: v == 'TY Onwards', 'Policy having TY Onwards Renewal Type'),
    ('Claim Happaned/Not', lambda v: v == 'Yes', 'Claims Happened'),
    ('Renewal Rate Status', lambda v: v == 'Increase', 'Higher Renewal Premium Impact'),
    ('NCB % Previous Year', lambda v: v < 35.0, 'Low No Claim Bonus Percentage'),
    ('Product name ', lambda v: v == 'PrivateCarPolicy', 'PrivateCarPolicy Product'),
)


def reason_for_churn(row, status_col='Predicted Status'):
    """Describe why a policy flagged as 'Not Renewed' is considered at risk.

    Parameters
    ----------
    row : mapping-like
        Anything indexable by column name (a DataFrame row passed by
        ``df.apply(..., axis=1)``, or a plain dict). It must contain
        ``status_col`` and, for 'Not Renewed' rows, every column listed in
        ``_CHURN_RULES``; a missing column raises whatever ``row[...]``
        raises (``KeyError`` for dicts and Series).
    status_col : str, default 'Predicted Status'
        Column holding the renewal status. Pass another name (for example via
        ``df.apply(reason_for_churn, axis=1, status_col=...)``) to reuse the
        same rules on a frame that stores the status under a different name.

    Returns
    -------
    str
        ``''`` when the status is not 'Not Renewed'; otherwise the matching
        reason texts joined by ``', '`` in rule order, or ``'Organic Churn'``
        when no rule matches.

    Notes
    -----
    Threshold columns must hold numbers. A NaN fails every ``<`` / ``>=``
    comparison, so a missing numeric value never contributes a reason.
    """
    is_not_renewed = row[status_col] == 'Not Renewed'
    if not is_not_renewed:
        return ''

    reasons = []
    for column, matches, reason_text in _CHURN_RULES:
        value = row[column]
        if matches(value):
            # Only the tie-up text has a '{value}' placeholder; for every
            # other reason text format() returns the string unchanged.
            reasons.append(reason_text.format(value=value))

    return ', '.join(reasons) if reasons else 'Organic Churn'

# Label each row: reasons for predicted non-renewals, '' for every other row.
df['Not Renewed Reasons'] = df.apply(reason_for_churn, axis='columns')

# Write the labelled frame next to the input file, without the index column.
df.to_csv(path_or_buf='XGB_predictions_JFM(Final)_with_reasons.csv', index=False)

# Plain-text preview of the first five rows.
print(df.head(n=5))

                   Policy No Renewal Type           Product name   \
0  '201150010123800850402000      missing        PrivateCarPolicy   
1  '201120020123703748700000      missing        PrivateCarPolicy   
2  '202540050223800048602000      missing  PrivateCarStandaloneOD   
3  '201140020123703813704000      missing        PrivateCarPolicy   
4  '202550021023700068600000      missing  PrivateCarStandaloneOD   

  Product name  2           biztype        Reg no     age MANUFACTURER/Make  \
0         missing  Renewal Business  TS-07-FS-0767   7.00           HYUNDAI   
1         missing         Roll Over  AP-11-AH-2965  13.91            MARUTI   
2         missing  Renewal Business  RJ-27-CK-7807   2.00           HYUNDAI   
3         missing  Renewal Business  GJ-01-KG-8813  12.97            MARUTI   
4         missing         Roll Over  KA-14-MA-9472   1.00            MARUTI   

              model                  variant  ... WITHDRAWN  \
0         GRAND I10  MAGNA AT 1.2 KAPPA VTVT  .

In [3]:
import getpass
import os
import re
from urllib.parse import quote_plus

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, roc_curve, roc_auc_score, classification_report, log_loss
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sqlalchemy import create_engine


# Database connection setup
# Settings are read from the PGHOST / PGDATABASE / PGUSER / PGPASSWORD / PGPORT
# environment variables so that no secret is stored in the notebook. The
# non-secret settings fall back to the previous local defaults; the password
# has no default and is prompted for when PGPASSWORD is unset or empty.
# NOTE(review): a password was previously committed in this cell -- rotate it.
db_config = {
    'host': os.environ.get('PGHOST', 'localhost'),
    'database': os.environ.get('PGDATABASE', 'postgres'),
    'user': os.environ.get('PGUSER', 'postgres'),
    'password': os.environ.get('PGPASSWORD') or getpass.getpass('PostgreSQL password: '),
    'port': os.environ.get('PGPORT', '5432')
}
# User and password are percent-encoded so that characters such as '@', ':'
# or '/' cannot be mistaken for URL delimiters.
connection_string = (
    f"postgresql://{quote_plus(db_config['user'])}:{quote_plus(db_config['password'])}"
    f"@{db_config['host']}:{db_config['port']}/{db_config['database']}"
)
engine = create_engine(connection_string)

# Step 1: Load data from PostgreSQL
query = 'SELECT * FROM "corrected_merged_claim_data_EF";'
df = pd.read_sql(query, con=engine)

In [4]:
# Step 2: Ensure columns are of the correct data type
# These are the columns that reason_for_churn compares against numeric cut-offs.
numeric_columns = [
    'Total OD Premium', 'Total TP Premium',
    'Applicable Discount with NCB',
    'Total Premium Payable ',
    'Before GST Add-on GWP',
    'Vehicle IDV', 'age',
    'NCB % Previous Year',
]

# Column-wise conversion in one statement; unparseable values become NaN.
df[numeric_columns] = df[numeric_columns].apply(pd.to_numeric, errors='coerce')

# Values of 'Tie Up' that count as a churn reason (Top 3 from XGBoost Analysis).
_TOP_TIE_UPS = ('MARUTI', 'MIBL OEM', 'TATA PV')

# Ordered churn rules: (column, predicate on the cell value, reason text).
# The order of this table is the order in which reasons appear in the output.
# '{value}' inside a reason text is replaced by the cell value that matched.
# NOTE(review): the numeric cut-offs presumably come from the same XGBoost
# analysis as the tie-up list -- confirm before changing them.
_CHURN_RULES = (
    # Business Type
    ('biztype', lambda v: v == 'Renewal Business', 'Renewal Business Type'),
    # Own-Damage Premium
    ('Total OD Premium', lambda v: v >= 10804.0, 'High Own-Damage Premium'),
    # Discounts
    ('Applicable Discount with NCB', lambda v: v < 71.0, 'Low Discount Offered'),
    # Tie-Ups
    ('Tie Up', lambda v: v in _TOP_TIE_UPS, 'Tie Up with {value}'),
    # Premium Payable
    ('Total Premium Payable ', lambda v: v >= 7823.0, 'High Total Premium Payable'),
    ('Vehicle IDV', lambda v: v < 87336.0, 'Low Vehicle IDV'),
    ('age', lambda v: v < 3.7048192, 'Young Vehicle Age'),
    ('Before GST Add-on GWP', lambda v: v >= 699.152527, 'High Add-On Premium'),
    ('Total TP Premium', lambda v: v >= 3816.0, 'High Third-Party Premium'),
    # Claim and Policy Related Factors
    ('Renewal Type', lambda v: v == 'TY Onwards', 'Policy having TY Onwards Renewal Type'),
    ('Claim Happaned/Not', lambda v: v == 'Yes', 'Claims Happened'),
    ('Renewal Rate Status', lambda v: v == 'Increase', 'Higher Renewal Premium Impact'),
    ('NCB % Previous Year', lambda v: v < 35.0, 'Low No Claim Bonus Percentage'),
    ('Product name ', lambda v: v == 'PrivateCarPolicy', 'PrivateCarPolicy Product'),
)


def reason_for_churn(row, status_col='Policy Status'):
    """Describe why a policy whose status is 'Not Renewed' churned.

    Parameters
    ----------
    row : mapping-like
        Anything indexable by column name (a DataFrame row passed by
        ``df.apply(..., axis=1)``, or a plain dict). It must contain
        ``status_col`` and, for 'Not Renewed' rows, every column listed in
        ``_CHURN_RULES``; a missing column raises whatever ``row[...]``
        raises (``KeyError`` for dicts and Series).
    status_col : str, default 'Policy Status'
        Column holding the renewal status. Pass another name (for example via
        ``df.apply(reason_for_churn, axis=1, status_col=...)``) to reuse the
        same rules on a frame that stores the status under a different name.

    Returns
    -------
    str
        ``''`` when the status is not 'Not Renewed'; otherwise the matching
        reason texts joined by ``', '`` in rule order, or ``'Organic Churn'``
        when no rule matches.

    Notes
    -----
    Threshold columns must hold numbers. A NaN fails every ``<`` / ``>=``
    comparison, so a value that could not be parsed as a number never
    contributes a reason.
    """
    is_not_renewed = row[status_col] == 'Not Renewed'
    if not is_not_renewed:
        return ''

    reasons = []
    for column, matches, reason_text in _CHURN_RULES:
        value = row[column]
        if matches(value):
            # Only the tie-up text has a '{value}' placeholder; for every
            # other reason text format() returns the string unchanged.
            reasons.append(reason_text.format(value=value))

    return ', '.join(reasons) if reasons else 'Organic Churn'

# Destination table for the labelled data.
output_table_name = "corrected_merged_claim_data_EF_with_xgbreasons"

# Label each row: reasons for non-renewed policies, '' for every other row.
df['Not Renewed Reasons'] = df.apply(reason_for_churn, axis='columns')

# Write the frame to PostgreSQL; an existing table of that name is replaced
# and the DataFrame index is not stored.
df.to_sql(name=output_table_name, con=engine, if_exists='replace', index=False)

print(f"Data successfully saved to table '{output_table_name}' in the database.")

Data successfully saved to table 'corrected_merged_claim_data_EF_with_xgbreasons' in the database.
