In [1]:
import pandas as pd

# Load the prediction export; the cells below compute thresholds from it and
# label its rows with churn reasons.
df = pd.read_csv('Open_predictions gb1(JFM) final.csv')

In [None]:
# 75th-percentile cut-offs computed over the whole frame. reason_for_churn
# reads these two module-level names to decide 'High Claim Frequency' and
# 'Frequent Policy Denials', so they must exist before it is applied.
claims_threshold = df['Number of claims'].quantile(0.75)
denied_threshold = df['Denied'].quantile(0.75)

def reason_for_churn(row):
    """Return a comma-separated explanation for a predicted non-renewal.

    Args:
        row: Mapping-like record (for example a row handed over by
            ``DataFrame.apply(..., axis=1)``) exposing 'Predicted Status' and,
            for 'Not Renewed' records, every column read by the rules below.

    Returns:
        ``''`` when 'Predicted Status' is anything other than 'Not Renewed'.
        Otherwise the labels of all triggered rules joined with ', ' in rule
        order, or 'Organic Churn' when no rule triggers.

    Note:
        The last two rules read the module-level ``claims_threshold`` and
        ``denied_threshold``, which must be defined before this is called.
    """
    # Guard clause: only predicted non-renewals are explained.
    if row['Predicted Status'] != 'Not Renewed':
        return ''

    def banded(column, low_cutoff, high_cutoff, low_label, high_label):
        # Two-sided rule: the low side is tested first, at most one label applies.
        value = row[column]
        if value <= low_cutoff:
            return low_label
        if value > high_cutoff:
            return high_label
        return None

    # One slot per rule, in reporting order; None marks a rule that did not fire.
    outcomes = [
        # Business type
        'Renewal Business Type' if row['biztype'] == 'Renewal Business' else None,
        # Own-damage premium
        'High Own-Damage Premium' if row['Total OD Premium'] > 6108.50 else None,
        # Applicable discount (low band / high band)
        banded('Applicable Discount with NCB', 57.03, 72.07,
               'Low Discount Offered', 'High Discount Not Sustained'),
        # Tie-ups
        f'Tie Up with {row["Tie Up"]}'
        if row['Tie Up'] in ['MARUTI', 'HYUNDAI', 'Non-OEM'] else None,
        # Premium amounts (note: the total-premium column name ends with a space)
        'High Total Premium Payable' if row['Total Premium Payable '] > 13977.50 else None,
        'High Add-On Premium' if row['Before GST Add-on GWP'] > 2819.92 else None,
        # Customer and vehicle attributes
        'Low Customer Tenure' if row['Customer Tenure'] <= 2.50 else None,
        'Low Vehicle IDV' if row['Vehicle IDV'] <= 355000 else None,
        banded('age', 3.69, 7.58, 'Young Vehicle Age', 'Old Vehicle Age'),
        'Low No Claim Bonus Amount' if row['NCB Amount'] <= 2612.37 else None,
        # Product and renewal type
        'SOD Product' if row['Product name  2'] == 'SOD' else None,
        'Policy having SY Onwards Renewal Type' if row['Renewal Type'] == 'SY Onwards' else None,
        # Data-derived cut-offs (module-level globals)
        'High Claim Frequency' if row['Number of claims'] > claims_threshold else None,
        'Frequent Policy Denials' if row['Denied'] > denied_threshold else None,
    ]

    fired = [label for label in outcomes if label is not None]
    return ', '.join(fired) if fired else 'Organic Churn'

# Apply the function to the DataFrame, one row at a time (axis=1); rows whose
# 'Predicted Status' is not 'Not Renewed' receive an empty string.
df['Not Renewed Reasons'] = df.apply(reason_for_churn, axis=1)

# Export the results to CSV (index=False: the row index is not written)
df.to_csv('Open_predictions_with_churn_reasons.csv', index=False)

# Plain-text preview of the first rows, now including 'Not Renewed Reasons'
print(df.head())

In [None]:
# Display the 75th-percentile 'Number of claims' cut-off for inspection
claims_threshold

In [None]:
# Display the 75th-percentile 'Denied' cut-off for inspection
denied_threshold 

In [1]:
import getpass
import os
import re
from urllib.parse import quote_plus

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, roc_curve, roc_auc_score, classification_report, log_loss
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sqlalchemy import create_engine


# Database connection setup
# The password is deliberately NOT stored in the notebook: it is taken from the
# PGPASSWORD environment variable, and if that is unset or empty the user is
# prompted for it. All other settings keep the values this cell always used.
db_config = {
    'host': 'localhost',
    'database': 'postgres',
    'user': 'postgres',
    'password': os.environ.get('PGPASSWORD') or getpass.getpass('PostgreSQL password: '),
    'port': '5432'
}
# Percent-encode the password so characters such as '@', ':' or '/' cannot be
# misread as URL delimiters when the connection string is parsed.
connection_string = (
    f"postgresql://{db_config['user']}:{quote_plus(db_config['password'])}"
    f"@{db_config['host']}:{db_config['port']}/{db_config['database']}"
)
engine = create_engine(connection_string)

# Step 1: Load data from PostgreSQL
# The table name is double-quoted in the SQL because it contains upper-case
# characters.
query = 'SELECT * FROM "corrected_merged_claim_data_EF";'
df = pd.read_sql(query, con=engine)

In [2]:
# Step 2: Ensure columns are of the correct data type
# These are the columns that reason_for_churn compares against numeric
# cut-offs, plus 'Customer Tenure', whose rule is commented out in that
# function. 'Total Premium Payable ' really does end with a space; the same
# spelling is used as the lookup key further down.
numeric_columns = [
    'Total OD Premium',
    'Applicable Discount with NCB',
    'Total Premium Payable ',
    'Before GST Add-on GWP',
    'Customer Tenure',
    'Vehicle IDV',
    'age',
    'NCB Amount',
    'Number of claims',
    'Denied'
]

# Convert columns to numeric, coercing errors to NaN
# (each column of the shared frame is overwritten in place)
for col in numeric_columns:
    df[col] = pd.to_numeric(df[col], errors='coerce')

# 75th-percentile cut-offs, taken after the conversion above; reason_for_churn
# reads these module-level names for its last two rules.
claims_threshold = df['Number of claims'].quantile(0.75)
denied_threshold = df['Denied'].quantile(0.75)

def reason_for_churn(row):
    """Return a comma-separated explanation for a policy that was not renewed.

    Args:
        row: Mapping-like record (for example a row handed over by
            ``DataFrame.apply(..., axis=1)``) exposing 'Policy Status' and,
            for 'Not Renewed' records, every column read by the rules below.

    Returns:
        ``''`` when 'Policy Status' is anything other than 'Not Renewed'.
        Otherwise the labels of all triggered rules joined with ', ' in rule
        order, or 'Organic Churn' when no rule triggers.

    Note:
        The last two rules read the module-level ``claims_threshold`` and
        ``denied_threshold``, which must be defined before this is called.
        This variant has no active 'Customer Tenure' rule (the
        'Low Customer Tenure' check at <= 2.50 is disabled here).
    """
    # Guard clause: only non-renewed policies are explained.
    if row['Policy Status'] != 'Not Renewed':
        return ''

    def banded(column, low_cutoff, high_cutoff, low_label, high_label):
        # Two-sided rule: the low side is tested first, at most one label applies.
        value = row[column]
        if value <= low_cutoff:
            return low_label
        if value > high_cutoff:
            return high_label
        return None

    # One slot per rule, in reporting order; None marks a rule that did not fire.
    outcomes = [
        # Business type
        'Renewal Business Type' if row['biztype'] == 'Renewal Business' else None,
        # Own-damage premium
        'High Own-Damage Premium' if row['Total OD Premium'] > 6108.50 else None,
        # Applicable discount (low band / high band)
        banded('Applicable Discount with NCB', 57.03, 72.07,
               'Low Discount Offered', 'High Discount Not Sustained'),
        # Tie-ups
        f'Tie Up with {row["Tie Up"]}'
        if row['Tie Up'] in ['MARUTI', 'HYUNDAI', 'Non-OEM'] else None,
        # Premium amounts (note: the total-premium column name ends with a space)
        'High Total Premium Payable' if row['Total Premium Payable '] > 13977.50 else None,
        'High Add-On Premium' if row['Before GST Add-on GWP'] > 2819.92 else None,
        # Vehicle attributes
        'Low Vehicle IDV' if row['Vehicle IDV'] <= 355000 else None,
        banded('age', 3.69, 7.58, 'Young Vehicle Age', 'Old Vehicle Age'),
        'Low No Claim Bonus Amount' if row['NCB Amount'] <= 2612.37 else None,
        # Product and renewal type
        'SOD Product' if row['Product name  2'] == 'SOD' else None,
        'Policy having SY Onwards Renewal Type' if row['Renewal Type'] == 'SY Onwards' else None,
        # Data-derived cut-offs (module-level globals)
        'High Claim Frequency' if row['Number of claims'] > claims_threshold else None,
        'Frequent Policy Denials' if row['Denied'] > denied_threshold else None,
    ]

    fired = [label for label in outcomes if label is not None]
    return ', '.join(fired) if fired else 'Organic Churn'

# Apply the function to the DataFrame, one row at a time (axis=1); rows whose
# 'Policy Status' is not 'Not Renewed' receive an empty string.
df['Not Renewed Reasons'] = df.apply(reason_for_churn, axis=1)

# Export the results back to a PostgreSQL table
# if_exists='replace' overwrites any existing table of this name;
# index=False keeps the DataFrame index out of the table.
output_table_name = "corrected_merged_claim_data_EF_with_reasons"
df.to_sql(output_table_name, con=engine, if_exists='replace', index=False)

# Reached only if the write above did not raise
print(f"Data successfully saved to table '{output_table_name}' in the database.")

Data successfully saved to table 'corrected_merged_claim_data_EF_with_reasons' in the database.


In [1]:
import pandas as pd

# Load the prediction export; the next cell computes thresholds from it and
# labels its rows with churn reasons.
df = pd.read_csv('Open_predictions gb1(JFM) final.csv')

In [2]:
# 75th-percentile cut-offs computed over the whole frame. reason_for_churn
# reads these two module-level names to decide 'High Claim Frequency' and
# 'Frequent Policy Denials', so they must exist before it is applied.
claims_threshold = df['Number of claims'].quantile(0.75)
denied_threshold = df['Denied'].quantile(0.75)

def reason_for_churn(row):
    """Return a comma-separated explanation for a predicted non-renewal.

    Args:
        row: Mapping-like record (for example a row handed over by
            ``DataFrame.apply(..., axis=1)``) exposing 'Predicted Status' and,
            for 'Not Renewed' records, every column read by the rules below.

    Returns:
        ``''`` when 'Predicted Status' is anything other than 'Not Renewed'.
        Otherwise the labels of all triggered rules joined with ', ' in rule
        order, or 'Organic Churn' when no rule triggers.

    Note:
        The last two rules read the module-level ``claims_threshold`` and
        ``denied_threshold``, which must be defined before this is called.
        This variant has no 'Customer Tenure' or 'NCB Amount' rule.
    """
    # Guard clause: only predicted non-renewals are explained.
    if row['Predicted Status'] != 'Not Renewed':
        return ''

    def banded(column, low_cutoff, high_cutoff, low_label, high_label):
        # Two-sided rule: the low side is tested first, at most one label applies.
        value = row[column]
        if value <= low_cutoff:
            return low_label
        if value > high_cutoff:
            return high_label
        return None

    # One slot per rule, in reporting order; None marks a rule that did not fire.
    outcomes = [
        # Business type
        'Renewal Business Type' if row['biztype'] == 'Renewal Business' else None,
        # Own-damage premium
        'High Own-Damage Premium' if row['Total OD Premium'] > 6108.50 else None,
        # Applicable discount (low band / high band)
        banded('Applicable Discount with NCB', 57.03, 72.07,
               'Low Discount Offered', 'High Discount Not Sustained'),
        # Tie-ups
        f'Tie Up with {row["Tie Up"]}'
        if row['Tie Up'] in ['MARUTI', 'HYUNDAI', 'Non-OEM'] else None,
        # Premium amounts (note: the total-premium column name ends with a space)
        'High Total Premium Payable' if row['Total Premium Payable '] > 13977.50 else None,
        'High Add-On Premium' if row['Before GST Add-on GWP'] > 2819.92 else None,
        # Vehicle attributes
        'Low Vehicle IDV' if row['Vehicle IDV'] <= 355000 else None,
        banded('age', 3.69, 7.58, 'Young Vehicle Age', 'Old Vehicle Age'),
        # Product and renewal type
        'SOD Product' if row['Product name  2'] == 'SOD' else None,
        'Policy having SY Onwards Renewal Type' if row['Renewal Type'] == 'SY Onwards' else None,
        # Data-derived cut-offs (module-level globals)
        'High Claim Frequency' if row['Number of claims'] > claims_threshold else None,
        'Frequent Policy Denials' if row['Denied'] > denied_threshold else None,
    ]

    fired = [label for label in outcomes if label is not None]
    return ', '.join(fired) if fired else 'Organic Churn'

# Apply the function to the DataFrame, one row at a time (axis=1); rows whose
# 'Predicted Status' is not 'Not Renewed' receive an empty string.
df['Not Renewed Reasons'] = df.apply(reason_for_churn, axis=1)

# Export the results to CSV (index=False: the row index is not written)
df.to_csv('Open_predictions_with_churn_reasons(main).csv', index=False)

# Plain-text preview of the first rows, now including 'Not Renewed Reasons'
print(df.head())

                   Policy No Renewal Type           Product name   \
0  '201150010823700165301000      missing        PrivateCarPolicy   
1  '202550010222800035800000           FY        PrivateCarPolicy   
2  '201150040123100081101000      missing        PrivateCarPolicy   
3  '201150020423700404800000      missing        PrivateCarPolicy   
4  '202520020123700422700000      missing  PrivateCarStandaloneOD   

  Product name  2           biztype        Reg no     age  \
0         missing  Renewal Business  AP-02-BE-8778   7.80   
1         Package  Renewal Business  AP-39-HL-9015   3.00   
2         missing  Renewal Business  TN-10-BK-6396   4.00   
3         missing         Roll Over  KA-09-MA-1269  11.85   
4         missing         Roll Over  TS-07-JU-0505   0.89   

     MANUFACTURER/Make           model                variant  ...  \
0  MAHINDRA & MAHINDRA         KUV 100             K8 D 6 STR  ...   
1              HYUNDAI  GRAND I10 NIOS  SPORTZ 1.2 KAPPA VTVT  ...   
2       