In [1]:
import pandas as pd

# Load the GBM prediction export that the following cell annotates with churn reasons.
df = pd.read_csv(filepath_or_buffer='GBM1_predictions_JFMAMJ(Final)_segmentation.csv')

In [2]:
import pandas as pd

# Ordered churn rules as (column, predicate, reason label). The order of this
# table fixes the order of the labels in the returned string. The cut-off
# values are hard-coded; their provenance is not shown in this notebook.
_CHURN_RULES = (
    ('age', lambda v: v <= 1.0038, "Young Vehicle Age"),
    ('age', lambda v: v > 6.4663, "Old Vehicle Age"),
    ('vehicle idv', lambda v: v <= 405003.0, "Low Vehicle IDV"),
    ('before gst add-on gwp', lambda v: v > 6944.5, "High Add-On Premium"),
    ('total od premium', lambda v: v > 7790.5, "High Own-Damage Premium"),
    ('total tp premium', lambda v: v > 8268.5, "High Third-Party Premium"),
    ('ncb % previous year', lambda v: v <= 10.0, "Low No Claim Bonus Percentage"),
    ('applicable discount with ncb', lambda v: v <= 64.5, "Low Discount with NCB"),
    ('Number of claims', lambda v: v > 1.5, "Multiple Claims on Record"),
    ('Claim Happaned/Not', lambda v: v == 'Yes', "Claims Happened"),
    ('policy_wise_purchase', lambda v: v <= 2.5, "Minimal Policies Purchased"),
    ('Policy Tenure', lambda v: v > 2.0, "Policy Tenure Exceeds 2 Years"),
)

# Tie-up values that are themselves reported as a churn reason.
_CHURN_TIE_UPS = ('MARUTI', 'MIBL OEM', 'HYUNDAI', 'Non-OEM')


def reason_for_churn(row, status_column='Predicted Status'):
    """Describe why a single policy row is flagged as churned.

    Parameters
    ----------
    row : mapping-like
        Anything indexable by column name (for example the Series that
        ``df.apply(..., axis=1)`` passes in). It must provide the status
        column, every column named in ``_CHURN_RULES`` and ``'tie up'``.
    status_column : str, optional
        Name of the column holding the renewal status. Defaults to
        ``'Predicted Status'``; pass another name (e.g. ``'Policy Status'``)
        to label rows by a different status column with the same rules.

    Returns
    -------
    str
        ``''`` when the status is anything other than ``'Not Renewed'``.
        Otherwise the labels of all triggered rules joined by ``', '``, or
        ``'Organic Churn'`` when no rule is triggered.
    """
    if row[status_column] != 'Not Renewed':
        return ''

    # NaN fails every ordering comparison, so a missing numeric value never
    # contributes a reason.
    reasons = [label for column, fires, label in _CHURN_RULES if fires(row[column])]

    tie_up = row['tie up']
    if tie_up in _CHURN_TIE_UPS:
        reasons.append(f"Tie Up with {tie_up}")

    return ', '.join(reasons) if reasons else 'Organic Churn'

# Label every row: reason_for_churn is called once per row (axis='columns' is axis=1).
df['Not Renewed Reasons'] = df.apply(reason_for_churn, axis='columns')

# Persist the annotated frame; the positional index is not written.
df.to_csv(path_or_buf='GBM1_predictions_JFMAMJ(Final)_segment_reasons.csv', index=False)

# Preview the first five policies together with their reasons.
print(df.loc[:, ['policy no', 'Not Renewed Reasons']].head(5))

                   policy no  \
0  '201140020123100158101000   
1  '201140020124700750101000   
2  '201140020123705025001000   
3  '201140020123704287902000   
4  '201140020123703982301000   

                                 Not Renewed Reasons  
0  Old Vehicle Age, Low Vehicle IDV, Minimal Poli...  
1  Old Vehicle Age, Low Vehicle IDV, Minimal Poli...  
2  Old Vehicle Age, Low Vehicle IDV, Low No Claim...  
3  Old Vehicle Age, Low Vehicle IDV, Low No Claim...  
4  Old Vehicle Age, Low Vehicle IDV, Minimal Poli...  


In [1]:
import pandas as pd
import re
from sqlalchemy import create_engine
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, roc_curve, roc_auc_score, classification_report, log_loss
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder


import os
from urllib.parse import quote_plus

# Database connection setup
# Settings are read from LIBERTY_DB_* environment variables so credentials do
# not have to live in the notebook. The previous literals are kept as fallbacks
# so an existing local setup keeps working unchanged.
# NOTE(review): set LIBERTY_DB_PASSWORD and drop the literal fallback before
# sharing or committing this notebook.
db_config = {
    'host': os.environ.get('LIBERTY_DB_HOST', 'localhost'),
    'database': os.environ.get('LIBERTY_DB_NAME', 'Liberty'),
    'user': os.environ.get('LIBERTY_DB_USER', 'postgres'),
    'password': os.environ.get('LIBERTY_DB_PASSWORD', 'abc'),
    'port': os.environ.get('LIBERTY_DB_PORT', '5432')
}

# User and password are URL-quoted: characters such as '@', ':' or '/' would
# otherwise be parsed as URL structure and corrupt the connection string.
connection_string = (
    f"postgresql://{quote_plus(db_config['user'])}:{quote_plus(db_config['password'])}"
    f"@{db_config['host']}:{db_config['port']}/{db_config['database']}"
)
engine = create_engine(connection_string)

# Step 1: Load data from PostgreSQL
query = 'SELECT * FROM public.policydata_with_fb_cc_pc_newfea_opti_correct;'
df = pd.read_sql(query, con=engine)

In [2]:
# Step 2: Cast the premium, discount and vehicle columns to numeric dtypes
# NOTE(review): 'Number of claims', 'policy_wise_purchase' and 'Policy Tenure'
# are also compared against numeric thresholds in reason_for_churn but are not
# coerced here - confirm they already arrive as numbers.
numeric_columns = [
    'before gst add-on gwp',
    'total od premium',
    'total tp premium',
    'total premium payable',
    'ncb % previous year',
    'applicable discount with ncb',
    'age',
    'vehicle idv',
]

# Values that cannot be parsed become NaN instead of raising.
for col in numeric_columns:
    df[col] = df[col].pipe(pd.to_numeric, errors='coerce')

# Ordered churn rules as (column, predicate, reason label). The order of this
# table fixes the order of the labels in the returned string. The cut-off
# values are hard-coded; their provenance is not shown in this notebook.
_CHURN_RULES = (
    ('age', lambda v: v <= 1.0038, "Young Vehicle Age"),
    ('age', lambda v: v > 6.4663, "Old Vehicle Age"),
    ('vehicle idv', lambda v: v <= 405003.0, "Low Vehicle IDV"),
    ('before gst add-on gwp', lambda v: v > 6944.5, "High Add-On Premium"),
    ('total od premium', lambda v: v > 7790.5, "High Own-Damage Premium"),
    ('total tp premium', lambda v: v > 8268.5, "High Third-Party Premium"),
    ('ncb % previous year', lambda v: v <= 10.0, "Low No Claim Bonus Percentage"),
    ('applicable discount with ncb', lambda v: v <= 64.5, "Low Discount with NCB"),
    ('Number of claims', lambda v: v > 1.5, "Multiple Claims on Record"),
    ('Claim Happaned/Not', lambda v: v == 'Yes', "Claims Happened"),
    ('policy_wise_purchase', lambda v: v <= 2.5, "Minimal Policies Purchased"),
    ('Policy Tenure', lambda v: v > 2.0, "Policy Tenure Exceeds 2 Years"),
)

# Tie-up values that are themselves reported as a churn reason.
_CHURN_TIE_UPS = ('MARUTI', 'MIBL OEM', 'HYUNDAI', 'Non-OEM')


def reason_for_churn(row, status_column='Policy Status'):
    """Describe why a single policy row is recorded as churned.

    Parameters
    ----------
    row : mapping-like
        Anything indexable by column name (for example the Series that
        ``df.apply(..., axis=1)`` passes in). It must provide the status
        column, every column named in ``_CHURN_RULES`` and ``'tie up'``.
    status_column : str, optional
        Name of the column holding the renewal status. Defaults to
        ``'Policy Status'``; pass another name (e.g. ``'Predicted Status'``)
        to label rows by a different status column with the same rules.

    Returns
    -------
    str
        ``''`` when the status is anything other than ``'Not Renewed'``.
        Otherwise the labels of all triggered rules joined by ``', '``, or
        ``'Organic Churn'`` when no rule is triggered.
    """
    if row[status_column] != 'Not Renewed':
        return ''

    # NaN (e.g. produced by errors='coerce') fails every ordering comparison,
    # so a missing numeric value never contributes a reason.
    reasons = [label for column, fires, label in _CHURN_RULES if fires(row[column])]

    tie_up = row['tie up']
    if tie_up in _CHURN_TIE_UPS:
        reasons.append(f"Tie Up with {tie_up}")

    return ', '.join(reasons) if reasons else 'Organic Churn'

# Label every row: reason_for_churn is called once per row (axis='columns' is axis=1).
df['Not Renewed Reasons'] = df.apply(reason_for_churn, axis='columns')

In [3]:
# Write the annotated frame to PostgreSQL. if_exists="replace" drops and
# recreates the target table, and rows are inserted in batches of `chunksize`.
output_table_name = "policydata_with_fb_cc_pc_newfea_opti_correct_with_reasons"
df.to_sql(
    name=output_table_name,
    con=engine,
    if_exists="replace",
    index=False,
    chunksize=100_000,
)

print(f"Data successfully saved to table '{output_table_name}' in the database.")

Data successfully saved to table 'policydata_with_fb_cc_pc_newfea_opti_correct_with_reasons' in the database.


In [1]:
import pandas as pd

# Load the GBM prediction export that the following cell annotates with churn reasons.
df = pd.read_csv(filepath_or_buffer='GBM1_predictions_JASOND(Final).csv')

In [2]:
import pandas as pd

# Ordered churn rules as (column, predicate, reason label). The order of this
# table fixes the order of the labels in the returned string. The cut-off
# values are hard-coded; their provenance is not shown in this notebook.
_CHURN_RULES = (
    ('age', lambda v: v <= 1.0038, "Young Vehicle Age"),
    ('age', lambda v: v > 6.4663, "Old Vehicle Age"),
    ('vehicle idv', lambda v: v <= 405003.0, "Low Vehicle IDV"),
    ('before gst add-on gwp', lambda v: v > 6944.5, "High Add-On Premium"),
    ('total od premium', lambda v: v > 7790.5, "High Own-Damage Premium"),
    ('total tp premium', lambda v: v > 8268.5, "High Third-Party Premium"),
    ('ncb % previous year', lambda v: v <= 10.0, "Low No Claim Bonus Percentage"),
    ('applicable discount with ncb', lambda v: v <= 64.5, "Low Discount with NCB"),
    ('Number of claims', lambda v: v > 1.5, "Multiple Claims on Record"),
    ('Claim Happaned/Not', lambda v: v == 'Yes', "Claims Happened"),
    ('policy_wise_purchase', lambda v: v <= 2.5, "Minimal Policies Purchased"),
    ('Policy Tenure', lambda v: v > 2.0, "Policy Tenure Exceeds 2 Years"),
)

# Tie-up values that are themselves reported as a churn reason.
_CHURN_TIE_UPS = ('MARUTI', 'MIBL OEM', 'HYUNDAI', 'Non-OEM')


def reason_for_churn(row, status_column='Predicted Status'):
    """Describe why a single policy row is flagged as churned.

    Parameters
    ----------
    row : mapping-like
        Anything indexable by column name (for example the Series that
        ``df.apply(..., axis=1)`` passes in). It must provide the status
        column, every column named in ``_CHURN_RULES`` and ``'tie up'``.
    status_column : str, optional
        Name of the column holding the renewal status. Defaults to
        ``'Predicted Status'``; pass another name (e.g. ``'Policy Status'``)
        to label rows by a different status column with the same rules.

    Returns
    -------
    str
        ``''`` when the status is anything other than ``'Not Renewed'``.
        Otherwise the labels of all triggered rules joined by ``', '``, or
        ``'Organic Churn'`` when no rule is triggered.
    """
    if row[status_column] != 'Not Renewed':
        return ''

    # NaN fails every ordering comparison, so a missing numeric value never
    # contributes a reason.
    reasons = [label for column, fires, label in _CHURN_RULES if fires(row[column])]

    tie_up = row['tie up']
    if tie_up in _CHURN_TIE_UPS:
        reasons.append(f"Tie Up with {tie_up}")

    return ', '.join(reasons) if reasons else 'Organic Churn'

# Label every row: reason_for_churn is called once per row (axis='columns' is axis=1).
df['Not Renewed Reasons'] = df.apply(reason_for_churn, axis='columns')

# Persist the annotated frame; the positional index is not written.
df.to_csv(path_or_buf='GBM1_predictions_JASOND(Final)_mainreasons.csv', index=False)

# Preview the first five policies together with their reasons.
print(df.loc[:, ['policy no', 'Not Renewed Reasons']].head(5))

                   policy no  \
0  '201140020224701462402000   
1  '201140020424100006902000   
2  '201140020124702131400000   
3  '201120060124700413100000   
4  '201120020124702346500000   

                                 Not Renewed Reasons  
0  Old Vehicle Age, Low Vehicle IDV, Low No Claim...  
1  Old Vehicle Age, Low Vehicle IDV, Tie Up with ...  
2  Old Vehicle Age, Low Vehicle IDV, Low No Claim...  
3    Minimal Policies Purchased, Tie Up with Non-OEM  
4  High Own-Damage Premium, Low Discount with NCB...  
