In [1]:
import getpass
import os
from urllib.parse import quote

import pandas as pd
from sqlalchemy import create_engine

# Database connection setup.
# Settings are taken from the standard libpq environment variables (PGHOST,
# PGPORT, PGDATABASE, PGUSER, PGPASSWORD). The previous local values remain as
# defaults for everything except the password, which is never stored in the
# notebook: it is read from PGPASSWORD or, if that is unset, prompted for.
db_config = {
    'host': os.environ.get('PGHOST', 'localhost'),
    'database': os.environ.get('PGDATABASE', 'Liberty'),
    'user': os.environ.get('PGUSER', 'postgres'),
    'password': os.environ.get('PGPASSWORD') or getpass.getpass('PostgreSQL password: '),
    'port': os.environ.get('PGPORT', '5432')
}

# Percent-encode the credentials so characters such as '@', ':' or '/' inside
# a user name or password cannot corrupt the connection URL.
connection_string = (
    f"postgresql://{quote(db_config['user'], safe='')}:{quote(db_config['password'], safe='')}"
    f"@{db_config['host']}:{db_config['port']}/{db_config['database']}"
)
engine = create_engine(connection_string)

# Load historical data & calculate rank
query = 'SELECT * FROM policydata_with_fb_cc_pc_newfea_opti_correct_with_reasons;'
df_hist = pd.read_sql(query, con=engine)

# Work on a separate Series instead of writing back into df_hist: assigning a
# dropna() result to a column is re-aligned on the index, which silently puts
# the NaN rows back and makes the dropna() a no-op.
hist_reasons = df_hist['Not Renewed Reasons'].dropna().astype(str)

# One entry per individual reason. Blank fragments (empty cells, stray or
# trailing commas) are not reasons and must not be counted or ranked.
all_reasons = hist_reasons.str.split(',').explode().str.strip()
all_reasons = all_reasons[all_reasons != '']

# Frequency table: the most frequent reason gets Rank 1 and reasons with equal
# counts share a rank (dense ranking).
reason_counts = all_reasons.value_counts().reset_index()
reason_counts.columns = ['Reason', 'Count']
reason_counts['Rank'] = reason_counts['Count'].rank(method='dense', ascending=False).astype(int)
reason_counts = reason_counts.sort_values(by='Rank')

# Build rank dictionary: reason text -> dense frequency rank
rank_dict = reason_counts.set_index('Reason')['Rank'].to_dict()

# Updated business preference order (new list only), highest priority first
preference_order = [
    'Low Discount with NCB',
    'Low Vehicle IDV',
    'High Own-Damage Premium',
    'High Third-Party Premium',
    'High Add-On Premium',
    'Claims Happened',
    'Young Vehicle Age',
    'Tie Up with Non-OEM',
    'Tie Up with HYUNDAI',
    'Tie Up with MIBL OEM',
    'Tie Up with MARUTI',
    'Low No Claim Bonus Percentage',
    'Multiple Claims on Record',
    'Old Vehicle Age',
    'Minimal Policies Purchased',
    'Policy Tenure Exceeds 2 Years',
]

# Map every reason to its 1-based position in the list above
preference_dict = dict(zip(preference_order, range(1, len(preference_order) + 1)))

# Read the new predictions; missing reason lists become '' so that every row
# holds a string
predictions_path = 'GBM1_predictions_JFMAMJ(Final)_segment_reasons.csv'
df = pd.read_csv(predictions_path)
raw_reasons = df['Not Renewed Reasons']
df['Not Renewed Reasons'] = raw_reasons.fillna('').astype(str)

# Define main reason extraction logic
def get_main_reason(reason_string, preference=None, rank=None):
    """Pick the single most important reason from a comma-separated list.

    Reasons are ordered by business preference first (lower number wins) and,
    when preference is equal, by historical frequency rank (lower number wins).
    A reason missing from a mapping is treated as infinitely low priority in
    that mapping. If several reasons tie on both keys, the one appearing first
    in ``reason_string`` is returned.

    Parameters
    ----------
    reason_string : str
        Comma-separated reasons, e.g. ``'Old Vehicle Age, Low Vehicle IDV'``.
        Surrounding whitespace and blank fragments are ignored.
    preference : dict, optional
        Mapping reason -> preference position. Defaults to the module-level
        ``preference_dict``.
    rank : dict, optional
        Mapping reason -> frequency rank. Defaults to the module-level
        ``rank_dict``.

    Returns
    -------
    str or None
        The selected reason, or ``None`` when the input is not a string
        (e.g. NaN/None) or contains no non-blank reason.
    """
    if preference is None:
        preference = preference_dict
    if rank is None:
        rank = rank_dict

    # Missing values (NaN/None) carry no reasons; do not crash on them
    if not isinstance(reason_string, str):
        return None

    # Drop blank fragments so inputs such as ',' or 'A,,B' never yield ''
    candidates = [token.strip() for token in reason_string.split(',')]
    candidates = [token for token in candidates if token]
    if not candidates:
        return None

    lowest_priority = float('inf')

    # min() returns the first minimal element, which matches taking the head
    # of a stable sort without sorting the whole list
    return min(
        candidates,
        key=lambda reason: (
            preference.get(reason, lowest_priority),
            rank.get(reason, lowest_priority),
        ),
    )

# Derive the main reason for every row of the predictions
df['Main Reason'] = df['Not Renewed Reasons'].map(get_main_reason)

# Write the enriched predictions to disk (without the index column)
output_path = 'GBM1_predictions_JFMAMJ(Final)_main_reason.csv'
df.to_csv(output_path, index=False)

# Preview the first ten rows: full reason list next to the selected main reason
preview = df[['Not Renewed Reasons', 'Main Reason']].head(10)
print(preview)

                                 Not Renewed Reasons              Main Reason
0  Old Vehicle Age, Low Vehicle IDV, Minimal Poli...          Low Vehicle IDV
1  Old Vehicle Age, Low Vehicle IDV, Minimal Poli...          Low Vehicle IDV
2  Old Vehicle Age, Low Vehicle IDV, Low No Claim...    Low Discount with NCB
3  Old Vehicle Age, Low Vehicle IDV, Low No Claim...          Low Vehicle IDV
4  Old Vehicle Age, Low Vehicle IDV, Minimal Poli...          Low Vehicle IDV
5  Old Vehicle Age, Low Vehicle IDV, Low No Claim...          Low Vehicle IDV
6  Old Vehicle Age, Low Vehicle IDV, Minimal Poli...          Low Vehicle IDV
7  Old Vehicle Age, Low Vehicle IDV, Tie Up with ...          Low Vehicle IDV
8  High Own-Damage Premium, Low No Claim Bonus Pe...  High Own-Damage Premium
9  Low Vehicle IDV, Low Discount with NCB, Minima...    Low Discount with NCB


In [1]:
import getpass
import os
from urllib.parse import quote

import pandas as pd
from sqlalchemy import create_engine

# Database connection setup.
# Settings are taken from the standard libpq environment variables (PGHOST,
# PGPORT, PGDATABASE, PGUSER, PGPASSWORD). The previous local values remain as
# defaults for everything except the password, which is never stored in the
# notebook: it is read from PGPASSWORD or, if that is unset, prompted for.
db_config = {
    'host': os.environ.get('PGHOST', 'localhost'),
    'database': os.environ.get('PGDATABASE', 'Liberty'),
    'user': os.environ.get('PGUSER', 'postgres'),
    'password': os.environ.get('PGPASSWORD') or getpass.getpass('PostgreSQL password: '),
    'port': os.environ.get('PGPORT', '5432')
}

# Percent-encode the credentials so characters such as '@', ':' or '/' inside
# a user name or password cannot corrupt the connection URL.
connection_string = (
    f"postgresql://{quote(db_config['user'], safe='')}:{quote(db_config['password'], safe='')}"
    f"@{db_config['host']}:{db_config['port']}/{db_config['database']}"
)
engine = create_engine(connection_string)

# Load historical data & calculate rank
query = 'SELECT * FROM policydata_with_fb_cc_pc_newfea_opti_correct_with_reasons;'
df_hist = pd.read_sql(query, con=engine)

# Work on a separate Series instead of writing back into df_hist: assigning a
# dropna() result to a column is re-aligned on the index, which silently puts
# the NaN rows back and makes the dropna() a no-op.
hist_reasons = df_hist['Not Renewed Reasons'].dropna().astype(str)

# One entry per individual reason. Blank fragments (empty cells, stray or
# trailing commas) are not reasons and must not be counted or ranked.
all_reasons = hist_reasons.str.split(',').explode().str.strip()
all_reasons = all_reasons[all_reasons != '']

# Frequency table: the most frequent reason gets Rank 1 and reasons with equal
# counts share a rank (dense ranking).
reason_counts = all_reasons.value_counts().reset_index()
reason_counts.columns = ['Reason', 'Count']
reason_counts['Rank'] = reason_counts['Count'].rank(method='dense', ascending=False).astype(int)
reason_counts = reason_counts.sort_values(by='Rank')

# Build rank dictionary: reason text -> dense frequency rank
rank_dict = reason_counts.set_index('Reason')['Rank'].to_dict()

# Updated business preference order (new list only), highest priority first
preference_order = [
    'Low Discount with NCB',
    'Low Vehicle IDV',
    'High Own-Damage Premium',
    'High Third-Party Premium',
    'High Add-On Premium',
    'Claims Happened',
    'Young Vehicle Age',
    'Tie Up with Non-OEM',
    'Tie Up with HYUNDAI',
    'Tie Up with MIBL OEM',
    'Tie Up with MARUTI',
    'Low No Claim Bonus Percentage',
    'Multiple Claims on Record',
    'Old Vehicle Age',
    'Minimal Policies Purchased',
    'Policy Tenure Exceeds 2 Years',
]

# Map every reason to its 1-based position in the list above
preference_dict = dict(zip(preference_order, range(1, len(preference_order) + 1)))

# Read the new predictions; missing reason lists become '' so that every row
# holds a string
predictions_path = 'GBM1_predictions_JASOND(Final)_mainreasons.csv'
df = pd.read_csv(predictions_path)
raw_reasons = df['Not Renewed Reasons']
df['Not Renewed Reasons'] = raw_reasons.fillna('').astype(str)

# Define main reason extraction logic
def get_main_reason(reason_string, preference=None, rank=None):
    """Pick the single most important reason from a comma-separated list.

    Reasons are ordered by business preference first (lower number wins) and,
    when preference is equal, by historical frequency rank (lower number wins).
    A reason missing from a mapping is treated as infinitely low priority in
    that mapping. If several reasons tie on both keys, the one appearing first
    in ``reason_string`` is returned.

    Parameters
    ----------
    reason_string : str
        Comma-separated reasons, e.g. ``'Old Vehicle Age, Low Vehicle IDV'``.
        Surrounding whitespace and blank fragments are ignored.
    preference : dict, optional
        Mapping reason -> preference position. Defaults to the module-level
        ``preference_dict``.
    rank : dict, optional
        Mapping reason -> frequency rank. Defaults to the module-level
        ``rank_dict``.

    Returns
    -------
    str or None
        The selected reason, or ``None`` when the input is not a string
        (e.g. NaN/None) or contains no non-blank reason.
    """
    if preference is None:
        preference = preference_dict
    if rank is None:
        rank = rank_dict

    # Missing values (NaN/None) carry no reasons; do not crash on them
    if not isinstance(reason_string, str):
        return None

    # Drop blank fragments so inputs such as ',' or 'A,,B' never yield ''
    candidates = [token.strip() for token in reason_string.split(',')]
    candidates = [token for token in candidates if token]
    if not candidates:
        return None

    lowest_priority = float('inf')

    # min() returns the first minimal element, which matches taking the head
    # of a stable sort without sorting the whole list
    return min(
        candidates,
        key=lambda reason: (
            preference.get(reason, lowest_priority),
            rank.get(reason, lowest_priority),
        ),
    )

# Derive the main reason for every row of the predictions
df['Main Reason'] = df['Not Renewed Reasons'].map(get_main_reason)

# Write the enriched predictions to disk (without the index column)
output_path = 'GBM1_predictions_JASOND(Final)_main_reason.csv'
df.to_csv(output_path, index=False)

# Preview the first ten rows: full reason list next to the selected main reason
preview = df[['Not Renewed Reasons', 'Main Reason']].head(10)
print(preview)

                                 Not Renewed Reasons              Main Reason
0  Old Vehicle Age, Low Vehicle IDV, Low No Claim...    Low Discount with NCB
1  Old Vehicle Age, Low Vehicle IDV, Tie Up with ...          Low Vehicle IDV
2  Old Vehicle Age, Low Vehicle IDV, Low No Claim...    Low Discount with NCB
3    Minimal Policies Purchased, Tie Up with Non-OEM      Tie Up with Non-OEM
4  High Own-Damage Premium, Low Discount with NCB...    Low Discount with NCB
5  Old Vehicle Age, Low Vehicle IDV, Tie Up with ...          Low Vehicle IDV
6  Old Vehicle Age, Low Vehicle IDV, Tie Up with ...          Low Vehicle IDV
7               Old Vehicle Age, Tie Up with Non-OEM      Tie Up with Non-OEM
8  Old Vehicle Age, Low Vehicle IDV, Minimal Poli...          Low Vehicle IDV
9  High Own-Damage Premium, Minimal Policies Purc...  High Own-Damage Premium
