In [None]:
import pandas as pd
from sqlalchemy import create_engine

import pandas as pd
from sqlalchemy import create_engine
from urllib.parse import quote_plus  

# Database connection setup
# NOTE(review): the credentials below are committed in plain text; consider
# sourcing them from the environment or a secrets store instead.
db_config = dict(
    host='localhost',
    database='Liberty',
    user='postgres',
    password='admin@pro',
    port='5432',
)

# Percent-encode the password so reserved characters in it (here '@') are not
# mistaken for URL delimiters when the connection string is parsed.
encoded_password = quote_plus(db_config['password'])

# Assemble the PostgreSQL URL from the config, substituting the encoded
# password, and build the engine used by the queries below.
connection_string = (
    f"postgresql://{db_config['user']}:{encoded_password}"
    f"@{db_config['host']}:{db_config['port']}/{db_config['database']}"
)
engine = create_engine(connection_string)

# Load historical data & calculate rank
# Reads the whole historical policy table; this section only uses its
# 'Not Renewed Reasons' column.
query = 'SELECT * FROM policydata_with_fb_cc_pc_newfea_opti_correct_with_reasons;'
df_hist = pd.read_sql(query, con=engine)

# Stringify the populated reason cells. Assigning the result back realigns it
# on the index, so rows that were null remain NaN in df_hist -- dropna() does
# NOT remove rows from the frame. The counting below therefore works from the
# null-free series directly instead of relying on later steps to skip NaN
# (which also avoids a `.str` accessor error when the column is entirely null).
hist_reasons = df_hist['Not Renewed Reasons'].dropna().astype(str)
df_hist['Not Renewed Reasons'] = hist_reasons

# One entry per individual reason: split the comma-separated cell, flatten,
# and trim surrounding whitespace.
all_reasons = hist_reasons.str.split(',').explode().str.strip()
# Discard blank tokens (from ',,' or a trailing comma) so that the empty
# string is never counted and ranked as if it were a reason.
all_reasons = all_reasons[all_reasons != '']

# Frequency table of reasons; the two columns are renamed explicitly because
# the names produced by value_counts().reset_index() vary by pandas version.
reason_counts = all_reasons.value_counts().reset_index()
reason_counts.columns = ['Reason', 'Count']
# Dense rank: the most frequent reason gets 1, ties share a rank, no gaps.
reason_counts['Rank'] = reason_counts['Count'].rank(method='dense', ascending=False).astype(int)
reason_counts = reason_counts.sort_values(by='Rank')

# Build rank dictionary (reason -> dense frequency rank)
rank_dict = reason_counts.set_index('Reason')['Rank'].to_dict()

# Business preference ordering of non-renewal reasons: earlier entries take
# precedence over later ones.
preference_order = [
    'Low Discount with NCB',
    'Low Vehicle IDV',
    'High Own-Damage Premium',
    'High Third-Party Premium',
    'High Add-On Premium',
    'Claims Happened',
    'Young Vehicle Age',
    'Tie Up with Non-OEM',
    'Tie Up with HYUNDAI',
    'Tie Up with MIBL OEM',
    'Tie Up with MARUTI',
    'Low No Claim Bonus Percentage',
    'Multiple Claims on Record',
    'Old Vehicle Age',
    'Minimal Policies Purchased',
    'Policy Tenure Exceeds 2 Years',
]

# Map each reason to its 1-based position in the list above
# (a lower number means a higher preference).
preference_dict = dict(
    zip(preference_order, range(1, len(preference_order) + 1))
)

# Load new prediction data
# Read the predictions CSV and normalise the reasons column in one step:
# missing cells become empty strings and every value is coerced to str.
df = pd.read_csv('GBM1_predictions_JFMAMJ(Final)_segment_reasons.csv').assign(
    **{
        'Not Renewed Reasons': lambda frame: (
            frame['Not Renewed Reasons'].fillna('').astype(str)
        ),
    }
)

# Define top-3 reason extraction logic
def get_top_3_reasons(reason_string, preference=None, rank=None):
    """Return the three highest-priority reasons from a comma-separated string.

    Reasons are ordered by business preference first and by historical
    frequency rank second; a reason missing from a lookup sorts after every
    reason present in it. The sort is stable, so fully tied reasons keep the
    order in which they appear in the input.

    Args:
        reason_string: Comma-separated reason labels. Surrounding whitespace
            is ignored, blank entries are skipped and repeated labels are
            only considered once.
        preference: Optional mapping of reason -> preference position (lower
            is better). Defaults to the module-level ``preference_dict``.
        rank: Optional mapping of reason -> frequency rank (lower is better).
            Defaults to the module-level ``rank_dict``.

    Returns:
        ``None`` when the input is not a string or contains no reasons;
        otherwise ``"A"``, ``"A and B"`` or ``"A, B and C"``.
    """
    # Missing values (None / NaN) carry no reasons; treat them like ''.
    if not isinstance(reason_string, str):
        return None

    # Trim each token, drop blanks (from ',,' or a trailing comma) and
    # de-duplicate while keeping first-seen order.
    trimmed = (token.strip() for token in reason_string.split(','))
    reasons = list(dict.fromkeys(token for token in trimmed if token))
    if not reasons:
        return None

    # Resolve the lookups only now, so empty input never touches the globals.
    preference = preference_dict if preference is None else preference
    rank = rank_dict if rank is None else rank

    unranked = float('inf')  # unknown reasons sort last
    # Sort by preference first, then rank
    reasons.sort(
        key=lambda reason: (
            preference.get(reason, unranked),
            rank.get(reason, unranked),
        )
    )

    top_reasons = reasons[:3]
    if len(top_reasons) == 1:
        return top_reasons[0]
    # Two or three reasons: comma-join all but the last, then "and <last>".
    return f"{', '.join(top_reasons[:-1])} and {top_reasons[-1]}"

# Derive the summarised reasons for every prediction row.
df['Top 3 Reasons'] = df['Not Renewed Reasons'].map(get_top_3_reasons)

# Persist the enriched predictions (row index omitted from the file).
df.to_csv('GBM1_predictions_JFMAMJ(Final)_top3_reasons.csv', index=False)

# Preview the first ten rows, raw reasons next to the summary.
print(df.head(10)[['Not Renewed Reasons', 'Top 3 Reasons']])

                                 Not Renewed Reasons  \
0  Old Vehicle Age, Low Vehicle IDV, Minimal Poli...   
1  Old Vehicle Age, Low Vehicle IDV, Minimal Poli...   
2  Old Vehicle Age, Low Vehicle IDV, Low No Claim...   
3  Old Vehicle Age, Low Vehicle IDV, Low No Claim...   
4  Old Vehicle Age, Low Vehicle IDV, Minimal Poli...   
5  Old Vehicle Age, Low Vehicle IDV, Low No Claim...   
6  Old Vehicle Age, Low Vehicle IDV, Minimal Poli...   
7  Old Vehicle Age, Low Vehicle IDV, Tie Up with ...   
8  High Own-Damage Premium, Low No Claim Bonus Pe...   
9  Low Vehicle IDV, Low Discount with NCB, Minima...   

                                       Top 3 Reasons  
0  Low Vehicle IDV, Tie Up with Non-OEM and Old V...  
1  Low Vehicle IDV, Tie Up with Non-OEM and Old V...  
2  Low Discount with NCB, Low Vehicle IDV and Tie...  
3  Low Vehicle IDV, Tie Up with Non-OEM and Low N...  
4  Low Vehicle IDV, Tie Up with Non-OEM and Old V...  
5  Low Vehicle IDV, Tie Up with Non-OEM and Low N... 