In [None]:
import pandas as pd
import numpy as np

# --- 1. Load Data ---
# Everything below works on this single frame; the path is absolute.
df = pd.read_csv(filepath_or_buffer='/content/loan_collection_with_risk_score.csv')

# Echo the frame exactly as loaded, then a 50-character '=' rule padded by
# blank lines to separate it from the later output.
print("--- Initial Data with Risk Scores ---", df, sep="\n")
print(f"\n{'=' * 50}\n")

# --- 2. Dynamic Thresholds ---
# Every cut-off is derived from the loaded data (quartiles / median) instead
# of being a fixed constant, so the personas adapt to whichever file was read.
sentiment_low = df['SentimentScore'].quantile(0.25)
sentiment_high = df['SentimentScore'].quantile(0.75)
complaints_high = df['Complaints'].quantile(0.75)
interaction_high = df['InteractionAttempts'].quantile(0.75)
# Computed once here: assign_persona runs once per row, and recomputing the
# column median on every call repeats the same full-column scan each time.
interaction_median = df['InteractionAttempts'].median()
response_high = df['ResponseTimeHours'].quantile(0.75)
website_visits_high = df['WebsiteVisits'].quantile(0.75)

def assign_persona(row):
    """Return the persona label for one customer row.

    The rules are checked in priority order and the first match wins:

    1. 'Aggressive'  - sentiment below the lower quartile, or complaints at
       or above the upper quartile.
    2. 'Cooperative' - sentiment above the upper quartile and interaction
       attempts at or below the median.
    3. 'Evasive'     - interaction attempts and response time both above
       their upper quartiles.
    4. 'Confused'    - website visits above the upper quartile.
    5. 'Standard'    - none of the above.

    Args:
        row: Mapping-like record (e.g. a DataFrame row) exposing the keys
            'SentimentScore', 'Complaints', 'InteractionAttempts',
            'ResponseTimeHours' and 'WebsiteVisits'.

    Returns:
        One of the five persona strings listed above.
    """
    # NOTE(review): the complaints test is inclusive (>=). If the upper
    # quartile of 'Complaints' happens to be 0, every row lands here -
    # confirm against the real distribution.
    if row['SentimentScore'] < sentiment_low or row['Complaints'] >= complaints_high:
        return 'Aggressive'
    elif row['SentimentScore'] > sentiment_high and row['InteractionAttempts'] <= interaction_median:
        return 'Cooperative'
    elif row['InteractionAttempts'] > interaction_high and row['ResponseTimeHours'] > response_high:
        return 'Evasive'
    elif row['WebsiteVisits'] > website_visits_high:
        return 'Confused'
    else:
        return 'Standard'

# Risk-score cut-offs, taken from this dataset's own distribution: the 40th
# percentile bounds the medium band from below, the 75th the high band.
medium_risk_threshold, high_risk_threshold = (
    df['Risk Score'].quantile(q) for q in (0.4, 0.75)
)

def assign_risk_segment(risk_score):
    """Bucket a single risk score into 'High Risk', 'Medium Risk' or 'Low Risk'.

    Both comparisons are strict, so a score exactly equal to a threshold
    falls into the band below it. A score that is greater than neither
    threshold (NaN included, since its comparisons are False) is 'Low Risk'.

    Args:
        risk_score: The value to compare against the module-level
            ``high_risk_threshold`` and ``medium_risk_threshold``.

    Returns:
        The segment label as a string.
    """
    # Check the highest band first; everything else is settled by the
    # medium cut-off.
    if risk_score > high_risk_threshold:
        return 'High Risk'
    return 'Medium Risk' if risk_score > medium_risk_threshold else 'Low Risk'

# --- Apply Segmentation ---
# The persona rule reads several columns, so it is evaluated row by row.
df['Persona'] = df.apply(assign_persona, axis='columns')
# The risk segment depends on the score alone, so it maps over that column.
df['RiskSegment'] = df['Risk Score'].map(assign_risk_segment)

# --- 3. Recommendation Matrix ---
# One persona -> action table per risk segment. The tables are assembled
# below into strategy_matrix, which is indexed as
# strategy_matrix[risk_segment][persona].
_HIGH_RISK_ACTIONS = dict([
    ('Aggressive', 'Senior Escalation & Pause Automated Contact'),
    ('Cooperative', 'Empathetic Call from Human Agent to Offer Solutions also for settlement option'),
    ('Evasive', 'Assertive Human Follow-up & Multiple Channels'),
    ('Confused', 'Problem-Solving Call from Human Agent'),
    ('Standard', 'Firm Human Call to Discuss Payment'),
])

_MEDIUM_RISK_ACTIONS = dict([
    ('Aggressive', 'Immediate Handoff to Human Agent'),
    ('Cooperative', 'Chatbot with Flexible Payment Options with loan restructuring'),
    ('Evasive', 'Automated Follow-up via SMS/Email'),
    ('Confused', 'Ask Chatbot to help with Step-by-Step Payment Guide'),
    ('Standard', 'Assertive Chatbot Reminder'),
])

_LOW_RISK_ACTIONS = dict([
    ('Aggressive', 'Formal but Automated Notice'),
    ('Cooperative', 'Gentle Chatbot Nudge and early repayment option with discount etc'),
    ('Evasive', 'Proactive Chatbot Nudge with Direct Payment Link'),
    ('Confused', 'Informative Chatbot to Explain Bill'),
    ('Standard', 'Standard Automated Reminder'),
])

# Keys must match the labels produced by the risk-segment and persona rules.
strategy_matrix = {
    'High Risk': _HIGH_RISK_ACTIONS,
    'Medium Risk': _MEDIUM_RISK_ACTIONS,
    'Low Risk': _LOW_RISK_ACTIONS,
}

def get_recommendation(row):
    """Look up the recommended action for one segmented customer row.

    Args:
        row: Mapping-like record exposing 'RiskSegment' and 'Persona'.

    Returns:
        The entry stored in ``strategy_matrix`` for that segment/persona pair.

    Raises:
        KeyError: If either label has no entry in ``strategy_matrix``.
    """
    # Resolve the segment first, then the persona inside that segment's table.
    actions_for_segment = strategy_matrix[row['RiskSegment']]
    return actions_for_segment[row['Persona']]

# Attach the action looked up from each row's RiskSegment / Persona pair.
df['RecommendedStrategy'] = df.apply(get_recommendation, axis='columns')

# --- 4. Combined Master Dataset ---
# Work on a copy so the exported frame is a separate object from df.
master_dataset = df.copy()
print("--- Master Dataset with Segmentation and Strategy ---", master_dataset, sep="\n")

# Save master dataset (the row index is not written to the file)
master_csv = 'all_customers_segmented.csv'
master_dataset.to_csv(path_or_buf=master_csv, index=False)
print(f"\nSaved master dataset to {master_csv}")

# --- 5. Persona-wise Subdatasets ---
# One copied sub-frame and one CSV per persona value present in the data.
# The sub-frames are also kept in persona_datasets, keyed by persona label.
persona_column = master_dataset['Persona']
persona_datasets = {}
for persona in persona_column.unique():
    persona_df = master_dataset.loc[persona_column == persona].copy()
    persona_datasets[persona] = persona_df
    # File name is the lower-cased label plus a fixed suffix.
    csv_name = f"{persona.lower()}_customers.csv"
    persona_df.to_csv(csv_name, index=False)
    print(f"Saved '{persona}' customer dataset to {csv_name}")

print("\nProcessing complete. Master dataset + persona-wise files generated.")


--- Initial Data with Risk Scores ---
     CustomerID  Age   Income  LoanAmount  TenureMonths  InterestRate  \
0      CUST0001   59  1121522      985115             7         13.60   
1      CUST0002   49  1784166      717304            34         14.38   
2      CUST0003   35  1053938      889319             9          9.61   
3      CUST0004   63  1814021      655730             6          9.20   
4      CUST0005   28  1827781       99847            24         12.87   
...         ...  ...      ...         ...           ...           ...   
1995   CUST1996   52   385281      763325            39         11.91   
1996   CUST1997   21  1102506      565998            36         11.85   
1997   CUST1998   35  1737381      480450            36         14.14   
1998   CUST1999   61  1159766       96410            25          7.22   
1999   CUST2000   61   827391       98027            25         15.31   

      MissedPayments  DelaysDays  PartialPayments  InteractionAttempts  ...  \
0     