In [None]:
import pandas as pd

# Read the CSV file into a DataFrame.
df = pd.read_csv('train_storming_round.csv')

# One row per agent_code, holding the summed new_policy_count for that agent.
agent_performance = (
    df.groupby('agent_code')['new_policy_count']
    .agg('sum')
    .reset_index()
)


In [None]:
# Per-agent summary table. Named aggregation labels every output column at
# the point where it is computed, so no positional relabelling is needed.
# Note: 'std' is pandas' sample standard deviation (ddof=1), which is NaN for
# an agent that has only a single row.
agent_metrics = (
    df.groupby('agent_code')
    .agg(
        Total_Policies_Sold=('new_policy_count', 'sum'),
        Avg_Policies_Sold=('new_policy_count', 'mean'),
        Std_Policies_Sold=('new_policy_count', 'std'),
        Max_Policies_Sold=('new_policy_count', 'max'),
        Total_Unique_Proposals=('unique_proposal', 'sum'),
        Total_Unique_Customers=('unique_customers', 'sum'),
        Total_Unique_Quotations=('unique_quotations', 'sum'),
    )
    .reset_index()
    .rename(columns={'agent_code': 'Agent_ID'})
)

# Preview the first few agents.
print(agent_metrics.head())

   Agent_ID  Total_Policies_Sold  Avg_Policies_Sold  Std_Policies_Sold  \
0  003c1999                  448              22.40           9.996842   
1  00d5bda3                  405              20.25          13.329331   
2  012a3553                  362              18.10          10.622221   
3  012f0e15                  393              19.65          10.100938   
4  0177ed5f                  425              21.25           7.383303   

   Max_Policies_Sold  Total_Unique_Proposals  Total_Unique_Customers  \
0                 35                     379                     318   
1                 37                     395                     307   
2                 34                     329                     315   
3                 37                     359                     311   
4                 34                     331                     303   

   Total_Unique_Quotations  
0                      298  
1                      275  
2                      278  
3     

Avg_Policies_Sold           0
Std_Policies_Sold          13
Max_Policies_Sold           0
Total_Unique_Proposals      0
Total_Unique_Customers      0
Total_Unique_Quotations     0
dtype: int64


In [None]:
from sklearn.cluster import KMeans
import pandas as pd




# Feature matrix for clustering: every per-agent summary column except
# Total_Policies_Sold.
feature_columns = [
    'Avg_Policies_Sold', 'Std_Policies_Sold', 'Max_Policies_Sold',
    'Total_Unique_Proposals', 'Total_Unique_Customers', 'Total_Unique_Quotations',
]

# Missing feature values are replaced with 0 before fitting, because KMeans
# does not accept NaN input.
X = agent_metrics[feature_columns].fillna(0)

# Applying KMeans clustering (assuming 3 clusters: High, Medium, Low).
# n_init is pinned explicitly: its default changed between scikit-learn
# releases (10 -> 'auto'), so leaving it implicit makes the cluster
# assignments depend on the installed version and triggers a FutureWarning
# on some releases.
# NOTE(review): the features are clustered on their raw scales, so the
# larger-valued "Total_*" columns likely dominate the distance metric --
# consider standardising the features first; confirm with the analysis owner.
kmeans = KMeans(n_clusters=3, n_init=10, random_state=42)
agent_metrics['Cluster'] = kmeans.fit_predict(X)


print(agent_metrics[['Agent_ID', 'Cluster']])


     Agent_ID  Cluster
0    003c1999        0
1    00d5bda3        0
2    012a3553        0
3    012f0e15        0
4    0177ed5f        0
..        ...      ...
900  ff8edcc9        0
901  ff955d18        1
902  ffca3177        0
903  ffe6c9e9        0
904  fffb525d        0

[905 rows x 2 columns]


In [15]:
import numpy as np

# Rank the fitted cluster centres by their Avg_Policies_Sold coordinate.
# The column position is looked up by name in the feature frame the model was
# fitted on, instead of assuming it is feature 0, so reordering the feature
# list cannot silently change which feature drives the ranking.
centers = kmeans.cluster_centers_
avg_policies_position = X.columns.get_loc('Avg_Policies_Sold')
sorted_indices = np.argsort(centers[:, avg_policies_position])

# Map cluster index to labels: the centre with the smallest average becomes
# 'Low', the next 'Medium', the largest 'High'. Keys are plain Python ints.
cluster_labels = dict(zip(sorted_indices.tolist(), ['Low', 'Medium', 'High']))
agent_metrics['Performance_Level'] = agent_metrics['Cluster'].map(cluster_labels)

# Final labeled data
print(agent_metrics[['Agent_ID', 'Performance_Level']].head())

# Write the labelled table to disk without the index column.
agent_metrics.to_csv("clustered.csv", index=False)


   Agent_ID Performance_Level
0  003c1999            Medium
1  00d5bda3            Medium
2  012a3553            Medium
3  012f0e15            Medium
4  0177ed5f            Medium


In [16]:
# Write the current agent_metrics table to CSV, omitting the index column.
agent_metrics.to_csv(path_or_buf="agent_metrics.csv", index=False)