In [2]:
import pandas as pd
import numpy as np

In [3]:
# Contract-level table; it is aggregated per REF_PERSONNE in the next step.
# NOTE(review): unpickling can execute arbitrary code -- only load pickle files from a trusted source.
df = pd.read_pickle('../../data/contrats.pkl')
# Client-level table; its profile columns are joined to the scores on REF_PERSONNE further down.
# NOTE(review): presumably "phy" stands for physical persons (individuals) -- confirm with the data owner.
df_clients = pd.read_pickle('../../data/clients_phy.pkl')

Step 1: Aggregate Client-Level Metrics

In [4]:
# Build one row of summary metrics per client (REF_PERSONNE) from the contract table.

def _count_equal(label):
    """Return an aggregator that counts how many entries of a group equal `label`."""
    # A lambda is returned on purpose so pandas treats it exactly like an inline lambda
    # when several of them target the same source column.
    return lambda values: (values == label).sum()


def _share_equal(label):
    """Return an aggregator that gives the fraction of a group's entries equal to `label`."""
    return lambda values: (values == label).mean()


# Output column -> (source column, aggregation). Dict order defines the column order.
aggregation_spec = {
    # Volume and diversity
    'total_contracts': ('NUM_CONTRAT', 'count'),
    'active_contracts': ('LIB_ETAT_CONTRAT', _count_equal('EN COURS')),
    'product_variety': ('LIB_PRODUIT', 'nunique'),
    'branch_variety': ('branche', 'nunique'),

    # Financial value
    'total_premiums_paid': ('somme_quittances', 'sum'),
    'avg_premium_per_contract': ('somme_quittances', 'mean'),
    'max_premium': ('somme_quittances', 'max'),
    'total_capital_assured': ('Capital_assure', 'sum'),
    'avg_capital_per_contract': ('Capital_assure', 'mean'),

    # Payment behaviour
    'paid_ratio': ('statut_paiement', _share_equal('Payé')),
    'total_paid_contracts': ('statut_paiement', _count_equal('Payé')),
    'total_unpaid_contracts': ('statut_paiement', _count_equal('Non payé')),

    # Contract status
    'expired_contracts': ('LIB_ETAT_CONTRAT', _count_equal('EXPIRE')),
    'canceled_contracts': ('LIB_ETAT_CONTRAT', _count_equal('RESILIE')),
    'active_ratio': ('LIB_ETAT_CONTRAT', _share_equal('EN COURS')),
}

client_metrics = df.groupby('REF_PERSONNE').agg(**aggregation_spec).reset_index()

# Derived ratios. Every denominator is floored at 1 so that zero (or sub-unit)
# denominators cannot blow the ratio up.
avg_premium_floor = client_metrics['avg_premium_per_contract'].clip(lower=1)
total_premium_floor = client_metrics['total_premiums_paid'].clip(lower=1)
contract_count_floor = client_metrics['total_contracts'].clip(lower=1)

client_metrics['premium_per_contract_ratio'] = client_metrics['max_premium'] / avg_premium_floor
client_metrics['capital_premium_ratio'] = client_metrics['total_capital_assured'] / total_premium_floor
client_metrics['product_density'] = client_metrics['product_variety'] / contract_count_floor

Step 2: Create Scoring Components

Component 1: Loyalty & Engagement Score (35%)

In [5]:
# Loyalty & engagement: four components whose maximum contributions add up to 100 points.
contracts_per_client = client_metrics['total_contracts']

# Contract volume, relative to the client holding the most contracts (up to 30 points)
volume_points = contracts_per_client / contracts_per_client.max() * 30

# Product diversity, relative to the most diversified client (up to 25 points)
product_points = client_metrics['product_variety'] / client_metrics['product_variety'].max() * 25

# Branch diversity, relative to the most diversified client (up to 20 points)
branch_points = client_metrics['branch_variety'] / client_metrics['branch_variety'].max() * 20

# Share of the client's own contracts that are active (up to 25 points);
# the denominator is floored at 1 to avoid dividing by zero.
active_points = client_metrics['active_contracts'] / contracts_per_client.clip(lower=1) * 25

client_metrics['loyalty_score'] = volume_points + product_points + branch_points + active_points

Component 2: Financial Value Score (40%)

In [6]:
# Financial value: four components whose maximum contributions add up to 100 points.

# Value consistency is the inverse of max/avg premium (lower ratio = more consistent spending).
# A client whose largest premium is 0 has a ratio of 0, so the plain inverse is +inf; a single
# inf would become the column maximum and make the later min-max normalisation collapse every
# financial score to 0. Non-finite inverses are therefore scored 0, and the value is capped to
# [0, 1] so this component can never exceed its 20-point weight.
value_consistency = (
    (1 / client_metrics['premium_per_contract_ratio'])
    .replace([np.inf, -np.inf], np.nan)
    .fillna(0)
    .clip(lower=0, upper=1)
)

client_metrics['financial_score'] = (
    # Total revenue generated (35%)
    (client_metrics['total_premiums_paid'] / client_metrics['total_premiums_paid'].max() * 35) +

    # Average contract value (25%)
    (client_metrics['avg_premium_per_contract'] / client_metrics['avg_premium_per_contract'].max() * 25) +

    # Capital exposure (20%)
    (client_metrics['total_capital_assured'] / client_metrics['total_capital_assured'].max() * 20) +

    # Value consistency (20%)
    (value_consistency * 20)
)

Component 3: Payment Behaviour Score (25%)

In [7]:
# Payment behaviour: three components whose maximum contributions add up to 100 points.
# The contract count is floored at 1 so the per-contract shares never divide by zero.
contract_count = client_metrics['total_contracts'].clip(lower=1)

# Payment reliability: share of contract rows flagged as paid (up to 40 points)
reliability_points = client_metrics['paid_ratio'] * 40

# Contract maintenance: share of contracts that were not cancelled (up to 30 points)
maintenance_points = (1 - client_metrics['canceled_contracts'] / contract_count) * 30

# Payment concentration: paid contracts per contract held (up to 30 points)
concentration_points = client_metrics['total_paid_contracts'] / contract_count * 30

client_metrics['payment_score'] = reliability_points + maintenance_points + concentration_points

Normalize All Scores to a 0-100 Scale

In [8]:
# Min-max rescale each component score to the 0-100 range.
for score_col in ['loyalty_score', 'financial_score', 'payment_score']:
    # Treat +/-inf as missing: an infinite value would otherwise become the column
    # maximum and squash every finite score to 0.
    raw_scores = client_metrics[score_col].replace([np.inf, -np.inf], np.nan)
    lowest = raw_scores.min()
    spread = raw_scores.max() - lowest

    if pd.notna(spread) and spread > 0:
        scaled_scores = (raw_scores - lowest) / spread * 100
    else:
        # Degenerate column (all values equal, or nothing finite): there is no range
        # to scale against, so every client gets 0 instead of relying on 0/0 -> NaN.
        scaled_scores = pd.Series(0.0, index=raw_scores.index)

    # Remaining NaN come from missing or non-finite inputs; score those clients 0.
    client_metrics[score_col] = scaled_scores.fillna(0)

Step 3: Create the Final Client Score and Segments

In [9]:
# Blend the three normalised component scores: 35% loyalty, 40% financial, 25% payment.
weighted_loyalty = client_metrics['loyalty_score'] * 0.35
weighted_financial = client_metrics['financial_score'] * 0.40
weighted_payment = client_metrics['payment_score'] * 0.25

blended_score = weighted_loyalty + weighted_financial + weighted_payment

# Keep the final score inside the 0-100 range.
client_metrics['final_client_score'] = blended_score.clip(lower=0, upper=100)

# Create client segments
def segment_clients(score):
    """Map a final client score to its segment label.

    Parameters
    ----------
    score : float
        Final client score.

    Returns
    -------
    str
        'Premium' (>= 85), 'Gold' (>= 70), 'Silver' (>= 50), 'Bronze' (>= 30),
        otherwise 'Prospect'. A NaN score fails every comparison and is
        therefore also labelled 'Prospect'.
    """
    # Ordered from the highest floor down; the first floor the score reaches wins.
    tiers = (
        (85, 'Premium'),
        (70, 'Gold'),
        (50, 'Silver'),
        (30, 'Bronze'),
    )
    for floor, label in tiers:
        if score >= floor:
            return label
    return 'Prospect'

def _risk_label(payment_score):
    """Translate a payment score into a risk label (>= 80 low, >= 50 medium, else high)."""
    if payment_score >= 80:
        return 'Low Risk'
    if payment_score >= 50:
        return 'Medium Risk'
    return 'High Risk'


# Segment label derived from the final blended score.
client_metrics['client_segment'] = client_metrics['final_client_score'].apply(segment_clients)

# Risk label derived from the payment behaviour score alone.
client_metrics['risk_profile'] = client_metrics['payment_score'].apply(_risk_label)

Step 4: Final Client Scoring Table

In [10]:
# Attach the computed metrics to every client profile. The left join keeps
# clients that have no row in client_metrics.
profile_columns = ['REF_PERSONNE', 'PROFESSION_GROUP', 'SECTEUR_ACTIVITE_GROUP', 'AGE', 'SITUATION_FAMILIALE']
final_scoring_table = df_clients[profile_columns].merge(
    client_metrics,
    on='REF_PERSONNE',
    how='left',
)

# Clients without a match get the lowest score, segment and risk standing.
score_columns = ['loyalty_score', 'financial_score', 'payment_score', 'final_client_score']
missing_defaults = {score_name: 0 for score_name in score_columns}
missing_defaults['client_segment'] = 'Prospect'
missing_defaults['risk_profile'] = 'High Risk'
final_scoring_table = final_scoring_table.fillna(value=missing_defaults)

# Columns kept in the exported scoring table, grouped by theme.
profile_fields = ['REF_PERSONNE', 'PROFESSION_GROUP', 'SECTEUR_ACTIVITE_GROUP', 'AGE', 'SITUATION_FAMILIALE']
volume_fields = ['total_contracts', 'active_contracts', 'product_variety']
value_fields = ['total_premiums_paid', 'avg_premium_per_contract', 'total_capital_assured']
payment_fields = ['paid_ratio', 'total_paid_contracts', 'total_unpaid_contracts']
score_fields = ['loyalty_score', 'financial_score', 'payment_score']
outcome_fields = ['final_client_score', 'client_segment', 'risk_profile']

output_columns = (
    profile_fields + volume_fields + value_fields
    + payment_fields + score_fields + outcome_fields
)

# Highest-scoring clients first.
client_scoring = final_scoring_table.loc[:, output_columns].sort_values(
    by='final_client_score', ascending=False
)

# Summary of the resulting segmentation
print("Client Scoring Distribution:")
print(client_scoring['client_segment'].value_counts())
print(f"\nAverage Client Score: {client_scoring['final_client_score'].mean():.2f}")
print(f"Conversion Potential: {(client_scoring['client_segment'].isin(['Bronze', 'Prospect'])).sum()} clients need activation")

Client Scoring Distribution:
client_segment
Prospect    34304
Bronze       7761
Name: count, dtype: int64

Average Client Score: 23.61
Conversion Potential: 42065 clients need activation


In [11]:
# Quick check of the scoring table's dimensions as (rows, columns).
client_scoring.shape

(42065, 20)

In [12]:
# List the columns that will be exported with the scoring table.
client_scoring.columns

Index(['REF_PERSONNE', 'PROFESSION_GROUP', 'SECTEUR_ACTIVITE_GROUP', 'AGE',
       'SITUATION_FAMILIALE', 'total_contracts', 'active_contracts',
       'product_variety', 'total_premiums_paid', 'avg_premium_per_contract',
       'total_capital_assured', 'paid_ratio', 'total_paid_contracts',
       'total_unpaid_contracts', 'loyalty_score', 'financial_score',
       'payment_score', 'final_client_score', 'client_segment',
       'risk_profile'],
      dtype='object')

Export the Data

In [13]:
# Write the scoring table (sorted by final_client_score, descending) to disk as a pickle.
client_scoring.to_pickle('../../data/client_phy_scoring.pkl')