# 🎯 Hybrid Credit Score Generator



> ### Hybrid Score = (0.6 × Rule-Based Credit Score) + (0.4 × MLSignal-Based Credit Score)


In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Locations of the two per-row score files that feed the hybrid score
rule_score_path = '../../../3. Data/5*. Hybrid_Data/feature_with_rule_score.csv'
ml_score_path = '../../../3. Data/5*. Hybrid_Data/feature_with_ml_signal_score.csv'

# Rule-based scores are read first, then the ML signal scores
df_rule = pd.read_csv(rule_score_path)
df_ml = pd.read_csv(ml_score_path)

### 🎲 Noise Function for ML Training

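Add realistic variation to hybrid scores using zero-mean Gaussian noise whose standard deviation depends on the score tier: high (score ≥ 700) uses σ = 8, medium (550 ≤ score < 700) uses σ = 18, and low (score < 550) uses σ = 12. The noisy score is clamped to the range 300–900.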

In [3]:
def add_hybrid_noise(hybrid_score: float, score_tier: str = 'auto', rng=None) -> int:
    """Perturb a hybrid credit score with tier-dependent Gaussian noise.

    Args:
        hybrid_score: Base hybrid score to perturb.
        score_tier: One of 'high', 'medium', 'low' or 'auto'. With 'auto'
            the tier is derived from ``hybrid_score``: >= 700 is 'high',
            >= 550 is 'medium', anything lower is 'low'. Any other value
            is accepted and falls back to a standard deviation of 15.
        rng: Optional random source exposing ``normal(loc=..., scale=...)``,
            e.g. ``np.random.default_rng(seed)``. When omitted, the global
            ``np.random`` state is used, so existing callers are unaffected.

    Returns:
        The noisy score clamped to [300, 900] and truncated to ``int``.

    Raises:
        ValueError: If ``hybrid_score`` is NaN.
    """
    # Without this guard a NaN score only fails at the final int()
    # conversion, with a message that does not point at the input.
    if np.isnan(hybrid_score):
        raise ValueError("hybrid_score must not be NaN")

    # Auto-determine tier if not specified
    if score_tier == 'auto':
        if hybrid_score >= 700:
            score_tier = 'high'
        elif hybrid_score >= 550:
            score_tier = 'medium'
        else:
            score_tier = 'low'

    # Standard deviation of the noise for each tier
    noise_params = {
        'high': 8,      # Very stable - high confidence
        'medium': 18,   # Moderate uncertainty - borderline cases
        'low': 12       # Bounded instability - risky
    }

    sigma = noise_params.get(score_tier, 15)

    # Draw from the caller's generator when given, otherwise from the
    # global NumPy state (the original behaviour).
    draw_normal = np.random.normal if rng is None else rng.normal
    noise = draw_normal(loc=0, scale=sigma)
    noisy_score = hybrid_score + noise

    # Clamp to valid range; int() truncates the fractional part
    return int(np.clip(noisy_score, 300, 900))

### 🔢 Calculate Hybrid Credit Scores

In [4]:
print("Calculating hybrid credit scores...\n")

# Column assignment below aligns on the index, so frames with different
# indexes would silently produce NaN or mispaired rows. Fail loudly instead.
# NOTE(review): no shared key column is visible here, so this only verifies
# positional alignment — confirm both files are written in the same row order.
if not df_rule.index.equals(df_ml.index):
    raise ValueError(
        "Rule-based and ML-signal score frames are not row-aligned: "
        f"{len(df_rule)} vs {len(df_ml)} rows"
    )

# Merge dataframes (use rule-based as base, add ML-derived columns)
ml_columns = [
    'MLSignalBasedCreditScore', 'PD', 'anomaly_score',
    'prob_high', 'prob_medium', 'prob_low',
]
df_hybrid = df_rule.copy()
for column in ml_columns:
    df_hybrid[column] = df_ml[column]

# Calculate base hybrid score (60% rule + 40% ML)
df_hybrid['hybridScoreBase'] = (
    0.6 * df_hybrid['RuleBasedCreditScore'] +
    0.4 * df_hybrid['MLSignalBasedCreditScore']
)

# add_hybrid_noise draws from the global NumPy RNG; seed it here so that
# Restart & Run All reproduces the same noisy scores.
NOISE_SEED = 42
np.random.seed(NOISE_SEED)

# Add noise to every base hybrid score
hybrid_scores_noisy = [add_hybrid_noise(score) for score in df_hybrid['hybridScoreBase']]
df_hybrid['HybridCreditScore'] = hybrid_scores_noisy

print("\nScore Statistics:")
print(df_hybrid[['RuleBasedCreditScore', 'MLSignalBasedCreditScore', 'HybridCreditScore']].describe())

Calculating hybrid credit scores...


Score Statistics:
       RuleBasedCreditScore  MLSignalBasedCreditScore  HybridCreditScore
count          30000.000000              30000.000000       30000.000000
mean             586.462533                391.376233         508.068667
std              150.720055                 86.067903         115.671752
min              300.000000                300.000000         300.000000
25%              476.000000                307.000000         415.000000
50%              606.000000                352.000000         518.000000
75%              707.000000                481.000000         606.000000
max              887.000000                581.000000         761.000000


In [5]:
# Select final columns
output_columns = [
    # Original features
    'avgMonthlyIncome', 'incomeCV', 'expenseRatio', 'emiRatio',
    'avgMonthlyBalance', 'bounceCount', 'accountAgeMonths',
    # ML predictions
    'PD', 'anomaly_score', 'prob_high', 'prob_medium', 'prob_low',
    # Scores
    'RuleBasedCreditScore', 'MLSignalBasedCreditScore', 'HybridCreditScore'
]

# Create output dataframe (raises KeyError if any listed column is missing)
df_output = df_hybrid[output_columns].copy()

# Save to CSV without the index column
# NOTE(review): the path contains literal '*' characters, which are not valid
# in Windows file names — confirm these directory/file names are intended.
output_file = '../../../3. Data/5*. Hybrid_Data/*Hybrid_Credit_Scores.csv'
df_output.to_csv(output_file, index=False)

# The emoji in the status message was mis-encoded; restored to the check mark
print("✅ Hybrid credit scores saved!")
print("\nSample data:")
print(df_output[['RuleBasedCreditScore', 'MLSignalBasedCreditScore', 'HybridCreditScore']].describe())

âœ… Hybrid credit scores saved!

Sample data:
       RuleBasedCreditScore  MLSignalBasedCreditScore  HybridCreditScore
count          30000.000000              30000.000000       30000.000000
mean             586.462533                391.376233         508.068667
std              150.720055                 86.067903         115.671752
min              300.000000                300.000000         300.000000
25%              476.000000                307.000000         415.000000
50%              606.000000                352.000000         518.000000
75%              707.000000                481.000000         606.000000
max              887.000000                581.000000         761.000000
