# Rule-Based Credit Score Calculator

**Score Range:** 300 - 900  

## Sectors:
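1. Income Strength (includes the Income Stability sub-score, "Sector 1B", based on the income coefficient of variation)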
2. Expense Discipline
3. EMI Burden
4. Liquidity Buffer
5. Bounce Discipline
6. Account Vintage

In [109]:
import pandas as pd
import numpy as np
from typing import Dict, Any
from dataclasses import dataclass
from decimal import Decimal

## BankAnalysisResult Data Model

This class represents the analyzed features from bank transactions.

In [110]:
@dataclass
class BankAnalysisResult:
    """Analyzed bank-statement features consumed by the rule-based scorer.

    Each field is read by exactly one scoring rule of
    ``RuleBasedCreditScoreService``.  The dataclass performs no validation;
    values are stored as given.
    """

    avg_monthly_income: float  # Average monthly income (currency unit is not stated in this file)
    income_cv: float  # Coefficient of variation of income; lower values score better
    expense_ratio: float  # Expense-to-income ratio
    emi_ratio: float  # EMI-to-income ratio
    avg_monthly_balance: float  # Average monthly balance (same unspecified currency unit)
    bounce_count: int  # Number of payment bounces
    account_age_months: int  # Account age in months

In [111]:
class RuleBasedCreditScoreService:
    """Rule-based credit scorer built from fixed per-sector point tiers.

    A score starts at ``BASE_SCORE``.  Every sector then adds or subtracts a
    fixed number of points depending on the tier its input falls into, and
    the total is clamped to ``[MIN_SCORE, MAX_SCORE]``.  The per-sector
    points of the most recent ``calculate`` call are kept in
    ``score_breakdown``.
    """

    BASE_SCORE = 500
    MIN_SCORE = 300
    MAX_SCORE = 900

    def __init__(self):
        # Empty until calculate() has been called at least once.
        self.score_breakdown = {}

    def calculate(self, result: BankAnalysisResult) -> int:
        """Return the clamped credit score for ``result``.

        As a side effect the per-sector points (plus the base score) are
        stored in ``self.score_breakdown`` for later explanation.
        """
        # Built in a local first so a failing sector rule leaves the
        # previously stored breakdown untouched.
        breakdown = {
            'base': self.BASE_SCORE,
            'income_strength': self._income_score(result),
            'income_stability': self._stability_score(result),
            'expense_discipline': self._expense_score(result),
            'emi_burden': self._emi_score(result),
            'liquidity_buffer': self._balance_score(result),
            'bounce_discipline': self._bounce_score(result),
            'account_vintage': self._vintage_score(result),
        }
        self.score_breakdown = breakdown

        # The final score is the base plus every sector's points.
        return self._clamp(sum(breakdown.values()))

    # ========== Sector 1: Income Strength ==========
    def _income_score(self, r: BankAnalysisResult) -> int:
        """Points for the average monthly income tier (higher is better)."""
        monthly_income = r.avg_monthly_income
        for floor, points in ((150_000, 90), (100_000, 25), (60_000, 15)):
            if monthly_income >= floor:
                return points
        return -100

    # ========== Sector 1B: Income Stability ==========
    def _stability_score(self, r: BankAnalysisResult) -> int:
        """Points for the income coefficient of variation (lower is better)."""
        variation = r.income_cv
        # Both tiers below 0.20 currently award the same number of points.
        if variation < 0.10:
            return 15
        if variation < 0.20:
            return 15
        return -60

    # ========== Sector 2: Expense Discipline ==========
    def _expense_score(self, r: BankAnalysisResult) -> int:
        """Points for the expense-to-income ratio (lower is better)."""
        spend_share = r.expense_ratio
        for ceiling, points in ((0.50, 25), (0.65, 15), (0.80, -10)):
            if spend_share <= ceiling:
                return points
        return -100

    # ========== Sector 3: EMI Burden ==========
    def _emi_score(self, r: BankAnalysisResult) -> int:
        """Points for the EMI-to-income ratio (lower is better)."""
        emi_share = r.emi_ratio
        for ceiling, points in ((0.30, 50), (0.40, 15), (0.50, -60)):
            if emi_share <= ceiling:
                return points
        return -100

    # ========== Sector 4: Liquidity Buffer ==========
    def _balance_score(self, r: BankAnalysisResult) -> int:
        """Points for the average monthly balance tier (higher is better)."""
        avg_balance = r.avg_monthly_balance
        for floor, points in ((100_000, 25), (50_000, 15), (20_000, -10)):
            if avg_balance >= floor:
                return points
        return -100

    # ========== Sector 5: Bounce Discipline ==========
    def _bounce_score(self, r: BankAnalysisResult) -> int:
        """Points for the number of payment bounces (fewer is better)."""
        bounce_total = r.bounce_count
        if bounce_total == 0:
            return 70
        if bounce_total == 1:
            return 15
        return -80 if bounce_total <= 3 else -150

    # ========== Sector 6: Account Vintage ==========
    def _vintage_score(self, r: BankAnalysisResult) -> int:
        """Points for the account age in months (older is better)."""
        age_months = r.account_age_months
        # The 36+ and 12+ month tiers currently award the same points.
        for floor, points in ((60, 25), (36, 15), (12, 15)):
            if age_months >= floor:
                return points
        return -60

    # ========== Score Clamping ==========
    def _clamp(self, score: int) -> int:
        """Limit ``score`` to the inclusive range MIN_SCORE..MAX_SCORE."""
        return min(max(score, self.MIN_SCORE), self.MAX_SCORE)

    def get_breakdown(self) -> Dict[str, int]:
        """Return a copy of the breakdown stored by the last ``calculate``."""
        return dict(self.score_breakdown)

    def explain_score(self) -> pd.DataFrame:
        """Return the stored breakdown as a DataFrame for visualization.

        Columns are ``sector``, ``points`` and ``cumulative`` (running sum of
        ``points``, not clamped).  An empty DataFrame is returned when no
        score has been calculated yet.
        """
        if not self.score_breakdown:
            return pd.DataFrame()

        table = pd.DataFrame({
            'sector': list(self.score_breakdown.keys()),
            'points': list(self.score_breakdown.values()),
        })
        table['cumulative'] = table['points'].cumsum()
        return table

### 🎲 Noise Function for ML Training


### Normal Distribution (Gaussian Noise)

We add zero-mean noise drawn from a **normal distribution** whose standard deviation depends on the score tier:

```
    σ = Standard Deviation (Controls spread)
    
      High Score (750+)      →  Small σ (10)  = Very Stable
      Medium Score (600-749) →  Large σ (25)  = More Uncertain  
      Low Score (<600)       →  Medium σ (15) = Risky but Bounded
```

In [112]:
def add_industry_noise(rule_score: int, rng: "np.random.Generator | None" = None) -> int:
    """Perturb a rule-based score with zero-mean Gaussian noise.

    The standard deviation depends on the score tier: 10 for scores of 750
    and above, 25 for scores from 600 to 749, and 15 below 600.

    Args:
        rule_score: Score to perturb.
        rng: Optional randomness source exposing ``normal(loc, scale)``, for
            example ``numpy.random.default_rng(seed)``.  When omitted, the
            global ``numpy.random`` state is used, exactly as before this
            parameter existed.

    Returns:
        The perturbed score, rounded to the nearest integer and clamped to
        the 300-900 range.
    """
    # Adaptive standard deviation based on score tier
    if rule_score >= 750:
        sigma = 10   # Very stable - high credit quality
    elif rule_score >= 600:
        sigma = 25   # Medium uncertainty - borderline cases
    else:
        sigma = 15   # Risky but bounded - low credit

    # Generate Gaussian noise from the injected source, or the global one.
    sampler = np.random if rng is None else rng
    noise = sampler.normal(loc=0, scale=sigma)

    # Clamp to the valid range, then round to nearest.  A bare int() would
    # truncate toward zero and pull the average noisy score about half a
    # point below rule_score even though the noise itself has mean 0.
    clamped = float(np.clip(rule_score + noise, 300, 900))
    return int(round(clamped))

In [113]:
def feature_aware_noise(
    rule_score: int,
    emi_ratio: float,
    bounce_count: int,
    rng: "np.random.Generator | None" = None,
) -> int:
    """Perturb a score with Gaussian noise that widens for risky behavior.

    The standard deviation starts at 20, grows by 10 when ``emi_ratio`` is
    above 0.45 and by a further 15 when ``bounce_count`` is 2 or more.

    Args:
        rule_score: Score to perturb.
        emi_ratio: EMI-to-income ratio of the customer.
        bounce_count: Number of payment bounces of the customer.
        rng: Optional randomness source exposing ``normal(loc, scale)``, for
            example ``numpy.random.default_rng(seed)``.  When omitted, the
            global ``numpy.random`` state is used, exactly as before this
            parameter existed.

    Returns:
        The perturbed score, rounded to the nearest integer and clamped to
        the 300-900 range.
    """
    base_sigma = 20

    # Increase uncertainty for risky behaviors
    if emi_ratio > 0.45:
        base_sigma += 10  # Over-leveraged customers are unpredictable

    if bounce_count >= 2:
        base_sigma += 15  # Payment history instability

    sampler = np.random if rng is None else rng
    noise = sampler.normal(0, base_sigma)

    # Round to nearest instead of truncating: a bare int() rounds toward
    # zero and would bias the noisy scores downward by about half a point.
    clamped = float(np.clip(rule_score + noise, 300, 900))
    return int(round(clamped))

### 🏭 Calculate Score with Noise

In [114]:
def calculate_noisy_credit_score(bank_analysis_result: BankAnalysisResult) -> int:
    """Return the rule-based score for the input after two rounds of noise.

    The deterministic score from ``RuleBasedCreditScoreService`` is first
    perturbed by ``add_industry_noise`` (tier-based spread) and the outcome
    is then perturbed again by ``feature_aware_noise`` (spread driven by the
    EMI ratio and bounce count).  Each round draws fresh random noise, so
    repeated calls with the same input generally return different values.
    """
    # Deterministic part: the plain rule-based score.
    deterministic_score = RuleBasedCreditScoreService().calculate(bank_analysis_result)

    # First round: tier-based noise.
    tier_perturbed = add_industry_noise(deterministic_score)

    # Second round: feature-aware noise for additional realism.
    return feature_aware_noise(
        tier_perturbed,
        bank_analysis_result.emi_ratio,
        bank_analysis_result.bounce_count,
    )

---

## 🏭 Rule-Based Credit Score for Our Dataset

**Input CSV:**
- `features_only.csv` – Feature-only dataset

**Output CSV:**
- `feature_with_rule_score.csv` – Features with the rule-based credit score added



### 🔁 Process Flow

| avgMonthlyIncome | incomeCV | expenseRatio | emiRatio | avgMonthlyBalance | bounceCount | accountAgeMonths |
|-----------------|----------|--------------|----------|------------------|-------------|-----------------|
| 120000 | 0.12 | 0.55 | 0.28 | 75000 | 1 | 48 |

&nbsp;

<div align="center">

**▼**

**Rule-Based Credit Score Engine**

**▼**

</div>

&nbsp;

<div align="center">

| ... | RuleBasedCreditScore |
|-----|-----------|
| ... | 710 |

</div>


In [115]:
# Load the feature-only dataset (features_only.csv)
df = pd.read_csv('data/synthetic/features_only.csv')

# Score every row.
# NOTE: calculate_noisy_credit_score draws random noise and no seed is set
# in this cell, so the saved scores differ from run to run.
# to_dict('records') yields one plain dict per row, avoiding the per-row
# Series construction that iterrows() performs.
rule_scores = [
    calculate_noisy_credit_score(
        BankAnalysisResult(
            avg_monthly_income=row['avgMonthlyIncome'],
            income_cv=row['incomeCV'],
            expense_ratio=row['expenseRatio'],
            emi_ratio=row['emiRatio'],
            avg_monthly_balance=row['avgMonthlyBalance'],
            bounce_count=int(row['bounceCount']),
            account_age_months=int(row['accountAgeMonths']),
        )
    )
    for row in df.to_dict('records')
]

# Add to dataframe and save
df['RuleBasedCreditScore'] = rule_scores
df.to_csv('data/synthetic/feature_with_rule_score.csv', index=False)
print("Saved rule-based credit scores to feature_with_rule_score.csv")


Saved rule-based credit scores to feature_with_rule_score.csv
