<a href="https://colab.research.google.com/github/Taha-Fintech/psx-data-reader/blob/master/Mortgage_Model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import random

import numpy as np
import pandas as pd

def generate_sample_data(num_records):
    """Build a DataFrame of randomly generated accounts plus bin columns.

    ``num_records`` rows are drawn with the module-level ``random`` generator.
    For every variable listed in the binning table a companion
    ``<column>_Bin`` column is added, holding the numeric label of the
    left-closed interval that contains the raw value.

    Returns the resulting ``pandas.DataFrame``.
    """
    column_names = (
        'Account_Id',
        'Installment_Amount',
        'Expat_Flag',
        'LTV_Ratio',
        'Current_LTV',
        'Preferred_Flag',
        'BankingWith_Us',
        'Customer_Bscore',
        'Score_Type',
    )

    def draw_record():
        # The tuple is evaluated left to right, so the sequence of random
        # draws per record is fixed; values line up with ``column_names``.
        return (
            f"MG{random.randint(100000000, 999999999)}",
            random.randint(1000, 20000),
            random.choice([0, 1]),
            round(random.uniform(0.001, 1000), 3),
            round(random.uniform(0.001, 1000), 3),
            random.choice([0, 1]),
            random.choice([0, 1]),
            random.randint(400, 1000),
            random.choice(['A-Score', 'B-Score']),
        )

    records = [draw_record() for _ in range(num_records)]

    # Transpose the row tuples into one list per column.
    frame = pd.DataFrame({
        name: [record[position] for record in records]
        for position, name in enumerate(column_names)
    })

    # Per variable: (interval edges, label assigned to each interval).
    binning_spec = {
        'Installment_Amount': (
            [0, 2000, 4000, 6000, 1000000],
            [0.907957531, 0.5539907, -0.000453425, -0.294633949],
        ),
        'Expat_Flag': (
            [0, 1, 2],
            [0.50465208, -0.129312869],
        ),
        'LTV_Ratio': (
            [-1000000, 0.99, 1.2, 1000000000000],
            [-0.461206544, 0.242451227, 0.658470652],
        ),
        'Current_LTV': (
            [-1000000, 0.65, 1.1, 1.7, 10000000000],
            [-0.380283095, -0.02436255, 0.875635759, 1.62191051],
        ),
        'Preferred_Flag': (
            [0, 1, 2],
            [-0.601576706, 0.428369848],
        ),
        'BankingWith_Us': (
            [0, 1, 2],
            [-0.253502997, 0.278764902],
        ),
        'Customer_Bscore': (
            [-1000000, 10, 400, 580, 680, 755, 810, 1000000000000],
            [0.00637587, 3.158442362, 2.167616612, 0.907456144,
             -1.010247178, -2.297054681, -3.960001818],
        ),
    }

    # right=False makes every interval closed on the left, open on the right.
    for column, (edges, interval_labels) in binning_spec.items():
        frame[f'{column}_Bin'] = pd.cut(
            frame[column], bins=edges, labels=interval_labels, right=False
        )

    return frame

if __name__ == '__main__':
    # Demo run: generate a small random sample and display it.
    # ``sample_data`` stays a module-level name so later cells can use it.
    num_records = 50
    sample_data = generate_sample_data(num_records=num_records)
    print(sample_data)


In [None]:
# Lookup table from scaled score to PD_TTC, with keys in ascending order.
# NOTE(review): each key is treated as the inclusive lower bound of a score
# band (a score maps to the entry with the largest key <= score). The original
# exact-key lookup could essentially never match a continuous score; confirm
# the band interpretation (and the final 100000 entry) with the model owner.
_SCALED_SCORE_TO_PD_TTC = {
    -1000: 0.3354,
    610: 0.1677,
    640: 0.0612,
    690: 0.0306,
    720: 0.0138,
    760: 0.0054,
    800: 0.0031,
    810: 0.0019,
    840: 0.0012,
    850: 0.001,
    860: 0.0005,
    910: 0.0002,
    100000: 0.0051,
}


def _lookup_pd_ttc(scaled_score):
    """Return the PD_TTC of the band containing ``scaled_score`` (NaN if none)."""
    pd_ttc = float('nan')
    for lower_bound, band_pd in _SCALED_SCORE_TO_PD_TTC.items():
        # Keys ascend, so the first bound above the score ends the search.
        # A NaN score fails the comparison immediately and yields NaN.
        if scaled_score >= lower_bound:
            pd_ttc = band_pd
        else:
            break
    return pd_ttc


def calculate_score(installment_amount, expat_flag, ratio_of_ltv, ltv, preferred_list, BankingWith_Us, bscore):
    """Score one record with the logistic regression equation.

    Each argument is the numeric value of the corresponding model variable
    for a single record (the script below passes the ``*_Bin`` values).

    Returns a tuple ``(score, estimated_pd, predicted_odds, scaled_score,
    pd_ttc)`` where:

    * ``score`` is the intercept plus the coefficient-weighted inputs,
    * ``estimated_pd`` is the logistic function of ``score``,
    * ``predicted_odds`` is ``(1 - estimated_pd) / estimated_pd``,
    * ``scaled_score`` is ``554 + 43 * ln(predicted_odds)``,
    * ``pd_ttc`` is looked up from the scaled-score band table (NaN when the
      scaled score is below the lowest band or is NaN).
    """
    # Coefficients from the logistic regression equation
    coefficients = {
        'Intercept': -4.063439,
        'InstallmentAmount': 0.4444435,
        'ExpatFlag': 1.0808872,
        'RatioOfLTV': 0.3500753,
        'LTV': 0.5838034,
        'PreferredList': 0.3105226,
        'BankingWith_Us': 0.53339257,
        'Bscore': 0.9546275
    }

    # Calculate the score using the logistic regression equation
    score = (
        coefficients['Intercept']
        + coefficients['InstallmentAmount'] * installment_amount
        + coefficients['ExpatFlag'] * expat_flag
        + coefficients['RatioOfLTV'] * ratio_of_ltv
        + coefficients['LTV'] * ltv
        + coefficients['PreferredList'] * preferred_list
        + coefficients['BankingWith_Us'] * BankingWith_Us
        + coefficients['Bscore'] * bscore
    )

    # Calculate EstimatedPD using the logistic function
    estimated_pd = np.exp(score) / (1 + np.exp(score))

    # Calculate PredictedOdds
    predicted_odds = (1 - estimated_pd) / estimated_pd

    # Offset and Factor for Scaled Score transformation
    offset = 554
    factor = 43

    # Calculate Scaled Score using the transformation formula
    scaled_score = offset + factor * np.log(predicted_odds)

    # Map scaled_score to PD_TTC using the band lookup table
    pd_ttc = _lookup_pd_ttc(scaled_score)

    return score, estimated_pd, predicted_odds, scaled_score, pd_ttc


# This script used to sit after the ``return`` inside calculate_score and
# therefore never ran. It is guarded so that merely importing this code does
# not require ``sample_data`` to exist.
if __name__ == '__main__':
    # Calculate the score outputs for each row in the DataFrame.
    # Column names match the ``<column>_Bin`` columns created by
    # generate_sample_data (Current_LTV_Bin / Customer_Bscore_Bin).
    (
        sample_data['Score'],
        sample_data['Estimated_PD'],
        sample_data['Predicted_Odds'],
        sample_data['Scaled_Score'],
        sample_data['PD_TTC']
    ) = zip(*sample_data.apply(lambda row: calculate_score(
        row['Installment_Amount_Bin'],
        row['Expat_Flag_Bin'],
        row['LTV_Ratio_Bin'],
        row['Current_LTV_Bin'],
        row['Preferred_Flag_Bin'],
        row['BankingWith_Us_Bin'],
        row['Customer_Bscore_Bin']
    ), axis=1))

    # Calculate distribution of records based on PD_TTC
    pd_ttc_distribution = sample_data['PD_TTC'].value_counts()

    print("Sample Data:")
    print(sample_data)
    print("\nDistribution of records based on PD_TTC:")
    print(pd_ttc_distribution)
