In [None]:
import numpy as np
import pandas as pd
import scipy as sp
from scipy.stats import norm

In [None]:
def import_file(file_path: str, **kwargs) -> pd.DataFrame:
    """Load a tabular file into a DataFrame, choosing the reader by extension.

    Supported extensions (matched case-insensitively) are ``csv``, ``xlsx``
    and ``parquet``.

    Args:
        file_path: Location of the file, as a ``str`` or any ``os.PathLike``
            object (e.g. ``pathlib.Path``).
        **kwargs: Forwarded unchanged to the selected pandas reader.

    Returns:
        The DataFrame produced by the selected pandas reader.

    Raises:
        ValueError: If the file name has no extension or an unsupported one.
        FileNotFoundError: If the file does not exist (printed, then re-raised).
        pd.errors.ParserError: If pandas cannot parse the file (printed, then
            re-raised).
        Exception: Any other reader failure (printed, then re-raised).
    """
    # Local import so this definition does not depend on the notebook's import cell.
    import os

    path_str = os.fspath(file_path)

    # splitext only looks at the final path component, so dots in directory
    # names (e.g. "v1.0/report") are never mistaken for an extension.
    file_extension = os.path.splitext(path_str)[1].lstrip(".").lower()

    readers = {
        "csv": pd.read_csv,
        "xlsx": pd.read_excel,
        "parquet": pd.read_parquet,
    }

    # Raised outside the try block: an unsupported extension is an expected,
    # explicit failure and should not be reported as an "unexpected error".
    if file_extension not in readers:
        raise ValueError(f"Unsupported file extension: {file_extension}")

    try:
        return readers[file_extension](path_str, **kwargs)
    except FileNotFoundError:
        print(f"Error: The file {file_path} does not exist.")
        raise
    except pd.errors.ParserError as e:
        print(f"Error: Parsing error for file {file_path} - {str(e)}")
        raise
    except Exception as e:
        print(f"An unexpected error occurred: {str(e)}")
        raise


def define_expected_default_rate():
    """Build the lookup table of expected default rates per rating grade.

    Returns:
        pd.DataFrame: One row per composite rating grade ("A" through "C3"),
        with a ``CompositeRate`` column followed by one rate column for each
        portfolio: ``corporate``, ``sme`` and ``rsme``.
    """
    rating_grades = ["A", "B1", "B2", "B3", "B4", "C1", "C2", "C3"]

    # Rates are listed in the same order as ``rating_grades``.
    rates_by_portfolio = {
        "corporate": [0.0001, 0.0025, 0.0025, 0.0075, 0.0125, 0.025, 0.075, 0.2],
        "sme": [0.0001, 0.0075, 0.0075, 0.0125, 0.0175, 0.025, 0.075, 0.2],
        "rsme": [0.0001, 0.0075, 0.0075, 0.0125, 0.025, 0.04, 0.075, 0.4],
    }

    return pd.DataFrame({"CompositeRate": rating_grades, **rates_by_portfolio})


def map_expected_default_rate(
    df: pd.DataFrame, expected_default_rate_df: pd.DataFrame, portfolio: str
) -> pd.DataFrame:
    """Attach the portfolio's expected default rate to each row of ``df``.

    The rate is looked up by the ``CompositeRate`` column with a left join, so
    every row of ``df`` is kept; grades missing from the lookup table get a
    NaN ``ExpectedDR``.

    Args:
        df: Frame containing a ``CompositeRate`` column.
        expected_default_rate_df: Lookup table with a ``CompositeRate`` column
            and one rate column per portfolio.
        portfolio: Name of the rate column to use; matched after lower-casing.

    Returns:
        A new DataFrame with the columns of ``df`` plus ``ExpectedDR``. Because
        the join is column-on-column, the result has a fresh default index.

    Raises:
        KeyError: If the lookup table has no column for ``portfolio``.
    """
    port = portfolio.lower()

    if port not in expected_default_rate_df.columns:
        raise KeyError(
            f"Portfolio column '{port}' not found in expected_default_rate_df"
        )

    # Rename BEFORE merging: if ``df`` already has a column named like the
    # portfolio, merging first would make pandas suffix both columns
    # ("<port>_x"/"<port>_y") and the ExpectedDR column would never be created.
    rate_lookup = expected_default_rate_df[["CompositeRate", port]].rename(
        columns={port: "ExpectedDR"}
    )

    return df.merge(rate_lookup, how="left", on="CompositeRate")


def calculate_asset_correlation(df: pd.DataFrame, customer_type: str = "") -> pd.DataFrame:
    """Add an ``AssetCorrelation`` column derived from ``ExpectedDR``.

    For each row the correlation is an exponentially weighted blend of two
    segment-specific values::

        w   = (1 - exp(-k * ExpectedDR)) / (1 - exp(-k))
        rho = rho_at_high_dr * w + rho_at_low_dr * (1 - w)

    with ``(rho_at_high_dr, rho_at_low_dr, k)`` equal to ``(0.12, 0.24, 50)``
    for corporate/SME customers and ``(0.03, 0.16, 35)`` for RSME customers.
    NOTE(review): the functional form looks like the Basel IRB asset-correlation
    interpolation -- confirm against the model documentation.

    Args:
        df: Frame containing a numeric ``ExpectedDR`` column.
        customer_type: Segment name, matched case-insensitively. Accepted
            values are ``"Corporate"``, ``"SME"``/``"SMEs"`` and
            ``"RSME"``/``"RSMEs"``. The empty default is not a valid segment,
            so omitting the argument raises ``ValueError``.

    Returns:
        A copy of ``df`` with an added ``AssetCorrelation`` column; the input
        frame is not modified.

    Raises:
        ValueError: If ``customer_type`` is not one of the accepted segments.
    """
    corporate_params = (0.12, 0.24, 50.0)
    retail_sme_params = (0.03, 0.16, 35.0)
    params_by_segment = {
        "corporate": corporate_params,
        "sme": corporate_params,
        "smes": corporate_params,
        "rsme": retail_sme_params,
        "rsmes": retail_sme_params,
    }

    segment = customer_type.strip().lower() if isinstance(customer_type, str) else None
    if segment not in params_by_segment:
        raise ValueError("Incorrect 'customer_type'.")
    rho_at_high_dr, rho_at_low_dr, decay = params_by_segment[segment]

    # Vectorised over the whole column instead of looping row by row.
    expected_dr = df["ExpectedDR"].to_numpy(dtype=float)
    weight = (1 - np.exp(-decay * expected_dr)) / (1 - np.exp(-decay))

    output = df.copy()
    # Assigned as a plain array so values are placed positionally, whatever
    # the index of ``df`` looks like.
    output["AssetCorrelation"] = rho_at_high_dr * weight + rho_at_low_dr * (1 - weight)

    return output

def modified_binomial_test(df: pd.DataFrame, alpha: float = 0.05) -> pd.DataFrame:
    """Add two-sided ``LowerBound``/``UpperBound`` columns for the default rate.

    For each row, with ``p = ExpectedDR`` and ``rho = AssetCorrelation``, the
    bound at quantile ``q`` is::

        norm.cdf((norm.ppf(q) * sqrt(rho) + norm.ppf(p)) / sqrt(1 - rho))

    evaluated at ``q = alpha / 2`` (lower) and ``q = 1 - alpha / 2`` (upper).

    Args:
        df: Frame containing numeric ``ExpectedDR`` and ``AssetCorrelation``
            columns.
        alpha: Two-sided significance level, strictly between 0 and 1.

    Returns:
        A copy of ``df`` with added ``LowerBound`` and ``UpperBound`` columns;
        the input frame is not modified.

    Raises:
        ValueError: If ``alpha`` is not strictly between 0 and 1 (such values
            would otherwise produce NaN or degenerate bounds silently).
    """
    if not 0 < alpha < 1:
        raise ValueError(f"'alpha' must be strictly between 0 and 1, got {alpha!r}.")

    expected_dr = df["ExpectedDR"].to_numpy(dtype=float)
    rho = df["AssetCorrelation"].to_numpy(dtype=float)

    # The normal quantiles depend only on alpha, so compute them once.
    z_lower = norm.ppf(alpha / 2)
    z_upper = norm.ppf(1 - (alpha / 2))

    # Terms shared by both bounds.
    sqrt_rho = np.sqrt(rho)
    default_threshold = norm.ppf(expected_dr)
    scale = np.sqrt(1 - rho)

    output = df.copy()
    output["LowerBound"] = norm.cdf((z_lower * sqrt_rho + default_threshold) / scale)
    output["UpperBound"] = norm.cdf((z_upper * sqrt_rho + default_threshold) / scale)

    return output

def label_test_results(df: pd.DataFrame) -> pd.DataFrame:
    """Label each row by comparing ``BadRate`` with its confidence bounds.

    Labels written to the ``Test Result`` column:

    * ``"-"``    -- the row cannot be tested: ``Good + Bad`` is not positive,
      any of ``BadRate``/``LowerBound``/``UpperBound`` is NaN, or
      ``1 / (Good + Bad) >= UpperBound`` (a single bad observation would
      already reach the upper bound).
    * ``"Fail"`` -- ``BadRate <= LowerBound`` or ``BadRate >= UpperBound``.
    * ``"Pass"`` -- ``BadRate`` lies strictly between the two bounds.

    Args:
        df: Frame containing numeric ``LowerBound``, ``UpperBound``,
            ``BadRate``, ``Good`` and ``Bad`` columns.

    Returns:
        A copy of ``df`` with an added ``Test Result`` column; the input frame
        is not modified.
    """
    lower_bound = df["LowerBound"].to_numpy(dtype=float)
    upper_bound = df["UpperBound"].to_numpy(dtype=float)
    actual_default_rate = df["BadRate"].to_numpy(dtype=float)
    total_count = df["Good"].to_numpy(dtype=float) + df["Bad"].to_numpy(dtype=float)

    # Division by a zero count and comparisons against NaN are expected here
    # and handled explicitly below, so silence the floating-point warnings.
    with np.errstate(divide="ignore", invalid="ignore"):
        smallest_observable_rate = 1 / total_count

        # NaN inputs fail every comparison, which would otherwise fall through
        # to "Pass"; an empty grade would divide by zero. Both are untestable.
        untestable = (
            ~(total_count > 0)
            | np.isnan(lower_bound)
            | np.isnan(upper_bound)
            | np.isnan(actual_default_rate)
            | (smallest_observable_rate >= upper_bound)
        )
        outside_bounds = (actual_default_rate <= lower_bound) | (
            actual_default_rate >= upper_bound
        )

    labels = np.where(untestable, "-", np.where(outside_bounds, "Fail", "Pass"))

    output = df.copy()
    # Assigned as a plain array so labels are placed positionally.
    output["Test Result"] = labels

    return output

In [None]:
# Load the per-portfolio rating summaries (corporate, SME, RSME) from parquet.
# NOTE(review): later cells read CompositeRate, Good, Bad and BadRate from these
# frames -- presumably the parquet files provide them; confirm against the
# notebook that writes 03_rating_summary.
corp_default_rate_df = import_file("../data/processed/03_rating_summary/corporate_rating_summary.parquet")
sme_default_rate_df = import_file("../data/processed/03_rating_summary/sme_rating_summary.parquet")
rsme_default_rate_df = import_file("../data/processed/03_rating_summary/rsme_rating_summary.parquet")
# Lookup table of expected default rates per composite rating grade and portfolio.
expected_default_rate_df = define_expected_default_rate()

In [None]:
# Attach the portfolio-specific expected default rate (column "ExpectedDR") to
# each rating summary, joined on CompositeRate.
base_corporate_df = map_expected_default_rate(corp_default_rate_df, expected_default_rate_df, "corporate")
base_sme_df = map_expected_default_rate(sme_default_rate_df, expected_default_rate_df, "sme")
base_rsme_df = map_expected_default_rate(rsme_default_rate_df, expected_default_rate_df, "rsme")

In [None]:
# Corporate portfolio: asset correlation -> confidence bounds -> test label.
corp_result = label_test_results(modified_binomial_test(calculate_asset_correlation(base_corporate_df, "Corporate")))

In [None]:
# SME portfolio: asset correlation -> confidence bounds -> test label.
sme_result = label_test_results(modified_binomial_test(calculate_asset_correlation(base_sme_df, "SMEs")))

In [None]:
# RSME portfolio: asset correlation -> confidence bounds -> test label.
rsme_result = label_test_results(modified_binomial_test(calculate_asset_correlation(base_rsme_df, "RSMEs")))

In [None]:
# Display the labelled test results for the corporate portfolio.
corp_result

In [None]:
# Display the labelled test results for the SME portfolio.
sme_result

In [None]:
# Display the labelled test results for the RSME portfolio.
rsme_result