In [None]:
import os

import numpy as np
import pandas as pd
import scipy as sp
from scipy import stats

In [None]:
def import_file(file_path: "str | os.PathLike[str]", **kwargs) -> pd.DataFrame:
    """Load a tabular file into a DataFrame, picking the reader from the extension.

    Supported extensions (case-insensitive): ``csv`` -> ``pd.read_csv``,
    ``xlsx`` -> ``pd.read_excel``, ``parquet`` -> ``pd.read_parquet``.

    Args:
        file_path: Path of the file to read, as a string or any ``os.PathLike``.
        **kwargs: Forwarded unchanged to the selected pandas reader.

    Returns:
        The DataFrame produced by the pandas reader.

    Raises:
        ValueError: If the extension is missing or not one of the supported ones.
        FileNotFoundError: If the file does not exist (a message is printed first).
        pd.errors.ParserError: If pandas cannot parse the file (a message is
            printed first).
        Exception: Any other reader error is printed and re-raised unchanged.
    """
    # splitext only looks at the final path component, so dots in directory
    # names ("../data", "v1.2/file") are never mistaken for an extension.
    file_extension = os.path.splitext(os.fspath(file_path))[1].lstrip(".").lower()

    if file_extension == "csv":
        reader = pd.read_csv
    elif file_extension == "xlsx":
        reader = pd.read_excel
    elif file_extension == "parquet":
        reader = pd.read_parquet
    else:
        # Raised outside the try block: this is a deliberate validation error,
        # so it must not be reported as "unexpected" by the handler below.
        raise ValueError(f"Unsupported file extension: {file_extension}")

    try:
        return reader(file_path, **kwargs)
    except FileNotFoundError:
        print(f"Error: The file {file_path} does not exist.")
        raise
    except pd.errors.ParserError as e:
        print(f"Error: Parsing error for file {file_path} - {str(e)}")
        raise
    except Exception as e:
        print(f"An unexpected error occurred: {str(e)}")
        raise
    
def define_expected_default_rate():
    """Build the expected default rate lookup table.

    Returns:
        pd.DataFrame: One row per composite rating, with the rating label in
        ``CompositeRate`` and one rate column per portfolio
        (``corporate``, ``sme``, ``rsme``).
    """
    rating_grades = ["A", "B1", "B2", "B3", "B4", "C1", "C2", "C3"]

    # Rates are listed in the same order as rating_grades.
    rates_by_portfolio = {
        "corporate": [0.0001, 0.0025, 0.0025, 0.0075, 0.0125, 0.025, 0.075, 0.2],
        "sme": [0.0001, 0.0075, 0.0075, 0.0125, 0.0175, 0.025, 0.075, 0.2],
        "rsme": [0.0001, 0.0075, 0.0075, 0.0125, 0.025, 0.04, 0.075, 0.4],
    }

    table_columns = {"CompositeRate": rating_grades}
    table_columns.update(rates_by_portfolio)

    return pd.DataFrame(table_columns)

def map_expected_default_rate(
    df: pd.DataFrame, expected_default_rate_df: pd.DataFrame, portfolio: str
) -> pd.DataFrame:
    """Attach the portfolio's expected default rate to each row of ``df``.

    The rate is looked up by ``CompositeRate`` in ``expected_default_rate_df``
    and added as a new ``ExpectedDR`` column. Rows whose rating is absent from
    the lookup get NaN (left join). ``df`` itself is not modified.

    Args:
        df: Frame with a ``CompositeRate`` column.
        expected_default_rate_df: Lookup with a ``CompositeRate`` column and one
            rate column per portfolio.
        portfolio: Name of the portfolio rate column; matched after lowercasing.

    Returns:
        A new DataFrame with the columns of ``df`` plus ``ExpectedDR``, in the
        same row order as ``df``.

    Raises:
        KeyError: If the lookup has no column for ``portfolio`` (or no
            ``CompositeRate`` column).
        pandas.errors.MergeError: If a rating appears more than once in the
            lookup, which would otherwise silently duplicate rows of ``df``.
    """
    port = portfolio.lower()

    if port not in expected_default_rate_df.columns:
        available = [c for c in expected_default_rate_df.columns if c != "CompositeRate"]
        raise KeyError(f"Unknown portfolio {portfolio!r}; available: {available}")

    # Rename before merging: if df already has a column named like the
    # portfolio, pandas would suffix both sides and a post-merge rename would
    # silently find nothing to rename.
    lookup = expected_default_rate_df[["CompositeRate", port]].rename(
        columns={port: "ExpectedDR"}
    )

    # Drop a stale ExpectedDR so re-running the cell on an already-mapped frame
    # replaces the column instead of producing duplicates.
    base = df.drop(columns=["ExpectedDR"], errors="ignore")

    return base.merge(
        lookup,
        how="left",
        on="CompositeRate",
        validate="many_to_one",
    )

def binomial_test(df: pd.DataFrame, alpha: float=0.05) -> pd.DataFrame:
    """Compute two-sided binomial critical counts for every rating bin.

    For each row, the number of observations is ``Good + Bad`` and the success
    probability is ``ExpectedDR``. ``LowerBound`` and ``UpperBound`` are the
    ``alpha/2`` and ``1 - alpha/2`` quantiles of that binomial distribution, so
    they are expressed as counts of bad accounts, not as rates.

    Args:
        df: Frame with ``CompositeRate``, ``Good``, ``Bad``, ``ExpectedDR`` and
            ``BadRate`` columns.
        alpha: Two-sided significance level, strictly between 0 and 1.

    Returns:
        A new DataFrame sorted by ``CompositeRate`` with a fresh 0..n-1 index
        and the columns ``CompositeRate``, ``Good``, ``Bad``,
        ``Expected Default Rate``, ``Actual Default Rate``, ``LowerBound`` and
        ``UpperBound``. An empty input yields an empty frame with these columns.

    Raises:
        ValueError: If ``alpha`` is not strictly between 0 and 1.
        KeyError: If a required column is missing from ``df``.
    """
    if not 0 < alpha < 1:
        raise ValueError(f"alpha must be strictly between 0 and 1, got {alpha}")

    ordered = df.sort_values("CompositeRate").reset_index(drop=True)

    total_count = (ordered["Good"] + ordered["Bad"]).astype(float).to_numpy()
    expected_rate = ordered["ExpectedDR"].astype(float).to_numpy()

    if ordered.empty:
        # Skip SciPy entirely so the empty case does not depend on how the
        # installed version broadcasts zero-length inputs.
        critical_value_lower = np.empty(0, dtype=float)
        critical_value_upper = np.empty(0, dtype=float)
    else:
        # binom.ppf is vectorised: one call covers every rating bin.
        critical_value_lower = stats.binom.ppf(alpha / 2, total_count, expected_rate)
        critical_value_upper = stats.binom.ppf(1 - alpha / 2, total_count, expected_rate)

    return pd.DataFrame(
        {
            "CompositeRate": ordered["CompositeRate"],
            "Good": ordered["Good"],
            "Bad": ordered["Bad"],
            "Expected Default Rate": ordered["ExpectedDR"],
            "Actual Default Rate": ordered["BadRate"],
            "LowerBound": critical_value_lower,
            "UpperBound": critical_value_upper,
        }
    )

def _label_bin(lower_bound, upper_bound, bad_count) -> str:
    """Return '-', 'Fail' or 'Pass' for one bin from its bounds and bad count."""
    # NaN compares False with everything, so without this guard a bin whose
    # bounds could not be computed would fall through to "Pass".
    if pd.isna(lower_bound) or pd.isna(upper_bound) or pd.isna(bad_count):
        return "-"
    if lower_bound == 0 and upper_bound == 0:
        return "-"
    if bad_count >= upper_bound:
        return "Fail"
    # A zero lower bound is skipped: zero bad accounts would otherwise always fail.
    if lower_bound != 0 and bad_count <= lower_bound:
        return "Fail"
    return "Pass"


def label_test_results(df: pd.DataFrame) -> pd.DataFrame:
    """Add a ``Test Result`` column comparing ``Bad`` with the critical counts.

    Labels per row:
        * ``"-"``    - both bounds are 0, or a bound / the bad count is missing.
        * ``"Fail"`` - ``Bad >= UpperBound``, or ``Bad <= LowerBound`` when the
          lower bound is non-zero.
        * ``"Pass"`` - otherwise.

    NOTE(review): a bad count exactly equal to a bound is labelled "Fail". The
    bounds come from quantiles, which are often treated as still inside the
    acceptance region (strict ``<`` / ``>``) - confirm the intended convention.

    Args:
        df: Frame with ``LowerBound``, ``UpperBound`` and ``Bad`` columns.

    Returns:
        A copy of ``df`` with the extra ``Test Result`` column; ``df`` itself is
        left untouched.
    """
    # Iterating over records keeps each column's native scalar type and never
    # touches a column when the frame has no rows.
    labels = [
        _label_bin(record["LowerBound"], record["UpperBound"], record["Bad"])
        for record in df.to_dict("records")
    ]

    output = df.copy()
    output["Test Result"] = labels

    return output

In [None]:
# Load the rating summary of each portfolio (corporate, SME, RSME) from parquet
# and build the expected default rate lookup table.
corp_default_rate_df = import_file("../data/processed/03_rating_summary/corporate_rating_summary.parquet")
sme_default_rate_df = import_file("../data/processed/03_rating_summary/sme_rating_summary.parquet")
rsme_default_rate_df = import_file("../data/processed/03_rating_summary/rsme_rating_summary.parquet")
expected_default_rate_df = define_expected_default_rate()

In [None]:
# Add the portfolio-specific expected default rate (ExpectedDR) to each rating summary.
base_corporate_df = map_expected_default_rate(corp_default_rate_df, expected_default_rate_df, "corporate")
base_sme_df = map_expected_default_rate(sme_default_rate_df, expected_default_rate_df, "sme")
base_rsme_df = map_expected_default_rate(rsme_default_rate_df, expected_default_rate_df, "rsme")

In [None]:
# Corporate portfolio: compute the binomial bounds per rating, then label each rating.
corp_result = label_test_results(binomial_test(base_corporate_df))

In [None]:
# SME portfolio: compute the binomial bounds per rating, then label each rating.
sme_result = label_test_results(binomial_test(base_sme_df))

In [None]:
# RSME portfolio: compute the binomial bounds per rating, then label each rating.
rsme_result = label_test_results(binomial_test(base_rsme_df))

In [None]:
# Display the corporate binomial test results.
corp_result

In [None]:
# Display the SME binomial test results.
sme_result

In [None]:
# Display the RSME binomial test results.
rsme_result