In [187]:
import os

import pandas as pd
import Python as py
import numpy as np

In [188]:
# Resolve the input files relative to the directory the notebook is run from.
# os.path.join is used instead of string concatenation so the separator is
# always the right one for the host platform.
root = os.getcwd()
df_scheduled = pd.read_csv(os.path.join(root, "data", "scheduled_loan_repayments.csv"))
df_actual = pd.read_csv(os.path.join(root, "data", "actual_loan_repayments.csv"))

# Build the per-row balances frame with the project helper. The cells below read
# LoanID, Month, LoanAmount, ActualRepayment, ScheduledRepayment,
# LoanBalanceStart and LoanBalanceEnd from it.
df_balances = py.calculate_df_balances(df_scheduled, df_actual)

In [189]:
def question_1(df_balances):
    """
    Calculate the percent of loans that defaulted as per the Type 1 default definition.

    A repayment row counts as a Type 1 default when ActualRepayment is strictly
    less than ScheduledRepayment. A loan is counted as defaulted when at least
    one of its rows is a Type 1 default.

    The input frame is only read; no columns are added to it.

    Args:
        df_balances (DataFrame): Dataframe created from the 'calculate_df_balances()' function.
            Must contain the columns LoanID, ActualRepayment and ScheduledRepayment.

    Returns:
        float: The percentage of type 1 defaulted loans (ie 50.0 not 0.5).
    """
    # Row-level flag, kept in a local Series so the caller's DataFrame is left untouched.
    missed_payment = df_balances["ActualRepayment"] < df_balances["ScheduledRepayment"]

    # A loan has defaulted if any one of its rows is flagged.
    loan_defaults = missed_payment.groupby(df_balances["LoanID"]).any()

    # Share of defaulted loans, expressed as a percentage.
    total_loans = len(loan_defaults)
    defaulted_loans = loan_defaults.sum()
    default_rate_percent = (defaulted_loans / total_loans) * 100

    return default_rate_percent

In [190]:
# Type 1 default rate over all loans in df_balances, as a percentage.
Q1 = question_1(df_balances)

print(Q1)

15.0


In [191]:
def question_2(df_scheduled, df_balances):
    """
    Calculate the percent of loans that defaulted as per the type 2 default definition.

    As implemented here, a loan is a Type 2 default when the sum of its
    ActualRepayment values is less than or equal to 85% of the sum of its
    ScheduledRepayment values.

    Args:
        df_scheduled (DataFrame): Scheduled repayment data. Accepted for interface
            compatibility; this function does not read it.
        df_balances (DataFrame): Frame with one row per loan per period, containing
            the columns LoanID, ActualRepayment and ScheduledRepayment.

    Returns:
        float: The percentage of type 2 defaulted loans (i.e. 50.0, not 0.5)
    """
    # Total paid and total due per loan, computed in a single groupby pass.
    loan_totals = (
        df_balances
        .groupby("LoanID")[["ActualRepayment", "ScheduledRepayment"]]
        .sum()
    )

    # Flag loans whose lifetime repayments reach at most 85% of what was scheduled.
    shortfall_limit = loan_totals["ScheduledRepayment"] * 0.85
    is_type2_default = loan_totals["ActualRepayment"] <= shortfall_limit

    # Share of flagged loans, expressed as a percentage.
    n_defaulted = is_type2_default.sum()
    n_loans = len(loan_totals)
    return (n_defaulted / n_loans) * 100


In [192]:
# Type 2 default rate over all loans in df_balances, as a percentage.
Q2 = question_2(df_scheduled, df_balances)

print(Q2)

1.2


In [193]:
def question_3(df_balances):
    """
    Calculate the annualized portfolio CPR (as a %) from the geometric mean SMM.

    For every row:
        Unscheduled Principal = ActualRepayment - ScheduledRepayment when that is
                                positive, otherwise 0
        SMM = Unscheduled Principal / LoanBalanceStart, or 0 when LoanBalanceStart
              is not positive
    Across all N rows:
        SMM_mean = (∏(1 + SMM))^(1/N) - 1
        CPR      = 1 - (1 - SMM_mean)^12

    NOTE(review): the previous docstring gave the exponent as 1/12, but the
    implementation has always used 1/N with N = number of rows. The code is
    left as it was; confirm which form is intended.

    The input frame is only read; no helper columns are added to it.

    Args:
        df_balances (DataFrame): Dataframe created from the 'calculate_df_balances()' function.
            Must contain ActualRepayment, ScheduledRepayment and LoanBalanceStart.

    Returns:
        float: The annualized CPR as a percent (e.g. 3.5, not 0.035).

    Raises:
        ZeroDivisionError: If df_balances has no rows (the 1/N exponent is undefined).
    """
    actual = df_balances["ActualRepayment"]
    scheduled = df_balances["ScheduledRepayment"]
    balance_start = df_balances["LoanBalanceStart"]

    # Only the amount paid above the schedule counts as unscheduled principal.
    # Held in a local Series so the caller's DataFrame is left untouched.
    unscheduled_principal = pd.Series(
        np.where(actual > scheduled, actual - scheduled, 0),
        index=df_balances.index,
    )

    # Per-row single monthly mortality; rows without a positive opening balance get 0
    # so that a zero or negative balance never produces inf/NaN in the product below.
    smm = pd.Series(
        np.where(balance_start > 0, unscheduled_principal / balance_start, 0),
        index=df_balances.index,
    )

    # Geometric mean of (1 + SMM). The root is applied to each factor before
    # multiplying, which keeps the running product close to 1.
    SMM_mean = np.prod((1 + smm) ** (1 / len(smm))) - 1

    # Annualize the mean monthly rate.
    cpr = 1 - (1 - SMM_mean) ** 12
    cpr_percent = cpr * 100

    return cpr_percent


# Annualized portfolio CPR, as a percentage.
question_3(df_balances)

3.53495282240609

In [194]:
def question_4(df_balances):
    """
    Calculate the predicted total loss for the second year in the loan term.

    Evaluates probability_of_default * loan_amount * (1 - recovery_rate) for each
    loan and sums the results. In this version the exposure is each loan's
    LoanAmount (the first value found per LoanID), not a running balance.

    The probability of default is the Type 2 default rate from question_2, and
    the recovery rate is assumed to be 80%.

    Note: a later cell in this notebook defines `question_4` again; once that
    cell has run, this definition is shadowed.

    Args:
        df_balances (DataFrame): Dataframe created from the 'calculate_df_balances()' function

    Returns:
        float: The predicted total loss for the second year in the loan term.
    """
    # Assumed recovery rate of 80%, so 20% of a defaulted exposure is lost.
    recovery_rate = 0.80
    loss_given_default = 1 - recovery_rate

    # question_2 reports a percentage; convert it to a probability. Its first
    # argument is not used by question_2, so df_balances is passed for both.
    probability_of_default = question_2(df_balances, df_balances) / 100

    # One exposure figure per loan: the first LoanAmount recorded for each LoanID.
    loan_amounts = df_balances.groupby("LoanID")["LoanAmount"].first()

    # Expected loss per loan, then the portfolio total.
    expected_loss_per_loan = loan_amounts * probability_of_default * loss_given_default
    return expected_loss_per_loan.sum()

# Predicted loss using the LoanAmount-based question_4 defined in this cell.
question_4(df_balances)

155879.99999999997

In [195]:
def question_4(df_balances):
    """
    Calculate the predicted total loss for the second year in the loan term.

    Evaluates probability_of_default * loan_balance * (1 - recovery_rate) for each
    loan and sums the results. The exposure is each loan's LoanBalanceEnd on its
    Month == 12 row, i.e. the balance carried into the second year.

    The probability of default is the Type 2 default rate from question_2, and
    the recovery rate is assumed to be 80%.

    Note: this redefines the `question_4` from the earlier cell, which used
    LoanAmount as the exposure.

    Args:
        df_balances (DataFrame): Dataframe created from the 'calculate_df_balances()' function

    Returns:
        float: The predicted total loss for the second year in the loan term.
    """
    # Assumed recovery rate of 80%, so 20% of a defaulted exposure is lost.
    recovery_rate = 0.80
    loss_given_default = 1 - recovery_rate

    # question_2 reports a percentage; convert it to a probability. Its first
    # argument is not used by question_2, so df_balances is passed for both.
    probability_of_default = question_2(df_balances, df_balances) / 100

    # Closing balance of month 12 for every loan that has such a row.
    month_12_rows = df_balances.loc[df_balances["Month"] == 12]
    closing_balances = month_12_rows.groupby("LoanID")["LoanBalanceEnd"].first()

    # Expected loss per loan, then the portfolio total.
    expected_loss_per_loan = closing_balances * probability_of_default * loss_given_default
    return expected_loss_per_loan.sum()
# Predicted loss using the month-12 LoanBalanceEnd version of question_4 defined just above.
question_4(df_balances)

78365.85352799998