In [4]:
import numpy as np  # as always import the nesscary packages / libraries. 
import pandas as pd
import seaborn as sns
from scipy import stats
import matplotlib.pyplot as plt


In [5]:
class DataTransform: # use the DataTransfrom class change to the right dtypes form the .csv df. 
    def __init__(self, df):
        if not isinstance(df, pd.DataFrame):
            raise ValueError("Input must be a Pandas DataFrame.")
        self.df = df

    def change_dtypes(self, dtype_dict):
        try:
            for column, dtype in dtype_dict.items():
                if column in self.df.columns:
                    if dtype == "datetime64":
                        self.df[column] = pd.to_datetime(self.df[column], errors='coerce')
                    elif dtype == "boolean":
                        self.df[column] = self.df[column].map({'y': True, 'n': False})
                    else:
                        self.df[column] = self.df[column].astype(dtype, errors='ignore')
                else:
                    raise KeyError(f"Column '{column}' not found in DataFrame.")
            return self.df
        except Exception as e:
            raise RuntimeError(f"Error whilst changing datatypes: {e}")

if __name__ == "__main__":
    df_og = pd.read_csv("/Users/max/coding_resources/finance_loan_project/flp_df/flp_df7_4analysis.csv")

    transformer = DataTransform(df_og)

    dtype_dict = {
        "id": "int64",
        "member_id": "int64",
        "loan_amount": "float64",
        "term": "category", 
        "int_rate": "float64",
        "grade": "category",
        "sub_grade": "category",
        "employment_length": "category",
        "home_ownership": "category",
        "annual_inc": "float64",
        "verification_status": "category",
        "issue_date": "datetime64",
        "loan_status": "category",
        "payment_plan": "bool",
        "purpose": "category",
        "dti": "float64",
        "delinq_2yrs": "int64",
        "earliest_credit_line": "datetime64",
        "inq_last_6mths": "int64",
        "open_accounts": "int64",
        "total_accounts": "int64",
        "out_prncp": "float64",
        "total_payment": "float64",
        "total_rec_int": "float64",
        "total_rec_late_fee": "float64",
        "recoveries": "float64",
        "collection_recovery_fee": "float64",
        "last_payment_date": "datetime64",
        "last_payment_amount": "float64",
        "last_credit_pull_date": "datetime64",
        "collections_12_mths_ex_med": "category",
        "policy_code": "int64",
        "application_type": "category"
    }

    df_fin_x = transformer.change_dtypes(dtype_dict)

df_fin = df_fin_x.drop(columns=["Unnamed: 0"])
    
print(df_fin)

             id  member_id  loan_amount  funded_amount  funded_amount_inv  \
0      13297208   15339420       8950.0         8950.0             8950.0   
1      10234817   12096968      11200.0        11200.0            11200.0   
2      10234813   12096964       8400.0         8400.0             8400.0   
3      10234796   12096947       9600.0         9600.0             9600.0   
4      10234755   12096906      15000.0        12000.0            15000.0   
...         ...        ...          ...            ...                ...   
20493    121673     121283       4500.0         4500.0             3000.0   
20494    120215     118760       4000.0         4000.0             3575.0   
20495    112245     112227       5000.0         5000.0             3975.0   
20496    111227     111223      20000.0        20000.0             2800.0   
20497     88046      88023       4400.0         4400.0             1400.0   

            term   int_rate  instalment grade sub_grade  ... total_rec_int 

In [51]:

class LoanRiskAnalysis:
    def __init__(self, df):
        self.df = df
        self.late_statuses = ["Late (16-30 days)", "Late (31-120 days)", "Default"]
        self.charged_off_status = "Charged Off"

    def calculate_late_loan_percentage(self):
        late_loans = self.df[self.df["loan_status"].isin(self.late_statuses)]
        percentage = (len(late_loans) / len(self.df)) * 100
        return percentage

    def calculate_late_loan_losses(self):
        if self.df["term"].dtype != "int64":
           self.df["term"] = self.df["term"].str.extract(r'(\d+)').astype('int64')
        late_loans = self.df[self.df["loan_status"].isin(self.late_statuses)].copy()
        late_loans["months_paid"] = ((late_loans["last_payment_date"] - late_loans["issue_date"]).dt.days // 30)
        late_loans["remaining_months"] = late_loans["term"] - late_loans["months_paid"]
        late_loans["remaining_amount"] = late_loans["remaining_months"] * late_loans["instalment"]
        late_loans["total_amount_due"] = late_loans["term"] * late_loans["instalment"]
        late_loans["projected_loss"] = late_loans["total_amount_due"] - late_loans["remaining_amount"]
        
        total_remaining_amount = late_loans["remaining_amount"].sum()
        total_projected_loss = late_loans["projected_loss"].sum()
        return len(late_loans), total_remaining_amount, total_projected_loss

    def calculate_combined_loss_percentage(self):
        if self.df["term"].dtype != "int64":
           self.df["term"] = self.df["term"].str.extract(r'(\d+)').astype('int64')
        late_and_charged_off = self.df[
            self.df["loan_status"].isin(self.late_statuses + [self.charged_off_status])
        ].copy()

        late_and_charged_off["months_paid"] = ((late_and_charged_off["last_payment_date"] - late_and_charged_off["issue_date"]).dt.days // 30)
        late_and_charged_off["remaining_months"] = late_and_charged_off["term"] - late_and_charged_off["months_paid"]
        late_and_charged_off["remaining_amount"] = late_and_charged_off["remaining_months"] * late_and_charged_off["instalment"]
        late_and_charged_off["total_amount_due"] = late_and_charged_off["term"] * late_and_charged_off["instalment"]
        late_and_charged_off["total_loss"] = late_and_charged_off["total_amount_due"] - late_and_charged_off["remaining_amount"]

        self.df['revenue_per_loan'] = self.df['instalment'] * self.df['term']
        total_revenue = self.df['revenue_per_loan'].sum()

        
        combined_loss = late_and_charged_off["total_loss"].sum()

        combined_loss_percentage = (combined_loss / total_revenue) * 100
        return combined_loss_percentage

    def summary_report(self):
        late_percentage = self.calculate_late_loan_percentage()
        total_late_customers, remaining_amount, projected_loss = self.calculate_late_loan_losses()
        combined_loss_percentage = self.calculate_combined_loss_percentage()

        summary = {
            "Percentage of Loans that are late & Defaulted": round(late_percentage),
            "Total Late & defaulted Customers": total_late_customers,
            "Projected Loss from Late & Defaulted Loans (in mil)": round(projected_loss / 1_000_000, 2),
            "Percentage of Revenue Loss (Late, Defaulted & Charged Off)": round(combined_loss_percentage)
        }

        summary_pd = pd.DataFrame([summary])

        return summary_pd



In [52]:
loan_risk = LoanRiskAnalysis(df_fin)

In [53]:
loan_risk.summary_report()

Unnamed: 0,Percentage of Loans that are late & Defaulted,Total Late & defaulted Customers,Projected Loss from Late & Defaulted Loans (in mil),"Percentage of Revenue Loss (Late, Defaulted & Charged Off)"
0,1,222,1.86,3
