In [1]:
import numpy as np  # as always, import the necessary packages / libraries.
import pandas as pd
import seaborn as sns
from scipy import stats
import matplotlib.pyplot as plt


In [5]:
class DataTransform:
    """Cast the columns of a CSV-loaded DataFrame to their intended dtypes.

    The DataFrame is stored by reference (no copy is made), so
    ``change_dtypes`` modifies the object that was passed to the constructor.
    """

    # Encoding of yes/no flags understood by the boolean conversion.
    _YES_NO_MAP = {'y': True, 'n': False}

    def __init__(self, df):
        """Keep ``df`` for later conversion.

        Raises:
            ValueError: if ``df`` is not a pandas DataFrame.
        """
        if not isinstance(df, pd.DataFrame):
            raise ValueError("Input must be a Pandas DataFrame.")
        self.df = df

    def _holds_yes_no_flags(self, series):
        """Return True when ``series`` has values and every non-null one is 'y' or 'n'."""
        values = series.dropna()
        return (not values.empty) and bool(values.isin(list(self._YES_NO_MAP)).all())

    def change_dtypes(self, dtype_dict):
        """Convert each column named in ``dtype_dict`` to the requested dtype.

        Args:
            dtype_dict: mapping of column name -> target dtype. Special values:
                ``"datetime64"`` parses with ``pd.to_datetime`` (unparseable
                entries become NaT); ``"boolean"`` maps 'y'/'n' to True/False
                (other values become NaN); ``"bool"`` does the same when the
                column holds only 'y'/'n' flags and otherwise falls through to
                a plain cast. Anything else is passed to ``astype``.

        Returns:
            The wrapped DataFrame (the same object, modified in place).

        Raises:
            RuntimeError: on any failure, including a column that is not in
                the DataFrame; the original exception is chained as the cause.
        """
        try:
            # Check every requested column before touching the data, so a typo
            # in dtype_dict cannot leave the frame half converted.
            for column in dtype_dict:
                if column not in self.df.columns:
                    raise KeyError(f"Column '{column}' not found in DataFrame.")

            for column, dtype in dtype_dict.items():
                series = self.df[column]
                if dtype == "datetime64":
                    self.df[column] = pd.to_datetime(series, errors='coerce')
                elif dtype == "boolean":
                    # Already-boolean data is left alone: mapping it again
                    # (e.g. when the cell is re-run) would turn it all to NaN.
                    if not pd.api.types.is_bool_dtype(series):
                        self.df[column] = series.map(self._YES_NO_MAP)
                elif dtype == "bool" and self._holds_yes_no_flags(series):
                    # A plain bool cast treats every non-empty string as True,
                    # so 'n' would silently become True.
                    self.df[column] = series.map(self._YES_NO_MAP)
                else:
                    # Best effort: a cast that fails leaves the column unchanged.
                    self.df[column] = series.astype(dtype, errors='ignore')
            return self.df
        except Exception as e:
            raise RuntimeError(f"Error whilst changing datatypes: {e}") from e

if __name__ == "__main__":
    # Machine-specific absolute path; adjust it when running on another machine.
    df_og = pd.read_csv("/Users/max/coding_resources/finance_loan_project/flp_df/flp_df7_4analysis.csv")

    transformer = DataTransform(df_og)

    # Target dtype for each column handed to DataTransform.change_dtypes.
    dtype_dict = {
        "id": "int64",
        "member_id": "int64",
        "loan_amount": "float64",
        "term": "category", 
        "int_rate": "float64",
        "grade": "category",
        "sub_grade": "category",
        "employment_length": "category",
        "home_ownership": "category",
        "annual_inc": "float64",
        "verification_status": "category",
        "issue_date": "datetime64",
        "loan_status": "category",
        # NOTE(review): verify payment_plan ends up as real True/False values;
        # a plain bool cast of 'y'/'n' strings yields True for every row.
        "payment_plan": "bool",
        "purpose": "category",
        "dti": "float64",
        "delinq_2yrs": "int64",
        "earliest_credit_line": "datetime64",
        "inq_last_6mths": "int64",
        "open_accounts": "int64",
        "total_accounts": "int64",
        "out_prncp": "float64",
        "total_payment": "float64",
        "total_rec_int": "float64",
        "total_rec_late_fee": "float64",
        "recoveries": "float64",
        "collection_recovery_fee": "float64",
        "last_payment_date": "datetime64",
        "last_payment_amount": "float64",
        "last_credit_pull_date": "datetime64",
        "collections_12_mths_ex_med": "category",
        "policy_code": "int64",
        "application_type": "category"
    }

    df_fin_x = transformer.change_dtypes(dtype_dict)

    # These statements use df_fin_x, which is only bound inside this guard, so
    # they live here too; outside it they raise NameError when the guard is false.
    # "Unnamed: 0" is presumably the index column written by an earlier to_csv.
    df_fin = df_fin_x.drop(columns=["Unnamed: 0"])

    print(df_fin)

             id  member_id  loan_amount  funded_amount  funded_amount_inv  \
0      13297208   15339420       8950.0         8950.0             8950.0   
1      10234817   12096968      11200.0        11200.0            11200.0   
2      10234813   12096964       8400.0         8400.0             8400.0   
3      10234796   12096947       9600.0         9600.0             9600.0   
4      10234755   12096906      15000.0        12000.0            15000.0   
...         ...        ...          ...            ...                ...   
20493    121673     121283       4500.0         4500.0             3000.0   
20494    120215     118760       4000.0         4000.0             3575.0   
20495    112245     112227       5000.0         5000.0             3975.0   
20496    111227     111223      20000.0        20000.0             2800.0   
20497     88046      88023       4400.0         4400.0             1400.0   

            term   int_rate  instalment grade sub_grade  ... total_rec_int 

In [28]:
def charged_off_stats(df):
    """Summarise how many loans were charged off and what was paid on them.

    Args:
        df: DataFrame with a 'loan_status' column and a 'total_payment' column.

    Returns:
        A one-row DataFrame with the total number of loans, the number whose
        status is 'Charged Off', their share of all loans as a percentage
        rounded to 2 decimals, and the sum of 'total_payment' over them.
        For an empty ``df`` the percentage is reported as 0.0.
    """
    charged_off_loans = df[df['loan_status'] == 'Charged Off']
    total_loans = len(df)
    num_charged_off = len(charged_off_loans)

    # An empty frame would otherwise divide by zero.
    if total_loans:
        charged_off_percentage = (num_charged_off / total_loans) * 100
    else:
        charged_off_percentage = 0.0

    total_paid_charged_off = charged_off_loans['total_payment'].sum()

    summary = {
        "Total Loans": total_loans,
        "Charged Off Loans": num_charged_off,
        "Percentage of Charged Off Loans (%)": round(charged_off_percentage, 2),
        "Total Paid Towards Charged Off Loans": total_paid_charged_off
    }

    return pd.DataFrame([summary])


In [29]:
# One-row overview of charged-off loans for the converted loans table.
charged_off_summary = charged_off_stats(df=df_fin)

charged_off_summary




Unnamed: 0,Total Loans,Charged Off Loans,Percentage of Charged Off Loans (%),Total Paid Towards Charged Off Loans
0,20498,1118,5.45,5793173.19


In [31]:
# New frame with an extra column: the amount paid on charged-off loans
# expressed in millions, rounded to 2 decimals. charged_off_summary is untouched.
charged_off_summary_round = charged_off_summary.assign(
    **{
        "Total Paid Towards Charged Off Loans (in mil)":
            charged_off_summary["Total Paid Towards Charged Off Loans"].div(1_000_000).round(2)
    }
)

charged_off_summary_round


Unnamed: 0,Total Loans,Charged Off Loans,Percentage of Charged Off Loans (%),Total Paid Towards Charged Off Loans,Total Paid Towards Charged Off Loans (in mil)
0,20498,1118,5.45,5793173.19,5.79
