# In [None]:
import pandas as pd # import the nesscessary librarys and packages. 

class DataTransform:
    """Convert the column dtypes of a wrapped pandas DataFrame.

    The DataFrame given to the constructor is stored by reference (it is not
    copied), so ``change_dtypes`` modifies the caller's DataFrame in place.
    """

    def __init__(self, df):
        """Store the DataFrame to transform.

        Args:
            df: The pandas DataFrame whose columns will be converted.

        Raises:
            ValueError: If ``df`` is not a pandas DataFrame.
        """
        if not isinstance(df, pd.DataFrame):
            raise ValueError("Input must be a Pandas DataFrame.")
        self.df = df

    def change_dtypes(self, dtype_dict):
        """Convert columns of the wrapped DataFrame to the requested dtypes.

        Three kinds of target dtype are handled:

        * ``"datetime64"`` - parsed with ``pd.to_datetime`` using the
          ``'%b-%Y'`` format (abbreviated month name and year); values that
          do not match become ``NaT`` because ``errors='coerce'`` is used.
        * ``"boolean"`` - the lowercase strings ``'y'`` / ``'n'`` are mapped
          to ``True`` / ``False``; any other value becomes missing.
        * anything else - passed to ``Series.astype`` with
          ``errors='ignore'``, so a column that cannot be cast is left
          unchanged instead of raising.

        Every column named in ``dtype_dict`` is checked for existence before
        any conversion is applied, so a missing column never leaves the
        DataFrame partially converted.

        Args:
            dtype_dict: Mapping of column name to target dtype string.

        Returns:
            The wrapped DataFrame (the same object as ``self.df``).

        Raises:
            RuntimeError: If a column in ``dtype_dict`` is not present in the
                DataFrame, or if any conversion fails. The original
                exception is attached as ``__cause__``.
        """
        try:
            # Validate up front: failing before the first assignment keeps
            # the in-place conversion all-or-nothing for missing columns.
            for column in dtype_dict:
                if column not in self.df.columns:
                    raise KeyError(f"Column '{column}' not found in DataFrame.")

            for column, dtype in dtype_dict.items():
                if dtype == "datetime64":
                    # Dates are expected as abbreviated month + year
                    # (e.g. "Jan-2021"); unparseable entries become NaT.
                    self.df[column] = pd.to_datetime(
                        self.df[column], errors='coerce', format='%b-%Y'
                    )
                elif dtype == "boolean":
                    # Written for a lowercase y/n flag column; values other
                    # than 'y' or 'n' map to a missing value.
                    self.df[column] = self.df[column].map({'y': True, 'n': False})
                else:
                    # Best-effort cast: errors='ignore' returns the original
                    # column when the cast is not possible.
                    self.df[column] = self.df[column].astype(dtype, errors='ignore')
            return self.df
        except Exception as e:
            # Keep the single RuntimeError type callers rely on, but chain
            # the underlying exception so the root cause stays visible.
            raise RuntimeError(f"Error whilst changing datatypes: {e}") from e
        

# Script entry point: load the raw loan CSV, convert its column dtypes with
# DataTransform, print the dtypes before and after, and save the result.
# Everything, including the final CSV write, lives inside the guard so that
# importing this module has no side effects and never touches names that
# are only defined when run as a script.
if __name__ == "__main__":
    # Source data in .csv format.
    df = pd.read_csv("/Users/max/coding_resources/finance_loan_project/flp_df/flp_df_og.csv")

    print("Original dytypes of DataFrame:")
    print(df.dtypes)

    transformer = DataTransform(df)

    # Column name -> desired dtype. "datetime64" and "boolean" get special
    # handling in DataTransform.change_dtypes; every other value is passed
    # to astype.
    dtype_dict = {
        "id": "int64",
        "member_id": "int64",
        "loan_amount": "float64",
        "funded_amount": "float64",
        "funded_amount_inv": "float64",
        "term": "category",
        "int_rate": "float64",
        "instalment": "float64",
        "grade": "category",
        "sub_grade": "category",
        "employment_length": "category",
        "home_ownership": "category",
        "annual_inc": "float64",
        "verification_status": "category",
        "issue_date": "datetime64",
        "loan_status": "category",
        "payment_plan": "boolean",
        "purpose": "category",
        "dti": "float64",
        "delinq_2yrs": "int64",
        "earliest_credit_line": "datetime64",
        "inq_last_6mths": "int64",
        "mths_since_last_delinq": "int64",
        "mths_since_last_record": "int64",
        "open_accounts": "int64",
        "total_accounts": "int64",
        "out_prncp": "float64",
        "out_prncp_inv": "float64",
        "total_payment": "float64",
        "total_payment_inv": "float64",
        "total_rec_prncp": "float64",
        "total_rec_int": "float64",
        "total_rec_late_fee": "float64",
        "recoveries": "float64",
        "collection_recovery_fee": "float64",
        "last_payment_date": "datetime64",
        "last_payment_amount": "float64",
        "next_payment_date": "datetime64",
        "last_credit_pull_date": "datetime64",
        "collections_12_mths_ex_med": "category",
        "mths_since_last_major_derog": "int64",
        "policy_code": "int64",
        "application_type": "category"
    }

    # Apply the conversions; the returned frame is the same object as df.
    transformed_df = transformer.change_dtypes(dtype_dict)

    print("\nTransformed DataFrame dtypes:")
    print(transformed_df.dtypes)

    # Save the converted data in .csv format.
    csv_file_path = "/Users/max/coding_resources/finance_loan_project/flp_df/flp_df1_dtype.csv"
    transformed_df.to_csv(csv_file_path, index=False)