In [1]:
import pandas as pd # as always import ness librarys / packages.
from pandasgui import show

# Absolute, machine-specific location of the CSV file that the rest of this cell inspects.
csv_lp = "/Users/max/coding_resources/finance_loan_project/flp_df/flp_df1_dtype.csv" # path of the .csv file to load
df_t2 = pd.read_csv(csv_lp) # parse the .csv file into a pandas DataFrame

class DataFrameInfo:
    """Read-only summaries of a pandas DataFrame.

    Wraps a DataFrame and exposes helpers that report column dtypes, null
    counts, the frame's shape, and basic numeric / categorical statistics.
    None of the methods modify the wrapped frame.
    """

    def __init__(self, df):
        """Store ``df`` for later inspection.

        Args:
            df: the ``pd.DataFrame`` to describe.

        Raises:
            ValueError: if ``df`` is not a ``pd.DataFrame``.
        """
        # ValueError (not TypeError) is kept so callers already catching it keep working.
        if not isinstance(df, pd.DataFrame):
            raise ValueError("Input must be a Pandas DataFrame.")
        self.df = df

    def col_dtypes(self) -> pd.DataFrame:
        """Return one row per column holding the column name and its dtype.

        NOTE: the name column is headed "Colmuns" (sic). The spelling is
        left as-is because it is part of the returned frame's layout.
        """
        return pd.DataFrame({
            "Colmuns": self.df.columns,
            "Datatype": self.df.dtypes,
        }).reset_index(drop=True)  # replace the column-name index with 0..n-1

    def extract_stats(self) -> pd.DataFrame:
        """Return median, mean and standard deviation of every numeric column.

        Returns:
            A frame with columns "Column", "Median", "Mean" and
            "Standard Deviation", one row per numeric column.
        """
        numeric_cols = self.df.select_dtypes(include=["number"])
        stats = pd.DataFrame({
            "Median": numeric_cols.median(),
            "Mean": numeric_cols.mean(),
            "Standard Deviation": numeric_cols.std(),
        })
        # Name the index explicitly before resetting it, so the label column
        # is called "Column" even when ``df.columns`` itself carries a name
        # (in which case reset_index() would not produce an "index" column).
        return stats.rename_axis("Column").reset_index()

    def distinct_cat_counts(self) -> pd.DataFrame:
        """Return the number of distinct non-null values per categorical column.

        Only columns of dtype ``object`` or ``category`` are considered.

        Returns:
            A frame with columns "Column" and "Distinct Count".
        """
        cat_cols = self.df.select_dtypes(include=["object", "category"])
        count = cat_cols.nunique().reset_index()
        count.columns = ["Column", "Distinct Count"]
        return count

    def print_df_shape(self) -> None:
        """Print the number of rows and columns of the wrapped frame."""
        row, col = self.df.shape
        print(f"The df has {row} rows and {col} columns.")

    def num_null(self) -> pd.DataFrame:
        """Return the count of null values in each column.

        Returns:
            A frame with columns "Columns" and "Null Value Count", one row
            per column of the wrapped frame.
        """
        null_df = pd.DataFrame({
            "Columns": self.df.columns,
            "Null Value Count": self.df.isnull().sum(),
        })
        return null_df.reset_index(drop=True)

    def numeric_stats(self) -> pd.DataFrame:
        """Return ``DataFrame.describe()`` of the wrapped frame, rounded to 2 d.p.

        This overlaps with ``extract_stats`` (both report mean and std).
        """
        return self.df.describe().round(2)

    def cat_stats(self) -> pd.DataFrame:
        """Return the mode(s) and mode frequency of every categorical column.

        Only columns of dtype ``object`` or ``category`` are considered and
        null values are ignored. When several values tie for most frequent,
        they are all listed, joined with ", ".

        Returns:
            A frame with columns "Column", "Modes" and "Mode Frequency".
            A column containing only nulls gets ``Modes=None`` and a
            frequency of 0. The three columns are present even when the
            wrapped frame has no categorical columns at all.
        """
        result_columns = ["Column", "Modes", "Mode Frequency"]
        cat_cols = self.df.select_dtypes(include=["object", "category"])

        stats = []
        # items() yields one Series per column, so this also behaves when
        # column labels are duplicated (label indexing would return a frame).
        for col, series in cat_cols.items():
            col_data = series.dropna()
            if col_data.empty:
                # Nothing to count: record the column with placeholder values.
                stats.append({"Column": col, "Modes": None, "Mode Frequency": 0})
                continue

            # value_counts() sorts by frequency in descending order, so its
            # first entry is the frequency shared by every mode.
            counts = col_data.value_counts()
            stats.append({
                "Column": col,
                "Modes": ", ".join(col_data.mode().astype(str)),
                "Mode Frequency": counts.iloc[0],
            })

        # Passing ``columns`` keeps the layout stable for an empty result.
        return pd.DataFrame(stats, columns=result_columns)

        

test = DataFrameInfo(df_t2) # wrap the loaded DataFrame in the inspection helper

test.num_null() # per-column null counts; as the cell's last expression, its result is what the notebook displays

Unnamed: 0,Columns,Null Value Count
0,id,0
1,member_id,0
2,loan_amount,0
3,funded_amount,3007
4,funded_amount_inv,0
5,term,4772
6,int_rate,5169
7,instalment,0
8,grade,0
9,sub_grade,0
