Pandas Dataframe Custom Info Retrieval

Returns a DataFrame with one row per column of the input DataFrame, containing:
- Column name
- Missing Data %
- Num of unique values
- Data type
- "Feature type" --> Needs to be taken with a pinch of salt (very basic criteria)

In [None]:
def df_custom_info(df, disc_threshold=5):
    """Build a per-column summary of ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        The dataframe to describe.
    disc_threshold : int or float, default 5
        Percentage of the row count used as the discrete/continuous cut-off.
        A column with more unique values than
        ``ceil(len(df) * disc_threshold / 100)`` is labelled "Continuous";
        otherwise it is labelled "Discrete".

    Returns
    -------
    pandas.DataFrame
        Indexed by the column names of ``df`` and sorted by missing
        percentage (highest first), with the columns "Column Name",
        "Missing Data Percentage", "Num. Unique Values", "Data Type" and
        "Feature Type" ("Binary", "Discrete" or "Continuous").
    """
    # Share of null entries per column, as a percentage of the row count.
    percent_missing = df.isnull().sum() * 100 / len(df)

    info_df = pd.DataFrame({
        "Column Name": df.columns,
        "Missing Data Percentage": percent_missing,
    })

    # nunique() skips NaN by default, so missing values are not counted
    # as a distinct value.
    info_df['Num. Unique Values'] = [df[col].nunique() for col in df.columns]
    info_df['Data Type'] = [df[col].dtype.name for col in df.columns]

    # Cut-off expressed as an absolute number of unique values.
    threshold_num = np.ceil(df.shape[0] * (disc_threshold / 100))
    n_unique = info_df['Num. Unique Values']

    # Start from "Discrete" so that a count exactly equal to the threshold
    # is still classified instead of being left as an empty string.
    info_df['Feature Type'] = 'Discrete'
    info_df.loc[n_unique > threshold_num, 'Feature Type'] = 'Continuous'
    # Exactly two distinct values always wins over the threshold rule.
    info_df.loc[n_unique == 2, 'Feature Type'] = 'Binary'

    return info_df.sort_values("Missing Data Percentage", ascending=False)

------

Plots a heatmap of the pairwise statistical correlation between all columns in a DataFrame

The input is actually the previously computed correlation matrix (df.corr())

In [1]:
def plot_corr_heatmap(corr_mat, figsize=(16, 16)):
    """Draw an annotated heatmap of a correlation matrix.

    Parameters
    ----------
    corr_mat : pandas.DataFrame
        A previously computed square correlation matrix, e.g. ``df.corr()``.
    figsize : tuple, default (16, 16)
        Figure size passed to ``plt.subplots``.

    Returns
    -------
    None
        The heatmap is drawn on a newly created matplotlib figure.
    """
    # Hide the upper triangle (diagonal included): it only mirrors the
    # lower triangle. The builtin ``bool`` is used because the ``np.bool``
    # alias was removed in NumPy 1.24.
    mask = np.zeros_like(corr_mat, dtype=bool)
    mask[np.triu_indices_from(mask)] = True

    _, ax = plt.subplots(figsize=figsize)
    sns.heatmap(
        corr_mat,
        mask=mask,
        cmap='bwr',
        annot=True,
        fmt='2.2f',
        center=0,
        square=True,
        linewidths=.5,
        cbar_kws={"shrink": .6},
        ax=ax,
    )

-----