In [4]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
def get_info_and_stats(df):
    '''
    Print a three-part overview of a DataFrame.

    The following reports are written to stdout, separated by a line
    of '#' characters:

    1. the ``df.info()`` summary,
    2. the fraction of missing values per column (``df.isna().mean()``),
    3. the statistical description ``df.describe()``.

    Parameters
    ----------
    df : pd.DataFrame
        The DataFrame to summarise.

    Returns
    -------
    None
        Everything is printed; nothing is returned.
    '''
    import io

    # DataFrame.info() writes to a stream and returns None, so passing its
    # return value to print() would emit a stray "None". Capture the text
    # in a buffer and print that instead.
    info_buffer = io.StringIO()
    df.info(buf=info_buffer)

    print(
        info_buffer.getvalue().rstrip('\n'),
        df.isna().mean(),
        df.describe(),
        sep='\n#############################################\n',
    )

def multi_frequency(df,vars):
    '''
    Summarise missing data for a selection of columns.

    Parameters
    ----------
    df : pd.DataFrame
        The DataFrame to inspect. It is not modified.
    vars : list-like of column labels, or a single column name (str)
        The columns to report on.

    Returns
    -------
    pd.DataFrame
        One row per requested column, with two columns:
        ``num_rows_missing`` (count of null values) and
        ``pct_rows_missing`` (that count as a percentage of ``len(df)``).
        For a DataFrame with zero rows the percentage is NaN.
    '''
    # A bare string would select a Series, whose null count is a scalar that
    # pd.concat cannot combine; wrap it so one column behaves like a list of one.
    columns = [vars] if isinstance(vars, str) else vars

    # Count the nulls once and derive the percentage from that count.
    missing_counts = df[columns].isnull().sum()
    missing_pct = missing_counts * 100 / len(df)

    return pd.concat(
        [missing_counts, missing_pct],
        axis=1,
        keys=['num_rows_missing', 'pct_rows_missing'],
    )

def value_counts(df):
    '''
    Print value counts for every object-dtype column of a DataFrame.

    For each column whose dtype compares equal to 'O', two tables are
    printed: the absolute counts (``value_counts()`` with its defaults)
    and the relative frequencies with missing values included
    (``normalize=True, dropna=False``), followed by a '#' divider.

    Parameters
    ----------
    df : pd.DataFrame
        The DataFrame to inspect.

    Returns
    -------
    None
    '''
    divider = '\n###################################\n'

    # Resolve the full list of object columns before printing anything.
    object_columns = [name for name in df.columns if df[name].dtype == 'O']

    for name in object_columns:
        series = df[name]
        print(series.value_counts())
        print(series.value_counts(normalize=True, dropna=False))
        print(divider)

In [5]:
def plot_continuous_distribution(data: pd.DataFrame = None, column: str = None, height: int = 12):
  '''
  Draw a histogram with a KDE overlay for one column.

  Parameters
  ----------
  data : pd.DataFrame
      The DataFrame holding the column to plot.
  column : str
      Name of the column plotted on the x axis; also used in the title.
  height : int, default 12
      Height passed to ``sns.displot``. The aspect ratio is derived from
      it as ``height / 5``.

  Returns
  -------
  None
      The figure is drawn as a side effect.
  '''
  # No `palette` argument: without a `hue` variable seaborn ignores it
  # (and recent versions warn about it), so it had no effect on the plot.
  grid = sns.displot(data, x=column, kde=True, height=height, aspect=height/5)
  grid.set(title=f'Distribution of {column}')

def get_unique_values(data, column):
  '''
  Print how many distinct values a column has, then its value counts.

  The distinct total is the length of ``data[column].unique()``; the
  table printed afterwards is ``data[column].value_counts()`` with its
  default arguments.

  Parameters
  ----------
  data : pd.DataFrame
      The DataFrame holding the column.
  column : str
      Name of the column to inspect.

  Returns
  -------
  None
  '''
  series = data[column]
  distinct_total = len(series.unique())
  counts = series.value_counts()
  print(f"Column: {column} has {distinct_total} unique values\n")
  print(counts, '\n')

def plot_categorical_distribution(data: pd.DataFrame = None, column: str = None, height: int = 8, aspect: int = 2):
  '''
  Draw a count plot for one column.

  Parameters
  ----------
  data : pd.DataFrame
      The DataFrame holding the column to plot.
  column : str
      Name of the column plotted on the x axis; also used in the title.
  height : int, default 8
      Height passed to ``sns.catplot``.
  aspect : int, default 2
      Aspect ratio passed to ``sns.catplot``.

  Returns
  -------
  None
      The figure is drawn as a side effect.
  '''
  catplot_options = dict(
      data=data,
      x=column,
      kind='count',
      height=height,
      aspect=aspect,
      palette='colorblind',
  )
  count_grid = sns.catplot(**catplot_options)
  count_grid = count_grid.set(title=f'Distribution of {column}')
  # Rotate the tick labels to vertical.
  count_grid.set_xticklabels(rotation=90)


def correlation_plot(data: pd.DataFrame = None):
  '''
  Build a colour-graded correlation table for a DataFrame.

  Parameters
  ----------
  data : pd.DataFrame
      The DataFrame whose pairwise column correlations are computed
      with ``DataFrame.corr()``.

  Returns
  -------
  pandas Styler
      The correlation matrix styled with a 'coolwarm' background
      gradient. Make the call the last expression of a cell (or pass
      the result to ``display``) to render it.
  '''
  # Use the `data` argument rather than a notebook-level `df`, so the
  # function works on whatever frame the caller passes in.
  corr = data.corr()
  # Return the Styler: building it without returning it displays nothing.
  return corr.style.background_gradient(cmap='coolwarm')

In [6]:
# # Plotting with seaborn and rotating the x axis

# sns.set(rc={'figure.figsize':(17,10)})
# chart = sns.histplot(
#     data=df,
#     x='category',
#     palette='colorblind',
#     hue='category',
# )
# var = chart.set_xticklabels(chart.get_xticklabels(), rotation=45)