In [4]:
import pandas as pd
import numpy as np

#visualization tools
import matplotlib.pyplot as plt
import seaborn as sns

In [5]:
def distribution(df, figsize=(16, 16)):
    """
    Plot the distribution of every column of a DataFrame.

    One subplot row is drawn per column: a histogram (``sns.histplot``) on
    the left and a violin plot (``sns.violinplot``) on the right.

    Args:
        df (pandas.DataFrame): data to plot; each column gets its own row.
        figsize (tuple): (width, height) of the figure in inches. Defaults to
            (16, 16); pass a taller figure for frames with many columns.
    Returns:
        None. The figure is rendered with ``plt.show()``.
    """
    fig = plt.figure(figsize=figsize)
    n_columns = df.shape[1]

    for row, column in enumerate(df.columns):
        # Subplot indices are 1-based and counted row by row, so row `row`
        # owns positions 2*row + 1 (left) and 2*row + 2 (right).

        # histogram
        hist_ax = fig.add_subplot(n_columns, 2, 2 * row + 1)
        sns.histplot(x=column, data=df, ax=hist_ax)

        # violin plot
        violin_ax = fig.add_subplot(n_columns, 2, 2 * row + 2)
        sns.violinplot(x=column, data=df, ax=violin_ax)

    fig.tight_layout()
    plt.show()

In [6]:
def correlation_plot(df, method, figsize=(30, 30)):
    """Draw a heatmap of the pairwise correlation coefficients of a DataFrame.

    Only the strictly lower triangle of the correlation matrix is shown; the
    diagonal and the upper triangle are masked out because they are redundant.

    Args:
        df (pandas.DataFrame): data to correlate. Only numeric and boolean
            columns are used; other columns are ignored.
        method (str): {'pearson', 'kendall', 'spearman'}
        figsize (tuple): (width, height) of the figure in inches.
            Defaults to (30, 30).
    Returns:
        None. The heatmap is rendered with ``plt.show()``.
    """
    _, ax = plt.subplots(figsize=figsize)
    ax.set_title(method + ' correlation coefficient of the continuous features and the target variable')

    # find the correlation coefficients
    # Restrict to numeric/boolean columns explicitly: recent pandas versions
    # no longer drop non-numeric columns silently and raise instead.
    numeric_df = df.select_dtypes(include=['number', 'bool'])
    correlation_matrix = numeric_df.corr(method=method)

    # boolean mask hiding the diagonal and the upper triangular values
    mask = np.triu(np.ones(correlation_matrix.shape, dtype=bool))

    # create the heatmap of the correlation coefficients
    sns.heatmap(correlation_matrix, linewidths=0.1, vmax=1.0, vmin=-1.0,
                square=True, cmap='RdYlGn', linecolor='white', annot=True,
                mask=mask, ax=ax)

    # display the heatmap
    plt.show()