# Import libraries

In [1]:
# Import main libraries
import numpy as np
import pandas as pd

# Import matplotlib libraries
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D

# Labelling

In [2]:
def _label_colors(n_labels):
    """Return ``n_labels`` hex colour strings, one per cluster label.

    The first seven colours are the fixed palette used by ``labelling``.
    When more labels are requested, the extra colours are sampled evenly
    from matplotlib's ``gist_rainbow`` colormap so that every label still
    gets a colour instead of running off the end of the palette.
    """
    base = ['#DF2020', '#81DF20', '#005544', '#009944', '#990044', '#990099', '#992000']
    if n_labels <= len(base):
        return base[:n_labels]

    n_extra = n_labels - len(base)
    cmap = plt.get_cmap('gist_rainbow')
    extra = []
    for k in range(n_extra):
        # cmap() returns an RGBA tuple of floats in [0, 1]; keep RGB only.
        red, green, blue = cmap(k / n_extra)[:3]
        extra.append('#{:02X}{:02X}{:02X}'.format(
            int(round(red * 255)), int(round(green * 255)), int(round(blue * 255))))
    return base + extra


def labelling(X, y):
    """Scatter-plot the columns of ``X`` coloured by the labels in ``y``.

    Parameters
    ----------
    X : pandas.DataFrame
        Variables to plot. One column is drawn on a horizontal line (a
        constant ``'y'`` column of zeros is added), exactly three columns
        are drawn in 3D, any other count is drawn in 2D using the first
        two columns.
    y : array-like or pandas.Series
        One label per row of ``X``. A Series is aligned on the index of
        ``X`` by pandas when it is assigned; other array-likes are taken
        positionally.

    Returns
    -------
    None
        The figure is shown with ``plt.show()`` and the number of plotted
        rows (after dropping rows containing nulls) is printed.

    Notes
    -----
    The working copy uses the column names ``'Label'`` and ``'c'`` (and
    ``'y'`` in the univariate case); columns of ``X`` with those names
    are overwritten in the copy.
    """

    ##====================
    ## SETUP
    ##====================

    # Names of the variables to plot
    variables = X.columns

    # Work on a copy so the caller's dataframe is never modified
    df_tmp = X.copy()

    # Univariate case: add a constant second axis so the points can still
    # be drawn as a 2D scatter
    if len(variables) == 1:
        df_tmp['y'] = np.zeros(len(df_tmp))
        variables = [variables[0], 'y']

    # Keep only the plotted variables (explicit copy so later column
    # assignments never target a view)
    df_tmp = df_tmp[variables].copy()

    # Attach the labels, then drop every row with a null variable or label
    df_tmp['Label'] = y
    df_tmp = df_tmp.dropna()

    is_3d = len(variables) == 3

    # Axis data
    v1 = df_tmp[variables[0]]
    v2 = df_tmp[variables[1]]
    if is_3d:
        v3 = df_tmp[variables[2]]

    # One colour per distinct label value
    unique_values = np.unique(df_tmp['Label'])
    colors = _label_colors(len(unique_values))
    map_set = dict(zip(unique_values, colors))

    # Map from the already-aligned 'Label' column rather than from ``y``
    # itself, so ``y`` does not have to be a pandas Series
    df_tmp['c'] = df_tmp['Label'].map(map_set)

    ##====================
    ## PLOT DATA
    ##====================

    # Create figure and the matching 2D / 3D axes
    fig = plt.figure(figsize=(10, 10))
    if is_3d:
        ax = fig.add_subplot(projection='3d')
    else:
        ax = fig.add_subplot()

    # Plot data points
    if is_3d:
        ax.scatter(v1, v2, v3, c=df_tmp['c'], alpha=0.8, s=40)
        # View orientation
        ax.view_init(elev=15, azim=45)
    else:
        ax.scatter(v1, v2, c=df_tmp['c'], alpha=0.8, s=40)

    # Legend: one round marker per label, in the label's colour
    legend_elements = [
        Line2D([0], [0], marker='o', color='w', label=value,
               markerfacecolor=mcolor, markersize=10)
        for value, mcolor in zip(unique_values, colors)
    ]
    ax.legend(handles=legend_elements, loc='upper right', fontsize=15)

    # Axis names
    ax.set_xlabel(v1.name, fontsize=20)
    ax.set_ylabel(v2.name, fontsize=20)
    if is_3d:
        ax.set_zlabel(v3.name, fontsize=20)

    ax.set_title("Cluster with : " + str(variables), fontsize=20)
    plt.show()

    print("Nombre de patients :", df_tmp.shape[0])