In [None]:
def plot_trajectories(df_values, df_timestamps):
    """
    Plot every trajectory on one figure, one dashed line per trajectory.
    Each line is labelled with its trajectory id in the legend.
    :param df_values: Data frame of scores; its index is iterated as the trajectory ids
    :param df_timestamps: Data frame of timestamps, handed to the filtering helper together with df_values
    """

    # Set figure size
    plt.figure(figsize=(15, 6))

    # For each trajectory id
    for tr_id in df_values.index.values:
        # Get values and timestamps from the trajectory
        values_filter, timestamps_filter = get_filtered_trajectory_values_timestamps(tr_id, df_values, df_timestamps)

        # Plot the trajectory, labelled with its own id for the legend
        plt.plot(timestamps_filter, values_filter, marker='o', linestyle='--', label=str(tr_id))

    # Set title (score_var is a module-level name holding the score identifier)
    plt.title('Trajectories starting from early follow-up with ' + score_var, size=15)
    # Set x axis label
    plt.xlabel('Duration in days')
    # Set y axis label
    plt.ylabel('Score ' + score_var)

    plt.legend(title='ID Patient')
    plt.show()

In [None]:
def plot_trajectory_clusters(df_values, df_timestamps, dict_legend, cluster_labels):
    """
    Plot all trajectories on one figure, coloured by their cluster.
    :param df_values: Data frame of scores; its index is iterated as the trajectory ids
    :param df_timestamps: Data frame of timestamps, handed to the helpers together with df_values
    :param dict_legend: Dictionary that contains a color for each label
    :param cluster_labels: Labels indexed by the position of each trajectory in df_values.index
    """
    plt.figure(figsize=(15, 6))

    # Used further down to size the x axis
    longest_duration = get_greater_trajectory_duration(df_values, df_timestamps)

    # Draw one dashed line per trajectory, in the color of its cluster
    for position, trajectory_id in enumerate(df_values.index.values):
        trajectory = get_filtered_trajectory_values_timestamps(trajectory_id, df_values, df_timestamps)
        scores, days = trajectory
        line_color = dict_legend.get(cluster_labels[position])
        plt.plot(days, scores, marker='o', linestyle='--', color=line_color)

    # Title and axis labels
    plt.title('Trajectories starting from early follow-up with ' + score_var, size=20)
    plt.xlabel('Duration in days', size=15)
    plt.ylabel('Score ' + score_var, size=15)

    # One round legend marker per cluster
    legend_handles = []
    for cluster, cluster_color in dict_legend.items():
        handle = Line2D(
            [0], [0],
            marker='o',
            color='w',
            label="Cluster " + str(cluster),
            markerfacecolor=cluster_color,
            markersize=10,
        )
        legend_handles.append(handle)

    # Leave some room on both sides of the time axis
    plt.xlim(-20, longest_duration + 50)
    plt.legend(handles=legend_handles, fontsize=15)
    plt.show()

In [1]:
def plot_each_trajectory_clusters(df_values, df_timestamps, dict_legend, cluster_labels):
    """
    Plot one figure per cluster, each showing only the trajectories of that cluster.
    All figures share the same x axis limits so they can be compared visually.
    :param df_values: Data frame of scores; its index is iterated as the trajectory ids
    :param df_timestamps: Data frame of timestamps, handed to the helpers together with df_values
    :param dict_legend: Dictionary that contains a color for each label; one figure is drawn per key
    :param cluster_labels: Labels indexed by the position of each trajectory in df_values.index
    """

    max_duration = get_greater_trajectory_duration(df_values, df_timestamps)

    for e in dict_legend:
        # The color is the same for every trajectory of this cluster
        cluster_color = dict_legend[e]

        # Set figure size
        plt.figure(figsize=(15, 6))

        # For each trajectory id
        for i, index in enumerate(df_values.index.values):
            # Skip trajectories of other clusters before doing any filtering work
            if cluster_labels[i] != e:
                continue
            # Get values and timestamps from the trajectory
            values_filter, timestamps_filter = get_filtered_trajectory_values_timestamps(index, df_values, df_timestamps)
            # Plot trajectory
            plt.plot(timestamps_filter, values_filter, marker='o', linestyle='--', color=cluster_color)

        # Set title
        plt.title('Trajectories starting from early follow-up with ' + score_var, size=20)
        # Set x axis label
        plt.xlabel('Duration in days', size=15)
        # Set y axis label
        plt.ylabel('Score ' + score_var, size=15)

        # Set up Legend (a single entry: the cluster shown on this figure)
        label_elements = [Line2D([0], [0], marker='o', color='w', label="Cluster " + str(e), markerfacecolor=cluster_color, markersize=10)]
        # Set x axis limit
        plt.xlim(-20, max_duration + 50)
        # plot legend
        plt.legend(handles=label_elements, fontsize=15)

        plt.show()

In [None]:
def plot_clusters(data, model, dict_legend, cluster_labels):
    """
    Scatter the data points in the color of their cluster.
    For a KMedoids model the cluster centers are drawn on top; the title
    depends on whether the model is a KMedoids or a DBSCAN instance.
    :param data: dimensions to plot; the first two entries of each row are used as x and y
    :param model: Clustering model (only KMedoids and DBSCAN get a title)
    :param dict_legend: Dictionary that contains a color for each label
    :param cluster_labels: List of labels for each data point
    """
    plt.figure(figsize=(10, 8))

    # One scatter call per data point, colored through its label
    for position, point in enumerate(data):
        point_color = dict_legend[cluster_labels[position]]
        plt.scatter(point[0], point[1], color=point_color)

    if isinstance(model, KMedoids):
        # Overlay the centers as larger orange dots with a black edge
        centers = model.cluster_centers_
        plt.plot(
            centers[:, 0],
            centers[:, 1],
            'o',
            markerfacecolor='orange',
            markeredgecolor='k',
            markersize=10,
        )
        plt.title("K-medoid with " + score_var, size=20)
    elif isinstance(model, DBSCAN):
        plt.title('DBSCAN with ' + score_var, size=20)

    # Build one round legend marker per cluster
    legend_handles = []
    for cluster, cluster_color in dict_legend.items():
        legend_handles.append(
            Line2D(
                [0], [0],
                marker='o',
                color='w',
                label="Cluster " + str(cluster),
                markerfacecolor=cluster_color,
                markersize=10,
            )
        )
    plt.legend(handles=legend_handles, fontsize=15)

In [None]:
def plot_silhouette(data, model, cluster_labels):

    """
    Plot silhouette diagrams for 2, 3, 4 and 5 clusters on a 2x2 grid.
    The model passed in only selects the algorithm: for each cluster count a
    new SpectralClustering or KMedoids instance is created from the
    module-level affinity / metric / method settings and fitted on data.
    :param data: data handed to fit_predict and to the silhouette functions
    :param model: Clustering model (SpectralClustering or KMedoids instance)
    :param cluster_labels: List of labels for each data point; only used to check
                           that at least two different labels exist
    :raises ValueError: if fewer than two different labels are given, or if the
                        model is neither a SpectralClustering nor a KMedoids
    """

    if len(np.unique(cluster_labels)) < 2:
        raise ValueError("Cannot continue, number of differents labels has to be >= 2")

    # Reject unsupported models before any figure is created
    if not isinstance(model, (SpectralClustering, KMedoids)):
        raise ValueError("This clustering model cannot plot silhouette")

    # Set figure
    fig, ax = plt.subplots(2, 2, figsize=(15, 10))

    estimator = model
    # ax.flat walks the grid row by row: 2 and 3 clusters on top, 4 and 5 below
    for axis, n_clusters in zip(ax.flat, (2, 3, 4, 5)):

        # Fit a fresh clustering model of the same kind as the one received
        if isinstance(model, SpectralClustering):
            estimator = SpectralClustering(n_clusters, affinity=affinity)
        else:
            estimator = KMedoids(n_clusters=n_clusters, metric=metric, method=method)

        labels = estimator.fit_predict(data)

        # Compute averaged silhouette for n_clusters
        silhouette_avg = silhouette_score(data, labels)

        # The silhouette coefficient can range from -1, 1
        axis.set_xlim([-1, 1])
        # One row per clustered sample, plus (n_clusters + 1) * 15 rows of blank
        # space to demarcate the silhouette plots of individual clusters.
        axis.set_ylim([0, len(labels) + (n_clusters + 1) * 15])

        # Compute the silhouette scores for each sample
        sample_silhouette_values = silhouette_samples(data, labels)

        y_lower = 10
        for i in range(n_clusters):
            # Aggregate the silhouette scores for samples belonging to
            # cluster i, and sort them
            ith_cluster_silhouette_values = sample_silhouette_values[labels == i]
            ith_cluster_silhouette_values.sort()

            size_cluster_i = ith_cluster_silhouette_values.shape[0]
            y_upper = y_lower + size_cluster_i

            color = cm.nipy_spectral(float(i) / n_clusters)
            axis.fill_betweenx(
                np.arange(y_lower, y_upper),
                0,
                ith_cluster_silhouette_values,
                facecolor=color,
                edgecolor=color,
                alpha=0.7,
            )

            # Label the silhouette plots with their cluster numbers at the middle
            axis.text(-0.05, y_lower + 0.4 * size_cluster_i, str(i))

            # Leave 20 blank rows before the next cluster
            y_lower = y_upper + 20

        axis.set_title(str(n_clusters) + " Clusters", size=15)
        axis.set_xlabel("The silhouette coefficient values", size=13)
        axis.set_ylabel("Cluster label", size=13)

        # The vertical line for average silhouette score of all the values
        axis.axvline(x=silhouette_avg, color="red", linestyle="--")

        axis.set_yticks([])  # Clear the yaxis labels / ticks
        axis.set_xticks([-1, -0.8, -0.6, -0.4, -0.2, 0, 0.2, 0.4, 0.6, 0.8, 1])

    plt.suptitle(str(type(estimator).__name__) + " silhouette score for several number of clusters with " + score_var, fontsize=25, fontweight="bold")

    plt.show()