In [None]:
def _trajectory_score(target, values_filter, timestamps_filter):
    """
    Compute the score of a single trajectory for the requested statistic.

    :param target: Name of the statistic to compute
    :param values_filter: Values of the trajectory
    :param timestamps_filter: Timestamps of the trajectory
    :return: Score of the trajectory for the statistic ``target``
    :raises ValueError: If ``target`` is not a supported statistic
    """
    if target == "AVG":
        return np.average(values_filter)
    if target == "STD":
        return np.std(values_filter)
    if target == "PED":
        return get_periodicity(timestamps_filter)
    if target == "NBPOINT":
        return len(values_filter)
    if target == "DURATION":
        return timestamps_filter[-1]

    # Remaining targets normalise the first-to-last variation of the values.
    variation = values_filter[-1] - values_filter[0]
    if target == "DIFF_TENDENCY_SCORE":
        return variation / values_filter[0]
    if target == "DIFF_TENDENCY_TIMESTAMP":
        return variation / timestamps_filter[-1]
    if target == "DIFF_TENDENCY_NBPOINT":
        return variation / len(timestamps_filter)

    # Fail loudly rather than labelling with an undefined or stale score.
    raise ValueError(f"Unsupported target statistic: {target!r}")


def label_trajectories_by_stat(df_values, df_timestamps, coef, target):
    """
    Label each trajectory from the chosen statistic.

    The mean and standard deviation of the ``target`` column of the trajectory
    statistics define a band ``[mean - coef * std, mean + coef * std]``. A
    trajectory whose score is strictly above the band is labelled "High",
    strictly below it "Low", and "Unknow" otherwise (including NaN scores).

    :param df_values: Dataframe that contains values from trajectories
    :param df_timestamps: Dataframe that contains timestamps from trajectories
    :param coef: Numerical value that tweaks the width of the band around the mean
    :param target: Statistic that we want to discretize into several groups
    :return: Dataframe with the trajectory statistics plus a "LABEL" column,
        with its index named "ID"
    :raises ValueError: If ``target`` is not a supported statistic
    """
    # Statistics of every trajectory; the target column provides the thresholds.
    df_tr_stats = get_trajectory_statistics(df_values, df_timestamps)

    avg_dist = np.average(df_tr_stats[target])
    std_dist = np.std(df_tr_stats[target])
    upper_bound = avg_dist + coef * std_dist
    lower_bound = avg_dist - coef * std_dist

    # Collect labels in a dict and build the dataframe once, instead of
    # enlarging a dataframe row by row.
    labels = {}
    for index in df_values.index.values:
        # Get values and timestamps from the trajectory
        values_filter, timestamps_filter = get_filtered_trajectory_values_timestamps(
            index, df_values, df_timestamps
        )
        score = _trajectory_score(target, values_filter, timestamps_filter)

        if score > upper_bound:
            labels[index] = "High"
        elif score < lower_bound:
            labels[index] = "Low"
        else:
            labels[index] = "Unknow"

    df_labels = pd.DataFrame({"LABEL": list(labels.values())}, index=list(labels))

    # Put statistics and labels side by side (aligned on the trajectory index)
    df_tr_labels = pd.concat([df_tr_stats, df_labels], axis=1)

    # Name the index "ID"; rename_axis returns a new frame, so return its result.
    return df_tr_labels.rename_axis("ID")

In [None]:
def label_trajectories_by_umap(embed, cluster_method, cluster_params):
    """
    Label each trajectory with UMAP from the chosen clustering model.

    The fitted clusters are plotted, and for "kmedoid" a silhouette plot is
    drawn as well. Each label is stored as the string "C<label>".

    NOTE: the trajectory statistics are computed from the module-level
    ``df_values`` and ``df_timestamps``, which must exist before the call.
    The rows of ``embed`` are assumed to be in the same order as the rows of
    the statistics dataframe -- TODO confirm against the caller.

    :param embed: UMAP dimensions
    :param cluster_method: String that specifies the clustering approach,
        either "kmedoid" or "dbscan"
    :param cluster_params: List of parameters for the clustering model:
        ``[n_clusters, metric, method]`` for "kmedoid",
        ``[eps, min_samples]`` for "dbscan"
    :return: Dataframe with the trajectory statistics plus a "LABEL" column
    :raises ValueError: If ``cluster_method`` is not supported, or if the number
        of cluster labels differs from the number of trajectories
    """
    # Set Clustering
    if cluster_method == "kmedoid":
        model = KMedoids(
            n_clusters=cluster_params[0],
            metric=cluster_params[1],
            method=cluster_params[2],
        )
    elif cluster_method == "dbscan":
        model = DBSCAN(eps=cluster_params[0], min_samples=cluster_params[1])
    else:
        # Previously an unknown method failed later with an unbound variable.
        raise ValueError(f"Unsupported cluster_method: {cluster_method!r}")

    # Fit Model
    fitted = model.fit(embed)
    labels = fitted.labels_

    # Get legend for each label
    dict_legend = get_dict_legend(labels)

    # Plot created clusters with umap
    plot_clusters(embed, model, dict_legend, labels)
    if cluster_method == "kmedoid":
        plot_silhouette(embed, model, labels)

    df_tr_labels = get_trajectory_statistics(df_values, df_timestamps)

    # Assign by position: a Series with a fresh 0..n-1 index would be aligned
    # on index labels and yield NaN for any trajectory ID outside that range.
    df_tr_labels["LABEL"] = ["C" + str(label) for label in labels]

    return df_tr_labels