In [3]:
def get_periodicity(timestamps):
    """
    Return the mean absolute gap between consecutive timestamps.

    :param timestamps: List of timestamps for a trajectory
    :return: Average of the absolute differences between each timestamp
             and the one that follows it
    """
    # Pair every timestamp with its successor; zip stops at the shorter
    # sequence, so the final element is never paired with anything.
    gaps = [np.abs(later - earlier)
            for earlier, later in zip(timestamps, timestamps[1:])]
    return np.average(gaps)

In [4]:
def compute_statistics(values, timestamps):
    """
    Summarise one trajectory as a tuple of scalar statistics.

    :param values: List of values from a trajectory
    :param timestamps: List of timestamps from a trajectory
    :return: Tuple (average, standard deviation, average periodicity,
             number of points, last timestamp, and the first-to-last value
             change divided respectively by the last timestamp, by the first
             value and by the number of timestamps)
    """
    # Descriptive statistics of the values and of the sampling rhythm
    mean_value = np.average(values)
    std_value = np.std(values)
    periodicity = get_periodicity(timestamps)
    point_count = len(values)
    last_timestamp = timestamps[-1]

    # Change between the first and the last data point, shared by the three
    # normalised tendencies below
    delta = values[-1] - values[0]
    tendency_per_time = delta / last_timestamp
    tendency_relative = delta / values[0]
    tendency_per_point = delta / len(timestamps)

    return (
        mean_value,
        std_value,
        periodicity,
        point_count,
        last_timestamp,
        tendency_per_time,
        tendency_relative,
        tendency_per_point,
    )

In [5]:
def get_trajectory_statistics(df_values, df_timestamps):
    """
    Create dataframe that contains several computed statistics from each trajectory.

    :param df_values: Dataframe that contains values from trajectories
    :param df_timestamps: Dataframe that contains timestamps from trajectories
    :return: Dataframe with one row per unique index of df_values and one
             column per statistic returned by compute_statistics; its index
             is named "ID"
    """

    # Create Dataframe with these statistics (column order matches the order
    # of the tuple returned by compute_statistics)
    df_tr_stats = pd.DataFrame({'AVG':[], 'STD':[], 'PED':[], 'NBPOINT':[], 'DURATION':[],
                                'DIFF_TENDENCY_TIMESTAMP':[], 'DIFF_TENDENCY_SCORE':[], 'DIFF_TENDENCY_NBPOINT':[]})

    for index in np.unique(df_values.index):

        # Get values and timestamps from the trajectory
        values_filter, timestamps_filter = get_filtered_trajectory_values_timestamps(index, df_values, df_timestamps)

        # Compute statistics and store them as the row of this trajectory
        df_tr_stats.loc[index] = list(compute_statistics(values_filter, timestamps_filter))

    # Name the index "ID". Index.rename() returns a new Index instead of
    # modifying the existing one, so the name is assigned directly.
    df_tr_stats.index.name = "ID"

    return df_tr_stats

In [None]:
def get_score_coef(y_true_name, y_pred_name, coefficients, trajectory_dataframes, target, average, encoding_rules=None):
    """
    Compute several metrics (ACCURACY, RECALL...) from a list of coefficients.

    :param y_true_name: Variable name to predict
    :param y_pred_name: Variable name predicted
    :param coefficients: List of coefficients to test
    :param trajectory_dataframes: List of dataframes that contains [df_values, df_timestamps, df_cat]
    :param target: Variable name to discretize into groups
    :param average: Averaging mode forwarded to f1_score and recall_score
    :param encoding_rules: Rules passed to DataFrame.replace to encode categorical
                           values of y_true and y_pred. (Optional)
    :return: Dataframe that contains score metrics for each coefficient,
             sorted by decreasing ARI
    """

    # Set dataframe with results
    df_scores = pd.DataFrame({"COEF":[], "ACCURACY":[], "RECALL":[], "F1":[], "ARI":[]})

    # For each coef
    for tmp_coef in coefficients:

        # Join statistic labeled (Predicted values) and categorical values (True values)
        df_tr_labels = label_trajectories_by_stat(trajectory_dataframes[0], trajectory_dataframes[1], tmp_coef, target)
        df_tr_labels_cat = df_tr_labels.join(trajectory_dataframes[2])[[y_true_name, y_pred_name]]

        # Encode categorical value into numerical. Identity test on purpose:
        # "!= None" is evaluated element-wise by pandas/NumPy objects and
        # cannot be used as a condition.
        if encoding_rules is not None:
            df_tr_labels_cat = df_tr_labels_cat.replace(encoding_rules)

        # Get labels and categorical values
        y_true = np.array(df_tr_labels_cat[y_true_name]).tolist()
        y_pred = np.array(df_tr_labels_cat[y_pred_name]).tolist()

        # Get metric scores
        f1 = f1_score(y_true, y_pred, average=average)
        accuracy = accuracy_score(y_true, y_pred)
        recall = recall_score(y_true, y_pred, average=average)
        adjusted_ri = adjusted_rand_score(y_true, y_pred)

        # Store results into dataframe (appended as the next row)
        df_scores.loc[len(df_scores.index)] = [tmp_coef, accuracy, recall, f1, adjusted_ri]

    return df_scores.sort_values(by="ARI", ascending=False)