# Import libraries

In [2]:
# Import general libraries
import numpy as np
import pandas as pd

# Import libraries to connect with the database
import sqlalchemy

# Import math libraries
import math

# Import matplotlib libraries
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D

# Import twed
import pytwed

# Import random libraries
import random

# Import UMAP library
import umap.umap_ as umap

# Import clustering libraries
from sklearn_extra.cluster import KMedoids
from sklearn.cluster import DBSCAN

# Import silhouette score libraries
from sklearn.metrics import silhouette_samples, silhouette_score
from yellowbrick.cluster import SilhouetteVisualizer
import matplotlib.cm as cm

# Import Rand index library
from sklearn.metrics.cluster import adjusted_rand_score

# Import interactive plotting libraries
import bokeh.plotting as bpl
from bokeh.plotting import figure, show

# Import classification metrics
from sklearn.metrics import f1_score, accuracy_score, recall_score

# Import confusion matrix libraries
from sklearn.metrics import confusion_matrix
from sklearn.metrics import ConfusionMatrixDisplay

# Import train/test split library
from sklearn.model_selection import train_test_split

# Import Classifier
from sklearn.tree import DecisionTreeClassifier

# Import library to plot trees
from sklearn import tree

# Import cross-validation libraries
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import RepeatedKFold

# Import undersampling libraries
from imblearn.under_sampling import CondensedNearestNeighbour
from imblearn.under_sampling import NearMiss
from imblearn.under_sampling import TomekLinks
from imblearn.under_sampling import EditedNearestNeighbours
from imblearn.under_sampling import OneSidedSelection

# Import statistical test libraries
from scipy.stats import shapiro
from scipy.stats import kstest

# Import scipy library
import scipy

# Silence pandas' chained-assignment check (SettingWithCopyWarning):
# with the option set to None, pandas neither warns nor raises on it.
pd.options.mode.chained_assignment = None

In [1]:
# Run the helper notebooks so that the names they define become available in this notebook
%run Tools/prepare_trajectory.ipynb
%run Tools/plot_trajectory.ipynb
%run Tools/get_data.ipynb
%run Tools/label_trajectory.ipynb
%run Tools/statistic_trajectory.ipynb
%run Tools/classification_trajectory.ipynb
%run Tools/misc_trajectory.ipynb

# Create Cost Matrix

In [14]:
def create_matrix(tr_id_list, df_values, df_timestamps, nu, lmbda):

    """
    Create the pairwise cost matrix of trajectories using the TWED similarity measure.

    Every trajectory is fetched and filtered exactly once, then each unordered
    pair of trajectories is compared once and the cost is mirrored into both
    halves of the matrix.

    :param tr_id_list: List that contains patient ID
    :param df_values: Dataframe that contains values from trajectories
    :param df_timestamps: Dataframe that contains timestamps from trajectories
    :param nu: TWED parameter. Add penalty on trajectory elasticity
    :param lmbda: TWED parameter. Add penalty on trajectory when deleting an appointment
    :return: Cost matrix, a square numpy array with one row and one column per
        entry of tr_id_list (same order as tr_id_list)
    """

    n_trajectories = len(tr_id_list)

    # Set cost matrix with zeros
    cost_matrix = np.zeros((n_trajectories, n_trajectories))

    # Get values and timestamps of every trajectory and filter them once,
    # instead of repeating this work for every cell of the matrix
    trajectories = []
    for tr_id in tr_id_list:
        values, timestamps = get_trajectory_values_timestamps(tr_id, df_values, df_timestamps)
        values_filter, timestamps_filter = filter_trajectory(values, timestamps)
        trajectories.append((values_filter, timestamps_filter))

    # For each line from the matrix
    for i, (v1_filter, t1_filter) in enumerate(trajectories):
        # Only walk the lower triangle (diagonal included): the cost is stored
        # symmetrically, so the upper triangle is filled by mirroring
        for y in range(i + 1):
            v2_filter, t2_filter = trajectories[y]

            # Compute twed
            cost = pytwed.twed(
                v1_filter,
                v2_filter,
                t1_filter,
                t2_filter,
                nu=nu,
                lmbda=lmbda,
                p=2,
            )

            # Set cost into cost matrix (both symmetric cells)
            cost_matrix[i, y] = cost
            cost_matrix[y, i] = cost

    return cost_matrix