In [6]:
import numpy as np
import pandas as pd
from scipy.spatial.distance import cosine

def load_data(file_path):
    """Load the numeric columns of a CSV file as a 2-D float array.

    ``pd.read_csv`` already consumes the first line of the file as the column
    header, so no header row needs to be skipped here.  What is dropped is the
    first *column* (presumably an index / label / timestamp column -- confirm
    against the data files).

    Args:
        file_path: Path (or any object accepted by ``pd.read_csv``) of the CSV.

    Returns:
        ``numpy.ndarray`` of dtype float64 with one row per CSV record and one
        column per CSV column except the first.

    Raises:
        ValueError: If a remaining column holds values that cannot be
            converted to float.
    """
    df = pd.read_csv(file_path)
    # Slice before converting: converting the whole frame first would yield an
    # ``object`` array whenever the dropped first column is non-numeric.
    return df.iloc[:, 1:].to_numpy(dtype=float)

def dtw_distance(seq1, seq2, metric=cosine):
    """Build the accumulated-cost matrix of dynamic time warping (DTW).

    Note that, despite the name, the whole matrix is returned rather than a
    single number; the total alignment cost is the bottom-right entry
    (``result[-1, -1]``).

    Args:
        seq1: Sequence of ``n`` elements (e.g. rows of a 2-D array).
        seq2: Sequence of ``m`` elements.
        metric: Callable ``metric(a, b)`` returning the local cost of pairing
            one element of ``seq1`` with one element of ``seq2``.  Defaults to
            SciPy's cosine distance, which expects 1-D vectors.

    Returns:
        Float array of shape ``(n + 1, m + 1)``.  Row 0 and column 0 are
        padding filled with ``inf`` (except ``[0, 0] == 0``); entry ``[i, j]``
        with ``i, j >= 1`` is the minimal accumulated cost of aligning
        ``seq1[:i]`` with ``seq2[:j]``.
    """
    n, m = len(seq1), len(seq2)

    # inf padding makes steps that would leave the matrix never win the min();
    # the single zero at the origin is where every path starts.
    dtw_matrix = np.full((n + 1, m + 1), np.inf)
    dtw_matrix[0, 0] = 0.0

    for i in range(1, n + 1):
        for j in range(1, m + 1):
            cost = metric(seq1[i - 1], seq2[j - 1])
            dtw_matrix[i, j] = cost + min(
                dtw_matrix[i - 1, j],      # advance in seq1 only
                dtw_matrix[i, j - 1],      # advance in seq2 only
                dtw_matrix[i - 1, j - 1],  # advance in both
            )

    return dtw_matrix

def optimal_warping_path(dtw_matrix):
    """Backtrack through an accumulated-cost matrix to recover the warping path.

    Args:
        dtw_matrix: 2-D NumPy array of shape ``(n + 1, m + 1)`` whose row 0 and
            column 0 are padding, as produced by ``dtw_distance``.

    Returns:
        List of ``(i, j)`` pairs of 0-based indices into the two sequences,
        ordered from ``(0, 0)`` to ``(n - 1, m - 1)``.  An empty list is
        returned when either sequence is empty.
    """
    i, j = dtw_matrix.shape[0] - 1, dtw_matrix.shape[1] - 1
    if i < 1 or j < 1:
        # No real cell exists, so there is nothing to align.
        return []

    path = [(i - 1, j - 1)]
    while i > 1 or j > 1:
        if i == 1:
            # First row of real cells: the only way back is to the left.
            j -= 1
        elif j == 1:
            # First column of real cells: the only way back is upwards.
            i -= 1
        else:
            diagonal = dtw_matrix[i - 1, j - 1]
            above = dtw_matrix[i - 1, j]
            left = dtw_matrix[i, j - 1]
            # Step to the cheapest predecessor, comparing all three; ties
            # prefer the diagonal, then the upward move.
            if diagonal <= above and diagonal <= left:
                i -= 1
                j -= 1
            elif above <= left:
                i -= 1
            else:
                j -= 1
        path.append((i - 1, j - 1))

    path.reverse()  # collected end -> start; callers expect start -> end
    return path

def calculate_dtw(datafile1, datafile2):
    """Run the full DTW pipeline on two CSV files.

    Both files are read with ``load_data``; the resulting sequences are fed to
    ``dtw_distance`` and the matrix it returns is backtracked with
    ``optimal_warping_path``.

    Returns:
        Tuple ``(dtw_matrix, path)``: the matrix returned by ``dtw_distance``
        and the path returned by ``optimal_warping_path``.
    """
    sequences = [load_data(source) for source in (datafile1, datafile2)]
    cost_matrix = dtw_distance(*sequences)
    return cost_matrix, optimal_warping_path(cost_matrix)

# Align the sequences stored in the two CSV files (cosine local cost).
dtw_matrix, path = calculate_dtw('datafile1.csv', 'datafile2.csv')
print("Optimal Warping Path:", path)
# NOTE: what follows is the entire accumulated-cost matrix (including the inf
# padding row/column), not a single number.  The scalar DTW distance is its
# bottom-right entry, dtw_matrix[-1, -1].
print("DTW Distance:")
print(dtw_matrix)

Optimal Warping Path: [(0, 0), (1, 1), (2, 2), (3, 3), (4, 4), (5, 5), (6, 6), (7, 7), (8, 8), (9, 9), (10, 10), (11, 11), (12, 12), (13, 13), (14, 14), (15, 15), (16, 16), (17, 17), (18, 18)]
DTW Distance:
[[0.00000000e+00            inf            inf            inf
             inf            inf            inf            inf
             inf            inf            inf            inf
             inf            inf            inf            inf
             inf            inf            inf            inf]
 [           inf 0.00000000e+00 4.15765644e-08 3.09423196e-07
  9.69425659e-07 1.88629472e-06 6.21188448e-06 1.25061204e-05
  2.12907051e-05 3.40847666e-05 1.38433365e-04 2.71869172e-04
  4.09250096e-04 5.71518639e-04 7.52721508e-04 9.47863254e-04
  1.15531675e-03 1.36802456e-03 1.56302761e-03 1.73527694e-03]
 [           inf 4.15765644e-08 0.00000000e+00 3.31756003e-07
  1.09604147e-06 1.98725044e-06 6.50845397e-06 1.28105036e-05
  2.15427592e-05 3.44175369e-05 1.40498850e-04 

In [5]:
import numpy as np
import pandas as pd
from scipy.spatial.distance import euclidean

def load_data(file_path):
    """Load the numeric columns of a CSV file as a 2-D float array.

    ``pd.read_csv`` already consumes the first line of the file as the column
    header, so no header row needs to be skipped here.  What is dropped is the
    first *column* (presumably an index / label / timestamp column -- confirm
    against the data files).

    Args:
        file_path: Path (or any object accepted by ``pd.read_csv``) of the CSV.

    Returns:
        ``numpy.ndarray`` of dtype float64 with one row per CSV record and one
        column per CSV column except the first.

    Raises:
        ValueError: If a remaining column holds values that cannot be
            converted to float.
    """
    df = pd.read_csv(file_path)
    # Slice before converting: converting the whole frame first would yield an
    # ``object`` array whenever the dropped first column is non-numeric.
    return df.iloc[:, 1:].to_numpy(dtype=float)

def dtw_distance(seq1, seq2, metric=euclidean):
    """Build the accumulated-cost matrix of dynamic time warping (DTW).

    Note that, despite the name, the whole matrix is returned rather than a
    single number; the total alignment cost is the bottom-right entry
    (``result[-1, -1]``).

    Args:
        seq1: Sequence of ``n`` elements (e.g. rows of a 2-D array).
        seq2: Sequence of ``m`` elements.
        metric: Callable ``metric(a, b)`` returning the local cost of pairing
            one element of ``seq1`` with one element of ``seq2``.  Defaults to
            SciPy's Euclidean distance, which expects 1-D vectors.

    Returns:
        Float array of shape ``(n + 1, m + 1)``.  Row 0 and column 0 are
        padding filled with ``inf`` (except ``[0, 0] == 0``); entry ``[i, j]``
        with ``i, j >= 1`` is the minimal accumulated cost of aligning
        ``seq1[:i]`` with ``seq2[:j]``.
    """
    n, m = len(seq1), len(seq2)

    # inf padding makes steps that would leave the matrix never win the min();
    # the single zero at the origin is where every path starts.
    dtw_matrix = np.full((n + 1, m + 1), np.inf)
    dtw_matrix[0, 0] = 0.0

    for i in range(1, n + 1):
        for j in range(1, m + 1):
            cost = metric(seq1[i - 1], seq2[j - 1])
            dtw_matrix[i, j] = cost + min(
                dtw_matrix[i - 1, j],      # advance in seq1 only
                dtw_matrix[i, j - 1],      # advance in seq2 only
                dtw_matrix[i - 1, j - 1],  # advance in both
            )

    return dtw_matrix

def optimal_warping_path(dtw_matrix):
    """Backtrack through an accumulated-cost matrix to recover the warping path.

    Args:
        dtw_matrix: 2-D NumPy array of shape ``(n + 1, m + 1)`` whose row 0 and
            column 0 are padding, as produced by ``dtw_distance``.

    Returns:
        List of ``(i, j)`` pairs of 0-based indices into the two sequences,
        ordered from ``(0, 0)`` to ``(n - 1, m - 1)``.  An empty list is
        returned when either sequence is empty.
    """
    i, j = dtw_matrix.shape[0] - 1, dtw_matrix.shape[1] - 1
    if i < 1 or j < 1:
        # No real cell exists, so there is nothing to align.
        return []

    path = [(i - 1, j - 1)]
    while i > 1 or j > 1:
        if i == 1:
            # First row of real cells: the only way back is to the left.
            j -= 1
        elif j == 1:
            # First column of real cells: the only way back is upwards.
            i -= 1
        else:
            diagonal = dtw_matrix[i - 1, j - 1]
            above = dtw_matrix[i - 1, j]
            left = dtw_matrix[i, j - 1]
            # Step to the cheapest predecessor, comparing all three; ties
            # prefer the diagonal, then the upward move.
            if diagonal <= above and diagonal <= left:
                i -= 1
                j -= 1
            elif above <= left:
                i -= 1
            else:
                j -= 1
        path.append((i - 1, j - 1))

    path.reverse()  # collected end -> start; callers expect start -> end
    return path

def calculate_dtw(datafile1, datafile2):
    """Run the full DTW pipeline on two CSV files.

    Both files are read with ``load_data``; the resulting sequences are fed to
    ``dtw_distance`` and the matrix it returns is backtracked with
    ``optimal_warping_path``.

    Returns:
        Tuple ``(dtw_matrix, path)``: the matrix returned by ``dtw_distance``
        and the path returned by ``optimal_warping_path``.
    """
    sequences = [load_data(source) for source in (datafile1, datafile2)]
    cost_matrix = dtw_distance(*sequences)
    return cost_matrix, optimal_warping_path(cost_matrix)

# Example usage: align the sequences stored in the two CSV files
# (Euclidean local cost).
dtw_matrix, path = calculate_dtw('datafile1.csv', 'datafile2.csv')
print("Optimal Warping Path:", path)
# NOTE: what follows is the entire accumulated-cost matrix (including the inf
# padding row/column), not a single number.  The scalar DTW distance is its
# bottom-right entry, dtw_matrix[-1, -1].
print("DTW Distance:")
print(dtw_matrix)

Optimal Warping Path: [(0, 0), (1, 1), (2, 2), (3, 3), (4, 4), (5, 5), (6, 6), (7, 7), (8, 8), (9, 9), (10, 10), (11, 11), (12, 12), (13, 13), (14, 14), (15, 15), (16, 16), (17, 17), (18, 18)]
DTW Distance:
[[  0.                  inf          inf          inf          inf
           inf          inf          inf          inf          inf
           inf          inf          inf          inf          inf
           inf          inf          inf          inf          inf]
 [         inf   0.           0.61319622   1.91206496   3.97946429
    6.50892282  11.52112912  17.66388862  24.97195017  33.88710143
   65.35459237 100.52401879 135.90159147 173.30470761 212.0037742
  251.70721971 292.41933049 333.76190393 374.44762012 413.99255796]
 [         inf   0.61319622   0.           1.29462341   3.35398387
    5.79147545  10.80857946  16.91586603  24.14331717  32.9750989
   64.32788996  99.39433226 134.66969768 172.0018942  210.65198071
  250.31737416 290.99565104 332.30006813 372.92032471 41