In [None]:
%pip install numpy pandas

## Load and Preprocess the Dataset

In [1]:
import pandas as pd

# Path of the input CSV, resolved relative to the notebook's working directory.
dataset_path = 'dtw_test.csv'
# Read the whole file into a DataFrame; the 'series_a' and 'series_b' columns
# are parsed further in the next cell.
data = pd.read_csv(filepath_or_buffer=dataset_path)

In [2]:
import numpy as np

# Convert string representation of lists to actual lists
def _parse_series(value):
    """Turn one CSV cell into a NumPy array.

    Values that are already ``numpy.ndarray`` objects are returned unchanged,
    so this cell can be re-executed without a kernel restart (calling ``eval``
    on an array raises ``TypeError``). Anything else is evaluated as a Python
    expression, exactly as before.
    """
    if isinstance(value, np.ndarray):
        return value
    # SECURITY NOTE(review): eval() executes arbitrary Python found in the CSV.
    # If the file can come from an untrusted source, switch to a literal-only
    # parser (e.g. ast.literal_eval or json.loads) after confirming that every
    # cell is a plain list literal.
    return np.array(eval(value))


data['series_a'] = data['series_a'].apply(_parse_series)
data['series_b'] = data['series_b'].apply(_parse_series)

## Dynamic Time Warping (DTW) Implementation

In [3]:
# Euclidean Distance Calculation
def euclidean_distance(a, b):
    """Return the Euclidean (L2) distance between two points.

    Parameters
    ----------
    a, b : scalar or array-like
        Coordinates of the two points. Scalars are treated as one-dimensional
        points, in which case the result equals ``abs(a - b)``.

    Returns
    -------
    numpy.float64
        ``sqrt(sum((a - b) ** 2))`` taken over every coordinate.
    """
    # Cast to float before subtracting so unsigned / narrow integer dtypes
    # cannot wrap around (e.g. uint8 3 - 5 would otherwise become 254).
    diff = np.asarray(a, dtype=float) - np.asarray(b, dtype=float)
    # Sum the squared differences before the root; without the sum a vector
    # input would yield an element-wise array rather than a single distance.
    return np.sqrt(np.sum(diff ** 2))

In [4]:
# DTW Distance between 2 given time series (a, b)
def dtw_distance(series_a, series_b):
    """Compute the Dynamic Time Warping distance between two sequences.

    Uses the recurrence
    ``D[i, j] = cost(a[i-1], b[j-1]) + min(D[i-1, j-1], D[i-1, j], D[i, j-1])``
    with ``D[0, 0] = 0`` and every other boundary cell set to infinity, where
    ``cost`` is ``euclidean_distance``.

    Parameters
    ----------
    series_a, series_b : sequence
        Indexable sequences supporting ``len()``; each pair of elements is
        passed to ``euclidean_distance``.

    Returns
    -------
    numpy.float64
        The accumulated cost ``D[n, m]``. This is ``0`` when both sequences are
        empty and ``inf`` when exactly one of them is empty.

    Notes
    -----
    Only the previous and the current row of ``D`` are kept, so memory use is
    proportional to ``len(series_b)`` rather than to the full
    ``len(series_a) * len(series_b)`` table. The values produced are the same
    as with the full table.
    """
    n, m = len(series_a), len(series_b)

    # Row 0 of the table: D[0, 0] = 0 and D[0, j] = inf for j > 0.
    prev_row = [np.inf] * (m + 1)
    prev_row[0] = 0.0

    for i in range(1, n + 1):
        # Loop-invariant for the inner loop, so look it up once per row.
        a_i = series_a[i - 1]
        # Column 0 of every later row is inf (D[i, 0] for i > 0).
        curr_row = [np.inf] * (m + 1)
        for j in range(1, m + 1):
            cost = euclidean_distance(a_i, series_b[j - 1])
            curr_row[j] = cost + min(prev_row[j - 1],   # match (diagonal)
                                     prev_row[j],       # step in series_a only
                                     curr_row[j - 1])   # step in series_b only
        prev_row = curr_row

    # After the loop prev_row is row n (or row 0 when series_a is empty).
    return np.float64(prev_row[m])

In [None]:
import time

# Compute DTW distances between all given sets of time series
results = []
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# elapsed intervals; time.time() can jump if the system clock is adjusted.
start_time = time.perf_counter()

# Walk the row labels and the two series columns in lockstep. Unlike
# iterrows(), this does not build a temporary Series object for every row.
for idx, series_a, series_b in zip(data.index, data['series_a'], data['series_b']):
    distance = dtw_distance(series_a, series_b)
    # The id is the row label shifted by one (1-based for a 0-based index).
    results.append([idx + 1, distance])

end_time = time.perf_counter()
time_taken = end_time - start_time
print(f'Total time taken: {time_taken:.4f} seconds')

Total time taken: 2730.1174 seconds


In [None]:
# Tabulate the collected [id, distance] pairs and write them to disk
# without the DataFrame's own index column.
output_columns = ['id', 'DTW distance']
results_df = pd.DataFrame(data=results, columns=output_columns)
results_df.to_csv(path_or_buf='dtw.csv', index=False)