In [1]:
import os

import numpy as np
from dtw import dtw

Importing the dtw module. When using in academic works please cite:
  T. Giorgino. Computing and Visualizing Dynamic Time Warping Alignments in R: The dtw Package.
  J. Stat. Soft., doi:10.18637/jss.v031.i07.



In [2]:
def normalized_dtw_distance(series1: np.ndarray, series2: np.ndarray) -> float:
    """
    Computes the normalized DTW distance between two time series.

    The DTW alignment is computed with the dtw package's default settings,
    and its accumulated distance is divided by the length of the longer
    input so that series of different lengths yield comparable scores.

    Parameters:
    series1 (np.ndarray): The first time series (any sequence supporting len()).
    series2 (np.ndarray): The second time series (any sequence supporting len()).

    Returns:
    float: The DTW distance divided by max(len(series1), len(series2)).

    Raises:
    ValueError: If either series is empty.
    """
    length1, length2 = len(series1), len(series2)

    # Reject empty input up front: there is nothing to align, and the
    # normalization below would otherwise divide by zero.
    if length1 == 0 or length2 == 0:
        raise ValueError(
            "normalized_dtw_distance requires two non-empty time series "
            f"(got lengths {length1} and {length2})"
        )

    # Only the accumulated distance is needed, so do not ask dtw to keep the
    # cost matrices around (keep_internals defaults to False).
    alignment = dtw(series1, series2)

    # Normalize by the length of the longer series; float() turns the NumPy
    # scalar into the plain Python float promised by the signature.
    return float(alignment.distance / max(length1, length2))


In [3]:
# Toy series: ts2 is the middle slice (2, 3, 4) of ts1
ts1, ts2 = np.array([1, 2, 3, 4, 5]), np.array([2, 3, 4])

# Score the pair with the length-normalized DTW distance and report it
distance = normalized_dtw_distance(series1=ts1, series2=ts2)
print(f"Normalized DTW Distance: {distance}")


Normalized DTW Distance: 0.4


In [4]:
# Toy series: ts2 contains a middle value (10) that ts1 never reaches
ts1, ts2 = np.array([1, 2, 3, 4, 5]), np.array([2, 10, 4])

# Score the pair with the length-normalized DTW distance and report it
distance = normalized_dtw_distance(series1=ts1, series2=ts2)
print(f"Normalized DTW Distance: {distance}")

Normalized DTW Distance: 2.0


# Load the data and compute the normalized DTW distance from the event with the largest peak intensity to all other events

In [5]:
# Directory containing the per-event '*_ie_trim.csv' files.
# Set the ELECTRON_CME_DATA_DIR environment variable to point at another copy
# of the data; when it is unset, the original local path is used unchanged.
path_to_data = os.environ.get(
    'ELECTRON_CME_DATA_DIR',
    'C:/Users/the_3/Documents/github/keras-functional-api/data/electron_cme_data_split_v8/full',
)


In [8]:
import pandas as pd
import os

# Event whose time series serves as the reference for all comparisons.
ANCHOR_EVENT_ID = 12

# Read every '*_ie_trim.csv' file to collect event IDs and time series.
# os.listdir returns names in arbitrary order, so sort them to make the
# run reproducible.
event_data = []
for file_name in sorted(os.listdir(path_to_data)):
    if not file_name.endswith('_ie_trim.csv'):
        continue

    # The event number is taken from the third '_'-separated token of the name
    event_id = int(file_name.split('_')[2])
    file_path = os.path.join(path_to_data, file_name)
    df = pd.read_csv(file_path)

    # Proton intensity time series after the log1p transform
    log_proton_series = np.log1p(df['Proton Intensity'])

    # Peak values, in raw and log1p space, plus the largest delta
    peak_intensity = np.max(df['Proton Intensity'])
    log_peak_intensity = np.log1p(peak_intensity)
    peak_delta = np.max(df['delta_log_Intensity'])

    event_data.append({
        'event_id': event_id,
        'time_series': log_proton_series,
        'peak_intensity': peak_intensity,
        'log_peak_intensity': log_peak_intensity,
        'peak_delta': peak_delta
    })

# Look up the anchor event; fail with an explicit message instead of a bare
# StopIteration when its file is not present in the data directory.
anchor_event = next(
    (event for event in event_data if event['event_id'] == ANCHOR_EVENT_ID),
    None,
)
if anchor_event is None:
    raise ValueError(
        f"Anchor event {ANCHOR_EVENT_ID} was not found among the "
        f"'*_ie_trim.csv' files in {path_to_data!r}"
    )
anchor_series = anchor_event['time_series']

# Normalized DTW distance from the anchor to every event (the anchor itself
# is included in the loop).
distances = []
for event in event_data:
    distance = normalized_dtw_distance(anchor_series, event['time_series'])
    distances.append({
        'event_id': event['event_id'],
        'distance': distance,
        'peak_intensity': event['peak_intensity'],
        'log_peak_intensity': event['log_peak_intensity'],
        'peak_delta': event['peak_delta']
    })

# Build the results table, closest events first
results_df = pd.DataFrame(distances)
results_df = results_df.sort_values('distance')

# Display the results with four decimal places
print(f"\nResults table (sorted by distance from Event {ANCHOR_EVENT_ID}):")
print(results_df.to_string(index=False, float_format='{:.4f}'.format))



Results table (sorted by distance from Event 12):
 event_id  distance  peak_intensity  log_peak_intensity  peak_delta
       12    0.0000        353.9040              5.8718      1.7164
       10    0.1807        229.3620              5.4397      1.3985
       31    0.2302         93.0270              4.5436      0.5265
       44    0.2386         78.7868              4.3794      2.3158
       27    0.2488         81.2694              4.4100      1.2491
       38    0.2677        129.7824              4.8735      1.2445
       13    0.3746         41.7726              3.7559      1.7283
       11    0.3801         58.6158              4.0879      1.0971
       35    0.4717         25.2998              3.2696      0.9356
       43    0.4924         63.7880              4.1711      1.0562
       28    0.5651         19.8000              3.0350      1.1481
        5    0.5766         20.4640              3.0664      1.0373
       19    0.8202          9.9734              2.3955      1.04