In [4]:
import pandas as pd
import numpy as np
import datetime
from matplotlib import pyplot as plt
import plotly.graph_objects as go
#from dtw import *
from fastdtw import fastdtw
from scipy.spatial.distance import euclidean, hamming

#function that transforms input file in a dataframe with necessary features
def init_total_dataset(file_name):
    df = pd.read_csv(file_name)
    #sensor names in categorical value
    df['sensor_name'] = df['sensor_name'].astype('category')
    #transform sensors state in binary value
    df['sensor_state'] = df['sensor_state'].map({'ON': 1, 'OFF': 0}) 
    #convert datetime strings to datetime type
    df['datetime'] = pd.to_datetime(df['datetime'])
    #add column for seconds passed from the beginning
    starting_time = df['datetime'][0].timestamp()
    #ending_time = df['datetime'][len(df) - 1].timestamp()
    df['timestamp'] = df['datetime'].apply(lambda x: x.timestamp() - starting_time)
    return df
    
#function used to create a dictionary of dataframes: key is the sensor name, the dataframe refers to that specific sensor
def build_datasets(sensor_names, total_df):
    dfs_dictionary = {}
    for sensor in sensor_names:
        sensors_df = total_df[total_df['sensor_name'] == sensor]
        sensors_df = sensors_df.reset_index(drop=True)
        dfs_dictionary[sensor] = sensors_df
    return dfs_dictionary

#function that, from an array of timestamps, gets the corresponding signal from a dataset
def get_signal_from_timestamps(timestamps, sensor_df):
    df = sensor_df.copy(deep = True)
    result = []
    current_state = int(1 - df.loc[0, 'sensor_state'])
    for t in timestamps:
        if len(df) > 0 and df.loc[0, 'timestamp'] == t:
            current_state = int(1 - current_state)
            df = df.iloc[1:]
            df = df.reset_index(drop=True)
        result.append(current_state)
    return result

#function that builds a sensor signal dataframe
def prepare_sensor_signal_df(sensor_df, starting_timestamp, ending_timestamp):
    timestamps = range(int(starting_timestamp),int(ending_timestamp))
    signal = get_signal_from_timestamps(timestamps, sensor_df)
    d = {'timestamp': timestamps, 'sensor_state': signal}
    sensor_signal_df = pd.DataFrame(data = d)
    return sensor_signal_df
    
#function that plots a sensor signal dataframe
def plot_sensor_signal(df, x_limit=None):
    if x_limit and x_limit > len(df):
        print("X-axis limit is greater than the dataset dimension. Plotting the whole dataset.")
        N = len(df)
    elif x_limit:
        N = x_limit
    else:
        N = len(df)
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=df.loc[:N, 'timestamp'], y=df.loc[:N, 'sensor_state'],
                        mode='lines',
                        name='lines'))
    fig.show()
    

In [5]:
df = init_total_dataset('dataset.csv')
sensors = df['sensor_name'].unique()
starting_time = df['timestamp'][0]
ending_time = df['timestamp'][len(df) - 1]

dfs = build_datasets(sensors, df)


In [6]:
bed = prepare_sensor_signal_df(dfs['bed'], starting_time, ending_time)

In [7]:
toilet = prepare_sensor_signal_df(dfs['toilet'], starting_time, ending_time)

In [None]:
plot_sensor_signal(bed)

In [45]:
alignment = dtw(bed.loc[:8400,'sensor_state'], toilet.loc[:8400,'sensor_state'], keep_internals=True)

alignment.plot(type="threeway")

In [None]:
x = np.array([[row['timestamp'], row['sensor_state']] for index, row in bed.iterrows()])

In [56]:
y = np.array([[row['timestamp'], row['sensor_state']] for index, row in toilet.iterrows()])

In [11]:
x = bed.to_numpy()
y = toilet.to_numpy()

In [12]:
distance, path = fastdtw(x[:86400], y[:86400], dist=hamming)
print(distance)

27280.0
