In [1]:
import warnings

# Silence FutureWarning for the rest of the session.
# The filter has to be installed at module level: when it is set inside
# `with warnings.catch_warnings():` the previous filters are restored as soon
# as the block exits, so no statement after the block is affected by it.
warnings.simplefilter(action='ignore', category=FutureWarning)

from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from IPython.display import Markdown
from scipy import signal

In [None]:
# Directory holding the input files; adjust this single constant when the
# notebook is run on another machine.
# A raw string is used on purpose: the previous literal contained an unescaped
# "\D", which is an invalid escape sequence (reported as a SyntaxWarning by
# recent Python versions).
DATA_DIR = Path(r"C:\Users\Tolga\Downloads")

train_events = pd.read_csv(DATA_DIR / "train_events.csv")
train_series = pd.read_parquet(DATA_DIR / "train_series.parquet")
train_events.info()

In [None]:
# For every series: does at least one of its events have a missing `step`?
# Result is a boolean Series named "step", indexed by series_id.
series_has_NaN = train_events["step"].isna().groupby(train_events["series_id"]).any()

# Number of series with / without missing steps. Only the last expression of a
# cell is rendered automatically, so this has to be printed explicitly.
print(series_has_NaN.value_counts())

# Same information as a two-column frame (series_id, step) without mutating in place.
df_has_NaN = series_has_NaN.to_frame().reset_index()

# Ids of the series whose events are complete; these are used for the analysis below.
notNaN = df_has_NaN.loc[~df_has_NaN["step"], "series_id"].to_list()
print(f"Number of series that do not contain NaN values: {len(notNaN)}, {notNaN}")

In [None]:
# Discard every event row that has a missing value in any column
# (e.g. events whose `step` is NaN).
train_events = train_events.dropna(axis="index", how="any")

In [None]:
def get_train_series(train_series, train_events, series):
    """Return the sensor rows of one series with a per-step ``awake`` label.

    Parameters
    ----------
    train_series : pandas.DataFrame
        Sensor readings; must contain the columns ``series_id`` and ``step``.
    train_events : pandas.DataFrame
        Annotated events; must contain the columns ``series_id``, ``event``
        and ``step``. Rows with a missing value in any column are ignored.
        ``event`` may only contain ``"onset"`` and ``"wakeup"``.
    series
        The ``series_id`` value to select from both frames.

    Returns
    -------
    pandas.DataFrame
        The rows of ``train_series`` belonging to ``series`` with an extra
        integer column ``awake`` (1 = awake, 0 = asleep). Neither input frame
        is modified.

    Raises
    ------
    ValueError
        If the selected events contain an event name other than
        ``"onset"`` / ``"wakeup"``.
    """
    # State during the period that *precedes* each event. The labels are
    # back-filled further down, so the value attached to an event describes
    # the steps leading up to it: awake before an onset, asleep before a wakeup.
    state_before_event = {"onset": 1, "wakeup": 0}

    current_series = train_series[train_series["series_id"] == series]
    current_events = train_events[train_events["series_id"] == series].dropna()

    unknown_events = set(current_events["event"]) - set(state_before_event)
    if unknown_events:
        raise ValueError(
            f"unexpected event type(s) for series {series!r}: "
            f"{sorted(unknown_events, key=str)}"
        )

    # `assign` builds a new frame instead of writing columns into a filtered
    # slice, and `map` avoids the silent dtype downcast that `replace` relied on.
    current_events = current_events.assign(
        step=current_events["step"].astype("int"),
        awake=current_events["event"].map(state_before_event).astype("float64"),
    )

    train = pd.merge(current_series, current_events[["step", "awake"]], on="step", how="left")

    # Every step takes the state recorded at the next event (back-fill).
    # Steps after the final event have nothing to back-fill from; according to
    # the original check
    #   train_events.groupby('series_id').tail(1)["event"].unique()
    # the last event of a series is always a "wakeup", so those steps are awake.
    train["awake"] = train["awake"].bfill().fillna(1).astype("int")
    return train

In [None]:
clean_train_data = []
import sys
sys.path.insert(1,'../src')
# Project-local helper; only needed for the alternative rendering
#   img = spectogram(plot = False, y = enmo.to_numpy(), fs = fs, nperseg = 256); plt.imshow(img)
# which is currently disabled.
from spectogram import spectogram

# Important: to be able to use fft and stft we need to know the sampling
# frequency, i.e. the rate at which the data was collected. One sample is taken
# every 5 seconds, so Fs = 1 / 5 s = 0.2 Hz.
SECONDS_PER_STEP = 5
fs = 0.2
# Number of samples per STFT segment.
STFT_NPERSEG = 256


def _plot_signal_over_steps(train, column, series_id):
    """Draw `column` of `train` against "step", coloured by the "awake" label."""
    display(Markdown('###  ' + column + ' for series ' + series_id))
    plt.figure(figsize=(20, 3))
    sns.lineplot(data=train, x="step", y=column, hue="awake", linewidth=0.5)
    plt.show()


def _plot_stft_magnitude(t, f, magnitude, transition_times, fs, column):
    """Draw an STFT magnitude map with vertical lines at the state transitions.

    t, f: time and frequency axes as returned by scipy.signal.stft.
    magnitude: absolute value of the STFT matrix.
    transition_times: times in seconds at which the awake label changes.
    fs: sampling frequency in Hz; the lines span 0 .. fs/2.
    column: name of the signal, used in the title only.
    """
    plt.figure(figsize=(20, 3))
    # Cap the colour scale at mean + 5*std, otherwise outliers make the graph unreadable.
    colour_cap = np.mean(magnitude) + 5 * np.std(magnitude)
    plt.pcolormesh(t, f, magnitude, vmax=colour_cap, shading='gouraud')
    plt.vlines(transition_times, ymin=0, ymax=fs / 2)
    plt.title('STFT Magnitude for ' + column)
    plt.ylabel('Frequency [Hz]')
    plt.xlabel('Time [s]')
    # Render every figure inside the loop, in order, instead of leaving the
    # last one of each iteration open until the cell finishes.
    plt.show()


for series_id in notNaN[:1]:
    train = get_train_series(train_series, train_events, series_id)
    clean_train_data.append(train)

    # Indices of the rows after which the awake label flips, used to mark the
    # state transitions in the stft plots.
    diffs = np.diff(train['awake'])
    idxs = np.where(np.abs(diffs) == 1)
    # Row index -> seconds, so the markers line up with the STFT time axis.
    timestamps = np.multiply(idxs, [SECONDS_PER_STEP])

    # separate the anglez and enmo values for later
    anglez = train['anglez']
    enmo = train['enmo']

    for column, values in (("anglez", anglez), ("enmo", enmo)):
        _plot_signal_over_steps(train, column, series_id)

        # Short-time Fourier transform of the signal: spectrum over time.
        f, t, Zxx = signal.stft(values.to_numpy(), fs, nperseg=STFT_NPERSEG)
        Y_abs = np.abs(Zxx)
        _plot_stft_magnitude(t, f, Y_abs, timestamps, fs, column)

    del train