In [1]:
import sys
import gc
sys.path.append('../')

from scoring.event_detection_matrix import competition_score
from models.mlp import PyTorchMLP, LightningModel

from sklearn.preprocessing import StandardScaler

import pandas as pd
import numpy as np
import torch
from tqdm import tqdm

## Load Model

In [2]:
checkpoint_path = "../models/neural-nappers/dsgyuxte/checkpoints/epoch=1-step=394540.ckpt"
model = LightningModel.load_from_checkpoint(checkpoint_path)

## Load Data

In [3]:
df_train_series = pd.read_parquet('../data/processed/train_series_split.parquet')
df_validation_series = pd.read_parquet('../data/processed/validation_series_split.parquet')
df_validation_events = pd.read_csv('../data/processed/validation_events_split.csv')

In [4]:
df_validation_series = df_validation_series[df_validation_series.series_id == '038441c925bb']

In [5]:
df_validation_events = df_validation_events[df_validation_events.series_id == '038441c925bb']

## Prepare Data

In [6]:
FEATURES = ['anglez', 'enmo',
            'hour',
            'anglez_abs', 'anglez_diff', 'enmo_diff', 'anglez_x_enmo',
            'anglez_rolling_mean', 'enmo_rolling_mean', 'anglez_rolling_max', 'enmo_rolling_max', 'anglez_rolling_min',
            'anglez_rolling_std', 'enmo_rolling_std']

LABEL = ['awake']

In [7]:
X_train = df_train_series[FEATURES].astype('float32')
scaler = StandardScaler()
scaler.fit(X_train.values)

In [8]:
del X_train
del df_train_series
gc.collect()

198

## Predict

In [9]:
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model.to(device)
print(device)

cpu


In [10]:
def predict(row):
    X, y = row[FEATURES].astype('float32'), row[LABEL].astype('int64')
    X = np.expand_dims(X, axis=0)
    X = scaler.transform(X)
    X = torch.from_numpy(X)
    with torch.no_grad():
        logits = model(X)
    label = torch.argmax(logits, dim=-1).item()
    confidence = torch.softmax(logits, dim=-1)
    confidence_0 = confidence[0][0].item()
    confidence_1 = confidence[0][1].item()
    return label, confidence_0, confidence_1

In [11]:
model.eval()
for index, row in tqdm(df_validation_series.iterrows(), total=df_validation_series.shape[0]):
    label, confidence_0, confidence_1 = predict(row)
    df_validation_series.at[index, 'prediction_class'] = label
    df_validation_series.at[index, 'prediction_confidence_0'] = confidence_0
    df_validation_series.at[index, 'prediction_confidence_1'] = confidence_1

100%|█████████████████████████████████████████████████████████████████████████| 389880/389880 [24:39<00:00, 263.50it/s]


In [13]:
df_validation_series.head()

Unnamed: 0,series_id,step,timestamp,anglez,enmo,awake,hour,anglez_abs,anglez_diff,enmo_diff,...,enmo_rolling_mean,anglez_rolling_max,enmo_rolling_max,anglez_rolling_min,enmo_rolling_min,anglez_rolling_std,enmo_rolling_std,prediction_class,prediction_confidence_0,prediction_confidence_1
0,038441c925bb,0,2018-08-14 15:30:00,2.6367,0.0217,1,15,2.6367,41.462601,-0.0047,...,0.02233,54.8498,0.0395,2.4129,0.0166,15.339381,0.004213,1.0,0.002662,0.997338
1,038441c925bb,1,2018-08-14 15:30:05,2.6368,0.0215,1,15,2.6368,41.462601,-0.0047,...,0.02233,54.8498,0.0395,2.4129,0.0166,15.339381,0.004213,1.0,0.002667,0.997333
2,038441c925bb,2,2018-08-14 15:30:10,2.637,0.0216,1,15,2.637,41.462601,-0.0047,...,0.02233,54.8498,0.0395,2.4129,0.0166,15.339381,0.004213,1.0,0.002664,0.997336
3,038441c925bb,3,2018-08-14 15:30:15,2.6368,0.0213,1,15,2.6368,41.462601,-0.0047,...,0.02233,54.8498,0.0395,2.4129,0.0166,15.339381,0.004213,1.0,0.002673,0.997327
4,038441c925bb,4,2018-08-14 15:30:20,2.6368,0.0215,1,15,2.6368,41.462601,-0.0047,...,0.02233,54.8498,0.0395,2.4129,0.0166,15.339381,0.004213,1.0,0.002667,0.997333


## Extract Events

In [47]:
def get_events_smoothed(test_series) :
    series_ids = test_series['series_id'].unique()[:1]
    events = []

    for idx in series_ids: 

        # Collecting sample and normalizing features
        X = test_series[test_series.series_id == idx]
                
        smoothing_length = 12 * 30 # 30 Minutes
        X["score"] = X["prediction_confidence_1"].rolling(smoothing_length, center=True).mean().fillna(method="bfill").fillna(method="ffill")
        X["smooth"] = X["prediction_confidence_0"].rolling(smoothing_length, center=True).mean().fillna(method="bfill").fillna(method="ffill")

        # Binarize the smoothing column
        X["smooth"] = X["smooth"].round()

        # Getting predicted onset and wakeup time steps
        pred_onsets = X[X['smooth'].diff() > 0]['step'].tolist()
        pred_wakeups = X[X['smooth'].diff() < 0]['step'].tolist()
     
        if len(pred_onsets) > 0 : 

            # Ensuring all predicted sleep periods begin and end
            if min(pred_wakeups) < min(pred_onsets) : 
                pred_wakeups = pred_wakeups[1:]

            if max(pred_onsets) > max(pred_wakeups) :
                pred_onsets = pred_onsets[:-1]

            # Keeping sleep periods longer than 30 minutes
            sleep_periods = [(onset, wakeup) for onset, wakeup in zip(pred_onsets, pred_wakeups) if wakeup - onset >= 12 * 30]

            for onset, wakeup in sleep_periods :
                # Scoring using mean probability over period
                score = X[(X['step'] >= onset) & (X['step'] <= wakeup)]['score'].mean()

                # Adding sleep event to dataframe
                onset_row = {'row_id': len(events), 'series_id': idx, 'step': onset, 'event': 'onset', 'score': score}                
                events.append(onset_row)

                wakeup_row = {'row_id': len(events), 'series_id': idx, 'step': wakeup, 'event': 'wakeup', 'score': score}
                events.append(wakeup_row)

    return pd.DataFrame(events)

In [48]:
predicted_validation_events = get_events_smoothed(df_validation_series)

  X["score"] = X["prediction_confidence_1"].rolling(smoothing_length, center=True).mean().fillna(method="bfill").fillna(method="ffill")
  X["smooth"] = X["prediction_confidence_0"].rolling(smoothing_length, center=True).mean().fillna(method="bfill").fillna(method="ffill")


In [49]:
predicted_validation_events.head()

Unnamed: 0,row_id,series_id,step,event,score
0,0,038441c925bb,4979,onset,0.106305
1,1,038441c925bb,11017,wakeup,0.106305
2,2,038441c925bb,21184,onset,0.379724
3,3,038441c925bb,21664,wakeup,0.379724
4,4,038441c925bb,22051,onset,0.145708


## Evaluate

In [50]:
competition_score(df_validation_events, predicted_validation_events)

0.11070937600361574

In [52]:
df_validation_series.to_csv('predicted_series_mlp.csv', index=False)