In [1]:
import sys
import gc
sys.path.append('../')

from scoring.event_detection_matrix import competition_score
from models.mlp import PyTorchMLP, LightningModel

from sklearn.preprocessing import StandardScaler

import pandas as pd
import numpy as np
import torch
from tqdm import tqdm
from torch.utils.data import DataLoader, TensorDataset

## Load Model

In [2]:
checkpoint_path = "../models/neural-nappers/dsgyuxte/checkpoints/epoch=1-step=394540.ckpt"
model = LightningModel.load_from_checkpoint(checkpoint_path)

## Load Data

In [3]:
df_train_series = pd.read_parquet('../data/processed/train_series_split.parquet')
df_validation_series = pd.read_parquet('../data/processed/validation_series_split.parquet')
df_validation_events = pd.read_csv('../data/processed/validation_events_split.csv')

In [4]:
df_validation_events

Unnamed: 0,series_id,night,event,step,timestamp
0,038441c925bb,1,onset,4992.0,2018-08-14T22:26:00-0400
1,038441c925bb,1,wakeup,10932.0,2018-08-15T06:41:00-0400
2,038441c925bb,2,onset,20244.0,2018-08-15T19:37:00-0400
3,038441c925bb,2,wakeup,27492.0,2018-08-16T05:41:00-0400
4,038441c925bb,3,onset,39996.0,2018-08-16T23:03:00-0400
...,...,...,...,...,...
11231,fcca183903b7,34,wakeup,584052.0,2019-04-29T08:11:00-0400
11232,fcca183903b7,35,onset,595344.0,2019-04-29T23:52:00-0400
11233,fcca183903b7,35,wakeup,602136.0,2019-04-30T09:18:00-0400
11234,fcca183903b7,36,onset,,


In [5]:
df_validation_series = df_validation_series[df_validation_series.series_id == '038441c925bb']
df_validation_events = df_validation_events[df_validation_events.series_id == '038441c925bb']
print(df_validation_series.shape)

(389880, 19)


## Prepare Data

In [6]:
FEATURES = ['anglez', 'enmo',
            'hour',
            'anglez_abs', 'anglez_diff', 'enmo_diff', 'anglez_x_enmo',
            'anglez_rolling_mean', 'enmo_rolling_mean', 'anglez_rolling_max', 'enmo_rolling_max', 'anglez_rolling_min',
            'anglez_rolling_std', 'enmo_rolling_std']

LABEL = ['awake']

In [7]:
X_train = df_train_series[FEATURES].astype('float32')
scaler = StandardScaler()
scaler.fit(X_train)

del X_train
gc.collect()

198

In [8]:
X_validation = df_validation_series[FEATURES].astype('float32')
y_validation = df_validation_series[LABEL].astype('int64')

X_validation = scaler.transform(X_validation)

X_validation = torch.from_numpy(X_validation)
y_validation = torch.from_numpy(y_validation.to_numpy()).squeeze(1)

validation_dataset = TensorDataset(X_validation, y_validation)
validation_dataloader = DataLoader(validation_dataset, batch_size=5000)

## Predict

In [9]:
def predict(batch):
    X, y = batch
    with torch.no_grad():
        logits = model(X)
    label = torch.argmax(logits, dim=-1)
    confidence = torch.softmax(logits, dim=-1)
    confidence_0 = confidence[:, 0]
    confidence_1 = confidence[:, 1]
    return label, confidence_0, confidence_1

In [10]:
model.eval()
label_list = []
confidence_0_list = []
confidence_1_list = []

for index, batch in enumerate(tqdm(validation_dataloader)) :
    label, confidence_0, confidence_1 = predict(batch)
    
    label_list.append(label)
    confidence_0_list.append(confidence_0)
    confidence_1_list.append(confidence_1)      

100%|██████████████████████████████████████████████████████████████████████████████████| 78/78 [00:07<00:00, 10.27it/s]


In [11]:
label_list = torch.cat(label_list).numpy()
confidence_0_list = torch.cat(confidence_0_list).numpy()
confidence_1_list = torch.cat(confidence_1_list).numpy()

In [12]:
df_validation_series['prediction_class'] = label_list
df_validation_series['prediction_confidence_0'] = confidence_0_list
df_validation_series['prediction_confidence_1'] = confidence_1_list

In [13]:
df_validation_series

Unnamed: 0,series_id,step,timestamp,anglez,enmo,awake,hour,anglez_abs,anglez_diff,enmo_diff,...,enmo_rolling_mean,anglez_rolling_max,enmo_rolling_max,anglez_rolling_min,enmo_rolling_min,anglez_rolling_std,enmo_rolling_std,prediction_class,prediction_confidence_0,prediction_confidence_1
0,038441c925bb,0,2018-08-14 15:30:00,2.636700,0.0217,1,15,2.636700,41.462601,-0.0047,...,0.022330,54.849800,0.0395,2.4129,0.0166,15.339381,0.004213,1,0.002662,0.997338
1,038441c925bb,1,2018-08-14 15:30:05,2.636800,0.0215,1,15,2.636800,41.462601,-0.0047,...,0.022330,54.849800,0.0395,2.4129,0.0166,15.339381,0.004213,1,0.002667,0.997333
2,038441c925bb,2,2018-08-14 15:30:10,2.637000,0.0216,1,15,2.637000,41.462601,-0.0047,...,0.022330,54.849800,0.0395,2.4129,0.0166,15.339381,0.004213,1,0.002664,0.997336
3,038441c925bb,3,2018-08-14 15:30:15,2.636800,0.0213,1,15,2.636800,41.462601,-0.0047,...,0.022330,54.849800,0.0395,2.4129,0.0166,15.339381,0.004213,1,0.002673,0.997327
4,038441c925bb,4,2018-08-14 15:30:20,2.636800,0.0215,1,15,2.636800,41.462601,-0.0047,...,0.022330,54.849800,0.0395,2.4129,0.0166,15.339381,0.004213,1,0.002667,0.997333
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
389875,038441c925bb,389875,2018-09-06 04:59:35,-27.373899,0.0110,1,4,27.373899,-0.731300,-0.0013,...,0.012915,-26.317699,0.0186,-29.5296,0.0110,0.790384,0.002109,0,0.909866,0.090134
389876,038441c925bb,389876,2018-09-06 04:59:40,-27.493799,0.0110,1,4,27.493799,-0.751799,-0.0014,...,0.012915,-26.317699,0.0186,-29.5296,0.0110,0.790384,0.002109,0,0.909402,0.090598
389877,038441c925bb,389877,2018-09-06 04:59:45,-27.533701,0.0111,1,4,27.533701,-0.909601,-0.0009,...,0.012915,-26.317699,0.0186,-29.5296,0.0110,0.790384,0.002109,0,0.909706,0.090294
389878,038441c925bb,389878,2018-09-06 04:59:50,-28.003599,0.0111,1,4,28.003599,-1.461800,-0.0009,...,0.012915,-26.317699,0.0186,-29.5296,0.0110,0.790384,0.002109,0,0.906979,0.093021


## Extract Events

In [14]:
def get_events_smoothed(test_series) :
    series_ids = test_series['series_id'].unique()[:1]
    events = []

    for idx in series_ids: 

        # Collecting sample and normalizing features
        X = test_series[test_series.series_id == idx]
                
        smoothing_length = 12 * 30 # 30 Minutes
        X["score"] = X["prediction_confidence_1"].rolling(smoothing_length, center=True).mean().bfill().ffill()
        X["smooth"] = X["prediction_confidence_0"].rolling(smoothing_length, center=True).mean().bfill().ffill()

        # Binarize the smoothing column
        X["smooth"] = X["smooth"].round()

        # Getting predicted onset and wakeup time steps
        pred_onsets = X[X['smooth'].diff() > 0]['step'].tolist()
        pred_wakeups = X[X['smooth'].diff() < 0]['step'].tolist()
     
        if len(pred_onsets) > 0 : 

            # Ensuring all predicted sleep periods begin and end
            if min(pred_wakeups) < min(pred_onsets) : 
                pred_wakeups = pred_wakeups[1:]

            if max(pred_onsets) > max(pred_wakeups) :
                pred_onsets = pred_onsets[:-1]

            # Keeping sleep periods longer than 30 minutes
            sleep_periods = [(onset, wakeup) for onset, wakeup in zip(pred_onsets, pred_wakeups) if wakeup - onset >= 12 * 30]

            for onset, wakeup in sleep_periods :
                # Scoring using mean probability over period
                score = X[(X['step'] >= onset) & (X['step'] <= wakeup)]['score'].mean()

                # Adding sleep event to dataframe
                onset_row = {'row_id': len(events), 'series_id': idx, 'step': onset, 'event': 'onset', 'score': score}                
                events.append(onset_row)

                wakeup_row = {'row_id': len(events), 'series_id': idx, 'step': wakeup, 'event': 'wakeup', 'score': score}
                events.append(wakeup_row)

    return pd.DataFrame(events)

In [15]:
predicted_validation_events = get_events_smoothed(df_validation_series)

In [16]:
predicted_validation_events.head()

Unnamed: 0,row_id,series_id,step,event,score
0,0,038441c925bb,4979,onset,0.106305
1,1,038441c925bb,11017,wakeup,0.106305
2,2,038441c925bb,21184,onset,0.379724
3,3,038441c925bb,21664,wakeup,0.379724
4,4,038441c925bb,22051,onset,0.145708


In [17]:
df_validation_events.head()

Unnamed: 0,series_id,night,event,step,timestamp
0,038441c925bb,1,onset,4992.0,2018-08-14T22:26:00-0400
1,038441c925bb,1,wakeup,10932.0,2018-08-15T06:41:00-0400
2,038441c925bb,2,onset,20244.0,2018-08-15T19:37:00-0400
3,038441c925bb,2,wakeup,27492.0,2018-08-16T05:41:00-0400
4,038441c925bb,3,onset,39996.0,2018-08-16T23:03:00-0400


## Evaluate

In [18]:
competition_score(df_validation_events, predicted_validation_events)

0.11070937600361574

In [19]:
df_validation_series.to_csv('predicted_series_mlp_new.csv', index=False)