In [8]:
import sys
import os
import gc
sys.path.append('../../')

from scoring.event_detection_matrix import competition_score
from models.transformer.encoder import LightningModel

from sklearn.preprocessing import StandardScaler
from joblib import load

import pandas as pd
import numpy as np
import torch
from tqdm import tqdm
from torch.utils.data import DataLoader, TensorDataset
from sklearn.metrics import accuracy_score

In [9]:
checkpoint_path = "../../models/transformer/neural-nappers/pei7u45k/checkpoints/transformer.ckpt"
model = LightningModel.load_from_checkpoint(checkpoint_path)

C:\Users\Michael Jakober\anaconda3\envs\aich\lib\site-packages\lightning\pytorch\utilities\migration\utils.py:55: The loaded checkpoint was produced with Lightning v2.1.1, which is newer than your current Lightning version: v2.1.0


In [10]:
validation_overview = pd.read_parquet('../../data/processed/transformer/validation/overview.parquet', columns=['num_series_id', 'step', 'awake', 'series_index'])
df_validation_events = pd.read_csv('../../data/processed/validation_events_split.csv')

In [11]:
df_validation_events = df_validation_events[df_validation_events.step.notnull()]
df_validation_events

Unnamed: 0,series_id,night,event,step,timestamp,num_series_id
0,062dbd4c95e6,1,onset,7872.0,2018-08-22T23:11:00-0400,7
1,062dbd4c95e6,1,wakeup,14484.0,2018-08-23T08:22:00-0400,7
6,062dbd4c95e6,4,onset,60720.0,2018-08-26T00:35:00-0400,7
7,062dbd4c95e6,4,wakeup,68400.0,2018-08-26T11:15:00-0400,7
8,062dbd4c95e6,5,onset,77304.0,2018-08-26T23:37:00-0400,7
...,...,...,...,...,...,...
2955,fcca183903b7,33,wakeup,565824.0,2019-04-28T06:52:00-0400,276
2956,fcca183903b7,34,onset,577344.0,2019-04-28T22:52:00-0400,276
2957,fcca183903b7,34,wakeup,584052.0,2019-04-29T08:11:00-0400,276
2958,fcca183903b7,35,onset,595344.0,2019-04-29T23:52:00-0400,276


In [12]:
LAGS_FUTURE = [f"t_lag_{i}" for i in range(-1, -25, -1)]
LAGS_PAST = reversed([f"t_lag_{i}" for i in range(1, 25)])
FEATURES = [*LAGS_PAST, 't_0', *LAGS_FUTURE]

LABEL = ['awake']

In [13]:
def predict(batch):
    X = batch
    with torch.no_grad():
        logits = model(X[0])
    label = torch.argmax(logits, dim=-1)
    confidence = torch.softmax(logits, dim=-1)
    confidence_0 = confidence[:, 0]
    confidence_1 = confidence[:, 1]
    return label, confidence_0, confidence_1

In [14]:
def predict_series(validation_dataloader):
    model.eval()
    label_list = []
    confidence_0_list = []
    confidence_1_list = []
    
    for index, batch in enumerate(tqdm(validation_dataloader)) :
        label, confidence_0, confidence_1 = predict(batch)
        
        label_list.extend(label)
        confidence_0_list.extend(confidence_0)
        confidence_1_list.extend(confidence_1)
    return label_list, confidence_0_list, confidence_1_list

In [15]:
steps = []
num_series_ids = []
true_values = []
label_list = []
confidence_0_list = []
confidence_1_list = []

for i, num_series_id in enumerate(validation_overview.num_series_id.unique()):
    if i == 0:
        print(f'Series {i + 1} of {validation_overview.num_series_id.nunique()}')
        series_X = torch.load('../../data/processed/transformer/validation/' + str(num_series_id) + '.pt')
        validation_dataset = TensorDataset(series_X)
        validation_dataloader = DataLoader(validation_dataset, batch_size=10000)
        series_label, series_confidence_0, series_confidence_1 = predict_series(validation_dataloader)
        
        steps.extend(validation_overview[validation_overview.num_series_id == num_series_id]['step'].values)
        num_series_ids.extend(validation_overview[validation_overview.num_series_id == num_series_id]['num_series_id'].values)
        true_values.extend(validation_overview[validation_overview.num_series_id == num_series_id]['awake'].values)
        label_list.extend([tensor.item() for tensor in series_label])
        confidence_0_list.extend([tensor.item() for tensor in series_confidence_0])
        confidence_1_list.extend([tensor.item() for tensor in series_confidence_1])

Series 1 of 54


100%|██████████████████████████████████████████████████████████████████████████████████| 75/75 [05:59<00:00,  4.80s/it]


In [34]:
df_validation = pd.DataFrame(
    {'step': steps,
     'num_series_id': num_series_id,
     'awake': true_values,
     'prediction_class': label_list,
     'prediction_confidence_1': confidence_0_list,
     'prediction_confidence_0': confidence_1_list
    })

In [35]:
df_validation

Unnamed: 0,step,num_series_id,awake,prediction_class,prediction_confidence_1,prediction_confidence_0
0,0,276,1,1,0.131363,0.868637
1,1,276,1,1,0.135859,0.864141
2,2,276,1,1,0.046935,0.953065
3,3,276,1,1,0.026337,0.973663
4,4,276,1,1,0.015758,0.984242
...,...,...,...,...,...,...
744115,778675,276,1,0,0.950231,0.049769
744116,778676,276,1,0,0.950231,0.049769
744117,778677,276,1,0,0.950231,0.049769
744118,778678,276,1,0,0.950231,0.049769


In [41]:
def get_events_smoothed(test_series) :
    series_ids = test_series['num_series_id'].unique()
    events = []

    for idx in tqdm(series_ids):
        # Collecting sample and normalizing features
        X = test_series[test_series.num_series_id == idx]
                
        smoothing_length = 12 * 60 # 30 Minutes
        X["score"] = X["prediction_confidence_1"].rolling(smoothing_length, center=True).mean().bfill().ffill()
        X["smooth"] = X["prediction_confidence_0"].rolling(smoothing_length, center=True).mean().bfill().ffill()

        # Binarize the smoothing column
        X["smooth"] = X["smooth"].round()

        # Getting predicted onset and wakeup time steps
        pred_onsets = X[X['smooth'].diff() > 0]['step'].tolist()
        pred_wakeups = X[X['smooth'].diff() < 0]['step'].tolist()
     
        if len(pred_onsets) > 0 : 

            # Ensuring all predicted sleep periods begin and end
            if min(pred_wakeups) < min(pred_onsets) : 
                pred_wakeups = pred_wakeups[1:]

            if max(pred_onsets) > max(pred_wakeups) :
                pred_onsets = pred_onsets[:-1]

            # Keeping sleep periods longer than 30 minutes
            sleep_periods = [(onset, wakeup) for onset, wakeup in zip(pred_onsets, pred_wakeups) if wakeup - onset >= 12 * 30]

            for onset, wakeup in sleep_periods :
                # Scoring using mean probability over period
                score = X[(X['step'] >= onset) & (X['step'] <= wakeup)]['score'].mean()

                # Adding sleep event to dataframe
                onset_row = {'row_id': len(events), 'series_id': idx, 'step': onset, 'event': 'onset', 'score': score}                
                events.append(onset_row)

                wakeup_row = {'row_id': len(events), 'series_id': idx, 'step': wakeup, 'event': 'wakeup', 'score': score}
                events.append(wakeup_row)

    return pd.DataFrame(events)

In [42]:
predicted_validation_events = get_events_smoothed(df_validation)

100%|████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  7.20it/s]


In [43]:
predicted_validation_events

Unnamed: 0,row_id,series_id,step,event,score
0,0,276,14747,onset,0.119325
1,1,276,26577,wakeup,0.119325
2,2,276,26746,onset,0.170844
3,3,276,36855,wakeup,0.170844
4,4,276,39429,onset,0.131120
...,...,...,...,...,...
133,133,276,752337,wakeup,0.084220
134,134,276,752506,onset,0.117530
135,135,276,769617,wakeup,0.117530
136,136,276,769786,onset,0.300670


## Evaluation

### Accuracy

In [44]:
accuracy_score(df_validation['awake'].values, df_validation['prediction_class'].values)

0.8154518088480353

### Competition Score

In [45]:
df_validation_events['series_id'] = df_validation_events['num_series_id']

In [46]:
competition_score(df_validation_events, predicted_validation_events)

0.00018194909238413955