In [1]:
import sys
import os
import gc
sys.path.append('../../')

from scoring.event_detection_matrix import competition_score
from models.fcn.fcn import LightningModel

import pandas as pd
import numpy as np
import torch
from tqdm import tqdm
from torch.utils.data import DataLoader, TensorDataset
from sklearn.metrics import accuracy_score

In [2]:
checkpoint_path = "../../models/fcn/neural-nappers/boprds3u/checkpoints/epoch=3-step=48708.ckpt"
model = LightningModel.load_from_checkpoint(checkpoint_path)

In [3]:
validation_overview = pd.read_parquet('../../data/processed/transformer-downsample-2h/validation/overview.parquet', columns=['num_series_id', 'step', 'awake', 'series_index'])
df_validation_events = pd.read_csv('../../data/processed/validation_events_split.csv')

In [4]:
df_validation_events = df_validation_events[df_validation_events.step.notnull()]
df_validation_events

Unnamed: 0,series_id,night,event,step,timestamp,num_series_id
0,062dbd4c95e6,1,onset,7872.0,2018-08-22T23:11:00-0400,7
1,062dbd4c95e6,1,wakeup,14484.0,2018-08-23T08:22:00-0400,7
6,062dbd4c95e6,4,onset,60720.0,2018-08-26T00:35:00-0400,7
7,062dbd4c95e6,4,wakeup,68400.0,2018-08-26T11:15:00-0400,7
8,062dbd4c95e6,5,onset,77304.0,2018-08-26T23:37:00-0400,7
...,...,...,...,...,...,...
2955,fcca183903b7,33,wakeup,565824.0,2019-04-28T06:52:00-0400,276
2956,fcca183903b7,34,onset,577344.0,2019-04-28T22:52:00-0400,276
2957,fcca183903b7,34,wakeup,584052.0,2019-04-29T08:11:00-0400,276
2958,fcca183903b7,35,onset,595344.0,2019-04-29T23:52:00-0400,276


In [5]:
def predict(batch):
    X = batch
    with torch.no_grad():
        logits = model(X[0])
    label = torch.argmax(logits, dim=-1)
    confidence = torch.softmax(logits, dim=-1)
    confidence_0 = confidence[:, 0]
    confidence_1 = confidence[:, 1]
    return label, confidence_0, confidence_1

In [6]:
def predict_series(validation_dataloader):
    model.eval()
    label_list = []
    confidence_0_list = []
    confidence_1_list = []
    
    for index, batch in enumerate(tqdm(validation_dataloader)) :
        label, confidence_0, confidence_1 = predict(batch)

        label_list.extend(label)
        confidence_0_list.extend(confidence_0)
        confidence_1_list.extend(confidence_1)
    return label_list, confidence_0_list, confidence_1_list

In [7]:
steps = []
num_series_ids = []
true_values = []
label_list = []
confidence_0_list = []
confidence_1_list = []

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

for i, num_series_id in enumerate(validation_overview.num_series_id.unique()):
    print(f'Series {i + 1} of {validation_overview.num_series_id.nunique()}')
    series_X = torch.load('../../data/processed/transformer-downsample-2h/validation/' + str(num_series_id) + '.pt')
    validation_dataset = TensorDataset(torch.transpose(series_X, 1, 2).to(device))
    validation_dataloader = DataLoader(validation_dataset, batch_size=10000)
    series_label, series_confidence_0, series_confidence_1 = predict_series(validation_dataloader)

    steps.extend(validation_overview[validation_overview.num_series_id == num_series_id]['step'])
    num_series_ids.extend(validation_overview[validation_overview.num_series_id == num_series_id]['num_series_id'])
    true_values.extend(validation_overview[validation_overview.num_series_id == num_series_id]['awake'])
    label_list.extend([tensor.item() for tensor in series_label])
    confidence_0_list.extend([tensor.item() for tensor in series_confidence_0])
    confidence_1_list.extend([tensor.item() for tensor in series_confidence_1])

Series 1 of 54


100%|██████████| 4/4 [00:02<00:00,  1.53it/s]


Series 2 of 54


100%|██████████| 2/2 [00:00<00:00,  7.04it/s]


Series 3 of 54


100%|██████████| 4/4 [00:00<00:00, 10.67it/s]


Series 4 of 54


100%|██████████| 4/4 [00:00<00:00,  7.17it/s]


Series 5 of 54


100%|██████████| 1/1 [00:00<00:00, 20.99it/s]


Series 6 of 54


100%|██████████| 4/4 [00:00<00:00, 10.30it/s]


Series 7 of 54


100%|██████████| 3/3 [00:00<00:00,  8.44it/s]


Series 8 of 54


100%|██████████| 4/4 [00:00<00:00,  7.63it/s]


Series 9 of 54


100%|██████████| 4/4 [00:00<00:00, 10.67it/s]


Series 10 of 54


100%|██████████| 4/4 [00:00<00:00,  9.74it/s]


Series 11 of 54


100%|██████████| 3/3 [00:01<00:00,  1.78it/s]


Series 12 of 54


100%|██████████| 4/4 [00:00<00:00,  7.77it/s]


Series 13 of 54


100%|██████████| 3/3 [00:01<00:00,  1.74it/s]


Series 14 of 54


100%|██████████| 2/2 [00:00<00:00, 16.58it/s]


Series 15 of 54


100%|██████████| 5/5 [00:00<00:00,  8.61it/s]


Series 16 of 54


100%|██████████| 4/4 [00:02<00:00,  1.68it/s]


Series 17 of 54


100%|██████████| 1/1 [00:00<00:00, 16.11it/s]


Series 18 of 54


100%|██████████| 1/1 [00:00<00:00, 79.74it/s]


Series 19 of 54


100%|██████████| 4/4 [00:02<00:00,  1.78it/s]


Series 20 of 54


100%|██████████| 4/4 [00:00<00:00,  7.46it/s]


Series 21 of 54


100%|██████████| 5/5 [00:00<00:00, 11.73it/s]


Series 22 of 54


100%|██████████| 1/1 [00:00<00:00, 40.59it/s]


Series 23 of 54


100%|██████████| 5/5 [00:00<00:00,  8.66it/s]


Series 24 of 54


100%|██████████| 1/1 [00:00<00:00,  4.79it/s]


Series 25 of 54


100%|██████████| 4/4 [00:00<00:00, 11.25it/s]


Series 26 of 54


100%|██████████| 4/4 [00:00<00:00, 11.34it/s]


Series 27 of 54


100%|██████████| 3/3 [00:00<00:00,  9.89it/s]


Series 28 of 54


100%|██████████| 4/4 [00:00<00:00,  7.60it/s]


Series 29 of 54


100%|██████████| 4/4 [00:00<00:00,  6.82it/s]


Series 30 of 54


100%|██████████| 2/2 [00:00<00:00, 18.24it/s]


Series 31 of 54


100%|██████████| 4/4 [00:00<00:00,  7.10it/s]


Series 32 of 54


100%|██████████| 5/5 [00:00<00:00,  7.61it/s]


Series 33 of 54


100%|██████████| 2/2 [00:00<00:00, 22.57it/s]


Series 34 of 54


100%|██████████| 1/1 [00:00<00:00, 26.02it/s]


Series 35 of 54


100%|██████████| 4/4 [00:00<00:00,  7.32it/s]


Series 36 of 54


100%|██████████| 1/1 [00:00<00:00, 24.53it/s]


Series 37 of 54


100%|██████████| 3/3 [00:00<00:00,  9.39it/s]


Series 38 of 54


100%|██████████| 4/4 [00:00<00:00,  7.51it/s]


Series 39 of 54


100%|██████████| 4/4 [00:00<00:00, 10.18it/s]


Series 40 of 54


100%|██████████| 1/1 [00:00<00:00, 59.07it/s]


Series 41 of 54


100%|██████████| 5/5 [00:00<00:00,  8.01it/s]


Series 42 of 54


100%|██████████| 3/3 [00:00<00:00,  8.69it/s]


Series 43 of 54


100%|██████████| 3/3 [00:00<00:00,  9.26it/s]


Series 44 of 54


100%|██████████| 1/1 [00:00<00:00,  4.78it/s]


Series 45 of 54


100%|██████████| 3/3 [00:00<00:00, 21.09it/s]


Series 46 of 54


100%|██████████| 4/4 [00:00<00:00,  7.39it/s]


Series 47 of 54


100%|██████████| 3/3 [00:00<00:00,  9.09it/s]


Series 48 of 54


100%|██████████| 2/2 [00:00<00:00,  7.27it/s]


Series 49 of 54


100%|██████████| 1/1 [00:00<00:00, 19.31it/s]


Series 50 of 54


100%|██████████| 5/5 [00:03<00:00,  1.57it/s]


Series 51 of 54


100%|██████████| 2/2 [00:00<00:00,  6.32it/s]


Series 52 of 54


100%|██████████| 4/4 [00:00<00:00, 10.52it/s]


Series 53 of 54


100%|██████████| 2/2 [00:00<00:00,  8.06it/s]


Series 54 of 54


100%|██████████| 6/6 [00:00<00:00,  9.05it/s]


In [8]:
df_validation = pd.DataFrame(
    {'step': steps,
     'num_series_id': num_series_ids,
     'awake': true_values,
     'prediction_class': label_list,
     'prediction_confidence_0': confidence_0_list,
     'prediction_confidence_1': confidence_1_list
    })

In [9]:
df_validation

Unnamed: 0,step,num_series_id,awake,prediction_class,prediction_confidence_0,prediction_confidence_1
0,0,7,1,1,3.206824e-07,1.000000
1,12,7,1,1,3.533790e-07,1.000000
2,24,7,1,1,3.895374e-07,1.000000
3,36,7,1,1,4.301727e-07,1.000000
4,48,7,1,1,4.603106e-07,1.000000
...,...,...,...,...,...,...
1373679,620581,276,1,0,7.056178e-01,0.294382
1373680,620593,276,1,0,7.084275e-01,0.291573
1373681,620605,276,1,0,7.106867e-01,0.289313
1373682,620617,276,1,0,7.121412e-01,0.287859


In [16]:
def get_events_smoothed(test_series) :
    series_ids = test_series['num_series_id'].unique()
    events = []

    for idx in tqdm(series_ids):
        # Collecting sample and normalizing features
        X = test_series[test_series.num_series_id == idx]
                
        smoothing_length = 30 # 30 Minutes
        # We average the confidence, that the participant is awake
        X["confidence_awake"] = X["prediction_confidence_1"].rolling(smoothing_length, center=True).mean().bfill().ffill()
        X["asleep"] = X["prediction_confidence_0"].rolling(smoothing_length, center=True).mean().bfill().ffill()

        # Binarize the asleep column
        X["asleep"] = X["asleep"].round()

        # Getting predicted onset and wakeup time steps
        pred_onsets = X[X['asleep'].diff() > 0]['step'].tolist() # diff is > 0 if it changes from 0 (awake) to 1 (asleep)
        pred_wakeups = X[X['asleep'].diff() < 0]['step'].tolist() # diff is < 0 if it changes from 1 (asleep) to 0 (awake)
     
        if len(pred_onsets) > 0:

            # Ensuring all predicted sleep periods begin and end
            if min(pred_wakeups) < min(pred_onsets):
                pred_wakeups = pred_wakeups[1:]

            if max(pred_onsets) > max(pred_wakeups):
                pred_onsets = pred_onsets[:-1]

            # Keeping sleep periods longer than 30 minutes
            sleep_periods = [(onset, wakeup) for onset, wakeup in zip(pred_onsets, pred_wakeups) if wakeup - onset >= 12 * 30]

            for onset, wakeup in sleep_periods:
                # We take the score
                score = 1 - X[(X['step'] >= onset) & (X['step'] < wakeup)]['confidence_awake'].mean()

                # Adding sleep event to dataframe
                onset_row = {'row_id': len(events), 'series_id': idx, 'step': onset, 'event': 'onset', 'score': score}                
                events.append(onset_row)

                wakeup_row = {'row_id': len(events), 'series_id': idx, 'step': wakeup, 'event': 'wakeup', 'score': score}
                events.append(wakeup_row)

    return pd.DataFrame(events)

In [17]:
predicted_validation_events = get_events_smoothed(df_validation)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X["confidence_awake"] = X["prediction_confidence_1"].rolling(smoothing_length, center=True).mean().bfill().ffill()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X["asleep"] = X["prediction_confidence_0"].rolling(smoothing_length, center=True).mean().bfill().ffill()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-v

## Evaluation

### Accuracy

In [18]:
accuracy_score(df_validation['awake'].values, df_validation['prediction_class'].values)

0.9340284956365511

### Competition Score

In [19]:
df_validation_events['series_id'] = df_validation_events['num_series_id']

In [22]:
predicted_validation_events

Unnamed: 0,row_id,series_id,step,event,score
0,0,7,7920,onset,0.937915
1,1,7,14676,wakeup,0.937915
2,2,7,60350,onset,0.931896
3,3,7,67550,wakeup,0.931896
4,4,7,77402,onset,0.932156
...,...,...,...,...,...
1983,1983,276,549144,wakeup,0.943360
1984,1984,276,559824,onset,0.925977
1985,1985,276,565776,wakeup,0.925977
1986,1986,276,577392,onset,0.891539


In [23]:
df_validation_events

Unnamed: 0,series_id,night,event,step,timestamp,num_series_id
0,7,1,onset,7872.0,2018-08-22T23:11:00-0400,7
1,7,1,wakeup,14484.0,2018-08-23T08:22:00-0400,7
6,7,4,onset,60720.0,2018-08-26T00:35:00-0400,7
7,7,4,wakeup,68400.0,2018-08-26T11:15:00-0400,7
8,7,5,onset,77304.0,2018-08-26T23:37:00-0400,7
...,...,...,...,...,...,...
2955,276,33,wakeup,565824.0,2019-04-28T06:52:00-0400,276
2956,276,34,onset,577344.0,2019-04-28T22:52:00-0400,276
2957,276,34,wakeup,584052.0,2019-04-29T08:11:00-0400,276
2958,276,35,onset,595344.0,2019-04-29T23:52:00-0400,276


In [20]:
competition_score(df_validation_events, predicted_validation_events)

0.364877817219852

In [15]:
df_validation.to_parquet('./test/predicted_series_4h.parquet')