In [1]:
import sys
import gc
sys.path.append('../')

from scoring.event_detection_matrix import competition_score
from models.mlp import PyTorchMLP, LightningModel

from sklearn.preprocessing import StandardScaler

import pandas as pd
import numpy as np
import torch
from tqdm import tqdm
from torch.utils.data import DataLoader, TensorDataset

## Load Model

In [2]:
checkpoint_path = "../models/neural-nappers/dsgyuxte/checkpoints/epoch=1-step=394540.ckpt"
model = LightningModel.load_from_checkpoint(checkpoint_path)

## Load Data

In [3]:
df_train_series = pd.read_parquet('../data/processed/train_series_split.parquet')
df_validation_series = pd.read_parquet('../data/processed/validation_series_split.parquet')
df_validation_events = pd.read_csv('../data/processed/validation_events_split.csv')

In [4]:
df_validation_events = df_validation_events[df_validation_events.step.notnull()]
df_validation_events

Unnamed: 0,series_id,night,event,step,timestamp
0,062dbd4c95e6,1,onset,7872.0,2018-08-22T23:11:00-0400
1,062dbd4c95e6,1,wakeup,14484.0,2018-08-23T08:22:00-0400
6,062dbd4c95e6,4,onset,60720.0,2018-08-26T00:35:00-0400
7,062dbd4c95e6,4,wakeup,68400.0,2018-08-26T11:15:00-0400
8,062dbd4c95e6,5,onset,77304.0,2018-08-26T23:37:00-0400
...,...,...,...,...,...
2955,fcca183903b7,33,wakeup,565824.0,2019-04-28T06:52:00-0400
2956,fcca183903b7,34,onset,577344.0,2019-04-28T22:52:00-0400
2957,fcca183903b7,34,wakeup,584052.0,2019-04-29T08:11:00-0400
2958,fcca183903b7,35,onset,595344.0,2019-04-29T23:52:00-0400


In [5]:
#df_validation_series = df_validation_series[df_validation_series.series_id == '038441c925bb']
#df_validation_events = df_validation_events[df_validation_events.series_id == '038441c925bb']
print(df_validation_series.shape)

(26187300, 19)


## Prepare Data

In [6]:
FEATURES = ['anglez', 'enmo',
            'hour',
            'anglez_abs', 'anglez_diff', 'enmo_diff', 'anglez_x_enmo',
            'anglez_rolling_mean', 'enmo_rolling_mean', 'anglez_rolling_max', 'enmo_rolling_max', 'anglez_rolling_min',
            'anglez_rolling_std', 'enmo_rolling_std']

LABEL = ['awake']

In [7]:
X_train = df_train_series[FEATURES].astype('float32')
scaler = StandardScaler()
scaler.fit(X_train)

del X_train
gc.collect()

198

In [8]:
X_validation = df_validation_series[FEATURES].astype('float32')
y_validation = df_validation_series[LABEL].astype('int64')

X_validation = scaler.transform(X_validation)

X_validation = torch.from_numpy(X_validation)
y_validation = torch.from_numpy(y_validation.to_numpy()).squeeze(1)

validation_dataset = TensorDataset(X_validation, y_validation)
validation_dataloader = DataLoader(validation_dataset, batch_size=10000)

## Predict

In [9]:
def predict(batch):
    X, y = batch
    with torch.no_grad():
        logits = model(X)
    label = torch.argmax(logits, dim=-1)
    confidence = torch.softmax(logits, dim=-1)
    confidence_0 = confidence[:, 0]
    confidence_1 = confidence[:, 1]
    return label, confidence_0, confidence_1

In [10]:
model.eval()
label_list = []
confidence_0_list = []
confidence_1_list = []

for index, batch in enumerate(tqdm(validation_dataloader)) :
    label, confidence_0, confidence_1 = predict(batch)
    
    label_list.append(label)
    confidence_0_list.append(confidence_0)
    confidence_1_list.append(confidence_1)      

100%|██████████████████████████████████████████████████████████████████████████████| 2619/2619 [08:49<00:00,  4.95it/s]


In [11]:
label_list = torch.cat(label_list).numpy()
confidence_0_list = torch.cat(confidence_0_list).numpy()
confidence_1_list = torch.cat(confidence_1_list).numpy()

In [12]:
df_validation_series['prediction_class'] = label_list
df_validation_series['prediction_confidence_0'] = confidence_0_list
df_validation_series['prediction_confidence_1'] = confidence_1_list

In [13]:
df_validation_series = df_validation_series[['series_id', 'step', 'timestamp', 'awake', 'prediction_class', 'prediction_confidence_0', 'prediction_confidence_1']]
df_validation_series

Unnamed: 0,series_id,step,timestamp,awake,prediction_class,prediction_confidence_0,prediction_confidence_1
2992140,062dbd4c95e6,0,2018-08-22 12:15:00,1,1,0.000060,0.999940
2992141,062dbd4c95e6,1,2018-08-22 12:15:05,1,1,0.000058,0.999942
2992142,062dbd4c95e6,2,2018-08-22 12:15:10,1,1,0.000051,0.999949
2992143,062dbd4c95e6,3,2018-08-22 12:15:15,1,1,0.000069,0.999931
2992144,062dbd4c95e6,4,2018-08-22 12:15:20,1,1,0.000048,0.999952
...,...,...,...,...,...,...,...
124229695,fcca183903b7,620635,2019-05-01 10:59:35,1,1,0.138705,0.861295
124229696,fcca183903b7,620636,2019-05-01 10:59:40,1,1,0.138705,0.861295
124229697,fcca183903b7,620637,2019-05-01 10:59:45,1,1,0.138705,0.861295
124229698,fcca183903b7,620638,2019-05-01 10:59:50,1,1,0.138705,0.861295


## Extract Events

In [24]:
def get_events_smoothed(test_series) :
    series_ids = test_series['series_id'].unique()
    events = []

    for idx in tqdm(series_ids):
        # Collecting sample and normalizing features
        X = test_series[test_series.series_id == idx]
                
        smoothing_length = 12 * 30 # 30 Minutes
        X["score"] = X["prediction_confidence_1"].rolling(smoothing_length, center=True).mean().bfill().ffill()
        X["smooth"] = X["prediction_confidence_0"].rolling(smoothing_length, center=True).mean().bfill().ffill()

        # Binarize the smoothing column
        X["smooth"] = X["smooth"].round()

        # Getting predicted onset and wakeup time steps
        pred_onsets = X[X['smooth'].diff() > 0]['step'].tolist()
        pred_wakeups = X[X['smooth'].diff() < 0]['step'].tolist()
     
        if len(pred_onsets) > 0 : 

            # Ensuring all predicted sleep periods begin and end
            if min(pred_wakeups) < min(pred_onsets) : 
                pred_wakeups = pred_wakeups[1:]

            if max(pred_onsets) > max(pred_wakeups) :
                pred_onsets = pred_onsets[:-1]

            # Keeping sleep periods longer than 30 minutes
            sleep_periods = [(onset, wakeup) for onset, wakeup in zip(pred_onsets, pred_wakeups) if wakeup - onset >= 12 * 30]

            for onset, wakeup in sleep_periods :
                # Scoring using mean probability over period
                score = X[(X['step'] >= onset) & (X['step'] <= wakeup)]['score'].mean()

                # Adding sleep event to dataframe
                onset_row = {'row_id': len(events), 'series_id': idx, 'step': onset, 'event': 'onset', 'score': score}                
                events.append(onset_row)

                wakeup_row = {'row_id': len(events), 'series_id': idx, 'step': wakeup, 'event': 'wakeup', 'score': score}
                events.append(wakeup_row)

    return pd.DataFrame(events)

In [25]:
predicted_validation_events = get_events_smoothed(df_validation_series)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X["score"] = X["prediction_confidence_1"].rolling(smoothing_length, center=True).mean().bfill().ffill()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X["smooth"] = X["prediction_confidence_0"].rolling(smoothing_length, center=True).mean().bfill().ffill()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-cop

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X["score"] = X["prediction_confidence_1"].rolling(smoothing_length, center=True).mean().bfill().ffill()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X["smooth"] = X["prediction_confidence_0"].rolling(smoothing_length, center=True).mean().bfill().ffill()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-cop

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X["score"] = X["prediction_confidence_1"].rolling(smoothing_length, center=True).mean().bfill().ffill()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X["smooth"] = X["prediction_confidence_0"].rolling(smoothing_length, center=True).mean().bfill().ffill()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-cop

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X["score"] = X["prediction_confidence_1"].rolling(smoothing_length, center=True).mean().bfill().ffill()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X["smooth"] = X["prediction_confidence_0"].rolling(smoothing_length, center=True).mean().bfill().ffill()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-cop

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X["score"] = X["prediction_confidence_1"].rolling(smoothing_length, center=True).mean().bfill().ffill()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X["smooth"] = X["prediction_confidence_0"].rolling(smoothing_length, center=True).mean().bfill().ffill()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-cop

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X["score"] = X["prediction_confidence_1"].rolling(smoothing_length, center=True).mean().bfill().ffill()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X["smooth"] = X["prediction_confidence_0"].rolling(smoothing_length, center=True).mean().bfill().ffill()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-cop

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X["score"] = X["prediction_confidence_1"].rolling(smoothing_length, center=True).mean().bfill().ffill()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X["smooth"] = X["prediction_confidence_0"].rolling(smoothing_length, center=True).mean().bfill().ffill()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-cop

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X["score"] = X["prediction_confidence_1"].rolling(smoothing_length, center=True).mean().bfill().ffill()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X["smooth"] = X["prediction_confidence_0"].rolling(smoothing_length, center=True).mean().bfill().ffill()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-cop

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X["score"] = X["prediction_confidence_1"].rolling(smoothing_length, center=True).mean().bfill().ffill()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X["smooth"] = X["prediction_confidence_0"].rolling(smoothing_length, center=True).mean().bfill().ffill()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-cop

100%|██████████████████████████████████████████████████████████████████████████████████| 54/54 [01:05<00:00,  1.21s/it]


In [26]:
predicted_validation_events

Unnamed: 0,row_id,series_id,step,event,score
0,0,062dbd4c95e6,7877,onset,0.093210
1,1,062dbd4c95e6,14589,wakeup,0.093210
2,2,062dbd4c95e6,25826,onset,0.322840
3,3,062dbd4c95e6,31149,wakeup,0.322840
4,4,062dbd4c95e6,43106,onset,0.322840
...,...,...,...,...,...
3763,3763,fcca183903b7,584011,wakeup,0.058985
3764,3764,fcca183903b7,595329,onset,0.083788
3765,3765,fcca183903b7,602079,wakeup,0.083788
3766,3766,fcca183903b7,612559,onset,0.166891


## Evaluate

In [27]:
competition_score(df_validation_events, predicted_validation_events)

0.09868303387489089

In [19]:
df_validation_series.to_parquet('predicted_series_mlp_new_full.parquet')