In [1]:
import sys
import os
import gc
sys.path.append('../../')

from scoring.event_detection_matrix import competition_score
from models.fcn.fcn import LightningModel

import pandas as pd
import numpy as np
import torch
from tqdm import tqdm
from torch.utils.data import DataLoader, TensorDataset
from sklearn.metrics import accuracy_score
from scipy.signal import find_peaks

import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go

In [2]:
# Restore the LightningModel from the checkpoint file saved at epoch 4 / step 63530.
checkpoint_path = "../../models/fcn/neural-nappers/ssvgx4kg/checkpoints/epoch=4-step=63530.ckpt"
model = LightningModel.load_from_checkpoint(checkpoint_path)

In [3]:
# Per-step overview of the validation split; only the columns listed here are read.
overview_columns = [
    'num_series_id',
    'step',
    'onset_critical_event_point',
    'wakeup_critical_event_point',
    'series_index',
]
validation_overview = pd.read_parquet(
    '../../data/processed/transformer-downsample-4h-v2/validation/overview.parquet',
    columns=overview_columns,
)

# Annotated onset/wakeup events of the validation split, used as scoring reference.
df_validation_events = pd.read_csv('../../data/processed/validation_events_split.csv')

In [4]:
# Keep only the annotated events that actually carry a step value.
# `.copy()` turns the filtered selection into an independent frame, so the
# `series_id` column written to it later does not raise SettingWithCopyWarning.
df_validation_events = df_validation_events[df_validation_events.step.notnull()].copy()
df_validation_events

Unnamed: 0,series_id,night,event,step,timestamp,num_series_id
0,062dbd4c95e6,1,onset,7872.0,2018-08-22T23:11:00-0400,7
1,062dbd4c95e6,1,wakeup,14484.0,2018-08-23T08:22:00-0400,7
6,062dbd4c95e6,4,onset,60720.0,2018-08-26T00:35:00-0400,7
7,062dbd4c95e6,4,wakeup,68400.0,2018-08-26T11:15:00-0400,7
8,062dbd4c95e6,5,onset,77304.0,2018-08-26T23:37:00-0400,7
...,...,...,...,...,...,...
2955,fcca183903b7,33,wakeup,565824.0,2019-04-28T06:52:00-0400,276
2956,fcca183903b7,34,onset,577344.0,2019-04-28T22:52:00-0400,276
2957,fcca183903b7,34,wakeup,584052.0,2019-04-29T08:11:00-0400,276
2958,fcca183903b7,35,onset,595344.0,2019-04-29T23:52:00-0400,276


In [5]:
def predict(batch):
    """Run the module-level `model` on one batch and return sigmoid confidences.

    Args:
        batch: indexable batch whose first element is the model input tensor
            (the DataLoader in this notebook wraps a single-tensor TensorDataset).

    Returns:
        Tuple `(confidence_onset, confidence_wakeup)`: columns 0 and 1 of the
        sigmoid-activated model output.
    """
    inputs = batch[0]
    # Inference only: no autograd graph is needed for the forward pass.
    with torch.no_grad():
        probabilities = torch.sigmoid(model(inputs))
    return probabilities[:, 0], probabilities[:, 1]

In [6]:
def predict_series(validation_dataloader):
    """Predict onset/wakeup confidences for every batch of one series.

    Puts the module-level `model` into eval mode, runs `predict` on each batch
    and concatenates the per-sample results in dataloader order.

    Args:
        validation_dataloader: iterable of batches accepted by `predict`.

    Returns:
        Tuple `(confidence_onset_list, confidence_wakeup_list)` of Python lists
        holding one 0-d CPU tensor per sample (callers read them via `.item()`).
    """
    model.eval()
    confidence_onset_list = []
    confidence_wakeup_list = []

    for batch in tqdm(validation_dataloader):
        confidence_onset, confidence_wakeup = predict(batch)
        # Copy each batch to host memory once; otherwise every later `.item()`
        # call on an accelerator tensor costs its own device-to-host transfer.
        confidence_onset_list.extend(confidence_onset.cpu())
        confidence_wakeup_list.extend(confidence_wakeup.cpu())

    return confidence_onset_list, confidence_wakeup_list

In [7]:
# Flat, row-aligned accumulators: one entry per overview row across all series.
steps = []
num_series_ids = []
true_onset_values = []
true_wakeup_values = []
confidence_onset_list = []
confidence_wakeup_list = []

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# The inputs are moved to `device` below, so the model has to live there as well.
model.to(device)

validation_series_ids = validation_overview.num_series_id.unique()
series_count = validation_overview.num_series_id.nunique()

for i, num_series_id in enumerate(validation_series_ids):
    print(f'Series {i + 1} of {series_count}')
    # map_location lets tensors saved on another device load on this machine.
    series_X = torch.load(
        '../../data/processed/transformer-downsample-4h-v2/validation/' + str(num_series_id) + '.pt',
        map_location=device,
    )
    # Swap axes 1 and 2 before feeding the model.
    # NOTE(review): presumably (window, time, feature) -> (window, feature, time); confirm against the model input.
    validation_dataset = TensorDataset(torch.transpose(series_X, 1, 2).to(device))
    validation_dataloader = DataLoader(validation_dataset, batch_size=10000)
    series_confidence_onset, series_confidence_wakeup = predict_series(validation_dataloader)

    # Filter the overview once per series instead of once per column.
    series_overview = validation_overview[validation_overview.num_series_id == num_series_id]
    # The columns are zipped positionally further down, so a length mismatch
    # would silently pair predictions with the wrong steps.
    if len(series_confidence_onset) != len(series_overview):
        raise ValueError(
            f'Series {num_series_id}: {len(series_confidence_onset)} predictions '
            f'for {len(series_overview)} overview rows'
        )

    steps.extend(series_overview['step'])
    num_series_ids.extend(series_overview['num_series_id'])
    true_onset_values.extend(series_overview['onset_critical_event_point'])
    true_wakeup_values.extend(series_overview['wakeup_critical_event_point'])
    confidence_onset_list.extend([tensor.item() for tensor in series_confidence_onset])
    confidence_wakeup_list.extend([tensor.item() for tensor in series_confidence_wakeup])

Series 1 of 54


100%|██████████| 5/5 [00:01<00:00,  3.71it/s]


Series 2 of 54


100%|██████████| 2/2 [00:00<00:00,  8.16it/s]


Series 3 of 54


100%|██████████| 4/4 [00:02<00:00,  1.79it/s]


Series 4 of 54


100%|██████████| 4/4 [00:00<00:00, 11.40it/s]


Series 5 of 54


100%|██████████| 1/1 [00:00<00:00, 22.61it/s]


Series 6 of 54


100%|██████████| 4/4 [00:00<00:00,  8.13it/s]


Series 7 of 54


100%|██████████| 3/3 [00:00<00:00,  9.85it/s]


Series 8 of 54


100%|██████████| 4/4 [00:00<00:00, 11.66it/s]


Series 9 of 54


100%|██████████| 4/4 [00:00<00:00, 12.14it/s]


Series 10 of 54


100%|██████████| 4/4 [00:00<00:00, 10.75it/s]


Series 11 of 54


100%|██████████| 3/3 [00:01<00:00,  1.79it/s]


Series 12 of 54


100%|██████████| 4/4 [00:00<00:00,  8.66it/s]


Series 13 of 54


100%|██████████| 4/4 [00:02<00:00,  1.72it/s]


Series 14 of 54


100%|██████████| 3/3 [00:00<00:00, 26.97it/s]


Series 15 of 54


100%|██████████| 5/5 [00:00<00:00,  9.41it/s]


Series 16 of 54


100%|██████████| 4/4 [00:00<00:00, 11.73it/s]


Series 17 of 54


100%|██████████| 2/2 [00:00<00:00,  7.85it/s]


Series 18 of 54


100%|██████████| 1/1 [00:00<00:00, 66.39it/s]


Series 19 of 54


100%|██████████| 4/4 [00:00<00:00, 13.14it/s]


Series 20 of 54


100%|██████████| 4/4 [00:00<00:00, 11.94it/s]


Series 21 of 54


100%|██████████| 5/5 [00:00<00:00,  8.96it/s]


Series 22 of 54


100%|██████████| 1/1 [00:00<00:00, 30.37it/s]


Series 23 of 54


100%|██████████| 5/5 [00:00<00:00, 13.02it/s]


Series 24 of 54


100%|██████████| 1/1 [00:00<00:00,  3.97it/s]


Series 25 of 54


100%|██████████| 4/4 [00:00<00:00, 12.24it/s]


Series 26 of 54


100%|██████████| 4/4 [00:00<00:00, 12.00it/s]


Series 27 of 54


100%|██████████| 3/3 [00:00<00:00, 10.46it/s]


Series 28 of 54


100%|██████████| 4/4 [00:00<00:00, 11.86it/s]


Series 29 of 54


100%|██████████| 5/5 [00:00<00:00,  8.79it/s]


Series 30 of 54


100%|██████████| 2/2 [00:00<00:00, 20.83it/s]


Series 31 of 54


100%|██████████| 4/4 [00:00<00:00,  7.67it/s]


Series 32 of 54


100%|██████████| 5/5 [00:00<00:00, 12.03it/s]


Series 33 of 54


100%|██████████| 2/2 [00:00<00:00,  7.52it/s]


Series 34 of 54


100%|██████████| 1/1 [00:00<00:00, 24.81it/s]


Series 35 of 54


100%|██████████| 4/4 [00:00<00:00, 11.38it/s]


Series 36 of 54


100%|██████████| 1/1 [00:00<00:00,  4.51it/s]


Series 37 of 54


100%|██████████| 3/3 [00:00<00:00,  9.56it/s]


Series 38 of 54


100%|██████████| 4/4 [00:00<00:00, 11.71it/s]


Series 39 of 54


100%|██████████| 4/4 [00:00<00:00, 11.50it/s]


Series 40 of 54


100%|██████████| 1/1 [00:00<00:00, 28.10it/s]


Series 41 of 54


100%|██████████| 5/5 [00:00<00:00,  8.61it/s]


Series 42 of 54


100%|██████████| 3/3 [00:00<00:00,  8.82it/s]


Series 43 of 54


100%|██████████| 3/3 [00:00<00:00,  9.35it/s]


Series 44 of 54


100%|██████████| 1/1 [00:00<00:00, 36.23it/s]


Series 45 of 54


100%|██████████| 3/3 [00:00<00:00,  9.29it/s]


Series 46 of 54


100%|██████████| 4/4 [00:00<00:00, 11.58it/s]


Series 47 of 54


100%|██████████| 3/3 [00:00<00:00,  9.70it/s]


Series 48 of 54


100%|██████████| 2/2 [00:00<00:00, 18.91it/s]


Series 49 of 54


100%|██████████| 1/1 [00:00<00:00,  4.35it/s]


Series 50 of 54


100%|██████████| 6/6 [00:00<00:00,  9.74it/s]


Series 51 of 54


100%|██████████| 3/3 [00:00<00:00,  9.71it/s]


Series 52 of 54


100%|██████████| 4/4 [00:00<00:00, 11.40it/s]


Series 53 of 54


100%|██████████| 2/2 [00:00<00:00, 26.33it/s]


Series 54 of 54


100%|██████████| 6/6 [00:00<00:00,  9.41it/s]


In [8]:
# Combine the ground-truth columns and the model confidences into one frame,
# one row per validation step (all lists were filled in the same order).
validation_columns = {
    'step': steps,
    'num_series_id': num_series_ids,
    'onset_critical_event_point': true_onset_values,
    'wakeup_critical_event_point': true_wakeup_values,
    'prediction_confidence_onset': confidence_onset_list,
    'prediction_confidence_wakeup': confidence_wakeup_list,
}
df_validation = pd.DataFrame(validation_columns)

In [9]:
# Window sizes expressed in steps; both use 12 steps per minute, as the
# "30min" / "24h" notes imply.
# NOTE(review): smoothing_length is not referenced by any code visible in this notebook.
smoothing_length = 30 * 12 # 30min
day_step_len = 12 * 60 * 24 # 24h

def get_events_day(series_id, X, prominence):
    """Detect onset and wakeup candidates as confidence peaks inside one window.

    Args:
        series_id: identifier copied into every returned event.
        X: DataFrame indexed by step with the columns
            `prediction_confidence_onset` and `prediction_confidence_wakeup`.
            Peak positions are added to the first index label, so the index is
            expected to be a contiguous integer range.
        prominence: minimum peak prominence handed to `scipy.signal.find_peaks`.

    Returns:
        List of dicts with the keys `series_id`, `step`, `event` and `score`
        (the peak prominence); all onsets first, then all wakeups. An empty
        window yields an empty list.
    """
    if len(X) == 0:
        # Nothing to scan; avoids an IndexError when reading the first index label.
        return []

    start_step = X.index[0]
    event_columns = (
        ('onset', 'prediction_confidence_onset'),
        ('wakeup', 'prediction_confidence_wakeup'),
    )

    events = []
    for event, column in event_columns:
        # find_peaks returns positional offsets plus a properties dict that
        # contains 'prominences' because a prominence threshold was given.
        peak_offsets, properties = find_peaks(X[column].values, prominence=prominence)
        for offset, score in zip(peak_offsets, properties['prominences']):
            events.append({'series_id': series_id, 'step': start_step + offset, 'event': event, 'score': score})

    return events

def get_events_smoothed(test_series, prominence):
    """Extract predicted events for every series by per-day peak detection.

    Each series is re-indexed to a gap-free step range (missing steps are
    forward-filled), cut into windows of `day_step_len` steps and passed to
    `get_events_day`. Despite its name, this function applies no smoothing.

    Args:
        test_series: DataFrame with the columns `num_series_id`, `step`,
            `prediction_confidence_onset` and `prediction_confidence_wakeup`.
        prominence: minimum peak prominence forwarded to `get_events_day`.

    Returns:
        DataFrame with the columns `row_id`, `series_id`, `step`, `event` and
        `score`. The columns are present even when no event was detected.
    """
    event_columns = ['series_id', 'step', 'event', 'score']
    events = []

    for idx in tqdm(test_series['num_series_id'].unique()):
        X = test_series[test_series.num_series_id == idx].set_index('step')
        # Fill step gaps so that positional peak offsets map 1:1 onto step numbers.
        X = X.reindex(range(X.index.min(), X.index.max() + 1)).ffill()

        # Peaks are searched independently in each window of day_step_len steps.
        for j in range(0, len(X), day_step_len):
            events.extend(get_events_day(idx, X[j:j+day_step_len], prominence))

    # Explicit columns keep the schema intact when no peak clears `prominence`;
    # without them an empty result would only contain `row_id`.
    return (
        pd.DataFrame(events, columns=event_columns)
        .reset_index()
        .rename(columns={'index': 'row_id'})
    )

In [10]:
# Predicted events carry the numeric series id in `series_id`, so the reference
# events get the same key. `assign` returns a new frame, which avoids
# SettingWithCopyWarning on the previously filtered frame and stays re-runnable.
df_validation_events = df_validation_events.assign(series_id=df_validation_events['num_series_id'])

In [11]:
# Sweep the peak-prominence threshold from 0.1 to 0.8 and report the score of each setting.
prominence_grid = [tenth / 10 for tenth in range(1, 9)]
for i, prominence in enumerate(prominence_grid, start=1):
    predicted_validation_events = get_events_smoothed(df_validation, prominence)
    score = competition_score(df_validation_events, predicted_validation_events)
    print(f'Prominence: {prominence}, Score: {score}')

100%|██████████| 54/54 [00:01<00:00, 34.94it/s]


Prominence: 0.1, Score: 0.5023974104925462


100%|██████████| 54/54 [00:01<00:00, 35.11it/s]


Prominence: 0.2, Score: 0.49448979098038276


100%|██████████| 54/54 [00:01<00:00, 35.07it/s]


Prominence: 0.3, Score: 0.4867677629798064


100%|██████████| 54/54 [00:01<00:00, 35.00it/s]


Prominence: 0.4, Score: 0.4746867839979203


100%|██████████| 54/54 [00:01<00:00, 35.13it/s]


Prominence: 0.5, Score: 0.45506674108722933


100%|██████████| 54/54 [00:01<00:00, 35.12it/s]


Prominence: 0.6, Score: 0.4135855687825979


100%|██████████| 54/54 [00:01<00:00, 34.92it/s]


Prominence: 0.7, Score: 0.3304009096902748


100%|██████████| 54/54 [00:01<00:00, 35.32it/s]


Prominence: 0.8, Score: 0.12901445419401952


In [12]:
# Re-extract the events with prominence 0.1, the best-scoring setting of the sweep.
predicted_validation_events = get_events_smoothed(test_series=df_validation, prominence=0.1)

100%|██████████| 54/54 [00:01<00:00, 35.10it/s]


In [13]:
# Inspect the detected events (row_id, series_id, step, event, score).
predicted_validation_events

Unnamed: 0,row_id,series_id,step,event,score
0,0,7,7877,onset,0.791293
1,1,7,14597,wakeup,0.752681
2,2,7,24749,onset,0.473838
3,3,7,36894,onset,0.589692
4,4,7,60583,onset,0.506161
...,...,...,...,...,...
3139,3139,276,595397,onset,0.724513
3140,3140,276,596285,onset,0.256537
3141,3141,276,604505,onset,0.443325
3142,3142,276,602165,wakeup,0.717527


## Evaluation

In [14]:
# Score of the prominence-0.1 predictions against the annotated validation events.
competition_score(df_validation_events, predicted_validation_events)

0.5023974104925462

In [15]:
# Persist the per-step predictions. `to_parquet` does not create missing
# parent folders, so make sure the target directory exists first.
os.makedirs('./test', exist_ok=True)
df_validation.to_parquet('./test/predicted_series_cp_4h_v2.parquet')