In [1]:
import sys
import os
import gc
sys.path.append('../../')

from scoring.event_detection_matrix import competition_score
from models.fcn.fcn import LightningModel

import pandas as pd
import numpy as np
import torch
from tqdm import tqdm
from torch.utils.data import DataLoader, TensorDataset
from sklearn.metrics import accuracy_score
from scipy.signal import find_peaks

import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go

In [2]:
# Restore the trained FCN LightningModel from a saved checkpoint file
# (the file name indicates epoch 4, step 60885).
checkpoint_path = "../../models/fcn/neural-nappers/0jg78qoh/checkpoints/epoch=4-step=60885.ckpt"
model = LightningModel.load_from_checkpoint(checkpoint_path)

In [3]:
# Per-window metadata for the validation split; only the columns used below are read.
validation_overview = pd.read_parquet(
    '../../data/processed/transformer-downsample-2h/validation/overview.parquet',
    columns=['num_series_id', 'step', 'critical_event_point', 'series_index'],
)
# Ground-truth onset/wakeup annotations for the validation split.
df_validation_events = pd.read_csv(
    '../../data/processed/validation_events_split.csv'
)

In [4]:
# Drop annotations that have no step (events that were not labelled).
# `.copy()` detaches the result from the unfiltered frame so that adding or
# overwriting columns on it later is a plain assignment on an independent
# DataFrame instead of a write to a filtered slice.
df_validation_events = df_validation_events[df_validation_events.step.notnull()].copy()
df_validation_events

Unnamed: 0,series_id,night,event,step,timestamp,num_series_id
0,062dbd4c95e6,1,onset,7872.0,2018-08-22T23:11:00-0400,7
1,062dbd4c95e6,1,wakeup,14484.0,2018-08-23T08:22:00-0400,7
6,062dbd4c95e6,4,onset,60720.0,2018-08-26T00:35:00-0400,7
7,062dbd4c95e6,4,wakeup,68400.0,2018-08-26T11:15:00-0400,7
8,062dbd4c95e6,5,onset,77304.0,2018-08-26T23:37:00-0400,7
...,...,...,...,...,...,...
2955,fcca183903b7,33,wakeup,565824.0,2019-04-28T06:52:00-0400,276
2956,fcca183903b7,34,onset,577344.0,2019-04-28T22:52:00-0400,276
2957,fcca183903b7,34,wakeup,584052.0,2019-04-29T08:11:00-0400,276
2958,fcca183903b7,35,onset,595344.0,2019-04-29T23:52:00-0400,276


In [5]:
def predict(batch):
    """Run the global ``model`` on one batch and return sigmoid confidences.

    Args:
        batch: An indexable batch whose first element is the input tensor
            handed to the model (only ``batch[0]`` is used).

    Returns:
        A tensor holding ``sigmoid(model(batch[0]))``, computed with
        gradient tracking disabled.
    """
    inputs = batch[0]
    with torch.no_grad():
        return torch.sigmoid(model(inputs))

In [6]:
def predict_series(validation_dataloader):
    """Predict confidences for every batch of one series.

    Puts the global ``model`` into eval mode, runs ``predict`` on each batch
    produced by ``validation_dataloader`` (with a tqdm progress bar) and
    collects the results.

    Args:
        validation_dataloader: Iterable of batches accepted by ``predict``.

    Returns:
        A list with one tensor per sample (the batch output split along its
        first dimension), in dataloader order. The tensors live on the CPU.
    """
    model.eval()
    confidence_list = []

    for batch in tqdm(validation_dataloader):
        # Copy the whole batch to host memory once. Extending the list with a
        # device tensor would leave one device tensor per sample, and reading
        # each of those with `.item()` costs a separate device-to-host
        # transfer per value.
        confidence_list.extend(predict(batch).cpu())
    return confidence_list

In [7]:
# Flat, row-aligned accumulators: one entry per overview row across all series.
steps = []
num_series_ids = []
true_values = []
confidence_list = []

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# The inputs below are moved to `device`; move the model there as well so the
# forward pass never mixes devices (no-op when it is already on `device`).
model.to(device)

validation_series_ids = validation_overview.num_series_id.unique()
series_count = validation_overview.num_series_id.nunique()

for i, num_series_id in enumerate(validation_series_ids):
    print(f'Series {i + 1} of {series_count}')
    series_X = torch.load('../../data/processed/transformer-downsample-2h/validation/' + str(num_series_id) + '.pt')
    # Swap the last two axes before feeding the model.
    # NOTE(review): presumably (window, time, feature) -> (window, feature, time); confirm against the FCN input layout.
    validation_dataset = TensorDataset(torch.transpose(series_X, 1, 2).to(device))
    validation_dataloader = DataLoader(validation_dataset, batch_size=10000)
    series_confidence = predict_series(validation_dataloader)

    # Select this series' overview rows once instead of re-filtering per column.
    series_overview = validation_overview[validation_overview.num_series_id == num_series_id]
    if len(series_confidence) != len(series_overview):
        # The accumulators are only meaningful if predictions and overview
        # rows line up one-to-one within every series.
        raise ValueError(
            f'Series {num_series_id}: got {len(series_confidence)} predictions '
            f'for {len(series_overview)} overview rows'
        )

    steps.extend(series_overview['step'])
    num_series_ids.extend(series_overview['num_series_id'])
    true_values.extend(series_overview['critical_event_point'])
    confidence_list.extend([tensor.item() for tensor in series_confidence])


Series 1 of 54


100%|██████████| 4/4 [00:02<00:00,  1.52it/s]


Series 2 of 54


100%|██████████| 2/2 [00:00<00:00,  7.09it/s]


Series 3 of 54


100%|██████████| 4/4 [00:00<00:00, 13.35it/s]


Series 4 of 54


100%|██████████| 4/4 [00:00<00:00, 10.60it/s]


Series 5 of 54


100%|██████████| 1/1 [00:00<00:00, 21.12it/s]


Series 6 of 54


100%|██████████| 4/4 [00:00<00:00,  7.37it/s]


Series 7 of 54


100%|██████████| 3/3 [00:00<00:00,  6.92it/s]


Series 8 of 54


100%|██████████| 4/4 [00:00<00:00, 10.00it/s]


Series 9 of 54


100%|██████████| 4/4 [00:00<00:00, 10.40it/s]


Series 10 of 54


100%|██████████| 4/4 [00:00<00:00,  9.77it/s]


Series 11 of 54


100%|██████████| 3/3 [00:01<00:00,  2.00it/s]


Series 12 of 54


100%|██████████| 4/4 [00:00<00:00, 10.96it/s]


Series 13 of 54


100%|██████████| 3/3 [00:01<00:00,  1.81it/s]


Series 14 of 54


100%|██████████| 2/2 [00:00<00:00, 22.63it/s]


Series 15 of 54


100%|██████████| 5/5 [00:00<00:00,  8.01it/s]


Series 16 of 54


100%|██████████| 4/4 [00:02<00:00,  1.82it/s]


Series 17 of 54


100%|██████████| 1/1 [00:00<00:00, 20.74it/s]


Series 18 of 54


100%|██████████| 1/1 [00:00<00:00, 74.85it/s]


Series 19 of 54


100%|██████████| 4/4 [00:02<00:00,  1.81it/s]


Series 20 of 54


100%|██████████| 4/4 [00:00<00:00, 10.25it/s]


Series 21 of 54


100%|██████████| 5/5 [00:00<00:00, 11.17it/s]


Series 22 of 54


100%|██████████| 1/1 [00:00<00:00, 41.97it/s]


Series 23 of 54


100%|██████████| 5/5 [00:00<00:00, 10.63it/s]


Series 24 of 54


100%|██████████| 1/1 [00:00<00:00,  3.58it/s]


Series 25 of 54


100%|██████████| 4/4 [00:00<00:00, 11.24it/s]


Series 26 of 54


100%|██████████| 4/4 [00:00<00:00, 10.36it/s]


Series 27 of 54


100%|██████████| 3/3 [00:00<00:00, 29.44it/s]


Series 28 of 54


100%|██████████| 4/4 [00:00<00:00, 10.47it/s]


Series 29 of 54


100%|██████████| 4/4 [00:00<00:00,  9.94it/s]


Series 30 of 54


100%|██████████| 2/2 [00:00<00:00,  6.43it/s]


Series 31 of 54


100%|██████████| 4/4 [00:00<00:00, 10.27it/s]


Series 32 of 54


100%|██████████| 5/5 [00:00<00:00,  8.15it/s]


Series 33 of 54


100%|██████████| 2/2 [00:00<00:00, 29.11it/s]


Series 34 of 54


100%|██████████| 1/1 [00:00<00:00, 31.46it/s]


Series 35 of 54


100%|██████████| 4/4 [00:00<00:00, 10.31it/s]


Series 36 of 54


100%|██████████| 1/1 [00:00<00:00, 26.97it/s]


Series 37 of 54


100%|██████████| 3/3 [00:00<00:00,  8.81it/s]


Series 38 of 54


100%|██████████| 4/4 [00:00<00:00, 11.18it/s]


Series 39 of 54


100%|██████████| 4/4 [00:00<00:00, 11.20it/s]


Series 40 of 54


100%|██████████| 1/1 [00:00<00:00, 70.70it/s]


Series 41 of 54


100%|██████████| 5/5 [00:00<00:00,  8.44it/s]


Series 42 of 54


100%|██████████| 3/3 [00:00<00:00,  8.57it/s]


Series 43 of 54


100%|██████████| 3/3 [00:00<00:00, 24.50it/s]


Series 44 of 54


100%|██████████| 1/1 [00:00<00:00, 37.75it/s]


Series 45 of 54


100%|██████████| 3/3 [00:00<00:00,  9.73it/s]


Series 46 of 54


100%|██████████| 4/4 [00:00<00:00, 11.85it/s]


Series 47 of 54


100%|██████████| 3/3 [00:00<00:00,  9.58it/s]


Series 48 of 54


100%|██████████| 2/2 [00:00<00:00, 25.36it/s]


Series 49 of 54


100%|██████████| 1/1 [00:00<00:00,  4.23it/s]


Series 50 of 54


100%|██████████| 5/5 [00:03<00:00,  1.60it/s]


Series 51 of 54


100%|██████████| 2/2 [00:00<00:00,  6.03it/s]


Series 52 of 54


100%|██████████| 4/4 [00:00<00:00, 12.04it/s]


Series 53 of 54


100%|██████████| 2/2 [00:00<00:00, 33.86it/s]


Series 54 of 54


100%|██████████| 6/6 [00:00<00:00,  9.27it/s]


In [8]:
# Assemble the row-aligned accumulators from the inference loop into one frame:
# one row per (series, step) with its label and predicted confidence.
df_validation = pd.DataFrame({
    'step': steps,
    'num_series_id': num_series_ids,
    'critical_event_point': true_values,
    'prediction_confidence': confidence_list,
})

In [9]:
# Preview the assembled prediction frame (pandas truncates the repr to head and tail rows).
df_validation

Unnamed: 0,step,num_series_id,critical_event_point,prediction_confidence
0,0,7,0.0,0.000003
1,12,7,0.0,0.000003
2,24,7,0.0,0.000004
3,36,7,0.0,0.000004
4,48,7,0.0,0.000004
...,...,...,...,...
1373679,620581,276,0.0,0.007201
1373680,620593,276,0.0,0.007468
1373681,620605,276,0.0,0.007698
1373682,620617,276,0.0,0.007942


In [10]:
# Step-count constants; per the inline notes, 12 steps correspond to one minute.
# NOTE(review): smoothing_length is not referenced by any of the visible cells - confirm before removing.
smoothing_length = 30 * 12 # 30min
# Width, in steps, of the window that get_events_smoothed hands to get_events_day.
day_step_len = 12 * 60 * 24 # 24h

def get_events_day(series_id, X, prominence):
    """Extract at most one onset and one wakeup event from a single window.

    Peaks of ``X.prediction_confidence`` are located with
    ``scipy.signal.find_peaks`` using the given ``prominence`` threshold.
    Only the first two peaks in positional order are kept: the first is
    labelled ``'onset'``, the second ``'wakeup'``.

    Args:
        series_id: Identifier copied verbatim into every returned event.
        X: Non-empty DataFrame with a ``prediction_confidence`` column. Its
            first index label is added to each peak position to obtain the
            reported step, so the index is expected to be contiguous steps.
        prominence: Minimum peak prominence forwarded to ``find_peaks``.

    Returns:
        A list of 0-2 dicts with keys ``series_id``, ``step``, ``event`` and
        ``score`` (the peak's prominence).
    """
    window_start = X.index[0]
    peak_positions, peak_properties = find_peaks(
        X.prediction_confidence.values, prominence=prominence
    )
    # zip stops at the shortest input, so fewer than two peaks simply yields
    # fewer events.
    labelled_peaks = zip(
        ('onset', 'wakeup'),
        peak_positions[:2],
        peak_properties['prominences'][:2],
    )
    return [
        {'series_id': series_id, 'step': window_start + position, 'event': label, 'score': score}
        for label, position, score in labelled_peaks
    ]

def get_events_smoothed(test_series, prominence) :
    """Detect onset/wakeup events for every series in ``test_series``.

    For each distinct ``num_series_id`` the rows are indexed by ``step``,
    re-indexed onto every integer step between the series' first and last
    step (gaps are forward-filled), cut into consecutive windows of
    ``day_step_len`` rows, and each window is passed to ``get_events_day``.

    NOTE(review): despite the name, no smoothing is applied here.

    Args:
        test_series: DataFrame with ``num_series_id``, ``step`` and
            ``prediction_confidence`` columns.
        prominence: Peak prominence threshold forwarded to ``get_events_day``.

    Returns:
        DataFrame with columns ``row_id``, ``series_id``, ``step``, ``event``
        and ``score``. The columns are present even when no event is found,
        so downstream code can rely on the schema.
    """
    event_columns = ['series_id', 'step', 'event', 'score']
    events = []

    for idx in tqdm(test_series['num_series_id'].unique()):
        X = test_series[test_series.num_series_id == idx].set_index('step')
        # Densify to one row per step so a positional offset inside a window
        # plus the window's first index label equals the absolute step.
        X = X.reindex(range(X.index.min(), X.index.max() + 1)).ffill()

        for j in range(0, len(X), day_step_len):
            # Explicit positional slicing; a bare integer slice on the frame
            # is positional here as well, but `.iloc` states it outright.
            events.extend(get_events_day(idx, X.iloc[j:j + day_step_len], prominence))

    # Fixing the columns keeps the schema stable when `events` is empty;
    # for non-empty input it matches the dict key order of the events.
    return (
        pd.DataFrame(events, columns=event_columns)
        .reset_index()
        .rename(columns={'index': 'row_id'})
    )

In [14]:
# Replace the string series_id with the numeric num_series_id so the ground
# truth uses the same identifiers that get_events_smoothed writes into its
# `series_id` column.
# NOTE(review): presumably competition_score matches events on `series_id` - confirm.
df_validation_events['series_id'] = df_validation_events['num_series_id']

In [15]:
# Sweep the find_peaks prominence threshold over 0.1, 0.2, ..., 0.9 and print
# the competition score obtained with each value.
# predicted_validation_events is overwritten on every pass, so after the loop
# it holds the events for the last threshold (0.9), not the best one.
for i in range(1, 10):
    prominence = i / 10
    predicted_validation_events = get_events_smoothed(df_validation, prominence)
    score = competition_score(df_validation_events, predicted_validation_events)
    print(f'Prominence: {prominence}, Score: {score}') 

100%|██████████| 54/54 [00:01<00:00, 51.54it/s]


Prominence: 0.1, Score: 0.34193061364990274


100%|██████████| 54/54 [00:01<00:00, 53.57it/s]


Prominence: 0.2, Score: 0.40818223387853025


100%|██████████| 54/54 [00:01<00:00, 51.13it/s]


Prominence: 0.3, Score: 0.4304095073024963


100%|██████████| 54/54 [00:01<00:00, 52.16it/s]


Prominence: 0.4, Score: 0.43772746807369106


100%|██████████| 54/54 [00:01<00:00, 51.61it/s]


Prominence: 0.5, Score: 0.42678671678452884


100%|██████████| 54/54 [00:01<00:00, 50.46it/s]


Prominence: 0.6, Score: 0.38336422057291686


100%|██████████| 54/54 [00:01<00:00, 51.25it/s]


Prominence: 0.7, Score: 0.28840502890155445


100%|██████████| 54/54 [00:01<00:00, 53.23it/s]


Prominence: 0.8, Score: 0.14885670461336803


100%|██████████| 54/54 [00:01<00:00, 53.30it/s]


Prominence: 0.9, Score: 0.0011095513349848609


In [16]:
# Re-extract events with prominence 0.4, the threshold with the highest score
# in the recorded sweep output (about 0.438).
predicted_validation_events = get_events_smoothed(df_validation, 0.4)

100%|██████████| 54/54 [00:01<00:00, 52.37it/s]


In [17]:
# Preview the detected events: one row per event with its step, type and prominence score.
predicted_validation_events

Unnamed: 0,row_id,series_id,step,event,score
0,0,7,7901,onset,0.873822
1,1,7,14549,wakeup,0.730794
2,2,7,36954,onset,0.568854
3,3,7,60523,onset,0.601836
4,4,7,77395,onset,0.882182
...,...,...,...,...,...
1744,1744,276,559781,onset,0.722449
1745,1745,276,565925,wakeup,0.469872
1746,1746,276,577349,onset,0.656788
1747,1747,276,584081,wakeup,0.773573


## Evaluation

In [18]:
# Final validation score for the events extracted with the chosen prominence.
competition_score(df_validation_events, predicted_validation_events)

0.43772746807369106

In [19]:
# Persist the per-step predictions for later analysis.
# Create the target directory first: to_parquet does not create missing
# parent directories and would fail on a fresh checkout.
# NOTE(review): the file name says "4h" while the inputs come from the
# "transformer-downsample-2h" dataset - confirm the name is intended.
os.makedirs('./test', exist_ok=True)
df_validation.to_parquet('./test/predicted_series_cp_4h.parquet')