In [1]:
import sys
import gc
sys.path.append('../../')

from scoring.event_detection_matrix import competition_score
from models import PyTorchMLP, LightningModel

from sklearn.preprocessing import StandardScaler

import pandas as pd
import numpy as np
import torch
from tqdm import tqdm
from torch.utils.data import DataLoader, TensorDataset
from sklearn.metrics import accuracy_score

In [None]:
# Restore the trained LightningModel from a checkpoint file saved on disk.
# The path is relative to this notebook's working directory.
checkpoint_path = "../../models/neural-nappers/wv1kmbtj/checkpoints/epoch=4-step=871060.ckpt"
model = LightningModel.load_from_checkpoint(checkpoint_path)

In [2]:
# Load the validation events table; make_features() below only reads its
# `series_id` column to decide which series to process.
df_validation_events = pd.read_csv('../../data/processed/validation_events_split.csv')

In [3]:
# Names of the windowed feature columns created by make_features_chunk().
# Negative lag indices are built with shift(-i), i.e. they look ahead in time.
LAGS_FUTURE = [f"t_lag_{i}" for i in range(-1, -25, -1)]

# Positive lag indices look back in time, listed from the oldest (24) to the
# most recent (1). This is materialized as a list on purpose: a bare
# reversed() iterator would be exhausted after being unpacked into FEATURES
# and would silently yield nothing on any later use.
LAGS_PAST = [f"t_lag_{i}" for i in range(24, 0, -1)]

# Full window in chronological order: 24 past steps, the current step, 24 future steps.
FEATURES = [*LAGS_PAST, 't_0', *LAGS_FUTURE]

In [4]:
def save_chunk(num_series_id, chunk):
    """Serialize the windowed features of one series as a float32 tensor.

    Args:
        num_series_id: Identifier of the series; used as the output file name.
        chunk: DataFrame containing every column listed in FEATURES, where each
            cell holds a two-element list (as built by make_features_chunk).

    Side effects:
        Writes ``./data/<num_series_id>.pt`` with a tensor of shape
        (rows, len(FEATURES), 2). The ``./data`` directory is created when it
        does not exist yet, because torch.save does not create parent
        directories.
    """
    import os  # local import keeps this cell self-contained

    values = chunk[FEATURES].values
    series_length, series_columns = values.shape

    # Each cell is a 2-element list: flatten the object array row-major, stack
    # the pairs into an (N, 2) array and fold it back into (rows, features, 2).
    X = torch.from_numpy(
        np.vstack(np.ravel(values)).reshape(series_length, series_columns, 2)
    ).to(torch.float32)

    os.makedirs('./data', exist_ok=True)
    torch.save(X, './data/' + str(num_series_id) + ".pt")

In [5]:
def make_features_chunk(num_series_id):
    """Load a single series and attach its windowed [anglez, enmo] columns.

    Only the rows whose ``series_id`` equals ``num_series_id`` are read from
    the validation parquet file. The returned frame keeps the original columns
    and gains:

    * ``t_0``: the [anglez, enmo] pair of the row itself,
    * ``t_lag_1`` .. ``t_lag_24``: the pair shifted forward by that many rows
      (earlier readings), with the leading gap back-filled,
    * ``t_lag_-1`` .. ``t_lag_-24``: the pair shifted backward (later
      readings), with the trailing gap forward-filled.

    The index of the result is reset to 0..n-1.
    """
    frame = pd.read_parquet(
        '../../data/processed/validation_series_split.parquet',
        filters=[('series_id', '=', num_series_id)],
    )

    def as_pairs(anglez_values, enmo_values):
        # One two-element list per row, in [anglez, enmo] order.
        return pd.concat([anglez_values, enmo_values], axis=1).values.tolist()

    frame['t_0'] = as_pairs(frame['anglez'], frame['enmo'])

    # Positive offsets first, then negative ones, so the column order is
    # t_lag_1..t_lag_24 followed by t_lag_-1..t_lag_-24.
    for offset in [*range(1, 25), *range(-1, -25, -1)]:
        shifted_anglez = frame['anglez'].shift(offset)
        shifted_enmo = frame['enmo'].shift(offset)

        if offset > 0:
            # Shifting down leaves NaNs at the top of the series.
            shifted_anglez = shifted_anglez.bfill()
            shifted_enmo = shifted_enmo.bfill()
        else:
            # Shifting up leaves NaNs at the bottom of the series.
            shifted_anglez = shifted_anglez.ffill()
            shifted_enmo = shifted_enmo.ffill()

        frame[f't_lag_{offset}'] = as_pairs(shifted_anglez, shifted_enmo)

    return frame.reset_index(drop=True)

In [6]:
def make_features(series):
    """Build and save the feature tensor of every series and return an index.

    For each unique ``series_id`` in ``series`` the windowed features are
    computed with make_features_chunk() and written to disk with save_chunk().

    Args:
        series: DataFrame with a ``series_id`` column; only that column is read.

    Returns:
        DataFrame with columns ``series_id``, ``step`` and ``series_index``,
        where ``series_index`` is the row position of the step inside its own
        series (and therefore inside the saved tensor). When ``series``
        contains no series at all, an empty frame with these columns is
        returned instead of letting pd.concat raise on an empty list.
    """
    overview_columns = ['series_id', 'step', 'series_index']
    overview_data = []

    for num_series_id in tqdm(series.series_id.unique()):
        chunk = make_features_chunk(num_series_id)
        save_chunk(num_series_id, chunk)

        # The chunk index is 0..n-1, so exposing it as a column yields the
        # position of each step within the series.
        overview_data.append(
            chunk[['series_id', 'step']]
            .reset_index()
            .rename(columns={'index': 'series_index'})[overview_columns]
        )

        # Chunks hold 49 list-valued columns; release them before loading the next one.
        del chunk
        gc.collect()

    if not overview_data:
        return pd.DataFrame(columns=overview_columns)

    return pd.concat(overview_data).reset_index(drop=True)

In [None]:
# Compute and save the feature tensors for every series listed in the
# validation events, keeping the (series_id, step, series_index) lookup table.
overview = make_features(df_validation_events)

In [None]:
def get_events_smoothed(test_series, smoothing_length=12 * 30, min_period_length=12 * 30):
    """Turn per-step class confidences into onset / wakeup events.

    For every series the confidences are smoothed with a centered rolling
    mean, the smoothed ``prediction_confidence_0`` is rounded to a binary
    state, and the rising / falling edges of that state become onset / wakeup
    candidates. Candidates are paired in order and only pairs spanning at
    least ``min_period_length`` steps are reported.

    Args:
        test_series: DataFrame with the columns ``series_id``, ``step``,
            ``prediction_confidence_0`` and ``prediction_confidence_1``.
            It is not modified.
        smoothing_length: Window size (in rows) of the centered rolling mean.
            Defaults to 12 * 30, labelled "30 Minutes" in the original code.
        min_period_length: Minimum ``wakeup - onset`` distance (in steps) for
            a period to be kept. Defaults to 12 * 30.

    Returns:
        DataFrame with the columns ``row_id``, ``series_id``, ``step``,
        ``event`` ('onset' or 'wakeup') and ``score``; both events of one
        period share the same score. The columns are present even when no
        event was found.
    """
    event_columns = ['row_id', 'series_id', 'step', 'event', 'score']
    events = []

    for idx in tqdm(test_series['series_id'].unique()):
        # Copy the slice so the helper columns below are not written into a
        # view of the caller's frame (avoids SettingWithCopyWarning).
        X = test_series[test_series.series_id == idx].copy()

        # NOTE(review): the binary state is derived from prediction_confidence_0
        # while the reported score averages prediction_confidence_1 - confirm
        # which class denotes sleep.
        X["score"] = X["prediction_confidence_1"].rolling(smoothing_length, center=True).mean().bfill().ffill()
        X["smooth"] = X["prediction_confidence_0"].rolling(smoothing_length, center=True).mean().bfill().ffill()

        # Binarize the smoothing column
        X["smooth"] = X["smooth"].round()

        # Rising edges are predicted onsets, falling edges predicted wakeups.
        transitions = X["smooth"].diff()
        pred_onsets = X.loc[transitions > 0, 'step'].tolist()
        pred_wakeups = X.loc[transitions < 0, 'step'].tolist()

        # A complete period needs at least one onset and one wakeup; calling
        # min()/max() on an empty list would raise ValueError.
        if not pred_onsets or not pred_wakeups:
            continue

        # Drop a wakeup that has no preceding onset.
        if min(pred_wakeups) < min(pred_onsets):
            pred_wakeups = pred_wakeups[1:]
            if not pred_wakeups:
                # The only wakeup came before the first onset: nothing to pair.
                continue

        # Drop a trailing onset that is never closed by a wakeup.
        if max(pred_onsets) > max(pred_wakeups):
            pred_onsets = pred_onsets[:-1]

        # Keep only the periods that are long enough.
        sleep_periods = [
            (onset, wakeup)
            for onset, wakeup in zip(pred_onsets, pred_wakeups)
            if wakeup - onset >= min_period_length
        ]

        for onset, wakeup in sleep_periods:
            # Score the period with the mean smoothed confidence over its steps (inclusive).
            score = X[(X['step'] >= onset) & (X['step'] <= wakeup)]['score'].mean()

            events.append({'row_id': len(events), 'series_id': idx, 'step': onset, 'event': 'onset', 'score': score})
            events.append({'row_id': len(events), 'series_id': idx, 'step': wakeup, 'event': 'wakeup', 'score': score})

    return pd.DataFrame(events, columns=event_columns)