# Datenaufbereitung

In [1]:
import pandas as pd

import gc

## Daten laden

In [2]:
# Load the cleaned event table. get_merged_series() reads its 'series_id',
# 'event' and 'step' columns from this module-level frame.
train_events = pd.read_csv('../data/cleaned/train_events.csv')

## Daten zusammenführen

In [3]:
def get_merged_series(series):
    """Return the samples of one series with a per-step ``awake`` label.

    Reads the rows of ``series`` from the cleaned series parquet file, takes
    the matching events from the module-level ``train_events`` frame and
    left-joins them on ``step``.

    Labelling: an ``onset`` event is encoded as 1 and a ``wakeup`` event as
    0. Every sample without an event takes the code of the *next* event in
    row order (backward fill), so samples up to and including an onset step
    are 1 and samples after it, up to and including the following wakeup
    step, are 0. Samples after the last event are labelled 1.

    Args:
        series: Value of ``series_id`` to select.

    Returns:
        DataFrame with the columns read from the parquet file plus an
        integer ``awake`` column.

    Raises:
        ValueError: If an event label other than ``onset``/``wakeup`` occurs.
    """
    train_series = pd.read_parquet(
        '../data/cleaned/train_series.parquet',
        filters=[('series_id', '=', series)],
    )

    # Events with any missing value are discarded; the samples they would
    # have labelled therefore take their label from the next remaining event.
    events = train_events.loc[train_events['series_id'] == series].dropna()

    # Explicit mapping instead of Series.replace: no reliance on implicit
    # dtype downcasting, and unknown labels surface as NaN so they can be
    # reported here rather than failing later in the integer cast.
    awake_codes = events['event'].map({'onset': 1, 'wakeup': 0})
    unknown = awake_codes.isna()
    if unknown.any():
        labels = events.loc[unknown, 'event'].unique().tolist()
        raise ValueError(f'Unexpected event labels for series {series!r}: {labels}')

    # Build a fresh two-column frame rather than assigning columns onto a
    # filtered slice of train_events (avoids chained-assignment warnings).
    events = pd.DataFrame({
        'step': events['step'].astype('int'),
        'awake': awake_codes,
    })

    merged = pd.merge(train_series, events, on='step', how='left')

    # Backward fill spreads each event code over the samples preceding it;
    # whatever follows the last event has no later event and is set to 1.
    # NOTE(review): this assumes the parquet rows are ordered by step --
    # nothing here sorts them; confirm against the cleaning step.
    merged['awake'] = merged['awake'].bfill().fillna(1).astype('int')
    return merged

In [4]:
# Build the labelled frame of every series and stack them into one frame.
# The ids are computed once so the printed total always matches the number
# of iterations (nunique() skips NaN while unique() keeps it).
series_ids = train_events.series_id.unique()
total_len = len(series_ids)

train_data = []
for i, series_id in enumerate(series_ids, start=1):
    print(f'Step {i} of {total_len}')
    train_data.append(get_merged_series(series_id))
    # The appended frame stays referenced by train_data; this only reclaims
    # temporaries left behind by get_merged_series.
    gc.collect()

# ignore_index=True numbers the rows 0..n-1 directly, so no separate
# reset_index pass is needed.
train = pd.concat(train_data, ignore_index=True)

Step 0 of 269
Step 1 of 269
Step 2 of 269
Step 3 of 269
Step 4 of 269
Step 5 of 269
Step 6 of 269
Step 7 of 269
Step 8 of 269
Step 9 of 269
Step 10 of 269
Step 11 of 269
Step 12 of 269
Step 13 of 269
Step 14 of 269
Step 15 of 269
Step 16 of 269
Step 17 of 269
Step 18 of 269
Step 19 of 269
Step 20 of 269
Step 21 of 269
Step 22 of 269
Step 23 of 269
Step 24 of 269
Step 25 of 269
Step 26 of 269
Step 27 of 269
Step 28 of 269
Step 29 of 269
Step 30 of 269
Step 31 of 269
Step 32 of 269
Step 33 of 269
Step 34 of 269
Step 35 of 269
Step 36 of 269
Step 37 of 269
Step 38 of 269
Step 39 of 269
Step 40 of 269
Step 41 of 269
Step 42 of 269
Step 43 of 269
Step 44 of 269
Step 45 of 269
Step 46 of 269
Step 47 of 269
Step 48 of 269
Step 49 of 269
Step 50 of 269
Step 51 of 269
Step 52 of 269
Step 53 of 269
Step 54 of 269
Step 55 of 269
Step 56 of 269
Step 57 of 269
Step 58 of 269
Step 59 of 269
Step 60 of 269
Step 61 of 269
Step 62 of 269
Step 63 of 269
Step 64 of 269
Step 65 of 269
Step 66 of 269
Step 

In [6]:
# Preview the first rows of the combined frame, including the 'awake' label.
train.head()

Unnamed: 0,series_id,step,timestamp,anglez,enmo,awake
0,038441c925bb,0,2018-08-14T15:30:00-0400,2.6367,0.0217,1
1,038441c925bb,1,2018-08-14T15:30:05-0400,2.6368,0.0215,1
2,038441c925bb,2,2018-08-14T15:30:10-0400,2.637,0.0216,1
3,038441c925bb,3,2018-08-14T15:30:15-0400,2.6368,0.0213,1
4,038441c925bb,4,2018-08-14T15:30:20-0400,2.6368,0.0215,1


## Aufbereitete Daten speichern

In [7]:
# Write the combined, labelled frame to the processed-data directory as parquet.
train.to_parquet('../data/processed/train.parquet')