# Feature Engineering

In [1]:
import pandas as pd
import numpy as np

import time
import gc

## Daten laden

In [2]:
train = pd.read_parquet('../data/processed/train.parquet')

## Neue Features erstellen

In [3]:
# Hyperparams
NUM_STEPS = 20

In [4]:
def make_features_chunck(series_id, periods=NUM_STEPS):
    df = pd.read_parquet('../data/processed/train.parquet', filters=[('series_id','=',series_id)])
    
    print('Generating time features')
    df['timestamp'] = pd.to_datetime(df['timestamp']).apply(lambda t: t.tz_localize(None))
    df["hour"] = df["timestamp"].dt.hour
    
    print('Generating statistical features')
    df["anglez_abs"] = abs(df["anglez"])
    df["anglez_diff"] = df.groupby('series_id')['anglez'].diff(periods=periods).bfill().astype('float32')
    df["enmo_diff"] = df.groupby('series_id')['enmo'].diff(periods=periods).bfill().astype('float32')
    df['anglez_x_enmo'] = df['anglez'] * df['enmo']
    
    print('Generating rolling features')
    df["anglez_rolling_mean"] = df["anglez"].rolling(periods,center=True).mean().bfill().ffill().astype('float32')
    df["enmo_rolling_mean"] = df["enmo"].rolling(periods,center=True).mean().bfill().ffill().astype('float32')
    df["anglez_rolling_max"] = df["anglez"].rolling(periods,center=True).max().bfill().ffill().astype('float32')
    df["enmo_rolling_max"] = df["enmo"].rolling(periods,center=True).max().bfill().ffill().astype('float32')
    df["anglez_rolling_min"] = df["anglez"].rolling(periods,center=True).min().bfill().ffill().astype('float32')
    df["enmo_rolling_min"] = df["enmo"].rolling(periods,center=True).min().bfill().ffill().astype('float32')
    df["anglez_rolling_std"] = df["anglez"].rolling(periods,center=True).std().bfill().ffill().astype('float32')
    df["enmo_rolling_std"] = df["enmo"].rolling(periods,center=True).std().bfill().ffill().astype('float32')    
    
    return df

In [5]:
def make_features(train):
    train_data = []

    total_len = train.series_id.nunique()

    for i, series_id in enumerate(train.series_id.unique()):
        print(f'Step {i+1} of {total_len}')
        chunck = make_features_chunck(series_id)
        train_data.append(chunck)
        del chunck
        gc.collect()

    return pd.concat(train_data).reset_index(drop=True)

In [6]:
start_time = time.time()
train_with_features = make_features(train)
print(f'Feature Engineering took {time.time() - start_time} seconds')

Step 1 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 2 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 3 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 4 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 5 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 6 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 7 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 8 of 269
Generating time features


  df['timestamp'] = pd.to_datetime(df['timestamp']).apply(lambda t: t.tz_localize(None))


Generating statistical features
Generating rolling features
Step 9 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 10 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 11 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 12 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 13 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 14 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 15 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 16 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 17 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 18 of 269
Generating time features


  df['timestamp'] = pd.to_datetime(df['timestamp']).apply(lambda t: t.tz_localize(None))


Generating statistical features
Generating rolling features
Step 19 of 269
Generating time features


  df['timestamp'] = pd.to_datetime(df['timestamp']).apply(lambda t: t.tz_localize(None))


Generating statistical features
Generating rolling features
Step 20 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 21 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 22 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 23 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 24 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 25 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 26 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 27 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 28 of 269
Generating time features


  df['timestamp'] = pd.to_datetime(df['timestamp']).apply(lambda t: t.tz_localize(None))


Generating statistical features
Generating rolling features
Step 29 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 30 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 31 of 269
Generating time features


  df['timestamp'] = pd.to_datetime(df['timestamp']).apply(lambda t: t.tz_localize(None))


Generating statistical features
Generating rolling features
Step 32 of 269
Generating time features


  df['timestamp'] = pd.to_datetime(df['timestamp']).apply(lambda t: t.tz_localize(None))


Generating statistical features
Generating rolling features
Step 33 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 34 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 35 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 36 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 37 of 269
Generating time features


  df['timestamp'] = pd.to_datetime(df['timestamp']).apply(lambda t: t.tz_localize(None))


Generating statistical features
Generating rolling features
Step 38 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 39 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 40 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 41 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 42 of 269
Generating time features


  df['timestamp'] = pd.to_datetime(df['timestamp']).apply(lambda t: t.tz_localize(None))


Generating statistical features
Generating rolling features
Step 43 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 44 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 45 of 269
Generating time features


  df['timestamp'] = pd.to_datetime(df['timestamp']).apply(lambda t: t.tz_localize(None))


Generating statistical features
Generating rolling features
Step 46 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 47 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 48 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 49 of 269
Generating time features


  df['timestamp'] = pd.to_datetime(df['timestamp']).apply(lambda t: t.tz_localize(None))


Generating statistical features
Generating rolling features
Step 50 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 51 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 52 of 269
Generating time features


  df['timestamp'] = pd.to_datetime(df['timestamp']).apply(lambda t: t.tz_localize(None))


Generating statistical features
Generating rolling features
Step 53 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 54 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 55 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 56 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 57 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 58 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 59 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 60 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 61 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 62 of 269
Generating time features


  df['timestamp'] = pd.to_datetime(df['timestamp']).apply(lambda t: t.tz_localize(None))


Generating statistical features
Generating rolling features
Step 68 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 69 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 70 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 71 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 72 of 269
Generating time features


  df['timestamp'] = pd.to_datetime(df['timestamp']).apply(lambda t: t.tz_localize(None))


Generating statistical features
Generating rolling features
Step 73 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 74 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 75 of 269
Generating time features


  df['timestamp'] = pd.to_datetime(df['timestamp']).apply(lambda t: t.tz_localize(None))


Generating statistical features
Generating rolling features
Step 76 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 77 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 78 of 269
Generating time features


  df['timestamp'] = pd.to_datetime(df['timestamp']).apply(lambda t: t.tz_localize(None))


Generating statistical features
Generating rolling features
Step 79 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 80 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 81 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 82 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 83 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 84 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 85 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 86 of 269
Generating time features


  df['timestamp'] = pd.to_datetime(df['timestamp']).apply(lambda t: t.tz_localize(None))


Generating statistical features
Generating rolling features
Step 87 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 88 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 89 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 90 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 91 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 92 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 93 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 94 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 95 of 269
Generating time features


  df['timestamp'] = pd.to_datetime(df['timestamp']).apply(lambda t: t.tz_localize(None))


Generating statistical features
Generating rolling features
Step 96 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 97 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 98 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 99 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 100 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 101 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 102 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 103 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 104 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 105 of 269
Generating time fea

  df['timestamp'] = pd.to_datetime(df['timestamp']).apply(lambda t: t.tz_localize(None))


Generating statistical features
Generating rolling features
Step 106 of 269
Generating time features


  df['timestamp'] = pd.to_datetime(df['timestamp']).apply(lambda t: t.tz_localize(None))


Generating statistical features
Generating rolling features
Step 107 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 108 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 109 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 110 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 111 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 112 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 113 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 114 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 115 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 116 of 269
Generating time

  df['timestamp'] = pd.to_datetime(df['timestamp']).apply(lambda t: t.tz_localize(None))


Generating statistical features
Generating rolling features
Step 120 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 121 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 122 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 123 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 124 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 125 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 126 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 127 of 269
Generating time features


  df['timestamp'] = pd.to_datetime(df['timestamp']).apply(lambda t: t.tz_localize(None))


Generating statistical features
Generating rolling features
Step 128 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 129 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 130 of 269
Generating time features


  df['timestamp'] = pd.to_datetime(df['timestamp']).apply(lambda t: t.tz_localize(None))


Generating statistical features
Generating rolling features
Step 131 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 132 of 269
Generating time features


  df['timestamp'] = pd.to_datetime(df['timestamp']).apply(lambda t: t.tz_localize(None))


Generating statistical features
Generating rolling features
Step 133 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 134 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 135 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 136 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 137 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 138 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 139 of 269
Generating time features


  df['timestamp'] = pd.to_datetime(df['timestamp']).apply(lambda t: t.tz_localize(None))


Generating statistical features
Generating rolling features
Step 140 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 141 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 142 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 143 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 144 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 145 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 146 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 147 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 148 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 149 of 269
Generating time

  df['timestamp'] = pd.to_datetime(df['timestamp']).apply(lambda t: t.tz_localize(None))


Generating statistical features
Generating rolling features
Step 167 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 168 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 169 of 269
Generating time features


  df['timestamp'] = pd.to_datetime(df['timestamp']).apply(lambda t: t.tz_localize(None))


Generating statistical features
Generating rolling features
Step 170 of 269
Generating time features


  df['timestamp'] = pd.to_datetime(df['timestamp']).apply(lambda t: t.tz_localize(None))


Generating statistical features
Generating rolling features
Step 171 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 172 of 269
Generating time features


  df['timestamp'] = pd.to_datetime(df['timestamp']).apply(lambda t: t.tz_localize(None))


Generating statistical features
Generating rolling features
Step 173 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 174 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 175 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 176 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 177 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 178 of 269
Generating time features


  df['timestamp'] = pd.to_datetime(df['timestamp']).apply(lambda t: t.tz_localize(None))


Generating statistical features
Generating rolling features
Step 179 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 180 of 269
Generating time features


  df['timestamp'] = pd.to_datetime(df['timestamp']).apply(lambda t: t.tz_localize(None))


Generating statistical features
Generating rolling features
Step 181 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 182 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 183 of 269
Generating time features


  df['timestamp'] = pd.to_datetime(df['timestamp']).apply(lambda t: t.tz_localize(None))


Generating statistical features
Generating rolling features
Step 184 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 185 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 186 of 269
Generating time features


  df['timestamp'] = pd.to_datetime(df['timestamp']).apply(lambda t: t.tz_localize(None))


Generating statistical features
Generating rolling features
Step 187 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 188 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 189 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 190 of 269
Generating time features


  df['timestamp'] = pd.to_datetime(df['timestamp']).apply(lambda t: t.tz_localize(None))


Generating statistical features
Generating rolling features
Step 191 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 192 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 193 of 269
Generating time features


  df['timestamp'] = pd.to_datetime(df['timestamp']).apply(lambda t: t.tz_localize(None))


Generating statistical features
Generating rolling features
Step 194 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 195 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 196 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 197 of 269
Generating time features


  df['timestamp'] = pd.to_datetime(df['timestamp']).apply(lambda t: t.tz_localize(None))


Generating statistical features
Generating rolling features
Step 198 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 199 of 269
Generating time features


  df['timestamp'] = pd.to_datetime(df['timestamp']).apply(lambda t: t.tz_localize(None))


Generating statistical features
Generating rolling features
Step 200 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 201 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 202 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 203 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 204 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 205 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 206 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 207 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 208 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 209 of 269
Generating time

  df['timestamp'] = pd.to_datetime(df['timestamp']).apply(lambda t: t.tz_localize(None))


Generating statistical features
Generating rolling features
Step 222 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 223 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 224 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 225 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 226 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 227 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 228 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 229 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 230 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 231 of 269
Generating time

  df['timestamp'] = pd.to_datetime(df['timestamp']).apply(lambda t: t.tz_localize(None))


Generating statistical features
Generating rolling features
Step 232 of 269
Generating time features


  df['timestamp'] = pd.to_datetime(df['timestamp']).apply(lambda t: t.tz_localize(None))


Generating statistical features
Generating rolling features
Step 233 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 234 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 235 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 236 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 237 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 238 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 239 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 240 of 269
Generating time features


  df['timestamp'] = pd.to_datetime(df['timestamp']).apply(lambda t: t.tz_localize(None))


Generating statistical features
Generating rolling features
Step 241 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 242 of 269
Generating time features


  df['timestamp'] = pd.to_datetime(df['timestamp']).apply(lambda t: t.tz_localize(None))


Generating statistical features
Generating rolling features
Step 243 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 244 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 245 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 246 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 247 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 248 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 249 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 250 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 251 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 252 of 269
Generating time

  df['timestamp'] = pd.to_datetime(df['timestamp']).apply(lambda t: t.tz_localize(None))


Generating statistical features
Generating rolling features
Step 254 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 255 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 256 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 257 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 258 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 259 of 269
Generating time features


  df['timestamp'] = pd.to_datetime(df['timestamp']).apply(lambda t: t.tz_localize(None))


Generating statistical features
Generating rolling features
Step 260 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 261 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 262 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 263 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 264 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 265 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 266 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 267 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 268 of 269
Generating time features
Generating statistical features
Generating rolling features
Step 269 of 269
Generating time

## Daten speichern

In [7]:
train_with_features.to_parquet('../data/processed/train_with_features.parquet')