In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import functools
import scipy.signal
import tsfresh
from tqdm import tqdm
import inspect
import sys
sys.path.append("src/data_processing/")
import dp_utils as dp

In [None]:
%%time

# Load the raw training CSV. The file's own header row is skipped and the two
# columns are renamed to 's' and 'ttf'.
# NOTE(review): absolute, machine-specific path -- consider a configurable data
# directory so the notebook runs on other machines.
filepath = '/home/sergey/Projects/Kaggle/LANL-Earthquake-Prediction/train.csv'

# The dtype mapping must be keyed by the names assigned through `names`; keys
# using the original header names match no column, so the compact dtypes were
# never applied.
df = pd.read_csv(
    filepath,
    dtype={'s': np.int16, 'ttf': np.float32},
    skiprows=1,
    names=['s', 'ttf'],
)

In [None]:
# Persist the frame currently bound to `df` as HDF5 under the key 'table'.
# NOTE(review): absolute, machine-specific path; `filepath` is rebound here and
# again by later cells, so the value depends on which cell ran last.
filepath = '/home/sergey/Projects/Kaggle/LANL-Earthquake-Prediction/train/train.h5'

df.to_hdf(filepath, key='table')

In [None]:
# Alternative input locations kept for reference (full training set on two
# different machines); the active path below points at the short file.
# filepath = '/home/sergey/Projects/Kaggle/LANL-Earthquake-Prediction/train/train.h5'
# filepath = '/Users/sergey/Dev/Kaggle/LANL-Earthquake-Prediction/train/train.h5'
filepath = '/Users/sergey/Dev/Kaggle/LANL-Earthquake-Prediction/train/train_short.h5'

# Rebinds `df` to the table stored under key 'table', replacing any frame
# loaded by an earlier cell.
df = pd.read_hdf(filepath, key='table')

In [None]:
# Read the processed table (key 'table') from a path relative to the kernel's
# working directory.
# NOTE(review): dp_config elsewhere in this notebook uses
# "../../data/train_short_processed.h5" -- confirm both refer to the same file.
filepath = './data/train_short_processed.h5'

df_processed = pd.read_hdf(filepath, key='table')

In [None]:
# Preview the first ten rows of the processed table.
df_processed.head(10)

In [None]:
from inspect import getmembers, isfunction

# Collect every plain function exposed by the `dp` module: names in one list,
# the function objects (same order) in the other. `getmembers` filters with
# the predicate itself and returns (name, value) pairs sorted by name.
funcname_list = [name for name, _ in getmembers(dp, isfunction)]
funcref_list = [func for _, func in getmembers(dp, isfunction)]

In [None]:
import json

# Base configuration for the data-processing run: input/output HDF5 paths,
# the window length, and an initially empty mapping of routines that a later
# cell fills in.
dp_config = dict(
    data_path="../../data/train_short.h5",
    data_processed_path="../../data/train_short_processed.h5",
    window_length=10000,
    routines={},
)

In [None]:
# Register every routine in the config: switched on, with the default values
# of its signature recorded as tunable parameters.
# NOTE(review): the last entry of funcref_list is skipped -- confirm which
# function that is and why it is excluded.
for obj in funcref_list[:-1]:
    params = {
        name: param.default
        for name, param in inspect.signature(obj).parameters.items()
        # Identity check against the public sentinel: `!=` would call the
        # default value's own __ne__, which misbehaves for array-like defaults.
        if param.default is not inspect.Parameter.empty
    }
    dp_config["routines"][obj.__name__] = {"on": True, "params": params}

dp_config

In [None]:
# Run every collected routine (all but the last entry) on the 's' column and
# place the results side by side. The list built from the `dp` module is named
# `funcref_list`; `functions_list` is not defined anywhere in this notebook.
dfp = pd.concat([func(df['s']) for func in funcref_list[:-1]], axis=1)

In [None]:
from dp_utils import *

In [None]:
# Show the parameter dict recorded for each configured routine.
for routine in dp_config['routines'].values():
    print(routine['params'])

In [None]:
# Look up an attribute of the `dp` module by name.
# NOTE(review): `test` is not assigned in any cell shown in this notebook, so
# on a fresh kernel this raises NameError -- presumably a string holding a
# function name was intended; confirm.
getattr(dp, test)

In [None]:
# NOTE(review): `desc` only appears as a tqdm keyword argument in this
# notebook; no visible cell binds it as a variable, so this likely raises
# NameError on a fresh kernel -- leftover scratch cell?
desc

In [145]:
# Scratch check: an empty dict counts as "nothing there", so "H" is printed.
t = {}
if len(t) == 0:
    print("H")

H


In [140]:
def window_decorator(window_size=10000):
    """Turn a per-window feature function into one that maps over a whole series.

    The decorated function is called once for every consecutive, non-overlapping
    slice of ``window_size`` rows (the final slice may be shorter) and receives
    that slice's ``.values`` as its first argument. Any further positional and
    keyword arguments given to the wrapper are forwarded unchanged.

    Parameters
    ----------
    window_size : int
        Number of rows per window; must be at least 1.

    Returns
    -------
    callable
        A decorator. The wrapped function takes a pandas object (anything with
        ``.shape`` and ``.iloc``) and returns a ``pd.DataFrame`` holding one row
        per window in a single column named after the wrapped function.

    Raises
    ------
    ValueError
        If ``window_size`` is smaller than 1.
    """
    if window_size < 1:
        raise ValueError(
            "window_size must be a positive integer, got {!r}".format(window_size)
        )

    def window_calc(func):
        # The first named positional parameter is only used for the progress
        # label; fall back to "df" when the function declares none.
        positional = inspect.getfullargspec(func).args
        first_arg = positional[0] if positional else "df"

        @functools.wraps(func)
        def wrapper(df, *args, **kwargs):
            # Label looks like "name(df, key=value, ...)" and stays well-formed
            # for zero, one or many keyword arguments.
            call_parts = [first_arg]
            call_parts.extend("{}={}".format(k, v) for k, v in kwargs.items())
            label = "{}({})".format(func.__name__, ", ".join(call_parts))

            starts = range(0, df.shape[0], window_size)
            # Only the window plus the caller's extra arguments are forwarded;
            # the full input is not passed again to every window call.
            values = [
                func(df.iloc[start:start + window_size].values, *args, **kwargs)
                for start in tqdm(starts, desc=label)
            ]
            return pd.DataFrame(values, columns=[func.__name__])
        return wrapper
    return window_calc

In [141]:
@window_decorator()
def w_autocorrelation(df, *args, lag=100, **kwargs):
    """Autocorrelation of one window at the given ``lag``, computed by tsfresh.

    ``df`` holds the values of a single window; extra positional and keyword
    arguments are accepted and ignored.
    """
    calculators = tsfresh.feature_extraction.feature_calculators
    return calculators.autocorrelation(df, lag=lag)
@window_decorator()
def w_psd(df, *args, fs=4e6, **kwargs):
    """Sum of the periodogram estimate of one window, with sampling rate ``fs``.

    ``df`` holds the values of a single window; extra positional and keyword
    arguments are accepted and ignored.
    """
    _frequencies, power = scipy.signal.periodogram(df, fs=fs)
    return np.sum(power)
@window_decorator()
def w_min(df, *args, **kwargs):
    """Smallest value of one window.

    ``df`` holds the values of a single window; extra positional and keyword
    arguments are accepted and ignored.
    """
    smallest = np.min(df)
    return smallest
@window_decorator()
def w_absolute_sum_of_changes(df, *args, **kwargs):
    """tsfresh ``absolute_sum_of_changes`` feature of one window.

    ``df`` holds the values of a single window; extra positional and keyword
    arguments are accepted and ignored.
    """
    calculators = tsfresh.feature_extraction.feature_calculators
    return calculators.absolute_sum_of_changes(df)
@window_decorator()
def w_sample_entropy(df, *args, **kwargs):
    """tsfresh ``sample_entropy`` feature of one window.

    ``df`` holds the values of a single window; extra positional and keyword
    arguments are accepted and ignored.
    """
    calculators = tsfresh.feature_extraction.feature_calculators
    return calculators.sample_entropy(df)
@window_decorator()
def w_symmetry_looking(df, *args, r=0.1, **kwargs):
    """tsfresh ``symmetry_looking`` feature of one window for a single ``r``.

    ``df`` holds the values of a single window; extra positional and keyword
    arguments are accepted and ignored.

    Returns the feature value for the requested ``r`` (a boolean-like scalar).
    """
    # symmetry_looking is a tsfresh "combiner": it takes a *list* of {"r": ...}
    # dicts and yields one (name, value) pair per entry. A bare dict would be
    # iterated key by key and fail, so wrap it and unpack the single result.
    results = tsfresh.feature_extraction.feature_calculators.symmetry_looking(
        df, param=[{"r": r}]
    )
    return list(results)[0][1]

In [142]:
# Windowed lag-10 autocorrelation of the 's' column; the result has one row
# per window in a column named 'w_autocorrelation'.
w_autocorrelation(df['s'], lag=10)


w_autocorrelation(df, lag=10):   0%|          | 0/100 [00:00<?, ?it/s][A
w_autocorrelation(df, lag=10): 100%|██████████| 100/100 [00:00<00:00, 3270.44it/s][A

Unnamed: 0,w_autocorrelation
0,-0.683628
1,-0.451760
2,-0.231931
3,-0.437729
4,-0.383117
5,-0.257779
6,-0.471590
7,-0.323651
8,-0.431840
9,-0.451727


In [None]:
@window_decorator()
def w_min(df, *args, **kwargs):
    """Smallest value of one window.

    ``df`` holds the values of a single window; extra positional and keyword
    arguments are accepted and ignored.
    """
    return np.min(df)


@window_decorator()
def w_max(df, *args, **kwargs):
    """Largest value of one window.

    ``df`` holds the values of a single window; extra positional and keyword
    arguments are accepted and ignored.
    """
    return np.max(df)

In [None]:
class DP:
    """Windowed summary statistics over the pandas object stored in ``self.df``.

    Each ``w_*`` method returns what ``window_decorator`` produces: a DataFrame
    with one row per window in a single column named after the method.
    """

    def __init__(self, df):
        # Data that the w_* methods split into windows.
        self.df = df

    # Decorating the methods directly would make window_decorator treat the DP
    # instance (the first positional argument) as the data and fail on
    # `.shape`. Each method therefore decorates an inner per-window function
    # and feeds it `self.df` explicitly.

    def w_mean(self, *args, **kwargs):
        """Mean of every window of ``self.df``."""
        @window_decorator()
        def w_mean(df, *_args, **_kwargs):
            return np.mean(df)

        return w_mean(self.df, *args, **kwargs)

    def w_std(self, *args, **kwargs):
        """Standard deviation (``np.std``) of every window of ``self.df``."""
        @window_decorator()
        def w_std(df, *_args, **_kwargs):
            return np.std(df)

        return w_std(self.df, *args, **kwargs)

# TODO: candidate methods not yet ported into the class (kept for reference).

#     @window_decorator()
#     def w_min(df, *args, **kwargs):
#         return np.min(df)

#     @window_decorator()
#     def w_max(df, *args, **kwargs):
#         return np.max(df)

#     @window_decorator()
#     def w_abs_energy(df, *args, **kwargs):
#         return tsfresh.feature_extraction.feature_calculators.abs_energy(df)

#     @window_decorator()
#     def w_binned_entropy(df, *args, **kwargs):
#         return tsfresh.feature_extraction.feature_calculators.binned_entropy(df, kwargs['max_bins'])