In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import functools
import scipy.signal
from tqdm import tqdm
import inspect
import pickle
import sys
import json
import os
sys.path.append('../data_processing/')
sys.path.append('../validation/')

In [None]:
%%time 

filepath = '../../data/test/seg_00030f.csv'

df = pd.read_csv(filepath, 
                 dtype={'acoustic_data': np.int16, 'time_to_failure': np.float32}, 
                 skiprows=1, 
                 names=['s', 'ttf'])

In [2]:
filepath = '../../data/train_short.h5'
df = pd.read_hdf(filepath, key='table')

In [3]:
def get_function_descriptor(func, extra_params):

    """
    Parse all function args and get a complete description

    Parameters
    ----------
    func : function reference object
    extra_params : external params passed to decorated function (i.e. window size, desc_line, etc...)

    Returns
    -------
    String with function name and its parameters
    """

    inspect_params = inspect.getfullargspec(func)
    funcname_line = func.__name__
    args_line = "{}, ".format(*inspect_params.args)
    kwargs_line = ', '.join("{}={}".format(k, v) for k, v in extra_params.items())
    if inspect_params.kwonlydefaults is not None:
        kwonlydefaults_line = ', (' + ', '.join("{}={}".format(k, v) for k, v in inspect_params.kwonlydefaults.items()) \
                              + ')'
    else:
        kwonlydefaults_line = ''
    
    desc_line = funcname_line + '(' + args_line + kwargs_line + kwonlydefaults_line + ')'

    return desc_line

In [4]:
def window_decorator(func):
    @functools.wraps(func)
    def wrapper(self, *args, **kwargs):
        window_size = kwargs['window_size']
        window_stride = kwargs['window_stride']
        
        df = self.data
        if "verbose" in kwargs:
            verbose = kwargs['verbose']
        else:
            verbose = True
        if "desc_line" in kwargs:
            desc_line = kwargs['desc_line']
        else:
            desc_line = get_function_descriptor(func, kwargs)
            
        if desc_line + '.h5' in os.listdir(self.save_dir):
            self.data = pd.read_hdf(self.save_dir + desc_line + '.h5', key='table')
            return self
        
        self.name.append(desc_line)
        
        temp = []
        for i in tqdm(range(0, df.shape[0] - window_size, window_stride),
                      desc=desc_line, file=sys.stdout, disable=not verbose):
            batch = df.iloc[i: i + window_size].values
            temp.append(func(self, batch, *args, **kwargs).data)
            
        if verbose:
            tqdm.write(f"\t window decorator for {func.__name__}: ")
            tqdm.write("\t - window size: {}".format(window_size))
            tqdm.write("\t - window stride: {}".format(window_stride))
        
        if hasattr(temp[0], 'shape') and temp[0].shape is not ():
            out_features = temp[0].shape[0]
            column_names = ['-'.join(self.name) + '_' + str(i) for i in range(out_features)]
        else:
            column_names = ['-'.join(self.name)]
            
        self.data = pd.DataFrame(temp, columns=column_names)
        return self
    return wrapper

In [5]:
class Feature:
    def __init__(self, df, save_dir):
        self.data = df
        self.name = []
        self.save_dir = save_dir
    
    def dump(self, save_dir=None, ext='.h5'):
        save_path = self.save_dir if save_dir is None else save_dir
        save_path += '-'.join(self.name) + ext
        tqdm.write(f"\t - saving to: {save_path}")
        self.data.to_hdf(save_path, key='table')
        return self
    
    @window_decorator
    def mean(self, df=None, *args, **kwargs):
        self.data = np.mean(self.data, axis=0) if df is None else np.mean(df, axis=0)   
        return self
    
    @window_decorator
    def std(self, df=None, *args, **kwargs):
        self.data = np.std(self.data, axis=0) if df is None else np.std(df, axis=0)
        return self
    
    @window_decorator
    def periodogram(self, df=None, *args, fs=4e6, N=2000, **kwargs):
        """
        Calculates power spectrum density of the dataframe

        Parameters
        ----------
        df : pandas DataFrame
        args : None
        fs : sampling frequency
        kwargs :

        Returns
        -------
        Spectral components of the dataframe
        """
        
        self.data = scipy.signal.periodogram(self.data, fs=fs)[1][:N] if df is None else scipy.signal.periodogram(df, fs=fs)[1][:N]
        return self
    
    @window_decorator
    def spectrogramm_downsampled(self, df=None, *args, fs=4e6, nperseg=100, noverlap=20, mode='psd', **kwargs):
        """
        custom spectrogramm with downsampling afterwards in freq domain
        Parameters
        ----------
        df : pandas DataFrame
        args : None
        fs : sampling frequency
        kwargs :

        Returns
        -------

        """
        data = self.data if df is None else df
        f, t, Sxx = scipy.signal.spectrogram(data, fs, nperseg=nperseg, noverlap=noverlap, mode=mode)
        smoothen = scipy.signal.convolve2d(Sxx, np.array([[0.25, 0.25, 0.25, 0.25]]).T, mode='full')[::4]
        smoothen = scipy.signal.convolve2d(smoothen, np.array([[0.25, 0.25, 0.25, 0.25]]).T, mode='full')[::4]
        smoothen = scipy.signal.convolve2d(smoothen, np.array([[0.25, 0.25, 0.25, 0.25]]).T, mode='full')[::4]
        self.data = smoothen.T.flatten()
        return self

In [6]:
Feature(df, '../../data/')\
.mean(window_size=100, window_stride=100) \
.dump()\
.std(window_size=100, window_stride=100)\
.data

	 - saving to: ../../data/.h5
std(self, window_size=100, window_stride=100): 100%|██████████| 99/99 [00:00<00:00, 8592.57it/s]
	 window decorator for std: 
	 - window size: 100
	 - window stride: 100


Unnamed: 0,"std(self, window_size=100, window_stride=100)_0","std(self, window_size=100, window_stride=100)_1"
0,0.674343,0.000763
1,0.340504,0.000789
2,0.340500,0.000801
3,0.397458,0.000802
4,0.407217,0.000838
5,0.351359,0.000839
6,0.352210,0.000804
7,0.307291,0.000825
8,0.480172,0.000781
9,0.292285,0.000775


In [None]:
Feature(df['s'], '../../data/').periodogram(window_size=1000, window_stride=100, verbose=True).data

In [None]:
Feature(df['s'], '../../data/')\
.spectrogramm_downsampled(window_size=1000, window_stride=100, verbose=True)\
.dump()\
.std(window_size=100, window_stride=10, verbose=True).data

In [None]:
c=["a"]
print('-'.join(c))

In [31]:
p = Feature(df['ttf'], '../../data/')
p = getattr(p, 'mean')(window_size=100, window_stride=100)

mean(self, window_size=100, window_stride=100): 100%|██████████| 9999/9999 [00:00<00:00, 16157.63it/s]
	 window decorator for mean: 
	 - window size: 100
	 - window stride: 100


In [32]:
getattr(p, 'std')(window_size=100, window_stride=100)

std(self, window_size=100, window_stride=100): 100%|██████████| 99/99 [00:00<00:00, 5960.21it/s]
	 window decorator for std: 
	 - window size: 100
	 - window stride: 100


<__main__.Feature at 0x7fe41941a6d8>

In [28]:
funcs = {"functions": {"periodogram": {"N": 1000}, "mean": {}}}
for func_name, func_params in funcs['functions'].items():
    print(func_name, func_params)

periodogram {'N': 1000}
mean {}
