In [1]:
# encoding=utf-8
"""
    Created on 15:05 2017/3/28 
    @author: Jindong Wang
"""

"""
   Construct sequence features for time-series sequence data
"""

import numpy as np


class Feature_fft(object):
    def __init__(self, sequence_data):
        self.data = sequence_data
        fft_trans = np.abs(np.fft.fft(sequence_data))
        self.dc = fft_trans[0]
        self.freq_spectrum = fft_trans[1:int(np.floor(len(sequence_data) * 1.0 / 2)) + 1]
        self._freq_sum_ = np.sum(self.freq_spectrum)

    def fft_dc(self):
        return self.dc

    def fft_mean(self):
        return np.mean(self.freq_spectrum)

    def fft_var(self):
        return np.var(self.freq_spectrum)

    def fft_std(self):
        return np.std(self.freq_spectrum)

    def fft_entropy(self):
        pr_freq = self.freq_spectrum * 1.0 / self._freq_sum_
        entropy = -1 * np.sum([np.log2(p) * p for p in pr_freq])
        return entropy

    def fft_energy(self):
        return np.sum(self.freq_spectrum ** 2) / len(self.freq_spectrum)

    # def fft_skew(self):
    #     fft_mean, fft_std = self.fft_mean(), self.fft_std()
    #     return np.mean([np.power((x - fft_mean) / fft_std, 3)
    #                     for x in self.freq_spectrum])
    def fft_skew(self):
        fft_mean, fft_std = self.fft_mean(), self.fft_std()

        return np.mean([0 if fft_std == 0 else np.power((x - fft_mean) / fft_std, 3)
                        for x in self.freq_spectrum])

    # def fft_kurt(self):
    #     fft_mean, fft_std = self.fft_mean(), self.fft_std()
    #     return np.mean([np.power((x - fft_mean) / fft_std, 4) - 3
    #                     for x in self.freq_spectrum])
    def fft_kurt(self):
        fft_mean, fft_std = self.fft_mean(), self.fft_std()
        return np.mean([0 if fft_std == 0 else np.power((x - fft_mean) / fft_std, 4) - 3
                        for x in self.freq_spectrum])

    def fft_max(self):
        idx = np.argmax(self.freq_spectrum)
        return idx, self.freq_spectrum[idx]

    def fft_topk_freqs(self, top_k=None):
        idxs = np.argsort(self.freq_spectrum)
        if top_k == None:
            top_k = len(self.freq_spectrum)
        return idxs[:top_k], self.freq_spectrum[idxs[:top_k]]

    # def fft_shape_mean(self):
    #     shape_sum = np.sum([x * self.freq_spectrum[x]
    #                         for x in range(len(self.freq_spectrum))])
    #     return shape_sum * 1.0 / self._freq_sum_
    def fft_shape_mean(self):
        shape_sum = np.sum([x * self.freq_spectrum[x]
                            for x in range(len(self.freq_spectrum))])
        return 0 if self._freq_sum_ == 0 else shape_sum * 1.0 / self._freq_sum_

    # def fft_shape_std(self):
    #     shape_mean = self.fft_shape_mean()
    #     var = np.sum([np.power((x - shape_mean), 2) * self.freq_spectrum[x]
    #                   for x in range(len(self.freq_spectrum))]) / self._freq_sum_
    #     return np.sqrt(var)
    def fft_shape_std(self):
        shape_mean = self.fft_shape_mean()
        var = np.sum([0 if self._freq_sum_ == 0 else np.power((x - shape_mean), 2) * self.freq_spectrum[x]
                      for x in range(len(self.freq_spectrum))]) / self._freq_sum_
        return np.sqrt(var)

    def fft_shape_skew(self):
        shape_mean = self.fft_shape_mean()
        return np.sum([np.power((x - shape_mean), 3) * self.freq_spectrum[x]
                       for x in range(len(self.freq_spectrum))]) / self._freq_sum_

    def fft_shape_kurt(self):
        shape_mean = self.fft_shape_mean()
        np.sum([np.power((x - shape_mean), 4) * self.freq_spectrum[x] - 3
                for x in range(len(self.freq_spectrum))]) / self._freq_sum_

    def fft_all(self):
        '''
        Get all fft features in one function
        :return: All fft features in one list
        '''
        feature_all = list()
        feature_all.append(self.fft_dc())
        feature_all.append(self.fft_shape_mean())
        feature_all.append(self.fft_shape_std() ** 2)
        feature_all.append(self.fft_shape_std())
        feature_all.append(self.fft_shape_skew())
        #feature_all.append(self.fft_shape_kurt())
        feature_all.append(self.fft_mean())
        feature_all.append(self.fft_var())
        feature_all.append(self.fft_std())
        feature_all.append(self.fft_skew())
        feature_all.append(self.fft_kurt())
        return feature_all

In [2]:
# encoding=utf-8
"""
    Created on 14:46 2017/3/28 
    @author: Jindong Wang
"""
import numpy as np
import scipy.stats

'''
    Calculate time domain features
'''


class Feature_time(object):
    def __init__(self, sequence_data):
        self.data = sequence_data

    def time_mean(self):
        return np.mean(self.data)

    def time_var(self):
        return np.var(self.data)

    def time_std(self):
        return np.std(self.data)

    def time_mode(self):
        return float(scipy.stats.mode(self.data, axis=None)[0])

    def time_max(self):
        return np.max(self.data)

    def time_min(self):
        return np.min(self.data)

    def time_over_zero(self):
        return len(self.data[self.data > 0])

    def time_range(self):
        return self.time_max() - self.time_min()

    def time_all(self):
        '''
        Get all time domain features in one function
        :return: all time domain features in a list
        '''
        feature_all = list()
        feature_all.append(self.time_mean())
        feature_all.append(self.time_var())
        feature_all.append(self.time_std())
        feature_all.append(self.time_mode())
        feature_all.append(self.time_max())
        feature_all.append(self.time_min())
        feature_all.append(self.time_over_zero())
        feature_all.append(self.time_range())
        return feature_all

In [15]:
# -*- coding:utf-8 -*-
# （1）时域：均值，方差，标准差，最大值，最小值，过零点个数，最大值与最小值之差，众数
# （2）频域：直流分量，图形的均值、方差、标准差、斜度、峭度，幅度的均值、方差、标准差、斜度、峭度
# 共19个特征

import numpy as np

def get_feature(arr):
    '''
    Get features of an array
    :param arr: input 1D array
    :return: feature list
    '''
    feature_list = list()
    # get time domain features
    feature_time = Feature_time(arr).time_all()
    feature_list.extend(feature_time)
    # get frequency domain features
    feature_fft = Feature_fft(arr).fft_all()
    feature_list.extend(feature_fft)
    return feature_list


def sequence_feature(seq, win_size, step_size):
    '''
    Get features of a sequence, with or without window
    :param seq: shape of the sequence: (n,1)
    :param win_size: window size, if window_size == 0, get features without window
    :param step_size: step size
    :return: 2D feature matrix
    '''
    if win_size == 0:
        return np.asarray(get_feature(seq))
    window_size = win_size
    step_size = step_size
    r = len(seq)
    feature_mat = list()

    j = 0
    while j < r - step_size:
        window = seq[j:j + window_size]
        win_feature = get_feature(window)
        feature_mat.append(win_feature)
        j += step_size
    return np.asarray(feature_mat)


def test():
    a = np.arange(0, 10).reshape((10, 1))
    print(sequence_feature(a, 0, 4))  # without window
    print(sequence_feature(a, 5, 4))  # with window
    # example output:
    # [4.5         4.5         2.87228132  0.          9.          0.          9.
    #  9.          0.          2.66666667  1.55555556  1.24721913 - 1.14074074
    #  - 1.14074074  3.          2.          1.41421356  0. - 1.3]
    # [[2.          2.          1.41421356  0.          4.          0.          4.
    #   4.          0.          0.66666667  0.22222222  0.47140452 - 0.07407407
    #   - 0.07407407  1.5         0.25        0.5         0. - 2.]
    #  [6.          6.          1.41421356  4.          8.          4.          5.
    #  4.          4.          0.54545455  0.24793388  0.4979296 - 0.02253944
    #  - 0.02253944  5.5         0.25        0.5         0. - 2.]]

In [16]:
def getTensorflowVersion():
    import tensorflow as tf
    print(tf.__version__)

In [17]:
def getTsfreshVersion():
    import tsfresh
    print(tsfresh.__version__)

In [18]:
if __name__=="__main__":
    test()
    getTensorflowVersion()
    getTsfreshVersion()

[4.5 8.25 2.8722813232690143 0.0 9 0 9 9 array([ 0.]) 2.6666666666666665
 1.5555555555555554 1.247219128924647 -1.1407407407407399 None 3.0 2.0
 1.4142135623730951 0.0 -1.3000000000000007]
[[2.0 2.0 1.4142135623730951 0.0 4 0 4 4 array([ 0.]) 0.66666666666666663
  0.22222222222222221 0.47140452079103168 -0.074074074074074042 None 1.5
  0.25 0.5 0.0 -2.0]
 [6.0 2.0 1.4142135623730951 4.0 8 4 5 4 array([ 4.]) 0.54545454545454541
  0.24793388429752067 0.49792959773196921 -0.022539444027047287 None 5.5
  0.25 0.5 0.0 -2.0]]
1.2.1
0.8.1
