In [1]:
import pandas as pd
import numpy as np

# Build a demo series: 30 integers, each the product of two random draws.
sc = pd.Series([np.random.randint(1,5)*np.random.randint(1,400) for i in range(30)])
print(f'initial series:\n {sc}')
std = sc.std()
mean = sc.mean()
median = sc.median()
print(f'\nmedian: {median}, standard deviation: {std}\n\nExtreme elements:')

# An element is "extreme" when it lies more than one standard deviation away
# from the median. The mask is computed once, up front, so the series is never
# modified while it is being scanned and positions come straight from the index
# (no value-based lookup that breaks on duplicated values).
is_extreme = (sc - median).abs() > std
for ind, el in sc[is_extreme].items():
    print(f'extreme element: {el} at position {ind}')

# Blank out the extremes (np.nan instead of the removed `pd.np` alias) and fill
# the gaps by linear interpolation between the surrounding kept values.
# Extremes at the very start have no left neighbour and therefore stay NaN.
sc = sc.mask(is_extreme).interpolate(method='linear')
print(f'\n interpolated series: \n{sc}')





initial series:
 0       99
1     1388
2      308
3      193
4      374
5       90
6      843
7        6
8      510
9     1308
10     135
11     104
12     798
13     126
14     622
15      66
16     482
17    1376
18     526
19     906
20     766
21      84
22     456
23      46
24     121
25     666
26      96
27      70
28      57
29     764
dtype: int64

median: 341.0, standard deviation: 418.72321530395567

Extreme elements:
extreme element: 1388 at position 1
extreme element: 843 at position 6
extreme element: 1308 at position 9
extreme element: 798 at position 12
extreme element: 1376 at position 17
extreme element: 906 at position 19
extreme element: 766 at position 20
extreme element: 764 at position 29

 interpolated series: 
0      99.000000
1     203.500000
2     308.000000
3     193.000000
4     374.000000
5      90.000000
6      48.000000
7       6.000000
8     510.000000
9     322.500000
10    135.000000
11    104.000000
12    115.000000
13    126.000000
14    622.000000

  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app


In [7]:
def tkeo(emg):
    """Apply the Teager-Kaiser Energy Operator along the first axis of ``emg``.

    For every interior sample the result is ``x[n]**2 - x[n-1]*x[n+1]``; the
    first and last entries are copies of their nearest interior neighbour.

    Parameters
    ----------
    emg : array-like
        Input samples (anything ``np.asarray`` accepts).

    Returns
    -------
    numpy.ndarray
        Array with the same shape as the input. Boolean/integer input is
        promoted to float64 so that the products and the subtraction cannot
        wrap around (e.g. unsigned dtypes would otherwise underflow).
        When fewer than three samples are supplied the operator is undefined
        and an all-NaN array of the input's shape is returned.
    """
    emg = np.asarray(emg)
    # Integer arithmetic would silently overflow/underflow; compute in float.
    if emg.dtype.kind in 'biu':
        emg = emg.astype(np.float64)
    # The operator needs a left and a right neighbour for at least one sample.
    if emg.ndim == 0 or emg.shape[0] < 3:
        return np.full(emg.shape, np.nan, dtype=emg.dtype)

    energy = np.copy(emg)
    # Teager-Kaiser Energy operator on the interior samples
    energy[1:-1] = emg[1:-1]*emg[1:-1] - emg[:-2]*emg[2:]
    # correct the data in the extremities
    energy[0], energy[-1] = energy[1], energy[-2]

    return energy



def gen_features(wind,if_min,if_max,if_mean,if_median,if_std,if_teager):
  """Compute the requested summary features for one window.

  Parameters
  ----------
  wind : pandas.Series
      The window; it is printed before the features are computed.
  if_min, if_max, if_mean, if_median, if_std, if_teager : bool
      Switches selecting which entries are added to the result, stored under
      the keys 'min', 'max', 'mean', 'median', 'std' and 'tkeo' respectively.

  Returns
  -------
  dict
      Maps feature name to its value. 'tkeo' holds whatever ``tkeo(wind)``
      (defined elsewhere in this file) returns.
  """
  atrs = {}
  print(wind)
  # Use the Series reductions rather than the builtins min()/max(): they skip
  # NaN the same way mean/median/std do (the builtins give order-dependent
  # results when NaN is present) and do not raise on an empty window.
  if if_min:
    atrs['min'] = wind.min()
  if if_max:
    atrs['max'] = wind.max()
  if if_mean:
    atrs['mean'] = wind.mean()
  if if_median:
    atrs['median'] = wind.median()
  if if_std:
    atrs['std'] = wind.std()
  if if_teager:
    atrs['tkeo'] = tkeo(wind)
  print(f'\nfeatures: {atrs}\n')
  return atrs


def window_features(seq,coverage,if_cover=True,cover_degr=1,if_min=True,if_max=True,if_mean=True,if_median = True,if_std=True, if_teager = True):
  """Slide a window over ``seq`` and collect per-window features.

  Parameters
  ----------
  seq : pandas.Series
      Input sequence.
  coverage : float
      Fraction of the sequence covered by one window, in (0, 1]. The window
      size is ``floor(len(seq) * coverage)`` and must be at least 1.
  if_cover : bool
      If true, consecutive windows start ``cover_degr`` positions apart (so
      they may overlap); otherwise the step equals the window size.
  cover_degr : int
      Step between window starts when ``if_cover`` is true; must be >= 1 then.
  if_min, if_max, if_mean, if_median, if_std, if_teager : bool
      Forwarded to ``gen_features`` to select which features are computed.

  Returns
  -------
  dict
      Maps consecutive window numbers (starting at 1) to the dict returned by
      ``gen_features`` for that window. Windows that start close to the end of
      the regular region may be shorter than the window size. Samples left
      over after the last whole multiple of the window size form one final
      window, which is skipped when there are no such samples.

  Raises
  ------
  AssertionError
      If any of the argument checks fails.
  """
  assert 0 < coverage <= 1, "wrong coverage"
  # Exact type check kept on purpose: it also rejects bool.
  assert type(cover_degr) == int, 'cover degree must be an integer'
  assert isinstance(seq, pd.Series), 'input series must be pandas Series'
  N = len(seq)
  windsize = int(np.floor(N*coverage))
  # A zero-sized window would lead to a division by zero below.
  assert windsize >= 1, 'coverage too small: window would be empty'
  if if_cover:
    assert cover_degr >= 1, 'cover degree must be positive'
    step = cover_degr
  else:
    step = windsize
  features = {}
  windnum = 1
  # Start of the leftover samples after the last whole window-size multiple.
  tail_start = (N//windsize)*windsize
  for i in range(0,tail_start,step):
    # .iloc makes the positional slicing explicit regardless of the index type.
    wind = seq.iloc[i:i+windsize]
    print(f'window nr {windnum}')
    features[windnum] = gen_features(wind,if_min,if_max,if_mean,if_median,if_std, if_teager)
    windnum +=1
  # Store the tail under the next consecutive number so it can neither
  # overwrite an earlier window nor leave a gap in the numbering.
  if tail_start < N:
    wind = seq.iloc[tail_start:]
    print(f'window nr {windnum}')
    features[windnum] = gen_features(wind,if_min,if_max,if_mean,if_median,if_std, if_teager)
  return features

# Demo: windows covering a quarter of the series, starting 4 samples apart.
print('\n Feature Extraction:\n')
print(window_features(seq=sc, coverage=0.25, if_cover=True, cover_degr=4))


 Feature Extraction:

window nr 1
0     99.0
1    203.5
2    308.0
3    193.0
4    374.0
5     90.0
6     48.0
dtype: float64

features: {'min': 48.0, 'max': 374.0, 'mean': 187.92857142857142, 'median': 193.0, 'std': 119.99042620539517, 'tkeo': array([ 10920.25,  10920.25,  55588.5 , -77943.  , 122506.  ,  -9852.  ,
        -9852.  ])}

window nr 2
4     374.0
5      90.0
6      48.0
7       6.0
8     510.0
9     322.5
10    135.0
dtype: float64

features: {'min': 6.0, 'max': 510.0, 'mean': 212.21428571428572, 'median': 135.0, 'std': 190.3518358949328, 'tkeo': array([ -9852.  ,  -9852.  ,   1764.  , -24444.  , 258165.  ,  35156.25,
        35156.25])}

window nr 3
8     510.0
9     322.5
10    135.0
11    104.0
12    115.0
13    126.0
14    622.0
dtype: float64

features: {'min': 104.0, 'max': 622.0, 'mean': 276.35714285714283, 'median': 135.0, 'std': 213.90003784589706, 'tkeo': array([ 35156.25,  35156.25, -15315.  ,  -4709.  ,    121.  , -55654.  ,
       -55654.  ])}

window nr 4
1

In [4]:
# Sum over a sliding window of 8 samples.
sc.rolling(window=8).sum()

0             NaN
1             NaN
2             NaN
3             NaN
4             NaN
5             NaN
6             NaN
7     1321.500000
8     1732.500000
9     1851.500000
10    1678.500000
11    1589.500000
12    1330.500000
13    1366.500000
14    1940.500000
15    2000.500000
16    1972.500000
17    2154.000000
18    2545.000000
19    2819.666667
20    2936.000000
21    2894.000000
22    2728.000000
23    2708.000000
24    2347.000000
25    2509.000000
26    2079.000000
27    1770.333333
28    1596.000000
29    1569.000000
dtype: float64