In [1]:
import pandas as pd
import numpy as np

In [2]:
SAMPLE_PATH: str = "./Выборка_Н22_5_1.csv"

In [3]:
df = pd.read_csv(SAMPLE_PATH, sep=';', usecols=[1], header=0)

In [4]:
df

Unnamed: 0,Signals
0,5300411994
1,5300411994
2,-2258525978
3,-1136987869
4,2413023575
...,...
65810,3793774319
65811,5266079194
65812,404554818
65813,-3249599451


In [5]:
df['Signals'] = df['Signals'].str.replace(',', '.').astype('float32')

In [6]:
signal: np.array = df['Signals'].values

In [7]:
mean_value = np.nanmean(signal)
signal = np.nan_to_num(signal, nan=mean_value)

In [8]:
def spectral_centroid(y: np.array, sr: float = 30000, frame_size: int = 2048, hop_size: int = 512):
    frames = np.lib.stride_tricks.sliding_window_view(y, frame_size)[::hop_size]
    
    spectrum = np.abs(np.fft.fft(frames, n=frame_size, axis=-1)[:,:frame_size//2])
    frequencies = np.fft.fftfreq(frame_size, d=1/sr)[:frame_size//2]
    
    centroids = np.sum(spectrum * frequencies, axis=1) / np.sum(spectrum, axis=1)
    
    return centroids

In [9]:
centroid_descriptor = spectral_centroid(signal)

In [10]:
centroid_descriptor

array([8300.40153501, 8285.51096293, 8305.82957987, 8303.98950131,
       8214.71424482, 8274.18107248, 8303.06542726, 8173.08025174,
       8394.93007999, 8378.75206777, 8312.84806089, 8284.16179535,
       8342.93865384, 8230.2592399 , 8305.21848247, 8322.16560306,
       8251.18551807, 8379.08487458, 8347.77951341, 8206.42839964,
       8236.26290875, 8291.08390632, 8304.57396606, 8430.09738016,
       8429.67802624, 8327.82568663, 8369.57502491, 8341.36398101,
       8258.3146529 , 8234.35582992, 8266.00565552, 8220.84122845,
       8247.48618001, 8377.51095823, 8314.09291152, 8302.18679168,
       8356.23352111, 8244.67976777, 8240.88793399, 8331.66977751,
       8237.09670675, 8324.60789091, 8430.33421905, 8360.34442038,
       8313.71267848, 8302.9310876 , 8259.47036166, 8221.34183092,
       8325.13424439, 8269.53025858, 8258.60483294, 8330.5728374 ,
       8295.38478658, 8237.12288492, 8371.58505891, 8311.28256378,
       8273.18090915, 8325.64451584, 8233.19181565, 8269.74862

In [11]:
def spectral_spread(y: np.array, sr: float = 30000, frame_size: float = 2048, hop_size: float = 512):
    centroids = spectral_centroid(y, sr, frame_size, hop_size)
    
    frames = np.lib.stride_tricks.sliding_window_view(y, frame_size)[::hop_size]
    
    spectrum = np.abs(np.fft.fft(frames, n=frame_size, axis=-1)[:,:frame_size//2])
    frequencies = np.fft.fftfreq(frame_size, d=1/sr)[:frame_size//2]
    
    spreads = np.sqrt(np.sum(((frequencies - centroids[:, None]) ** 2) * spectrum, axis=1) / np.sum(spectrum, axis=1))
    
    return spreads

In [12]:
spread_descriptor = spectral_spread(signal)

In [13]:
spread_descriptor

array([2836.75059479, 2693.22775006, 2741.9740911 , 2635.43488788,
       2654.89786474, 2808.8899267 , 2742.92652468, 2796.31000938,
       2758.53375978, 2734.07460372, 2783.02689699, 2831.41807969,
       2803.11348244, 2744.80006785, 2770.20247629, 2784.00931982,
       2744.4419992 , 2786.97526294, 2770.35103429, 2748.09624069,
       2760.53720077, 2758.72423996, 2822.09102549, 2811.51432906,
       2837.14796344, 2825.92318045, 2744.65597486, 2796.67774195,
       2815.72275476, 2756.08577627, 2782.3681175 , 2760.6051056 ,
       2668.98663519, 2695.09009581, 2672.93456923, 2666.78513468,
       2674.92037813, 2658.77044734, 2677.26677222, 2745.7173233 ,
       2748.00964655, 2678.40757353, 2723.56765655, 2674.0422889 ,
       2657.01431162, 2729.26182732, 2668.15105381, 2636.56944267,
       2692.0663933 , 2691.58239719, 2641.33001829, 2695.56396256,
       2709.54860965, 2702.15024658, 2773.27124897, 2757.79798174,
       2756.48061612, 2786.62527117, 2761.68069947, 2626.96557

In [14]:
def spectral_skewness(y: np.array, sr: float = 30000, frame_size: float = 2048, hop_size: float = 512):
    centroids = spectral_centroid(y, sr, frame_size, hop_size)
    spreads = spectral_spread(y, sr, frame_size, hop_size)

    frames = np.lib.stride_tricks.sliding_window_view(y, frame_size)[::hop_size]
    
    spectrum = np.abs(np.fft.fft(frames, n=frame_size, axis=-1)[:,:frame_size//2])
    frequencies = np.fft.fftfreq(frame_size, d=1/sr)[:frame_size//2]
    
    skewnesses = np.sum(((frequencies - centroids[:, None]) ** 3) * spectrum, axis=1) / (spreads ** 3 * np.sum(spectrum, axis=1))

    return skewnesses

In [15]:
skewness_descriptor = spectral_skewness(signal)

In [16]:
skewness_descriptor

array([-0.8042887 , -0.82818403, -0.86205422, -0.92068009, -0.82246863,
       -0.82272153, -0.80668495, -0.74852693, -0.8915158 , -0.89431927,
       -0.84746781, -0.86563705, -0.91258265, -0.77403381, -0.82205136,
       -0.84779629, -0.82173779, -0.96919425, -0.9414606 , -0.79534643,
       -0.8070194 , -0.81633973, -0.73562897, -0.84022148, -0.88747964,
       -0.86524005, -0.96133779, -0.98806611, -0.89946299, -0.86560621,
       -0.87894545, -0.76466166, -0.822953  , -0.91682325, -0.8370136 ,
       -0.88762318, -0.95551645, -0.88628056, -0.82865024, -0.8465651 ,
       -0.8074834 , -0.8478791 , -0.9654725 , -0.92432493, -0.91155975,
       -0.95620229, -0.89197218, -0.77773524, -0.81374246, -0.79044882,
       -0.6552981 , -0.74265625, -0.71601516, -0.65517445, -0.81455885,
       -0.83055642, -0.75320037, -0.84580849, -0.82629679, -0.81122891,
       -0.9193309 , -0.90247744, -0.79601734, -0.83990075, -0.78080963,
       -0.73254239, -0.87602403, -0.85060697, -0.84517194, -0.94

In [17]:
def spectral_kurtosis(y: np.array, sr: float = 30000, frame_size: float = 2048, hop_size: float = 512):
    centroids = spectral_centroid(y, sr, frame_size, hop_size)
    spreads = spectral_spread(y, sr, frame_size, hop_size)

    frames = np.lib.stride_tricks.sliding_window_view(y, frame_size)[::hop_size]
        
    spectrum = np.abs(np.fft.fft(frames, n=frame_size, axis=-1)[:,:frame_size//2])
    frequencies = np.fft.fftfreq(frame_size, d=1/sr)[:frame_size//2]
        
    kurtosises = np.sum((frequencies - centroids[:, None]) ** 4 * spectrum) / (spreads ** 4 * np.sum(spectrum))
    
    return kurtosises

In [18]:
kurtosis_descriptor = spectral_kurtosis(signal)

In [19]:
kurtosis_descriptor

array([4.57865501, 5.63547098, 5.24528576, 6.14629466, 5.96803371,
       4.76303374, 5.23800421, 4.84932478, 5.12046411, 5.30616923,
       4.9425711 , 4.61324518, 4.80241646, 5.22371744, 5.0347335 ,
       4.93559824, 5.22644414, 4.91462159, 5.03365366, 5.19870038,
       5.10561572, 5.11905006, 4.67453558, 4.74527445, 4.57609042,
       4.64923108, 5.2248145 , 4.84677475, 4.71696852, 5.13868056,
       4.94725376, 5.10511339, 5.84301423, 5.61991035, 5.80857002,
       5.86233237, 5.79134054, 5.93333909, 5.77106477, 5.21674064,
       5.19935569, 5.76123888, 5.38852457, 5.79895122, 5.94904105,
       5.34369591, 5.85033706, 6.13572212, 5.64520184, 5.64926338,
       6.09160693, 5.61595959, 5.50091271, 5.56140557, 5.01248566,
       5.12593084, 5.13573692, 4.9170911 , 5.09716487, 6.22594114,
       6.35190573, 6.16900819, 5.89240327, 5.54055254, 5.17915984,
       5.60292353, 5.86071182, 5.17672811, 6.01104962, 6.00534004,
       5.88398253, 6.03735337, 5.70104709, 4.88154942, 5.31281

In [20]:
def spectral_entropy(y: np.array, sr: float = 30000, frame_size: float = 2048, hop_size: float = 512):
    frames = np.lib.stride_tricks.sliding_window_view(y, frame_size)[::hop_size]
    
    spectrum = np.abs(np.fft.fft(frames, n=frame_size, axis=-1)[:,:frame_size//2])
    
    spectrum_sum = np.sum(spectrum, axis=1, keepdims=True)
    spectrum_norm = spectrum / spectrum_sum
    entropies = -np.sum(spectrum_norm * np.log(spectrum_norm), axis=1) / np.log(spectrum.shape[1])
    
    return entropies

In [21]:
entropy_descriptor = spectral_entropy(signal)

In [22]:
entropy_descriptor

array([0.8781074 , 0.87305062, 0.87382031, 0.86419939, 0.86634885,
       0.88171019, 0.87512253, 0.88685145, 0.86894898, 0.86294113,
       0.8676697 , 0.87401534, 0.87211574, 0.88047341, 0.8805931 ,
       0.88304117, 0.88078953, 0.8713853 , 0.87294068, 0.87410846,
       0.87748912, 0.87816272, 0.88054381, 0.87699569, 0.87696828,
       0.87470963, 0.86745365, 0.87435739, 0.87641732, 0.88203905,
       0.88474367, 0.88156083, 0.87514005, 0.87396119, 0.87400396,
       0.87162119, 0.86898436, 0.86880282, 0.8730309 , 0.8803909 ,
       0.87618813, 0.87254347, 0.86962688, 0.86717863, 0.86473404,
       0.87373746, 0.86567711, 0.87289875, 0.88019449, 0.87948098,
       0.87834769, 0.88430328, 0.88242187, 0.88191175, 0.88006816,
       0.87362391, 0.87552278, 0.87866263, 0.87355868, 0.85866255,
       0.85799471, 0.86067488, 0.86799012, 0.8722906 , 0.87709208,
       0.86996883, 0.86786777, 0.87397439, 0.86801285, 0.86386644,
       0.86362592, 0.86789009, 0.87579213, 0.88617831, 0.87977

In [23]:
def spectral_flatness(y: np.array, sr: float = 30000, frame_size: float = 2048, hop_size: float = 512):
    frames = np.lib.stride_tricks.sliding_window_view(y, frame_size)[::hop_size]
    
    spectrum = np.abs(np.fft.fft(frames, n=frame_size, axis=-1)[:,:frame_size//2])

    geometric_mean = np.exp(np.mean(np.log(spectrum + 1e-10), axis=1))
    arithmetic_mean = np.mean(spectrum, axis=1)
    flatnesses = geometric_mean / arithmetic_mean

    return flatnesses

In [24]:
flatness_descriptor = spectral_flatness(signal)

In [25]:
flatness_descriptor

array([0.42505518, 0.4046191 , 0.41646344, 0.37720318, 0.39648888,
       0.44608996, 0.42137359, 0.46948402, 0.41046184, 0.3898676 ,
       0.4126034 , 0.44020484, 0.41140732, 0.42184418, 0.42243911,
       0.4352622 , 0.42509459, 0.40677194, 0.40792704, 0.41604756,
       0.41835722, 0.41607952, 0.4271586 , 0.41518262, 0.41623791,
       0.42161637, 0.40179976, 0.41429037, 0.42483429, 0.43567487,
       0.43164122, 0.43010195, 0.40046202, 0.38538468, 0.39679045,
       0.40381842, 0.39128495, 0.39857941, 0.40253943, 0.4118766 ,
       0.41965751, 0.40559335, 0.40215951, 0.3896606 , 0.39255144,
       0.41296429, 0.38670691, 0.39523862, 0.4062791 , 0.41258687,
       0.39579829, 0.41173   , 0.40797935, 0.42278248, 0.41858801,
       0.40967994, 0.41322123, 0.42755066, 0.42566767, 0.38342178,
       0.37587775, 0.37785576, 0.40793053, 0.40446555, 0.4246537 ,
       0.4025711 , 0.39340078, 0.41547989, 0.40705553, 0.39716359,
       0.38024466, 0.40352223, 0.40676524, 0.44729118, 0.42741

In [26]:
def spectral_crest(y: np.array, sr: float = 30000, frame_size: float = 2048, hop_size: float = 512):
    frames = np.lib.stride_tricks.sliding_window_view(y, frame_size)[::hop_size]
    
    spectrum = np.abs(np.fft.fft(frames, n=frame_size, axis=-1)[:,:frame_size//2])
    
    crests = np.max(spectrum, axis=1) / (np.sum(spectrum, axis=1) / spectrum.shape[1])

    return crests

In [27]:
crest_descriptor = spectral_crest(signal)

In [28]:
crest_descriptor

array([33.49050754, 29.70288502, 30.32499438, 28.7542748 , 28.04940185,
       30.34896913, 29.11775177, 27.27131168, 29.90794544, 30.40097016,
       31.52814824, 32.11461164, 32.64773976, 29.99426994, 30.2419525 ,
       29.80193887, 29.48616583, 32.47850418, 31.91189406, 31.55967953,
       31.14985829, 30.67004822, 31.59525374, 31.31899792, 32.45268071,
       32.62201687, 31.93236335, 33.60372147, 33.81418343, 30.65550349,
       31.33064174, 30.6523734 , 28.35695854, 29.28629275, 28.62477738,
       27.9496715 , 29.71109413, 29.64206465, 29.5951635 , 30.75411675,
       30.09285615, 27.81252873, 29.51266783, 29.34759956, 28.57838382,
       30.45496992, 30.23391764, 26.96787616, 28.51617353, 28.02572462,
       26.12872611, 28.00507846, 28.11378636, 26.4150142 , 29.83634362,
       30.26657044, 29.92841184, 30.45226551, 30.07869334, 28.08739831,
       27.96225169, 28.68299951, 28.02940772, 29.87737045, 30.33607771,
       29.38978641, 29.1847552 , 30.60555213, 27.82395136, 28.68

In [29]:
def spectral_flux(y: np.array, sr: float = 30000, frame_size: float = 2048, hop_size: float = 512, p: float = 2):
    frames = np.lib.stride_tricks.sliding_window_view(y, frame_size)[::hop_size]
    
    spectrum = np.abs(np.fft.fft(frames, n=frame_size, axis=-1)[:,:frame_size//2])
    
    diff_spectrum = np.diff(spectrum, axis=0)
    
    fluxes = np.sum(np.abs(diff_spectrum) ** p, axis=1) ** (1/p)
    
    return np.concatenate(([0], fluxes))

In [30]:
flux_descriptor = spectral_flux(signal)

In [31]:
flux_descriptor

array([    0.        , 21732.31123912, 15852.61290969, 18829.40315661,
       16877.71721046, 20953.35044088, 18779.24899534, 23844.90448087,
       18673.32941445, 13914.19905423, 19905.43666917, 20217.62929151,
       16572.98637306, 17868.23705376, 17141.51929375, 17473.31928204,
       21300.45978121, 15648.34220368, 16717.77683616, 20316.53917254,
       19883.28809973, 16124.99175044, 16190.94292339, 19392.83366651,
       15996.46190537, 17402.65878747, 16090.56700329, 17906.98598415,
       14600.92856728, 16445.1193592 , 18668.70186261, 18712.54260315,
       20387.74255754, 17667.72243851, 18597.63796556, 20884.36320225,
       21829.58557868, 19414.26907607, 24286.38911941, 17711.81896365,
       20853.39460079, 24375.56335141, 18691.1915328 , 16376.06439068,
       22547.55957995, 22957.82616211, 18955.78906678, 20046.45607023,
       22538.34405298, 16550.70899842, 19549.81852171, 20964.60880269,
       20454.66513478, 20737.91379366, 22066.5476031 , 17355.43093559,
      

In [32]:
def spectral_slope(y: np.array, sr: float = 30000, frame_size: float = 2048, hop_size: float = 512):
    frames = np.lib.stride_tricks.sliding_window_view(y, frame_size)[::hop_size]
    
    spectrum = np.abs(np.fft.fft(frames, n=frame_size, axis=-1)[:,:frame_size//2])
    frequencies = np.fft.fftfreq(frame_size, d=1/sr)[:frame_size//2]
        
    slopes = np.sum((frequencies - np.mean(frequencies))*(spectrum - np.mean(spectrum)), axis=1) / np.sum((frequencies - np.mean(frequencies)) ** 2)
    
    return slopes

In [33]:
slope_descriptor = spectral_slope(signal)

In [34]:
slope_descriptor

array([0.02647248, 0.0293919 , 0.02951269, 0.03105072, 0.02830802,
       0.02818799, 0.03051775, 0.02733132, 0.03321888, 0.0322235 ,
       0.02868758, 0.02721764, 0.02873948, 0.0270186 , 0.02949787,
       0.03062994, 0.02818494, 0.02998372, 0.02958636, 0.02497578,
       0.02642795, 0.02877355, 0.0284207 , 0.03297892, 0.03178283,
       0.02834353, 0.03013634, 0.0278802 , 0.02497643, 0.02662189,
       0.02735836, 0.02625468, 0.02961742, 0.03348756, 0.0319024 ,
       0.03225727, 0.03210352, 0.02810629, 0.02801957, 0.0301298 ,
       0.02743662, 0.0330226 , 0.0349736 , 0.03262549, 0.03153334,
       0.02940804, 0.02784657, 0.0297191 , 0.03227142, 0.03054491,
       0.03259103, 0.03321111, 0.03176479, 0.03138432, 0.03272161,
       0.03012304, 0.02897748, 0.03044437, 0.02754989, 0.03075892,
       0.03470677, 0.03159208, 0.02775141, 0.03117991, 0.02967683,
       0.02771221, 0.03261457, 0.02822065, 0.02851762, 0.02950599,
       0.0306917 , 0.02781687, 0.03005683, 0.02788063, 0.02703

In [35]:
def spectral_mean(y: np.array, sr: float = 30000, frame_size: float = 2048, hop_size: float = 512):
    frames = np.lib.stride_tricks.sliding_window_view(y, frame_size)[::hop_size]
    
    spectrum = np.abs(np.fft.fft(frames, n=frame_size, axis=-1)[:,:frame_size//2])

    means = np.mean(spectrum, axis=1)
    
    return means

In [36]:
mean_descriptor = spectral_mean(signal)

In [37]:
mean_descriptor

array([614.51362217, 695.09729717, 680.51375847, 717.60206065,
       735.10599996, 676.29002647, 706.08901411, 753.17223278,
       690.33019263, 681.87130264, 655.82765025, 644.77488143,
       633.76241097, 686.83513823, 680.68354877, 692.36638062,
       696.71771096, 634.23782541, 648.74442809, 656.10316845,
       666.39613809, 675.72400208, 656.34794869, 659.63288627,
       635.99359844, 636.34176772, 644.37946361, 615.95432178,
       611.65597937, 673.012511  , 663.3245627 , 676.04802497,
       735.71602246, 709.61362403, 728.21632225, 747.14647849,
       697.04715275, 700.78421268, 702.16247998, 673.34584483,
       691.05541795, 744.25932294, 699.35315273, 705.02408528,
       720.12533971, 680.52651938, 680.91592336, 764.72975295,
       726.86932733, 737.22486967, 797.8299652 , 743.17919603,
       741.97411501, 790.45968789, 698.05797432, 689.96063262,
       696.12249548, 685.29745994, 697.56747236, 742.18155039,
       746.33885698, 725.37679912, 739.39801   , 698.51

In [38]:
def spectral_std(y: np.array, sr: float = 30000, frame_size: float = 2048, hop_size: float = 512):
    frames = np.lib.stride_tricks.sliding_window_view(y, frame_size)[::hop_size]
    
    spectrum = np.abs(np.fft.fft(frames, n=frame_size, axis=-1)[:,:frame_size//2])

    stds = np.std(spectrum, axis=1)
    
    return stds

In [39]:
std_descriptor = spectral_std(signal)

In [40]:
std_descriptor

array([1185.58094287, 1338.47635713, 1311.74605556, 1415.6443587 ,
       1464.89805588, 1269.09262284, 1358.31786092, 1356.16087529,
       1404.33308548, 1425.69513926, 1361.51336066, 1291.9612537 ,
       1248.47400218, 1262.15379081, 1268.46813882, 1260.81379856,
       1278.78476559, 1244.92097738, 1241.54116615, 1274.18095103,
       1252.53615812, 1241.97589035, 1226.54904624, 1242.55708821,
       1204.13836536, 1231.07421438, 1309.17284822, 1208.62833394,
       1187.20622124, 1239.49633139, 1180.60156587, 1236.13565713,
       1365.56072401, 1322.21135933, 1382.10679987, 1433.1968131 ,
       1347.73806529, 1372.17265698, 1341.83634325, 1224.69048363,
       1306.46204232, 1423.68147224, 1369.71923689, 1393.66466351,
       1463.91106052, 1305.16042699, 1377.56991172, 1457.85284693,
       1332.16208444, 1351.55392976, 1419.96315873, 1283.15375178,
       1296.59147155, 1399.66287057, 1287.03895732, 1337.80384516,
       1364.3419069 , 1299.28125365, 1378.39338063, 1587.99052

In [41]:
window_size = 256
moving_avg = np.convolve(signal, np.ones(window_size)/window_size, mode='valid')

new_size = 125
block_size = moving_avg.shape[0] // new_size
moving_avg = np.array([moving_avg[i*block_size:(i+1)*block_size].mean() for i in range(new_size)])

In [42]:
moving_avg

array([10.02723605, 10.08602152,  9.98994789, 10.06124822, 10.1849169 ,
       10.0042141 , 10.03641957, 10.0291766 , 10.09078822, 10.00574966,
       10.16611629, 10.08520562, 10.10334971, 10.06301481, 10.06344798,
       10.03970533, 10.00177212, 10.07353081, 10.09525262, 10.13412352,
       10.09879029, 10.12986802, 10.1474065 , 10.12791108, 10.0780613 ,
       10.03753111, 10.10233438, 10.16621392, 10.00034672, 10.10215481,
       10.15966588, 10.13105443, 10.10557779, 10.18225286, 10.21267097,
       10.11995986, 10.20306582, 10.13729092, 10.10508881, 10.13823517,
       10.13438617, 10.12077871, 10.09343746, 10.1201883 , 10.07996106,
       10.06069818, 10.00969361, 10.10673588, 10.10645545, 10.11335662,
       10.17354975, 10.22219413, 10.11855818, 10.14221236, 10.16175485,
       10.21763208, 10.23076793, 10.20986561, 10.20086095, 10.17014846,
       10.17412738, 10.13228072, 10.17038212, 10.22749671, 10.21182339,
       10.16679234, 10.17445804, 10.17899738, 10.17041909, 10.13

In [43]:
alpha = 2 / (window_size + 1)

ema = np.zeros(len(signal))
ema[0] = signal[0]
ema[1:] = alpha * signal[1:] + (1 - alpha) * np.cumsum(alpha * signal[:-1])

ema = np.array([ema[i*block_size:(i+1)*block_size].mean() for i in range(new_size)])

In [44]:
ema

array([  20.64203016,   61.23778184,  102.00541751,  142.49228681,
        183.30700005,  224.44756177,  264.7941645 ,  305.37967845,
        346.03319899,  386.86891436,  427.50090417,  468.57403733,
        509.45307509,  550.15891644,  590.90410923,  631.55780635,
        672.25374493,  712.70149278,  753.59488707,  794.39247737,
        835.36333634,  876.22966737,  917.16028857,  958.3217551 ,
        999.27262133, 1039.96620679, 1080.64457109, 1121.61377186,
       1162.41686546, 1203.02911424, 1244.03304122, 1285.12808064,
       1326.09860672, 1367.06356812, 1408.28660292, 1449.49372456,
       1490.45115999, 1531.77424447, 1572.81504228, 1613.73239485,
       1654.73858759, 1695.68427166, 1736.56368087, 1777.34457165,
       1818.32558179, 1859.17191088, 1899.74518428, 1940.46648704,
       1981.23837373, 2022.1571795 , 2063.07975711, 2104.34630276,
       2145.59883129, 2186.65424749, 2227.6891801 , 2268.82715356,
       2310.19150316, 2351.47998606, 2392.80111857, 2434.06914

In [45]:
window_size = 256
moving_std = np.array([np.std(signal[i:i+window_size]) for i in range(len(signal) - window_size + 1)])

new_size = 125
block_size = moving_std.shape[0] // new_size
moving_std = np.array([moving_std[i*block_size:(i+1)*block_size].mean() for i in range(new_size)])

In [46]:
moving_std

array([27.600805, 25.860418, 23.33771 , 33.265358, 33.885956, 26.960482,
       33.53654 , 27.368687, 25.519947, 34.690987, 37.76826 , 22.371035,
       26.857939, 30.672167, 26.057371, 29.354637, 28.236351, 26.77942 ,
       28.178818, 26.456804, 23.65747 , 30.868942, 31.006727, 22.28868 ,
       23.611715, 33.299294, 24.576872, 24.737206, 31.648645, 23.717947,
       21.017828, 31.926132, 26.891254, 31.372425, 32.217415, 26.445677,
       33.78621 , 32.726986, 25.214895, 30.483265, 28.401539, 24.462334,
       33.466194, 37.6448  , 22.63209 , 32.145493, 35.666946, 25.03563 ,
       30.904644, 35.736744, 26.077234, 30.70466 , 32.35882 , 26.57483 ,
       33.053562, 31.135649, 22.831644, 31.139214, 34.1625  , 26.884687,
       41.490597, 33.750065, 25.18127 , 30.437616, 36.505077, 22.666082,
       32.00243 , 34.00811 , 23.482262, 36.801945, 36.10325 , 25.477636,
       28.835653, 28.270535, 23.591309, 29.101585, 24.681253, 24.474562,
       25.950064, 29.75404 , 25.324373, 29.006516, 

In [47]:
def lag(arr, lag_value):
    result = np.empty_like(arr)
    result[:lag_value] = np.nan
    result[lag_value:] = arr[:-lag_value]
    return result

In [48]:
max_dif = 10

compressed_signal = np.array([signal[i*block_size:(i+1)*block_size].mean() for i in range(new_size)])

lags = np.zeros((max_dif, len(compressed_signal)))

In [49]:
lags

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [50]:
for i in range(max_dif):
    lags[i] = lag(compressed_signal, i+1)

In [51]:
lags

array([[        nan, 10.04760742, 10.10840988, ..., 10.46300793,
        10.29617023, 10.30119324],
       [        nan,         nan, 10.04760742, ..., 10.32390785,
        10.46300793, 10.29617023],
       [        nan,         nan,         nan, ..., 10.28946495,
        10.32390785, 10.46300793],
       ...,
       [        nan,         nan,         nan, ..., 10.2580595 ,
        10.38141251, 10.27880859],
       [        nan,         nan,         nan, ..., 10.36116695,
        10.2580595 , 10.38141251],
       [        nan,         nan,         nan, ..., 10.32237911,
        10.36116695, 10.2580595 ]])

In [52]:
lag_diffs = np.zeros((max_dif, len(compressed_signal)))

In [53]:
lag_diffs

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [54]:
for i in range(max_dif):
    lag_diffs[i] = compressed_signal - lags[i]

In [55]:
lag_diffs

array([[        nan,  0.06080246, -0.0575695 , ..., -0.16683769,
         0.005023  ,  0.14925098],
       [        nan,         nan,  0.00323296, ..., -0.02773762,
        -0.16181469,  0.15427399],
       [        nan,         nan,         nan, ...,  0.00670528,
        -0.02271461, -0.01256371],
       ...,
       [        nan,         nan,         nan, ...,  0.03811073,
        -0.08021927,  0.17163563],
       [        nan,         nan,         nan, ..., -0.06499672,
         0.04313374,  0.06903172],
       [        nan,         nan,         nan, ..., -0.02620888,
        -0.05997372,  0.19238472]])

In [56]:
def spectral_pow(y: np.array, sr: float = 30000, frame_size: float = 2048, hop_size: float = 512):
    frames = np.lib.stride_tricks.sliding_window_view(y, frame_size)[::hop_size]
    
    spectrum = np.abs(np.fft.fft(frames, n=frame_size, axis=-1)[:,:frame_size//2])

    pows = np.sum(spectrum**2, axis=1)
    
    return pows

In [57]:
pow_descriptor = spectral_pow(signal)

In [58]:
pow_descriptor

array([1.82602666e+09, 2.32927151e+09, 2.23618733e+09, 2.57945771e+09,
       2.75077852e+09, 2.11759543e+09, 2.39983525e+09, 2.46419531e+09,
       2.50747616e+09, 2.55749643e+09, 2.33864042e+09, 2.13493609e+09,
       2.00739034e+09, 2.11432929e+09, 2.12207771e+09, 2.11867918e+09,
       2.17160299e+09, 1.99893592e+09, 2.00938885e+09, 2.10330467e+09,
       2.06124098e+09, 2.04708561e+09, 1.98166036e+09, 2.02656119e+09,
       1.89894355e+09, 1.96656596e+09, 2.18025824e+09, 1.88434655e+09,
       1.82638761e+09, 2.03704012e+09, 1.87783120e+09, 2.03271403e+09,
       2.46377898e+09, 2.30583744e+09, 2.49909066e+09, 2.67497571e+09,
       2.35752717e+09, 2.43093126e+09, 2.34860229e+09, 2.00013968e+09,
       2.23682627e+09, 2.64272986e+09, 2.42199104e+09, 2.49790480e+09,
       2.72549488e+09, 2.21855753e+09, 2.41801764e+09, 2.77519003e+09,
       2.35826671e+09, 2.42708330e+09, 2.71649590e+09, 2.25157004e+09,
       2.28523523e+09, 2.64589585e+09, 2.19520431e+09, 2.32014316e+09,
      

In [59]:
def band_powers(y: np.array, sr: float = 30000, frame_size: float = 2048, hop_size: float = 512):
    frames = np.lib.stride_tricks.sliding_window_view(y, frame_size)[::hop_size]

    spectrum = np.abs(np.fft.fft(frames, n=frame_size, axis=-1)[:, :frame_size // 2])
    frequencies = np.fft.fftfreq(frame_size, d=1 / sr)[:frame_size // 2]

    band_edges = np.arange(0, 30000 + 1000, 1000)
    band_indices = [np.logical_and(frequencies >= band_edges[i], frequencies < band_edges[i + 1]) for i in range(len(band_edges) - 1)]

    band_pows = np.zeros((len(band_edges) - 1, frames.shape[0]))

    for i, indices in enumerate(band_indices):
        band_pows[i, :] = np.sum(spectrum[:, indices] ** 2, axis=1)

    return band_pows

In [60]:
band_pows = band_powers(signal)

In [61]:
band_pows

array([[4.24321231e+08, 4.27036159e+08, 4.26788900e+08, ...,
        4.52768351e+08, 4.47922810e+08, 4.50857613e+08],
       [6.11275716e+05, 7.58431282e+05, 8.18510669e+05, ...,
        7.43825437e+05, 6.35939710e+05, 8.12260223e+05],
       [2.22922943e+06, 1.92164785e+06, 1.97741034e+06, ...,
        1.81210604e+06, 1.93095891e+06, 1.74687163e+06],
       ...,
       [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, ...,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, ...,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, ...,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00]])

In [62]:
def freq_peaks(y: np.array, sr: float = 30000, frame_size: float = 2048, hop_size: float = 512):
    frames = np.lib.stride_tricks.sliding_window_view(y, frame_size)[::hop_size]
    
    spectrum = np.abs(np.fft.fft(frames, n=frame_size, axis=-1)[:, :frame_size // 2])
    frequencies = np.fft.fftfreq(frame_size, d=1/sr)[:frame_size//2]

    min_indices = np.argmin(spectrum, axis=1)
    peaks = frequencies[min_indices]
    
    return peaks

In [63]:
peak_freqs = freq_peaks(signal)

In [None]:
def spectral_percentiles(y: np.array, sr: float = 30000, frame_size: int = 2048, hop_size: int = 512):
    frames = np.lib.stride_tricks.sliding_window_view(y, frame_size)[::hop_size]

    spectrum = np.abs(np.fft.fft(frames, n=frame_size, axis=-1)[:, :frame_size // 2])

    percentiles = np.percentile(spectrum, np.arange(1, 101), axis=1)

    return percentiles

In [None]:
percentile_descriptor = spectral_percentiles(signal)

In [None]:
def spectral_variance(y: np.array, sr: float = 30000, frame_size: int = 2048, hop_size: int = 512):
    frames = np.lib.stride_tricks.sliding_window_view(y, frame_size)[::hop_size]
    
    spectrum = np.abs(np.fft.fft(frames, n=frame_size, axis=-1)[:, :frame_size // 2])
    
    variances = np.var(spectrum, axis=1)
    
    return variances

In [None]:
variance_descriptor = spectral_variance(signal)

In [68]:
import pandas as pd

def make_dataset(make_csv: bool = False):
    pipeline = {
        'centoid': centroid_descriptor,
        'spread': spread_descriptor,
        'skewness': skewness_descriptor,
        'kurtosis': kurtosis_descriptor,
        'entropy': entropy_descriptor,
        'flatness': flatness_descriptor,
        'crest': crest_descriptor,
        'flux': flux_descriptor,
        'slope': slope_descriptor,
        'mean': mean_descriptor,
        'std': std_descriptor,
        'var': variance_descriptor,
        **{f'percentile{i+1}': percentile_descriptor[i] for i in range(100)},
        'moving_avg': moving_avg,
        'moving_std': moving_std,
        'ema': ema,
        **{f'lag_{i+1}': lags[i] for i in range(10)},
        **{f'lag_diff_{i+1}': lag_diffs[i] for i in range(10)},
        'powers': pow_descriptor,
        **{f'band_pow_{i*1000}_{(i+1)*1000}': band_pows[i] for i in range(30)},
        'peak_freqs': peak_freqs
    }

    normalize = lambda arr: arr if arr.max() == arr.min() == 0 else (arr - arr.min()) / (arr.max() - arr.min())
    pipeline = {key: normalize(value) for key, value in pipeline.items()}

    if make_csv:
        dataset = pd.DataFrame(pipeline)
        dataset.to_csv('dataset.csv', index=False)
        return dataset

    return pipeline

In [69]:
dataset = make_dataset(False)

In [70]:
dataset

{'centoid': array([0.57524005, 0.52656162, 0.59298475, 0.58696939, 0.2951217 ,
        0.48952334, 0.58394852, 0.15901697, 0.88426114, 0.83137398,
        0.6159287 , 0.52215109, 0.71429717, 0.34593949, 0.59098702,
        0.64638846, 0.4143491 , 0.83246195, 0.73012231, 0.26803463,
        0.36556594, 0.54478   , 0.58888005, 0.99922576, 0.99785486,
        0.66489171, 0.80137352, 0.70914944, 0.43765479, 0.35933155,
        0.46279727, 0.31515128, 0.40225568, 0.82731669, 0.61999821,
        0.58107619, 0.75775911, 0.3930813 , 0.3806855 , 0.67745834,
        0.36829169, 0.65437249, 1.        , 0.77119796, 0.6187552 ,
        0.58350935, 0.44143289, 0.31678779, 0.65609318, 0.47431947,
        0.43860341, 0.67387236, 0.55883991, 0.36837727, 0.80794447,
        0.61081097, 0.48625373, 0.6577613 , 0.3555263 , 0.47503333,
        0.78511156, 0.604288  , 0.23527821, 0.67078476, 0.58391808,
        0.32090681, 0.73312919, 0.47286798, 0.26582525, 0.43053915,
        0.62862691, 0.29263526, 0.543

In [71]:
dataframe_dataset = make_dataset(True)