In [3]:
import pandas as pd
import numpy as np

In [4]:
SAMPLE_PATH: str = "./Выборка_Н22_5_1.csv"

In [5]:
df = pd.read_csv(SAMPLE_PATH, sep=';', usecols=[1], header=0)

In [6]:
df

Unnamed: 0,Signals
0,5300411994
1,5300411994
2,-2258525978
3,-1136987869
4,2413023575
...,...
65810,3793774319
65811,5266079194
65812,404554818
65813,-3249599451


In [7]:
df['Signals'] = df['Signals'].str.replace(',', '.').astype('float32')

In [8]:
signal: np.array = df['Signals'].values

In [9]:
mean_value = np.nanmean(signal)
signal = np.nan_to_num(signal, nan=mean_value)

In [10]:
def spectral_centroid(y: np.array, sr: float = 30000, frame_size: float = 2048, hop_size: float = 512):
    centroids = []
    num_frames = int(np.ceil(len(y) / hop_size))
    
    for i in range(num_frames):
        start = i * hop_size
        end = start + frame_size
        frame = y[start:end]
        
        if len(frame) < frame_size:
            break
        
        spectrum = np.abs(np.fft.fft(frame)[:frame_size // 2])
        frequencies = np.fft.fftfreq(frame_size, 1 / sr)[:frame_size // 2]
        
        centroid = np.sum(frequencies * spectrum) / np.sum(spectrum)
        centroids.append(centroid)
    
    return np.array(centroids)

In [11]:
centroid_descriptor = spectral_centroid(signal)

In [12]:
centroid_descriptor

array([8300.40153501, 8285.51096293, 8305.82957987, 8303.98950131,
       8214.71424482, 8274.18107248, 8303.06542726, 8173.08025174,
       8394.93007999, 8378.75206777, 8312.84806089, 8284.16179535,
       8342.93865384, 8230.2592399 , 8305.21848247, 8322.16560306,
       8251.18551807, 8379.08487458, 8347.77951341, 8206.42839964,
       8236.26290875, 8291.08390632, 8304.57396606, 8430.09738016,
       8429.67802624, 8327.82568663, 8369.57502491, 8341.36398101,
       8258.3146529 , 8234.35582992, 8266.00565552, 8220.84122845,
       8247.48618001, 8377.51095823, 8314.09291152, 8302.18679168,
       8356.23352111, 8244.67976777, 8240.88793399, 8331.66977751,
       8237.09670675, 8324.60789091, 8430.33421905, 8360.34442038,
       8313.71267848, 8302.9310876 , 8259.47036166, 8221.34183092,
       8325.13424439, 8269.53025858, 8258.60483294, 8330.5728374 ,
       8295.38478658, 8237.12288492, 8371.58505891, 8311.28256378,
       8273.18090915, 8325.64451584, 8233.19181565, 8269.74862

In [13]:
def spectral_spread(y: np.array, sr: float = 30000, frame_size: float = 2048, hop_size: float = 512):
    centroids = spectral_centroid(y, sr, frame_size, hop_size)
    spreads = []
    num_frames = int(np.ceil(len(y) / hop_size))
    
    for i in range(num_frames):
        start = i * hop_size
        end = start + frame_size
        frame = y[start:end]
        
        if len(frame) < frame_size:
            break
        
        spectrum = np.abs(np.fft.fft(frame)[:frame_size // 2])
        frequencies = np.fft.fftfreq(frame_size, 1 / sr)[:frame_size // 2]
        
        centroid = centroids[i]
        spread = np.sqrt(np.sum(((frequencies - centroid) ** 2) * spectrum) / np.sum(spectrum))
        spreads.append(spread)
    
    return np.array(spreads)

In [14]:
spread_descriptor = spectral_spread(signal)

In [15]:
spread_descriptor

array([2836.75059479, 2693.22775006, 2741.9740911 , 2635.43488788,
       2654.89786474, 2808.8899267 , 2742.92652468, 2796.31000938,
       2758.53375978, 2734.07460372, 2783.02689699, 2831.41807969,
       2803.11348244, 2744.80006785, 2770.20247629, 2784.00931982,
       2744.4419992 , 2786.97526294, 2770.35103429, 2748.09624069,
       2760.53720077, 2758.72423996, 2822.09102549, 2811.51432906,
       2837.14796344, 2825.92318045, 2744.65597486, 2796.67774195,
       2815.72275476, 2756.08577627, 2782.3681175 , 2760.6051056 ,
       2668.98663519, 2695.09009581, 2672.93456923, 2666.78513468,
       2674.92037813, 2658.77044734, 2677.26677222, 2745.7173233 ,
       2748.00964655, 2678.40757353, 2723.56765655, 2674.0422889 ,
       2657.01431162, 2729.26182732, 2668.15105381, 2636.56944267,
       2692.0663933 , 2691.58239719, 2641.33001829, 2695.56396256,
       2709.54860965, 2702.15024658, 2773.27124897, 2757.79798174,
       2756.48061612, 2786.62527117, 2761.68069947, 2626.96557

In [16]:
def spectral_skewness(y: np.array, sr: float = 30000, frame_size: float = 2048, hop_size: float = 512):
    centroids = spectral_centroid(y, sr, frame_size, hop_size)
    spreads = spectral_spread(y, sr, frame_size, hop_size)
    skewnesses = []

    num_frames = int(np.ceil(len(y) / hop_size))

    for i in range(num_frames):
        start = i * hop_size
        end = start + frame_size
        frame = y[start:end]
        
        if len(frame) < frame_size:
            break
        
        spectrum = np.abs(np.fft.fft(frame)[:frame_size // 2])
        frequencies = np.fft.fftfreq(frame_size, 1 / sr)[:frame_size // 2]
        
        centroid = centroids[i]
        spread = spreads[i]
        skewness = np.sum((frequencies - centroid) ** 3 * spectrum) / (spread ** 3 * np.sum(spectrum))
        skewnesses.append(skewness)
    
    return np.array(skewnesses)

In [17]:
skewness_descriptor = spectral_skewness(signal)

In [18]:
skewness_descriptor

array([-0.8042887 , -0.82818403, -0.86205422, -0.92068009, -0.82246863,
       -0.82272153, -0.80668495, -0.74852693, -0.8915158 , -0.89431927,
       -0.84746781, -0.86563705, -0.91258265, -0.77403381, -0.82205136,
       -0.84779629, -0.82173779, -0.96919425, -0.9414606 , -0.79534643,
       -0.8070194 , -0.81633973, -0.73562897, -0.84022148, -0.88747964,
       -0.86524005, -0.96133779, -0.98806611, -0.89946299, -0.86560621,
       -0.87894545, -0.76466166, -0.822953  , -0.91682325, -0.8370136 ,
       -0.88762318, -0.95551645, -0.88628056, -0.82865024, -0.8465651 ,
       -0.8074834 , -0.8478791 , -0.9654725 , -0.92432493, -0.91155975,
       -0.95620229, -0.89197218, -0.77773524, -0.81374246, -0.79044882,
       -0.6552981 , -0.74265625, -0.71601516, -0.65517445, -0.81455885,
       -0.83055642, -0.75320037, -0.84580849, -0.82629679, -0.81122891,
       -0.9193309 , -0.90247744, -0.79601734, -0.83990075, -0.78080963,
       -0.73254239, -0.87602403, -0.85060697, -0.84517194, -0.94

In [19]:
def spectral_kurtosis(y: np.array, sr: float = 30000, frame_size: float = 2048, hop_size: float = 512):
    centroids = spectral_centroid(y, sr, frame_size, hop_size)
    spreads = spectral_spread(y, sr, frame_size, hop_size)
    kurtosises = []

    num_frames = int(np.ceil(len(y) / hop_size))

    for i in range(num_frames):
        start = i * hop_size
        end = start + frame_size
        frame = y[start:end]
        
        if len(frame) < frame_size:
            break
        
        spectrum = np.abs(np.fft.fft(frame)[:frame_size // 2])
        frequencies = np.fft.fftfreq(frame_size, 1 / sr)[:frame_size // 2]
        
        centroid = centroids[i]
        spread = spreads[i]
        kurtosis = np.sum((frequencies - centroid) ** 4 * spectrum) / (spread ** 4 * np.sum(spectrum))
        kurtosises.append(kurtosis)
    
    return np.array(kurtosises)

In [20]:
kurtosis_descriptor = spectral_kurtosis(signal)

In [21]:
kurtosis_descriptor

array([4.75992486, 5.2005155 , 5.06847249, 5.5043048 , 5.27688485,
       4.77502233, 5.01026191, 4.67316132, 5.13591779, 5.26186705,
       5.00038518, 4.816098  , 5.02442432, 4.902213  , 4.86879699,
       4.83646534, 4.90828249, 5.11244642, 5.13452265, 5.01533389,
       4.96147823, 4.99607055, 4.75314529, 4.93566596, 4.89977813,
       4.88305287, 5.28769189, 5.10788819, 4.93519157, 4.95987439,
       4.88009787, 4.8948751 , 5.23359412, 5.31395589, 5.30312274,
       5.33506931, 5.434421  , 5.38865109, 5.2058523 , 5.01110532,
       4.95570586, 5.25892691, 5.33743894, 5.454377  , 5.42683832,
       5.16780087, 5.34094437, 5.18862443, 5.06336007, 5.03006991,
       5.10114138, 5.09061651, 5.02643299, 4.98772666, 4.96842102,
       5.00044609, 4.92422218, 4.89281457, 4.9186267 , 5.46426332,
       5.68771219, 5.54449161, 5.21622876, 5.15623945, 4.95607557,
       5.07358474, 5.38509901, 5.01931209, 5.32809926, 5.41592548,
       5.45859095, 5.32990247, 5.23110621, 4.82513548, 5.06874

In [22]:
def spectral_entropy(y: np.array, sr: float = 30000, frame_size: float = 2048, hop_size: float = 512):
    num_frames = int(np.ceil(len(y) / hop_size))
    entropies = []

    for i in range(num_frames):
        start = i * hop_size
        end = start + frame_size
        frame = y[start:end]
        
        if len(frame) < frame_size:
            break
        
        spectrum = np.abs(np.fft.fft(frame)[:frame_size // 2])

        entropy = -np.sum(spectrum * np.log(spectrum)) / np.log(end - start)
        entropies.append(entropy)

    return np.array(entropies)

In [23]:
entropy_descriptor = spectral_entropy(signal)

In [24]:
entropy_descriptor

array([-599642.42614232, -693051.08668341, -676084.97756011,
       -724473.26173961, -743053.14208711, -666356.05509192,
       -704136.38874439, -749395.49652386, -690295.83955344,
       -684521.86165883, -652060.06232989, -635790.13692467,
       -624585.54505047, -678964.25798025, -671984.85977373,
       -683522.95107221, -689865.30303463, -625549.21402398,
       -640888.13407703, -648438.38825843, -657907.05543112,
       -667953.8532481 , -644781.19129586, -650629.26374468,
       -624211.64841192, -625938.1104295 , -639283.22857268,
       -603392.22685661, -597433.37437157, -662481.54316068,
       -649983.39725204, -666179.11046989, -737730.75989315,
       -708892.95902994, -729978.60823877, -753186.96864164,
       -697895.98292839, -702259.2637462 , -701062.0343693 ,
       -663887.53678416, -686461.46448921, -749250.4847808 ,
       -700096.70325707, -708145.18102013, -727001.75799394,
       -676151.85617172, -681700.29156969, -772391.95369216,
       -724258.80143974,

In [25]:
def spectral_flatness(y: np.array, sr: float = 30000, frame_size: float = 2048, hop_size: float = 512):
    num_frames = int(np.ceil(len(y) / hop_size))
    flatnesses = []

    for i in range(num_frames):
        start = i * hop_size
        end = start + frame_size
        frame = y[start:end]
        
        if len(frame) < frame_size:
            break
        
        spectrum = np.abs(np.fft.fft(frame)[:frame_size // 2])

        flatness = np.prod(spectrum ** (1 / (end - start))) / (1 / (end - start) * np.sum(spectrum))
        flatnesses.append(flatness)

    return np.array(flatnesses)

In [26]:
flatness_descriptor = spectral_flatness(signal)

In [27]:
flatness_descriptor

array([0.05260014, 0.04825367, 0.04947662, 0.04585385, 0.04644837,
       0.05136588, 0.0488578 , 0.04993364, 0.04876834, 0.04782306,
       0.05016508, 0.05225807, 0.05095687, 0.04956553, 0.0498241 ,
       0.05014614, 0.04940198, 0.05065   , 0.05015156, 0.05036344,
       0.0501115 , 0.04962877, 0.051022  , 0.05017628, 0.05116517,
       0.05148059, 0.04994178, 0.05186903, 0.05270917, 0.05088616,
       0.05101859, 0.05044602, 0.04666116, 0.04660862, 0.04668533,
       0.04649649, 0.0473855 , 0.04769746, 0.04788675, 0.0494646 ,
       0.04928571, 0.04668886, 0.04796019, 0.04701877, 0.04669541,
       0.04926786, 0.04766221, 0.04546798, 0.04728398, 0.04731379,
       0.04454631, 0.04707491, 0.04689804, 0.04625393, 0.04897536,
       0.04873491, 0.04872799, 0.04995561, 0.04940515, 0.04545831,
       0.04488335, 0.04564689, 0.04697686, 0.04812639, 0.04973736,
       0.04756141, 0.04693364, 0.04938179, 0.04659806, 0.04682232,
       0.04666907, 0.0472455 , 0.04800183, 0.05157116, 0.05045

In [28]:
def spectral_crest(y: np.array, sr: float = 30000, frame_size: float = 2048, hop_size: float = 512):
    num_frames = int(np.ceil(len(y) / hop_size))
    crests = []

    for i in range(num_frames):
        start = i * hop_size
        end = start + frame_size
        frame = y[start:end]
        
        if len(frame) < frame_size:
            break
        
        spectrum = np.abs(np.fft.fft(frame)[:frame_size // 2])

        crest = spectrum[np.argmax(spectrum)] / (1 / (end - start) * np.sum(spectrum))
        crests.append(crest)

    return np.array(crests)

In [29]:
crest_descriptor = spectral_crest(signal)

In [30]:
crest_descriptor

array([66.98101509, 59.40577003, 60.64998876, 57.5085496 , 56.0988037 ,
       60.69793826, 58.23550353, 54.54262336, 59.81589088, 60.80194033,
       63.05629647, 64.22922327, 65.29547952, 59.98853988, 60.48390499,
       59.60387774, 58.97233166, 64.95700836, 63.82378812, 63.11935907,
       62.29971658, 61.34009644, 63.19050749, 62.63799585, 64.90536142,
       65.24403373, 63.86472669, 67.20744295, 67.62836685, 61.31100697,
       62.66128348, 61.3047468 , 56.71391707, 58.5725855 , 57.24955476,
       55.899343  , 59.42218825, 59.28412929, 59.190327  , 61.50823351,
       60.1857123 , 55.62505745, 59.02533566, 58.69519912, 57.15676764,
       60.90993983, 60.46783528, 53.93575232, 57.03234707, 56.05144925,
       52.25745222, 56.01015692, 56.22757272, 52.8300284 , 59.67268725,
       60.53314087, 59.85682368, 60.90453101, 60.15738669, 56.17479661,
       55.92450339, 57.36599902, 56.05881544, 59.75474091, 60.67215541,
       58.77957283, 58.3695104 , 61.21110425, 55.64790272, 57.37

In [31]:
def spectral_flux(y: np.array, sr: float = 30000, frame_size: float = 2048, hop_size: float = 512, p: float = 2):
    num_frames = int(np.ceil(len(y) / hop_size))
    fluxes = []

    prev_spectrum = np.abs(np.fft.fft(y[:frame_size])[:frame_size // 2])
    
    for i in range(1, num_frames):
        start = i * hop_size
        end = start + frame_size
        frame = y[start:end]

        if len(frame) < frame_size:
            break
        
        spectrum = np.abs(np.fft.fft(frame)[:frame_size // 2])
        
        flux = np.sum(np.abs(spectrum - prev_spectrum) ** p) ** (1/p)
        fluxes.append(flux)
        
        prev_spectrum = spectrum
    
    return np.array(fluxes)

In [32]:
flux_descriptor = spectral_flux(signal)

In [33]:
flux_descriptor

array([21732.31123912, 15852.61290969, 18829.40315661, 16877.71721046,
       20953.35044088, 18779.24899534, 23844.90448087, 18673.32941445,
       13914.19905423, 19905.43666917, 20217.62929151, 16572.98637306,
       17868.23705376, 17141.51929375, 17473.31928204, 21300.45978121,
       15648.34220368, 16717.77683616, 20316.53917254, 19883.28809973,
       16124.99175044, 16190.94292339, 19392.83366651, 15996.46190537,
       17402.65878747, 16090.56700329, 17906.98598415, 14600.92856728,
       16445.1193592 , 18668.70186261, 18712.54260315, 20387.74255754,
       17667.72243851, 18597.63796556, 20884.36320225, 21829.58557868,
       19414.26907607, 24286.38911941, 17711.81896365, 20853.39460079,
       24375.56335141, 18691.1915328 , 16376.06439068, 22547.55957995,
       22957.82616211, 18955.78906678, 20046.45607023, 22538.34405298,
       16550.70899842, 19549.81852171, 20964.60880269, 20454.66513478,
       20737.91379366, 22066.5476031 , 17355.43093559, 20082.79552803,
      

In [34]:
def spectral_slope(y: np.array, sr: float = 30000, frame_size: float = 2048, hop_size: float = 512):
    num_frames = int(np.ceil(len(y) / hop_size))
    slopes = []

    for i in range(1, num_frames):
        start = i * hop_size
        end = start + frame_size
        frame = y[start:end]

        if len(frame) < frame_size:
            break
        
        spectrum = np.abs(np.fft.fft(frame)[:frame_size // 2])
        frequencies = np.fft.fftfreq(frame_size, 1 / sr)[:frame_size // 2]
        
        slope = np.sum((frequencies - np.mean(frequencies))*(spectrum - np.mean(spectrum))) / np.sum((frequencies - np.mean(frequencies)) ** 2)
        slopes.append(slope)
    
    return np.array(slopes)

In [35]:
slope_descriptor = spectral_slope(signal)

In [36]:
slope_descriptor

array([0.0293919 , 0.02951269, 0.03105072, 0.02830802, 0.02818799,
       0.03051775, 0.02733132, 0.03321888, 0.0322235 , 0.02868758,
       0.02721764, 0.02873948, 0.0270186 , 0.02949787, 0.03062994,
       0.02818494, 0.02998372, 0.02958636, 0.02497578, 0.02642795,
       0.02877355, 0.0284207 , 0.03297892, 0.03178283, 0.02834353,
       0.03013634, 0.0278802 , 0.02497643, 0.02662189, 0.02735836,
       0.02625468, 0.02961742, 0.03348756, 0.0319024 , 0.03225727,
       0.03210352, 0.02810629, 0.02801957, 0.0301298 , 0.02743662,
       0.0330226 , 0.0349736 , 0.03262549, 0.03153334, 0.02940804,
       0.02784657, 0.0297191 , 0.03227142, 0.03054491, 0.03259103,
       0.03321111, 0.03176479, 0.03138432, 0.03272161, 0.03012304,
       0.02897748, 0.03044437, 0.02754989, 0.03075892, 0.03470677,
       0.03159208, 0.02775141, 0.03117991, 0.02967683, 0.02771221,
       0.03261457, 0.02822065, 0.02851762, 0.02950599, 0.0306917 ,
       0.02781687, 0.03005683, 0.02788063, 0.02703192, 0.02895

In [37]:
def spectral_mean(y: np.array, sr: float = 30000, frame_size: float = 2048, hop_size: float = 512):
    num_frames = int(np.ceil(len(y) / hop_size))
    means = []

    for i in range(1, num_frames):
        start = i * hop_size
        end = start + frame_size
        frame = y[start:end]

        if len(frame) < frame_size:
            break
        
        spectrum = np.abs(np.fft.fft(frame)[:frame_size // 2])
        
        mean = np.mean(spectrum)
        means.append(mean)
    
    return np.array(means)

In [38]:
mean_descriptor = spectral_mean(signal)

In [39]:
mean_descriptor

array([695.09729717, 680.51375847, 717.60206065, 735.10599996,
       676.29002647, 706.08901411, 753.17223278, 690.33019263,
       681.87130264, 655.82765025, 644.77488143, 633.76241097,
       686.83513823, 680.68354877, 692.36638062, 696.71771096,
       634.23782541, 648.74442809, 656.10316845, 666.39613809,
       675.72400208, 656.34794869, 659.63288627, 635.99359844,
       636.34176772, 644.37946361, 615.95432178, 611.65597937,
       673.012511  , 663.3245627 , 676.04802497, 735.71602246,
       709.61362403, 728.21632225, 747.14647849, 697.04715275,
       700.78421268, 702.16247998, 673.34584483, 691.05541795,
       744.25932294, 699.35315273, 705.02408528, 720.12533971,
       680.52651938, 680.91592336, 764.72975295, 726.86932733,
       737.22486967, 797.8299652 , 743.17919603, 741.97411501,
       790.45968789, 698.05797432, 689.96063262, 696.12249548,
       685.29745994, 697.56747236, 742.18155039, 746.33885698,
       725.37679912, 739.39801   , 698.5136062 , 686.64

In [40]:
def spectral_std(y: np.array, sr: float = 30000, frame_size: float = 2048, hop_size: float = 512):
    num_frames = int(np.ceil(len(y) / hop_size))
    stds = []

    for i in range(1, num_frames):
        start = i * hop_size
        end = start + frame_size
        frame = y[start:end]

        if len(frame) < frame_size:
            break
        
        spectrum = np.abs(np.fft.fft(frame)[:frame_size // 2])
        
        std = np.std(spectrum)
        stds.append(std)
    
    return np.array(stds)

In [41]:
std_descriptor = spectral_std(signal)

In [42]:
std_descriptor

array([1338.47635713, 1311.74605556, 1415.6443587 , 1464.89805588,
       1269.09262284, 1358.31786092, 1356.16087529, 1404.33308548,
       1425.69513926, 1361.51336066, 1291.9612537 , 1248.47400218,
       1262.15379081, 1268.46813882, 1260.81379856, 1278.78476559,
       1244.92097738, 1241.54116615, 1274.18095103, 1252.53615812,
       1241.97589035, 1226.54904624, 1242.55708821, 1204.13836536,
       1231.07421438, 1309.17284822, 1208.62833394, 1187.20622124,
       1239.49633139, 1180.60156587, 1236.13565713, 1365.56072401,
       1322.21135933, 1382.10679987, 1433.1968131 , 1347.73806529,
       1372.17265698, 1341.83634325, 1224.69048363, 1306.46204232,
       1423.68147224, 1369.71923689, 1393.66466351, 1463.91106052,
       1305.16042699, 1377.56991172, 1457.85284693, 1332.16208444,
       1351.55392976, 1419.96315873, 1283.15375178, 1296.59147155,
       1399.66287057, 1287.03895732, 1337.80384516, 1364.3419069 ,
       1299.28125365, 1378.39338063, 1587.99052785, 1564.23157

In [43]:
window_size = 256
moving_avg = np.convolve(signal, np.ones(window_size)/window_size, mode='valid')

In [44]:
moving_avg

array([10.18003267,  9.81117859,  9.68962797, ..., 10.50497543,
       10.34922612, 10.41847288])

In [45]:
alpha = 2 / (window_size + 1)
ema = np.empty(len(signal))
ema[0] = signal[0]
for i in range(1, len(signal)):
    ema[i] = alpha * signal[i] + (1 - alpha) * ema[i - 1]

In [46]:
ema

array([53.00411987, 53.00411987, 52.41587567, ..., 10.47733093,
       10.14290817, 10.33752418])

In [47]:
moving_std = np.array([np.std(signal[i:i+window_size]) for i in range(len(signal) - window_size + 1)])

In [48]:
moving_std

array([25.848246, 25.908176, 25.777916, ..., 23.21437 , 23.368086,
       23.415167], dtype=float32)

In [67]:
def lag(arr, lag_value):
    result = np.empty_like(arr)
    result[:lag_value] = np.nan
    result[lag_value:] = arr[:-lag_value]
    return result

In [68]:
max_dif = 10
lags = np.zeros((max_dif, len(signal)))

In [69]:
lags

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [70]:
for i in range(max_dif):
    lags[i] = lag(signal, i+1)

In [71]:
lags

array([[         nan,  53.00411987,  53.00411987, ...,  52.6607933 ,
          4.04554796, -32.49599457],
       [         nan,          nan,  53.00411987, ...,   3.79377437,
         52.6607933 ,   4.04554796],
       [         nan,          nan,          nan, ..., -30.71068954,
          3.79377437,  52.6607933 ],
       ...,
       [         nan,          nan,          nan, ...,  66.10780334,
         13.10940742, -50.36049271],
       [         nan,          nan,          nan, ..., -11.43854427,
         66.10780334,  13.10940742],
       [         nan,          nan,          nan, ..., -30.44747162,
        -11.43854427,  66.10780334]])

In [76]:
lag_diffs = np.zeros((max_dif, len(signal)))

In [77]:
lag_diffs

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [78]:
for i in range(max_dif):
    lag_diffs[i] = signal - lags[i]

In [79]:
lag_diffs

array([[         nan,   0.        , -75.58938026, ..., -48.61524534,
        -36.54154253,  67.64706039],
       [         nan,          nan, -75.58938026, ...,   0.2517736 ,
        -85.15678787,  31.10551786],
       [         nan,          nan,          nan, ...,  34.75623751,
        -36.28976893, -17.50972748],
       ...,
       [         nan,          nan,          nan, ..., -62.06225538,
        -45.60540199,  85.51155853],
       [         nan,          nan,          nan, ...,  15.48409224,
        -98.60379791,  22.0416584 ],
       [         nan,          nan,          nan, ...,  34.49301958,
        -21.05745029, -30.95673752]])

In [95]:
def spectral_pow(y: np.array, sr: float = 30000, frame_size: float = 2048, hop_size: float = 512):
    num_frames = int(np.ceil(len(y) / hop_size))
    pows = []

    for i in range(1, num_frames):
        start = i * hop_size
        end = start + frame_size
        frame = y[start:end]

        if len(frame) < frame_size:
            break

        spectrum = np.abs(np.fft.fft(frame)[:frame_size // 2])
        
        spec_pow = np.sum(spectrum**2)
        pows.append(spec_pow)
    
    return np.array(pows)

In [96]:
pow_descriptor = spectral_pow(signal)

In [97]:
pow_descriptor

array([2.32927151e+09, 2.23618733e+09, 2.57945771e+09, 2.75077852e+09,
       2.11759543e+09, 2.39983525e+09, 2.46419531e+09, 2.50747616e+09,
       2.55749643e+09, 2.33864042e+09, 2.13493609e+09, 2.00739034e+09,
       2.11432929e+09, 2.12207771e+09, 2.11867918e+09, 2.17160299e+09,
       1.99893592e+09, 2.00938885e+09, 2.10330467e+09, 2.06124098e+09,
       2.04708561e+09, 1.98166036e+09, 2.02656119e+09, 1.89894355e+09,
       1.96656596e+09, 2.18025824e+09, 1.88434655e+09, 1.82638761e+09,
       2.03704012e+09, 1.87783120e+09, 2.03271403e+09, 2.46377898e+09,
       2.30583744e+09, 2.49909066e+09, 2.67497571e+09, 2.35752717e+09,
       2.43093126e+09, 2.34860229e+09, 2.00013968e+09, 2.23682627e+09,
       2.64272986e+09, 2.42199104e+09, 2.49790480e+09, 2.72549488e+09,
       2.21855753e+09, 2.41801764e+09, 2.77519003e+09, 2.35826671e+09,
       2.42708330e+09, 2.71649590e+09, 2.25157004e+09, 2.28523523e+09,
       2.64589585e+09, 2.19520431e+09, 2.32014316e+09, 2.40231974e+09,
      

In [104]:
def band_powers(y: np.array, sr: float = 30000, frame_size: float = 2048, hop_size: float = 512):
    num_frames = int(np.ceil(len(y) / hop_size))
    band_pows = []

    for i in range(1, num_frames):
        start = i * hop_size
        end = start + frame_size
        frame = y[start:end]

        if len(frame) < frame_size:
            break

        freqs = np.fft.fftfreq(len(frame), 1/30000)[:frame_size // 2]
        spectrum = np.abs(np.fft.fft(frame)[:frame_size // 2])

        local_band_pows = []
        for f in range(0,30000,1000):
            band_pow = np.sum(spectrum[(freqs >= f) & (freqs <= f+1000)]**2)
            local_band_pows.append(band_pow)
        band_pows.append(local_band_pows)

    return np.array(band_pows)

In [105]:
band_pows = band_powers(signal)

In [106]:
band_pows

array([[4.27036159e+08, 7.58431282e+05, 1.92164785e+06, ...,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       [4.26788900e+08, 8.18510669e+05, 1.97741034e+06, ...,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       [4.26628640e+08, 6.76467464e+05, 2.29384068e+06, ...,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       ...,
       [4.52768351e+08, 7.43825437e+05, 1.81210604e+06, ...,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       [4.47922810e+08, 6.35939710e+05, 1.93095891e+06, ...,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       [4.50857613e+08, 8.12260223e+05, 1.74687163e+06, ...,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00]])

In [90]:
def freq_peaks(y: np.array, sr: float = 30000, frame_size: float = 2048, hop_size: float = 512):
    num_frames = int(np.ceil(len(y) / hop_size))
    peaks = []

    for i in range(1, num_frames):
        start = i * hop_size
        end = start + frame_size
        frame = y[start:end]

        if len(frame) < frame_size:
            break

        freqs = np.fft.fftfreq(len(frame), 1/30000)[:frame_size // 2]
        spectrum = np.abs(np.fft.fft(frame)[:frame_size // 2])
        
        peak = freqs[np.argmin(spectrum)]
        peaks.append(peak)
    
    return np.array(peaks)

In [109]:
peak_freqs = freq_peaks(signal)

In [110]:
pipeline = {
    'centoid': centroid_descriptor,
    'spread': spread_descriptor,
    'skewness': skewness_descriptor,
    'kurtosis': kurtosis_descriptor,
    'entropy': entropy_descriptor,
    'flatness': flatness_descriptor,
    'crest': crest_descriptor,
    'flux': flux_descriptor,
    'slope': slope_descriptor,
    'mean': mean_descriptor,
    'std': std_descriptor,
    'moving_avg': moving_avg,
    'moving_std': moving_std,
    'ema': ema,
    'lags': lags,
    'lag_diffs': lag_diffs,
    'powers': pow_descriptor,
    'band_powers': band_pows,
    'peak_freqs': peak_freqs
}

In [111]:
pipeline

{'centoid': array([8300.40153501, 8285.51096293, 8305.82957987, 8303.98950131,
        8214.71424482, 8274.18107248, 8303.06542726, 8173.08025174,
        8394.93007999, 8378.75206777, 8312.84806089, 8284.16179535,
        8342.93865384, 8230.2592399 , 8305.21848247, 8322.16560306,
        8251.18551807, 8379.08487458, 8347.77951341, 8206.42839964,
        8236.26290875, 8291.08390632, 8304.57396606, 8430.09738016,
        8429.67802624, 8327.82568663, 8369.57502491, 8341.36398101,
        8258.3146529 , 8234.35582992, 8266.00565552, 8220.84122845,
        8247.48618001, 8377.51095823, 8314.09291152, 8302.18679168,
        8356.23352111, 8244.67976777, 8240.88793399, 8331.66977751,
        8237.09670675, 8324.60789091, 8430.33421905, 8360.34442038,
        8313.71267848, 8302.9310876 , 8259.47036166, 8221.34183092,
        8325.13424439, 8269.53025858, 8258.60483294, 8330.5728374 ,
        8295.38478658, 8237.12288492, 8371.58505891, 8311.28256378,
        8273.18090915, 8325.64451584,