# CWRU Bearing Fault Data - Data Feature Extraction (divided by load)

In [198]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import os
import scipy.io
import pickle
import librosa

from scipy.stats import skew, kurtosis, entropy
from scipy.fft import fft
from scipy.signal import stft
from IPython.display import display_html

In [200]:
def extract_time_domain_features(signal):
    signal = np.array(signal)

    zcr = librosa.feature.zero_crossing_rate(signal, frame_length=len(signal), hop_length=len(signal))[0][0]
    rms = np.sqrt(np.mean(signal**2))
    mean = np.mean(signal)
    avg = np.mean(np.abs(signal))

    return {
        'mean': mean,
        'std': np.std(signal),
        'variance': np.var(signal),
        'min': np.min(signal),
        'max': np.max(signal),
        'peak_to_peak': np.ptp(signal),
        'skewness': skew(signal),
        'kurtosis': kurtosis(signal),
        'rms': np.sqrt(np.mean(signal**2)), # root mean square
        'zcr': zcr, # zero crossing rate
        'crest_factor': np.max(np.abs(signal)) / rms,
        'form_factor': rms / avg
    }

In [201]:
def extract_frequency_domain_features(fft_amplitudes):
    fft_amplitudes = np.array(fft_amplitudes)
    freqs = np.linspace(0, SAMPLE_RATE / 2, len(fft_amplitudes))

    norm_fft_amplitudes = fft_amplitudes / (np.sum(fft_amplitudes) + 1e-12)
    power_spectrum = fft_amplitudes**2

    peak_idx = np.argmax(fft_amplitudes)
    peak_freq = freqs[peak_idx]

    cumulative_power = np.cumsum(power_spectrum)
    median_freq = freqs[np.searchsorted(cumulative_power, cumulative_power[-1] / 2)]

    low_band = power_spectrum[freqs < 3000]
    high_band = power_spectrum[freqs >= 3000]
    band_energy_ratio = np.sum(high_band) / (np.sum(low_band) + 1e-12)

    return {
        'mean': np.mean(fft_amplitudes), # mean magnitude
        'std': np.std(fft_amplitudes), # standard deviation magnitude
        'max': np.max(fft_amplitudes), # max magnitude
        'skewness': skew(fft_amplitudes), # skewness magnitude
        'kurtosis': kurtosis(fft_amplitudes), # kurtosis magnitude
        'spec_centroid': np.sum(freqs * norm_fft_amplitudes), # spectral centroid
        'spec_spread': np.sqrt(np.sum(((freqs - np.sum(freqs * norm_fft_amplitudes))**2) * norm_fft_amplitudes)), # spectral_spread
        'spec_flatness': np.exp(np.mean(np.log(fft_amplitudes + 1e-12))) / (np.mean(fft_amplitudes) + 1e-12), # spectral_flatness
        'spec_entropy': entropy(norm_fft_amplitudes), # spectral_entropy
        'total_power': np.sum(power_spectrum),
        'dominant_freq': peak_freq, # dominant frequency
        'median_freq': median_freq, # median frequency
        'ber': band_energy_ratio, # band energy ratio
    }

In [202]:
def extract_time_frequency_domain_features(magnitude, freqs):
    mean_mag = np.mean(magnitude)
    max_mag = np.max(magnitude)
    spec_centroid = np.sum(freqs[:, None] * magnitude) / (np.sum(magnitude) + 1e-12)
    spec_spread = np.sqrt(np.sum(((freqs[:, None] - spec_centroid)**2) * magnitude) / (np.sum(magnitude) + 1e-12))
    total_power = np.sum(magnitude**2)

    return {
        'mean': mean_mag, # mean magnitude
        'max': max_mag, # max_magnitude
        'spec_centroid': spec_centroid, # spectral centroid
        'spec_spread': spec_spread, # spectral spread
        'total_power': total_power
    }

## 48k Drive-End Bearing Fault Data + Normal Baseline Data (48k)

In [None]:
with open('48kdrive-end_normalbaseline_loadsplit_cropped_data.pkl', 'rb') as f:
    data = pickle.load(f)

In [204]:
SAMPLE_RATE = 48000
FRAME_SIZE = 1024
HOP_SIZE = 512

### Feature extraction in time domain

In [205]:
load_feature_tables = {}

for load in data['load'].unique():
    load_df = data[data['load'] == load]

    feature_rows = []
    for _, row in load_df.iterrows():
        features = extract_time_domain_features(row['cropped_signal'])
        features['label'] = row['label']
        feature_rows.append(features)

    features_df = pd.DataFrame(feature_rows)
    load_feature_tables[load] = features_df

#### Mean, Standard deviation, Variance

In [206]:
tables = []

for load in sorted(load_feature_tables.keys()):
    df = load_feature_tables[load][['label', 'mean', 'variance', 'std']].copy()
    html_table = df.to_html(index=False)
    tables.append(f'<div style="display:inline-block; margin-right:30px;">{html_table}</div>')

display_html(''.join(tables), raw=True)

label,mean,variance,std
Normal,0.012255,0.005173,0.071921
B007,0.003296,0.007124,0.084405
B014,0.002709,0.021903,0.147998
B021,0.002516,0.010058,0.100289
OR007@12,0.00341,0.009902,0.099508
OR007@3,0.002382,0.00743,0.086197
OR014@3,0.003015,0.039618,0.199043
IR007,0.003115,0.014987,0.12242
IR014,0.003726,0.033691,0.18355
IR021,0.012099,0.040213,0.200531

label,mean,variance,std
Normal,0.010755,0.004232,0.065053
B007,0.001522,0.006816,0.082559
B014,0.003193,0.016746,0.129405
B021,0.002885,0.010584,0.10288
OR007@12,0.002853,0.010242,0.101202
OR007@3,0.002472,0.008844,0.094044
OR021@3,0.002633,0.010331,0.101642
OR014@3,0.002885,0.013624,0.116724
IR007,0.002956,0.014725,0.121347
IR014,0.002725,0.024895,0.157782

label,mean,variance,std
Normal,0.010755,0.004232,0.065053
B007,0.002599,0.007413,0.0861
B014,0.003249,0.0437,0.209045
B021,0.002549,0.011448,0.106997
OR007@12,0.003365,0.011505,0.107259
OR007@3,0.002357,0.00842,0.09176
OR021@3,0.002642,0.007432,0.086208
OR014@3,0.002725,0.053873,0.232105
IR007,0.002998,0.015601,0.124902
IR014,0.003119,0.02295,0.151493

label,mean,variance,std
Normal,0.013137,0.004172,0.064587
B007,0.002698,0.008448,0.09191
B014,0.003091,0.025098,0.158422
B021,0.002844,0.012639,0.112422
OR007@12,0.003568,0.012262,0.110732
OR007@3,0.002422,0.009185,0.095838
OR021@3,0.002767,0.011631,0.107847
OR014@3,0.002799,0.08272,0.287611
IR007,0.003009,0.016166,0.127147
IR014,0.00291,0.023016,0.15171


#### Min, Max, Peak to Peak

In [207]:
tables = []

for load in sorted(load_feature_tables.keys()):
    df = load_feature_tables[load][['label', 'min', 'max', 'peak_to_peak']].copy()
    html_table = df.to_html(index=False)
    tables.append(f'<div style="display:inline-block; margin-right:8px;">{html_table}</div>')

display_html(''.join(tables), raw=True)

label,min,max,peak_to_peak
Normal,-0.286638,0.311254,0.597892
B007,-0.693111,0.569335,1.262446
B014,-1.504555,1.549225,3.05378
B021,-0.596949,0.776034,1.372983
OR007@12,-0.480771,0.48207,0.962841
OR007@3,-0.427627,0.403065,0.830692
OR014@3,-1.186426,1.173594,2.36002
IR007,-0.653801,0.717314,1.371115
IR014,-1.138508,0.972662,2.111169
IR021,-1.135702,1.016791,2.152494

label,min,max,peak_to_peak
Normal,-0.269114,0.251382,0.520495
B007,-0.793658,0.657213,1.450871
B014,-1.471256,1.450546,2.921802
B021,-0.73908,0.855221,1.594301
OR007@12,-0.884784,0.896479,1.781264
OR007@3,-0.413539,0.492342,0.905881
OR021@3,-0.488767,0.457011,0.945779
OR014@3,-0.675893,0.649578,1.325471
IR007,-0.65039,0.594025,1.244416
IR014,-1.338628,1.124376,2.463004

label,min,max,peak_to_peak
Normal,-0.269114,0.251382,0.520495
B007,-0.85847,0.767993,1.626463
B014,-1.586179,1.719782,3.305961
B021,-0.697659,0.810957,1.508616
OR007@12,-0.539122,0.637883,1.177005
OR007@3,-0.409155,0.439117,0.848271
OR021@3,-0.513376,0.41892,0.932296
OR014@3,-1.10862,1.133635,2.242255
IR007,-0.684989,0.710816,1.395805
IR014,-1.092214,1.0633,2.155514

label,min,max,peak_to_peak
Normal,-0.282257,0.265985,0.548241
B007,-0.932053,0.953657,1.885709
B014,-1.561814,1.500495,3.062308
B021,-0.668827,0.707405,1.376232
OR007@12,-0.475448,0.51362,0.989068
OR007@3,-0.413255,0.437006,0.850261
OR021@3,-0.631629,0.591345,1.222974
OR014@3,-1.240517,1.457368,2.697885
IR007,-0.66907,0.649416,1.318486
IR014,-1.104559,1.275441,2.38


#### Skewness, Kurtosis

In [208]:
tables = []

for load in sorted(load_feature_tables.keys()):
    df = load_feature_tables[load][['label', 'skewness', 'kurtosis']].copy()
    html_table = df.to_html(index=False)
    tables.append(f'<div style="display:inline-block; margin-right:30px;">{html_table}</div>')

display_html(''.join(tables), raw=True)

label,skewness,kurtosis
Normal,-0.036569,-0.21903
B007,0.022704,0.869712
B014,0.027552,8.303194
B021,-0.024162,0.473043
OR007@12,0.004946,0.351325
OR007@3,0.007862,0.240899
OR014@3,-0.00262,3.294376
IR007,-0.151455,1.011153
IR014,0.004744,0.709505
IR021,-0.033539,1.645847

label,skewness,kurtosis
Normal,-0.182274,-0.116492
B007,-0.008605,0.979575
B014,0.052079,9.669677
B021,0.003321,0.933125
OR007@12,0.014747,0.563967
OR007@3,0.013867,0.168229
OR021@3,-0.019498,0.415923
OR014@3,-0.018879,0.801965
IR007,-0.1744,0.906075
IR014,0.029049,1.253416

label,skewness,kurtosis
Normal,-0.182274,-0.116492
B007,-0.041771,0.509472
B014,0.020107,5.125135
B021,0.012167,0.443667
OR007@12,-0.010792,0.034553
OR007@3,0.021757,0.127913
OR021@3,-0.041641,0.32611
OR014@3,0.009026,1.888559
IR007,-0.163378,0.869467
IR014,0.019568,1.439771

label,skewness,kurtosis
Normal,-0.119581,-0.043921
B007,-0.024623,2.910631
B014,0.01711,6.130103
B021,-0.018004,0.253158
OR007@12,0.000746,0.056007
OR007@3,0.015847,0.073915
OR021@3,-0.016439,0.795925
OR014@3,0.026612,1.754866
IR007,-0.095755,0.693615
IR014,0.025818,1.754562


#### RMS (root mean square)

In [209]:
tables = []

for load in sorted(load_feature_tables.keys()):
    df = load_feature_tables[load][['label', 'rms']].copy()
    html_table = df.to_html(index=False)
    tables.append(f'<div style="display:inline-block; margin-right:100px;">{html_table}</div>')

display_html(''.join(tables), raw=True)

label,rms
Normal,0.072957
B007,0.084469
B014,0.148023
B021,0.100321
OR007@12,0.099567
OR007@3,0.08623
OR014@3,0.199066
IR007,0.12246
IR014,0.183588
IR021,0.200896

label,rms
Normal,0.065936
B007,0.082573
B014,0.129444
B021,0.102921
OR007@12,0.101242
OR007@3,0.094076
OR021@3,0.101676
OR014@3,0.11676
IR007,0.121383
IR014,0.157806

label,rms
Normal,0.065936
B007,0.08614
B014,0.20907
B021,0.107028
OR007@12,0.107312
OR007@3,0.09179
OR021@3,0.086248
OR014@3,0.232121
IR007,0.124938
IR014,0.151526

label,rms
Normal,0.06591
B007,0.09195
B014,0.158452
B021,0.112458
OR007@12,0.110789
OR007@3,0.095869
OR021@3,0.107883
OR014@3,0.287625
IR007,0.127182
IR014,0.151738


#### Zero crossing rate

In [210]:
tables = []

for load in sorted(load_feature_tables.keys()):
    df = load_feature_tables[load][['label', 'zcr']].copy()
    html_table = df.to_html(index=False)
    tables.append(f'<div style="display:inline-block; margin-right:100px;">{html_table}</div>')

display_html(''.join(tables), raw=True)

label,zcr
Normal,0.083322
B007,0.22457
B014,0.222207
B021,0.202459
OR007@12,0.191797
OR007@3,0.222862
OR014@3,0.246292
IR007,0.212284
IR014,0.235514
IR021,0.249476

label,zcr
Normal,0.085485
B007,0.198795
B014,0.223468
B021,0.186168
OR007@12,0.187486
OR007@3,0.198761
OR021@3,0.230109
OR014@3,0.230109
IR007,0.213245
IR014,0.217233

label,zcr
Normal,0.085485
B007,0.183996
B014,0.237844
B021,0.187569
OR007@12,0.175141
OR007@3,0.202816
OR021@3,0.232869
OR014@3,0.251731
IR007,0.210418
IR014,0.20066

label,zcr
Normal,0.102175
B007,0.19256
B014,0.224761
B021,0.181765
OR007@12,0.18412
OR007@3,0.202749
OR021@3,0.226776
OR014@3,0.241707
IR007,0.210236
IR014,0.195105


### Feature extraction in frequency domain

In [211]:
load_fft_tables = {}
load_fft_feature_tables = {}

for load in data['load'].unique():
    load_df = data[data['load'] == load]

    fft_rows = []
    feature_rows = []

    for _, row in load_df.iterrows():
        signal = np.array(row['cropped_signal'])

        fft_result = np.fft.rfft(signal)
        fft_amplitudes = np.abs(fft_result)

        fft_dict = {f'fft_{i}': amp for i, amp in enumerate(fft_amplitudes)}
        fft_dict['label'] = row['label']
        fft_rows.append(fft_dict)

        features = extract_frequency_domain_features(fft_amplitudes)
        features['label'] = row['label']
        feature_rows.append(features)

    fft_df = pd.DataFrame(fft_rows)
    features_df = pd.DataFrame(feature_rows)

    load_fft_tables[load] = fft_df
    load_fft_feature_tables[load] = features_df

#### Mean magnitude, Standard deviation magnitude, Max magnitude

In [212]:
tables = []

for load in sorted(load_fft_feature_tables.keys()):
    df = load_fft_feature_tables[load][['label', 'mean', 'std', 'max']].copy()
    html_table = df.to_html(index=False)
    tables.append(f'<div style="display:inline-block; margin-right:2px;">{html_table}</div>')

display_html(''.join(tables), raw=True)

label,mean,std,max
Normal,5.587756,25.077865,2847.158654
B007,14.000226,25.805048,1430.498791
B014,25.82083,44.462893,1491.843175
B021,14.485033,31.69945,1246.410013
OR007@12,12.993907,32.066976,1355.260562
OR007@3,12.59745,27.181685,879.101811
OR014@3,22.559158,65.359334,3664.400126
IR007,17.276563,38.877885,1507.910033
IR014,22.807117,59.55526,4636.998313
IR021,25.818883,64.953944,2878.160274

label,mean,std,max
Normal,5.640582,22.506081,1720.435455
B007,13.707609,25.1949,1677.506951
B014,21.348834,39.578642,2209.657064
B021,13.904802,32.943995,2860.484704
OR007@12,11.340266,33.296938,2526.481971
OR007@3,10.975947,30.785596,1985.481384
OR021@3,14.739568,32.101642,1248.734019
OR014@3,17.411798,36.635618,1276.774638
IR007,16.16265,38.948183,2112.096918
IR014,21.587122,50.384007,3460.798098

label,mean,std,max
Normal,5.640582,22.506081,1720.435455
B007,12.76681,27.070241,2760.260276
B014,32.479641,64.949986,1405.188889
B021,14.539456,34.22041,3582.192397
OR007@12,11.6768,35.412036,3003.481241
OR007@3,10.569414,30.08657,1698.900425
OR021@3,12.822666,27.086113,812.33231
OR014@3,22.797053,77.330277,4570.334538
IR007,16.496704,40.145998,2249.943816
IR014,20.88937,48.313031,3975.616618

label,mean,std,max
Normal,5.657882,22.644449,2432.841204
B007,14.69634,28.366898,1512.354159
B014,26.395738,48.298495,1583.863723
B021,14.741051,36.181389,3079.551851
OR007@12,11.955704,36.59331,2719.275844
OR007@3,11.356692,31.309742,1314.182524
OR021@3,15.354501,34.190312,1110.988191
OR014@3,29.445598,95.458041,3960.840848
IR007,17.243836,40.678568,2650.482008
IR014,22.263582,47.775158,3502.272997


#### Skewness magnitude, Kurtosis magnitude

In [213]:
tables = []

for load in sorted(load_fft_feature_tables.keys()):
    df = load_fft_feature_tables[load][['label', 'skewness', 'kurtosis']].copy()
    html_table = df.to_html(index=False)
    tables.append(f'<div style="display:inline-block; margin-right:30px;">{html_table}</div>')

display_html(''.join(tables), raw=True)

label,skewness,kurtosis
Normal,55.84772,4948.632543
B007,9.533126,261.00862
B014,5.283822,63.788199
B021,9.550278,179.434736
OR007@12,12.508942,288.464694
OR007@3,8.715504,135.228527
OR014@3,15.957681,462.731979
IR007,12.755724,292.278754
IR014,26.16732,1403.010742
IR021,8.899599,148.637371

label,skewness,kurtosis
Normal,38.745655,2300.692362
B007,15.841604,626.269515
B014,8.589272,250.582016
B021,23.395453,1428.414454
OR007@12,22.933705,1074.793183
OR007@3,17.382076,611.311526
OR021@3,9.128943,159.132203
OR014@3,9.531786,161.89328
IR007,14.49504,419.648657
IR014,16.724665,656.11064

label,skewness,kurtosis
Normal,38.745655,2300.692362
B007,27.308912,2037.545627
B014,4.646485,31.149726
B021,26.82678,2145.958718
OR007@12,25.784696,1373.171168
OR007@3,16.404759,481.582566
OR021@3,8.178528,120.858596
OR014@3,20.342681,753.419819
IR007,14.061629,390.445421
IR014,19.370829,1000.075445

label,skewness,kurtosis
Normal,44.420013,3438.282859
B007,11.482946,340.329019
B014,5.941752,83.898515
B021,22.514466,1307.446586
OR007@12,24.556977,1244.640748
OR007@3,14.700449,367.426949
OR021@3,9.067323,152.91348
OR014@3,14.048746,313.174701
IR007,14.95889,510.014899
IR014,15.199645,652.060701


#### Spectral centroid, Spectral spread

In [214]:
tables = []

for load in sorted(load_fft_feature_tables.keys()):
    df = load_fft_feature_tables[load][['label', 'spec_centroid', 'spec_spread']].copy()
    html_table = df.to_html(index=False)
    tables.append(f'<div style="display:inline-block; margin-right:8px;">{html_table}</div>')

display_html(''.join(tables), raw=True)

label,spec_centroid,spec_spread
Normal,4786.205791,4558.571856
B007,11318.681093,5043.808002
B014,11390.116546,4490.772684
B021,10559.446021,4486.844478
OR007@12,10502.146436,4788.070183
OR007@3,10935.42438,4728.236287
OR014@3,12058.877638,4723.507206
IR007,10744.044357,5013.79228
IR014,11117.292852,4781.9394
IR021,11713.940921,4674.767447

label,spec_centroid,spec_spread
Normal,4729.226532,4550.085843
B007,10952.955036,5838.097151
B014,11560.053334,4468.933137
B021,10784.114152,4719.790597
OR007@12,10886.995918,4960.931597
OR007@3,10702.957905,4916.626646
OR021@3,11508.310399,4917.05659
OR014@3,11816.135673,5218.362727
IR007,11180.561799,5026.122165
IR014,11456.775672,4720.557727

label,spec_centroid,spec_spread
Normal,4729.226532,4550.085843
B007,11100.752305,5348.664042
B014,11644.481789,3969.675568
B021,10781.243581,4693.783758
OR007@12,10821.73718,5004.95492
OR007@3,10567.992936,5004.229502
OR021@3,11660.728206,5043.646601
OR014@3,12496.057645,4603.957007
IR007,11136.794739,5005.044782
IR014,11321.710786,4678.472872

label,spec_centroid,spec_spread
Normal,4613.822623,4423.366031
B007,11073.485572,5164.597318
B014,11317.338292,4351.290247
B021,10583.782596,4688.832018
OR007@12,10775.406715,5107.016398
OR007@3,10650.980668,4890.290818
OR021@3,11774.509822,5006.006574
OR014@3,12599.580504,4445.761265
IR007,11148.274362,4877.118635
IR014,11303.611342,4715.299604


#### Spectral flatness, Spectral entropy

In [215]:
tables = []

for load in sorted(load_fft_feature_tables.keys()):
    df = load_fft_feature_tables[load][['label', 'spec_flatness', 'spec_entropy']].copy()
    html_table = df.to_html(index=False)
    tables.append(f'<div style="display:inline-block; margin-right:8px;">{html_table}</div>')

display_html(''.join(tables), raw=True)

label,spec_flatness,spec_entropy
Normal,0.26611,9.691646
B007,0.462029,10.225548
B014,0.437961,10.215167
B021,0.384191,10.04367
OR007@12,0.397802,10.012729
OR007@3,0.411708,10.084155
OR014@3,0.39952,9.922678
IR007,0.438431,10.133464
IR014,0.408803,10.065798
IR021,0.385548,9.925622

label,spec_flatness,spec_entropy
Normal,0.274333,9.719826
B007,0.540997,10.330052
B014,0.429297,10.174352
B021,0.392618,10.046136
OR007@12,0.386207,9.927136
OR007@3,0.37829,9.923805
OR021@3,0.419163,10.082851
OR014@3,0.44368,10.151568
IR007,0.428178,10.076728
IR014,0.404298,10.068764

label,spec_flatness,spec_entropy
Normal,0.274333,9.719826
B007,0.470609,10.224507
B014,0.369011,10.028548
B021,0.396752,10.052444
OR007@12,0.390362,9.933807
OR007@3,0.387298,9.928548
OR021@3,0.421711,10.097382
OR014@3,0.359436,9.776722
IR007,0.416141,10.058019
IR014,0.394557,10.071328

label,spec_flatness,spec_entropy
Normal,0.257226,9.691481
B007,0.448409,10.19492
B014,0.416286,10.15183
B021,0.386882,10.017236
OR007@12,0.388471,9.919268
OR007@3,0.377564,9.924136
OR021@3,0.417621,10.065153
OR014@3,0.34367,9.750157
IR007,0.411009,10.072515
IR014,0.403886,10.10514


#### Total power, Median frequency

In [216]:
tables = []

for load in sorted(load_fft_feature_tables.keys()):
    df = load_fft_feature_tables[load][['label', 'total_power', 'median_freq']].copy()
    html_table = df.to_html(index=False)
    tables.append(f'<div style="display:inline-block; margin-right:8px;">{html_table}</div>')

display_html(''.join(tables), raw=True)

label,total_power,median_freq
Normal,39811320.0,4144.325794
B007,51980740.0,12897.791338
B014,159436700.0,11979.704185
B021,73255610.0,11012.668303
OR007@12,72197870.0,10679.976123
OR007@3,54129720.0,11920.408569
OR014@3,288322700.0,12941.964582
IR007,109157500.0,11186.177622
IR014,245276300.0,12866.75068
IR021,294647400.0,13091.596471

label,total_power,median_freq
Normal,32466740.0,2772.965444
B007,49615100.0,10746.434967
B014,121959400.0,11787.49088
B021,77114120.0,10934.270744
OR007@12,74619580.0,11064.80069
OR007@3,64423540.0,11387.941898
OR021@3,75251780.0,12069.642502
OR014@3,99228760.0,12651.057903
IR007,107241000.0,11381.176627
IR014,181201500.0,12786.363335

label,total_power,median_freq
Normal,32466740.0,2772.965444
B007,54024160.0,5760.031837
B014,318035200.0,12162.366519
B021,83373110.0,10930.291172
OR007@12,83851220.0,4432.048816
OR007@3,61329080.0,11375.605227
OR021@3,54162200.0,12523.313657
OR014@3,391989000.0,12993.699012
IR007,113612600.0,11535.584002
IR014,167087000.0,12372.487895

label,total_power,median_freq
Normal,32855300.0,2823.506003
B007,61555180.0,12138.091132
B014,182704900.0,11773.164423
B021,92055150.0,10680.37408
OR007@12,89378490.0,9138.688068
OR007@3,66899230.0,11367.646083
OR021@3,84718350.0,13131.790144
OR014@3,601840400.0,13010.413212
IR007,117728900.0,11528.420773
IR014,167546400.0,12133.315646


#### Dominant frequency, Band energy ratio

In [217]:
tables = []

for load in sorted(load_fft_feature_tables.keys()):
    df = load_fft_feature_tables[load][['label', 'dominant_freq', 'ber']].copy()
    html_table = df.to_html(index=False)
    tables.append(f'<div style="display:inline-block; margin-right:30px;">{html_table}</div>')

display_html(''.join(tables), raw=True)

label,dominant_freq,ber
Normal,4144.325794,2.08137
B007,5750.082908,4.824897
B014,5746.501293,13.430146
B021,1439.411023,5.921656
OR007@12,2035.550839,4.477783
OR007@3,4309.478013,7.965912
OR014@3,12976.984811,22.92048
IR007,16491.344432,12.128873
IR014,12866.75068,7.743924
IR021,13737.878888,14.245164

label,dominant_freq,ber
Normal,4263.314983,0.947463
B007,4384.691915,3.453723
B014,11383.962327,12.131315
B021,1439.808981,2.452153
OR007@12,4370.365457,7.910524
OR007@3,4246.600783,6.30234
OR021@3,11596.471447,12.544407
OR014@3,12887.842409,9.741415
IR007,11381.176627,16.0674
IR014,12949.127811,5.65174

label,dominant_freq,ber
Normal,4263.314983,0.947463
B007,4338.926842,4.054642
B014,1439.808981,35.366466
B021,1439.808981,2.865825
OR007@12,4315.049413,11.802294
OR007@3,4186.509253,7.235372
OR021@3,13988.989852,10.604354
OR014@3,12876.301652,58.905374
IR007,4154.274723,14.756738
IR014,2080.917955,3.5723

label,dominant_freq,ber
Normal,8407.640777,0.960977
B007,4288.784241,4.870979
B014,4005.040791,21.148669
B021,1440.206938,4.622018
OR007@12,4252.57014,14.417293
OR007@3,4290.774027,9.318925
OR021@3,4103.734165,18.148044
OR014@3,13098.361743,90.004415
IR007,4100.550507,15.147193
IR014,2054.652782,4.146506


### Feature extraction in time-frequency domain

In [218]:
load_stft_tables = {}
load_stft_feature_tables = {}

for load in sorted(data['load'].unique()):
    load_df = data[data['load'] == load]

    stft_rows = []
    feature_rows = []

    for _, row in load_df.iterrows():
        signal = np.array(row['cropped_signal'])

        S = librosa.stft(signal, n_fft=FRAME_SIZE, hop_length=HOP_SIZE, win_length=FRAME_SIZE, window='hann', center=True)
        magnitude = np.abs(S)

        freqs = np.linspace(0, SAMPLE_RATE / 2, magnitude.shape[0])

        stft_dict = {f'stft_{i}': mag for i, mag in enumerate(magnitude.flatten())}
        stft_dict['label'] = row['label']
        stft_rows.append(stft_dict)

        features = extract_time_frequency_domain_features(magnitude, freqs)
        features['label'] = row['label']
        feature_rows.append(features)

    stft_df = pd.DataFrame(stft_rows)
    features_df = pd.DataFrame(feature_rows)

    load_stft_tables[load] = stft_df
    load_stft_feature_tables[load] = features_df


#### Mean magnitude for all frames, Max magnitude for all frames

In [219]:
tables = []

for load in sorted(load_stft_feature_tables.keys()):
    df = load_stft_feature_tables[load][['label', 'mean', 'max']].copy()
    html_table = df.to_html(index=False)
    tables.append(f'<div style="display:inline-block; margin-right:30px;">{html_table}</div>')

display_html(''.join(tables), raw=True)


label,mean,max
Normal,0.533502,16.965659
B007,0.998792,19.476653
B014,1.552758,44.715976
B021,1.132916,19.98539
OR007@12,1.181489,17.356743
OR007@3,1.01816,16.269384
OR014@3,1.950357,43.516096
IR007,1.561926,19.09654
IR014,1.91784,42.580971
IR021,1.963516,48.446957

label,mean,max
Normal,0.521393,16.047183
B007,0.942156,30.943693
B014,1.313326,46.81304
B021,1.047394,23.302718
OR007@12,1.056348,32.021676
OR007@3,1.015173,18.297382
OR021@3,1.192964,16.556435
OR014@3,1.43552,19.599347
IR007,1.492887,16.940078
IR014,1.726855,38.066993

label,mean,max
Normal,0.521393,16.047183
B007,0.977098,19.226392
B014,1.948981,60.391063
B021,1.110493,18.140183
OR007@12,1.087044,34.966117
OR007@3,1.018332,15.337364
OR021@3,1.022767,14.647826
OR014@3,1.910111,63.027406
IR007,1.537527,20.627155
IR014,1.686649,29.321903

label,mean,max
Normal,0.545063,12.476696
B007,1.078029,19.20504
B014,1.629354,45.748442
B021,1.149102,21.573632
OR007@12,1.168329,31.492637
OR007@3,1.073081,18.554515
OR021@3,1.147784,26.200322
OR014@3,2.332189,81.396723
IR007,1.5484,21.934408
IR014,1.737789,27.45863


#### Spectral centroid for all frames, Spectral spread for all frames

In [220]:
tables = []

for load in sorted(load_stft_feature_tables.keys()):
    df = load_stft_feature_tables[load][['label', 'spec_centroid', 'spec_spread']].copy()
    html_table = df.to_html(index=False)
    tables.append(f'<div style="display:inline-block; margin-right:30px;">{html_table}</div>')

display_html(''.join(tables), raw=True)


label,spec_centroid,spec_spread
Normal,4219.490662,4021.127323
B007,10079.166272,5613.62742
B014,10601.770771,4986.854959
B021,9315.429516,4821.564089
OR007@12,9236.851512,5115.439148
OR007@3,10017.620286,5136.682492
OR014@3,11305.314596,4942.818666
IR007,9894.611208,5212.324311
IR014,10241.007015,5207.394045
IR021,10767.417547,5108.149848

label,spec_centroid,spec_spread
Normal,4185.313277,4126.543326
B007,9719.795029,5900.355423
B014,10730.724438,4950.265262
B021,9380.885368,5278.810449
OR007@12,9755.913908,5293.272071
OR007@3,9674.686275,5220.205371
OR021@3,10442.221398,5205.413137
OR014@3,11013.833958,5435.066516
IR007,10252.306849,5233.392935
IR014,10204.752229,5285.604036

label,spec_centroid,spec_spread
Normal,4185.313277,4126.543326
B007,9599.51254,5816.523162
B014,11112.387807,4428.159066
B021,9436.067377,5276.773314
OR007@12,9656.565628,5298.06859
OR007@3,9684.916559,5212.046427
OR021@3,10464.503003,5397.655609
OR014@3,11764.669862,4773.943107
IR007,10247.399798,5288.485557
IR014,9981.441295,5349.256678

label,spec_centroid,spec_spread
Normal,4170.761128,4020.130891
B007,9710.642866,5641.261662
B014,10593.934628,4760.99827
B021,9219.518575,5157.581536
OR007@12,9681.860409,5314.836217
OR007@3,9653.521503,5162.558003
OR021@3,10570.226463,5451.045695
OR014@3,11782.113172,4724.354088
IR007,10211.424948,5219.539758
IR014,9878.191947,5342.091997


#### Total power for all frames

In [221]:
tables = []

for load in sorted(load_stft_feature_tables.keys()):
    df = load_stft_feature_tables[load][['label', 'total_power']].copy()
    html_table = df.to_html(index=False)
    tables.append(f'<div style="display:inline-block; margin-right:30px;">{html_table}</div>')

display_html(''.join(tables), raw=True)


label,total_power
Normal,250865.6
B007,331222.6
B014,1016189.0
B021,467144.7
OR007@12,458906.2
OR007@3,345523.9
OR014@3,1828605.0
IR007,694207.6
IR014,1563656.0
IR021,1876308.0

label,total_power
Normal,205169.9
B007,318778.9
B014,768286.2
B021,490124.1
OR007@12,478383.3
OR007@3,409238.2
OR021@3,481838.8
OR014@3,632645.9
IR007,682911.7
IR014,1153120.0

label,total_power
Normal,205169.9
B007,342515.2
B014,2033745.0
B021,529985.7
OR007@12,533024.6
OR007@3,391483.2
OR021@3,344486.8
OR014@3,2500093.0
IR007,722814.0
IR014,1064766.0

label,total_power
Normal,206628.2
B007,393121.2
B014,1160076.0
B021,586460.4
OR007@12,568341.7
OR007@3,424207.0
OR021@3,535741.8
OR014@3,3841921.0
IR007,749665.5
IR014,1063312.0
