# CWRU Bearing Fault Data - Data Feature Extraction (divided by load)

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import os
import scipy.io
import pickle
import librosa

from scipy.stats import skew, kurtosis, entropy
from scipy.fft import fft
from scipy.signal import stft
from IPython.display import display_html

In [2]:
def extract_time_domain_features(signal):
    signal = np.array(signal)

    zcr = librosa.feature.zero_crossing_rate(signal, frame_length=len(signal), hop_length=len(signal))[0][0]
    rms = np.sqrt(np.mean(signal**2))
    mean = np.mean(signal)
    avg = np.mean(np.abs(signal))

    return {
        'mean': mean,
        'std': np.std(signal),
        'variance': np.var(signal),
        'min': np.min(signal),
        'max': np.max(signal),
        'peak_to_peak': np.ptp(signal),
        'skewness': skew(signal),
        'kurtosis': kurtosis(signal),
        'rms': np.sqrt(np.mean(signal**2)), # root mean square
        'zcr': zcr, # zero crossing rate
        'crest_factor': np.max(np.abs(signal)) / rms,
        'form_factor': rms / avg
    }

In [3]:
def extract_frequency_domain_features(fft_amplitudes):
    fft_amplitudes = np.array(fft_amplitudes)
    freqs = np.linspace(0, SAMPLE_RATE / 2, len(fft_amplitudes))

    norm_fft_amplitudes = fft_amplitudes / (np.sum(fft_amplitudes) + 1e-12)
    power_spectrum = fft_amplitudes**2

    peak_idx = np.argmax(fft_amplitudes)
    peak_freq = freqs[peak_idx]

    cumulative_power = np.cumsum(power_spectrum)
    median_freq = freqs[np.searchsorted(cumulative_power, cumulative_power[-1] / 2)]

    low_band = power_spectrum[freqs < 3000]
    high_band = power_spectrum[freqs >= 3000]
    band_energy_ratio = np.sum(high_band) / (np.sum(low_band) + 1e-12)

    return {
        'mean': np.mean(fft_amplitudes), # mean magnitude
        'std': np.std(fft_amplitudes), # standard deviation magnitude
        'max': np.max(fft_amplitudes), # max magnitude
        'skewness': skew(fft_amplitudes), # skewness magnitude
        'kurtosis': kurtosis(fft_amplitudes), # kurtosis magnitude
        'spec_centroid': np.sum(freqs * norm_fft_amplitudes), # spectral centroid
        'spec_spread': np.sqrt(np.sum(((freqs - np.sum(freqs * norm_fft_amplitudes))**2) * norm_fft_amplitudes)), # spectral_spread
        'spec_flatness': np.exp(np.mean(np.log(fft_amplitudes + 1e-12))) / (np.mean(fft_amplitudes) + 1e-12), # spectral_flatness
        'spec_entropy': entropy(norm_fft_amplitudes), # spectral_entropy
        'total_power': np.sum(power_spectrum),
        'dominant_freq': peak_freq, # dominant frequency
        'median_freq': median_freq, # median frequency
        'ber': band_energy_ratio, # band energy ratio
    }

In [4]:
def extract_time_frequency_domain_features(magnitude, freqs):
    mean_mag = np.mean(magnitude)
    max_mag = np.max(magnitude)
    spec_centroid = np.sum(freqs[:, None] * magnitude) / (np.sum(magnitude) + 1e-12)
    spec_spread = np.sqrt(np.sum(((freqs[:, None] - spec_centroid)**2) * magnitude) / (np.sum(magnitude) + 1e-12))
    total_power = np.sum(magnitude**2)

    return {
        'mean': mean_mag, # mean magnitude
        'max': max_mag, # max_magnitude
        'spec_centroid': spec_centroid, # spectral centroid
        'spec_spread': spec_spread, # spectral spread
        'total_power': total_power
    }

## 48k Drive-End Bearing Fault Data + Normal Baseline Data (48k)

In [5]:
with open('48kdrive-end_normalbaseline_loadsplit_cropped_data.pkl', 'rb') as f:
    data = pickle.load(f)

In [6]:
SAMPLE_RATE = 48000
FRAME_SIZE = 1024
HOP_SIZE = 512

### Feature extraction in time domain

In [7]:
load_feature_tables = {}

for load in data['load'].unique():
    load_df = data[data['load'] == load]

    feature_rows = []
    for _, row in load_df.iterrows():
        features = extract_time_domain_features(row['cropped_signal'])
        features['label'] = row['label']
        feature_rows.append(features)

    features_df = pd.DataFrame(feature_rows)
    load_feature_tables[load] = features_df

#### Mean, Standard deviation, Variance

In [8]:
tables = []

for load in sorted(load_feature_tables.keys()):
    df = load_feature_tables[load][['label', 'mean', 'variance', 'std']].copy()
    html_table = df.to_html(index=False)
    tables.append(f'<div style="display:inline-block; margin-right:30px;">{html_table}</div>')

display_html(''.join(tables), raw=True)

label,mean,variance,std
Normal,0.012059,0.005229,0.072313
B007,0.032624,0.022016,0.148378
B014,0.033902,0.012497,0.11179
B021,0.285251,0.116771,0.341718
OR021@12,0.11696,0.387444,0.62245
OR007@12,0.119367,0.046886,0.216533
OR021@3,0.006357,0.171236,0.413807
OR007@3,0.109014,0.162601,0.403238
IR007,0.082329,0.341184,0.58411
IR014,0.037581,0.031744,0.178169

label,mean,variance,std
Normal,0.010809,0.004179,0.064649
B007,0.019218,0.019054,0.138036
B014,0.01088,0.026858,0.163885
B021,0.010511,0.032746,0.180958
OR021@12,0.022294,0.273261,0.522744
OR007@12,0.013344,0.084678,0.290995
OR021@3,0.015722,0.565143,0.75176
OR007@3,0.012416,0.203578,0.451196
IR007,0.022474,0.07847,0.280126
IR014,0.031287,0.037362,0.193292

label,mean,variance,std
Normal,0.010809,0.004179,0.064649
B007,0.016222,0.021025,0.145001
B014,0.012479,0.061478,0.247948
B021,0.011637,0.104229,0.322846
OR021@12,0.017837,0.228713,0.47824
OR007@12,0.011583,0.075116,0.274072
OR021@3,0.011217,0.502064,0.708565
OR007@3,0.00953,0.19908,0.446184
IR007,0.017626,0.07874,0.280606
IR014,0.017933,0.056435,0.23756

label,mean,variance,std
Normal,0.013956,0.004174,0.064607
B007,0.014514,0.020452,0.143011
B014,0.011245,0.030514,0.174683
B021,0.014987,0.083067,0.288214
OR021@12,0.019966,0.189337,0.435129
OR007@12,0.011832,0.059699,0.244334
OR021@3,0.013129,0.449837,0.670699
OR007@3,0.010933,0.193755,0.440176
IR007,0.013764,0.078815,0.280741
IR014,0.032975,0.112671,0.335664


#### Min, Max, Peak to Peak

In [9]:
tables = []

for load in sorted(load_feature_tables.keys()):
    df = load_feature_tables[load][['label', 'min', 'max', 'peak_to_peak']].copy()
    html_table = df.to_html(index=False)
    tables.append(f'<div style="display:inline-block; margin-right:8px;">{html_table}</div>')

display_html(''.join(tables), raw=True)

label,min,max,peak_to_peak
Normal,-0.286638,0.311254,0.597892
B007,-0.523833,0.679252,1.203085
B014,-0.711378,0.811097,1.522475
B021,-1.125131,1.575851,2.700981
OR021@12,-5.212493,5.788413,11.000907
OR007@12,-1.279021,1.471156,2.750177
OR021@3,-2.971831,2.479377,5.451208
OR007@3,-1.85021,1.992903,3.843113
IR007,-2.772763,3.291508,6.064271
IR014,-1.319191,1.449816,2.769007

label,min,max,peak_to_peak
Normal,-0.251382,0.227182,0.478564
B007,-0.578908,0.663397,1.242305
B014,-2.487738,2.103052,4.59079
B021,-1.271928,1.234377,2.506305
OR021@12,-4.311888,4.790987,9.102875
OR007@12,-2.358397,2.398868,4.757265
OR021@3,-5.399459,4.389512,9.788971
OR007@3,-2.219042,2.125999,4.345041
IR007,-1.500779,1.61969,3.120469
IR014,-0.972774,1.154269,2.127042

label,min,max,peak_to_peak
Normal,-0.251382,0.227182,0.478564
B007,-0.664023,0.679669,1.343692
B014,-3.417954,3.417746,6.8357
B021,-3.417954,3.417746,6.8357
OR021@12,-4.326912,4.512208,8.83912
OR007@12,-1.981429,2.014599,3.996028
OR021@3,-4.710859,4.55728,9.268139
OR007@3,-1.932822,1.949719,3.882541
IR007,-1.60613,1.69646,3.30259
IR014,-2.448495,2.669264,5.117759

label,min,max,peak_to_peak
Normal,-0.282257,0.265985,0.548241
B007,-0.598726,0.629184,1.22791
B014,-1.988522,2.526541,4.515063
B021,-3.417954,3.417746,6.8357
OR021@12,-4.033109,4.291021,8.324131
OR007@12,-2.012304,2.072385,4.084689
OR021@3,-4.591501,4.164987,8.756488
OR007@3,-1.876078,1.94513,3.821208
IR007,-1.59862,1.553767,3.152387
IR014,-2.420533,2.519859,4.940392


#### Skewness, Kurtosis

In [10]:
tables = []

for load in sorted(load_feature_tables.keys()):
    df = load_feature_tables[load][['label', 'skewness', 'kurtosis']].copy()
    html_table = df.to_html(index=False)
    tables.append(f'<div style="display:inline-block; margin-right:30px;">{html_table}</div>')

display_html(''.join(tables), raw=True)

label,skewness,kurtosis
Normal,-0.04555,-0.185236
B007,-0.01909,0.105033
B014,0.00205,2.448639
B021,0.024113,0.084684
OR021@12,0.132761,21.360128
OR007@12,0.034581,2.555944
OR021@3,-0.262731,5.134962
OR007@3,0.072165,1.196439
IR007,0.005247,3.020826
IR014,-0.091302,8.669292

label,skewness,kurtosis
Normal,-0.189362,-0.112915
B007,-0.012104,0.040244
B014,0.109428,25.271415
B021,0.015148,1.112457
OR021@12,0.170471,18.423676
OR007@12,0.019587,8.404926
OR021@3,0.088792,4.181221
OR007@3,0.059828,1.055609
IR007,-0.06515,4.459857
IR014,-0.018043,0.980092

label,skewness,kurtosis
Normal,-0.189362,-0.112915
B007,-0.005646,0.184793
B014,0.00939,40.026806
B021,0.064263,16.480632
OR021@12,0.167588,20.273863
OR007@12,0.042373,9.731409
OR021@3,0.003018,4.756478
OR007@3,0.051157,1.376386
IR007,-0.10023,4.495266
IR014,0.163512,19.197554

label,skewness,kurtosis
Normal,-0.11895,-0.021216
B007,0.008761,0.071878
B014,0.3437,19.354164
B021,0.054574,16.912832
OR021@12,0.123757,19.649733
OR007@12,0.021031,7.822689
OR021@3,-0.05698,4.097515
OR007@3,0.040012,0.989626
IR007,-0.158681,3.828906
IR014,0.529446,6.964151


#### RMS (root mean square)

In [11]:
tables = []

for load in sorted(load_feature_tables.keys()):
    df = load_feature_tables[load][['label', 'rms']].copy()
    html_table = df.to_html(index=False)
    tables.append(f'<div style="display:inline-block; margin-right:100px;">{html_table}</div>')

display_html(''.join(tables), raw=True)

label,rms
Normal,0.073311
B007,0.151923
B014,0.116818
B021,0.445129
OR021@12,0.633343
OR007@12,0.247254
OR021@3,0.413856
OR007@3,0.417714
IR007,0.589883
IR014,0.182089

label,rms
Normal,0.065546
B007,0.139367
B014,0.164245
B021,0.181263
OR021@12,0.523219
OR007@12,0.291301
OR021@3,0.751924
OR007@3,0.451367
IR007,0.281026
IR014,0.195808

label,rms
Normal,0.065546
B007,0.145906
B014,0.248261
B021,0.323055
OR021@12,0.478572
OR007@12,0.274317
OR021@3,0.708654
OR007@3,0.446286
IR007,0.281159
IR014,0.238236

label,rms
Normal,0.066097
B007,0.143746
B014,0.175045
B021,0.288604
OR021@12,0.435586
OR007@12,0.24462
OR021@3,0.670827
OR007@3,0.440312
IR007,0.281078
IR014,0.33728


#### Zero crossing rate

In [12]:
tables = []

for load in sorted(load_feature_tables.keys()):
    df = load_feature_tables[load][['label', 'zcr']].copy()
    html_table = df.to_html(index=False)
    tables.append(f'<div style="display:inline-block; margin-right:100px;">{html_table}</div>')

display_html(''.join(tables), raw=True)

label,zcr
Normal,0.082915
B007,0.060199
B014,0.049523
B021,0.038377
OR021@12,0.039976
OR007@12,0.04631
OR021@3,0.05559
OR007@3,0.058898
IR007,0.063492
IR014,0.05269

label,zcr
Normal,0.085188
B007,0.060435
B014,0.050417
B021,0.0565
OR021@12,0.053129
OR007@12,0.060732
OR021@3,0.05581
OR007@3,0.063554
IR007,0.063445
IR014,0.074356

label,zcr
Normal,0.085188
B007,0.060184
B014,0.054603
B021,0.055998
OR021@12,0.053835
OR007@12,0.057675
OR021@3,0.05501
OR007@3,0.061751
IR007,0.06357
IR014,0.052753

label,zcr
Normal,0.102966
B007,0.059964
B014,0.051577
B021,0.051765
OR021@12,0.053302
OR007@12,0.0565
OR021@3,0.054148
OR007@3,0.061626
IR007,0.061501
IR014,0.131686


### Feature extraction in frequency domain

In [13]:
load_fft_tables = {}
load_fft_feature_tables = {}

for load in data['load'].unique():
    load_df = data[data['load'] == load]

    fft_rows = []
    feature_rows = []

    for _, row in load_df.iterrows():
        signal = np.array(row['cropped_signal'])

        fft_result = np.fft.rfft(signal)
        fft_amplitudes = np.abs(fft_result)

        fft_dict = {f'fft_{i}': amp for i, amp in enumerate(fft_amplitudes)}
        fft_dict['label'] = row['label']
        fft_rows.append(fft_dict)

        features = extract_frequency_domain_features(fft_amplitudes)
        features['label'] = row['label']
        feature_rows.append(features)

    fft_df = pd.DataFrame(fft_rows)
    features_df = pd.DataFrame(feature_rows)

    load_fft_tables[load] = fft_df
    load_fft_feature_tables[load] = features_df

#### Mean magnitude, Standard deviation magnitude, Max magnitude

In [24]:
tables = []

for load in sorted(load_fft_feature_tables.keys()):
    df = load_fft_feature_tables[load][['label', 'mean', 'std', 'max']].copy()
    html_table = df.to_html(index=False)
    tables.append(f'<div style="display:inline-block; margin-right:0px;">{html_table}</div>')

display_html(''.join(tables), raw=True)

label,mean,std,max
Normal,4.35404,18.252052,1423.231877
B007,7.96557,38.427174,2081.009391
B014,7.575948,29.771998,2162.571122
B021,15.769366,132.589667,18195.609385
OR021@12,24.701168,160.774718,7460.632944
OR007@12,11.425461,68.394783,7614.152162
OR021@3,29.162845,100.384979,2346.9569
OR007@3,19.418944,107.287747,6953.798103
IR007,35.174226,146.253975,7247.623156
IR014,12.62306,45.22906,2397.219424

label,mean,std,max
Normal,4.232113,16.235313,1007.29691
B007,7.635665,34.701415,1225.876571
B014,10.929176,40.110203,1440.539854
B021,8.905365,44.983369,1786.304369
OR021@12,24.699269,129.936762,4592.213985
OR007@12,15.88531,71.914101,3860.732083
OR021@3,47.063853,184.023778,4881.48847
OR007@3,18.70203,112.495936,5756.19383
IR007,16.656479,69.226555,3706.396791
IR014,17.607869,46.882765,1995.729508

label,mean,std,max
Normal,4.232113,16.235313,1007.29691
B007,7.85869,36.234454,1570.427868
B014,14.75906,61.020247,1476.564867
B021,17.016663,79.850329,1843.736788
OR021@12,22.522495,118.836113,4812.365172
OR007@12,14.095681,67.895147,2381.661441
OR021@3,44.38124,173.410023,4883.213665
OR007@3,19.48712,111.042218,4725.861192
IR007,16.490882,69.21113,3387.0117
IR014,11.80616,59.172626,2679.669388

label,mean,std,max
Normal,4.375276,16.49083,1322.172631
B007,8.015851,35.597531,1210.655497
B014,11.342601,42.823556,1380.495796
B021,14.483621,71.53615,2972.086328
OR021@12,20.735902,108.156913,3664.912161
OR007@12,12.803236,60.513689,2201.713165
OR021@3,44.078113,163.622692,5715.370149
OR007@3,19.442678,109.526592,5562.179076
IR007,17.20129,68.960821,3040.887034
IR014,25.946361,81.644303,2729.238827


#### Skewness magnitude, Kurtosis magnitude

In [15]:
tables = []

for load in sorted(load_fft_feature_tables.keys()):
    df = load_fft_feature_tables[load][['label', 'skewness', 'kurtosis']].copy()
    html_table = df.to_html(index=False)
    tables.append(f'<div style="display:inline-block; margin-right:30px;">{html_table}</div>')

display_html(''.join(tables), raw=True)

label,skewness,kurtosis
Normal,36.318393,2076.028017
B007,15.161559,437.351517
B014,18.114617,940.427551
B021,86.283393,11191.845879
OR021@12,19.225282,526.509744
OR007@12,51.465482,4971.926219
OR021@3,7.536522,80.384802
OR007@3,21.366264,834.258361
IR007,19.27529,604.475032
IR014,10.254882,285.240881

label,skewness,kurtosis
Normal,32.992517,1640.31029
B007,11.59986,205.817566
B014,7.546874,101.008807
B021,16.511155,446.337658
OR021@12,12.446397,225.553656
OR007@12,16.188111,452.68508
OR021@3,9.959176,144.377259
OR007@3,22.995608,799.903982
IR007,19.474675,630.12224
IR014,10.827178,236.323768

label,skewness,kurtosis
Normal,32.992517,1640.31029
B007,13.103934,291.107445
B014,7.362379,69.699707
B021,8.754709,97.973429
OR021@12,13.44514,267.792336
OR007@12,14.30203,290.670288
OR021@3,10.957589,179.816858
OR007@3,18.073771,454.472601
IR007,19.100119,594.906979
IR014,15.731588,384.938725

label,skewness,kurtosis
Normal,33.748452,1947.895257
B007,11.649184,210.705067
B014,8.340685,112.660477
B021,12.442872,268.884249
OR021@12,13.018702,236.10431
OR007@12,13.906309,284.766208
OR021@3,10.617819,185.248729
OR007@3,19.782765,583.346847
IR007,16.603571,454.494339
IR014,14.543156,321.786641


#### Spectral centroid, Spectral spread

In [16]:
tables = []

for load in sorted(load_fft_feature_tables.keys()):
    df = load_fft_feature_tables[load][['label', 'spec_centroid', 'spec_spread']].copy()
    html_table = df.to_html(index=False)
    tables.append(f'<div style="display:inline-block; margin-right:8px;">{html_table}</div>')

display_html(''.join(tables), raw=True)

label,spec_centroid,spec_spread
Normal,4655.864957,4459.586913
B007,3175.777514,2477.661405
B014,3275.418309,2989.823842
B021,3122.831877,2954.562569
OR021@12,2894.816735,1926.907514
OR007@12,3884.58975,3873.53727
OR021@3,3198.848564,2616.324033
OR007@3,3522.761692,2596.180313
IR007,3770.064757,2874.897618
IR014,3200.120111,2625.505676

label,spec_centroid,spec_spread
Normal,4579.448403,4521.76046
B007,3111.373682,2514.878781
B014,3045.67844,2759.541969
B021,3267.898912,2918.588929
OR021@12,2943.065501,1996.488131
OR007@12,5236.980327,5655.397901
OR021@3,3167.934038,2647.351109
OR007@3,3548.928313,2627.512059
IR007,3822.590966,2878.087656
IR014,5293.738619,4866.173969

label,spec_centroid,spec_spread
Normal,4579.448403,4521.76046
B007,3053.27961,2443.815544
B014,3246.835,2462.927016
B021,3127.043151,2438.008643
OR021@12,2968.893662,2056.534265
OR007@12,3679.942426,3544.415303
OR021@3,3322.446541,3040.244558
OR007@3,3485.214516,2560.009448
IR007,3759.991057,2899.649344
IR014,3122.022176,2569.32326

label,spec_centroid,spec_spread
Normal,4469.226057,4370.585285
B007,3056.846304,2505.540155
B014,3048.61049,2630.381231
B021,3050.36263,2436.846154
OR021@12,2912.304873,2088.978063
OR007@12,3516.016547,3324.185976
OR021@3,3294.493782,2988.183054
OR007@3,3465.527836,2524.028993
IR007,3810.716941,2842.697346
IR014,8780.261219,7668.684652


#### Spectral flatness, Spectral entropy

In [17]:
tables = []

for load in sorted(load_fft_feature_tables.keys()):
    df = load_fft_feature_tables[load][['label', 'spec_flatness', 'spec_entropy']].copy()
    html_table = df.to_html(index=False)
    tables.append(f'<div style="display:inline-block; margin-right:8px;">{html_table}</div>')

display_html(''.join(tables), raw=True)

label,spec_flatness,spec_entropy
Normal,0.253348,9.008821
B007,0.088189,8.128475
B014,0.119786,8.460957
B021,0.110667,7.978351
OR021@12,0.05775,7.743569
OR007@12,0.145476,8.317296
OR021@3,0.105807,8.530812
OR007@3,0.097051,8.129905
IR007,0.124383,8.616732
IR014,0.105035,8.495823

label,spec_flatness,spec_entropy
Normal,0.271765,9.078063
B007,0.090096,8.17558
B014,0.114638,8.397135
B021,0.117161,8.168697
OR021@12,0.065556,7.9148
OR007@12,0.191588,8.548789
OR021@3,0.092554,8.420797
OR007@3,0.107335,8.097808
IR007,0.118029,8.619169
IR014,0.26211,9.134757

label,spec_flatness,spec_entropy
Normal,0.271765,9.078063
B007,0.082927,8.164731
B014,0.098129,8.206512
B021,0.090055,8.026309
OR021@12,0.066564,7.968869
OR007@12,0.145651,8.301916
OR021@3,0.14299,8.491236
OR007@3,0.102685,8.089197
IR007,0.122971,8.611466
IR014,0.09675,8.179036

label,spec_flatness,spec_entropy
Normal,0.251461,9.046045
B007,0.092763,8.201837
B014,0.09475,8.391373
B021,0.08778,8.018258
OR021@12,0.068951,7.978634
OR007@12,0.118796,8.27947
OR021@3,0.137319,8.536071
OR007@3,0.093781,8.13476
IR007,0.130279,8.605411
IR014,0.407182,9.217394


#### Total power, Median frequency

In [18]:
tables = []

for load in sorted(load_fft_feature_tables.keys()):
    df = load_fft_feature_tables[load][['label', 'total_power', 'median_freq']].copy()
    html_table = df.to_html(index=False)
    tables.append(f'<div style="display:inline-block; margin-right:8px;">{html_table}</div>')

display_html(''.join(tables), raw=True)

label,total_power,median_freq
Normal,11230070.0,4144.729416
B007,49121430.0,2907.631529
B014,30101440.0,2668.338873
B021,568646200.0,0.0
OR021@12,843898900.0,2748.855584
OR007@12,153363500.0,2584.059698
OR021@3,348536300.0,2833.134759
OR007@3,379160000.0,2814.322443
IR007,721702600.0,2760.142974
IR014,70328780.0,2699.19107

label,total_power,median_freq
Normal,8978321.0,2774.440334
B007,40267170.0,2869.254405
B014,55123340.0,2771.430363
B021,67069090.0,2746.598106
OR021@12,557958900.0,2764.657929
OR007@12,172997900.0,2709.725967
OR021@3,1150764000.0,2828.619803
OR007@3,414797700.0,2871.511883
IR007,161699800.0,2757.133003
IR014,79993640.0,3220.668464

label,total_power,median_freq
Normal,8978321.0,2774.440334
B007,43845890.0,2887.314228
B014,125707800.0,2782.717753
B021,212600600.0,2709.725967
OR021@12,466601000.0,2765.410422
OR007@12,153365200.0,2690.161159
OR021@3,1021939000.0,2789.490186
OR007@3,405389300.0,2845.927134
IR007,161456600.0,2801.530068
IR014,116122800.0,2585.564683

label,total_power,median_freq
Normal,9284332.0,2790.242679
B007,42466220.0,2889.571706
B014,62594300.0,2748.855584
B021,169910900.0,2631.466734
OR021@12,386819200.0,2754.875525
OR007@12,122024800.0,2682.636233
OR021@3,915873400.0,2791.747664
OR007@3,394671700.0,2896.34414
IR007,161116900.0,2815.827428
IR014,234077600.0,5996.613783


#### Dominant frequency, Band energy ratio

In [19]:
tables = []

for load in sorted(load_fft_feature_tables.keys()):
    df = load_fft_feature_tables[load][['label', 'dominant_freq', 'ber']].copy()
    html_table = df.to_html(index=False)
    tables.append(f'<div style="display:inline-block; margin-right:30px;">{html_table}</div>')

display_html(''.join(tables), raw=True)

label,dominant_freq,ber
Normal,4144.729416,2.086419
B007,0.0,0.739567
B014,0.0,0.294075
B021,0.0,0.03722
OR021@12,0.0,0.040083
OR007@12,0.0,0.163281
OR021@3,2748.855584,0.52771
OR007@3,0.0,0.353216
IR007,2616.416881,0.515407
IR014,0.0,0.229035

label,dominant_freq,ber
Normal,8408.352668,0.953905
B007,0.0,0.570426
B014,359.691478,0.257708
B021,2819.589892,0.078465
OR021@12,2757.133003,0.062826
OR007@12,2550.197529,0.226676
OR021@3,2593.089609,0.349293
OR007@3,2871.511883,0.563664
IR007,2580.297235,0.550046
IR014,0.0,1.197286

label,dominant_freq,ber
Normal,8408.352668,0.953905
B007,2933.968772,0.518393
B014,359.691478,0.201958
B021,2551.702515,0.066583
OR021@12,2776.697812,0.069117
OR007@12,2515.582868,0.201355
OR021@3,2870.75939,0.324194
OR007@3,2565.999875,0.468878
IR007,2545.682574,0.581414
IR014,2556.21747,0.061489

label,dominant_freq,ber
Normal,8407.600176,0.945888
B007,2889.571706,0.455082
B014,359.691478,0.168655
B021,1386.091428,0.055612
OR021@12,2777.450304,0.067456
OR007@12,2582.554712,0.202758
OR021@3,2733.805731,0.307819
OR007@3,3368.15702,0.610095
IR007,1422.211074,0.790685
IR014,5998.871261,1.336175


### Feature extraction in time-frequency domain

In [20]:
load_stft_tables = {}
load_stft_feature_tables = {}

for load in sorted(data['load'].unique()):
    load_df = data[data['load'] == load]

    stft_rows = []
    feature_rows = []

    for _, row in load_df.iterrows():
        signal = np.array(row['cropped_signal'])

        S = librosa.stft(signal, n_fft=FRAME_SIZE, hop_length=HOP_SIZE, win_length=FRAME_SIZE, window='hann', center=True)
        magnitude = np.abs(S)

        freqs = np.linspace(0, SAMPLE_RATE / 2, magnitude.shape[0])

        stft_dict = {f'stft_{i}': mag for i, mag in enumerate(magnitude.flatten())}
        stft_dict['label'] = row['label']
        stft_rows.append(stft_dict)

        features = extract_time_frequency_domain_features(magnitude, freqs)
        features['label'] = row['label']
        feature_rows.append(features)

    stft_df = pd.DataFrame(stft_rows)
    features_df = pd.DataFrame(feature_rows)

    load_stft_tables[load] = stft_df
    load_stft_feature_tables[load] = features_df


#### Mean magnitude for all frames, Max magnitude for all frames

In [21]:
tables = []

for load in sorted(load_stft_feature_tables.keys()):
    df = load_stft_feature_tables[load][['label', 'mean', 'max']].copy()
    html_table = df.to_html(index=False)
    tables.append(f'<div style="display:inline-block; margin-right:30px;">{html_table}</div>')

display_html(''.join(tables), raw=True)


label,mean,max
Normal,0.533445,16.965659
B007,0.887999,46.812279
B014,0.750646,42.458211
B021,2.185782,156.320575
OR021@12,2.1871,411.102242
OR007@12,1.384778,85.057663
OR021@3,2.566951,117.649251
OR007@3,2.415092,113.994387
IR007,4.669546,100.375157
IR014,0.988049,73.415319

label,mean,max
Normal,0.517542,16.047183
B007,0.818408,45.955857
B014,0.817233,112.415182
B021,0.904793,76.225716
OR021@12,1.894275,314.870091
OR007@12,1.558366,138.611287
OR021@3,4.434868,224.895647
OR007@3,2.452306,118.785701
IR007,2.212725,47.083627
IR014,1.749261,54.340778

label,mean,max
Normal,0.517542,16.047183
B007,0.844284,47.155893
B014,1.023243,198.80751
B021,1.295738,244.005097
OR021@12,1.776705,345.919967
OR007@12,1.412002,134.042623
OR021@3,4.144984,249.931856
OR007@3,2.47986,106.196727
IR007,2.194555,50.637106
IR014,1.082657,134.634214

label,mean,max
Normal,0.545684,12.476696
B007,0.851617,37.72741
B014,0.937711,128.981171
B021,1.267231,178.737668
OR021@12,1.663879,304.931942
OR007@12,1.334196,112.827331
OR021@3,4.109377,222.792121
OR007@3,2.397065,118.455583
IR007,2.163041,51.164631
IR014,1.476662,267.240785


#### Spectral centroid for all frames, Spectral spread for all frames

In [22]:
tables = []

for load in sorted(load_stft_feature_tables.keys()):
    df = load_stft_feature_tables[load][['label', 'spec_centroid', 'spec_spread']].copy()
    html_table = df.to_html(index=False)
    tables.append(f'<div style="display:inline-block; margin-right:30px;">{html_table}</div>')

display_html(''.join(tables), raw=True)


label,spec_centroid,spec_spread
Normal,4219.881204,4016.604835
B007,2900.507124,2418.854747
B014,2784.665734,2923.003153
B021,2445.110289,2630.757215
OR021@12,2594.939433,2015.218044
OR007@12,3195.984558,3479.159724
OR021@3,2943.755689,2426.532599
OR007@3,3116.148235,2381.608706
IR007,3314.499866,2540.556489
IR014,3043.48217,2705.370081

label,spec_centroid,spec_spread
Normal,4192.726843,4137.967544
B007,2925.550949,2426.485564
B014,2873.027351,2815.821412
B021,3011.438426,2665.18367
OR021@12,2764.866017,2114.666346
OR007@12,4422.727348,4890.310136
OR021@3,2916.172058,2431.76456
OR007@3,3273.762152,2205.706421
IR007,3341.473525,2549.572073
IR014,4597.927034,4513.580889

label,spec_centroid,spec_spread
Normal,4192.726843,4137.967544
B007,2905.776935,2385.590451
B014,3027.077901,2585.456639
B021,2950.70625,2419.700746
OR021@12,2766.067611,2145.378599
OR007@12,3417.047971,3210.068329
OR021@3,2951.011109,2554.280534
OR007@3,3242.212756,2185.678014
IR007,3338.80616,2555.275792
IR014,2957.850032,2553.130944

label,spec_centroid,spec_spread
Normal,4167.392685,4022.089033
B007,2873.689492,2413.327996
B014,2875.273434,2711.243275
B021,2800.320017,2398.690762
OR021@12,2729.134732,2169.914135
OR007@12,3292.904155,3132.689956
OR021@3,2956.772807,2625.977684
OR007@3,3248.262349,2228.436139
IR007,3368.325261,2559.822638
IR014,6465.610406,6905.433667


#### Total power for all frames

In [23]:
tables = []

for load in sorted(load_stft_feature_tables.keys()):
    df = load_stft_feature_tables[load][['label', 'total_power']].copy()
    html_table = df.to_html(index=False)
    tables.append(f'<div style="display:inline-block; margin-right:30px;">{html_table}</div>')

display_html(''.join(tables), raw=True)


label,total_power
Normal,133863.6
B007,580339.3
B014,349545.2
B021,6209870.0
OR021@12,10183440.0
OR007@12,1724693.0
OR021@3,4185402.0
OR007@3,4468484.0
IR007,8615453.0
IR014,805419.6

label,total_power
Normal,106944.9
B007,483833.2
B014,660649.1
B021,807084.5
OR021@12,6321955.0
OR007@12,2104563.0
OR021@3,14253910.0
OR007@3,4982939.0
IR007,1946225.0
IR014,953303.7

label,total_power
Normal,106944.9
B007,524893.0
B014,1411929.0
B021,2545070.0
OR021@12,5664942.0
OR007@12,1836223.0
OR021@3,12352410.0
OR007@3,4998794.0
IR007,1936367.0
IR014,1356454.0

label,total_power
Normal,110201.9
B007,506762.3
B014,782349.2
B021,2018023.0
OR021@12,4558631.0
OR007@12,1439270.0
OR021@3,10994330.0
OR007@3,4727808.0
IR007,1936645.0
IR014,2877667.0
