In [1]:
#importing libaries
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.fft import fft, fftfreq, fftshift

In [2]:
bearing_signals = pd.read_csv('bearing_signals.csv')
bearing_classes = pd.read_csv('bearing_classes.csv', delimiter=';')

In [3]:
bearing_signals = bearing_signals.drop(['bearing_1_id','experiment_id'], 1)
bearing_signals.rename(columns={'bearing_2_id': 'bearing_id'}, inplace=True)
bearing_signals.head(2)

Unnamed: 0,bearing_id,timestamp,a1_x,a1_y,a1_z,a2_x,a2_y,a2_z,rpm,hz,w
0,1,0.0,0.113269,0.149706,-0.110275,-0.18603,0.19445,0.454299,0.0,0.0,6e-06
1,1,0.000333,-0.367713,-0.228832,0.177821,0.285992,0.002226,-0.04393,0.0,0.0,0.000243
2,1,0.000667,0.113269,0.149706,-0.398371,-0.091625,0.002226,0.454299,0.0,0.0,0.000369
3,1,0.001,-0.17532,-0.228832,-0.110275,0.285992,0.002226,0.255007,0.0,0.0,0.00052
4,1,0.001333,-0.079124,0.055072,-0.110275,0.191588,0.002226,0.255007,0.0,0.0,0.000175


Data analysing and cleaning

In [4]:
bearing_signals.isnull().sum()

bearing_id    0
timestamp     0
a1_x          0
a1_y          0
a1_z          0
a2_x          0
a2_y          0
a2_z          0
rpm           0
hz            0
w             0
dtype: int64

In [5]:
bearing_signals.describe()

Unnamed: 0,bearing_id,timestamp,a1_x,a1_y,a1_z,a2_x,a2_y,a2_z,rpm,hz,w
count,10265700.0,10265700.0,10265700.0,10265700.0,10265700.0,10265700.0,10265700.0,10265700.0,10265700.0,10265700.0,10265700.0
mean,56.58897,15.56797,-0.2200731,-0.04832596,-0.0525489,-0.3124191,-0.08758339,0.07102206,-276005.9,-4600.098,0.4629741
std,32.31361,9.425589,1.824082,1.450662,1.768919,2.553113,2.631378,2.700098,40769580.0,679493.0,0.456508
min,1.0,0.0,-16.04771,-18.87183,-16.33967,-17.65083,-18.73959,-22.96248,-6000000000.0,-100000000.0,1.407501e-06
25%,29.0,7.638,-1.041087,-0.6073697,-0.8785304,-1.602094,-1.247228,-1.040389,477.4536,7.95756,0.04778236
50%,56.0,15.27633,-0.1753203,-0.03956274,-0.01424326,-0.3748382,0.002226305,0.05571547,1285.714,21.42857,0.378118
75%,85.0,22.91433,0.59425,0.5282442,0.8500439,0.8524177,0.8672333,1.15182,1487.603,24.79339,0.7271071
max,112.0,55.49967,15.60087,16.14294,16.40721,19.16685,22.49241,25.96364,90001.35,1500.023,9.393605


In [6]:
bearing_signals.shape

(10265700, 11)

In [7]:
bearing_signals.columns.to_list()

['bearing_id',
 'timestamp',
 'a1_x',
 'a1_y',
 'a1_z',
 'a2_x',
 'a2_y',
 'a2_z',
 'rpm',
 'hz',
 'w']

Merging signals tabel with classes tabel

In [8]:
df = (pd.merge(bearing_signals, bearing_classes, on='bearing_id'))
df.head(2)

Unnamed: 0,bearing_id,timestamp,a1_x,a1_y,a1_z,a2_x,a2_y,a2_z,rpm,hz,w,status
0,1,0.0,0.113269,0.149706,-0.110275,-0.18603,0.19445,0.454299,0.0,0.0,6e-06,0
1,1,0.000333,-0.367713,-0.228832,0.177821,0.285992,0.002226,-0.04393,0.0,0.0,0.000243,0


Calculating metrics per bearing

In [9]:
df_mean = df.groupby('bearing_id').mean()
df_mean.columns = ['timestamp','a1_x_mean','a1_y_mean','a1_z_mean','a2_x_mean','a2_y_mean','a2_z_mean','rpm_mean','hz_mean','w_mean','status']
df_mean = df_mean.drop(['timestamp'], 1)
df_mean = df_mean.drop(40)

In [10]:
df_max = df.groupby('bearing_id').max()
df_max = df_max.drop(40)
df_max = df_max.drop(['timestamp','rpm','hz','status'], 1)
df_max.columns = ['a1_x_max','a1_y_max','a1_z_max','a2_x_max','a2_y_max','a2_z_max','w_max']

In [11]:
df_min = df.groupby('bearing_id').min()
df_min = df_min.drop(40)
df_min = df_min.drop(['timestamp','rpm','hz','w','status'], 1)
df_min.columns = ['a1_x_min','a1_y_min','a1_z_min','a2_x_min','a2_y_min','a2_z_min']

In [12]:
df_min_range = df.groupby('bearing_id').min()
df_min_range = df_min_range.drop(40)
df_min_range = df_min_range.drop(['timestamp','rpm','hz','status'], 1)

In [13]:
df_max_range = df.groupby('bearing_id').max()
df_max_range = df_max_range.drop(40)
df_max_range = df_max_range.drop(['timestamp','rpm','hz','status'], 1)

In [14]:
df_range = df_max_range.subtract(df_min_range, fill_value=0)
df_range.columns = ['a1_x_range','a1_y_range','a1_z_range','a2_x_range','a2_y_range','a2_z_range','w_range']
df_range.head(2)

Unnamed: 0_level_0,a1_x_range,a1_y_range,a1_z_range,a2_x_range,a2_y_range,a2_z_range,w_range
bearing_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1,16.738155,9.936622,17.861935,18.692052,17.876810,22.021738,1.679265
2,15.006622,9.558084,16.421456,17.748009,16.723468,20.028821,1.907449
3,15.583800,9.558084,16.421456,21.712990,18.645705,19.132008,2.132677
4,12.505518,9.558084,15.173042,17.653605,15.666237,18.932716,1.877710
5,14.718033,10.882967,17.189712,20.580138,18.549593,17.039445,1.578119
...,...,...,...,...,...,...,...
108,15.583800,13.627367,20.166701,21.996202,15.474013,23.516426,1.887597
109,14.429444,10.788333,19.014318,14.349454,12.686769,21.124925,1.754798
110,19.624044,10.504429,19.494477,16.331944,13.071216,21.423863,1.707395
111,16.064781,9.084912,19.110350,14.632667,12.013985,17.438028,1.799548


In [15]:
df_master = (pd.merge(df_mean, df_range, on='bearing_id'))
df_master = (pd.merge(df_master, df_min, on='bearing_id'))
df_master = (pd.merge(df_master, df_max, on='bearing_id'))
df_master.head(2)

Unnamed: 0_level_0,a1_x_mean,a1_y_mean,a1_z_mean,a2_x_mean,a2_y_mean,a2_z_mean,rpm_mean,hz_mean,w_mean,status,...,a2_x_min,a2_y_min,a2_z_min,a1_x_max,a1_y_max,a1_z_max,a2_x_max,a2_y_max,a2_z_max,w_max
bearing_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,-0.176971,-0.001740,-0.035361,0.114590,0.023329,0.250710,1082.423656,18.040394,0.419961,0,...,-10.004077,-8.647843,-10.207810,7.327991,5.638507,9.204820,8.687975,9.228967,11.813929,1.679266
2,-0.174032,-0.007636,-0.053353,-0.178986,0.024936,0.194061,1074.981634,17.916361,0.501160,0,...,-9.437651,-7.975060,-11.104623,7.424187,4.881431,8.340533,8.310358,8.748408,8.924198,1.907451
3,-0.172328,-0.002343,-0.039898,-0.121842,0.036149,0.183881,1149.927038,19.165451,0.513051,0,...,-12.080972,-8.455619,-11.204268,7.231794,5.449238,8.052437,9.632018,10.190086,7.927740,2.132678
4,-0.149644,-0.000549,-0.037679,-0.119574,0.030838,0.200699,1127.538630,18.792311,0.539426,0,...,-10.004077,-7.782836,-11.104623,5.981243,4.881431,8.148469,7.649527,7.883401,7.828094,1.877711
5,-0.146438,0.015268,-0.026678,-0.080799,0.034042,0.206547,1037.216452,17.286941,0.353715,0,...,-11.325737,-8.840067,-7.816309,6.750813,6.206314,8.340533,9.254401,9.709526,9.223136,1.578121
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
108,-0.145112,0.026677,0.120057,-0.225940,0.069918,0.219440,952.405287,15.873421,0.387498,1,...,-11.136929,-6.917829,-12.101081,7.231794,6.679486,9.204820,10.859274,8.556184,11.415345,1.887599
109,-0.124200,0.077907,0.219025,-0.229740,0.099719,0.184505,1000.562215,16.676037,0.262752,1,...,-7.266352,-6.052822,-10.805685,7.520383,4.786797,9.012756,7.083102,6.633946,10.319241,1.754799
110,-0.132581,0.080428,0.213349,-0.209128,0.117938,0.079041,1073.939895,17.898998,0.302760,1,...,-8.682417,-6.052822,-9.709580,10.598665,4.313624,10.069107,7.649527,7.018394,11.714283,1.707396
111,-0.126273,0.090689,0.221793,-0.150227,0.124992,0.029792,987.802878,16.463381,0.299101,1,...,-7.455161,-5.283927,-8.812767,8.867131,4.313624,9.877043,7.177506,6.730058,8.625261,1.799549


In [16]:
df_master.columns.to_list()

['a1_x_mean',
 'a1_y_mean',
 'a1_z_mean',
 'a2_x_mean',
 'a2_y_mean',
 'a2_z_mean',
 'rpm_mean',
 'hz_mean',
 'w_mean',
 'status',
 'a1_x_range',
 'a1_y_range',
 'a1_z_range',
 'a2_x_range',
 'a2_y_range',
 'a2_z_range',
 'w_range',
 'a1_x_min',
 'a1_y_min',
 'a1_z_min',
 'a2_x_min',
 'a2_y_min',
 'a2_z_min',
 'a1_x_max',
 'a1_y_max',
 'a1_z_max',
 'a2_x_max',
 'a2_y_max',
 'a2_z_max',
 'w_max']

Engeneering frequencies

In [17]:
df_fft = df
acceleration_cols = ['a1_x','a1_y','a1_z','a2_x','a2_y','a2_z']
acceleration_fft = fft(df[acceleration_cols].values)
df_fft['a1_x_fft'] = abs(acceleration_fft[:,0])
df_fft['a1_y_fft'] = abs(acceleration_fft[:,1])
df_fft['a1_z_fft'] = abs(acceleration_fft[:,2])
df_fft['a2_x_fft'] = abs(acceleration_fft[:,3])
df_fft['a2_y_fft'] = abs(acceleration_fft[:,4])
df_fft['a2_z_fft'] = abs(acceleration_fft[:,5])
df_fft.head()

Unnamed: 0,bearing_id,timestamp,a1_x,a1_y,a1_z,a2_x,a2_y,a2_z,rpm,hz,w,status,a1_x_fft,a1_y_fft,a1_z_fft,a2_x_fft,a2_y_fft,a2_z_fft
0,1,0.0,0.113269,0.149706,-0.110275,-0.18603,0.19445,0.454299,0.0,0.0,6e-06,0,0.615419,0.768876,0.416851,0.220532,0.416851,0.768876
1,1,0.000333,-0.367713,-0.228832,0.177821,0.285992,0.002226,-0.04393,0.0,0.0,0.000243,0,0.174436,0.880146,0.314195,0.200896,0.314195,0.880146
2,1,0.000667,0.113269,0.149706,-0.398371,-0.091625,0.002226,0.454299,0.0,0.0,0.000369,0,0.229504,0.932711,0.116978,0.795256,0.116978,0.932711
3,1,0.001,-0.17532,-0.228832,-0.110275,0.285992,0.002226,0.255007,0.0,0.0,0.00052,0,0.028798,0.6497,0.355533,0.595537,0.355533,0.6497
4,1,0.001333,-0.079124,0.055072,-0.110275,0.191588,0.002226,0.255007,0.0,0.0,0.000175,0,0.314494,0.277512,0.076581,0.688839,0.076581,0.277512


Merging calculated metrics with fequencies per bearing

In [18]:
df_fft = df_fft.drop(['timestamp', 'a1_x', 'a1_y', 'a1_z', 'a2_x', 'a2_y', 'a2_z', 'rpm', 'hz', 'w', 'status'], 1)
df_fft = df_fft.drop(40)

MemoryError: Unable to allocate 1.22 GiB for an array with shape (16, 10265700) and data type float64

In [None]:
df_fft_mean = df_fft.groupby('bearing_id').mean()
df_fft_mean.columns = ['a1_x_fft_mean', 'a1_y_fft_mean', 'a1_z_fft_mean', 'a2_x_fft_mean', 'a2_y_fft_mean', 'a2_z_fft_mean']
df_fft_mean.head()

In [None]:
df_fft_max = df_fft.groupby('bearing_id').max()
df_fft_max.columns = ['a1_x_fft_max', 'a1_y_fft_max', 'a1_z_fft_max', 'a2_x_fft_max', 'a2_y_fft_max', 'a2_z_fft_max']
df_fft_max.head()

In [None]:
df_fft_min = df_fft.groupby('bearing_id').min()
df_fft_min.columns = ['a1_x_fft_min', 'a1_y_fft_min', 'a1_z_fft_min', 'a2_x_fft_min', 'a2_y_fft_min', 'a2_z_fft_min']

In [None]:
df_fft_min

In [None]:
df_fft_min_range = df_fft.groupby('bearing_id').min()

In [None]:
df_fft_min_range

In [None]:
df_fft_max_range = df_fft.groupby('bearing_id').max()
df_fft_max_range

In [None]:
df_fft_range = df_fft_max_range.subtract(df_fft_min_range, fill_value=0)
df_fft_range

In [None]:
df_fft_range.columns = ['a1_x_ff_range', 'a1_y_fft_range', 'a1_z_fft_range', 'a2_x_fft_range', 'a2_y_fft_range', 'a2_z_fft_range']

In [None]:
df_fft_master = (pd.merge(df_fft_mean, df_fft_range, on='bearing_id'))
df_fft_master = (pd.merge(df_fft_master, df_fft_min, on='bearing_id'))
df_fft_master = (pd.merge(df_fft_master, df_fft_max, on='bearing_id'))
df_fft_master

In [None]:
df_final_master = (pd.merge(df_master, df_fft_master, on='bearing_id'))


In [None]:
df_final_master

In [None]:
df_final_master.info()

In [None]:
df_rpm_b_68 = df_final_master[df_final_master['bearing_id']==68]
xf = df_rpm_b_68["timestamp"].values
yf = df_rpm_b_68["rpm"].values
plt.plot(xf, yf)
plt.show()