In [108]:
import pandas as pd
import numpy as np
import os
import copy
# import matplotlib
# matplotlib.use('Agg')
import matplotlib.pyplot as plt
import glob
from scipy.signal import detrend, get_window
from scipy.signal import butter, filtfilt
from scipy.fftpack import fft, fftfreq
from scipy.signal import hilbert, chirp

In [109]:
def compute_skewness(x):
    
    n = len(x)
    third_moment = np.sum((x - np.mean(x))**3) / n
    s_3 = np.std(x, ddof = 1) ** 3
    return third_moment/s_3

In [110]:
def compute_kurtosis(x):
    
    n = len(x)
    fourth_moment = np.sum((x - np.mean(x))**4) / n
    s_4 = np.std(x, ddof = 1) ** 4
    return fourth_moment / s_4 - 3

In [111]:
def to_feature(df):
    data = pd.DataFrame()
    data['max'] = df[df.columns[:]].max(axis = 1)
    data['min'] = df[df.columns[:]].min(axis = 1)
    data['mean'] = df[df.columns[:]].mean(axis = 1)
    data['std'] = df[df.columns[:]].std(ddof=1,axis = 1)
    data['rms'] = df[df.columns[:]].apply(lambda x: np.sqrt(np.mean(x**2)), axis=1)
    data['skewness'] = df[df.columns[:]].apply(lambda x: compute_skewness(x), axis=1)
    data['kurtosis'] = df[df.columns[:]].apply(lambda x: compute_kurtosis(x), axis=1)
    data['crest_factor'] = data['max'] / data['rms']
    data['form_factor'] = data['rms'] / data['mean']
    return data

In [112]:
def to_freq(df_time:pd.DataFrame):
    df_freq = df_time.copy()
    filter_vibr = pd.RangeIndex(start=0, stop=len(df_freq.columns), step=1)
    df_freq = df_freq.drop(columns=filter_vibr, axis = 1)
    
    freq_data = []
    for i,row in df_time.iterrows():
        y = np.array(df_time.loc[i,:])
        # vibration_data = data['Vibration'].values
        vibration_data = y
        # Time parameters
        sampling_rate =  len(vibration_data) # Hz, example sampling rate
        n = len(vibration_data)
        time = np.arange(n) / sampling_rate

        # Perform FFT
        freq_domain = np.fft.fft(vibration_data)
        freq = np.fft.fftfreq(n, d=1/sampling_rate)

        x_freq = freq[:n//2]
        y_freq = np.abs(freq_domain)[:n//2] * 1/n

        freq_data.append(y_freq)

    df_freq = pd.DataFrame(data = freq_data)
    # df_freq = pd.concat([df_freq,pd.DataFrame(freq_data)],axis=0)
    # columns = [str(x) for x in df_freq.columns]
    # df_freq.columns = columns
    df_freq.index = df_time.index
    return df_freq



In [113]:
def to_freq2(df_time:pd.DataFrame):
    df_freq = df_time.copy()
    filter_vibr = pd.RangeIndex(start=0, stop=len(df_freq.columns), step=1)
    df_freq = df_freq.drop(columns=filter_vibr, axis = 1)
    
    freq_data = []
    for i,row in df_time.iterrows():
        y = np.array(row)
        fs = len(y)
        fc = 500  # Cutoff frequency in Hz
        order = 10  # Filter order
        nyquist = 0.5 * fs
        normal_cutoff = fc / nyquist
        b, a = butter(order, normal_cutoff, btype='highpass', analog=False)

        filtered_signal = filtfilt(b, a, y)
        analytic_signal = hilbert(filtered_signal)
        amplitude_envelope = np.abs(analytic_signal)

        # Sampling parameters
        N = fs  # Number of samples
        T = 1.0 / fs  # Sample spacing (inverse of the sampling rate)
        x = np.arange(0, 1, T)
        # Compute the FFT of the signal
        y_start = 0
        y_end = N//2

        x_start = 0
        x_end = N//2

        yf = fft(amplitude_envelope)
        yf = 2.0/N * np.abs(yf[y_start:y_end])

        # Compute the frequencies corresponding to the FFT result
        xf = fftfreq(N, T)[x_start:x_end]

        freq_data.append(yf)

    df_freq = pd.DataFrame(data = freq_data)
    # df_freq = pd.concat([df_freq,pd.DataFrame(freq_data)],axis=0)
    # columns = [str(x) for x in df_freq.columns]
    # df_freq.columns = columns
    df_freq.index = df_time.index
    return df_freq


In [114]:
# Load your vibration data from a CSV file
# Assume the CSV file has a single column of vibration data with a header
# data = pd.read_csv('vibration_data.csv')
def to_fft(y, sample_rate):
    # vibration_data = data['Vibration'].values
    vibration_data = y
    # Time parameters
    sampling_rate = sample_rate  # Hz, example sampling rate
    n = len(vibration_data)
    time = np.arange(n) / sampling_rate

    # Perform FFT
    freq_domain = np.fft.fft(vibration_data)
    freq = np.fft.fftfreq(n, d=1/sampling_rate)

    x_freq = freq[:n//2]
    y_freq = np.abs(freq_domain)[:n//2] * 1/n


    # Plot the results
    plt.figure(figsize=(20,6))

    # Time domain plot
    plt.subplot(2, 1, 1)
    plt.plot(time, vibration_data)
    plt.title('Time Domain')
    plt.xlabel('Time (s)')
    plt.ylabel('Vibration Amplitude')

    # Frequency domain plot
    plt.subplot(2, 1, 2)
    plt.plot(x_freq, y_freq)
    plt.title('Frequency Domain')
    plt.xlabel('Frequency (Hz)')
    plt.ylabel('Magnitude')

    # plt.tight_layout()
    # plt.show()

    return plt


In [115]:
def to_fft2(y,sample_rate):
    # Remove DC component
    signal = detrend(y)
    signal = np.array(y)
    # Apply a window function (e.g., Hamming window)
    # window = get_window('hamming', len(signal))
    windowed_signal = signal # * window
    # Apply FFT
    fft_result = np.fft.fft(windowed_signal)
    yf = fft(windowed_signal)
    
    fs = sample_rate
    t = np.arange(0, 1, 1/fs)
    # Frequency bins
    N = len(windowed_signal)
    freqs = np.fft.fftfreq(N, 1/fs)
    xf = fftfreq(N, 1/fs)
    # Compute the magnitude of the FFT
    magnitude = np.abs(fft_result) / N

    # Single-sided spectrum (only positive frequencies)
    y_start = 1
    y_end = 1500 # N // 2
    x_start = 1
    x_end = 1500
    
    freqs = freqs[x_start:x_end]
    magnitude = magnitude[y_start:y_end] * 2  # Multiply by 2 (except for DC and Nyquist component)
    yf = 2.0/N * np.abs(yf[y_start:y_end])
    xf = xf[x_start:x_end]
    # Correct the magnitude for windowing effect
    # magnitude /= np.sum(window) / len(window)

    plt.figure(figsize=(20, 6))

    # Plot time-domain signal
    plt.subplot(2, 1, 1)
    plt.plot(t[:200], signal[:200], linewidth=0.5)
    plt.title('Time Domain Signal')
    plt.xlabel('Time [s]')
    plt.ylabel('Amplitude')

    # Plot frequency-domain signal
    plt.subplot(2, 1, 2)
    plt.plot(freqs, magnitude, linewidth=0.5,color='red')
    plt.plot(xf, yf, linewidth=0.5,color='blue')
    plt.title('Frequency Domain Signal')
    plt.xlabel('Frequency [Hz]')
    plt.ylabel('Magnitude')
    # Set custom x-ticks to show more frequency markers
    
    step = round((x_end - x_start)/40)  # Adjust this value to show more or fewer frequency markers
    plt.xticks(np.arange(0, x_end, step))

    # plt.tight_layout()
    # plt.show()
    return plt

In [116]:
def to_fft3(y,sample_rate):
    y = np.array(y)
    fs = sample_rate

    fc = 500  # Cutoff frequency in Hz
    order = 10  # Filter order
    nyquist = 0.5 * fs
    normal_cutoff = fc / nyquist
    b, a = butter(order, normal_cutoff, btype='highpass', analog=False)
    filtered_signal = filtfilt(b, a, y)

    analytic_signal = hilbert(filtered_signal)
    amplitude_envelope = np.abs(analytic_signal)
    # amplitude_envelope = filtered_signal
    # y = amplitude_envelope

    # Sampling parameters
    N = sample_rate  # Number of samples
    T = 1.0 / sample_rate  # Sample spacing (inverse of the sampling rate)
    x = np.arange(0, 1, T)
    # Compute the FFT of the signal
    y_start = 1
    y_end = 1200 # N//2

    x_start = 1
    x_end = 1200

    yf = fft(amplitude_envelope)
    yf = 2.0/N * np.abs(yf[y_start:y_end])

    # Compute the frequencies corresponding to the FFT result
    xf = fftfreq(N, T)[x_start:x_end]

    # Plot the original signal
    plt.figure(figsize=(20, 6))

    plt.subplot(2, 1, 1)
    # plt.plot(x[:200], amplitude_envelope[:200], linewidth=0.5, color='red')
    plt.plot(x[:200], filtered_signal[:200], linewidth=0.5, color='blue')
    plt.plot(x[:200], y[:200], linewidth=0.5, color='green')
    plt.title('Original Vibration Signal')
    plt.xlabel('Time [s]')
    plt.ylabel('Amplitude')

    # Plot the FFT result
    plt.subplot(2, 1, 2)
    plt.plot(xf, yf, linewidth=0.5)
    plt.title('Fourier Transform')
    plt.xlabel('Frequency [Hz]')
    plt.ylabel('Amplitude')
    
    step = round((x_end - x_start)/40)  # Adjust this value to show more or fewer frequency markers
    plt.xticks(np.arange(0, x_end, step))


    # plt.tight_layout()
    # plt.show()
    return plt


In [117]:
test_rutes = glob.glob(f"nasa_bearing_dataset/bear*/*",recursive=False)
test_rutes

['nasa_bearing_dataset\\bearing_1st_test\\1st_test',
 'nasa_bearing_dataset\\bearing_2nd_test\\2nd_test',
 'nasa_bearing_dataset\\bearing_3rd_test\\4th_test']

In [118]:
os.path.basename(test_rutes[0])

'1st_test'

In [119]:
number_test = len(test_rutes)
number_test

3

In [120]:
# dict_test_rotes = {os.path.basename(key): None for key in test_rutes}
dict_test_rotes = {}
for file_folder in test_rutes:
    dict_test_rotes[os.path.basename(file_folder)] = glob.glob(os.path.join(file_folder,f'*'),recursive=True)

list(dict_test_rotes)[0]
next(iter(dict_test_rotes.values()))[0]


'nasa_bearing_dataset\\bearing_1st_test\\1st_test\\2003.10.22.12.06.24'

In [121]:
dict_test_rotes[list(dict_test_rotes)[0]][0]

'nasa_bearing_dataset\\bearing_1st_test\\1st_test\\2003.10.22.12.06.24'

In [122]:
dict_test_rotes.keys()

dict_keys(['1st_test', '2nd_test', '4th_test'])

In [123]:
tests = {key:None for key in range(len(dict_test_rotes))}

In [124]:
for index, (key,value) in enumerate(dict_test_rotes.items()):
    bearings_num = len(pd.read_csv(value[0], sep='\t',header=None).columns)
    bearings = {item:[] for item in range(bearings_num)}
    tests[index] = bearings

    for vibr in value:
        vibr_df = pd.read_csv(vibr, sep='\t',header=None)
        for column in vibr_df.columns:
            vibr_df[column].name = os.path.basename(vibr)[:-3]
            tests[index][column].append(vibr_df[column])

In [125]:
format_str = '%Y.%m.%d.%H.%M'
for key,value in tests.items():
    for bearing in value:
        tests[key][bearing] = {0:pd.DataFrame(tests[key][bearing])}
        tests[key][bearing][0].index = pd.to_datetime(tests[key][bearing][0].index, format= format_str)

In [126]:
# for index, (key,value) in enumerate(dict_test_rotes.items()):
#     bearings_num = len(pd.read_csv(value[0], sep='\t',header=None).columns)
#     bearings = {item:{0:pd.DataFrame()} for item in range(bearings_num)}
#     test[index] = bearings


#     for vibr in value:
#         vibr_df = pd.read_csv(vibr, sep='\t',header=None)
#         moment:list[pd.DataFrame()] = [pd.DataFrame()]*bearings_num
#         for column in vibr_df.columns:
#             vibr_df[column].name = os.path.basename(vibr)[:-3] # dataserie
#             moment[column] = vibr_df[column].to_frame().T
#             test[index][column][0] = pd.concat([test[index][column][0],moment[column]], axis=0)
            

In [127]:
# bearings = {}

# bearings[0] = []
# bearings[1] = []
# bearings[2] = []
# bearings[3] = []
# # bearings[4] = []
# # bearings[5] = []
# # bearings[6] = []
# # bearings[7] = []


# for root, dirs, files in os.walk("nasa_bearing_dataset/bearing_2nd_test/2nd_test/", topdown=False):
    
#     for file_name in files:
#         path = os.path.join(root, file_name)
#         # print(file_name[:-3])
#         dataset=pd.read_csv(path, sep='\t',header=None)

#         for column in dataset.columns:
#             dataset[column].name = file_name[:-3]
#             bearings[column].append(dataset[column])

In [128]:
# for key_tests,bearings in tests.items():
#     print(f'tests {key_tests+1} started!')
#     for key_bearing,bearing in bearings.items():
#         # for i in range(3):
#         i = 0
#         if key_tests == 0 and key_bearing == 0:
#             del tests[key_tests][key_bearing][i]
#         elif key_tests == 0 and key_bearing == 1:   
#             del tests[key_tests][key_bearing][i]
#         elif key_tests == 0 and key_bearing == 2:   
#             del tests[key_tests][key_bearing][i] 
#         elif key_tests == 0 and key_bearing == 3:   
#             del tests[key_tests][key_bearing][i] 
#         elif key_tests == 0 and key_bearing == 4:
#             pass
#         elif key_tests == 0 and key_bearing == 5:
#             pass
#         elif key_tests == 0 and key_bearing == 6:
#             pass
#         elif key_tests == 0 and key_bearing == 7:
#             pass
#         elif key_tests == 1 and key_bearing == 0:
#             pass
#         elif key_tests == 1 and key_bearing == 1:
#             del tests[key_tests][key_bearing][i] 
#         elif key_tests == 1 and key_bearing == 2:
#             del tests[key_tests][key_bearing][i] 
#         elif key_tests == 1 and key_bearing == 3:
#             del tests[key_tests][key_bearing][i] 
#         elif key_tests == 2 and key_bearing == 0:
#             del tests[key_tests][key_bearing][i] 
#         elif key_tests == 2 and key_bearing == 1:
#             del tests[key_tests][key_bearing][i] 
#         elif key_tests == 2 and key_bearing == 2:
#             pass
#         elif key_tests == 2 and key_bearing == 3:
#             del tests[key_tests][key_bearing][i] 
        
#         print(f'Bearing {key_bearing+1} done!')

In [129]:
for key_test,bearings in tests.items():
    for key_bearing,bearing in bearings.items():
        tests[key_test][key_bearing][1] = to_feature(tests[key_test][key_bearing][0])

In [130]:
for key_test,bearings in tests.items():
    for key_bearing,bearing in bearings.items():
        tests[key_test][key_bearing][2] = to_freq2(tests[key_test][key_bearing][0])

In [131]:
tests[0][5][2].shape

(2156, 10240)

In [132]:
for key_tests,bearings in tests.items():
    print(f'tests {key_tests+1} started!')
    for key_bearing,bearing in bearings.items():
        for i in range(3):
            if key_tests == 0 and key_bearing == 0:
                # normal = [0,0]
                fault = [0,2155]
                tests[key_tests][key_bearing][i].loc[tests[key_tests][key_bearing][0].index[fault[0]]:tests[key_tests][key_bearing][0].index[fault[1]],'target'] = 'rare'
                # tests[key_tests][tests[key_tests][key_bearing][0].index[normal[0]]:tests[key_tests][key_bearing][0].index[normal[1]],'target'] = 'normal'
            elif key_tests == 0 and key_bearing == 1:   
                # normal = [0,0]
                fault = [0,2155]
                tests[key_tests][key_bearing][i].loc[tests[key_tests][key_bearing][0].index[fault[0]]:tests[key_tests][key_bearing][0].index[fault[1]],'target'] = 'rare'
                # tests[key_tests][tests[key_tests][key_bearing][0].index[normal[0]]:tests[key_tests][key_bearing][0].index[normal[1]],'target'] = 'normal'
            elif key_tests == 0 and key_bearing == 2:   
                # normal = [0,0]
                fault = [0,2155]
                tests[key_tests][key_bearing][i].loc[tests[key_tests][key_bearing][0].index[fault[0]]:tests[key_tests][key_bearing][0].index[fault[1]],'target'] = 'rare'
                # tests[key_tests][tests[key_tests][key_bearing][0].index[normal[0]]:tests[key_tests][key_bearing][0].index[normal[1]],'target'] = 'normal'
            elif key_tests == 0 and key_bearing == 3:   
                normal = [228,1800]
                fault = [1801,2155]
                tests[key_tests][key_bearing][i].loc[tests[key_tests][key_bearing][0].index[fault[0]]:tests[key_tests][key_bearing][0].index[fault[1]],'target'] = 'rare'
                # tests[key_tests][key_bearing][i].loc[tests[key_tests][key_bearing][0].index[normal[0]]:tests[key_tests][key_bearing][0].index[normal[1]],'target'] = 'normal'
            elif key_tests == 0 and key_bearing == 4:
                normal = [1500,2100]
                # normal = [0,2007]
                fault = [2121,2155]
                tests[key_tests][key_bearing][i].loc[tests[key_tests][key_bearing][0].index[fault[0]]:tests[key_tests][key_bearing][0].index[fault[1]],'target'] = 'inner race'
                tests[key_tests][key_bearing][i].loc[tests[key_tests][key_bearing][0].index[normal[0]]:tests[key_tests][key_bearing][0].index[normal[1]],'target'] = 'normal'
            elif key_tests == 0 and key_bearing == 5:
                normal = [1500,2100]
                # normal = [0,2007]
                fault = [2121,2155]
                tests[key_tests][key_bearing][i].loc[tests[key_tests][key_bearing][0].index[fault[0]]:tests[key_tests][key_bearing][0].index[fault[1]],'target'] = 'inner race'
                tests[key_tests][key_bearing][i].loc[tests[key_tests][key_bearing][0].index[normal[0]]:tests[key_tests][key_bearing][0].index[normal[1]],'target'] = 'normal'
            elif key_tests == 0 and key_bearing == 6:
                normal = [1000,1500]
                # normal = [0,1250]
                fault = [1574,2000]
                tests[key_tests][key_bearing][i].loc[tests[key_tests][key_bearing][0].index[fault[0]]:tests[key_tests][key_bearing][0].index[fault[1]],'target'] = 'roller element'
                tests[key_tests][key_bearing][i].loc[tests[key_tests][key_bearing][0].index[normal[0]]:tests[key_tests][key_bearing][0].index[normal[1]],'target'] = 'normal'
            elif key_tests == 0 and key_bearing == 7:
                normal = [700,1500]
                # normal = [0,1250]
                fault = [1574,2000]
                tests[key_tests][key_bearing][i].loc[tests[key_tests][key_bearing][0].index[fault[0]]:tests[key_tests][key_bearing][0].index[fault[1]],'target'] = 'roller element'
                tests[key_tests][key_bearing][i].loc[tests[key_tests][key_bearing][0].index[normal[0]]:tests[key_tests][key_bearing][0].index[normal[1]],'target'] = 'normal'

            elif key_tests == 1 and key_bearing == 0:
                normal = [0,531]
                fault = [532,982]
                tests[key_tests][key_bearing][i].loc[tests[key_tests][key_bearing][0].index[fault[0]]:tests[key_tests][key_bearing][0].index[fault[1]],'target'] = 'outer race'
                tests[key_tests][key_bearing][i].loc[tests[key_tests][key_bearing][0].index[normal[0]]:tests[key_tests][key_bearing][0].index[normal[1]],'target'] = 'normal'
            elif key_tests == 1 and key_bearing == 1:
                normal = [0,698]
                fault = [703,983]
                tests[key_tests][key_bearing][i].loc[tests[key_tests][key_bearing][0].index[fault[0]]:tests[key_tests][key_bearing][0].index[fault[1]],'target'] = 'rare'
                # tests[key_tests][key_bearing][i].loc[tests[key_tests][key_bearing][0].index[normal[0]]:tests[key_tests][key_bearing][0].index[normal[1]],'target'] = 'normal'
            elif key_tests == 1 and key_bearing == 2:
                normal = [0,840]
                fault = [886,983]
                tests[key_tests][key_bearing][i].loc[tests[key_tests][key_bearing][0].index[fault[0]]:tests[key_tests][key_bearing][0].index[fault[1]],'target'] = 'rare'
                # tests[key_tests][key_bearing][i].loc[tests[key_tests][key_bearing][0].index[normal[0]]:tests[key_tests][key_bearing][0].index[normal[1]],'target'] = 'normal'
            elif key_tests == 1 and key_bearing == 3:
                normal = [0,678]
                fault = [709,983]
                tests[key_tests][key_bearing][i].loc[tests[key_tests][key_bearing][0].index[fault[0]]:tests[key_tests][key_bearing][0].index[fault[1]],'target'] = 'rare'
                # tests[key_tests][key_bearing][i].loc[tests[key_tests][key_bearing][0].index[normal[0]]:tests[key_tests][key_bearing][0].index[normal[1]],'target'] = 'normal'

            elif key_tests == 2 and key_bearing == 0:
                normal = [0,6040]
                fault = [6181,6323]
                tests[key_tests][key_bearing][i].loc[tests[key_tests][key_bearing][0].index[fault[0]]:tests[key_tests][key_bearing][0].index[fault[1]],'target'] = 'rare'
                # tests[key_tests][key_bearing][i].loc[tests[key_tests][key_bearing][0].index[normal[0]]:tests[key_tests][key_bearing][0].index[normal[1]],'target'] = 'normal'
            elif key_tests == 2 and key_bearing == 1:
                normal = [0,6040]
                fault = [6181,6323]
                tests[key_tests][key_bearing][i].loc[tests[key_tests][key_bearing][0].index[fault[0]]:tests[key_tests][key_bearing][0].index[fault[1]],'target'] = 'rare'
            #     tests[key_tests][key_bearing][i].loc[tests[key_tests][key_bearing][0].index[normal[0]]:tests[key_tests][key_bearing][0].index[normal[1]],'target'] = 'normal'
            elif key_tests == 2 and key_bearing == 2:
                normal = [4000,6040]
                fault = [6070,6323]
                tests[key_tests][key_bearing][i].loc[tests[key_tests][key_bearing][0].index[fault[0]]:tests[key_tests][key_bearing][0].index[fault[1]],'target'] = 'outer race'
                tests[key_tests][key_bearing][i].loc[tests[key_tests][key_bearing][0].index[normal[0]]:tests[key_tests][key_bearing][0].index[normal[1]],'target'] = 'normal'
            elif key_tests == 2 and key_bearing == 3:
                normal = [0,6040]
                fault = [6060,6323]
                tests[key_tests][key_bearing][i].loc[tests[key_tests][key_bearing][0].index[fault[0]]:tests[key_tests][key_bearing][0].index[fault[1]],'target'] = 'rare'
                # tests[key_tests][key_bearing][i].loc[tests[key_tests][key_bearing][0].index[normal[0]]:tests[key_tests][key_bearing][0].index[normal[1]],'target'] = 'normal'      
        
        print(f'Bearing {key_bearing+1} done!')


tests 1 started!
Bearing 1 done!
Bearing 2 done!
Bearing 3 done!
Bearing 4 done!
Bearing 5 done!
Bearing 6 done!
Bearing 7 done!
Bearing 8 done!
tests 2 started!
Bearing 1 done!
Bearing 2 done!
Bearing 3 done!
Bearing 4 done!
tests 3 started!
Bearing 1 done!
Bearing 2 done!
Bearing 3 done!
Bearing 4 done!


In [133]:
for key_test,bearings in tests.items():
    print(f'test {key_test+1} started!')
    for key_bearing,bearing in bearings.items():
        for i in range(3):
                tests[key_test][key_bearing][i].dropna(subset = ['target'],inplace = True)
                tests[key_test][key_bearing][i] = tests[key_test][key_bearing][i].loc[tests[key_test][key_bearing][i]['target'] != 'rare',:]

test 1 started!
test 2 started!
test 3 started!


In [134]:
del tests[0][0]
del tests[0][1]
del tests[0][2] 
del tests[0][3]
del tests[1][1]
del tests[1][2]
del tests[1][3]
del tests[2][0]
del tests[2][1]
del tests[2][3]

In [135]:
# for key_test,bearings in tests.items():
#     print(f'test {key_test} starting')
#     for key_bearing,bearing in bearings.items():
#         print(f'bearing {key_bearing} starting')
#         for i in range(3):
#             tests[key_test][key_bearing][i].to_csv(f'nasa_bearing_dataset/{i}_test{key_test}_bearing{key_bearing}.csv')

In [136]:
file_rute = glob.glob(f"nasa_bearing_dataset/*.csv",recursive=False)
file_rute

['nasa_bearing_dataset\\0_test0_bearing0.csv',
 'nasa_bearing_dataset\\0_test0_bearing1.csv',
 'nasa_bearing_dataset\\0_test0_bearing2.csv',
 'nasa_bearing_dataset\\0_test0_bearing3.csv',
 'nasa_bearing_dataset\\0_test0_bearing4.csv',
 'nasa_bearing_dataset\\0_test0_bearing5.csv',
 'nasa_bearing_dataset\\0_test0_bearing6.csv',
 'nasa_bearing_dataset\\0_test0_bearing7.csv',
 'nasa_bearing_dataset\\0_test1_bearing0.csv',
 'nasa_bearing_dataset\\0_test1_bearing1.csv',
 'nasa_bearing_dataset\\0_test1_bearing2.csv',
 'nasa_bearing_dataset\\0_test1_bearing3.csv',
 'nasa_bearing_dataset\\0_test2_bearing0.csv',
 'nasa_bearing_dataset\\0_test2_bearing1.csv',
 'nasa_bearing_dataset\\0_test2_bearing2.csv',
 'nasa_bearing_dataset\\0_test2_bearing3.csv',
 'nasa_bearing_dataset\\1_test0_bearing0.csv',
 'nasa_bearing_dataset\\1_test0_bearing1.csv',
 'nasa_bearing_dataset\\1_test0_bearing2.csv',
 'nasa_bearing_dataset\\1_test0_bearing3.csv',
 'nasa_bearing_dataset\\1_test0_bearing4.csv',
 'nasa_bearin

In [137]:
# list_df = [4,5,6,7,8,14]
# format_str = '%Y.%m.%d.%H.%M'
# for i,file in enumerate(file_rute):
#     if i in list_df:
#         list_df.append(pd.read_csv(f'nasa_bearing_dataset/test{key_test}_bearing_{key_bearing}_{i}.csv', index_col= 'Unnamed: 0'))
#         list_df[i].index = pd.to_datetime(list_df[i].index, format= format_str)

In [138]:
# format_str = '%Y.%m.%d.%H.%M'
# for i in range(8):
#     df.append(pd.read_csv(f'nasa_bearing_dataset/test{test}_bearing_{i}_time.csv', index_col= 'Unnamed: 0'))
#     df[i].index = pd.to_datetime(df[i].index, format= format_str)

In [139]:
for key_test,bearings in tests.items():
    
    for key_bearing,bearing in bearings.items():
        print(f'test {key_test+1} / {key_bearing} started!')
        print(tests[key_test][key_bearing][0]['target'].unique())

test 1 / 4 started!
['normal' 'inner race']
test 1 / 5 started!
['normal' 'inner race']
test 1 / 6 started!
['normal' 'roller element']
test 1 / 7 started!
['normal' 'roller element']
test 2 / 0 started!
['normal' 'outer race']
test 3 / 2 started!
['normal' 'outer race']


In [140]:
for key_test,bearings in tests.items():
    
    for key_bearing,bearing in bearings.items():
        print(f'test {key_test} / {key_bearing} started!')
        print(tests[key_test][key_bearing][0].isna().sum().sum())

test 0 / 4 started!
0
test 0 / 5 started!
0
test 0 / 6 started!
0
test 0 / 7 started!
0
test 1 / 0 started!
0
test 2 / 2 started!
0


In [141]:
tests[0][4][0].isna().sum().sum()

0

In [142]:
df_time = pd.DataFrame()
df_feat = pd.DataFrame()
df_freq = pd.DataFrame()

In [143]:
num = 0
df_time = pd.concat([tests[0][4][num].reset_index(drop = True),
                     tests[0][5][num].reset_index(drop = True),
                     tests[0][6][num].reset_index(drop = True),
                     tests[0][7][num].reset_index(drop = True),
                     tests[1][0][num].reset_index(drop = True),
                     tests[2][2][num].reset_index(drop = True)], axis = 0)


In [144]:
df_time.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,20471,20472,20473,20474,20475,20476,20477,20478,20479,target
0,0.051,-0.156,-0.137,-0.146,0.042,-0.2,-0.088,-0.085,0.029,-0.117,...,-0.291,0.11,-0.09,-0.178,-0.195,0.249,-0.22,-0.164,-0.178,normal
1,-0.095,-0.161,-0.127,0.051,0.063,0.044,-0.154,-0.032,0.007,-0.149,...,-0.22,0.117,-0.066,-0.356,-0.276,0.024,-0.002,-0.117,-0.112,normal
2,-0.164,-0.327,0.127,0.083,-0.22,-0.049,-0.198,-0.278,-0.122,-0.1,...,-0.063,0.054,0.076,-0.103,-0.176,-0.115,-0.244,-0.103,-0.151,normal
3,-0.19,-0.066,-0.034,-0.042,-0.21,-0.076,-0.059,-0.054,-0.142,-0.278,...,-0.125,-0.071,-0.159,-0.12,-0.227,-0.093,-0.193,-0.2,0.015,normal
4,-0.068,-0.134,-0.127,-0.164,0.142,-0.132,-0.286,-0.054,-0.146,-0.212,...,-0.063,-0.054,0.037,-0.139,-0.161,-0.164,-0.139,-0.195,-0.151,normal


In [145]:
df_time.isna().sum().sum()

0

In [146]:
df_time.to_csv('nasa_bearing_dataset\df_time.csv',index=False)

In [147]:
num = 1
df_feat = pd.concat([tests[0][4][num].reset_index(drop = True),
                     tests[0][5][num].reset_index(drop = True),
                     tests[0][6][num].reset_index(drop = True),
                     tests[0][7][num].reset_index(drop = True),
                     tests[1][0][num].reset_index(drop = True),
                     tests[2][2][num].reset_index(drop = True)], axis = 0)

In [148]:
df_feat.head()

Unnamed: 0,max,min,mean,std,rms,skewness,kurtosis,crest_factor,form_factor,target
0,0.486,-0.659,-0.114358,0.111572,0.159767,0.047967,0.557259,3.041937,-1.397075,normal
1,0.662,-0.845,-0.113785,0.112849,0.160254,0.072265,0.935819,4.130944,-1.408397,normal
2,0.439,-0.691,-0.11444,0.10929,0.158241,0.016991,0.592992,2.774243,-1.382745,normal
3,0.459,-0.762,-0.115351,0.112622,0.161211,-0.000778,0.548151,2.847195,-1.39757,normal
4,0.525,-0.833,-0.114336,0.109718,0.158462,0.004558,0.693244,3.313105,-1.38593,normal


In [149]:
df_feat.isna().sum().sum()

0

In [150]:
df_feat.to_csv('nasa_bearing_dataset\df_feat.csv',index=False)

In [151]:
num = 2
df_freq = pd.concat([tests[0][4][num].reset_index(drop = True),
                     tests[0][5][num].reset_index(drop = True),
                     tests[0][6][num].reset_index(drop = True),
                     tests[0][7][num].reset_index(drop = True),
                     tests[1][0][num].reset_index(drop = True),
                     tests[2][2][num].reset_index(drop = True)], axis = 0)

In [152]:
df_freq.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,10231,10232,10233,10234,10235,10236,10237,10238,10239,target
0,0.267459,0.001168,0.001828,0.001987,0.002317,0.001537,0.001837,0.007169,0.004013,0.002827,...,0.000134,0.000284,0.00032,0.000333,0.000109,0.000244,0.000192,0.000161,0.00014,normal
1,0.269765,0.001087,0.00084,0.002886,0.002624,0.001982,0.001911,0.003913,0.005833,0.00296,...,0.00028,0.000182,0.00029,0.000255,0.000298,0.000117,0.000107,0.000202,0.000195,normal
2,0.261961,0.001718,0.002398,0.001542,0.003211,0.002696,0.00361,0.007068,0.005708,0.003775,...,0.00026,0.000167,0.000119,1.2e-05,0.000232,0.00013,0.000161,9.8e-05,0.000306,normal
3,0.269995,0.001529,0.003989,0.002137,0.003351,0.001355,0.00277,0.004761,0.002046,0.002703,...,0.000225,0.000273,9.2e-05,0.000286,0.000514,0.000119,0.000418,0.000588,0.000133,normal
4,0.262652,0.003813,0.003515,0.001072,0.002476,0.002688,0.002649,0.005884,0.005924,0.002753,...,0.00033,8.2e-05,9.4e-05,0.00029,0.000293,6.4e-05,0.000123,0.000182,0.000246,normal


In [153]:
df_freq.isna().sum().sum()

0

In [154]:
df_freq.to_csv('nasa_bearing_dataset\df_freq.csv',index=False)

In [155]:
# for key_test,bearings in tests.items():
#     print(f'test {key_test+1} started!')
#     for key_bearing,bearing in bearings.items():
#         for i in range(3):
#             if i == 0:
#                 # df_time.columns = tests[key_test][key_bearing][i].columns
#                 df_time = pd.concat([df_time, tests[key_test][key_bearing][i].reset_index()],axis = 1)
#                 del tests[key_test][key_bearing][i]
#             # elif i == 1:
#             #     # df_time.columns = tests[key_test][key_bearing][i].columns
#             #     df_feat = pd.concat([df_time, tests[key_test][key_bearing][i].reset_index()],axis = 1)
#             #     del tests[key_test][key_bearing][i] 
#             # elif i == 2:
#             #     # df_time.columns = tests[key_test][key_bearing][i].columns
#             #     df_feat = pd.concat([df_time, tests[key_test][key_bearing][i].reset_index()],axis = 1)
#             #     del tests[key_test][key_bearing][i] 


In [156]:
df_time.shape

(6706, 20481)

In [157]:
df_time['target'].unique()

array(['normal', 'inner race', 'roller element', 'outer race'],
      dtype=object)

In [158]:
# test = 2
# for bearing in bearings:
#     pd.DataFrame(bearings[bearing]).to_csv(f'nasa_bearing_dataset/test{test}_bearing_{bearing}_time.csv')
#     to_feature(pd.DataFrame(bearings[bearing])).to_csv(f'nasa_bearing_dataset/test{test}_bearing_{bearing}_feature.csv')

In [159]:
# df = []
# format_str = '%Y.%m.%d.%H.%M'
# for i in range(8):
#     df.append(pd.read_csv(f'nasa_bearing_dataset/test{test}_bearing_{i}_time.csv', index_col= 'Unnamed: 0'))
#     df[i].index = pd.to_datetime(df[i].index, format= format_str)

In [160]:
# df_f = []
# format_str = '%Y.%m.%d.%H.%M'
# for i in range(8):
#     df_f.append(pd.read_csv(f'nasa_bearing_dataset/test{test}_bearing_{i}_feature.csv', index_col= 'Unnamed: 0'))
#     df_f[i].index = pd.to_datetime(df_f[i].index, format= format_str)

In [161]:
# test = 2
# # bearing = 1

# for column in tests[test-1][0][1].columns:
#     plt.figure(figsize=(20, 5))
#     for key_bearing,bearing in tests[test-1].items():
#         plt.plot(bearing[1].index,bearing[1][column])

#     plt.legend([f'Bearing - {item}' for item in range(len(tests[test-1]))])
#     plt.xlabel("Date-Time")
#     plt.ylabel(column)
#     plt.title(column)
#     plt.show()


In [162]:
len(tests[0])

4

In [163]:
test = 1
bearing = 1

In [164]:
num = 0

In [165]:
tests[test-1][bearing -1][0].shape

KeyError: 0

In [None]:
min(tests[test-1][bearing -1][0].index)

In [None]:
max(tests[test-1][bearing -1][0].index)

In [None]:
tests[key_test][key_bearing][0].columns

In [None]:
# for key_test,bearings in tests.items():
#     print(f'test {key_test+1} started!')
#     for key_bearing,bearing in bearings.items():
#         if key_test == 0 or key_test ==1 or key_bearing == 0 or key_bearing == 1 or key_bearing == 2:
#             pass
#         else:
#             for num in range(len(tests[key_test][key_bearing][0].index)):
#                 # if num % 10 == 0:
#                 try: 
#                     image = to_fft3(tests[key_test][key_bearing][0].iloc[num,:], 20480)
#                     image.savefig(f'nasa_bearing_dataset/images/test_{key_test+1}/bearing_{key_bearing+1}/fft_{num:05}.png', pad_inches=0.1)
#                     image.close()
#                     # image.show()
#                 except Exception as e:
#                     # Print the exception message and continue
#                     print(f"An error occurred: {e}")

#             print(f'Bearing {key_bearing+1} done!')


In [None]:
test = 1
bearing = 7   # Roller element failure 2500
num = 2154
image = to_fft3(tests[test-1][bearing -1][0].iloc[num,:], 20480)
image.show()
tests[test-1][bearing -1][0].iloc[num,:].name

In [None]:
test = 1
bearing = 7   # Roller element failure
num = 2154
image = to_fft2(tests[test-1][bearing -1][0].iloc[num,:], 20480)
image.show()
tests[test-1][bearing -1][0].iloc[num,:].name

In [None]:
test = 1
bearing = 5  # Inner race fault
num = 2150
image = to_fft3(tests[test-1][bearing -1][0].iloc[num,:], 20480)
image.show()
tests[test-1][bearing -1][0].iloc[num,:].name

In [None]:
test = 1
bearing = 5  # Inner race fault
num = 2150
image = to_fft2(tests[test-1][bearing -1][0].iloc[num,:], 20480)
image.show()
tests[test-1][bearing -1][0].iloc[num,:].name

In [None]:
test = 2
bearing = 1   # Outer race failure
num = 980
image = to_fft3(tests[test-1][bearing -1][0].iloc[num,:], 20480)
image.show()
tests[test-1][bearing -1][0].iloc[num,:].name

In [None]:
test = 3
bearing = 3   # Outer race failure
num = 6320
image = to_fft3(tests[test-1][bearing -1][0].iloc[num,:], 20480)
image.show()
tests[test-1][bearing -1][0].iloc[num,:].name

In [None]:
test = 3
bearing = 3    # Outer race failure
num = 6320
image = to_fft2(tests[test-1][bearing -1][0].iloc[num,:], 20480)
image.show()
tests[test-1][bearing -1][0].iloc[num,:].name

In [None]:
test = 2
bearing = 1    # Outer race failure
num = 980
image = to_fft2(tests[test-1][bearing -1][0].iloc[num,:], 20480)
image.show()
tests[test-1][bearing -1][1].iloc[num,:].name

In [None]:
# num = 0
# image = to_fft2(tests[test-1][bearing -1][0].loc['2004-02-12 10:32:00',:], 20480)
# # # print(tests[test-1][bearing -1][1].iloc[num,:].name)
# # image.savefig('nasa_bearing_dataset/images/prueba02.png')
# image.show()

In [None]:
num = 1500
image = to_fft2(tests[test-1][bearing -1][0].iloc[num,:], 20480)
image.show()
tests[test-1][bearing -1][1].iloc[num,:].name

In [None]:
num = 2100
to_fft2(tests[test-1][bearing -1][0].iloc[num,:], 20480)
tests[test-1][bearing -1][1].iloc[num,:].name

In [None]:
num = 2155
to_fft2(tests[test-1][bearing -1][0].iloc[num,:], 20480)
tests[test-1][bearing -1][1].iloc[num,:].name

In [None]:
tests[test-1][bearing -1][0].isna().sum().sum()

In [None]:
tests[test-1][bearing -1][1].isna().sum().sum()

In [None]:
tests[test-1][bearing -1][2].isna().sum().sum()

In [None]:
tests[test-1][bearing -1][2].isna().sum()

In [None]:
# df[0].loc['2004-02-12 10:32:00':'2004-02-16 03:42:00','fault'] = 'Normal'
# df[0].loc['2004-02-16 03:52:00':'2004-02-19 06:02:00','fault'] = 'Outer Race'
# df[0].to_csv(f'nasa_bearing_dataset/target_test{test}_bearing_{bearing}_time.csv')

In [None]:
# df_f[0].loc['2004-02-12 10:32:00':'2004-02-16 03:42:00','fault'] = 'Normal'
# df_f[0].loc['2004-02-16 03:52:00':'2004-02-19 06:02:00','fault'] = 'Outer Race'
# df_f[0].to_csv(f'nasa_bearing_dataset/target_test{test}_bearing_{bearing}_feature.csv')

In [None]:
# df_freq[0].loc['2004-02-12 10:32:00':'2004-02-16 03:42:00','fault'] = 'Normal'
# df_freq[0].loc['2004-02-16 03:52:00':'2004-02-19 06:02:00','fault'] = 'Outer Race'
# df_freq[0].to_csv(f'nasa_bearing_dataset/target_test{test}_bearing_{bearing}_freq.csv')