In [1]:
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
import glob
from scipy.signal import detrend, get_window

In [2]:
def compute_skewness(x):
    """Return the skewness of ``x`` computed as ``m3 / s**3``.

    ``m3`` is the sum of cubed deviations from the mean divided by
    ``len(x)``; ``s`` is the standard deviation with ``ddof=1``.
    """
    sample_count = len(x)
    deviations = x - np.mean(x)
    m3 = np.sum(deviations ** 3) / sample_count
    sample_std = np.std(x, ddof=1)
    return m3 / sample_std ** 3

In [3]:
def compute_kurtosis(x):
    """Return the excess kurtosis of ``x`` computed as ``m4 / s**4 - 3``.

    ``m4`` is the sum of fourth-power deviations from the mean divided by
    ``len(x)``; ``s`` is the standard deviation with ``ddof=1``.
    """
    sample_count = len(x)
    deviations = x - np.mean(x)
    m4 = np.sum(deviations ** 4) / sample_count
    sample_std = np.std(x, ddof=1)
    return m4 / sample_std ** 4 - 3

In [4]:
def to_feature(df):
    """Summarise every row of ``df`` with time-domain statistics.

    Each row is treated as one signal and each column as one sample.

    Parameters
    ----------
    df : pandas.DataFrame
        Numeric frame, one signal per row.

    Returns
    -------
    pandas.DataFrame
        Same index as ``df`` with the columns ``max``, ``min``, ``mean``,
        ``std`` (``ddof=1``), ``rms``, ``skewness``, ``kurtosis``,
        ``crest_factor`` and ``form_factor``.
    """
    # Operate on ``df`` directly: ``df[df.columns[:]]`` selected every column
    # and therefore only produced a full copy of the frame for each statistic.
    data = pd.DataFrame()
    data['max'] = df.max(axis=1)
    data['min'] = df.min(axis=1)
    data['mean'] = df.mean(axis=1)
    data['std'] = df.std(ddof=1, axis=1)
    # Row-wise root mean square, computed in one vectorised pass.
    data['rms'] = np.sqrt((df ** 2).mean(axis=1))
    data['skewness'] = df.apply(compute_skewness, axis=1)
    data['kurtosis'] = df.apply(compute_kurtosis, axis=1)
    # NOTE(review): crest factor is conventionally peak |x| / rms and form
    # factor rms / mean(|x|). The signed max and signed mean are kept here so
    # existing feature values do not change - confirm this is intended.
    data['crest_factor'] = data['max'] / data['rms']
    data['form_factor'] = data['rms'] / data['mean']
    return data

In [5]:
def fft_freq(df_time:pd.DataFrame) -> pd.DataFrame:
    """Return the single-sided FFT magnitude of every row of ``df_time``.

    Each row is treated as one signal of ``n`` samples, where ``n`` is the
    number of numeric columns. Non-numeric columns (for example a label
    column) are ignored; samples are taken in the frame's column order.

    Parameters
    ----------
    df_time : pandas.DataFrame
        One signal per row.

    Returns
    -------
    pandas.DataFrame
        Same index as ``df_time``. Columns ``0 .. n//2 - 1`` are FFT bin
        numbers holding ``|FFT| / n``; bin ``k`` corresponds to the
        frequency ``k * sampling_rate / n``.
    """
    # Take the samples from the argument itself. The previous implementation
    # sized its column list from the notebook-level ``df[0]``, so it only
    # worked when that global existed and had exactly the same columns.
    samples = df_time.select_dtypes(include='number').to_numpy(dtype=float)
    n = samples.shape[1]
    if n == 0:
        # Nothing to transform; keep the row labels so callers can still align.
        return pd.DataFrame(index=df_time.index)

    # One FFT call over all rows instead of a Python-level loop per row.
    magnitudes = np.abs(np.fft.fft(samples, axis=1))[:, :n // 2] / n
    df_freq = pd.DataFrame(magnitudes, index=df_time.index)
    return df_freq


In [6]:
# Collect the per-test data folders. glob returns matches in an arbitrary,
# file-system dependent order, and later cells address the tests by position,
# so the list is sorted to make that position deterministic.
test_rutes = sorted(glob.glob("nasa_bearing_dataset/bear*/*"))
test_rutes


['nasa_bearing_dataset\\bearing_1st_test\\1st_test',
 'nasa_bearing_dataset\\bearing_2nd_test\\2nd_test',
 'nasa_bearing_dataset\\bearing_3rd_test\\4th_test']

In [7]:
os.path.basename(test_rutes[0])

'1st_test'

In [8]:
number_test = len(test_rutes)
number_test

3

In [9]:
# Map each test folder name to the list of snapshot files inside it.
# The list is sorted because glob order is not guaranteed and the snapshots
# are later stacked row by row in list order. ``recursive=True`` was dropped:
# it has no effect on a pattern that contains no ``**``.
dict_test_rotes = {
    os.path.basename(file_folder): sorted(glob.glob(os.path.join(file_folder, '*')))
    for file_folder in test_rutes
}

# Show the first snapshot path of the first test as a sanity check.
next(iter(dict_test_rotes.values()))[0]


'nasa_bearing_dataset\\bearing_1st_test\\1st_test\\2003.10.22.12.06.24'

In [10]:
dict_test_rotes[list(dict_test_rotes)[0]][0]

'nasa_bearing_dataset\\bearing_1st_test\\1st_test\\2003.10.22.12.06.24'

In [11]:
dict_test_rotes.keys()

dict_keys(['1st_test', '2nd_test', '4th_test'])

In [12]:
test = {key:None for key in range(len(dict_test_rotes))}

In [13]:
# For every test, read each snapshot file and collect its columns so that
# test[index][column] is a list of Series, one per snapshot file.
for index, files in enumerate(dict_test_rotes.values()):
    channels = {}
    for vibr in sorted(files):
        vibr_df = pd.read_csv(vibr, sep='\t', header=None)
        # Drop the last three characters of the file name; the remaining
        # text becomes the row label of this snapshot.
        timestamp = os.path.basename(vibr)[:-3]
        for column in vibr_df.columns:
            # rename() returns a Series that carries the label. Assigning
            # ``vibr_df[column].name`` only worked while pandas handed back
            # the same cached Series on the next ``vibr_df[column]`` access.
            channels.setdefault(column, []).append(vibr_df[column].rename(timestamp))
    test[index] = channels
    

In [14]:
# Turn each channel's list of Series into a DataFrame (one row per Series),
# parse the row labels as datetimes and store the frame under key 0.
# Running this cell a second time would wrap the already-converted dicts again.
format_str = '%Y.%m.%d.%H.%M'
for channels in test.values():
    for bearing, snapshots in channels.items():
        frame = pd.DataFrame(snapshots)
        frame.index = pd.to_datetime(frame.index, format=format_str)
        channels[bearing] = {0: frame}

In [15]:
# for index, (key,value) in enumerate(dict_test_rotes.items()):
#     bearings_num = len(pd.read_csv(value[0], sep='\t',header=None).columns)
#     bearings = {item:{0:pd.DataFrame()} for item in range(bearings_num)}
#     test[index] = bearings


#     for vibr in value:
#         vibr_df = pd.read_csv(vibr, sep='\t',header=None)
#         moment:list[pd.DataFrame()] = [pd.DataFrame()]*bearings_num
#         for column in vibr_df.columns:
#             vibr_df[column].name = os.path.basename(vibr)[:-3] # dataserie
#             moment[column] = vibr_df[column].to_frame().T
#             test[index][column][0] = pd.concat([test[index][column][0],moment[column]], axis=0)
            

In [16]:
len(test[2][0])

1

In [17]:
test[2][0][0]

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,20470,20471,20472,20473,20474,20475,20476,20477,20478,20479
2004-03-04 09:27:00,0.034,0.103,0.095,0.000,0.005,0.010,0.005,-0.027,-0.088,-0.071,...,-0.054,0.020,0.100,0.049,-0.083,-0.010,0.117,0.024,-0.090,-0.129
2004-03-04 09:32:00,-0.105,-0.039,0.010,-0.015,0.027,-0.037,-0.007,-0.027,-0.046,-0.002,...,0.105,0.068,-0.007,-0.007,0.042,0.039,-0.051,-0.054,-0.071,-0.007
2004-03-04 09:42:00,-0.005,0.056,0.034,0.020,0.049,-0.020,-0.110,-0.010,-0.051,-0.076,...,-0.015,0.081,0.103,0.159,0.103,0.144,0.242,0.234,0.085,-0.046
2004-03-04 09:52:00,-0.010,-0.005,-0.039,-0.132,-0.117,-0.132,-0.085,-0.020,-0.120,-0.059,...,-0.063,-0.071,0.056,-0.017,-0.151,-0.139,-0.103,0.000,-0.032,-0.059
2004-03-04 10:02:00,-0.046,-0.117,-0.178,-0.100,0.046,0.093,0.112,0.042,0.166,0.066,...,-0.007,-0.063,-0.049,0.005,0.020,-0.073,0.000,0.037,0.063,0.081
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2004-04-18 02:02:00,0.085,0.088,0.022,-0.076,-0.212,-0.010,0.266,0.168,-0.215,-0.530,...,0.010,0.073,0.193,-0.120,-0.330,-0.120,0.278,0.115,-0.200,-0.042
2004-04-18 02:12:00,0.046,0.039,-0.007,-0.005,-0.037,0.054,0.027,-0.134,-0.139,-0.117,...,0.161,0.046,-0.027,0.037,-0.073,-0.178,0.095,0.015,-0.134,0.159
2004-04-18 02:22:00,-0.103,0.012,-0.059,0.120,0.071,-0.137,-0.159,-0.137,-0.103,0.122,...,-0.208,-0.044,0.000,-0.090,-0.127,-0.051,-0.059,-0.024,0.142,-0.012
2004-04-18 02:32:00,-0.134,-0.144,-0.059,-0.181,-0.154,-0.068,-0.103,-0.117,-0.115,-0.061,...,0.088,-0.251,-0.320,-0.110,0.068,-0.022,-0.454,-0.247,0.168,0.098


In [None]:
# FIXME: data3, data0 and data1 are not assigned in any cell visible in this
# notebook, so this cell raises NameError on a fresh kernel.
pd.concat([data3,data0,data1],axis=0)

In [None]:
# bearings = {}

# bearings[0] = []
# bearings[1] = []
# bearings[2] = []
# bearings[3] = []
# # bearings[4] = []
# # bearings[5] = []
# # bearings[6] = []
# # bearings[7] = []


# for root, dirs, files in os.walk("nasa_bearing_dataset/bearing_2nd_test/2nd_test/", topdown=False):
    
#     for file_name in files:
#         path = os.path.join(root, file_name)
#         # print(file_name[:-3])
#         dataset=pd.read_csv(path, sep='\t',header=None)

#         for column in dataset.columns:
#             dataset[column].name = file_name[:-3]
#             bearings[column].append(dataset[column])

In [None]:
bearings_feature = {}

In [None]:
prueba = to_feature(test[0][0][0])

In [None]:
prueba

In [None]:
# test = 2
# for bearing in bearings:
#     pd.DataFrame(bearings[bearing]).to_csv(f'nasa_bearing_dataset/test{test}_bearing_{bearing}_time.csv')
#     to_feature(pd.DataFrame(bearings[bearing])).to_csv(f'nasa_bearing_dataset/test{test}_bearing_{bearing}_feature.csv')


In [None]:
# NOTE(review): this rebinds `test`, which the earlier cells use as the dict of
# loaded snapshots, to the integer test number used in the CSV file names of
# the following cells. After this cell, `test[...][...]` lookups no longer work.
test = 1

In [None]:
# Load the eight time-domain CSVs of the current test; the first CSV column
# holds the row labels, which are parsed as datetimes.
format_str = '%Y.%m.%d.%H.%M'
df = []
for i in range(8):
    time_frame = pd.read_csv(f'nasa_bearing_dataset/test{test}_bearing_{i}_time.csv', index_col='Unnamed: 0')
    time_frame.index = pd.to_datetime(time_frame.index, format=format_str)
    df.append(time_frame)

In [None]:
# Load the eight feature CSVs of the current test; the first CSV column
# holds the row labels, which are parsed as datetimes.
format_str = '%Y.%m.%d.%H.%M'
df_f = []
for i in range(8):
    feature_frame = pd.read_csv(f'nasa_bearing_dataset/test{test}_bearing_{i}_feature.csv', index_col='Unnamed: 0')
    feature_frame.index = pd.to_datetime(feature_frame.index, format=format_str)
    df_f.append(feature_frame)

In [None]:
df_f[0].columns

In [None]:
len(df[0].columns)

In [None]:
# One figure per feature, overlaying every loaded feature frame.
for col in df_f[0].columns:
    plt.figure(figsize=(20, 5))
    # Label each curve as it is drawn so the legend always matches the number
    # of plotted lines (eight lines were drawn but only four were named), and
    # plot each frame against its own index so frames with differing row
    # counts still plot.
    for i, features in enumerate(df_f):
        plt.plot(features.index, features[col], label=f'bearing-{i + 1}')

    plt.legend()

    plt.xlabel("Date-Time")
    plt.ylabel(col)
    plt.title(col)
    plt.show()

In [None]:
# Load your vibration data from a CSV file
# Assume the CSV file has a single column of vibration data with a header
# data = pd.read_csv('vibration_data.csv')
def to_fft(y, sample_rate):
    """Plot ``y`` over time and the low end of its FFT magnitude spectrum.

    Parameters
    ----------
    y : sequence of float
        Signal samples.
    sample_rate : float
        Sampling rate in Hz, used to build the time and frequency axes.

    Draws a two-row figure and shows it; nothing is returned.
    """
    n = len(y)
    time_axis = np.arange(n) / sample_rate

    spectrum = np.fft.fft(y)
    freq_axis = np.fft.fftfreq(n, d=1/sample_rate)

    # Only the first twentieth of the bins is drawn in the spectrum panel.
    shown = n // 20

    plt.figure(figsize=(20,6))

    # Upper panel: raw signal.
    plt.subplot(2, 1, 1)
    plt.plot(time_axis, y)
    plt.title('Time Domain')
    plt.xlabel('Time (s)')
    plt.ylabel('Vibration Amplitude')

    # Lower panel: magnitude scaled by 1/n.
    plt.subplot(2, 1, 2)
    plt.plot(freq_axis[:shown], np.abs(spectrum)[:shown] * 1/n)
    plt.title('Frequency Domain')
    plt.xlabel('Frequency (Hz)')
    plt.ylabel('Magnitude')

    plt.tight_layout()
    plt.show()


In [None]:
def to_fft2(y,sample_rate):
    """Plot a detrended signal and its single-sided amplitude spectrum.

    Parameters
    ----------
    y : sequence of float
        Signal samples.
    sample_rate : float
        Sampling rate in Hz, used to build the time and frequency axes.

    Draws a two-row figure and shows it; nothing is returned.
    """
    # scipy's detrend defaults to removing a least-squares linear fit, which
    # takes out the DC offset as well as any linear drift.
    signal = detrend(y)

    # No window is applied at the moment.
    windowed_signal = signal
    N = len(windowed_signal)
    fs = sample_rate

    # Build the time axis from the actual sample count. The previous
    # np.arange(0, 1, 1/fs) only matched the signal when it was exactly one
    # second long, and a float step makes the resulting length unreliable.
    t = np.arange(N) / fs

    fft_result = np.fft.fft(windowed_signal)
    freqs = np.fft.fftfreq(N, 1/fs)
    magnitude = np.abs(fft_result) / N

    # Single-sided spectrum: keep the non-negative frequencies and double
    # every bin except DC, which has no mirrored negative-frequency partner.
    half_N = N // 2
    freqs = freqs[:half_N]
    magnitude = magnitude[:half_N]
    magnitude[1:] *= 2

    plt.figure(figsize=(20, 6))

    # Plot time-domain signal
    plt.subplot(2, 1, 1)
    plt.plot(t, signal)
    plt.title('Time Domain Signal')
    plt.xlabel('Time [s]')
    plt.ylabel('Amplitude')

    # Plot frequency-domain signal
    plt.subplot(2, 1, 2)
    plt.plot(freqs, magnitude)
    plt.title('Frequency Domain Signal')
    plt.xlabel('Frequency [Hz]')
    plt.ylabel('Magnitude')

    plt.tight_layout()
    plt.show()

In [None]:
# NOTE(review): `num` is first assigned in a later cell of the notebook as
# shown here, so on a fresh top-to-bottom run this cell raises NameError.
df[5].iloc[num,:]

In [None]:
df[5].shape

In [None]:
min(df[5].index)

In [None]:
# Spectrum of row `num` of df[5].
num = 0
to_fft2(df[5].iloc[num,:], 20480)
# Show the label of the row that was actually plotted. It was read from df[4],
# which stops lining up with df[5] once rows are dropped from df[4].
df[5].index[num]

In [None]:
# Spectrum of row `num` of df[5].
num = 1500
to_fft2(df[5].iloc[num,:], 20480)
# Show the label of the row that was actually plotted. It was read from df[4],
# which stops lining up with df[5] once rows are dropped from df[4].
df[5].index[num]

In [None]:
# Spectrum of row `num` of df[5].
num = 2100
to_fft2(df[5].iloc[num,:], 20480)
# Show the label of the row that was actually plotted. It was read from df[4],
# which stops lining up with df[5] once rows are dropped from df[4].
df[5].index[num]

In [None]:
# Spectrum of row `num` of df[5].
num = 2155
to_fft2(df[5].iloc[num,:], 20480)
# Show the label of the row that was actually plotted. It was read from df[4],
# which stops lining up with df[5] once rows are dropped from df[4].
df[5].index[num]

In [None]:
'2003-10-22 12:06:00','2003-10-22 12:09:00','2003-10-22 12:14:00'

In [None]:
# Removes one row from df[4] in place. Not idempotent: drop() raises KeyError
# by default when the label is missing, so re-running this cell fails.
df[4].drop(index=['2003-10-22 12:14:00'], inplace = True)

In [None]:
# Removes two rows from df[0] in place. Not idempotent: drop() raises KeyError
# by default when a label is missing, so re-running this cell fails.
df[0].drop(index=['2004-02-19 06:22:00','2004-02-19 06:12:00'], inplace = True)

In [None]:
df[0].head()

In [None]:
# Removes the same two rows from the feature frame df_f[0], in place.
# Not idempotent: re-running raises KeyError once the labels are gone.
df_f[0].drop(index=['2004-02-19 06:22:00','2004-02-19 06:12:00'], inplace = True)

In [None]:
df_f[0].head()

In [None]:
# NOTE(review): `df_freq` is first assigned in a later cell of the notebook as
# shown here, so on a fresh top-to-bottom run this cell raises NameError.
df_freq[0].head()

In [None]:
df[0].isna().sum().sum()

In [None]:
df_f[0].isna().sum().sum()

In [None]:
# Total number of missing values in the first frequency frame.
# NOTE(review): `df_freq` is first assigned in a later cell of the notebook as
# shown here, so on a fresh top-to-bottom run this cell raises NameError.
df_freq[0].isna().sum().sum()

In [None]:
freq_df = fft_freq(df[0])

In [None]:
freq_df.shape

In [None]:
# Compute the FFT magnitude frame for the first four entries of `df` and write
# each one to a "test{test}_bearing_{i}_freq.csv" file.
# NOTE(review): the visible cell that fills `df` reads eight files with
# test = 1, while this cell labels its output as test 2 and uses four
# entries - confirm `df` holds the test-2 data when this runs.
df_freq = []
test = 2
for i in range(4):
    df_freq.append(fft_freq(df[i]))
    df_freq[i].to_csv(f'nasa_bearing_dataset/test{test}_bearing_{i}_freq.csv')

In [None]:
test = 2
bearing = 0

In [None]:
# Tag two index ranges of the time-domain frame with a fault label in a
# 'fault' column, then write the labelled frame to CSV.
fault_periods = {
    'Normal': ('2004-02-12 10:32:00', '2004-02-16 03:42:00'),
    'Outer Race': ('2004-02-16 03:52:00', '2004-02-19 06:02:00'),
}
for fault_label, (period_start, period_end) in fault_periods.items():
    df[0].loc[period_start:period_end, 'fault'] = fault_label
df[0].to_csv(f'nasa_bearing_dataset/target_test{test}_bearing_{bearing}_time.csv')

In [None]:
# Tag two index ranges of the feature frame with a fault label in a
# 'fault' column, then write the labelled frame to CSV.
fault_periods = {
    'Normal': ('2004-02-12 10:32:00', '2004-02-16 03:42:00'),
    'Outer Race': ('2004-02-16 03:52:00', '2004-02-19 06:02:00'),
}
for fault_label, (period_start, period_end) in fault_periods.items():
    df_f[0].loc[period_start:period_end, 'fault'] = fault_label
df_f[0].to_csv(f'nasa_bearing_dataset/target_test{test}_bearing_{bearing}_feature.csv')

In [None]:
# Tag two index ranges of the frequency frame with a fault label in a
# 'fault' column, then write the labelled frame to CSV.
fault_periods = {
    'Normal': ('2004-02-12 10:32:00', '2004-02-16 03:42:00'),
    'Outer Race': ('2004-02-16 03:52:00', '2004-02-19 06:02:00'),
}
for fault_label, (period_start, period_end) in fault_periods.items():
    df_freq[0].loc[period_start:period_end, 'fault'] = fault_label
df_freq[0].to_csv(f'nasa_bearing_dataset/target_test{test}_bearing_{bearing}_freq.csv')