In [1]:
import os
import numpy as np
import pandas as pd

In [2]:
time_path = "Time"
frequency_path = "Frequency"

In [3]:
def process_file(file_path):
    data = pd.read_csv(file_path, header=None) 
    return data.values.flatten()


In [4]:
def process_folder(base_path, data_type):
    dataset = []
    conditions = ["Healthy", "Faulty_15PercentCrack", "Faulty_50PercentCrack"]
    sensors = ["Motor", "Gear", "GearCasing"]
    
    for condition in conditions:
        for sensor in sensors:
            folder_path = os.path.join(base_path, condition, sensor)
            if not os.path.exists(folder_path):
                continue 
            
            for file_name in os.listdir(folder_path):
                file_path = os.path.join(folder_path, file_name)
                try:
                    time_series = process_file(file_path) 
                    dataset.append(list(time_series) + [condition, sensor, data_type])
                except Exception as e:
                    print(f"Error processing {file_path}: {e}")
    return dataset

In [5]:
time_data = process_folder(time_path, "Time")
frequency_data = process_folder(frequency_path, "Frequency")  

In [6]:
columns = ['Data'] + ['Condition', 'Sensor', 'Type'] 

In [7]:
time_df = pd.DataFrame(time_data)
frequency_df = pd.DataFrame(frequency_data)

In [8]:
final_dataset = pd.concat([time_df, frequency_df], ignore_index=True)

In [9]:
final_dataset = final_dataset.drop(final_dataset.columns[0], axis=1)

In [10]:
final_dataset.to_csv("simplified_gearbox_dataset.csv", index=False)

In [11]:
print(final_dataset.head())

                    1                       2                    3      \
0  3.906250E-5\t-0.002228  7.812500E-5\t-0.000796   0.000117\t0.001085   
1     1.000039\t-0.001193     1.000078\t-0.003348   1.000117\t0.001864   
2      2.000039\t0.002885      2.000078\t0.002716   2.000117\t0.002946   
3      3.000039\t0.001323     3.000078\t-0.003716  3.000117\t-0.002658   
4      4.000039\t0.006794     4.000078\t-0.001329  4.000117\t-0.005228   

                    4                      5                    6      \
0      0.000156\t0.007732  0.000195\t3.714066E-5  0.000234\t-0.006019   
1      1.000156\t0.001432     1.000195\t0.000166   1.000234\t0.002730   
2  2.000156\t-9.699950E-5    2.000195\t-0.000701  2.000234\t-0.001655   
3      3.000156\t0.000222    3.000195\t-0.001560  3.000234\t-0.001640   
4      4.000156\t0.000688     4.000195\t0.002041  4.000234\t-0.001977   

                 7                    8                      9      \
0  0.000273\t-0.002078   0.000312\t0.002464   