In [1]:
from glob import glob
import numpy as np
import os
import pandas as pd
from scipy.io import loadmat
import torch

# Data Folder Metadata

In [2]:
# Metadata for the healthy reference bearings: every folder in this group shares one label.
healthy_dict = {
    "Folder_Name": [
        "K001", "K002", "K003",
        "K004", "K005", "K006",
    ],
    "Damage": ["Healthy"] * 6,
}
healthy_df = pd.DataFrame(data=healthy_dict)

# Heading first, then the table on the following line
print("HEALTHY BEARINGS", healthy_df, sep="\n")

HEALTHY BEARINGS
  Folder_Name   Damage
0        K001  Healthy
1        K002  Healthy
2        K003  Healthy
3        K004  Healthy
4        K005  Healthy
5        K006  Healthy


In [3]:
# Metadata for the artificially damaged bearings: six outer-ring folders followed by five inner-ring folders.
artificial_dict = {
    "Folder_Name": [
        "KA01", "KA03", "KA05", "KA06", "KA07", "KA09",  # labelled "Outer"
        "KI01", "KI03", "KI05", "KI07", "KI08",          # labelled "Inner"
    ],
    "Damage": ["Outer"] * 6 + ["Inner"] * 5,
}
artificial_df = pd.DataFrame(data=artificial_dict)

# Heading first, then the table on the following line
print("ARTIFICIAL DAMAGES", artificial_df, sep="\n")

ARTIFICIAL DAMAGES
   Folder_Name Damage
0         KA01  Outer
1         KA03  Outer
2         KA05  Outer
3         KA06  Outer
4         KA07  Outer
5         KA09  Outer
6         KI01  Inner
7         KI03  Inner
8         KI05  Inner
9         KI07  Inner
10        KI08  Inner


In [4]:
# Metadata for the bearings with real damage: five outer-ring folders followed by six inner-ring folders.
real_dict = {
    "Folder_Name": [
        "KA04", "KA15", "KA16", "KA22", "KA30",          # labelled "Outer"
        "KI04", "KI14", "KI16", "KI17", "KI18", "KI21",  # labelled "Inner"
    ],
    "Damage": ["Outer"] * 5 + ["Inner"] * 6,
}
real_df = pd.DataFrame(data=real_dict)

# Heading first, then the table on the following line
print("REAL DAMAGES", real_df, sep="\n")

REAL DAMAGES
   Folder_Name Damage
0         KA04  Outer
1         KA15  Outer
2         KA16  Outer
3         KA22  Outer
4         KA30  Outer
5         KI04  Inner
6         KI14  Inner
7         KI16  Inner
8         KI17  Inner
9         KI18  Inner
10        KI21  Inner


# Constants

In [5]:
# Root folder of the raw recordings, resolved against the current working directory
data_dir = os.path.join(os.getcwd(), "raw_data/Paderborn_university")

# Points kept per recording: 64k sampling rate for 3.9 seconds
N_POINTS = 249_600

# Operating-condition codes used to pick files, keyed by domain name
domains = dict(
    Normal="N15_M07_F10",
    Rotate="N09_M07_F10",
    Load="N15_M01_F10",
    Radial="N15_M07_F04",
)

# Split files into different datasets

In [6]:
# artificial_dataset = ["K001", "K002", "K003", "K004", "K005", "K006",
#                       "KA01", "KA03", "KA05", "KA06", "KA07", "KA09", "KI01", "KI03", "KI05", "KI07", "KI08"]

In [7]:
# Folders that make up the artificial-damage dataset.
# load_data enumerates this list, so the position of a folder is its class label.
artificial_dataset = [
    "K001",
    "KA01", "KA03", "KA05", "KA07",
    "KI01", "KI03", "KI07",
]

In [8]:
# real_dataset = ["K001", "K002", "K003", "K004", "K005", "K006",
#                "KA04", "KA15", "KA16", "KA22", "KA30", "KI14", "KI16", "KI17", "KI18", "KI21"] # KI04 was removed because it is a duplicate of KI14

In [9]:
# Folders that make up the real-damage dataset.
# load_data enumerates this list, so the position of a folder is its class label.
# NOTE(review): KB23 and KB27 do not appear in the metadata tables above — confirm their damage type.
# NOTE(review): the commented-out list above says KI04 was removed as a duplicate of KI14,
#               yet KI04 is used here — confirm this is intended.
real_dataset = [
    "K001",
    "KA04",
    "KB23", "KB27",
    "KI04",
]

# Load data

In [10]:
def load_data(dataset_list, n_measurements=20):
    """Load and label the recordings of every folder in ``dataset_list``, grouped by operating condition.

    Each folder is expected to contain ``n_measurements`` ``.mat`` files for each of the four
    operating conditions listed in the module-level ``domains`` mapping. The position of a folder
    in ``dataset_list`` is used as its class label.

    The output arrays are preallocated (the total number of rows is known up front), which avoids
    repeatedly copying a large array through ``np.concatenate``.

    Args:
        dataset_list: Folder names located under the module-level ``data_dir``.
        n_measurements: Number of rows reserved per folder and condition. Defaults to 20.

    Returns:
        Four dicts ``(Normal, Rotate, Load, Radial)``. Each holds ``"x"``, a float64 tensor of shape
        ``(len(dataset_list) * n_measurements, N_POINTS)``, and ``"y"``, a float64 tensor with the
        matching class labels.

    Warns:
        UserWarning: when a folder has fewer than ``n_measurements`` files for a condition. The
            unfilled rows keep their initial value (all zeros, label 0).
    """
    import warnings

    # Fixed order: it decides which dict is returned in which position
    condition_codes = [domains[name] for name in ("Normal", "Rotate", "Load", "Radial")]

    n_rows = len(dataset_list) * n_measurements
    datasets = [
        {"x": np.zeros((n_rows, N_POINTS)), "y": np.zeros(n_rows)}
        for _ in condition_codes
    ]

    for label, foldername in enumerate(dataset_list):
        #? Each bearing folder is assumed to be a class
        folder_path = os.path.join(data_dir, foldername)

        #? glob returns paths in arbitrary, filesystem-dependent order; sorting keeps the row order
        #? (and therefore the seeded train/test split computed later) reproducible across machines
        #? File names are expected to look like N09_M07_F10_K001_1.mat
        file_path_list = sorted(glob(os.path.join(folder_path, "*.mat")))

        first_row = label * n_measurements

        for code, dataset in zip(condition_codes, datasets):
            #? Match on the file name only, so a directory that happens to contain the code cannot match
            #? Capped at n_measurements so an extra file can never spill into the next folder's rows
            condition_files = [
                path for path in file_path_list if code in os.path.basename(path)
            ][:n_measurements]

            if len(condition_files) < n_measurements:
                warnings.warn(
                    f"{foldername}: found {len(condition_files)} file(s) for {code}, "
                    f"expected {n_measurements}; the remaining rows stay zero-filled with label 0"
                )

            for offset, path in enumerate(condition_files):
                sample_id = first_row + offset
                dataset["x"][sample_id] = load_mat(path)
                dataset["y"][sample_id] = label

    for dataset in datasets:
        dataset["x"] = torch.from_numpy(dataset["x"])
        dataset["y"] = torch.from_numpy(dataset["y"])

    Normal, Rotate, Load, Radial = datasets
    return Normal, Rotate, Load, Radial
              
def load_mat(mat_file):
    """Read one ``.mat`` recording and return its vibration signal, cut to ``N_POINTS`` columns.

    Args:
        mat_file: Path to the ``.mat`` file.

    Returns:
        A new NumPy array with the first ``N_POINTS`` columns of the signal
        (expected shape ``(1, N_POINTS)`` according to the original notes).
    """
    #* The record inside the file is looked up under the file's own name, without the .mat extension
    record_key, _ = os.path.splitext(os.path.basename(mat_file))

    contents = loadmat(mat_file)

    #* Walk down to the "Y" entries of the record
    channels = contents[record_key]["Y"][0][0][0]

    #? Entry 6 is taken as the vibration channel and its field 2 as the signal values
    signal = channels[6][2]

    return np.array(signal[:, 0:N_POINTS])

In [11]:
# Load the four operating conditions of each bearing group: real damage first, then artificial
(
    Real_Normal,
    Real_Rotate,
    Real_Load,
    Real_Radial,
) = load_data(real_dataset)
(
    Artificial_Normal,
    Artificial_Rotate,
    Artificial_Load,
    Artificial_Radial,
) = load_data(artificial_dataset)

# Preview one domain: the dict itself, then the shapes of its samples and labels
print(Real_Normal, Real_Normal["x"].shape, Real_Normal["y"].shape, sep="\n")

{'x': tensor([[-0.0488,  0.2167,  0.2350,  ..., -0.0488,  0.3662, -0.1526],
        [ 0.1587,  0.0641, -0.0824,  ..., -0.2594, -0.2014, -0.0641],
        [-0.3204, -0.3845, -0.2655,  ...,  0.1495,  0.2167,  0.2838],
        ...,
        [ 0.5188,  0.3876,  0.4089,  ...,  0.2808,  0.1587,  0.2289],
        [ 0.2655,  0.2075,  0.0671,  ..., -0.1251, -0.1617, -0.2563],
        [-0.2258, -0.3754, -0.0732,  ...,  0.4486,  0.7812,  0.3967]],
       dtype=torch.float64), 'y': tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        1., 1., 1., 1., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2.,
        2., 2., 2., 2., 2., 2., 3., 3., 3., 3., 3., 3., 3., 3., 3., 3., 3., 3.,
        3., 3., 3., 3., 3., 3., 3., 3., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4.,
        4., 4., 4., 4., 4., 4., 4., 4., 4., 4.], dtype=torch.float64)}
torch.Size([100, 249600])
torch.Size([100])


# Sampling

In [12]:
### CONSTANTS ###
# Length of each window cut from a recording
SAMPLE_LEN = 1024
# Step between the starts of consecutive windows
STRIDE = SAMPLE_LEN #! NO overlap, because the step equals the window length
# Fraction of the windows meant for the test split
TEST_SIZE = 0.2
# N_FOLDS = 5 #! Validation set not created

In [13]:
def sample(x, y, sample_len=None, stride=None):
    """Cut every signal in ``x`` into fixed-length windows and repeat its label for each window.

    Args:
        x: 2-D tensor of shape ``(n_signals, signal_len)``.
        y: 1-D tensor with one label per row of ``x``.
        sample_len: Window length. Defaults to the module-level ``SAMPLE_LEN``.
        stride: Step between the starts of consecutive windows. Defaults to the module-level ``STRIDE``.

    Returns:
        ``(output_x, output_y)`` where ``output_x`` has shape ``(n_signals * n_windows, sample_len)``
        and ``output_y`` has shape ``(n_signals * n_windows,)``. Windows of the same signal are
        adjacent and keep their original order. Trailing points that do not fill a whole window
        are dropped.

    Raises:
        ValueError: if ``x`` and ``y`` do not have the same number of rows.
    """
    # Resolve the defaults at call time so later edits to the constants cell are picked up
    if sample_len is None:
        sample_len = SAMPLE_LEN
    if stride is None:
        stride = STRIDE

    if x.size(0) != y.size(0):
        raise ValueError(
            f"x has {x.size(0)} rows but y has {y.size(0)} labels; they must match"
        )

    # Sliding window along the signal axis -> (n_signals, n_windows, sample_len)
    windows = x.unfold(1, sample_len, stride)

    # Read the window count from the unfolded shape rather than deriving it by division
    windows_per_sample = windows.size(1)

    # Flatten to 2-D: one row per window
    output_x = windows.contiguous().view(-1, sample_len)

    # Repeat each label once per window of its signal
    output_y = y.repeat_interleave(windows_per_sample)

    return output_x, output_y

In [14]:
# Window every domain in place: each dict keeps its identity, only its "x" and "y" entries are replaced
for domain_data in (
    Real_Normal, Real_Rotate, Real_Load, Real_Radial,
    Artificial_Normal, Artificial_Rotate, Artificial_Load, Artificial_Radial,
):
    domain_data["x"], domain_data["y"] = sample(domain_data["x"], domain_data["y"])

# Preview one domain: the dict itself, then the shapes of its samples and labels
print(Real_Normal, Real_Normal["x"].shape, Real_Normal["y"].shape, sep="\n")

{'x': tensor([[-0.0488,  0.2167,  0.2350,  ...,  0.3876,  0.0336, -3.3417],
        [ 0.1068,  0.6622,  0.0427,  ...,  0.0153, -0.1404, -0.2167],
        [-0.1617, -0.1038,  0.0793,  ..., -0.0488,  0.0580,  0.4120],
        ...,
        [-0.0763, -0.2197, -0.3296,  ...,  0.3693,  0.0702, -0.2289],
        [-0.5249, -0.4517, -0.4395,  ..., -1.0101, -1.1383, -0.1617],
        [ 0.0732,  0.0122, -0.0305,  ..., -0.0153, -0.3479, -0.3235]],
       dtype=torch.float64), 'y': tensor([0., 0., 0.,  ..., 4., 4., 4.], dtype=torch.float64)}
torch.Size([24300, 1024])
torch.Size([24300])


# Train-test split

In [15]:
def train_test_split(x, y, test_size=None):
    """Randomly split samples and labels into a train part and a test part.

    The shuffle uses ``torch.randperm`` on the global torch random generator, so call
    ``torch.manual_seed`` beforehand for a reproducible split.

    Args:
        x: Tensor of samples, indexed along its first dimension.
        y: Tensor of labels with one entry per sample of ``x``.
        test_size: Fraction of the samples to put in the test part, between 0 and 1.
            Defaults to the module-level ``TEST_SIZE``.

    Returns:
        ``(x_train, y_train, x_test, y_test)``.

    Raises:
        ValueError: if ``test_size`` is outside ``[0, 1]`` or ``x`` and ``y`` differ in length.
    """
    # Resolve the default at call time so later edits to the constants cell are picked up
    if test_size is None:
        test_size = TEST_SIZE
    if not 0.0 <= test_size <= 1.0:
        raise ValueError(f"test_size must be between 0 and 1, got {test_size}")

    total_size = len(x)
    if len(y) != total_size:
        raise ValueError(
            f"x has {total_size} samples but y has {len(y)} labels; they must match"
        )

    # Truncate towards zero, so any fractional remainder goes to the test part
    train_size = int((1 - test_size) * total_size)

    indices = torch.randperm(total_size)
    train_indices = indices[:train_size]
    test_indices = indices[train_size:]

    x_train = x[train_indices]
    y_train = y[train_indices]

    x_test = x[test_indices]
    y_test = y[test_indices]

    return x_train, y_train, x_test, y_test

In [16]:
# Fix the torch random generator so the shuffles below are repeatable.
# The calls are kept in this exact order because each one draws from the same generator.
torch.manual_seed(42)

# Real damage
(Real_Normal_x_train, Real_Normal_y_train,
 Real_Normal_x_test, Real_Normal_y_test) = train_test_split(Real_Normal["x"], Real_Normal["y"])
(Real_Rotate_x_train, Real_Rotate_y_train,
 Real_Rotate_x_test, Real_Rotate_y_test) = train_test_split(Real_Rotate["x"], Real_Rotate["y"])
(Real_Load_x_train, Real_Load_y_train,
 Real_Load_x_test, Real_Load_y_test) = train_test_split(Real_Load["x"], Real_Load["y"])
(Real_Radial_x_train, Real_Radial_y_train,
 Real_Radial_x_test, Real_Radial_y_test) = train_test_split(Real_Radial["x"], Real_Radial["y"])

# Artificial damage
(Artificial_Normal_x_train, Artificial_Normal_y_train,
 Artificial_Normal_x_test, Artificial_Normal_y_test) = train_test_split(Artificial_Normal["x"], Artificial_Normal["y"])
(Artificial_Rotate_x_train, Artificial_Rotate_y_train,
 Artificial_Rotate_x_test, Artificial_Rotate_y_test) = train_test_split(Artificial_Rotate["x"], Artificial_Rotate["y"])
(Artificial_Load_x_train, Artificial_Load_y_train,
 Artificial_Load_x_test, Artificial_Load_y_test) = train_test_split(Artificial_Load["x"], Artificial_Load["y"])
(Artificial_Radial_x_train, Artificial_Radial_y_train,
 Artificial_Radial_x_test, Artificial_Radial_y_test) = train_test_split(Artificial_Radial["x"], Artificial_Radial["y"])

# Shapes of one train/test pair as a quick check
print(Real_Normal_x_train.shape, Real_Normal_y_train.shape, sep=", ")
print(Real_Normal_x_test.shape, Real_Normal_y_test.shape, sep=", ")

torch.Size([19440, 1024]), torch.Size([19440])
torch.Size([4860, 1024]), torch.Size([4860])


In [17]:
# Bundle each split into a dict with its samples and labels, ready to be saved.

# Real domain
Real_Normal_train = dict(samples=Real_Normal_x_train, labels=Real_Normal_y_train)
Real_Normal_test = dict(samples=Real_Normal_x_test, labels=Real_Normal_y_test)

Real_Rotate_train = dict(samples=Real_Rotate_x_train, labels=Real_Rotate_y_train)
Real_Rotate_test = dict(samples=Real_Rotate_x_test, labels=Real_Rotate_y_test)

Real_Load_train = dict(samples=Real_Load_x_train, labels=Real_Load_y_train)
Real_Load_test = dict(samples=Real_Load_x_test, labels=Real_Load_y_test)

Real_Radial_train = dict(samples=Real_Radial_x_train, labels=Real_Radial_y_train)
Real_Radial_test = dict(samples=Real_Radial_x_test, labels=Real_Radial_y_test)

# Artificial domain
Artificial_Normal_train = dict(samples=Artificial_Normal_x_train, labels=Artificial_Normal_y_train)
Artificial_Normal_test = dict(samples=Artificial_Normal_x_test, labels=Artificial_Normal_y_test)

Artificial_Rotate_train = dict(samples=Artificial_Rotate_x_train, labels=Artificial_Rotate_y_train)
Artificial_Rotate_test = dict(samples=Artificial_Rotate_x_test, labels=Artificial_Rotate_y_test)

Artificial_Load_train = dict(samples=Artificial_Load_x_train, labels=Artificial_Load_y_train)
Artificial_Load_test = dict(samples=Artificial_Load_x_test, labels=Artificial_Load_y_test)

Artificial_Radial_train = dict(samples=Artificial_Radial_x_train, labels=Artificial_Radial_y_train)
Artificial_Radial_test = dict(samples=Artificial_Radial_x_test, labels=Artificial_Radial_y_test)

# Save files

In [19]:
def _save_domain_splits(out_dir, splits):
    """Write every ``{file name: payload}`` pair of ``splits`` into ``out_dir`` with ``torch.save``.

    ``out_dir`` is created first if it does not exist yet.
    """
    # exist_ok=True creates the folder without a separate "does it exist?" check
    os.makedirs(out_dir, exist_ok=True)
    for file_name, payload in splits.items():
        torch.save(payload, os.path.join(out_dir, file_name))


# Output folders live under <parent of the working directory>/dataset
parent_dir = os.path.dirname(os.getcwd())
Real_dir = os.path.join(parent_dir, "dataset", "PU_Real")
Artificial_dir = os.path.join(parent_dir, "dataset", "PU_Artificial")

_save_domain_splits(Real_dir, {
    "train_Normal.pt": Real_Normal_train,
    "test_Normal.pt": Real_Normal_test,
    "train_Rotate.pt": Real_Rotate_train,
    "test_Rotate.pt": Real_Rotate_test,
    "train_Load.pt": Real_Load_train,
    "test_Load.pt": Real_Load_test,
    "train_Radial.pt": Real_Radial_train,
    "test_Radial.pt": Real_Radial_test,
})

_save_domain_splits(Artificial_dir, {
    "train_Normal.pt": Artificial_Normal_train,
    "test_Normal.pt": Artificial_Normal_test,
    "train_Rotate.pt": Artificial_Rotate_train,
    "test_Rotate.pt": Artificial_Rotate_test,
    "train_Load.pt": Artificial_Load_train,
    "test_Load.pt": Artificial_Load_test,
    "train_Radial.pt": Artificial_Radial_train,
    "test_Radial.pt": Artificial_Radial_test,
})