In [47]:
import os
import numpy as np
import matplotlib.pyplot as plt
from scipy.signal import spectrogram
from scipy.ndimage import zoom
from PIL import Image
import random
from tqdm import tqdm
import pandas as pd

# All locations are resolved relative to the directory the notebook is run from.
SCRIPT_PATH = os.getcwd()
# Directory the input CSV files are read from.
INPUT_DIR = os.path.abspath(os.path.join(SCRIPT_PATH, '..', 'data', 'IntermediateData'))
# Root directory for the generated spectrogram images. Normalized with abspath
# (like INPUT_DIR) so that logged paths do not contain an un-collapsed '..'.
OUTPUT_DIR = os.path.abspath(os.path.join(SCRIPT_PATH, '..', 'data', 'preprocessed', 'spectrograms'))
os.makedirs(OUTPUT_DIR, exist_ok=True)

In [48]:
# Size passed to PIL's resize for every saved image (square, so the
# width/height ordering does not matter here).
IMG_SIZE = (224, 224)
# Fractions of experiments assigned to each subset.
SPLIT_RATIOS = (0.7, 0.15, 0.15)  # train, val, test
# One seed for every random number generator the notebook touches.
SEED = 42
random.seed(SEED)
# Seed NumPy's legacy global generator as well, so both generators start from
# a known state as soon as the configuration cell has run.
np.random.seed(SEED)

In [49]:
def compute_spectrogram_normalized(signal, fs, nperseg=256, noverlap=128):
    """
    Compute a spectrogram in decibels and min-max normalize it to [0, 1].

    Parameters
    ----------
    signal : array_like
        Samples of the signal; the spectrogram is taken along the last axis.
    fs : float
        Sampling frequency in Hz, forwarded to ``scipy.signal.spectrogram``.
    nperseg : int, optional
        Requested segment (window) length in samples. Default 256.
    noverlap : int, optional
        Requested overlap between consecutive segments. Default 128.

    Returns
    -------
    numpy.ndarray
        Normalized spectrogram with frequency on axis 0 and time on axis 1
        for a 1-D input.

    Raises
    ------
    ValueError
        If ``signal`` is a scalar or contains no samples.

    Notes
    -----
    A signal shorter than ``nperseg`` is analysed with a single window that
    spans the whole signal. If the requested overlap no longer fits into that
    shortened window, it is reduced to half of the window length instead of
    letting SciPy fail with "noverlap must be less than nperseg".
    """
    samples = np.asarray(signal)
    if samples.ndim == 0 or samples.shape[-1] == 0:
        raise ValueError("signal must contain at least one sample")

    n_samples = samples.shape[-1]
    # Never ask for a window longer than the data that is available.
    seg_len = min(int(nperseg), n_samples)
    # Keep the requested overlap whenever it is still valid for the window.
    overlap = noverlap if noverlap < seg_len else seg_len // 2

    _, _, Sxx = spectrogram(samples, fs=fs, nperseg=seg_len, noverlap=overlap)
    # The small offset keeps log10 finite for bins with zero power.
    Sxx_db = 10 * np.log10(Sxx + 1e-10)
    # Normalize to [0, 1]; the epsilon guards against a constant spectrogram.
    db_min = Sxx_db.min()
    Sxx_norm = (Sxx_db - db_min) / (Sxx_db.max() - db_min + 1e-10)
    return Sxx_norm

In [50]:
def spec_to_rgb_image(spec):
    """
    Convert a normalized spectrogram to an RGB image by replicating it
    across the three channels.

    Parameters
    ----------
    spec : array_like
        Spectrogram values, expected to lie in [0, 1].

    Returns
    -------
    numpy.ndarray
        ``uint8`` array with one extra trailing axis of length 3; all three
        channels hold the same gray values.

    Notes
    -----
    Values outside [0, 1] are clipped and NaNs are mapped to 0 before the
    cast, because converting out-of-range floats to ``uint8`` would otherwise
    wrap around and produce arbitrary intensities. In-range values are scaled
    by 255 and truncated.
    """
    bounded = np.clip(np.nan_to_num(np.asarray(spec, dtype=float), nan=0.0), 0.0, 1.0)
    gray = (bounded * 255).astype(np.uint8)
    return np.stack([gray, gray, gray], axis=-1)

In [51]:
def combine_spectrograms_rgb(spec1, spec2):
    """
    Combine two normalized spectrograms into one RGB image.

    Channel layout: ``spec1`` -> red, ``spec2`` -> green, and the min-max
    normalized absolute difference ``|spec1 - spec2|`` -> blue. (The original
    notebook attributes this fusion scheme to Wang et al., 2019; that
    reference has not been verified here.)

    Parameters
    ----------
    spec1, spec2 : array_like
        2-D spectrograms with values expected in [0, 1]. When the shapes
        differ, ``spec1`` is resampled with linear interpolation to the shape
        of ``spec2``.

    Returns
    -------
    numpy.ndarray
        ``uint8`` array of shape ``(H, W, 3)`` where ``(H, W)`` is the shape
        of ``spec2``.

    Raises
    ------
    ValueError
        If either input is not two-dimensional.

    Notes
    -----
    Inputs are sanitized (NaN -> 0, values clipped to [0, 1]) before use,
    because a NaN would otherwise poison the min/max of the difference
    channel and out-of-range values would wrap around in the ``uint8`` cast.
    """
    def _to_unit_range(values):
        # NaN -> 0, then clamp to the range the uint8 scaling assumes.
        return np.clip(np.nan_to_num(np.asarray(values, dtype=float), nan=0.0), 0.0, 1.0)

    spec1 = _to_unit_range(spec1)
    spec2 = _to_unit_range(spec2)
    if spec1.ndim != 2 or spec2.ndim != 2:
        raise ValueError("spec1 and spec2 must both be 2-D arrays")

    # Ensure same shape: resample spec1 onto spec2's grid.
    if spec1.shape != spec2.shape:
        ratio = np.array(spec2.shape) / np.array(spec1.shape)
        spec1 = zoom(spec1, ratio, order=1)

    # R channel: first spectrogram (the CURRENT signal in this notebook).
    red = (spec1 * 255).astype(np.uint8)

    # G channel: second spectrogram (the RPM signal in this notebook).
    green = (spec2 * 255).astype(np.uint8)

    # B channel: normalized difference (highlights discrepancies). The epsilon
    # avoids a division by zero when both spectrograms are identical.
    diff = np.abs(spec1 - spec2)
    diff_norm = (diff - diff.min()) / (diff.max() - diff.min() + 1e-10)
    blue = (diff_norm * 255).astype(np.uint8)

    return np.stack([red, green, blue], axis=-1)

In [52]:
def _save_spectrogram_png(rgb_array, out_path, target_size):
    """Resize an RGB uint8 array with Lanczos resampling and save it as a PNG (compress_level=0)."""
    image = Image.fromarray(rgb_array).resize(target_size, Image.LANCZOS)
    image.save(out_path, format="PNG", compress_level=0)


def process_and_split_dataset(df, label, output_root=OUTPUT_DIR, fs_estimate=1000,
                              target_size=IMG_SIZE, split_ratios=SPLIT_RATIOS, seed=SEED):
    """
    Generates spectrograms in 3 variants: combined RGB, current only, speed only.
    Saves to: combined/, current/, speed/ subdirectories.

    The experiments found in ``df`` are shuffled with ``seed`` and split into
    train / val / test at the experiment level. For every experiment one PNG
    per variant is written to
    ``<output_root>/<variant>/<subset>/<label>/exp<Experiment ID>.png``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'Experiment ID', 'Time (s)', 'CURRENT (A)'
        and 'ROTO (RPM)'.
    label : str
        Class name; used as the innermost output directory and in log output.
    output_root : str
        Root directory below which the variant/subset/label tree is created.
    fs_estimate : float
        Not used: the sampling frequency is always derived from 'Time (s)'.
        Retained so that existing callers keep working.
    target_size : tuple
        Size handed to PIL's ``resize`` (PIL interprets it as (width, height)).
    split_ratios : sequence of float
        Train and validation fractions are read from positions 0 and 1; every
        remaining experiment goes to the test subset.
    seed : int
        Seed applied to both ``random`` and NumPy's global generator.

    Notes
    -----
    Experiments whose sampling step cannot be estimated (fewer than two
    samples, or a non-finite / non-positive median time step) are skipped and
    listed in the log instead of being passed on to the spectrogram code.
    """
    np.random.seed(seed)
    random.seed(seed)

    # Create folders for all 3 variants
    variants = ['combined', 'current', 'speed']
    subsets = ['train', 'val', 'test']
    for variant in variants:
        for subset in subsets:
            os.makedirs(os.path.join(output_root, variant, subset, label), exist_ok=True)

    # Shuffle a plain list rather than the array returned by unique():
    # random.shuffle is specified for mutable sequences, and for the same seed
    # and length it yields the same permutation.
    experiment_ids = list(df['Experiment ID'].unique())
    n_total = len(experiment_ids)

    n_train = int(split_ratios[0] * n_total)
    n_val = int(split_ratios[1] * n_total)
    # rest to test
    random.shuffle(experiment_ids)
    split_sets = {
        'train': experiment_ids[:n_train],
        'val': experiment_ids[n_train:n_train + n_val],
        'test': experiment_ids[n_train + n_val:]
    }

    print(f"Generating spectrograms for {label}: "
          f"{n_train} train / {n_val} val / {n_total - n_train - n_val} test")

    skipped_ids = []
    for subset, ids in split_sets.items():
        for exp_id in tqdm(ids, desc=f"{label} – {subset}"):
            experiment = df[df['Experiment ID'] == exp_id]
            time = experiment['Time (s)'].values
            current = experiment['CURRENT (A)'].values
            speed = experiment['ROTO (RPM)'].values

            # --- Estimate sample frequency (fs) ---
            # At least two time stamps are needed to form a time step.
            if len(time) < 2:
                skipped_ids.append(exp_id)
                continue
            dt = np.median(np.diff(time))
            # NaN compares False against 0, so finiteness is checked explicitly.
            if not np.isfinite(dt) or dt <= 0:
                skipped_ids.append(exp_id)
                continue
            fs = 1.0 / dt  # Hz

            # --- Generate normalized spectrograms ---
            spec_curr = compute_spectrogram_normalized(current, fs)
            spec_speed = compute_spectrogram_normalized(speed, fs)

            # --- Build the three image variants (keys match the folder names) ---
            images = {
                'combined': combine_spectrograms_rgb(spec_curr, spec_speed),
                'current': spec_to_rgb_image(spec_curr),
                'speed': spec_to_rgb_image(spec_speed),
            }
            for variant, rgb in images.items():
                out_path = os.path.join(output_root, variant, subset, label, f"exp{exp_id}.png")
                _save_spectrogram_png(rgb, out_path, target_size)

    if skipped_ids:
        print(f"Skipped {len(skipped_ids)} experiment(s) for {label} "
              f"(sampling step could not be estimated): {skipped_ids}")

    print(f"\n✅ Saved spectrograms for {label} to {output_root}\n")

In [53]:
# Read all four CSV files first, then generate the images label by label.
# Each file is named after its label.
dataset_labels = ("faulty", "healthy", "faulty_zip", "healthy_zip")
frames = [pd.read_csv(os.path.join(INPUT_DIR, name + '.csv')) for name in dataset_labels]

# Keep the individual frames available under their usual names.
faulty, healthy, faulty_zip, healthy_zip = frames

for name, frame in zip(dataset_labels, frames):
    process_and_split_dataset(frame, name)


Generating spectrograms for faulty: 32 train / 6 val / 8 test


faulty – train: 100%|██████████| 32/32 [00:00<00:00, 77.18it/s]
faulty – val: 100%|██████████| 6/6 [00:00<00:00, 75.03it/s]
faulty – test: 100%|██████████| 8/8 [00:00<00:00, 71.03it/s]



✅ Saved spectrograms for faulty to C:\Users\Jakub\Kuba_lokalne\studia\Praca_magisterska\Projekt\Fault-detection-in-mechanical-devices\preprocessing\..\data\preprocessed\spectrograms

Generating spectrograms for healthy: 32 train / 6 val / 8 test


healthy – train: 100%|██████████| 32/32 [00:00<00:00, 75.88it/s]
healthy – val: 100%|██████████| 6/6 [00:00<00:00, 78.29it/s]
healthy – test: 100%|██████████| 8/8 [00:00<00:00, 79.42it/s]



✅ Saved spectrograms for healthy to C:\Users\Jakub\Kuba_lokalne\studia\Praca_magisterska\Projekt\Fault-detection-in-mechanical-devices\preprocessing\..\data\preprocessed\spectrograms

Generating spectrograms for faulty_zip: 32 train / 6 val / 8 test


faulty_zip – train: 100%|██████████| 32/32 [00:00<00:00, 73.37it/s]
faulty_zip – val: 100%|██████████| 6/6 [00:00<00:00, 76.65it/s]
faulty_zip – test: 100%|██████████| 8/8 [00:00<00:00, 75.42it/s]



✅ Saved spectrograms for faulty_zip to C:\Users\Jakub\Kuba_lokalne\studia\Praca_magisterska\Projekt\Fault-detection-in-mechanical-devices\preprocessing\..\data\preprocessed\spectrograms

Generating spectrograms for healthy_zip: 32 train / 6 val / 8 test


healthy_zip – train: 100%|██████████| 32/32 [00:00<00:00, 76.79it/s]
healthy_zip – val: 100%|██████████| 6/6 [00:00<00:00, 78.08it/s]
healthy_zip – test: 100%|██████████| 8/8 [00:00<00:00, 77.02it/s]


✅ Saved spectrograms for healthy_zip to C:\Users\Jakub\Kuba_lokalne\studia\Praca_magisterska\Projekt\Fault-detection-in-mechanical-devices\preprocessing\..\data\preprocessed\spectrograms




