In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.signal import butter, filtfilt
from scipy.fft import rfft, rfftfreq
from sklearn.ensemble import IsolationForest
from sklearn.preprocessing import StandardScaler
from lightgbm import LGBMClassifier
import shap, os, glob, pywt, json


In [3]:
import os
import glob

DATA_PATH = "/Users/macbook/Downloads/IMS/1st_test"

files = [
    f for f in glob.glob(os.path.join(DATA_PATH, "*"))
    if os.path.isfile(f)
]

print(f"Found {len(files)} data files")
print(files[:3])  # sanity check


Found 2156 data files
['/Users/macbook/Downloads/IMS/1st_test/2003.10.22.23.54.13', '/Users/macbook/Downloads/IMS/1st_test/2003.11.17.23.42.30', '/Users/macbook/Downloads/IMS/1st_test/2003.11.18.12.02.30']


In [4]:
def load_ims_file(path):
    df = pd.read_csv(path, sep="\t", header=None)
    df.columns = [
        "b1_x", "b1_y",
        "b2_x", "b2_y",
        "b3_x", "b3_y",
        "b4_x", "b4_y"
    ]
    return df

raw_data = []

for f in files:
    try:
        raw_data.append(load_ims_file(f))
    except Exception as e:
        print(f"Skipped {f}: {e}")

print(f"Loaded {len(raw_data)} vibration snapshots")


Loaded 2156 vibration snapshots


In [5]:
raw_data[0].head(), raw_data[0].shape


(    b1_x   b1_y   b2_x   b2_y   b3_x   b3_y   b4_x   b4_y
 0 -0.146 -0.073 -0.168 -0.120 -0.024 -0.022 -0.063 -0.193
 1 -0.081 -0.110 -0.107 -0.173 -0.198 -0.151  0.002 -0.125
 2 -0.110  0.000 -0.151 -0.234  0.034  0.127  0.034 -0.176
 3 -0.269 -0.002 -0.144 -0.212  0.007 -0.051 -0.066 -0.122
 4 -0.200 -0.103 -0.215 -0.227 -0.142  0.061 -0.103 -0.059,
 (20480, 8))

In [6]:
def bandpass_filter(signal, low=10, high=5000, fs=20000, order=4):
    nyq = 0.5 * fs
    low = low / nyq
    high = high / nyq
    b, a = butter(order, [low, high], btype='band')
    return filtfilt(b, a, signal)


In [7]:
def preprocess(df):
    return df.apply(lambda col: bandpass_filter(col.values))


In [8]:
processed_data = [preprocess(df) for df in raw_data]
print("Preprocessing completed")


Preprocessing completed


In [9]:
WINDOW_SIZE = 2048   # samples (~0.1 sec)
STEP_SIZE = 1024     # overlap

def create_windows(signal, window=WINDOW_SIZE, step=STEP_SIZE):
    windows = []
    for start in range(0, len(signal) - window, step):
        windows.append(signal[start:start + window])
    return np.array(windows)


In [10]:
from scipy.stats import kurtosis
from scipy.fft import rfft, rfftfreq

FS = 20000  # sampling rate (NASA IMS)

def extract_features(window):
    rms = np.sqrt(np.mean(window**2))
    kurt = kurtosis(window)
    crest = np.max(np.abs(window)) / rms
    
    spectrum = np.abs(rfft(window))
    freqs = rfftfreq(len(window), 1 / FS)
    peak_freq = freqs[np.argmax(spectrum)]
    
    spec_norm = spectrum / np.sum(spectrum)
    entropy = -np.sum(spec_norm * np.log2(spec_norm + 1e-12))
    
    return [rms, kurt, crest, peak_freq, entropy]


In [11]:
features = []

for df in processed_data:
    signal = df["b1_x"].values
    windows = create_windows(signal)
    
    for w in windows:
        features.append(extract_features(w))

X = np.array(features)
print("Feature matrix shape:", X.shape)


Feature matrix shape: (38808, 5)


In [12]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)


In [14]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense

input_dim = X_scaled.shape[1]

inp = Input(shape=(input_dim,))
x = Dense(8, activation="relu")(inp)
x = Dense(4, activation="relu")(x)
x = Dense(8, activation="relu")(x)
out = Dense(input_dim)(x)

autoencoder = Model(inp, out)
autoencoder.compile(optimizer="adam", loss="mse")
