In [11]:
#Imports and Environments setup

In [12]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from scipy.stats import kurtosis, skew
from sklearn.ensemble import IsolationForest

In [13]:
#Load Dataset 

In [14]:
df_train = pd.read_csv("dataset1.csv")
df_test = pd.read_csv("dataset2.csv")

In [15]:
# Data Structure Inspection

In [16]:
df_train.shape
df_train.columns
df_train.head()

Unnamed: 0,condition,vibration_0,vibration_1,vibration_2,vibration_3,vibration_4,vibration_5,vibration_6,vibration_7,vibration_8,...,vibration_477,vibration_478,vibration_479,vibration_480,vibration_481,vibration_482,vibration_483,vibration_484,vibration_485,label
0,normal,0.024836,0.111913,0.268353,0.425919,0.446902,0.549247,0.73431,0.778831,0.791603,...,0.278103,0.529282,0.538796,0.57706,0.628049,0.678297,0.840837,0.853121,0.867475,6
1,normal,-0.032329,0.016103,0.224372,0.253241,0.276873,0.418014,0.412934,0.429033,0.608956,...,0.904792,0.828028,0.863587,0.712925,0.788273,0.644256,0.519576,0.539455,0.438305,6
2,normal,0.042032,-0.011598,0.019749,-0.031413,0.061426,-0.016212,0.046681,0.184736,0.206779,...,-0.571009,-0.573756,-0.520952,-0.610621,-0.508896,-0.626743,-0.758805,-0.648551,-0.682308,6
3,normal,0.064488,0.075939,0.077562,0.065323,0.157914,0.167365,0.222016,0.321173,0.415419,...,0.894573,0.991867,1.010272,1.047118,1.001895,1.014782,0.980393,1.012239,0.978645,6
4,normal,0.031079,0.068739,0.192628,0.25754,0.398561,0.420336,0.411679,0.637037,0.59789,...,-0.669731,-0.796681,-0.838401,-0.957425,-0.893086,-0.921122,-0.976522,-0.933464,-1.015111,6


In [17]:
#Track Section Assignment

In [18]:
# Define simulated track sections (for demo/testing)
NUM_SECTIONS = 4
SECTIONS = [f"KM_{i}" for i in range(1, NUM_SECTIONS + 1)]

print("Simulated Track Sections:", SECTIONS)

Simulated Track Sections: ['KM_1', 'KM_2', 'KM_3', 'KM_4']


In [19]:
# Select Vibration Columns

In [20]:
vibration_cols = [c for c in df_train.columns if c.startswith("vibration_")]

In [21]:
# DataType Cleaning

In [22]:
df_train[vibration_cols] = df_train[vibration_cols].apply(
    pd.to_numeric, errors="coerce"
)
df_test[vibration_cols] = df_test[vibration_cols].apply(
    pd.to_numeric, errors="coerce"
)

df_train[vibration_cols] = df_train[vibration_cols].fillna(
    df_train[vibration_cols].mean()
)
df_test[vibration_cols] = df_test[vibration_cols].fillna(
    df_train[vibration_cols].mean()
)

In [23]:
#Feature Extraction 

In [24]:
def extract_features(row):
    return [
        np.mean(row),
        np.std(row),
        np.max(row) - np.min(row),
        np.sqrt(np.mean(row**2)),
        kurtosis(row),
        skew(row)
    ]

In [25]:
#Feature Matrix Creation

In [26]:
X_train = np.array([
    extract_features(df_train.loc[i, vibration_cols].values)
    for i in df_train.index
])

AttributeError: 'numpy.dtypes.ObjectDType' object has no attribute 'dtype'

In [None]:
# Now the raw vibration is ML Ready

In [None]:
# Train Isolation Forest

In [None]:
iso_forest = IsolationForest(
    n_estimators=300,
    contamination=0.05,
    random_state=42
)

iso_forest.fit(X_train)

In [None]:
# Test & Score Anomalies

In [None]:
X_test = np.array([
    extract_features(df_test.loc[i, vibration_cols].values)
    for i in df_test.index
])

scores = -iso_forest.score_samples(X_test)

In [None]:
#Visualization

In [None]:
plt.figure()
plt.plot(scores)
plt.title("Anomaly Scores per Vibration Window")
plt.xlabel("Window Index")
plt.ylabel("Anomaly Score")
plt.show()

In [None]:
#FURTHER ADD ONS for testing and understanding 

In [None]:
# testing

In [None]:
print("Number of trees:", len(iso_forest.estimators_))
print("Feature importance not applicable (unsupervised model)")

In [None]:
#Synthetic Tampering

In [None]:
signal_example = df_test[vibration_cols].iloc[0].values.copy()

# Inject artificial tampering spikes
signal_example[100:120] += 5

tampered_features = extract_features(signal_example)

normal_score = -iso_forest.score_samples([X_test[0]])[0]
tampered_score = -iso_forest.score_samples([tampered_features])[0]

print("Normal score:", normal_score)
print("Tampered score:", tampered_score)

In [None]:
#Threshold Logic

In [None]:
threshold = np.percentile(scores, 95)

alerts = scores > threshold
print("Number of high-risk alerts:", alerts.sum())

In [None]:
# FUSION-READY SENSOR OUTPUT

In [None]:
sensor_events = []

for i in range(len(scores)):
    event = {
        "window_id": i,
        "anomaly_score": float(scores[i]),
        "persistence_score": float(persistence_scores[i]),
        "risk_level": risk_levels[i],
        "explanation": explanations[i]
    }
    sensor_events.append(event)

# Preview final sensor output
sensor_events[:3]
