In [1]:
import pandas as pd
from sklearn.ensemble import IsolationForest
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score


In [3]:
# Step 1: Load the dataset
file_path = r"C:\Users\paari\OneDrive\Desktop\models\braking_system_rul_dataset.csv" # Replace with your file path
braking_system_data = pd.read_csv(file_path)

In [5]:
# Step 2: Feature selection (exclude RUL column)
features = braking_system_data.drop(columns=['RUL'])

In [8]:
# Step 3: Scale the features
scaler = StandardScaler()
scaled_features = scaler.fit_transform(features)

In [11]:
# Step 4: Fit the Isolation Forest model
isolation_forest = IsolationForest(contamination=0.1, random_state=42)
isolation_forest.fit(scaled_features)

In [13]:
# Step 5: Predict anomalies
anomaly_scores = -isolation_forest.decision_function(scaled_features)  # Negative to align high scores with anomalies
predicted_labels = isolation_forest.predict(scaled_features)
# Convert predicted labels (-1 for anomaly, 1 for normal) to binary (1 for anomaly, 0 for normal)
predicted_labels = (predicted_labels == -1).astype(int)

In [15]:
# Add predictions and scores to the dataset
braking_system_data['Anomaly Score'] = anomaly_scores
braking_system_data['Predicted Label'] = predicted_labels

In [17]:
# Step 6: Calculate metrics (assuming true labels are available for comparison)
# For demonstration, we'll use a synthetic true label column. Replace this with your actual labels if available.
import numpy as np
np.random.seed(42)
braking_system_data['True Label'] = np.random.choice([0, 1], size=len(braking_system_data), p=[0.9, 0.1])  # Simulated ground truth

accuracy = accuracy_score(braking_system_data['True Label'], braking_system_data['Predicted Label'])


# Print the metrics
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.82


In [19]:
# Step 7: Display rows with predictions
print("\nDataset with Anomaly Predictions:")
print(braking_system_data[['Anomaly Score', 'Predicted Label', 'True Label']].head(20))  # Display top 20 rows

# Step 8: Save the dataset with predictions
braking_system_data.to_csv("braking_system_anomaly_detection_results.csv", index=False)
print("Results saved to 'braking_system_anomaly_detection_results.csv'")


Dataset with Anomaly Predictions:
    Anomaly Score  Predicted Label  True Label
0       -0.069350                0           0
1       -0.048592                0           1
2       -0.108760                0           0
3        0.018710                1           0
4       -0.118752                0           0
5       -0.081078                0           0
6       -0.039628                0           0
7       -0.062151                0           0
8       -0.082071                0           0
9       -0.075285                0           0
10      -0.117635                0           0
11      -0.032325                0           1
12      -0.005564                0           0
13      -0.040455                0           0
14      -0.088920                0           0
15      -0.088855                0           0
16      -0.007263                0           0
17       0.003036                1           0
18      -0.108731                0           0
19      -0.068826        