In [12]:
import os

import numpy as np
import pandas as pd
from sklearn.ensemble import IsolationForest
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.preprocessing import StandardScaler

# Show every row when a DataFrame is printed (pandas truncates long frames by default).
pd.set_option('display.max_rows', None)

# Load dataset.
# The CSV location can be overridden with the LIVE_DATA_CSV environment variable so the
# notebook can run on a machine other than the one it was written on. When the variable
# is not set, the original hard-coded path is used, so existing behaviour is unchanged.
DEFAULT_DATA_PATH = r"C:\Users\paari\OneDrive\Desktop\live8.csv"
data_path = os.environ.get("LIVE_DATA_CSV", DEFAULT_DATA_PATH)
data = pd.read_csv(data_path)

# Columns of `data` that are fed to the anomaly detector.
features = [
    'ENGINE_RPM ()',
    'COOLANT_TEMPERATURE ()',
    'ENGINE_LOAD ()',
    'FUEL_TANK ()',
]

# Standardise the selected columns; `scaler` keeps the fitted statistics so the same
# transformation can be applied to other rows later.
scaler = StandardScaler()
data_scaled = scaler.fit_transform(data[features])

# Fit the Isolation Forest on the scaled features (fixed seed, 1% expected contamination).
isolation_forest = IsolationForest(contamination=0.01, random_state=42)
isolation_forest.fit(data_scaled)

# Score the rows the model was trained on. predict() labels outliers as -1, so the
# comparison below turns that into 1 = anomaly, 0 = normal.
raw_labels = isolation_forest.predict(data_scaled)
data['anomaly'] = pd.Series(raw_labels == -1, index=data.index).astype('int64')

# Build synthetic anomalies for evaluation, starting from rows the model currently
# labels as normal (anomaly == 0).
# The sample size is capped at the number of available normal rows because
# DataFrame.sample raises ValueError when asked for more rows than exist
# (sampling is without replacement).
SYNTHETIC_SEED = 42
normal_rows = data[data['anomaly'] == 0]
n_synthetic = min(50, len(normal_rows))
synthetic_data = normal_rows.sample(n_synthetic, random_state=SYNTHETIC_SEED).copy()

# Perturb the sampled rows. A seeded generator is used so the injected values (and
# therefore every metric computed from them) are identical on each run; the global
# np.random state is left untouched.
synthetic_rng = np.random.default_rng(SYNTHETIC_SEED)
synthetic_data['ENGINE_RPM ()'] += synthetic_rng.uniform(5000, 10000, size=n_synthetic)  # Increase RPM
synthetic_data['FUEL_TANK ()'] -= synthetic_rng.uniform(20, 50, size=n_synthetic)        # Decrease fuel tank level

# Append the synthetic rows after the original rows; the index is rebuilt as 0..n-1.
data_with_synthetic = pd.concat([data, synthetic_data], ignore_index=True)

# Score the combined data with the scaler and model that were ALREADY fitted on the
# original data. Nothing is re-trained here: the synthetic rows are only scored.
data_scaled_with_synthetic = scaler.transform(data_with_synthetic[features])
combined_labels = isolation_forest.predict(data_scaled_with_synthetic)
# predict() labels outliers as -1; store 1 = anomaly, 0 = normal.
data_with_synthetic['anomaly'] = pd.Series(
    combined_labels == -1, index=data_with_synthetic.index
).astype('int64')

# Build the ground-truth "true" column (1 = anomaly, 0 = normal) from two sources.
#
# 1. Injected rows. The synthetic rows were appended after the original rows of `data`,
#    so every position >= len(data) is an injected anomaly. They are marked explicitly
#    because the perturbation (RPM + at least 5000, fuel - at least 20) does not
#    guarantee that a row crosses the thresholds below (e.g. RPM can land under 8000),
#    which would otherwise label a deliberately injected anomaly as normal.
# 2. Domain-knowledge threshold rules, applied to every row.
is_synthetic = pd.Series(False, index=data_with_synthetic.index)
is_synthetic.iloc[len(data):] = True

rpm = data_with_synthetic['ENGINE_RPM ()']
coolant = data_with_synthetic['COOLANT_TEMPERATURE ()']
load = data_with_synthetic['ENGINE_LOAD ()']
fuel = data_with_synthetic['FUEL_TANK ()']

rule_based = (
    ((rpm > 0) & (rpm < 500)) |  # Very low RPM for a running engine
    (rpm > 8000) |               # Extremely high RPM
    # NOTE(review): this rule fires on low coolant temperature regardless of RPM, so
    # rows with the engine off (RPM == 0) are flagged too. Confirm whether it should be
    # restricted to a running engine.
    (coolant < 50) |             # Low coolant temperature
    (coolant > 120) |            # Overheating
    ((load == 0) & (rpm > 0)) |  # Zero load with non-zero RPM
    (load > 95) |                # High engine load
    (fuel > 100) |               # Fuel level over 100%
    (fuel < 0)                   # Fuel level below 0%
)

# Convert boolean to integer (1 for anomaly, 0 for normal)
data_with_synthetic['true'] = (rule_based | is_synthetic).astype(int)

# Compare the model's predictions ("anomaly") against the reference labels ("true").
# Metrics are only computed when at least one row is labelled as a true anomaly;
# zero_division=0 makes precision/recall/F1 return 0 instead of warning when a
# denominator is empty.
y_true = data_with_synthetic['true']
y_pred = data_with_synthetic['anomaly']

# Number of rows shown in the preview. The full frame is not printed because the
# notebook sets display.max_rows to None, which would dump every row into the output.
PREVIEW_ROWS = 20

if y_true.sum() > 0:
    accuracy = accuracy_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred, zero_division=0)
    recall = recall_score(y_true, y_pred, zero_division=0)
    f1 = f1_score(y_true, y_pred, zero_division=0)

    # Display a bounded preview of the data with anomaly predictions, then the metrics
    print("Data with Anomaly Predictions:\n", data_with_synthetic.head(PREVIEW_ROWS))
    print(f"\n(showing first {min(PREVIEW_ROWS, len(data_with_synthetic))} of {len(data_with_synthetic)} rows)")

    metrics = {
        'Accuracy': accuracy,
        'Precision': precision,
        'Recall': recall,
        'F1 Score': f1
    }
    print("\nMetrics:\n", metrics)
else:
    print("No true anomalies present in the dataset for evaluation.")



Data with Anomaly Predictions:
       ENGINE_RUN_TINE ()  ENGINE_RPM ()  VEHICLE_SPEED ()  THROTTLE ()  \
0                      0       0.000000                 0       17.647   
1                      0       0.000000                 0       17.647   
2                      0       0.000000                 0       17.647   
3                      0       0.000000                 0       17.647   
4                      0       0.000000                 0       17.647   
5                      0       0.000000                 0       17.647   
6                      0       0.000000                 0       17.647   
7                      0       0.000000                 0       17.647   
8                      3    1698.200000                 0       19.608   
9                      3    1698.200000                 0       19.608   
10                     3    1698.200000                 0       19.608   
11                     3    1621.000000                 0       19.608   
12    