In [None]:
import pandas as pd
from sklearn.ensemble import IsolationForest
from sklearn.metrics import classification_report

In [None]:

# Step 1: Load the preprocessed dataset from CSV
df_pca = pd.read_csv('preprocessed_data.csv')

# Step 2: Split into the label column ('Crash') and the feature matrix
# (every remaining column)
y = df_pca['Crash']
X = df_pca.drop(columns='Crash')

# Step 3: Build the Isolation Forest and fit it on the features only;
# the 'Crash' labels are not used for training
iso_forest = IsolationForest(
    n_estimators=100,
    contamination=0.05,
    random_state=42,
)
iso_forest.fit(X)

# Step 4: Score every row. predict() returns 1 for inliers and -1 for
# outliers; recode to 0 = normal, 1 = anomaly before storing the column.
# NOTE(review): presumably 'Crash' is encoded 0/1 so the two columns are
# directly comparable -- confirm against the preprocessing step.
df_pca['Anomaly'] = pd.Series(iso_forest.predict(X), index=df_pca.index).map({1: 0, -1: 1})

# Step 5: Compare the anomaly flags against the true labels
print(classification_report(y_true=y, y_pred=df_pca['Anomaly']))

# Step 6: Visualize anomalies
# Plots the first principal component against the row index and highlights
# the rows flagged by the Isolation Forest (Anomaly == 1) in red.
import matplotlib.pyplot as plt

# Filter the flagged rows once and reuse the result for both x and y values.
anomalies = df_pca[df_pca['Anomaly'] == 1]

plt.figure(figsize=(12, 6))
plt.plot(df_pca.index, df_pca['PC1'], label='Principal Component 1')
# Scatter collections default to a lower zorder than lines, so without an
# explicit zorder the PC1 line would be painted over the anomaly markers.
plt.scatter(anomalies.index, anomalies['PC1'], color='r', label='Anomalies', zorder=3)
plt.title('Anomalies Detected by Isolation Forest')
# NOTE(review): the x values are the DataFrame row index; the 'Time' label
# assumes rows are in chronological order -- confirm.
plt.xlabel('Time')
plt.ylabel('Principal Component 1')
plt.legend()
plt.show()