In [None]:
# Step 1: Import required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.ensemble import IsolationForest
from sklearn.preprocessing import StandardScaler

# Step 2: Read the input data
# Point input_csv at the real file before running
input_csv = 'your_file.csv'
df = pd.read_csv(input_csv)
print(df.head())  # first few rows only, as a quick sanity check

# Step 3: Basic clean-up (optional)
# Discard every row that contains at least one missing value.
# Done in place so `df` stays the same DataFrame object for the later steps.
df.dropna(axis=0, how='any', inplace=True)

# Optional: one-hot encode categorical columns so they become numeric
# df = pd.get_dummies(df)

# Step 4: Feature Scaling
# Only numeric/boolean columns can be standardized. Selecting them explicitly
# keeps text columns (IDs, names, or an 'anomaly' label column left over from
# a previous run of this cell) from crashing StandardScaler with a
# "could not convert string to float" error. Boolean columns are kept so the
# optional pd.get_dummies() output is still used as features.
feature_df = df.select_dtypes(include=['number', 'bool'])
if feature_df.empty:
    # .empty is True when there are no rows OR no columns left
    raise ValueError(
        'No numeric data to analyse: check that the file has numeric columns '
        'and that dropping missing values did not remove every row.'
    )

scaler = StandardScaler()
scaled_data = scaler.fit_transform(feature_df)

# Step 5: Initialize the Isolation Forest model
# contamination=0.05 sets the expected share of outliers used for the decision
# threshold; random_state=42 makes the result reproducible between runs.
iso_forest = IsolationForest(n_estimators=100, contamination=0.05, random_state=42)
iso_forest.fit(scaled_data)

# Step 6: Predict anomalies (the model returns -1 for anomaly, 1 for normal)
predictions = iso_forest.predict(scaled_data)

# Step 7: Convert -1 (anomaly), 1 (normal) to readable format
# Building the Series on df.index keeps labels aligned with their rows even
# though dropna() may have left gaps in the index.
df['anomaly'] = pd.Series(predictions, index=df.index).map({1: 'Normal', -1: 'Anomaly'})

# Step 8: Bar chart of how many rows fall into each class
ax = sns.countplot(data=df, x='anomaly')
ax.set_title('Anomaly Detection Results')
plt.show()

# Step 9: Show only the rows flagged as anomalous
is_anomaly = df['anomaly'].eq('Anomaly')
anomalies = df.loc[is_anomaly]
print(anomalies)

# Optional: write the full labelled table to CSV (row index is not written)
df.to_csv('anomaly_detection_output.csv', index=False)