In [None]:
# STEP 1: Import libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from sklearn.ensemble import IsolationForest
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, roc_auc_score
import joblib

: 

In [None]:
# STEP 2: Load the dataset
# The file is read as semicolon-delimited text; '?' entries are treated as
# missing values so they surface as NaN rather than as strings.
file_path = 'household_power_consumption.txt'
read_options = dict(sep=';', na_values='?', low_memory=False)
df = pd.read_csv(file_path, **read_options)

In [None]:
# Step 3: Preprocess
# Build a single 'Datetime' column from the raw 'Date' and 'Time' text columns.
# The guard makes this cell safe to re-run: once the raw columns have been
# replaced by 'Datetime', a second execution skips this step instead of
# raising KeyError on the now-missing 'Date'/'Time' columns.
if {'Date', 'Time'}.issubset(df.columns):
    df['Datetime'] = pd.to_datetime(
        df['Date'] + ' ' + df['Time'],
        format='%d/%m/%Y %H:%M:%S',
        errors='coerce',  # unparseable timestamps become NaT and are dropped below
    )
    df = df.drop(columns=['Date', 'Time'])

# Convert every non-timestamp column to numeric; values that cannot be
# parsed are coerced to NaN so the dropna below removes them.
numeric_cols = [col for col in df.columns if col != 'Datetime']
df[numeric_cols] = df[numeric_cols].apply(pd.to_numeric, errors='coerce')

# Drop any rows with NaNs (including NaT timestamps) AFTER all conversion.
# Rebinding instead of inplace=True keeps the operation explicit; the
# original row index labels are preserved.
df = df.dropna()

In [None]:
# Step 4: Visualize
# Box plots of three power-related columns drawn on one shared set of axes.
boxplot_cols = ['Global_active_power', 'Global_reactive_power', 'Voltage']

fig, ax = plt.subplots(figsize=(10, 6))
sns.boxplot(data=df[boxplot_cols], ax=ax)
ax.set_title("Boxplot of Power Features")
plt.show()

In [None]:
# Step 5: Isolation Forest for Anomaly Detection
# Columns fed to the anomaly detector.
features = [
    'Global_active_power',
    'Global_reactive_power',
    'Voltage',
    'Global_intensity',
    'Sub_metering_1',
    'Sub_metering_2',
    'Sub_metering_3',
]

# Fit the scaler on the selected columns, then transform those same rows.
# The fitted scaler is kept because it is persisted alongside the model later.
scaler = StandardScaler()
X = df[features]
X_scaled = scaler.fit(X).transform(X)

In [None]:
# Train the model
# Hyperparameters are gathered in one place: number of trees, the assumed
# fraction of anomalous rows, and a fixed seed so repeated runs match.
forest_params = {
    'n_estimators': 100,
    'contamination': 0.01,
    'random_state': 42,
}
model = IsolationForest(**forest_params)
model.fit(X_scaled)

In [None]:
# Predict
# The model labels rows as 1 or -1; remap so that the 'anomaly' column holds
# 1 for rows labelled -1 and 0 for rows labelled 1.
label_to_flag = {1: 0, -1: 1}  # 1 means anomaly now
raw_labels = pd.Series(model.predict(X_scaled), index=df.index)
df['anomaly'] = raw_labels.map(label_to_flag)

In [None]:
# Step 6: Visualize anomalies over time
# Select the flagged rows once and reuse them for both scatter coordinates.
flagged = df.loc[df['anomaly'] == 1]

fig, ax = plt.subplots(figsize=(12, 6))
# Full series as a line, with the flagged points overlaid in red.
ax.plot(df['Datetime'], df['Global_active_power'],
        label='Global Active Power', alpha=0.7)
ax.scatter(flagged['Datetime'], flagged['Global_active_power'],
           color='red', label='Anomaly', s=10)
ax.set_title("Anomalies in Global Active Power")
ax.set_xlabel("Datetime")
ax.set_ylabel("Global Active Power")
ax.legend()
plt.show()

In [None]:
# Step 7: Save model and scaler
# Both fitted objects are written to disk; the scaler is saved as well so the
# same transformation can be applied before the model is used again.
model_path = 'isolation_forest_model.pkl'
scaler_path = 'scaler.pkl'
joblib.dump(model, model_path)
joblib.dump(scaler, scaler_path)