# Automated Anomaly Detection
**Objective**: Understand and practice automated anomaly detection using various techniques.

**Task**: Autoencoders for Anomaly Detection

**Steps**:
1. Data Set: Download a dataset of electricity consumption data.
2. Build an Autoencoder: Construct a simple neural-network autoencoder and train it on the
normal consumption data.
3. Identify Anomalies: Use the trained model to reconstruct the data and identify anomalies based on reconstruction error.
4. Visualize: Plot both the actual and reconstructed data to highlight anomalies.

In [1]:
# write your code from here
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.models import Sequential

# Step 1: Data Set
# Download the 'household_power_consumption.txt' file from the UCI Machine Learning Repository
# and place it in the current working directory: the relative path below is resolved against
# the directory the script/notebook is run from, not the location of this file.
# Link: https://archive.ics.uci.edu/ml/datasets/Individual+household+electric+power+consumption

# Load the dataset. Fields are ';'-separated and '?' entries are read as NaN.
data_path = 'household_power_consumption.txt'
try:
    df = pd.read_csv(data_path, sep=';', low_memory=False, na_values=['?'])
except FileNotFoundError as err:
    # Keep the exception type, but tell the user how to fix the problem instead of
    # surfacing only the bare OS error.
    raise FileNotFoundError(
        f"Dataset file '{data_path}' was not found in the current working directory. "
        "Download 'household_power_consumption.txt' from "
        "https://archive.ics.uci.edu/ml/datasets/Individual+household+electric+power+consumption "
        "and place it there before running this script."
    ) from err

# Preprocessing
# Replace missing readings in each measurement column with that column's mean.
# The filled columns are assigned back to ``df`` in one step: calling
# ``df[col].fillna(..., inplace=True)`` modifies a temporary selection, which pandas
# deprecates and which leaves ``df`` unchanged when Copy-on-Write is enabled.
numeric_columns = [
    'Global_active_power',
    'Global_reactive_power',
    'Voltage',
    'Global_intensity',
    'Sub_metering_1',
    'Sub_metering_2',
    'Sub_metering_3',
]
df[numeric_columns] = df[numeric_columns].fillna(df[numeric_columns].mean())

# Merge the separate date and time strings into one timestamp and use it as the index
# (a DatetimeIndex is required for the daily resampling that follows).
# The UCI file stores dates day-first (dd/mm/yyyy); spelling the format out prevents
# ambiguous dates such as 1/2/2007 from being read month-first and avoids slow
# per-element format inference.
df['Timestamp'] = pd.to_datetime(df['Date'] + ' ' + df['Time'], format='%d/%m/%Y %H:%M:%S')
df = df.drop(['Date', 'Time'], axis=1)
df = df.set_index('Timestamp')

# Aggregate the readings into one total of Global_active_power per calendar day.
# NOTE(review): if the recording starts or ends mid-day, the first/last daily totals cover
# only part of a day and may later be flagged as anomalies — confirm against the data.
daily_consumption = df[['Global_active_power']].resample('D').sum()

# Chronological 80/20 split: the earliest 80% of days are used for training and the
# remaining days for testing (no shuffling, so time order is preserved).
train_size = int(len(daily_consumption) * 0.8)

# Fit the scaler on the training days only so the test period's minimum/maximum do not
# leak into preprocessing. Test values are scaled with the training range and may
# therefore fall slightly outside [0, 1].
scaler = MinMaxScaler()
scaler.fit(daily_consumption.iloc[:train_size])
scaled_consumption = scaler.transform(daily_consumption)

train_data = scaled_consumption[:train_size]
test_data = scaled_consumption[train_size:]

# Step 2: Build an Autoencoder
# Each sample has train_data.shape[1] features (the scaled daily total), which is both the
# input and the output width of the network.
input_dim = train_data.shape[1]
encoding_dim = 2

# The input shape is declared with an explicit Input layer rather than the
# ``input_shape=`` keyword on the first Dense layer, which current Keras versions
# discourage. The sigmoid output keeps reconstructions in (0, 1), the range MinMaxScaler
# maps the data it was fitted on to.
autoencoder = Sequential([
    Input(shape=(input_dim,)),
    Dense(encoding_dim, activation='relu'),
    Dense(input_dim, activation='sigmoid'),
])

# Mean squared error between input and reconstruction is the training objective and is
# also the per-sample reconstruction error used later for anomaly scoring.
autoencoder.compile(optimizer='adam', loss='mse')

# The autoencoder learns to reproduce its own input, so the training data is passed as
# both features and targets. 10% of the training data is held out for validation.
epochs = 100
batch_size = 32
history = autoencoder.fit(
    train_data,
    train_data,
    epochs=epochs,
    batch_size=batch_size,
    validation_split=0.1,
    verbose=0,
)

print("Autoencoder trained.")

# Step 3: Identify Anomalies
# Per-day reconstruction error (squared error averaged over the feature axis) on the test set.
reconstructed_data = autoencoder.predict(test_data)
mse = np.mean(np.square(test_data - reconstructed_data), axis=1)

# Derive the threshold from the reconstruction error on the TRAINING data. Taking the
# 95th percentile of the test errors themselves would flag roughly 5% of the test days
# regardless of whether they are unusual; a train-derived threshold instead measures how
# far each test day departs from what the model learned as normal.
train_reconstructed = autoencoder.predict(train_data)
train_mse = np.mean(np.square(train_data - train_reconstructed), axis=1)
threshold = np.quantile(train_mse, 0.95)

# Compute the boolean mask once and reuse it for both the values and their positions.
anomaly_mask = mse > threshold
anomalies = test_data[anomaly_mask]
anomalies_indices = np.where(anomaly_mask)[0]

print(f"Threshold for anomaly detection (95th percentile MSE): {threshold}")
print(f"Number of anomalies detected: {len(anomalies)}")

# Step 4: Visualize
# Map everything back from the scaled range to the original daily totals for plotting.
original_test = scaler.inverse_transform(test_data)
reconstructed_original = scaler.inverse_transform(reconstructed_data)
# inverse_transform rejects an empty array, so skip it when no test day exceeds the threshold.
anomalies_original = scaler.inverse_transform(anomalies) if len(anomalies) else anomalies.copy()

# The test samples are the trailing slice of the daily series, so their timestamps are the
# matching slice of its index.
start_date = daily_consumption.index[train_size]
test_timestamps = daily_consumption.index[train_size:]

# NOTE(review): the plotted values are daily sums of Global_active_power, so the "(kW)"
# unit in the y-axis label may not describe them exactly — confirm the intended unit.
plt.figure(figsize=(12, 6))
plt.plot(test_timestamps, original_test, label='Actual Consumption', color='blue')
plt.plot(test_timestamps, reconstructed_original, label='Reconstructed Consumption', color='green', alpha=0.7)
plt.scatter(test_timestamps[anomalies_indices], anomalies_original.ravel(), color='red', label='Anomaly')
plt.title('Anomaly Detection using Autoencoder (Daily Consumption)')
plt.xlabel('Date')
plt.ylabel('Global Active Power (kW)')
plt.legend()
plt.grid(True)
plt.show()


2025-05-12 09:12:36.006314: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-05-12 09:12:36.243412: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-05-12 09:12:36.389260: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1747041156.727485   60908 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1747041156.819614   60908 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1747041157.488464   60908 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linkin

FileNotFoundError: [Errno 2] No such file or directory: 'household_power_consumption.txt'