# Automated Anomaly Detection
**Objective**: Understand and practice automated anomaly detection using various techniques.

**Task**: Autoencoders for Anomaly Detection

**Steps**:
1. Data Set: Download a dataset of electricity consumption data.
2. Build an Autoencoder: Construct a simple neural-network autoencoder and train it on the normal consumption data.
3. Identify Anomalies: Use the trained model to reconstruct the data and identify anomalies based on reconstruction error.
4. Visualize: Plot both the actual and reconstructed data to highlight anomalies.

In [3]:
# write your code from here

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras import regularizers
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.models import Model
from tensorflow.keras.utils import set_random_seed

# 1. Simulate or Load Electricity Consumption Data
# For the demo, simulate normal consumption and inject a few known anomalies.
SEED = 42
np.random.seed(SEED)
time_steps = 200
# Normal consumption around 100 with small noise
data_normal = np.random.normal(loc=100, scale=5, size=time_steps)

# Inject anomalies (index -> value): two high spikes and one low dip.
# The indices are remembered so these readings can be excluded from training.
injected_anomalies = {50: 150, 120: 30, 180: 160}
for anomaly_idx, anomaly_value in injected_anomalies.items():
    data_normal[anomaly_idx] = anomaly_value

df = pd.DataFrame({'Consumption': data_normal})

# Boolean mask selecting the readings that are known to be normal.
is_normal = np.ones(time_steps, dtype=bool)
is_normal[list(injected_anomalies)] = False

# 2. Preprocess: scale so that the *normal* data spans [0, 1]
# The scaler is fitted on normal readings only; otherwise the anomalies would
# define the min/max and squeeze normal behaviour into a narrow band.
# Anomalous readings therefore land outside [0, 1] after the transform.
scaler = MinMaxScaler()
scaler.fit(df.loc[is_normal, ['Consumption']])
data_scaled = scaler.transform(df[['Consumption']])
train_scaled = data_scaled[is_normal]

# 3. Build Autoencoder Model
# Seeding NumPy alone does not make Keras weight initialisation and batch
# shuffling repeatable, so seed Python, NumPy and the backend together.
set_random_seed(SEED)

input_dim = data_scaled.shape[1]  # 1 feature
encoding_dim = 8  # size of encoded representation

input_layer = Input(shape=(input_dim,))
# Encoder
encoded = Dense(encoding_dim, activation='relu',
                activity_regularizer=regularizers.l1(1e-5))(input_layer)
encoded = Dense(4, activation='relu')(encoded)
# Decoder
decoded = Dense(encoding_dim, activation='relu')(encoded)
# The sigmoid output is bounded to [0, 1], i.e. the range of the scaled normal
# data, so out-of-range readings cannot be reproduced and get a large error.
decoded = Dense(input_dim, activation='sigmoid')(decoded)

autoencoder = Model(inputs=input_layer, outputs=decoded)
autoencoder.compile(optimizer='adam', loss='mse')

# 4. Train the autoencoder on normal data only, so that it learns to
# reconstruct normal behaviour and nothing else.
history = autoencoder.fit(train_scaled, train_scaled,
                          epochs=100,
                          batch_size=16,
                          shuffle=True,
                          validation_split=0.1,
                          verbose=0)

# 5. Reconstruct the full series and compute the per-sample reconstruction error
reconstructions = autoencoder.predict(data_scaled)
mse = np.mean(np.square(data_scaled - reconstructions), axis=1)

# 6. Detect anomalies based on a reconstruction error threshold
# The threshold comes from the error on normal data (mean + 3 standard
# deviations). A percentile of the scored data itself would always flag a
# fixed share of points, whether or not they are anomalous.
normal_mse = mse[is_normal]
threshold = normal_mse.mean() + 3 * normal_mse.std()
df['reconstruction_error'] = mse
df['anomaly'] = mse > threshold

# 7. Plot original vs reconstructed with anomalies highlighted
reconstructed_consumption = scaler.inverse_transform(reconstructions).ravel()
flagged = df['anomaly']

plt.figure(figsize=(14, 6))
plt.plot(df.index, df['Consumption'], label='Original Consumption')
plt.plot(df.index, reconstructed_consumption, label='Reconstructed Consumption')
# zorder keeps the anomaly markers on top of the two line plots.
plt.scatter(df.index[flagged], df.loc[flagged, 'Consumption'],
            color='red', label='Anomalies', s=100, zorder=3)
plt.title('Electricity Consumption - Autoencoder Anomaly Detection')
plt.xlabel('Time Step')
plt.ylabel('Consumption')
plt.legend()
plt.grid(True)
plt.show()

# Optional: print anomalies
print("Anomalies detected at indices:")
print(df[flagged])


ModuleNotFoundError: No module named 'tensorflow'