In [None]:
# DEEP LEARNING ASSIGNMENT - 2
# For the given dataset with 9 features (6 analog and 3 digital), detect the
# frequency of the anomalies present in the dataset and the date and time at
# which each anomaly occurs, using a GAN with Binary Cross-Entropy (BCE) as the loss.

import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.optimizers import Adam

# Load dataset
df = pd.read_csv('Dataset 14.csv')

# Combine the 'Date\t' and 'Time' columns into a single 'Timestamp' column.
# NOTE(review): the format string parses 'Time' as minutes:seconds.fraction, so
# every resulting timestamp has hour 00 -- confirm the CSV really has no hour field.
df['Timestamp'] = pd.to_datetime(df['Date\t'] + ' ' + df['Time'], format="%d-%m-%Y %M:%S.%f")

# Index the frame by timestamp and drop the two raw source columns, leaving
# only the feature columns. Reassigning (instead of inplace=True) keeps the
# cell easy to follow and avoids mutating a frame that was already displayed.
df = df.set_index('Timestamp').drop(columns=['Date\t', 'Time'])

# Preview the first rows. head must be *called*: a bare `df.head` only shows
# the bound-method repr, which dumps the whole frame instead of five rows.
df.head()

<bound method NDFrame.head of                              P1      P2       P3       P4  P5  P6  P7  P8  P9
Timestamp                                                                    
2011-10-13 00:42:19.700 -0.0469 -0.0385  0.00557  0.01126  36  51   0   0   0
2011-10-13 00:42:23.300 -0.0469 -0.0385  0.00557  0.01126  37  51   0   0   0
2011-10-13 00:42:28.400 -0.0469 -0.0385  0.00557  0.01126  37  51   0   0   0
2011-10-13 00:42:29.900 -0.0469 -0.0385  0.00557  0.01126  37  51   0   0   0
2011-10-13 00:42:31.400 -0.0469 -0.0385  0.00557  0.01126  37  51   0   0   0
...                         ...     ...      ...      ...  ..  ..  ..  ..  ..
2011-11-18 00:39:42.500 -0.0469 -0.0385  0.00577  0.01131  37  51   0   0   0
2011-11-18 00:39:43.000 -0.0469 -0.0385  0.00582  0.01131  37  51   0   0   0
2011-11-18 00:39:44.600 -0.0469 -0.0385  0.00582  0.01131  37  51   0   0   0
2011-11-18 00:39:46.100 -0.0469 -0.0385  0.00582  0.01131  37  51   0   0   0
2011-11-18 00:43:55.500 -0.0469 -0

In [None]:
# Rescale every column of df with a MinMaxScaler fitted on the full frame.
# The scaled values are wrapped in a new DataFrame that keeps the column names
# but gets a fresh positional row index (the Timestamp index is not carried over).
scaler = MinMaxScaler()
scaled_values = scaler.fit_transform(df)
df_normalized = pd.DataFrame(data=scaled_values, columns=df.columns)

# Generator model: maps a 9-dimensional noise vector to a 9-value sample.
# The sigmoid output keeps every generated value inside (0, 1).
generator = Sequential([
    Dense(16, activation='relu', input_shape=(9,)),
    Dense(9, activation='sigmoid')
])

# Discriminator model: maps a 9-value sample to a single sigmoid score,
# trained towards 0.9 for real rows and 0 for generated rows.
discriminator = Sequential([
    Dense(16, activation='relu', input_shape=(9,)),
    Dense(1, activation='sigmoid')
])

# Compile discriminator FIRST, while it is still trainable. Keras keeps the
# `trainable` state that was in effect at compile() time, so compiling it after
# freezing (as was done before) leaves the discriminator unable to learn in its
# own train_on_batch calls.
discriminator.compile(optimizer=Adam(learning_rate=0.0002, beta_1=0.5), loss='binary_crossentropy')

# Combined model --GAN
# Freeze the discriminator only for the stacked model, so that training `gan`
# updates the generator's weights alone.
discriminator.trainable = False
gan_input = Input(shape=(9,))
x = generator(gan_input)
gan_output = discriminator(x)
gan = Model(gan_input, gan_output)

# Compile GAN (once; the earlier duplicate compile call was redundant).
gan.compile(optimizer=Adam(learning_rate=0.0002, beta_1=0.5), loss='binary_crossentropy')

# Training the GAN
# Each loop iteration is a single mini-batch update of the discriminator
# followed by a single mini-batch update of the generator (via `gan`).
epochs = 5000
batch_size = 32

for epoch in range(epochs):
    # Draw a random mini-batch of real, normalised rows (sampled with replacement).
    idx = np.random.randint(0, df_normalized.shape[0], batch_size)
    real_data = df_normalized.iloc[idx]

    # Produce an equally sized batch of generated rows. verbose=0 suppresses the
    # progress bar that predict() would otherwise print on every iteration,
    # which floods the cell output and buries the loss log below.
    noise = np.random.normal(0, 1, size=[batch_size, 9])
    generated_data = generator.predict(noise, verbose=0)

    # Discriminator batch: real rows first, generated rows second.
    # Real rows are labelled 0.9 rather than 1.0; generated rows are labelled 0.
    X = np.concatenate([real_data, generated_data])
    y_dis = np.zeros(2 * batch_size)
    y_dis[:batch_size] = 0.9

    d_loss = discriminator.train_on_batch(X, y_dis)

    # Generator step: fresh noise, labelled as "real" (1) so the stacked model
    # is pushed towards outputs the discriminator scores highly.
    noise = np.random.normal(0, 1, size=[batch_size, 9])
    y_gen = np.ones(batch_size)

    g_loss = gan.train_on_batch(noise, y_gen)

    # Log both losses every 100 iterations.
    if epoch % 100 == 0:
        print(f"Epoch {epoch} | Discriminator Loss: {d_loss} | Generator Loss: {g_loss}")

# Draw one generated sample per row of the normalised dataset.
n_rows = df_normalized.shape[0]
noise = np.random.normal(0, 1, size=[n_rows, 9])
synthetic_normal_data = generator.predict(noise)

# Per-row anomaly score: mean absolute difference between real row i and
# generated row i, averaged over the feature columns.
# NOTE(review): despite the `bce_loss` name this is a mean absolute error, not
# a binary cross-entropy, and each real row is compared with an independently
# drawn synthetic row. The name is kept because the next cell reads it.
abs_error = (df_normalized - synthetic_normal_data).abs()
bce_loss = abs_error.mean(axis=1)

# Rows whose score exceeds this value are reported as anomalies.
anomaly_threshold = 0.7


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Epoch 100 | Discriminator Loss: 0.6860936880111694 | Generator Loss: 0.9977630376815796
Epoch 200 | Discriminator Loss: 0.6945117712020874 | Generator Loss: 0.9649803638458252
Epoch 300 | Discriminator Loss: 0.7094565629959106 | Generator Loss: 0.935075044631958
Epoch 400 | Discriminator Loss: 0.7194341421127319 | Generator Loss: 0.9060400724411011
Epoch 500 | Discriminator Loss: 0.729933500289917 | Generator Loss: 0.8847949504852295
Epoch 600 | Discriminator Loss: 0.7435872554779053 | Generator Loss: 0.8640073537826538
Epoch 700 | Discriminator Loss: 0.7482898235321045 | Generator Loss: 0.8379005789756775
Epoch 800 | Discriminator Loss: 0.7501879930496216 | Generator Loss: 0.8230727314949036
Epoch 900 | Discriminator Loss: 0.7601786255836487 | Generator Loss: 0.8134691715240479
Epoch 1000 | Discriminator Loss: 0.7625596523284912 | Generator Loss: 0.7974849939346313
Epoch 1100 | Discriminator Loss: 0.7722040414810181 | Ge

In [None]:
# Flag the rows whose score exceeds the threshold. The scores carry a positional
# index, so the Timestamp index is first moved back into a column to line up.
is_anomaly = bce_loss > anomaly_threshold
anomalies = df.reset_index().loc[is_anomaly]

# Timestamps at which the flagged rows occur.
anomaly_timestamps = anomalies['Timestamp']

# Print the timestamps.
print("Anomaly Timestamps:")
print(anomaly_timestamps)


Anomaly Timestamps:
4299     2011-10-13 00:28:08.800
4300     2011-10-13 00:28:10.300
138821   2011-11-09 00:28:10.300
Name: Timestamp, dtype: datetime64[ns]
