<a href="https://colab.research.google.com/github/AliciaFalconCaro/PythonColabExamples/blob/main/AnomalyDetectionWithAutoencodersExample.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

This example is based on the tutorial presented in: https://www.geeksforgeeks.org/anomaly-detection-with-tensorflow/

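Next, we load the dataset. The code below reads `creditcard.csv` from the notebook's working directory, so the file must be placed there first (for example, copied from the Drive folder). The dataset can be downloaded from: https://drive.google.com/file/d/1M4CIY_xH-8ySb615sdFwPvmWVzMLK8uR/view?usp=sharing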

In [None]:
import csv
import pandas as pandas

# Read the CSV from the notebook's working directory into a DataFrame.
Dataset = pandas.read_csv('creditcard.csv')

# Preview the first rows. A bare last expression is rendered by the notebook
# as a rich table, and head() avoids dumping the entire frame.
Dataset.head()

In [None]:
# We remove (drop) the "Time" column as it may not be relevant for anomaly detection.
# errors='ignore' keeps this cell re-runnable: once "Time" is gone, executing
# the cell again is a no-op instead of raising KeyError.
Dataset = Dataset.drop(columns=['Time'], errors='ignore')

In [None]:
#Data cleaning and preprocessing
from sklearn.preprocessing import StandardScaler

# Standardize the 'Amount' feature (zero mean, unit variance).
# NOTE(review): the scaler is fitted on every row of Dataset, i.e. before any
# train/test split, so statistics of held-out rows leak into the scaling.
scaler = StandardScaler()
amount_2d = Dataset['Amount'].to_numpy().reshape(-1, 1)  # scaler needs a 2-D input
# ravel() turns the (n, 1) result back into a 1-D column
Dataset['Amount'] = scaler.fit_transform(amount_2d).ravel()

# Create one-hot encoding for the 'Class' column ('Class_0' / 'Class_1').
# The guard keeps the cell re-runnable: after the first execution 'Class' has
# been replaced by its dummy columns and encoding again would raise KeyError.
if 'Class' in Dataset.columns:
    # Convert to string first; get_dummies derives the column suffix from it.
    Dataset['Class'] = Dataset['Class'].astype(str)
    Dataset = pandas.get_dummies(Dataset, columns=['Class'], prefix=['Class'])

In [None]:
# Split the dataset into train and test sets (Train:0.8/Test:0.2)

from sklearn.model_selection import train_test_split

# One-hot label columns; everything else is treated as a feature.
label_columns = ['Class_0', 'Class_1']

# Stratify on the label so both partitions keep the same class ratio. Anomaly
# classes are usually a small minority, and a purely random split can leave
# the test set with very few of them.
train_data, test_data = train_test_split(
    Dataset,
    test_size=0.2,
    random_state=42,
    stratify=Dataset['Class_1'],
)

# Extract features (X) and labels (y) for training and testing
X_train = train_data.drop(columns=label_columns).values
y_train = train_data[label_columns].values

X_test = test_data.drop(columns=label_columns).values
y_test = test_data[label_columns].values


In [None]:
#Machine Learning Training
import tensorflow as tf
from keras import layers, models

# Build the Autoencoder Model
def build_autoencoder(input_shape, output_activation='tanh'):
    """Build an uncompiled, fully connected autoencoder (Keras Sequential).

    Layer widths are 64 -> 32 -> 16 (bottleneck) -> 32 -> 64 -> n_features,
    with ReLU on every hidden layer.

    Args:
        input_shape: Number of input features as an int, or a shape
            tuple/list without the batch dimension (e.g. ``29`` or ``(29,)``).
        output_activation: Activation of the reconstruction layer. The default
            ``'tanh'`` preserves the original behavior but limits outputs to
            [-1, 1], so feature values outside that range cannot be
            reconstructed exactly; pass ``'linear'`` for unbounded outputs.

    Returns:
        The uncompiled ``Sequential`` model.
    """
    # InputLayer expects a shape tuple, so wrap a bare feature count.
    if hasattr(input_shape, '__iter__'):
        feature_shape = tuple(input_shape)
    else:
        feature_shape = (int(input_shape),)
    n_features = feature_shape[-1]

    model = models.Sequential()
    # Encoder layer part
    model.add(layers.InputLayer(input_shape=feature_shape))
    model.add(layers.Dense(64, activation='relu'))
    model.add(layers.Dense(32, activation='relu'))
    model.add(layers.Dense(16, activation='relu'))  # bottleneck layer
    # Decoder layer part (mirrors the encoder)
    model.add(layers.Dense(32, activation='relu'))
    model.add(layers.Dense(64, activation='relu'))
    # Reconstruction layer: one unit per input feature
    model.add(layers.Dense(n_features, activation=output_activation))
    return model


# Seed the Python, NumPy and TensorFlow RNGs before the model is created so
# weight initialization (and therefore the training run) is repeatable after
# a kernel restart.
tf.keras.utils.set_random_seed(42)

input_shape = X_train.shape[1]  # number of feature columns
autoencoder = build_autoencoder(input_shape)

# Compile the Model
# The network regresses its own input, so classification 'accuracy' carries no
# meaning here; track mean absolute error alongside the MSE loss instead.
autoencoder.compile(optimizer='rmsprop', loss='mse', metrics=['mae'])

# Train the Autoencoder: input and target are the same array.
# shuffle=False keeps the batch order identical in every epoch.
history = autoencoder.fit(
    X_train,
    X_train,
    epochs=25,
    batch_size=64,
    shuffle=False,
    validation_data=(X_test, X_test),
)


Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


In [None]:
#Model Evaluation
import numpy as np
from sklearn.metrics import accuracy_score

# Imported here because this cell is the only user of classification_report.
from sklearn.metrics import classification_report

# Evaluate the Autoencoder: per-sample reconstruction error
# (mean squared difference over the feature axis).
predictions = autoencoder.predict(X_test)
mse = np.mean(np.power(X_test - predictions, 2), axis=1)

# Set a threshold for anomaly detection (fixed cutoff on the reconstruction error)
threshold = 0.6

# Classify anomalies based on the threshold
anomalies = mse > threshold

# Evaluate the Anomaly Detection Model
# y_test is one-hot over ['Class_0', 'Class_1'], so argmax recovers the 0/1 label.
y_true = np.argmax(y_test, axis=1)
y_pred = anomalies.astype(int)

accuracy = accuracy_score(y_true, y_pred)
print(f'Test Accuracy: {accuracy:.4f}')

# Accuracy alone is misleading when one class is rare: predicting "normal" for
# every sample can already score highly. Report per-class precision, recall
# and F1 as well; zero_division=0 avoids warnings if a class is never predicted.
print(classification_report(
    y_true,
    y_pred,
    labels=[0, 1],
    target_names=['normal (0)', 'anomaly (1)'],
    digits=4,
    zero_division=0,
))
