Implement anomaly detection for the given credit card dataset using an Autoencoder, and 
build the model using the following steps: 
a. Import required libraries 
b. Upload / access the dataset 
c. Encoder converts the input into a latent representation 
d. Decoder network converts the latent representation back to the original input 
e. Compile the models with Optimizer, Loss, and Evaluation Metrics

In [None]:
# --- a. Import required libraries ---
import numpy as np, pandas as pd, tensorflow as tf
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import classification_report, confusion_matrix
from tensorflow.keras import Input, Model
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping

# Reproducibility: a single call that seeds Python's `random`, NumPy and
# TensorFlow together. Seeding only NumPy and TensorFlow leaves Python's
# `random` module unseeded, so runs are not guaranteed to repeat.
tf.keras.utils.set_random_seed(42)


In [11]:
# --- b. Upload / access the dataset ---
data = pd.read_csv("creditcard.csv")

# Label vector, and the feature matrix = every column except the timestamp
# and the label itself.
y = data["Class"].values
X = data.drop(columns=["Time", "Class"]).values

# time-aware split: the first 80% of rows train, the remaining 20% validate
# (assumes the CSV rows are already in chronological order -- TODO confirm)
n = data.shape[0]
i1 = int(n * 0.8)

X_tr_raw, X_va_raw = X[:i1], X[i1:]
y_tr, y_va = y[:i1], y[i1:]

# train autoencoder only on normal data (Class == 0)
normal_mask = (y_tr == 0)
Xtr_norm = X_tr_raw[normal_mask]

# scale based ONLY on normal training data; the validation rows are only
# transformed with those statistics, never used to fit them
scaler = MinMaxScaler()
Xtr = scaler.fit_transform(Xtr_norm)
Xva = scaler.transform(X_va_raw)


In [12]:
# --- c. Encoder converts into latent representation ---
d = Xtr.shape[1]          # number of input features
inp = Input(shape=(d,))

# Funnel the input through progressively narrower ReLU layers (32 -> 16).
x = inp
for width in (32, 16):
    x = Dense(width, activation="relu")(x)

# latent (bottleneck): 8 units
z = Dense(8, activation="relu")(x)


In [13]:
# --- d. Decoder reconstructs back ---
# Expand the latent code to 16 units, apply 10% dropout, then project back
# to the original feature count with a sigmoid output.
x = Dropout(0.1)(Dense(16, activation="relu")(z))
out = Dense(d, activation="sigmoid")(x)

# End-to-end model: input features -> reconstructed features.
autoencoder = Model(inputs=inp, outputs=out)


In [22]:
# --- e. Compile + Train + Evaluate ---
# Optimizer: Adam with learning rate 1e-3.
# Loss: mean squared error between the input and its reconstruction.
# Evaluation metric: mean absolute error, reported next to the loss each epoch.
autoencoder.compile(
    optimizer=tf.keras.optimizers.Adam(1e-3),
    loss="mse",
    metrics=["mae"],
)

# Stop once val_loss has not improved for 5 consecutive epochs and roll the
# model back to the weights of the best epoch.
cb = EarlyStopping(monitor="val_loss", patience=5, restore_best_weights=True)

# Input and target are the same array: the network learns to reproduce Xtr.
# validation_split=0.1 holds part of Xtr out of training for monitoring.
# The History object is kept so the per-epoch curves can be inspected later.
history = autoencoder.fit(
    Xtr, Xtr,
    epochs=100,
    batch_size=256,
    shuffle=True,
    validation_split=0.1,
    callbacks=[cb],
    verbose=1
)

# reconstruction error function
def recon_err(m, X):
    """Return the per-row mean squared reconstruction error of `m` on `X`.

    Parameters
    ----------
    m : object exposing ``predict(X, verbose=0)`` that returns an array with
        the same shape as `X` (here: the trained autoencoder).
    X : 2-D array of samples (rows) by features (columns).

    Returns
    -------
    1-D array with one value per row of `X`: the squared difference between
    the row and its reconstruction, averaged over the feature axis.
    """
    residual = X - m.predict(X, verbose=0)
    return np.square(residual).mean(axis=1)

# Per-sample reconstruction errors on the (normal-only) training rows and on
# the validation rows.
err_tr, err_va = (recon_err(autoencoder, split) for split in (Xtr, Xva))

# threshold at 99.5 percentile of normal-train errors
thr = np.percentile(err_tr, 99.5)

# Flag a validation row as an anomaly (1) when its error exceeds the
# threshold, otherwise mark it normal (0).
yhat_va = np.greater(err_va, thr).astype(int)



Epoch 1/100
[1m800/800[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - loss: 0.0011 - val_loss: 0.0011
Epoch 2/100
[1m800/800[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - loss: 0.0011 - val_loss: 0.0011
Epoch 3/100
[1m800/800[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - loss: 0.0011 - val_loss: 0.0011
Epoch 4/100
[1m800/800[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - loss: 0.0011 - val_loss: 0.0011
Epoch 5/100
[1m800/800[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - loss: 0.0011 - val_loss: 0.0011
Epoch 6/100
[1m800/800[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - loss: 0.0011 - val_loss: 0.0011
Epoch 7/100
[1m800/800[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - loss: 0.0011 - val_loss: 0.0011
Epoch 8/100
[1m800/800[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - loss: 0.0011 - val_loss: 0.0011
Epoch 9/100
[1m800/800[0m [32

In [23]:
# Summarise the detector on the validation split: the decision threshold,
# the confusion matrix (true labels vs. predicted labels) and per-class
# precision / recall / F1.
cm = confusion_matrix(y_va, yhat_va)
report = classification_report(
    y_va,
    yhat_va,
    digits=4,
    target_names=["normal", "fraud"],
)

print("Threshold:", thr)
print("Confusion matrix:\n", cm)
print(report)



Threshold: 0.006922152342632339
Confusion matrix:
 [[56632   255]
 [   29    46]]
              precision    recall  f1-score   support

      normal     0.9995    0.9955    0.9975     56887
       fraud     0.1528    0.6133    0.2447        75

    accuracy                         0.9950     56962
   macro avg     0.5762    0.8044    0.6211     56962
weighted avg     0.9984    0.9950    0.9965     56962

