In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd
from constants import MODEL1, MODEL2, MODEL3
from model import Autoencoder
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader, TensorDataset
from sklearn.metrics import precision_score, recall_score, f1_score, confusion_matrix, roc_curve, auc
import time

In [2]:
train_X = pd.read_csv("data/train_X.csv")
X_test_scaled = pd.read_csv("data/X_test.csv")
y_test = pd.read_csv("data/y_test.csv")

print(train_X.shape[1])  
print(X_test_scaled.shape[1]) 

train_tensor = torch.tensor(train_X.values, dtype=torch.float32)
train_dataset = TensorDataset(train_tensor)
train_loader = DataLoader(train_dataset, batch_size=1024, shuffle=True)



18
18


In [3]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
MODEL1 = Autoencoder(input_dim=18, hidden_dims=[16, 8])
model = MODEL1.to(device)  # Change MODEL1 to MODEL2 or MODEL3 as needed
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
criterion = nn.BCELoss()

num_epochs = 20

In [15]:
for i, batch in enumerate(train_loader):
    x = batch[0]

    print(f"\nBatch {i}")
    print("Dtype:", x.dtype)
    print("NaNs:", torch.isnan(x).any().item())
    print("Infs:", torch.isinf(x).any().item())
    print("Min/Max:", x.min().item(), x.max().item())
    
    break  # just one batch for now



Batch 0
Dtype: torch.float32
NaNs: False
Infs: False
Min/Max: 0.0 1.0


In [4]:
losses = []
model.train()
for epoch in range(num_epochs):
    start_time = time.time()
    epoch_loss = 0.0
    for batch in train_loader:
        x = batch[0].to(device)
        optimizer.zero_grad()
        outputs = model(x)
        loss = criterion(outputs, x)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item() * x.size(0)

    avg_loss = epoch_loss / len(train_loader.dataset)
    losses.append(avg_loss)
    elapsed = time.time() - start_time
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {avg_loss:.6f}, Time: {elapsed:.2f}s")

Epoch 1/20, Loss: 0.164532, Time: 38.28s
Epoch 2/20, Loss: 0.149115, Time: 36.80s
Epoch 3/20, Loss: 0.148763, Time: 36.48s
Epoch 4/20, Loss: 0.148508, Time: 36.87s
Epoch 5/20, Loss: 0.148346, Time: 37.26s
Epoch 6/20, Loss: 0.148254, Time: 36.46s
Epoch 7/20, Loss: 0.148204, Time: 36.79s
Epoch 8/20, Loss: 0.148165, Time: 36.45s
Epoch 9/20, Loss: 0.148131, Time: 36.46s
Epoch 10/20, Loss: 0.148103, Time: 36.31s
Epoch 11/20, Loss: 0.148063, Time: 36.31s
Epoch 12/20, Loss: 0.148000, Time: 36.86s
Epoch 13/20, Loss: 0.147942, Time: 37.16s
Epoch 14/20, Loss: 0.147772, Time: 36.98s
Epoch 15/20, Loss: 0.147713, Time: 36.50s
Epoch 16/20, Loss: 0.147690, Time: 36.49s
Epoch 17/20, Loss: 0.147675, Time: 36.42s
Epoch 18/20, Loss: 0.147663, Time: 37.72s
Epoch 19/20, Loss: 0.147653, Time: 37.30s
Epoch 20/20, Loss: 0.147634, Time: 36.41s


In [None]:
X_test_tensor = torch.tensor(X_test_scaled.values, dtype=torch.float32).to(device)
model.eval()
with torch.no_grad():
    reconstructions = model(X_test_tensor).cpu().numpy()

reconstruction_errors = np.mean((X_test_scaled - reconstructions) ** 2, axis=1)
X_test_scaled = np.nan_to_num(X_test_scaled)
reconstructions = np.nan_to_num(reconstructions)

reconstruction_errors = np.mean((X_test_scaled - reconstructions) ** 2, axis=1)

threshold = np.percentile(reconstruction_errors, 95)

y_pred = (reconstruction_errors > threshold).astype(int)

precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)

ValueError: could not determine the shape of object type 'DataFrame'

In [None]:
print(f"\nThreshold: {threshold:.6f}")
print(f"Precision: {precision:.4f}")
print(f"Recall:    {recall:.4f}")
print(f"F1 Score:  {f1:.4f}")
print("Confusion Matrix:")
print(conf_matrix)

fpr, tpr, _ = roc_curve(y_test, reconstruction_errors)
roc_auc = auc(fpr, tpr)

plt.figure(figsize=(6, 5))
plt.plot(fpr, tpr, label=f"AUC = {roc_auc:.4f}")
plt.plot([0, 1], [0, 1], linestyle='--', color='gray')
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.title("Anomaly Detection")
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()

