In [48]:
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path

import torch
from torch import nn
from torch.optim import Adam
from torch.utils.data import DataLoader, TensorDataset

import joblib
import pandas as pd

import sys
sys.path.append("..")  # so we can import from src

# from src.model import build_autoencoder
from src.train import load_creditcard_data, prepare_datasets, scale_data

In [49]:
import torch
from torch import nn

class Autoencoder(nn.Module):
    """
    Simple fully-connected autoencoder for tabular data.

    The encoder maps ``input_dim`` features through the hidden layers down to
    a ``latent_dim``-sized code; the decoder mirrors the encoder (hidden widths
    in reverse order) and maps the code back to ``input_dim`` features.
    ReLU is applied after every linear layer except the last one of each half,
    so neither the latent code nor the reconstruction is clamped.

    Args:
        input_dim: Number of features in each input sample.
        latent_dim: Size of the bottleneck representation.
        hidden_dims: Widths of the encoder's hidden layers, in order. The
            decoder uses the same widths reversed. The default ``(128, 64)``
            gives the architecture input -> 128 -> 64 -> latent -> 64 -> 128
            -> input. An empty sequence gives a single linear layer per half.
    """
    def __init__(self, input_dim: int, latent_dim: int = 16, hidden_dims: tuple = (128, 64)):
        super().__init__()

        widths = list(hidden_dims)

        # Encoder: input -> latent
        self.encoder = self._stack([input_dim, *widths, latent_dim])

        # Decoder: latent -> input (mirror image of the encoder)
        self.decoder = self._stack([latent_dim, *reversed(widths), input_dim])

    @staticmethod
    def _stack(dims) -> nn.Sequential:
        """Chain Linear layers over consecutive ``dims``, with ReLU between them."""
        layers = []
        last_index = len(dims) - 2
        for index, (n_in, n_out) in enumerate(zip(dims[:-1], dims[1:])):
            layers.append(nn.Linear(n_in, n_out))
            # No activation after the final layer of the stack.
            if index < last_index:
                layers.append(nn.ReLU())
        return nn.Sequential(*layers)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Encode ``x`` to the latent code and return its reconstruction."""
        z = self.encoder(x)
        recon = self.decoder(z)
        return recon

def build_autoencoder(input_dim: int, latent_dim: int = 16) -> Autoencoder:
    """
    Construct a fresh ``Autoencoder``.

    Args:
        input_dim: Number of features in each input sample.
        latent_dim: Size of the bottleneck representation.

    Returns:
        A newly initialised ``Autoencoder`` instance.
    """
    autoencoder = Autoencoder(latent_dim=latent_dim, input_dim=input_dim)
    return autoencoder


In [52]:
import pandas as pd
import numpy as np
from pathlib import Path

# Load the raw CSV directly with pandas rather than through the project loader.
csv_path = Path("../data/creditcard.csv")

# Only the file read is guarded. Wrapping the whole pipeline would report any
# FileNotFoundError raised further down as a missing CSV.
try:
    df = pd.read_csv(csv_path)
except FileNotFoundError:
    print(f"File not found at {csv_path.absolute()}")
    print("Current working directory:", Path.cwd())
    # Re-raise: every later cell needs the arrays built below, so continuing
    # would only fail later with a NameError or silently reuse stale kernel state.
    raise

print(f"DataFrame shape: {df.shape}")
print(f"Columns: {df.columns.tolist()}")

# Create X, y manually: every column except 'Class' is a feature, 'Class' is the label.
X = df.drop(columns=['Class']).values.astype(np.float32)
y = df['Class'].values.astype(np.float32)

print(f"Created X shape: {X.shape}, y shape: {y.shape}")

# Split into train / validation / test; the train and validation labels are discarded.
# NOTE(review): the variable names suggest train/val hold only normal transactions;
# confirm against src.train.prepare_datasets.
(X_train_norm, _), (X_val_norm, _), (X_test, y_test) = prepare_datasets(X, y)

# scale_data returns the scaler object together with the three scaled splits.
scaler, X_train_scaled, X_val_scaled, X_test_scaled = scale_data(
    X_train_norm, X_val_norm, X_test
)

print("Train normal:", X_train_scaled.shape)
print("Val normal:", X_val_scaled.shape)
print("Test:", X_test_scaled.shape)

DataFrame shape: (284807, 31)
Columns: ['Time', 'V1', 'V2', 'V3', 'V4', 'V5', 'V6', 'V7', 'V8', 'V9', 'V10', 'V11', 'V12', 'V13', 'V14', 'V15', 'V16', 'V17', 'V18', 'V19', 'V20', 'V21', 'V22', 'V23', 'V24', 'V25', 'V26', 'V27', 'V28', 'Amount', 'Class']
Created X shape: (284807, 30), y shape: (284807,)
Train normal: (255883, 30)
Val normal: (28432, 30)
Test: (57355, 30)


In [53]:
def make_dataloader(x, batch_size=512, shuffle=True):
    """
    Wrap a NumPy array in a ``DataLoader`` of feature-only batches.

    Args:
        x: NumPy array of samples; it is converted with ``torch.from_numpy``,
            so the resulting tensor shares memory with the array.
        batch_size: Number of samples per batch.
        shuffle: Whether the loader shuffles the samples.

    Returns:
        A ``DataLoader`` over a single-tensor ``TensorDataset``; each batch is
        a one-element sequence holding the feature tensor.
    """
    features = TensorDataset(torch.from_numpy(x))
    return DataLoader(features, shuffle=shuffle, batch_size=batch_size)

# Mini-batch size shared by the training and validation loaders.
batch_size = 512

# Training batches are shuffled; validation keeps the original sample order.
train_loader = make_dataloader(
    X_train_scaled,
    shuffle=True,
    batch_size=batch_size,
)
val_loader = make_dataloader(
    X_val_scaled,
    shuffle=False,
    batch_size=batch_size,
)

In [54]:
# Use the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using Device: {device}")

# The feature count is the second dimension of the scaled training matrix.
latent_dim = 16
input_dim = X_train_scaled.shape[1]

# Build the network, then move its parameters to the selected device.
model = build_autoencoder(input_dim=input_dim, latent_dim=latent_dim)
model = model.to(device)

# Sanity checks: architecture, type, and parameter counts.
print("Model repr:\n", model)
print("\nModel class:", model.__class__)
print("Is nn.Module?", isinstance(model, nn.Module))

params = list(model.parameters())
print("Number of parameter tensors:", len(params))
print("Total number of parameters:", sum(tensor.numel() for tensor in params))



Using Device: cpu
Model repr:
 Autoencoder(
  (encoder): Sequential(
    (0): Linear(in_features=30, out_features=128, bias=True)
    (1): ReLU()
    (2): Linear(in_features=128, out_features=64, bias=True)
    (3): ReLU()
    (4): Linear(in_features=64, out_features=16, bias=True)
  )
  (decoder): Sequential(
    (0): Linear(in_features=16, out_features=64, bias=True)
    (1): ReLU()
    (2): Linear(in_features=64, out_features=128, bias=True)
    (3): ReLU()
    (4): Linear(in_features=128, out_features=30, bias=True)
  )
)

Model class: <class '__main__.Autoencoder'>
Is nn.Module? True
Number of parameter tensors: 12
Total number of parameters: 26542


In [56]:
# Reconstruction objective: squared error averaged over all elements.
criterion = nn.MSELoss(reduction="mean")

# Adam over every model parameter, learning rate 1e-3.
optimizer = Adam(params=model.parameters(), lr=1e-3)