In [13]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from sklearn.metrics import f1_score
import numpy as np
from tqdm import tqdm
from Models.AutoEncoder import AutoEncoder, AE_validDataset, AE_trainDataset
from Models.DAGMM import *
from utils.utils import process_data

In [14]:
# Pick the compute device: use CUDA when torch reports it as available,
# otherwise fall back to the CPU. The chosen device is echoed for the reader.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(device)

cuda


In [27]:
# --- Reproducibility ---
# Seed the global torch RNG (weight initialisation, DataLoader shuffling) and the
# legacy NumPy RNG so that "Restart Kernel -> Run All" repeats the same run.
# NOTE(review): helpers outside this notebook may use other RNGs — confirm.
seed = 42
torch.manual_seed(seed)
np.random.seed(seed)

# --- Optimisation ---
num_epochs = 100   # number of passes over the training loader
batch_size = 128   # batch size for both the training and validation loaders
lr = 1e-4          # learning rate handed to the AdamW optimizer
l1_lambda = 1e-5   # weight of the L1 parameter penalty in the non-DAGMM loss

# --- Model ---
encoding_dim = 28  # forwarded as DAGMM(encoding_dim=...)
n_gmm = 2          # forwarded as DAGMM(n_gmm=...)

# --- Loss weights ---
# NOTE(review): neither value is referenced by any other cell of this notebook;
# presumably they were meant to be forwarded to the DAGMM loss — confirm.
lambda_energy = 0.1
lambda_cov_diag = 0.005

In [20]:
# Feature Selection
# Column names routed to the categorical branch of the pipeline.
cat_features = [
    'Card',
    'Gender',
    'Card Brand',
    'Card Type',
    'Expires',
    'Has Chip',
    'Year PIN last Changed',
    'Whether Security Chip is Used',
    'Day',
]

# Column names routed to the numerical branch of the pipeline.
num_features = [
    'Current Age',
    'Retirement Age',
    'Per Capita Income - Zipcode',
    'Zipcode',
    'Yearly Income',
    'Total Debt',
    'Credit Score',
    'Credit Limit',
    'Amount',
]

# Column names handed to process_data as the columns to discard.
discarded = [
    'User',
    'Birth Year',
    'Birth Month',
]

# Shows 5 * (#categorical) + (#numerical).
# NOTE(review): the factor 5 is presumably the per-categorical embedding width — confirm.
print(5 * len(cat_features) + len(num_features))


54


In [24]:

# Data preprocessing
# process_data returns (train split, validation split, label encoders); each
# split is then unpacked into its categorical features, numerical features and labels.
# NOTE(review): the last recorded run of this cell failed with
# "TypeError: unsupported operand type(s) for -: 'str' and 'str'"; presumably a
# column listed in num_features still holds strings — confirm inside process_data.
data_path = '../data/[24-2 DS_Project2] Data.csv'
train_split, valid_split, label_encoders = process_data(data_path, cat_features, num_features, discarded)
train_cat_X, train_num_X, train_y = train_split
valid_cat_X, valid_num_X, valid_y = valid_split

TRANSITION
IQR


TypeError: unsupported operand type(s) for -: 'str' and 'str'

In [23]:
# Training split: features only, reshuffled by the loader on every pass.
train_dataset = AE_trainDataset(train_cat_X, train_num_X, device)
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)

# Validation split: features plus labels, served in a fixed order.
valid_dataset = AE_validDataset(valid_cat_X, valid_num_X, valid_y, device)
valid_loader = DataLoader(dataset=valid_dataset, batch_size=batch_size, shuffle=False)

NameError: name 'train_cat_X' is not defined

In [26]:
# Build the DAGMM model and move it onto `device` before creating the optimizer.
# The datasets are constructed with `device`, so batches presumably live there;
# the model's parameters have to be on the same device for the forward pass.
model = DAGMM(
    encoding_dim=encoding_dim,
    n_gmm=n_gmm,
    cat_features=cat_features,
    num_features=num_features,
).to(device)

# AdamW over all model parameters, with decoupled weight decay.
optimizer = optim.AdamW(model.parameters(), lr=lr, weight_decay=1e-4)
# Mean-squared-error criterion; only the non-DAGMM branch of the training loop uses it.
criterion = nn.MSELoss()

In [None]:
# Training Loop
#
# Trains `model` for `num_epochs` epochs. On every 10th epoch (0, 10, 20, ...)
# the validation set is scored, samples above the 90th percentile of the scores
# are flagged as anomalies, and the weights are checkpointed to 'best_model.pth'
# whenever the resulting F1 beats the best value seen so far.

# No earlier cell assigns `use_dagmm`, so derive it from the model that was
# actually built instead of depending on leftover kernel state.
use_dagmm = isinstance(model, DAGMM)

best_f1 = 0
for epoch in tqdm(range(num_epochs)):
    # Training Phase
    model.train()
    train_loss = 0
    # Batch variables are named differently from the `cat_features` /
    # `num_features` column-name lists so this cell does not overwrite them.
    for cat_batch, num_batch in train_loader:
        optimizer.zero_grad()
        if use_dagmm:
            # DAGMM takes one tensor: categorical and numerical parts joined on dim 1.
            inputs = torch.cat([cat_batch, num_batch], dim=1)
            _enc, dec, z, gamma = model(inputs)
            loss, _recon_loss, _energy_loss, _cov_diag_loss = model.loss_function(
                inputs, dec, z, gamma
            )
        else:
            # Non-DAGMM path: the model returns (prediction, target); the loss is
            # MSE plus an L1 penalty over every parameter.
            y_hat, y = model(cat_batch, num_batch)
            mse_loss = criterion(y_hat, y)
            l1_reg = sum(torch.norm(param, 1) for param in model.parameters())
            loss = mse_loss + l1_lambda * l1_reg

        loss.backward()
        optimizer.step()
        train_loss += loss.item()

    # Average of the per-batch losses for this epoch.
    train_loss /= len(train_loader)

    # Validation Phase (every 10 epochs)
    if epoch % 10 == 0:
        model.eval()
        anomaly_scores = []
        all_labels = []

        with torch.no_grad():
            for cat_batch, num_batch, labels in valid_loader:
                if use_dagmm:
                    inputs = torch.cat([cat_batch, num_batch], dim=1)
                    _enc, _dec, z, _gamma = model(inputs)
                    # Per-sample score from the model's current mixture attributes.
                    sample_errors = model.compute_energy(z, model.phi, model.mu, model.cov)
                else:
                    y_hat, y = model(cat_batch, num_batch)
                    # Per-sample mean squared error over dim 1.
                    sample_errors = torch.mean((y_hat - y) ** 2, dim=1)

                anomaly_scores.extend(sample_errors.cpu().numpy())
                all_labels.extend(labels.cpu().numpy())

        # Performance Evaluation
        # The threshold is the 90th percentile of the validation scores themselves,
        # so roughly the top 10% of samples are predicted as positive.
        threshold = np.percentile(anomaly_scores, 90)
        predictions = (np.array(anomaly_scores) > threshold).astype(int)
        f1 = f1_score(all_labels, predictions)

        print(f"Epoch {epoch}: Train Loss = {train_loss:.4f}, Valid F1 = {f1:.4f}")

        # Save Best Model
        if f1 > best_f1:
            best_f1 = f1
            torch.save(model.state_dict(), 'best_model.pth')

print(f"Best F1 Score: {best_f1}")