# 데이터 학습

In [1]:
import os

# Step the working directory up one level; later cells in this notebook use
# relative "data/..." paths and import the `models` package.
# NOTE: the path is relative, so re-running this cell moves up one more level.
os.chdir("../")

In [2]:
import pickle

import random
import numpy as np
import torch
import torch.nn as nn

import xgboost as xgb

from models.tab_resnet import TabResNet, LogisticRegression

Note: You have installed the 'manylinux2014' variant of XGBoost. Certain features such as GPU algorithms or federated learning are not available. To use these features, please upgrade to a recent Linux distro with glibc 2.28+, and install the 'manylinux_2_28' variant.


In [3]:
# Pin every random number generator this notebook touches to seed 0:
# NumPy's legacy global RNG, Python's built-in `random`, and PyTorch.
np.random.seed(0)
random.seed(0)
# Kept as the final expression so the cell still displays the torch Generator.
torch.manual_seed(0)

<torch._C.Generator at 0x7f3f41508030>

In [4]:
def train_model(X, y, model, loss_fn, optimizer, num_epochs):
    """Fit ``model`` on the whole of ``(X, y)`` with full-batch gradient steps.

    Each epoch runs one forward pass of ``model.network`` over all of ``X``,
    evaluates ``loss_fn`` against ``y`` and applies a single ``optimizer``
    step. The loss is printed for epochs 1, 101, 201, ...

    Args:
        X: Input tensor passed to ``model.network``; dimension 0 is treated
            as the batch axis.
        y: Targets, handed to ``loss_fn`` unchanged.
        model: Object exposing a callable ``network`` attribute that maps
            ``X`` to predictions.
        loss_fn: Callable ``loss_fn(prediction, target)`` returning a scalar
            tensor that supports ``backward()``.
        optimizer: Optimizer providing ``zero_grad()`` and ``step()`` for the
            parameters being trained.
        num_epochs: Number of full-batch updates to perform.

    Returns:
        None. The parameters held by ``optimizer`` are updated in place.
    """
    for epoch in range(num_epochs):
        # Forward pass
        y_pred = model.network(X)

        # Drop singleton dimensions but never the batch dimension: a bare
        # ``squeeze()`` would turn a one-sample output of shape (1, C) into
        # (C,), and the loss would then reject the (1,)-shaped target.
        if y_pred.dim() > 1:
            kept_dims = [size for size in y_pred.shape[1:] if size != 1]
            y_pred = y_pred.reshape(y_pred.shape[0], *kept_dims)

        # Compute loss
        loss = loss_fn(y_pred, y)

        # Backward pass: clear stale gradients, backpropagate, update.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if epoch % 100 == 0:
            print(f'Epoch {epoch+1}/{num_epochs}, Loss: {loss.item()}')

In [5]:
# Names of the dataset folders under data/ that the training loop iterates over.
datasets = [
    "Adult",
    "Bank Marketing",
    "Statlog (German Credit Data)",
    "Wine Quality",
]

# Default selection; the `for dataset in datasets` loop in this notebook
# rebinds this name.
dataset = datasets[0]

In [6]:
# For every dataset: fit an XGBoost baseline, a logistic regression and a
# tabular ResNet, report test accuracy for each, and save all three models
# next to the data.
for dataset in datasets:
    path = f"data/{dataset}"

    # Pre-split feature/label arrays stored as .npy files.
    X_train = np.load(f"{path}/X_train.npy")
    y_train = np.load(f"{path}/y_train.npy")
    X_test = np.load(f"{path}/X_test.npy")
    y_test = np.load(f"{path}/y_test.npy")

    # --- XGBoost baseline (trained directly on the NumPy arrays) ---
    xgb_clf = xgb.XGBClassifier()
    xgb_clf.fit(X_train, y_train)

    xgb_y_pred = xgb_clf.predict(X_test)
    xgb_accuracy = np.mean(xgb_y_pred == y_test)
    print(f"XGBoost Accuracy: {xgb_accuracy}")

    # The PyTorch models need tensors; y_test stays a NumPy array because it
    # is only compared against NumPy predictions below.
    X_train = torch.from_numpy(X_train).float()
    y_train = torch.from_numpy(y_train).long()
    X_test = torch.from_numpy(X_test).float()

    # SECURITY: pickle.load can execute arbitrary code; only load metadata
    # files that were produced by this project.
    # The context manager closes the file handle deterministically.
    with open(f"{path}/feature_metadata.pkl", "rb") as metadata_file:
        feature_metadata = pickle.load(metadata_file)
    xgb_clf.save_model(f"{path}/xgb_model.json")

    input_dim = X_train.shape[1]
    # NOTE(review): every dataset is treated as a two-class problem —
    # confirm the labels are binary for all entries in `datasets`.
    output_dim = 2

    # --- Logistic regression ---
    lr_model = LogisticRegression(input_dim, output_dim)
    loss_fn = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(lr_model.parameters(), lr=0.01, weight_decay=0.01)

    train_model(X_train, y_train, lr_model, loss_fn, optimizer, 1000)

    # --- Tabular ResNet ---
    resnet_model = TabResNet(input_dim, output_dim, num_blocks=1)
    loss_fn = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(resnet_model.parameters(), lr=0.01, weight_decay=0.01)

    train_model(X_train, y_train, resnet_model, loss_fn, optimizer, 1000)

    # Evaluate in inference mode: eval() makes any dropout / batch-norm layers
    # (if the models contain them) behave deterministically and stops the
    # test-set forward pass from altering buffers that are saved below;
    # no_grad() skips building the autograd graph.
    lr_model.eval()
    resnet_model.eval()
    with torch.no_grad():
        lr_y_pred = lr_model(X_test).argmax(dim=1).numpy()
        lr_accuracy = np.mean(lr_y_pred == y_test)
        print(f"Logistic Regression Accuracy: {lr_accuracy}")

        resnet_y_pred = resnet_model(X_test).argmax(dim=1).numpy()
        resnet_accuracy = np.mean(resnet_y_pred == y_test)
        print(f"ResNet Accuracy: {resnet_accuracy}")

    torch.save(lr_model.state_dict(), f"{path}/lr_model.pth")
    torch.save(resnet_model.state_dict(), f"{path}/resnet_model.pth")

XGBoost Accuracy: 0.8780837342614393
Epoch 1/1000, Loss: 0.6880693435668945
Epoch 101/1000, Loss: 0.4647754728794098
Epoch 201/1000, Loss: 0.421127587556839
Epoch 301/1000, Loss: 0.3997373580932617
Epoch 401/1000, Loss: 0.3869837522506714
Epoch 501/1000, Loss: 0.3784954249858856
Epoch 601/1000, Loss: 0.37239620089530945
Epoch 701/1000, Loss: 0.36776065826416016
Epoch 801/1000, Loss: 0.3640848398208618
Epoch 901/1000, Loss: 0.36107316613197327
Epoch 1/1000, Loss: 0.7633286714553833
Epoch 101/1000, Loss: 0.3724764883518219
Epoch 201/1000, Loss: 0.3484494984149933
Epoch 301/1000, Loss: 0.3389897644519806
Epoch 401/1000, Loss: 0.3328791856765747
Epoch 501/1000, Loss: 0.32813552021980286
Epoch 601/1000, Loss: 0.32393258810043335
Epoch 701/1000, Loss: 0.3214186131954193
Epoch 801/1000, Loss: 0.3201175630092621
Epoch 901/1000, Loss: 0.31839215755462646
Logistic Regression Accuracy: 0.8283345275872659
ResNet Accuracy: 0.8535162247927116
XGBoost Accuracy: 0.9064469755612076
Epoch 1/1000, Loss: 