In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
import json
import numpy as np
import random
import pandas as pd

from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import ParameterGrid
from google.colab import drive

# Mount Google Drive so the CSV files referenced below are reachable under /content/drive.
drive.mount('/content/drive')

# Fix the random seeds for reproducibility (PyTorch CPU, PyTorch CUDA, NumPy, Python's random).
RANDOM_SEED = 18
torch.manual_seed(RANDOM_SEED)
torch.cuda.manual_seed(RANDOM_SEED)
np.random.seed(RANDOM_SEED)
random.seed(RANDOM_SEED)
# Ask cuDNN for deterministic algorithms and turn off its benchmark-based autotuner.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

# DataLoader seeding (not used yet): the snippet below is disabled by being wrapped in a string literal.
'''
generator = torch.Generator()
generator.manual_seed(RANDOM_SEED)

def worker_init_fn(worker_id):
    worker_seed = torch.initial_seed() % 2**32
    np.random.seed(worker_seed)
    random.seed(worker_seed)

# DataLoader(worker_init_fn=worker_init_fn)
'''

# Data path settings
train_csv_path = "/content/drive/MyDrive/train.csv"
test_csv_path = "/content/drive/MyDrive/test.csv"
submission_csv_path = '/content/drive/MyDrive/sample_submission.csv'

# Other path settings: names of saved outputs, etc. (set these to match the model / method)

Mounted at /content/drive


In [None]:
# MLP with five hidden layers.
# Best hyperparameters found so far (grid search): {"batch_size": 32, "hidden_sizes": [32, 64, 64, 32, 16], "learning_rate": 0.001}
# Epochs: 93
# Submission score: 0.752
class MLP5Hidden(nn.Module):
    """Fully connected network with five hidden layers and PReLU activations.

    Args:
        input_size: Number of input features per sample.
        hidden_sizes: Widths of the five hidden layers, in order. Any indexable
            sequence works (list or tuple); only entries 0-4 are read.
        output_size: Number of values produced per sample.

    Note:
        One ``nn.PReLU`` module is created and applied after every hidden
        layer, so all five activations share the same learnable parameter.
        The output layer (``fc6``) has no activation.
    """

    # The default is a tuple rather than a list so that the default object
    # shared between calls can never be mutated by accident.
    def __init__(self, input_size=11, hidden_sizes=(32, 64, 64, 32, 16), output_size=1):
        super().__init__()
        # Layers are created in fc1..fc6 order; this keeps the state_dict keys
        # and the order in which weights are randomly initialised stable.
        self.fc1 = nn.Linear(input_size, hidden_sizes[0])
        self.fc2 = nn.Linear(hidden_sizes[0], hidden_sizes[1])
        self.fc3 = nn.Linear(hidden_sizes[1], hidden_sizes[2])
        self.fc4 = nn.Linear(hidden_sizes[2], hidden_sizes[3])
        self.fc5 = nn.Linear(hidden_sizes[3], hidden_sizes[4])
        self.fc6 = nn.Linear(hidden_sizes[4], output_size)
        self.prelu = nn.PReLU()

    def forward(self, x):
        """Apply the five hidden layers (each followed by PReLU), then the output layer."""
        for hidden_layer in (self.fc1, self.fc2, self.fc3, self.fc4, self.fc5):
            x = self.prelu(hidden_layer(x))
        return self.fc6(x)

In [None]:
# Load the training CSV and split it into training and validation TensorDatasets.
train_data = pd.read_csv(train_csv_path)

# Features are every column except the first and the last; the target is the last column.
feature_values = train_data.iloc[:, 1:-1].values
target_values = train_data.iloc[:, -1].values
X = torch.tensor(feature_values, dtype=torch.float32)
# Reshape the target into a column vector with one value per row.
y = torch.tensor(target_values, dtype=torch.float32).view(-1, 1)

# Hold out 20% of the rows for validation; the split is seeded with RANDOM_SEED.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=RANDOM_SEED,
)

train_dataset, val_dataset = TensorDataset(X_train, y_train), TensorDataset(X_val, y_val)

In [None]:

# Training configuration. NUM_EPOCHS drives both the loop and the progress
# message, so the logged total always matches the number of epochs actually run.
NUM_EPOCHS = 93
BATCH_SIZE = 32
LEARNING_RATE = 0.001

# Use the GPU when one is available; otherwise fall back to the CPU so this
# cell also runs on a CPU-only runtime.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = MLP5Hidden().to(device)
criterion = nn.MSELoss().to(device)
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)

# Per-epoch loss history (mean of the per-batch losses).
train_losses = []
val_losses = []

for epoch in range(NUM_EPOCHS):
    # ---- training pass ----
    model.train()
    train_loss = 0
    for batch_X, batch_y in train_loader:
        batch_X, batch_y = batch_X.to(device), batch_y.to(device)
        outputs = model(batch_X)
        loss = criterion(outputs, batch_y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        train_loss += loss.item()

    # Average over batches (each batch loss is itself a mean over its samples).
    train_loss = train_loss / len(train_loader)
    train_losses.append(train_loss)

    # ---- validation pass (no gradient tracking, no parameter updates) ----
    model.eval()
    val_loss = 0
    with torch.no_grad():
        for batch_X, batch_y in val_loader:
            batch_X, batch_y = batch_X.to(device), batch_y.to(device)
            outputs = model(batch_X)
            loss = criterion(outputs, batch_y)
            val_loss += loss.item()

    val_loss = val_loss / len(val_loader)
    val_losses.append(val_loss)

    print(f'Epoch {epoch+1}/{NUM_EPOCHS}, Train Loss: {train_loss:.4f}, Validation Loss: {val_loss:.4f}')

Epoch 1/300, Train Loss: 350.2065, Validation Loss: 6.7849
Epoch 2/300, Train Loss: 3.9155, Validation Loss: 4.0508
Epoch 3/300, Train Loss: 3.3235, Validation Loss: 3.6445
Epoch 4/300, Train Loss: 3.1878, Validation Loss: 3.3946
Epoch 5/300, Train Loss: 3.1973, Validation Loss: 3.6870
Epoch 6/300, Train Loss: 3.1688, Validation Loss: 3.3060
Epoch 7/300, Train Loss: 3.2601, Validation Loss: 3.8899
Epoch 8/300, Train Loss: 3.1137, Validation Loss: 4.3795
Epoch 9/300, Train Loss: 3.1876, Validation Loss: 3.5824
Epoch 10/300, Train Loss: 3.1749, Validation Loss: 3.2841
Epoch 11/300, Train Loss: 3.2407, Validation Loss: 3.2804
Epoch 12/300, Train Loss: 3.1377, Validation Loss: 3.2787
Epoch 13/300, Train Loss: 3.1048, Validation Loss: 4.0377
Epoch 14/300, Train Loss: 3.2147, Validation Loss: 3.4943
Epoch 15/300, Train Loss: 3.2094, Validation Loss: 3.2741
Epoch 16/300, Train Loss: 3.2122, Validation Loss: 5.0921
Epoch 17/300, Train Loss: 3.1227, Validation Loss: 3.6655
Epoch 18/300, Train L

In [None]:
# Load the test features: every column of the test CSV except the first.
X_test = pd.read_csv(test_csv_path).iloc[:, 1:]
X_test_tensor = torch.tensor(X_test.values, dtype=torch.float32)

test_dataset = TensorDataset(X_test_tensor)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Run inference on whichever device the model's parameters live on, instead of
# assuming CUDA; this keeps the cell working on CPU-only runtimes as well.
model_device = next(model.parameters()).device

model.eval()
pred_batches = []
with torch.no_grad():
    for (batch_X,) in test_loader:
        outputs = model(batch_X.to(model_device))
        # Move predictions to the CPU and keep them as one array per batch.
        pred_batches.append(outputs.cpu().numpy())

# Concatenate once and flatten to a 1-D vector with one prediction per test row.
if pred_batches:
    test_pred = np.concatenate(pred_batches).reshape(-1)
else:
    test_pred = np.empty(0, dtype=np.float32)

submission_df = pd.read_csv(submission_csv_path)
submission_df['y'] = test_pred  # store the predictions in the 'y' column

# Save the submission as a CSV file at the given path.
submission_df.to_csv("/content/drive/MyDrive/new_MLP5_PReLU.csv", index=False)

In [None]:
# Display summary statistics of the submission frame as a quick sanity check on the predicted 'y' values.
submission_df.describe()

Unnamed: 0,y
count,4986.0
mean,85.297058
std,3.50829
min,81.821587
25%,82.736248
50%,83.606258
75%,86.764063
max,94.268341
