In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn

import math
import random
import time
from itertools import chain
import argparse

#for training
from torch.utils.data import Dataset, DataLoader
from tqdm.auto import tqdm

from torch.optim.optimizer import Optimizer, required
from torch.optim.lr_scheduler import LambdaLR

In [3]:
!unzip /content/drive/MyDrive/반도체/data_mdc01.zip

Archive:  /content/drive/MyDrive/반도체/data_mdc01.zip
  inflating: sample_submission.csv   
  inflating: test.csv                
  inflating: train.csv               


## 파일 불러오기, CUDA 환경설정

In [4]:
train = pd.read_csv('train.csv')
test = pd.read_csv('test.csv')
submit = pd.read_csv('sample_submission.csv')

#gpu 사용하기 위해서 CUDA 환경 설정
device = 'cuda' if torch.cuda.is_available() else 'cpu'

random.seed(777)
torch.manual_seed(777)
if device == 'cuda':
    torch.manual_seed(777)

print(train.shape)

(810000, 230)


### train, validation set 설정

In [5]:
x_train = train.iloc[:,4:]
y_train = train.iloc[:,0:4]

x_test = test.iloc[:,1:]

from sklearn.model_selection import train_test_split

x_train, x_val, y_train, y_val = train_test_split(x_train, y_train, test_size=10000, shuffle=True)

print(x_train.shape)
print(x_val.shape)


(800000, 226)
(10000, 226)


In [6]:
#tensor로 변환

x_train = np.array(x_train)
y_train = np.array(y_train)
x_val = np.array(x_val)
y_val = np.array(y_val)

x_test = np.array(x_test)

x_train = torch.FloatTensor(x_train)
y_train = torch.FloatTensor(y_train)
x_val = torch.FloatTensor(x_val)
y_val = torch.FloatTensor(y_val)

x_test = torch.FloatTensor(x_test)


In [7]:
#parameter 설정
lr = 1e-3
adam_epsilon = 1e-06 #Adam optimizer 사용할 때 bad local optima로의 학습 안 일어나게 아주 작은 learning rate 사용
epochs = 70
batch_size = 2048
warmup_step = 2000  #처음 2000 step 동안은 선형적으로 조금씩 증가하는 learning rate 사용 
loss_fn = nn.L1Loss()

#데이터셋 만들기
train_dataset = torch.utils.data.TensorDataset(x_train, y_train)
train_loader = DataLoader(train_dataset, batch_size=batch_size, num_workers=3)

val_dataset = torch.utils.data.TensorDataset(x_val, y_val)
val_loader = DataLoader(val_dataset, batch_size=batch_size, num_workers=3)

## 모델에 사용할 함수

In [8]:
# activation function 
class GELU(nn.Module):
    def forward(self, x):
        return 0.5 * x * (1 + torch.tanh(math.sqrt(2 / math.pi) * (x + 0.044715 * torch.pow(x, 3))))

class LayerNorm(nn.Module):
    def __init__(self, hidden_size, eps=1e-5):
        """Construct a layernorm module in the TF style (epsilon inside the square root).
        """
        super(LayerNorm, self).__init__()
        self.weight = nn.Parameter(torch.ones(hidden_size))
        self.bias = nn.Parameter(torch.zeros(hidden_size))
        self.variance_epsilon = eps

        self.init_weights()

    def init_weights(self):
        self.weight.data.fill_(1.0)
        self.bias.data.zero_()

    def forward(self, x):
        u = x.mean(-1, keepdim=True)
        s = (x - u).pow(2).mean(-1, keepdim=True)
        x = (x - u) / torch.sqrt(s + self.variance_epsilon)
        return self.weight * x + self.bias

In [9]:
# model fucntion
class skipConnectionModel(nn.Module):
    def __init__(self):
        super(skipConnectionModel, self).__init__()
        
        self.ln = LayerNorm(10000)
        self.ln1 = LayerNorm(7000)
        self.ln2 = LayerNorm(4000)
        self.ln3 = LayerNorm(2000)
        
        self.upblock1 = nn.Sequential(nn.Linear(226, 2000),GELU(),nn.BatchNorm1d(2000))
        self.upblock2 = nn.Sequential(nn.Linear(2000,4000),GELU(),nn.BatchNorm1d(4000))
        self.upblock3 = nn.Sequential(nn.Linear(4000,7000), GELU(),nn.BatchNorm1d(7000))
        self.upblock4 = nn.Sequential(nn.Linear(7000,10000),GELU(),nn.BatchNorm1d(10000))

        self.downblock1 = nn.Sequential(nn.Linear(10000, 7000),GELU(),nn.BatchNorm1d(7000))
        self.downblock2 = nn.Sequential(nn.Linear(7000, 4000),GELU(),nn.BatchNorm1d(4000))
        self.downblock3 = nn.Sequential(nn.Linear(4000, 2000),GELU(),nn.BatchNorm1d(2000))
        self.downblock4 = nn.Sequential(nn.Linear(2000, 300),GELU(),nn.BatchNorm1d(300))
        
        self.fclayer = nn.Sequential(nn.Linear(300,4))
        self.dropout = nn.Dropout(0.1)
        
    def forward(self, x):
        upblock1_out = self.upblock1(x)
        upblock2_out = self.upblock2(upblock1_out)
        upblock3_out = self.upblock3(upblock2_out)
        upblock4_out = self.upblock4(upblock3_out)
        
        downblock1_out = self.downblock1(self.ln(upblock4_out))
        skipblock1 = downblock1_out + upblock3_out
        downblock2_out = self.downblock2(self.ln1(skipblock1))
        skipblock2 = downblock2_out + upblock2_out
        downblock3_out = self.downblock3(self.ln2(skipblock2))
        skipblock3 = downblock3_out + upblock1_out
        downblock4_out = self.downblock4(self.ln3(skipblock3))
        
        output = self.fclayer(downblock4_out)
        
        return output

In [10]:
#scheduler fucntion
def get_cosine_with_hard_restarts_schedule_with_warmup(
    optimizer, num_warmup_steps, num_training_steps, num_cycles=1.0, last_epoch=-1
):
    """ Create a schedule with a learning rate that decreases following the
    values of the cosine function with several hard restarts, after a warmup
    period during which it increases linearly between 0 and 1.
    """

    def lr_lambda(current_step):
        if current_step < num_warmup_steps:
            return float(current_step) / float(max(1, num_warmup_steps))
        progress = float(current_step - num_warmup_steps) / float(max(1, num_training_steps - num_warmup_steps))
        if progress >= 1.0:
            return 0.0
        return max(0.0, 0.5 * (1.0 + math.cos(math.pi * ((float(num_cycles) * progress) % 1.0))))

    return LambdaLR(optimizer, lr_lambda, last_epoch)

In [11]:
#optimizer fucntion
class AdamW(Optimizer):
    """ Implements Adam algorithm with weight decay fix.
    Parameters:
        lr (float): learning rate. Default 1e-3.
        betas (tuple of 2 floats): Adams beta parameters (b1, b2). Default: (0.9, 0.999)
        eps (float): Adams epsilon. Default: 1e-6
        weight_decay (float): Weight decay. Default: 0.0
        correct_bias (bool): can be set to False to avoid correcting bias in Adam (e.g. like in Bert TF repository). Default True.
    """

    def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-6, weight_decay=0.0, correct_bias=True):
        if lr < 0.0:
            raise ValueError("Invalid learning rate: {} - should be &gt;= 0.0".format(lr))
        if not 0.0 <= betas[0] < 1.0:
            raise ValueError("Invalid beta parameter: {} - should be in [0.0, 1.0[".format(betas[0]))
        if not 0.0 <= betas[1] < 1.0:
            raise ValueError("Invalid beta parameter: {} - should be in [0.0, 1.0[".format(betas[1]))
        if not 0.0 <= eps:
            raise ValueError("Invalid epsilon value: {} - should be &gt;= 0.0".format(eps))
        defaults = dict(lr=lr, betas=betas, eps=eps, weight_decay=weight_decay, correct_bias=correct_bias)
        super().__init__(params, defaults)

    def step(self, closure=None):
        """Performs a single optimization step.
        Arguments:
            closure (callable, optional): A closure that reevaluates the model
                and returns the loss.
        """
        loss = None
        if closure is not None:
            loss = closure()

        for group in self.param_groups:
            for p in group["params"]:
                if p.grad is None:
                    continue
                grad = p.grad.data
                if grad.is_sparse:
                    raise RuntimeError("Adam does not support sparse gradients, please consider SparseAdam instead")

                state = self.state[p]

                # State initialization
                if len(state) == 0:
                    state["step"] = 0
                    # Exponential moving average of gradient values
                    state["exp_avg"] = torch.zeros_like(p.data)
                    # Exponential moving average of squared gradient values
                    state["exp_avg_sq"] = torch.zeros_like(p.data)

                exp_avg, exp_avg_sq = state["exp_avg"], state["exp_avg_sq"]
                beta1, beta2 = group["betas"]

                state["step"] += 1

                # Decay the first and second moment running average coefficient
                # In-place operations to update the averages at the same time
                exp_avg.mul_(beta1).add_(1.0 - beta1, grad)
                exp_avg_sq.mul_(beta2).addcmul_(1.0 - beta2, grad, grad)
                denom = exp_avg_sq.sqrt().add_(group["eps"])

                step_size = group["lr"]
                if group["correct_bias"]:  # No bias correction for Bert
                    bias_correction1 = 1.0 - beta1 ** state["step"]
                    bias_correction2 = 1.0 - beta2 ** state["step"]
                    step_size = step_size * math.sqrt(bias_correction2) / bias_correction1

                p.data.addcdiv_(-step_size, exp_avg, denom)

                # Just adding the square of the weights to the loss function is *not*
                # the correct way of using L2 regularization/weight decay with Adam,
                # since that will interact with the m and v parameters in strange ways.
                #
                # Instead we want to decay the weights in a manner that doesn't interact
                # with the m/v parameters. This is equivalent to adding the square
                # of the weights to the loss with plain (non-momentum) SGD.
                # Add weight decay at the end (fixed version)
                if group["weight_decay"] < 0.0:
                    p.data.add_(-group["lr"] * group["weight_decay"], p.data)

        return loss

## 모델 학습하기

In [12]:
model = skipConnectionModel().to(device)

total_step = len(train_loader) * epochs
print(f"Total step is....{total_step}") # 모델이 학습하는 전체 step 계산.

no_decay = ["bias", "LayerNorm.weight"] # decay하지 않을 영역 지정.
optimizer_grouped_parameters = [
    {
        "params": [p for n, p in model.named_parameters() if not any(nd in n for nd in no_decay)],
        "weight_decay": 0.0,
    },
    {"params": [p for n, p in model.named_parameters() if any(nd in n for nd in no_decay)], "weight_decay": 0.0},
]

optimizer = AdamW(optimizer_grouped_parameters, lr=lr, eps=adam_epsilon)
scheduler = get_cosine_with_hard_restarts_schedule_with_warmup(
    optimizer, num_warmup_steps=warmup_step, num_training_steps=total_step
)


Total step is....27370


In [13]:
# train loss와 val loss 지정.
total_loss = 0.0
total_val_loss = 0.0

for epoch in range(epochs):
    total_loss = 0
    total_val_loss = 0
   
    for i, (x_train, y_train) in enumerate(tqdm(train_loader, desc = '*******Train mode********')):
        x_train = x_train.float().to(device) #이게 반복문 밖에 있으니까 에러가 난다. 왜???? 잉?????
        y_train = y_train.float().to(device)
        
        hypothesis = model(x_train)
        loss = loss_fn(hypothesis, y_train)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        scheduler.step()

        total_loss += loss.item()

    train_loss = total_loss/len(train_loader)

    print ("Epoch [{}/{}], Train Loss: {:.4f}".format(epoch+1, epochs, train_loss))

HBox(children=(FloatProgress(value=0.0, description='*******Train mode********', max=391.0, style=ProgressStyl…

	add_(Number alpha, Tensor other)
Consider using one of the following signatures instead:
	add_(Tensor other, *, Number alpha) (Triggered internally at  /pytorch/torch/csrc/utils/python_arg_parser.cpp:882.)



Epoch [1/70], Train Loss: 154.6443


HBox(children=(FloatProgress(value=0.0, description='*******Train mode********', max=391.0, style=ProgressStyl…


Epoch [2/70], Train Loss: 144.3829


HBox(children=(FloatProgress(value=0.0, description='*******Train mode********', max=391.0, style=ProgressStyl…


Epoch [3/70], Train Loss: 96.0780


HBox(children=(FloatProgress(value=0.0, description='*******Train mode********', max=391.0, style=ProgressStyl…


Epoch [4/70], Train Loss: 28.0912


HBox(children=(FloatProgress(value=0.0, description='*******Train mode********', max=391.0, style=ProgressStyl…


Epoch [5/70], Train Loss: 5.5269


HBox(children=(FloatProgress(value=0.0, description='*******Train mode********', max=391.0, style=ProgressStyl…


Epoch [6/70], Train Loss: 4.1547


HBox(children=(FloatProgress(value=0.0, description='*******Train mode********', max=391.0, style=ProgressStyl…


Epoch [7/70], Train Loss: 3.5301


HBox(children=(FloatProgress(value=0.0, description='*******Train mode********', max=391.0, style=ProgressStyl…


Epoch [8/70], Train Loss: 3.2623


HBox(children=(FloatProgress(value=0.0, description='*******Train mode********', max=391.0, style=ProgressStyl…


Epoch [9/70], Train Loss: 3.1402


HBox(children=(FloatProgress(value=0.0, description='*******Train mode********', max=391.0, style=ProgressStyl…


Epoch [10/70], Train Loss: 2.9582


HBox(children=(FloatProgress(value=0.0, description='*******Train mode********', max=391.0, style=ProgressStyl…


Epoch [11/70], Train Loss: 2.8691


HBox(children=(FloatProgress(value=0.0, description='*******Train mode********', max=391.0, style=ProgressStyl…


Epoch [12/70], Train Loss: 2.8141


HBox(children=(FloatProgress(value=0.0, description='*******Train mode********', max=391.0, style=ProgressStyl…


Epoch [13/70], Train Loss: 2.7666


HBox(children=(FloatProgress(value=0.0, description='*******Train mode********', max=391.0, style=ProgressStyl…


Epoch [14/70], Train Loss: 2.6588


HBox(children=(FloatProgress(value=0.0, description='*******Train mode********', max=391.0, style=ProgressStyl…


Epoch [15/70], Train Loss: 2.6173


HBox(children=(FloatProgress(value=0.0, description='*******Train mode********', max=391.0, style=ProgressStyl…


Epoch [16/70], Train Loss: 2.5982


HBox(children=(FloatProgress(value=0.0, description='*******Train mode********', max=391.0, style=ProgressStyl…


Epoch [17/70], Train Loss: 2.5214


HBox(children=(FloatProgress(value=0.0, description='*******Train mode********', max=391.0, style=ProgressStyl…


Epoch [18/70], Train Loss: 2.5209


HBox(children=(FloatProgress(value=0.0, description='*******Train mode********', max=391.0, style=ProgressStyl…


Epoch [19/70], Train Loss: 2.4636


HBox(children=(FloatProgress(value=0.0, description='*******Train mode********', max=391.0, style=ProgressStyl…


Epoch [20/70], Train Loss: 2.4526


HBox(children=(FloatProgress(value=0.0, description='*******Train mode********', max=391.0, style=ProgressStyl…


Epoch [21/70], Train Loss: 2.4359


HBox(children=(FloatProgress(value=0.0, description='*******Train mode********', max=391.0, style=ProgressStyl…


Epoch [22/70], Train Loss: 2.3857


HBox(children=(FloatProgress(value=0.0, description='*******Train mode********', max=391.0, style=ProgressStyl…


Epoch [23/70], Train Loss: 2.4166


HBox(children=(FloatProgress(value=0.0, description='*******Train mode********', max=391.0, style=ProgressStyl…


Epoch [24/70], Train Loss: 2.3737


HBox(children=(FloatProgress(value=0.0, description='*******Train mode********', max=391.0, style=ProgressStyl…


Epoch [25/70], Train Loss: 2.2911


HBox(children=(FloatProgress(value=0.0, description='*******Train mode********', max=391.0, style=ProgressStyl…


Epoch [26/70], Train Loss: 2.2793


HBox(children=(FloatProgress(value=0.0, description='*******Train mode********', max=391.0, style=ProgressStyl…


Epoch [27/70], Train Loss: 2.2145


HBox(children=(FloatProgress(value=0.0, description='*******Train mode********', max=391.0, style=ProgressStyl…


Epoch [28/70], Train Loss: 2.1931


HBox(children=(FloatProgress(value=0.0, description='*******Train mode********', max=391.0, style=ProgressStyl…


Epoch [29/70], Train Loss: 2.1935


HBox(children=(FloatProgress(value=0.0, description='*******Train mode********', max=391.0, style=ProgressStyl…


Epoch [30/70], Train Loss: 2.1660


HBox(children=(FloatProgress(value=0.0, description='*******Train mode********', max=391.0, style=ProgressStyl…


Epoch [31/70], Train Loss: 2.1309


HBox(children=(FloatProgress(value=0.0, description='*******Train mode********', max=391.0, style=ProgressStyl…


Epoch [32/70], Train Loss: 2.1021


HBox(children=(FloatProgress(value=0.0, description='*******Train mode********', max=391.0, style=ProgressStyl…


Epoch [33/70], Train Loss: 2.0821


HBox(children=(FloatProgress(value=0.0, description='*******Train mode********', max=391.0, style=ProgressStyl…


Epoch [34/70], Train Loss: 2.0737


HBox(children=(FloatProgress(value=0.0, description='*******Train mode********', max=391.0, style=ProgressStyl…


Epoch [35/70], Train Loss: 2.0689


HBox(children=(FloatProgress(value=0.0, description='*******Train mode********', max=391.0, style=ProgressStyl…


Epoch [36/70], Train Loss: 2.0668


HBox(children=(FloatProgress(value=0.0, description='*******Train mode********', max=391.0, style=ProgressStyl…


Epoch [37/70], Train Loss: 2.0390


HBox(children=(FloatProgress(value=0.0, description='*******Train mode********', max=391.0, style=ProgressStyl…


Epoch [38/70], Train Loss: 2.0115


HBox(children=(FloatProgress(value=0.0, description='*******Train mode********', max=391.0, style=ProgressStyl…


Epoch [39/70], Train Loss: 1.9796


HBox(children=(FloatProgress(value=0.0, description='*******Train mode********', max=391.0, style=ProgressStyl…


Epoch [40/70], Train Loss: 1.9545


HBox(children=(FloatProgress(value=0.0, description='*******Train mode********', max=391.0, style=ProgressStyl…


Epoch [41/70], Train Loss: 1.9196


HBox(children=(FloatProgress(value=0.0, description='*******Train mode********', max=391.0, style=ProgressStyl…


Epoch [42/70], Train Loss: 1.9030


HBox(children=(FloatProgress(value=0.0, description='*******Train mode********', max=391.0, style=ProgressStyl…


Epoch [43/70], Train Loss: 1.9050


HBox(children=(FloatProgress(value=0.0, description='*******Train mode********', max=391.0, style=ProgressStyl…


Epoch [44/70], Train Loss: 1.9047


HBox(children=(FloatProgress(value=0.0, description='*******Train mode********', max=391.0, style=ProgressStyl…


Epoch [45/70], Train Loss: 1.8780


HBox(children=(FloatProgress(value=0.0, description='*******Train mode********', max=391.0, style=ProgressStyl…


Epoch [46/70], Train Loss: 1.8559


HBox(children=(FloatProgress(value=0.0, description='*******Train mode********', max=391.0, style=ProgressStyl…


Epoch [47/70], Train Loss: 1.8389


HBox(children=(FloatProgress(value=0.0, description='*******Train mode********', max=391.0, style=ProgressStyl…


Epoch [48/70], Train Loss: 1.8256


HBox(children=(FloatProgress(value=0.0, description='*******Train mode********', max=391.0, style=ProgressStyl…


Epoch [49/70], Train Loss: 1.8062


HBox(children=(FloatProgress(value=0.0, description='*******Train mode********', max=391.0, style=ProgressStyl…


Epoch [50/70], Train Loss: 1.7829


HBox(children=(FloatProgress(value=0.0, description='*******Train mode********', max=391.0, style=ProgressStyl…


Epoch [51/70], Train Loss: 1.7640


HBox(children=(FloatProgress(value=0.0, description='*******Train mode********', max=391.0, style=ProgressStyl…


Epoch [52/70], Train Loss: 1.7479


HBox(children=(FloatProgress(value=0.0, description='*******Train mode********', max=391.0, style=ProgressStyl…


Epoch [53/70], Train Loss: 1.7320


HBox(children=(FloatProgress(value=0.0, description='*******Train mode********', max=391.0, style=ProgressStyl…


Epoch [54/70], Train Loss: 1.7172


HBox(children=(FloatProgress(value=0.0, description='*******Train mode********', max=391.0, style=ProgressStyl…


Epoch [55/70], Train Loss: 1.7030


HBox(children=(FloatProgress(value=0.0, description='*******Train mode********', max=391.0, style=ProgressStyl…


Epoch [56/70], Train Loss: 1.6906


HBox(children=(FloatProgress(value=0.0, description='*******Train mode********', max=391.0, style=ProgressStyl…


Epoch [57/70], Train Loss: 1.6784


HBox(children=(FloatProgress(value=0.0, description='*******Train mode********', max=391.0, style=ProgressStyl…


Epoch [58/70], Train Loss: 1.6640


HBox(children=(FloatProgress(value=0.0, description='*******Train mode********', max=391.0, style=ProgressStyl…


Epoch [59/70], Train Loss: 1.6505


HBox(children=(FloatProgress(value=0.0, description='*******Train mode********', max=391.0, style=ProgressStyl…


Epoch [60/70], Train Loss: 1.6392


HBox(children=(FloatProgress(value=0.0, description='*******Train mode********', max=391.0, style=ProgressStyl…


Epoch [61/70], Train Loss: 1.6292


HBox(children=(FloatProgress(value=0.0, description='*******Train mode********', max=391.0, style=ProgressStyl…


Epoch [62/70], Train Loss: 1.6200


HBox(children=(FloatProgress(value=0.0, description='*******Train mode********', max=391.0, style=ProgressStyl…


Epoch [63/70], Train Loss: 1.6117


HBox(children=(FloatProgress(value=0.0, description='*******Train mode********', max=391.0, style=ProgressStyl…


Epoch [64/70], Train Loss: 1.6030


HBox(children=(FloatProgress(value=0.0, description='*******Train mode********', max=391.0, style=ProgressStyl…


Epoch [65/70], Train Loss: 1.5962


HBox(children=(FloatProgress(value=0.0, description='*******Train mode********', max=391.0, style=ProgressStyl…


Epoch [66/70], Train Loss: 1.5911


HBox(children=(FloatProgress(value=0.0, description='*******Train mode********', max=391.0, style=ProgressStyl…


Epoch [67/70], Train Loss: 1.5875


HBox(children=(FloatProgress(value=0.0, description='*******Train mode********', max=391.0, style=ProgressStyl…


Epoch [68/70], Train Loss: 1.5847


HBox(children=(FloatProgress(value=0.0, description='*******Train mode********', max=391.0, style=ProgressStyl…


Epoch [69/70], Train Loss: 1.5823


HBox(children=(FloatProgress(value=0.0, description='*******Train mode********', max=391.0, style=ProgressStyl…


Epoch [70/70], Train Loss: 1.5801


In [18]:
model.eval()
n_val_loss = 10000000
version = time.localtime()[3:5]
learning_rate = lr
with torch.no_grad():
    for i,(X_val, Y_val) in enumerate(tqdm(val_loader, desc='*********Evaluation mode*******')):
        X_val=X_val.float().to(device)
        Y_val=Y_val.float().to(device)

        hypothesis = model(X_val)
        loss_val = loss_fn(hypothesis,Y_val)

        total_val_loss += loss_val.item()


    val_loss = total_val_loss / len(val_loader)
    print ("Epoch [{}/{}], Eval Loss: {:.4f}".format(epoch+1, epochs, val_loss))


    if val_loss < n_val_loss:
      n_val_loss = val_loss
      torch.save(model.state_dict(), f'test_{version}_{learning_rate}_{epochs}.pth')
      print("Best Model saved......")

HBox(children=(FloatProgress(value=0.0, description='*********Evaluation mode*******', max=5.0, style=Progress…


Epoch [70/70], Eval Loss: 3.0205
Best Model saved......


In [19]:
test_model = skipConnectionModel()

weights = torch.load(f'test_{version}_{learning_rate}_{epochs}.pth', map_location='cuda:0')
#모델
weights

OrderedDict([('ln.weight',
              tensor([0.9696, 0.8566, 0.7946,  ..., 0.8049, 0.7189, 0.6306], device='cuda:0')),
             ('ln.bias',
              tensor([ 0.4622, -0.5585,  0.0336,  ...,  0.1489,  0.4573, -0.4420],
                     device='cuda:0')),
             ('ln1.weight',
              tensor([1.2996, 0.9333, 1.0500,  ..., 0.6867, 0.9595, 1.0161], device='cuda:0')),
             ('ln1.bias',
              tensor([ 0.0044, -0.0361,  0.2744,  ...,  0.0213,  0.0864,  0.4395],
                     device='cuda:0')),
             ('ln2.weight',
              tensor([0.8263, 0.7762, 1.1054,  ..., 1.0196, 1.2374, 0.8595], device='cuda:0')),
             ('ln2.bias',
              tensor([0.0096, 0.0074, 0.3075,  ..., 0.1016, 0.0521, 0.2363], device='cuda:0')),
             ('ln3.weight',
              tensor([0.8703, 0.9623, 0.7745,  ..., 0.8735, 0.9061, 0.7704], device='cuda:0')),
             ('ln3.bias',
              tensor([-0.1120, -0.5744,  0.3270,  ..., -0.23

In [20]:
test_model.load_state_dict(weights)
test_model = test_model.to(device)
test_model.eval()

skipConnectionModel(
  (ln): LayerNorm()
  (ln1): LayerNorm()
  (ln2): LayerNorm()
  (ln3): LayerNorm()
  (upblock1): Sequential(
    (0): Linear(in_features=226, out_features=2000, bias=True)
    (1): GELU()
    (2): BatchNorm1d(2000, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (upblock2): Sequential(
    (0): Linear(in_features=2000, out_features=4000, bias=True)
    (1): GELU()
    (2): BatchNorm1d(4000, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (upblock3): Sequential(
    (0): Linear(in_features=4000, out_features=7000, bias=True)
    (1): GELU()
    (2): BatchNorm1d(7000, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (upblock4): Sequential(
    (0): Linear(in_features=7000, out_features=10000, bias=True)
    (1): GELU()
    (2): BatchNorm1d(10000, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (downblock1): Sequential(
    (0): Linear(in_features=10000, out_features=7000, bias=True

In [22]:
with torch.no_grad():
   outputs = test_model(x_test.float().to(device))
pred = outputs
pred = pred.cpu().numpy()

In [25]:
submit['layer_1']=pred[:,0]
submit['layer_2']=pred[:,1]
submit['layer_3']=pred[:,2]
submit['layer_4']=pred[:,3]
submit

Unnamed: 0,id,layer_1,layer_2,layer_3,layer_4
0,0,254.557571,230.011139,132.429550,84.596886
1,1,157.712524,127.846169,237.148743,97.159866
2,2,147.322723,179.586670,273.938873,156.045074
3,3,90.599762,229.265091,190.002090,81.790100
4,4,273.385834,294.782471,245.092651,270.784973
...,...,...,...,...,...
9995,9995,112.028305,95.652672,84.208313,53.318432
9996,9996,78.281845,258.855164,126.226479,102.619431
9997,9997,47.167194,255.828598,264.934052,28.608789
9998,9998,28.241468,23.469036,147.503372,76.954681


In [26]:
submit.to_csv("submit.csv",index=False)