<a href="https://colab.research.google.com/github/DonghaeSuh/PyTorch_Basic/blob/main/train_valid_test_split.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### 현재 위치 설정

In [1]:
cd drive/MyDrive/pytorch/

/content/drive/MyDrive/pytorch


### 모듈 불러오기

In [2]:
import torch
import pandas as pd
from torch import nn
from torch import optim
from torch.utils.data import Dataset, DataLoader, random_split

일단 Dataset과 모델 class를 만든 다음에\
dataset을 불러온 다음 길이를 계산하여\
train : 0.8 / validation : 0.1 / test : 0.1 로 분리하여\
dataloader를 이용해 각각 불러와 train 이후 validation loss를 구할 것이다.


### Dataset 구성

In [13]:
class CustomDataset(Dataset):
  """Two-column CSV exposed as ``(features, target)`` tensor pairs.

  The first CSV column is stored as ``self.x`` and the second as ``self.y``.
  Each sample's feature vector is ``[x**2, x]``; the target is ``[y]``.
  """

  def __init__(self,file_path):
    """Read ``file_path`` with pandas and keep both columns as arrays."""
    frame=pd.read_csv(file_path)
    self.length=len(frame)
    self.x=frame.iloc[:,0].values
    self.y=frame.iloc[:,1].values

  def __len__(self):
    """Return the number of rows read from the CSV."""
    return self.length

  def __getitem__(self,index):
    """Return float tensors ``([x**2, x], [y])`` for row ``index``."""
    raw=self.x[index]
    features=torch.FloatTensor([raw**2,raw])
    target=torch.FloatTensor([self.y[index]])
    return features,target

### Model 구성

In [14]:
class CustomModel(nn.Module):
  """Single linear layer mapping 2 input features to 1 output."""

  def __init__(self):
    """Create the ``nn.Linear(2, 1)`` layer stored as ``self.layer``."""
    super().__init__()
    self.layer=nn.Linear(2,1)

  def forward(self,x):
    """Apply the linear layer to ``x`` and return the result."""
    return self.layer(x)

### Dataset 불러오기 및 Train, Validation, Test 분할

In [15]:
# Load the full dataset, then split it into 80% train / 10% validation and
# give whatever rows remain to the test set.
dataset=CustomDataset("dataset.csv")
dataset_size=len(dataset)
train_size=int(dataset_size*0.8)  # split lengths must be converted to int
validation_size=int(dataset_size*0.1)
test_size=dataset_size-train_size-validation_size  # remainder, so the three sizes always sum to dataset_size

# Seed the split so the same rows land in each subset on every run; an
# unseeded random_split produces a different partition each time.
split_generator=torch.Generator().manual_seed(42)
train_dataset,validation_dataset,test_dataset=random_split(
    dataset,[train_size,validation_size,test_size],generator=split_generator)

print(f"Train_dataset_size = {len(train_dataset)}")
print(f"Validation_dataset_size = {len(validation_dataset)}")
print(f"Test_dataset_size = {len(test_dataset)}")

Train_dataset_size = 160
Validation_dataset_size = 20
Test_dataset_size = 20


### dataloader를 이용한 데이터 활용 방식 설정

In [16]:
# Training batches are reshuffled every epoch; an incomplete final batch is dropped.
train_dataloader = DataLoader(train_dataset,batch_size=16, shuffle=True,drop_last=True)
# Evaluation loaders keep a fixed order and every sample: shuffling adds nothing
# when no gradients are taken, and drop_last=True would silently exclude rows
# whenever the subset size is not a multiple of the batch size.
validation_dataloader = DataLoader(validation_dataset,batch_size=4,shuffle=False,drop_last=False)
test_dataloader= DataLoader(test_dataset, batch_size=1, shuffle=False,drop_last=False)

### 모델, Loss, Optimizer 계산에 GPU를 사용할 수 있음

In [17]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Module.to() moves the module in place, so the names stay bound to the same objects.
model = CustomModel()
model.to(device)
criterion = nn.MSELoss()
criterion.to(device)

# Plain SGD over all model parameters.
optimizer = optim.SGD(params=model.parameters(), lr=0.0001)

### 학습

In [18]:
# Train with mini-batch SGD for a fixed number of epochs.
model.train()  # ensure training mode even if an evaluation cell ran before a re-run
for epoch in range(10000):
    cost = 0.0  # running sum of batch losses for this epoch

    for x, y in train_dataloader:
        x = x.to(device)
        y = y.to(device)

        output = model(x)
        loss = criterion(output, y)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Accumulate a plain Python float. Adding the loss tensor itself would
        # keep every batch's autograd graph referenced for the whole epoch and
        # leave `cost` as a tensor instead of a number.
        cost += loss.item()

    cost = cost / len(train_dataloader)  # mean batch loss of this epoch

### 검증

In [19]:
# Put the model in evaluation mode, then print its predictions next to the
# inputs and targets for every validation batch. Gradient tracking is
# disabled for the whole loop.
model.eval()
with torch.no_grad():
    for x, y in validation_dataloader:
        x, y = x.to(device), y.to(device)
        outputs = model(x)

        print(f"X : {x}")
        print(f"Y : {y}")
        print(f"Outputs : {outputs}")
        print("--------------------")

X : tensor([[24.0100, -4.9000],
        [ 2.8900, -1.7000],
        [44.8900,  6.7000],
        [90.2500,  9.5000]])
Y : tensor([[ 83.2800],
        [ 12.7900],
        [127.8900],
        [263.8000]])
Outputs : tensor([[ 83.2802],
        [ 12.3524],
        [128.2459],
        [264.0979]])
--------------------
X : tensor([[ 7.2900,  2.7000],
        [46.2400, -6.8000],
        [ 7.8400, -2.8000],
        [62.4100,  7.9000]])
Y : tensor([[ 18.2600],
        [155.4300],
        [ 29.2000],
        [180.9000]])
Outputs : tensor([[ 18.4958],
        [155.4341],
        [ 29.5725],
        [180.5159]])
--------------------
X : tensor([[ 7.8400,  2.8000],
        [16.8100, -4.1000],
        [ 4.4100,  2.1000],
        [30.2500,  5.5000]])
Y : tensor([[20.3700],
        [59.1100],
        [10.4700],
        [85.2600]])
Outputs : tensor([[20.0305],
        [59.5960],
        [10.5897],
        [84.9044]])
--------------------
X : tensor([[40.9600,  6.4000],
        [16.0000,  4.0000],
      