In [1]:
# Mount Google Drive into the Colab filesystem; the dataset and save
# directories used by this notebook live under /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
import torch
import pandas as pd
from torch.utils.data import TensorDataset, Dataset, DataLoader, random_split
from pathlib import Path

In [35]:
import os

# Notebook-wide locations on the mounted Drive: project root, input
# datasets, and the folder that receives saved models and checkpoints.
ROOT_PATH = Path("/content/drive/MyDrive/blog/PyTorch_using_transformers")
DATA_PATH = ROOT_PATH.joinpath("datasets")
SAVE_PATH = ROOT_PATH.joinpath("03 파이토치 기초/save")

# Create the save folder (and any missing parents) up front; this is a
# no-op when the folder already exists.
os.makedirs(SAVE_PATH, exist_ok=True)

# 데이터세트와 데이터로더

## 3.29 ~ 3.33 다중 선형 회귀

In [None]:
# Six training samples, one per row: two input features each.
train_x = torch.FloatTensor(
    [
        [1, 2],
        [2, 3],
        [3, 4],
        [4, 5],
        [5, 6],
        [6, 7],
    ]
)
# Matching targets, row-aligned with train_x: two output values per sample.
train_y = torch.FloatTensor(
    [
        [0.1, 1.5],
        [1, 2.8],
        [1.9, 4.1],
        [2.8, 5.4],
        [3.7, 6.7],
        [4.6, 8],
    ]
)

In [None]:
# Pair each input row with its target row so they are indexed together.
train_dataset = TensorDataset(train_x, train_y)
# Serve shuffled mini-batches of two samples; an incomplete final batch is dropped.
train_dataloader = DataLoader(
    dataset=train_dataset,
    batch_size=2,
    shuffle=True,
    drop_last=True,
)

In [None]:
# Linear map from 2 input features to 2 outputs, with a bias term.
model = torch.nn.Linear(in_features=2, out_features=2, bias=True)
# Mean squared error between predictions and targets.
criterion = torch.nn.MSELoss()
# Plain stochastic gradient descent over all model parameters.
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)

In [None]:
# Train the linear model with mini-batch SGD and report the mean batch loss
# together with the current parameters every 1000 epochs.
for epoch in range(20000):
  # Running sum of this epoch's batch losses, kept as a plain Python float.
  cost = 0.0

  for batch in train_dataloader:
    x, y = batch
    output = model(x)

    loss = criterion(output, y)

    # Clear gradients from the previous step, backpropagate, then update.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # .item() extracts the scalar value. Adding the tensor itself would turn
    # `cost` into a graph-attached tensor and keep every batch's autograd
    # history alive until the end of the epoch.
    cost += loss.item()

  # Mean loss per batch over the epoch.
  cost /= len(train_dataloader)

  if (epoch + 1) % 1000 == 0:
    print(f"Epoch : {epoch+1:4d}, Model : {list(model.parameters())}, Cost : {cost:.3f}\n")

Epoch : 1000, Model : [Parameter containing:
tensor([[0.2407, 0.5344],
        [0.4956, 0.7523]], requires_grad=True), Parameter containing:
tensor([-0.8089, -0.3334], requires_grad=True)], Cost : 0.032

Epoch : 2000, Model : [Parameter containing:
tensor([[0.3675, 0.4689],
        [0.5484, 0.7251]], requires_grad=True), Parameter containing:
tensor([-1.0012, -0.4135], requires_grad=True)], Cost : 0.008

Epoch : 3000, Model : [Parameter containing:
tensor([[0.4321, 0.4356],
        [0.5754, 0.7112]], requires_grad=True), Parameter containing:
tensor([-1.0991, -0.4543], requires_grad=True)], Cost : 0.002

Epoch : 4000, Model : [Parameter containing:
tensor([[0.4650, 0.4185],
        [0.5891, 0.7041]], requires_grad=True), Parameter containing:
tensor([-1.1490, -0.4751], requires_grad=True)], Cost : 0.001

Epoch : 5000, Model : [Parameter containing:
tensor([[0.4817, 0.4099],
        [0.5960, 0.7005]], requires_grad=True), Parameter containing:
tensor([-1.1745, -0.4857], requires_grad=Tr

## 3.34 편향 제거

In [None]:
# Same regression as before, but the linear layer is built without a bias
# term, so only the 2x2 weight matrix is learned.
model = torch.nn.Linear(2, 2, bias=False)
criterion = torch.nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.001)

# Train with mini-batch SGD and report every 10000 epochs.
for epoch in range(50000):
  # Running sum of this epoch's batch losses, kept as a plain Python float.
  cost = 0.0

  for batch in train_dataloader:
    x, y = batch
    output = model(x)

    loss = criterion(output, y)

    # Clear gradients from the previous step, backpropagate, then update.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # .item() extracts the scalar value. Adding the tensor itself would turn
    # `cost` into a graph-attached tensor and keep every batch's autograd
    # history alive until the end of the epoch.
    cost += loss.item()

  # Mean loss per batch over the epoch.
  cost /= len(train_dataloader)

  if (epoch + 1) % 10000 == 0:
    print(f"Epoch : {epoch+1:4d}, Model : {list(model.parameters())}, Cost : {cost:.3f}\n")

Epoch : 10000, Model : [Parameter containing:
tensor([[ 1.5385, -0.6694],
        [ 1.0469,  0.2429]], requires_grad=True)], Cost : 0.002

Epoch : 20000, Model : [Parameter containing:
tensor([[ 1.6838, -0.7869],
        [ 1.0947,  0.2043]], requires_grad=True)], Cost : 0.000

Epoch : 30000, Model : [Parameter containing:
tensor([[ 1.6984, -0.7987],
        [ 1.0995,  0.2004]], requires_grad=True)], Cost : 0.000

Epoch : 40000, Model : [Parameter containing:
tensor([[ 1.6998, -0.7999],
        [ 1.0999,  0.2001]], requires_grad=True)], Cost : 0.000

Epoch : 50000, Model : [Parameter containing:
tensor([[ 1.6999, -0.7999],
        [ 1.1000,  0.2000]], requires_grad=True)], Cost : 0.000



# 모델/데이터세트 분리

## 3.36 사용자 정의 데이터세트

In [5]:
class CustomDataset(Dataset):
  """Dataset backed by a CSV file whose first column is x and second column is y.

  Each item is a pair ``(features, target)`` of float tensors, where
  ``features`` is ``[x ** 2, x]`` and ``target`` is ``[y]``.
  """

  def __init__(self, file_path):
    """Read the CSV at ``file_path`` and keep its first two columns as arrays."""
    frame = pd.read_csv(file_path)
    self.length = len(frame)
    self.x, self.y = frame.iloc[:, 0].values, frame.iloc[:, 1].values

  def __len__(self):
    """Return the number of rows read from the CSV."""
    return self.length

  def __getitem__(self, index):
    """Return the ``([x ** 2, x], [y])`` tensor pair for the row at ``index``."""
    x_value = self.x[index]
    features = torch.FloatTensor([x_value ** 2, x_value])
    target = torch.FloatTensor([self.y[index]])
    return features, target

## 3.37 사용자 정의 모델

In [6]:
class CustomModel(torch.nn.Module):
  """Single linear layer mapping a 2-feature input to one output, with bias."""

  def __init__(self):
    super().__init__()
    self.layer = torch.nn.Linear(in_features=2, out_features=1, bias=True)

  def forward(self, x):
    """Apply the linear layer to ``x`` and return the result."""
    return self.layer(x)

## 3.38 사용자 정의 데이터세트와 데이터로더

In [7]:
# Build the dataset from the CSV file stored under DATA_PATH.
train_dataset = CustomDataset(DATA_PATH / "non_linear.csv")
# Serve shuffled mini-batches of 128 samples; an incomplete final batch is dropped.
train_dataloader = DataLoader(
    dataset=train_dataset,
    batch_size=128,
    shuffle=True,
    drop_last=True,
)

## 3.39 GPU 연산 적용

In [8]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
  device = "cuda"
else:
  device = "cpu"

# Module.to() moves the module in place and returns it, so the objects
# are constructed first and then moved to the selected device.
model = CustomModel()
model.to(device)
criterion = torch.nn.MSELoss()
criterion.to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=1e-4)

## 3.40 학습 진행

In [9]:
# Train CustomModel with mini-batch SGD on the selected device and report
# the mean batch loss together with the parameters every 1000 epochs.
for epoch in range(10000):
  # Running sum of this epoch's batch losses, kept as a plain Python float.
  cost = 0.0

  for x, y in train_dataloader:
    # Batches are produced on the CPU; move them to the model's device.
    x = x.to(device)
    y = y.to(device)

    output = model(x)

    loss = criterion(output, y)

    # Clear gradients from the previous step, backpropagate, then update.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # .item() extracts the scalar value. Adding the tensor itself would turn
    # `cost` into a graph-attached tensor and keep every batch's autograd
    # history alive until the end of the epoch.
    cost += loss.item()

  # Mean loss per batch over the epoch.
  cost /= len(train_dataloader)

  if (epoch + 1) % 1000 == 0:
    print(f"Epoch : {epoch+1:4d}, Model : {list(model.parameters())}, Cost : {cost:.3f}\n")

Epoch : 1000, Model : [Parameter containing:
tensor([[ 3.1164, -1.6996]], device='cuda:0', requires_grad=True), Parameter containing:
tensor([-0.4926], device='cuda:0', requires_grad=True)], Cost : 0.498

Epoch : 2000, Model : [Parameter containing:
tensor([[ 3.1155, -1.7028]], device='cuda:0', requires_grad=True), Parameter containing:
tensor([-0.4087], device='cuda:0', requires_grad=True)], Cost : 0.436

Epoch : 3000, Model : [Parameter containing:
tensor([[ 3.1135, -1.7027]], device='cuda:0', requires_grad=True), Parameter containing:
tensor([-0.3319], device='cuda:0', requires_grad=True)], Cost : 0.379

Epoch : 4000, Model : [Parameter containing:
tensor([[ 3.1124, -1.7025]], device='cuda:0', requires_grad=True), Parameter containing:
tensor([-0.2616], device='cuda:0', requires_grad=True)], Cost : 0.324

Epoch : 5000, Model : [Parameter containing:
tensor([[ 3.1114, -1.7024]], device='cuda:0', requires_grad=True), Parameter containing:
tensor([-0.1974], device='cuda:0', requires_gr

## 3.41 모델 평가

In [11]:
# Put the model in evaluation mode before running inference.
model.eval()

# One row per probe value, laid out as [value ** 2, value] to match the
# feature layout the model was trained on.
inputs = torch.FloatTensor([[value ** 2, value] for value in (1, 5, 11)]).to(device)

# Gradients are not needed for inference, so disable autograd tracking.
with torch.no_grad():
  outputs = model(inputs)

print(outputs)

tensor([[  1.4548],
        [ 69.2141],
        [357.2812]], device='cuda:0')


## 3.42 모델 저장

In [12]:
# Persist the trained model in two forms under SAVE_PATH:
#   model.pt            - the whole module object
#   model_state_dict.pt - only the parameter tensors (state_dict)
torch.save(model, SAVE_PATH / "model.pt")
torch.save(model.state_dict(), SAVE_PATH / "model_state_dict.pt")

## 3.44 데이터세트 분리

In [13]:
# Split the full dataset 80% / 10% / remainder into train, validation and test.
dataset = CustomDataset(DATA_PATH / "non_linear.csv")
dataset_size = len(dataset)
train_size = int(dataset_size * 0.8)
validation_size = int(dataset_size * 0.1)
# The test split takes whatever is left, so the three sizes always sum to
# dataset_size even when the int() truncation above rounds down.
test_size = dataset_size - train_size - validation_size

train_dataset, validation_dataset, test_dataset = random_split(dataset, [train_size, validation_size, test_size])
print(f"Training Data Size : {len(train_dataset)}")
print(f"Validation Data Size : {len(validation_dataset)}")
print(f"Testing Data Size : {len(test_dataset)}")

# Training batches are shuffled, and an incomplete final batch is dropped.
train_dataloader = DataLoader(train_dataset, batch_size=16, shuffle=True, drop_last=True)
# Evaluation loaders must visit every sample exactly once: no shuffling, and
# no drop_last, which would silently discard samples whenever the split size
# is not a multiple of the batch size.
validation_dataloader = DataLoader(validation_dataset, batch_size=4, shuffle=False, drop_last=False)
test_dataloader = DataLoader(test_dataset, batch_size=4, shuffle=False, drop_last=False)

Training Data Size : 160
Validation Data Size : 20
Testing Data Size : 20


In [14]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
  device = "cuda"
else:
  device = "cpu"

# Start from a freshly initialised model, loss and optimizer. Module.to()
# moves the module in place and returns it.
model = CustomModel()
model.to(device)
criterion = torch.nn.MSELoss()
criterion.to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=1e-4)

In [15]:
# Train on the training split only and report the mean batch loss together
# with the parameters every 1000 epochs.
for epoch in range(10000):
    # Running sum of this epoch's batch losses, kept as a plain Python float.
    cost = 0.0

    for x, y in train_dataloader:
        # Batches are produced on the CPU; move them to the model's device.
        x = x.to(device)
        y = y.to(device)

        output = model(x)
        loss = criterion(output, y)

        # Clear gradients from the previous step, backpropagate, then update.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # .item() extracts the scalar value. Adding the tensor itself would
        # turn `cost` into a graph-attached tensor and keep every batch's
        # autograd history alive until the end of the epoch.
        cost += loss.item()

    # Mean loss per batch over the epoch.
    cost = cost / len(train_dataloader)

    if (epoch + 1) % 1000 == 0:
        print(f"Epoch : {epoch+1:4d}, Model : {list(model.parameters())}, Cost : {cost:.3f}")

Epoch : 1000, Model : [Parameter containing:
tensor([[ 3.0998, -1.7024]], device='cuda:0', requires_grad=True), Parameter containing:
tensor([0.4331], device='cuda:0', requires_grad=True)], Cost : 0.080
Epoch : 2000, Model : [Parameter containing:
tensor([[ 3.1013, -1.7027]], device='cuda:0', requires_grad=True), Parameter containing:
tensor([0.4617], device='cuda:0', requires_grad=True)], Cost : 0.081
Epoch : 3000, Model : [Parameter containing:
tensor([[ 3.1004, -1.7025]], device='cuda:0', requires_grad=True), Parameter containing:
tensor([0.4730], device='cuda:0', requires_grad=True)], Cost : 0.078
Epoch : 4000, Model : [Parameter containing:
tensor([[ 3.1009, -1.7024]], device='cuda:0', requires_grad=True), Parameter containing:
tensor([0.4775], device='cuda:0', requires_grad=True)], Cost : 0.081
Epoch : 5000, Model : [Parameter containing:
tensor([[ 3.1007, -1.7026]], device='cuda:0', requires_grad=True), Parameter containing:
tensor([0.4793], device='cuda:0', requires_grad=True)]

## 3.46 검증용 데이터세트를 통한 평가

In [16]:
# Put the model in evaluation mode before running inference.
model.eval()

# Gradients are not needed here, so disable autograd tracking for the loop.
with torch.no_grad():
  for x, y in validation_dataloader:
    x, y = x.to(device), y.to(device)
    outputs = model(x)

    # Show inputs, targets and predictions for the batch, then a separator.
    print(
        f"X : {x}",
        f"Y : {y}",
        f"Outputs : {outputs}",
        "--------------------",
        sep="\n",
    )

X : tensor([[44.8900, -6.7000],
        [ 9.6100,  3.1000],
        [16.0000,  4.0000],
        [ 3.6100, -1.9000]], device='cuda:0')
Y : tensor([[151.2400],
        [ 25.0400],
        [ 43.6400],
        [ 15.2300]], device='cuda:0')
Outputs : tensor([[151.0103],
        [ 24.9859],
        [ 43.2574],
        [ 14.9035]], device='cuda:0')
--------------------
X : tensor([[15.2100, -3.9000],
        [88.3600, -9.4000],
        [75.6900,  8.7000],
        [73.9600, -8.6000]], device='cuda:0')
Y : tensor([[ 54.0300],
        [290.4800],
        [220.4600],
        [244.2000]], device='cuda:0')
Outputs : tensor([[ 54.2592],
        [290.3290],
        [220.2462],
        [244.3386]], device='cuda:0')
--------------------
X : tensor([[29.1600, -5.4000],
        [11.5600,  3.4000],
        [20.2500, -4.5000],
        [51.8400,  7.2000]], device='cuda:0')
Y : tensor([[100.4000],
        [ 30.6800],
        [ 71.0900],
        [149.1800]], device='cuda:0')
Outputs : tensor([[100.0467],
    

# 모델 저장 및 불러오기

## 3.47 모델 불러오기

In [18]:
# model.pt holds the whole pickled CustomModel object, not just tensors, so
# the CustomModel class must already be defined when this runs.
# weights_only=False is passed explicitly because recent PyTorch releases
# default torch.load to weights_only=True, which refuses to unpickle a full
# module. Full unpickling can execute arbitrary code, so only use it on
# files you created yourself, as here.
# map_location remaps the stored tensors onto the current device.
model = torch.load(SAVE_PATH / "model.pt", map_location=device, weights_only=False)

## 3.48 모델 구조 확인

In [19]:
# Print the module's repr to inspect the layer structure of the loaded model.
print(model)

CustomModel(
  (layer): Linear(in_features=2, out_features=1, bias=True)
)


## 3.49 모델 상태 저장

In [20]:
# Save only the model's state_dict (its parameter tensors) rather than the whole module object.
torch.save(model.state_dict(), SAVE_PATH / "model_state_dict.pt")

## 3.50 모델 상태 불러오기

In [21]:
# A state_dict carries only tensors, so the architecture has to be rebuilt
# before the saved parameters can be loaded into it.
model = CustomModel().to(device)

# map_location remaps the stored tensors onto the current device.
model_state_dict = torch.load(SAVE_PATH / "model_state_dict.pt", map_location=device)
# Left as the cell's last expression so the notebook displays the key-matching result.
model.load_state_dict(model_state_dict)

<All keys matched successfully>

## 3.51 체크포인트 저장

In [25]:
# Start from a freshly initialised model, loss and optimizer for the
# checkpointed training run. Module.to() moves the module in place and
# returns it.
model = CustomModel()
model.to(device)
criterion = torch.nn.MSELoss()
criterion.to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=1e-4)

In [26]:
# Train for 10000 epochs and write a numbered checkpoint file every 1000
# epochs. `checkpoint` is the 1-based sequence number used in the file name.
checkpoint = 1
for epoch in range(10000):
  # Running sum of this epoch's batch losses, kept as a plain Python float.
  cost = 0.0

  for x, y in train_dataloader:
    # Batches are produced on the CPU; move them to the model's device.
    x = x.to(device)
    y = y.to(device)

    output = model(x)
    loss = criterion(output, y)

    # Clear gradients from the previous step, backpropagate, then update.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # .item() extracts the scalar value. Adding the tensor itself would turn
    # `cost` into a graph-attached tensor, keep every batch's autograd
    # history alive, and put a device-bound tensor into the checkpoint below.
    cost += loss.item()

  # Mean loss per batch over the epoch.
  cost = cost / len(train_dataloader)

  if (epoch + 1) % 1000 == 0:
    # Everything needed to resume training: model and optimizer state plus
    # bookkeeping. "epoch" is the zero-based index of the epoch just finished,
    # and "cost" is a plain float.
    torch.save(
        {
            "model":"CustomModel",
            "epoch":epoch,
            "model_state_dict":model.state_dict(),
            "optimizer_state_dict":optimizer.state_dict(),
            "cost":cost,
            "description":f"CustomModel checkpoint-{checkpoint}",
        },
        SAVE_PATH / f"checkpoint-{checkpoint}.pt"
    )
    checkpoint += 1

## 3.52 체크포인트 불러오기

In [32]:
# Build fresh model, loss and optimizer objects; their state is filled in
# from a checkpoint afterwards. Module.to() moves the module in place and
# returns it.
model = CustomModel()
model.to(device)
criterion = torch.nn.MSELoss()
criterion.to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=1e-4)

In [33]:
# Load the sixth checkpoint. map_location remaps the stored tensors onto the
# current device, so a checkpoint written on a GPU runtime can still be
# loaded on a CPU-only runtime, matching the other torch.load calls in this
# notebook.
checkpoint = torch.load(SAVE_PATH / "checkpoint-6.pt", map_location=device)
# Restore the model parameters and the optimizer state from the checkpoint.
model.load_state_dict(checkpoint["model_state_dict"])
optimizer.load_state_dict(checkpoint["optimizer_state_dict"])
# Zero-based index of the last completed epoch; training resumes at the next one.
checkpoint_epoch = checkpoint["epoch"]
checkpoint_description = checkpoint["description"]
print(checkpoint_description)

CustomModel checkpoint-6


In [34]:
# Resume training from the epoch after the one stored in the checkpoint and
# continue up to epoch 10000, reporting every 1000 epochs.
for epoch in range(checkpoint_epoch + 1, 10000):
  # Running sum of this epoch's batch losses, kept as a plain Python float.
  cost = 0.0

  for x, y in train_dataloader:
    # Batches are produced on the CPU; move them to the model's device.
    x = x.to(device)
    y = y.to(device)

    output = model(x)
    loss = criterion(output, y)

    # Clear gradients from the previous step, backpropagate, then update.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # .item() extracts the scalar value. Adding the tensor itself would turn
    # `cost` into a graph-attached tensor and keep every batch's autograd
    # history alive until the end of the epoch.
    cost += loss.item()

  # Mean loss per batch over the epoch.
  cost = cost / len(train_dataloader)
  if (epoch + 1) % 1000 == 0:
    print(f"Epoch : {epoch+1:4d}, Model : {list(model.parameters())}, Cost : {cost:.3f}")

Epoch : 7000, Model : [Parameter containing:
tensor([[ 3.0998, -1.7026]], device='cuda:0', requires_grad=True), Parameter containing:
tensor([0.4798], device='cuda:0', requires_grad=True)], Cost : 0.081
Epoch : 8000, Model : [Parameter containing:
tensor([[ 3.0997, -1.7027]], device='cuda:0', requires_grad=True), Parameter containing:
tensor([0.4802], device='cuda:0', requires_grad=True)], Cost : 0.080
Epoch : 9000, Model : [Parameter containing:
tensor([[ 3.1012, -1.7026]], device='cuda:0', requires_grad=True), Parameter containing:
tensor([0.4804], device='cuda:0', requires_grad=True)], Cost : 0.082
Epoch : 10000, Model : [Parameter containing:
tensor([[ 3.0999, -1.7025]], device='cuda:0', requires_grad=True), Parameter containing:
tensor([0.4805], device='cuda:0', requires_grad=True)], Cost : 0.078
