<a href="https://colab.research.google.com/github/DonghaeSuh/PyTorch_Basic/blob/main/checkpoint.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### 파일 경로 설정

In [1]:
cd drive/MyDrive/pytorch

/content/drive/MyDrive/pytorch


In [2]:
ls

check  dataset.csv  Model.ipynb  model.pt  model_state_dict.pt


In [3]:
import torch
import pandas as pd
from torch import nn
from torch import optim
from torch.utils.data import Dataset,DataLoader

In [16]:
class CustomDataset(Dataset):
  """Dataset backed by a CSV file: column 0 is the input, column 1 the target."""

  def __init__(self,file_path):
    """Read the CSV at `file_path` and keep its first two columns as arrays."""
    frame=pd.read_csv(file_path)
    self.x,self.y=frame.iloc[:,0].values,frame.iloc[:,1].values
    self.length=len(frame)

  def __getitem__(self,index):
    """Return `([x**2, x], [y])` for row `index` as two float tensors."""
    value=self.x[index]
    features=torch.FloatTensor([value**2,value])
    target=torch.FloatTensor([self.y[index]])
    return features,target

  def __len__(self):
    """Number of rows read from the CSV."""
    return self.length


class CustomModel(nn.Module):
  """Model made of a single linear layer mapping 2 input features to 1 output."""

  def __init__(self):
    super().__init__()
    self.layer=nn.Linear(2,1)

  def forward(self,x):
    """Apply the linear layer to `x` and return the result."""
    return self.layer(x)


# Build the dataset and a shuffled loader. drop_last=True discards a trailing
# batch with fewer than 128 samples.
train_dataset=CustomDataset("./dataset.csv")
train_dataloader = DataLoader(train_dataset,batch_size=128,shuffle=True,drop_last=True)

# is_available must be *called*: the bare function object is always truthy,
# which would select "cuda" even on a CPU-only runtime.
device="cuda" if torch.cuda.is_available() else "cpu"
model=CustomModel().to(device)
criterion=nn.MSELoss().to(device)
optimizer=optim.SGD(model.parameters(),lr=0.0001)

# 1-based index of the next checkpoint file to write.
checkpoint=1

for epoch in range(10000):
  cost=0.0

  for x,y in train_dataloader:
    x=x.to(device)
    y=y.to(device)

    output=model(x)
    loss=criterion(output,y)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # .item() gives a plain Python float, so the running total does not chain
    # autograd history across batches and is not stored as a device tensor.
    cost+=loss.item()

  # Mean batch loss for this epoch.
  cost=cost/len(train_dataloader)

  # Every 1000 epochs, save everything needed to resume training later:
  # model weights, optimizer state, and the (0-based) epoch just finished.
  if (epoch+1)%1000==0:
    torch.save(
        {
            "model":"CustomModel",
            "epoch": epoch,
            "model_state_dict":model.state_dict(),
            "optimizer_state_dict":optimizer.state_dict(),
            "cost":cost,
            "description":f"CustomModel 체크포인트-{checkpoint}",
        },
        f"./checkpoint-{checkpoint}.pt",
    )
    print(checkpoint)
    checkpoint+=1
    
  

1
2
3
4
5
6
7
8
9
10


### 체크포인트 불러오기

In [17]:
class CustomDataset(Dataset):
  """Dataset backed by a CSV file: column 0 is the input, column 1 the target."""

  def __init__(self,file_path):
    """Read the CSV at `file_path` and keep its first two columns as arrays."""
    frame=pd.read_csv(file_path)
    self.x,self.y=frame.iloc[:,0].values,frame.iloc[:,1].values
    self.length=len(frame)

  def __getitem__(self,index):
    """Return `([x**2, x], [y])` for row `index` as two float tensors."""
    value=self.x[index]
    features=torch.FloatTensor([value**2,value])
    target=torch.FloatTensor([self.y[index]])
    return features,target

  def __len__(self):
    """Number of rows read from the CSV."""
    return self.length


class CustomModel(nn.Module):
  """Model made of a single linear layer mapping 2 input features to 1 output."""

  def __init__(self):
    super().__init__()
    self.layer=nn.Linear(2,1)

  def forward(self,x):
    """Apply the linear layer to `x` and return the result."""
    return self.layer(x)


# Rebuild the same data pipeline, model, loss and optimizer used for training;
# their state is then overwritten from the checkpoint below.
train_dataset=CustomDataset("./dataset.csv")
train_dataloader = DataLoader(train_dataset,batch_size=128,shuffle=True,drop_last=True)

# is_available must be *called*: the bare function object is always truthy,
# which would select "cuda" even on a CPU-only runtime.
device="cuda" if torch.cuda.is_available() else "cpu"
model=CustomModel().to(device)
criterion=nn.MSELoss().to(device)
optimizer=optim.SGD(model.parameters(),lr=0.0001)

# map_location remaps saved tensors onto the current device, so a checkpoint
# written on a GPU runtime can still be loaded on a CPU-only one.
checkpoint=torch.load("./checkpoint-6.pt",map_location=device)
model.load_state_dict(checkpoint["model_state_dict"])
optimizer.load_state_dict(checkpoint["optimizer_state_dict"])
checkpoint_epoch=checkpoint["epoch"]
checkpoint_description=checkpoint["description"]

print(checkpoint_description)

# The stored epoch is the last one completed, so training resumes at the next.
for epoch in range(checkpoint_epoch+1,10000):
  cost=0.0

  for x,y in train_dataloader:
    x=x.to(device)
    y=y.to(device)

    output=model(x)
    loss=criterion(output,y)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # .item() gives a plain Python float, so the running total does not chain
    # autograd history across batches.
    cost+=loss.item()

  # Mean batch loss for this epoch.
  cost=cost/len(train_dataloader)

  # Report once per epoch (not once per batch), every 1000 epochs.
  if (epoch + 1) % 1000 == 0:
    print(f"Epoch : {epoch+1:4d}, Model : {list(model.parameters())}, Cost : {cost:.3f}")


CustomModel 체크포인트-6
Epoch : 7000, Model : [Parameter containing:
tensor([[ 3.1015, -1.7034]], device='cuda:0', requires_grad=True), Parameter containing:
tensor([0.4001], device='cuda:0', requires_grad=True)], Cost : 0.083
Epoch : 8000, Model : [Parameter containing:
tensor([[ 3.1012, -1.7031]], device='cuda:0', requires_grad=True), Parameter containing:
tensor([0.4081], device='cuda:0', requires_grad=True)], Cost : 0.077
Epoch : 9000, Model : [Parameter containing:
tensor([[ 3.1010, -1.7030]], device='cuda:0', requires_grad=True), Parameter containing:
tensor([0.4155], device='cuda:0', requires_grad=True)], Cost : 0.078
Epoch : 10000, Model : [Parameter containing:
tensor([[ 3.1011, -1.7034]], device='cuda:0', requires_grad=True), Parameter containing:
tensor([0.4222], device='cuda:0', requires_grad=True)], Cost : 0.072
