### 텐서 생성
tensor() 메소드는 입력받은 데이터의 자료형을 그대로 사용 비추

In [3]:
import torch

print(torch.tensor([1,2,3]))                        # 입력 자료형 그대로 만듦
print(torch.Tensor([[1,2,3], [4,5,6]]))             # 기본 Float
print(torch.LongTensor([1,2,3]))
print(torch.FloatTensor([1,2,3]))

tensor([1, 2, 3])
tensor([[1., 2., 3.],
        [4., 5., 6.]])
tensor([1, 2, 3])
tensor([1., 2., 3.])


### 텐서 속성
shape, dtype, device

In [None]:
import torch

tensor = torch.rand(1, 2)
print(tensor)
print(tensor.shape)
print(tensor.dtype)
print(tensor.device)

tensor([[0.8631, 0.4983]])
torch.Size([1, 2])
torch.float32
cpu


### 텐서 차원 변환

In [17]:
import torch

tensor = torch.rand(1, 2)
print(tensor)
print(tensor.shape)

#tensor = torch.reshape(tensor, (2, 1))
tensor = tensor.reshape(2, 1)
print(tensor)
print(tensor.shape)

tensor([[0.4688, 0.5552]])
torch.Size([1, 2])
tensor([[0.4688],
        [0.5552]])
torch.Size([2, 1])


### 자료형 설정

In [20]:
import torch

tensor = torch.rand((3, 3), dtype=torch.float)          ## 자료형 설정 시, 그냥 float이 아닌, torch.* 형태로 설정
print(tensor)
print(tensor.dtype)

tensor([[0.9212, 0.6410, 0.8986],
        [0.9082, 0.3878, 0.1290],
        [0.2852, 0.3037, 0.5846]])
torch.float32


### 장치 설정 (GPU)

In [25]:
import torch

device = "mps" if torch.mps.is_available() else "cpu"
cpu = torch.FloatTensor([1, 2, 3])
gpu = torch.FloatTensor([1, 2, 3]).to(device)
tensor = torch.rand((1, 1), device=device)

print(device)
print(cpu)
print(gpu)
print(tensor)

mps
tensor([1., 2., 3.])
tensor([1., 2., 3.], device='mps:0')
tensor([[0.3386]], device='mps:0')


### 장치 변환
- cpu 텐서와 gpu 텐서 연산 불가
- gpu 텐서와 넘파이 배열 연산 불가

In [31]:
import torch

cpu = torch.FloatTensor([1,2,3])
gpu = cpu.to("mps")
gpu2cpu = gpu.to("cpu")

print(cpu)
print(gpu)
print(gpu2cpu)

tensor([1., 2., 3.])
tensor([1., 2., 3.], device='mps:0')
tensor([1., 2., 3.])


### Numpy to Tensor

In [None]:
import torch
import numpy as np

ndarray = np.array([1, 2, 3], dtype=np.uint8)
print(torch.tensor(ndarray))
print(torch.Tensor(ndarray))
print(torch.from_numpy(ndarray))

tensor([1, 2, 3], dtype=torch.uint8)
tensor([1., 2., 3.])
tensor([1, 2, 3], dtype=torch.uint8)


### Tensor to Numpy
- Tensor는 모든 연산을 추적해 기록함 (역전파 등과 같은 연산이 진행돼 모델 학습을 진행하기 위함)
- detach() 메소드로 새로운 텐서를 반환하여 numpy()로 변환

In [37]:
import torch

tensor = torch.FloatTensor([1, 2, 3]).to("mps")
ndarray = tensor.detach().cpu().numpy()
print(ndarray)
print(type(ndarray))

[1. 2. 3.]
<class 'numpy.ndarray'>


### Simple Linear Regression (Numpy)

In [None]:
import numpy as np

# Make Dataset
x = np.empty((0, 1))
y = np.empty((0, 1))
temp_w = 5.3122

for i in range(30):
    value = np.random.uniform(-2, 2)
    x = np.append(x, [[i]], axis=0)
    y = np.append(y, [[temp_w*i + value]], axis=0)
    
# Set initial parameters
weight = 0.0
bias = 0.0
learning_rate = 0.001

# Train
for epoch in range(10000):
    y_hat = weight * x + bias
    cost = ((y - y_hat) ** 2).mean()

    weight = weight - learning_rate * ((y_hat - y) * x).mean()
    bias = bias - learning_rate * (y_hat - y).mean()
    
    if (epoch + 1) % 1000 == 0:
        print(f"epoch : {epoch+1:4d}, weight : {weight:.3f}, bias : {bias:.3f}, cost : {cost:.3f}")
        

epoch : 1000, weight : 5.313, bias : 0.295, cost : 1.346
epoch : 2000, weight : 5.312, bias : 0.315, cost : 1.345
epoch : 3000, weight : 5.311, bias : 0.329, cost : 1.345
epoch : 4000, weight : 5.310, bias : 0.341, cost : 1.345
epoch : 5000, weight : 5.310, bias : 0.350, cost : 1.345
epoch : 6000, weight : 5.310, bias : 0.356, cost : 1.344
epoch : 7000, weight : 5.309, bias : 0.361, cost : 1.344
epoch : 8000, weight : 5.309, bias : 0.365, cost : 1.344
epoch : 9000, weight : 5.309, bias : 0.369, cost : 1.344
epoch : 10000, weight : 5.309, bias : 0.371, cost : 1.344


### Simple Linear Regression (Pytorch)
- 가중치, 편향 텐서에서 requires_grad는 자동 미분 기능을 의미함
- zero_grad() -> backward() -> step()

In [None]:
import torch
from torch import optim     # 최적함 함수 포함

# Make Dataset
x = np.empty((0, 1))
y = np.empty((0, 1))
temp_w = 5.3122

for i in range(30):
    value = np.random.uniform(-2, 2)
    x = np.append(x, [[i]], axis=0)
    y = np.append(y, [[temp_w*i + value]], axis=0)

x = torch.FloatTensor(x)
y = torch.FloatTensor(y)

# Set initial parameters
weight = torch.zeros(1, requires_grad=True)
bias = torch.zeros(1, requires_grad=True)
learning_rate = 0.001

# Set optimizer
optimizer = optim.SGD([weight, bias], lr=learning_rate)

for epoch in range(10000):
    hypotesis = x * weight + bias
    cost = torch.mean((hypotesis - y) ** 2)
    
    optimizer.zero_grad()
    cost.backward()
    optimizer.step()
    
    if (epoch + 1) % 1000 == 0:
        print(f"epoch : {epoch+1:4d}, weight : {weight.item():.3f}, bias : {bias.item():.3f}, cost : {cost:.3f}")

epoch : 1000, weight : 5.306, bias : 0.391, cost : 0.877
epoch : 2000, weight : 5.302, bias : 0.463, cost : 0.872
epoch : 3000, weight : 5.300, bias : 0.505, cost : 0.870
epoch : 4000, weight : 5.299, bias : 0.530, cost : 0.870
epoch : 5000, weight : 5.298, bias : 0.545, cost : 0.869
epoch : 6000, weight : 5.298, bias : 0.554, cost : 0.869
epoch : 7000, weight : 5.297, bias : 0.559, cost : 0.869
epoch : 8000, weight : 5.297, bias : 0.562, cost : 0.869
epoch : 9000, weight : 5.297, bias : 0.564, cost : 0.869
epoch : 10000, weight : 5.297, bias : 0.565, cost : 0.869


In [101]:
import torch
from torch import nn 
from torch import optim

# Make Dataset
x = np.empty((0, 1))
y = np.empty((0, 1))
temp_w = 5.3122

for i in range(30):
    value = np.random.uniform(-2, 2)
    x = np.append(x, [[i]], axis=0)
    y = np.append(y, [[temp_w*i + value]], axis=0)

x = torch.FloatTensor(x)
y = torch.FloatTensor(y)

# Set initial parameters
model = nn.Linear(1, 1, bias=True)
criterion = nn.MSELoss()
learning_rate = 0.001

# Set optimizer
optimizer = optim.SGD(model.parameters(), lr=learning_rate)

for epoch in range(10000):
    output = model(x)
    cost = criterion(output, y)
    
    optimizer.zero_grad()
    cost.backward()
    optimizer.step()
    
    if (epoch + 1) % 1000 == 0:
        print(f"epoch : {epoch+1:4d}, model : {list(model.parameters())}, cost : {cost:.3f}")

epoch : 1000, model : [Parameter containing:
tensor([[5.2757]], requires_grad=True), Parameter containing:
tensor([0.0921], requires_grad=True)], cost : 1.127
epoch : 2000, model : [Parameter containing:
tensor([[5.2683]], requires_grad=True), Parameter containing:
tensor([0.2390], requires_grad=True)], cost : 1.105
epoch : 3000, model : [Parameter containing:
tensor([[5.2638]], requires_grad=True), Parameter containing:
tensor([0.3260], requires_grad=True)], cost : 1.097
epoch : 4000, model : [Parameter containing:
tensor([[5.2612]], requires_grad=True), Parameter containing:
tensor([0.3775], requires_grad=True)], cost : 1.095
epoch : 5000, model : [Parameter containing:
tensor([[5.2597]], requires_grad=True), Parameter containing:
tensor([0.4080], requires_grad=True)], cost : 1.094
epoch : 6000, model : [Parameter containing:
tensor([[5.2587]], requires_grad=True), Parameter containing:
tensor([0.4260], requires_grad=True)], cost : 1.093
epoch : 7000, model : [Parameter containing:
t

### DataSet, DataLoader
- drop_last : 불완전한 배치의 사용 여부

In [114]:
import torch
from torch import nn
from torch import optim
from torch.utils.data import TensorDataset, DataLoader

train_x = torch.FloatTensor([
    [1, 2], [2, 3], [3, 4], [4, 5], [5, 6], [6, 7]
])
train_y = torch.FloatTensor([
    [0.1, 1.5], [1, 2.8], [1.9, 4.1], [2.8, 5.4], [3.7, 6.7], [4.6, 8]
])

train_dataset = TensorDataset(train_x, train_y)
train_dataloader = DataLoader(train_dataset, batch_size=2, shuffle=True, drop_last=True)

model = nn.Linear(2, 2, bias=True)
criterion = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=0.001)

for epoch in range(20000):
    cost = 0.0
    
    for batch in train_dataloader:
        x, y = batch
        output = model(x)
        
        loss = criterion(output, y)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        cost += loss
        
    cost =  cost / len(train_dataloader)
    
    if (epoch + 1) % 1000 == 0:
        print(f"epoch : {epoch+1:4d}, model : {list(model.parameters())}, cost : {cost:.3f}")
    

epoch : 1000, model : [Parameter containing:
tensor([[ 0.8158, -0.0464],
        [ 0.2666,  0.9545]], requires_grad=True), Parameter containing:
tensor([-0.2032, -0.4224], requires_grad=True)], cost : 0.040
epoch : 2000, model : [Parameter containing:
tensor([[ 0.9485, -0.1151],
        [ 0.3467,  0.9131]], requires_grad=True), Parameter containing:
tensor([-0.4045, -0.5439], requires_grad=True)], cost : 0.010
epoch : 3000, model : [Parameter containing:
tensor([[ 1.0161, -0.1501],
        [ 0.3875,  0.8920]], requires_grad=True), Parameter containing:
tensor([-0.5071, -0.6058], requires_grad=True)], cost : 0.003
epoch : 4000, model : [Parameter containing:
tensor([[ 1.0506, -0.1679],
        [ 0.4083,  0.8813]], requires_grad=True), Parameter containing:
tensor([-0.5594, -0.6373], requires_grad=True)], cost : 0.001
epoch : 5000, model : [Parameter containing:
tensor([[ 1.0681, -0.1769],
        [ 0.4189,  0.8758]], requires_grad=True), Parameter containing:
tensor([-0.5860, -0.6534], 

### 비선형 회귀

In [116]:
import torch
import pandas as pd
from torch import nn
from torch import optim
from torch.utils.data import Dataset, DataLoader

class CustomDataset(Dataset):
    def __init__(self, file_path):
        df = pd.read_csv(file_path)
        self.x = df.iloc[:, 0].values
        self.y = df.iloc[:, 1].values
        self.length = len(df)
        
    def __getitem__(self, index):
        x = torch.FloatTensor([self.x[index] ** 2, self.x[index]])
        y = torch.FloatTensor([self.y[index]])
        return x, y
    
    def __len__(self):
        return self.length
    
class CustomModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.layer = nn.Linear(2, 1)
        
    def forward(self, x):
        x = self.layer(x)
        return x

train_dataset = CustomDataset("./datasets/non_linear.csv")
train_dataloader = DataLoader(train_dataset, batch_size=128, shuffle=True, drop_last=True)

device = "mps" if torch.mps.is_available() else "cpu"
model = CustomModel().to(device)
criterion = nn.MSELoss().to(device)
optimizer = optim.SGD(model.parameters(), lr=0.0001)

for epoch in range(10000):
    cost = 0.0
    
    for x, y in train_dataloader:
        x = x.to(device)
        y = y.to(device)
        
        output = model(x)
        loss = criterion(output, y)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        cost += loss

    cost = cost / len(train_dataloader)
    
    if (epoch + 1) % 1000 == 0:
        print(f"epoch : {epoch+1:4d}, model : {list(model.parameters())}, cost : {cost:.3f}")
        
with torch.no_grad():
    model.eval()
    inputs = torch.FloatTensor(
        [
            [1**2, 1],
            [5**2, 5],
            [11**2, 11]
        ]
    ).to(device)
    outputs = model(inputs)
    print(outputs)
    
torch.save(
    model,
    "./models/model.pt"
)

torch.save(
    model.state_dict(),
    "./models/model_state_dict.pt"
)

epoch : 1000, model : [Parameter containing:
tensor([[ 3.1125, -1.7006]], device='mps:0', requires_grad=True), Parameter containing:
tensor([-0.2534], device='mps:0', requires_grad=True)], cost : 0.318
epoch : 2000, model : [Parameter containing:
tensor([[ 3.1113, -1.7027]], device='mps:0', requires_grad=True), Parameter containing:
tensor([-0.1897], device='mps:0', requires_grad=True)], cost : 0.290
epoch : 3000, model : [Parameter containing:
tensor([[ 3.1103, -1.7025]], device='mps:0', requires_grad=True), Parameter containing:
tensor([-0.1314], device='mps:0', requires_grad=True)], cost : 0.241
epoch : 4000, model : [Parameter containing:
tensor([[ 3.1091, -1.7025]], device='mps:0', requires_grad=True), Parameter containing:
tensor([-0.0781], device='mps:0', requires_grad=True)], cost : 0.215
epoch : 5000, model : [Parameter containing:
tensor([[ 3.1083, -1.7028]], device='mps:0', requires_grad=True), Parameter containing:
tensor([-0.0295], device='mps:0', requires_grad=True)], cos

### 데이터세트 분리

In [119]:
import torch
import pandas as pd
from torch import nn
from torch import optim
from torch.utils.data import Dataset, DataLoader, random_split

class CustomDataset(Dataset):
    def __init__(self, file_path):
        df = pd.read_csv(file_path)
        self.x = df.iloc[:, 0].values
        self.y = df.iloc[:, 1].values
        self.length = len(df)
        
    def __getitem__(self, index):
        x = torch.FloatTensor([self.x[index] ** 2, self.x[index]])
        y = torch.FloatTensor([self.y[index]])
        return x, y
    
    def __len__(self):
        return self.length
    
class CustomModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.layer = nn.Linear(2, 1)
        
    def forward(self, x):
        x = self.layer(x)
        return x

dataset = CustomDataset("./datasets/non_linear.csv")
dataset_size = len(dataset)
train_size = int(dataset_size * 0.8)
val_size = int(dataset_size * 0.1)
test_size = dataset_size - train_size - val_size

train_dataset, val_dataset, test_dataset = random_split(dataset, [train_size, val_size, test_size], torch.manual_seed(0))
train_dataloader = DataLoader(train_dataset, batch_size=16, shuffle=True, drop_last=True)
val_dataloader = DataLoader(val_dataset, batch_size=4, shuffle=True, drop_last=True)
test_dataloader = DataLoader(test_dataset, batch_size=4, shuffle=True, drop_last=True)

device = "mps" if torch.mps.is_available() else "cpu"
model = CustomModel().to(device)
criterion = nn.MSELoss().to(device)
optimizer = optim.SGD(model.parameters(), lr=0.0001)

for epoch in range(10000):
    cost = 0.0
    
    for x, y in train_dataloader:
        x = x.to(device)
        y = y.to(device)
        
        output = model(x)
        loss = criterion(output, y)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        cost += loss

    cost = cost / len(train_dataloader)
    
    if (epoch + 1) % 1000 == 0:
        print(f"epoch : {epoch+1:4d}, model : {list(model.parameters())}, cost : {cost:.3f}")
        
with torch.no_grad():
    model.eval()
    for x, y in val_dataloader:
        x = x.to(device)
        y = y.to(device)
        
        outputs = model(x)
        print(outputs)
        
torch.save(
    model,
    "./models/model.pt"
)

torch.save(
    model.state_dict(),
    "./models/model_state_dict.pt"
)

epoch : 1000, model : [Parameter containing:
tensor([[ 3.1000, -1.7007]], device='mps:0', requires_grad=True), Parameter containing:
tensor([0.4929], device='mps:0', requires_grad=True)], cost : 0.074
epoch : 2000, model : [Parameter containing:
tensor([[ 3.1007, -1.7009]], device='mps:0', requires_grad=True), Parameter containing:
tensor([0.4874], device='mps:0', requires_grad=True)], cost : 0.073
epoch : 3000, model : [Parameter containing:
tensor([[ 3.1014, -1.7008]], device='mps:0', requires_grad=True), Parameter containing:
tensor([0.4854], device='mps:0', requires_grad=True)], cost : 0.073
epoch : 4000, model : [Parameter containing:
tensor([[ 3.1009, -1.7008]], device='mps:0', requires_grad=True), Parameter containing:
tensor([0.4845], device='mps:0', requires_grad=True)], cost : 0.072
epoch : 5000, model : [Parameter containing:
tensor([[ 3.1012, -1.7009]], device='mps:0', requires_grad=True), Parameter containing:
tensor([0.4842], device='mps:0', requires_grad=True)], cost : 0

### 모델 불러오기

In [126]:
import torch
from torch import nn

class CustomModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.layer = nn.Linear(2, 1)
        
    def forward(self, x):
        x = self.layer(x)
        return x
    
device = "mps" if torch.mps.is_available() else "cpu"
model = torch.load("./models/model.pt", map_location=device, weights_only=False)
print(model)

CustomModel(
  (layer): Linear(in_features=2, out_features=1, bias=True)
)


### 모델 상태 불러오기

In [128]:
import torch
from torch import nn

class CustomModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.layer = nn.Linear(2, 1)
        
    def forward(self, x):
        x = self.layer(x)
        return x
    
device = "mps" if torch.mps.is_available() else "cpu"
model = CustomModel().to(device)

model_state_dict = torch.load("./models/model_state_dict.pt", map_location=device)
model.load_state_dict(model_state_dict)

with torch.no_grad():
    model.eval()
    inputs = torch.FloatTensor(
        [
            [1**2, 1],
            [5**2, 5],
            [11**2, 11]
        ]
    ).to(device)
    outputs = model(inputs)
    print(outputs)

tensor([[  1.8828],
        [ 69.4762],
        [356.8583]], device='mps:0')


### 체크포인트 저장

In [129]:
import torch
import pandas as pd
from torch import nn
from torch import optim
from torch.utils.data import Dataset, DataLoader, random_split

class CustomDataset(Dataset):
    def __init__(self, file_path):
        df = pd.read_csv(file_path)
        self.x = df.iloc[:, 0].values
        self.y = df.iloc[:, 1].values
        self.length = len(df)
        
    def __getitem__(self, index):
        x = torch.FloatTensor([self.x[index] ** 2, self.x[index]])
        y = torch.FloatTensor([self.y[index]])
        return x, y
    
    def __len__(self):
        return self.length
    
class CustomModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.layer = nn.Linear(2, 1)
        
    def forward(self, x):
        x = self.layer(x)
        return x

dataset = CustomDataset("./datasets/non_linear.csv")
dataset_size = len(dataset)
train_size = int(dataset_size * 0.8)
val_size = int(dataset_size * 0.1)
test_size = dataset_size - train_size - val_size

train_dataset, val_dataset, test_dataset = random_split(dataset, [train_size, val_size, test_size], torch.manual_seed(0))
train_dataloader = DataLoader(train_dataset, batch_size=16, shuffle=True, drop_last=True)
val_dataloader = DataLoader(val_dataset, batch_size=4, shuffle=True, drop_last=True)
test_dataloader = DataLoader(test_dataset, batch_size=4, shuffle=True, drop_last=True)

device = "mps" if torch.mps.is_available() else "cpu"
model = CustomModel().to(device)
criterion = nn.MSELoss().to(device)
optimizer = optim.SGD(model.parameters(), lr=0.0001)


checkpoint = 1
for epoch in range(10000):
    cost = 0.0
    
    for x, y in train_dataloader:
        x = x.to(device)
        y = y.to(device)
        
        output = model(x)
        loss = criterion(output, y)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        cost += loss

    cost = cost / len(train_dataloader)
    
    if (epoch + 1) % 1000 == 0:
        torch.save(
            {
                "model" : "CustomModel",
                "epoch" : epoch,
                "model_state_dict" : model.state_dict(),
                "optimizer_state_dict" : optimizer.state_dict(),
                "cost" : cost,
                "description" : f"CustomModel Checkpoint-{checkpoint}",
            },
            f"./models/checkpoint-{checkpoint}.pt",
        )
        checkpoint += 1
        print(f"epoch : {epoch+1:4d}, model : {list(model.parameters())}, cost : {cost:.3f}")
        
with torch.no_grad():
    model.eval()
    for x, y in val_dataloader:
        x = x.to(device)
        y = y.to(device)
        
        outputs = model(x)
        print(outputs)

epoch : 1000, model : [Parameter containing:
tensor([[ 3.1000, -1.7007]], device='mps:0', requires_grad=True), Parameter containing:
tensor([0.4929], device='mps:0', requires_grad=True)], cost : 0.074
epoch : 2000, model : [Parameter containing:
tensor([[ 3.1007, -1.7009]], device='mps:0', requires_grad=True), Parameter containing:
tensor([0.4874], device='mps:0', requires_grad=True)], cost : 0.073
epoch : 3000, model : [Parameter containing:
tensor([[ 3.1014, -1.7008]], device='mps:0', requires_grad=True), Parameter containing:
tensor([0.4854], device='mps:0', requires_grad=True)], cost : 0.073
epoch : 4000, model : [Parameter containing:
tensor([[ 3.1009, -1.7008]], device='mps:0', requires_grad=True), Parameter containing:
tensor([0.4845], device='mps:0', requires_grad=True)], cost : 0.072
epoch : 5000, model : [Parameter containing:
tensor([[ 3.1012, -1.7009]], device='mps:0', requires_grad=True), Parameter containing:
tensor([0.4842], device='mps:0', requires_grad=True)], cost : 0

### 체크포인트 불러오기

In [130]:
import torch
import pandas as pd
from torch import nn
from torch import optim
from torch.utils.data import Dataset, DataLoader, random_split

class CustomDataset(Dataset):
    def __init__(self, file_path):
        df = pd.read_csv(file_path)
        self.x = df.iloc[:, 0].values
        self.y = df.iloc[:, 1].values
        self.length = len(df)
        
    def __getitem__(self, index):
        x = torch.FloatTensor([self.x[index] ** 2, self.x[index]])
        y = torch.FloatTensor([self.y[index]])
        return x, y
    
    def __len__(self):
        return self.length
    
class CustomModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.layer = nn.Linear(2, 1)
        
    def forward(self, x):
        x = self.layer(x)
        return x

dataset = CustomDataset("./datasets/non_linear.csv")
dataset_size = len(dataset)
train_size = int(dataset_size * 0.8)
val_size = int(dataset_size * 0.1)
test_size = dataset_size - train_size - val_size

train_dataset, val_dataset, test_dataset = random_split(dataset, [train_size, val_size, test_size], torch.manual_seed(0))
train_dataloader = DataLoader(train_dataset, batch_size=16, shuffle=True, drop_last=True)
val_dataloader = DataLoader(val_dataset, batch_size=4, shuffle=True, drop_last=True)
test_dataloader = DataLoader(test_dataset, batch_size=4, shuffle=True, drop_last=True)

device = "mps" if torch.mps.is_available() else "cpu"
model = CustomModel().to(device)
criterion = nn.MSELoss().to(device)
optimizer = optim.SGD(model.parameters(), lr=0.0001)

checkpoint = torch.load("./models/checkpoint-6.pt")
model.load_state_dict(checkpoint["model_state_dict"])
optimizer.load_state_dict(checkpoint["optimizer_state_dict"])
checkpoint_epoch = checkpoint["epoch"]
checkpoint_description = checkpoint["description"]
print(checkpoint_description)


for epoch in range(checkpoint_epoch+1, 10000):
    cost = 0.0
    
    for x, y in train_dataloader:
        x = x.to(device)
        y = y.to(device)
        
        output = model(x)
        loss = criterion(output, y)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        cost += loss
    if (epoch + 1) % 1000 == 0:
        print(f"epoch : {epoch+1:4d}, model : {list(model.parameters())}, cost : {cost:.3f}")

CustomModel Checkpoint-6
epoch : 7000, model : [Parameter containing:
tensor([[ 3.1002, -1.7007]], device='mps:0', requires_grad=True), Parameter containing:
tensor([0.4840], device='mps:0', requires_grad=True)], cost : 0.736
epoch : 8000, model : [Parameter containing:
tensor([[ 3.1007, -1.7009]], device='mps:0', requires_grad=True), Parameter containing:
tensor([0.4839], device='mps:0', requires_grad=True)], cost : 0.732
epoch : 9000, model : [Parameter containing:
tensor([[ 3.1014, -1.7008]], device='mps:0', requires_grad=True), Parameter containing:
tensor([0.4840], device='mps:0', requires_grad=True)], cost : 0.727
epoch : 10000, model : [Parameter containing:
tensor([[ 3.1009, -1.7008]], device='mps:0', requires_grad=True), Parameter containing:
tensor([0.4840], device='mps:0', requires_grad=True)], cost : 0.723
