In [4]:
import copy
import torch
import torch.nn as nn
import torch.optim as optim

In [8]:
# Minimal two-layer model definition
class SimpleModel(nn.Module):
    """Two stacked linear layers (10 -> 5 -> 1) with no activation in between."""

    def __init__(self):
        super().__init__()
        # Layer the experiment freezes
        self.layer1 = nn.Linear(in_features=10, out_features=5)
        # Layer the experiment trains
        self.layer2 = nn.Linear(in_features=5, out_features=1)

    def forward(self, x):
        """Pass ``x`` through ``layer1`` and then ``layer2`` and return the result."""
        return self.layer2(self.layer1(x))

# Build two identical models so the two freezing strategies can be tried
# independently. The global RNG is seeded first so the initial weights (and
# therefore the printed results) are the same on every Restart & Run All.
torch.manual_seed(0)
model1 = SimpleModel()  # layer1 will be frozen via requires_grad=False
model2 = copy.deepcopy(model1)  # layer1 will be frozen via learning_rate=0

In [9]:
# Loss function shared by both experiments
criterion = nn.MSELoss()

# Strategy 2 (model2): keep layer1 in the optimizer but give it a learning rate of 0
optimizer2 = optim.SGD([
    {'params': model2.layer1.parameters(), 'lr': 0.0},  # layer1 "frozen" via lr=0
    {'params': model2.layer2.parameters(), 'lr': 0.1}  # layer2 is trained
])

# Strategy 1 (model1): turn off gradient tracking for layer1's parameters
for param in model1.layer1.parameters():
    param.requires_grad = False

# Input/target batch drawn from a dedicated, seeded generator so the data is
# identical every time this cell runs, regardless of the global RNG state.
data_rng = torch.Generator().manual_seed(0)
x = torch.randn(8, 10, generator=data_rng)  # input batch: 8 samples, 10 features
y = torch.randn(8, 1, generator=data_rng)   # target batch: one value per sample

In [11]:
# model1's layer2 is still trainable, so model1 needs an optimizer as well;
# otherwise the layer2 comparison below only shows that model1 was never stepped.
# Only parameters that still require grad are handed over (layer1 is frozen).
# The learning rate is read from optimizer2's second param group (its layer2
# group) so both models always train layer2 with the same step size.
layer2_lr = optimizer2.param_groups[1]['lr']
optimizer1 = optim.SGD(
    [p for p in model1.parameters() if p.requires_grad],
    lr=layer2_lr,
)

# Clear gradients left over from a previous run of this cell; backward()
# accumulates into .grad, so re-running without this would change the step.
optimizer1.zero_grad()
optimizer2.zero_grad()

# Forward pass
output1 = model1(x)
loss1 = criterion(output1, y)

output2 = model2(x)
loss2 = criterion(output2, y)

# Backward pass
loss1.backward()
loss2.backward()

# Optimizer step
optimizer1.step()
optimizer2.step()

# Compare layer1 weights (frozen in both models, by different mechanisms).
# .detach() is used instead of .data for read-only access to the values.
print("\nlayer1.weight values comparison:")
print("model1 (requires_grad=False):", model1.layer1.weight.detach())
print("model2 (learning_rate=0):", model2.layer1.weight.detach())
print("Are weights equal?", torch.allclose(model1.layer1.weight.detach(), model2.layer1.weight.detach()))


# Compare layer2 weights (trained in both models)
print("\nlayer2.weight values comparison:")
print("model1 (requires_grad=False):", model1.layer2.weight.detach())
print("model2 (learning_rate=0):", model2.layer2.weight.detach())
print("Are weights equal?", torch.allclose(model1.layer2.weight.detach(), model2.layer2.weight.detach()))


layer2.weight values comparison:
model1 (requires_grad=False): tensor([[-0.2314, -0.2362,  0.2633,  0.3005,  0.2951,  0.3033,  0.1839, -0.1985,
         -0.2011, -0.0437],
        [-0.0956, -0.2457, -0.1331, -0.3159, -0.0498,  0.0175,  0.0210,  0.1499,
         -0.0426,  0.2129],
        [-0.3009, -0.0329,  0.1318,  0.3105,  0.1225,  0.1839, -0.2842,  0.0626,
          0.0298, -0.0483],
        [-0.2429, -0.1088,  0.1813,  0.2302,  0.0484,  0.0344, -0.2778,  0.1923,
          0.2885, -0.1139],
        [-0.2684,  0.1971, -0.2305, -0.2347, -0.0004,  0.0748,  0.1602, -0.1460,
         -0.0861, -0.3100]])
model2 (learning_rate=0): tensor([[-0.2314, -0.2362,  0.2633,  0.3005,  0.2951,  0.3033,  0.1839, -0.1985,
         -0.2011, -0.0437],
        [-0.0956, -0.2457, -0.1331, -0.3159, -0.0498,  0.0175,  0.0210,  0.1499,
         -0.0426,  0.2129],
        [-0.3009, -0.0329,  0.1318,  0.3105,  0.1225,  0.1839, -0.2842,  0.0626,
          0.0298, -0.0483],
        [-0.2429, -0.1088,  0.1813,  