In [14]:
import torch
import torch.nn as nn

class LinearRegressionModel(nn.Module):
    """Single linear layer followed by an element-wise activation.

    Args:
        input_dim: Number of input features of the linear layer.
        output_dim: Number of output features of the linear layer.
        activation: Optional module applied to the linear output. When
            omitted, ``nn.ReLU()`` is used. Pass e.g. ``nn.Sigmoid()`` or
            ``nn.LeakyReLU(0.1)`` to try another non-linearity without
            editing the class.
    """

    def __init__(self, input_dim, output_dim, activation=None):
        super().__init__()

        self.linear = nn.Linear(in_features=input_dim, out_features=output_dim)
        # Build the default inside __init__ so instances never share one module object.
        self.activation = nn.ReLU() if activation is None else activation

    def forward(self, x):
        """Return ``activation(linear(x))``."""
        return self.activation(self.linear(x))

In [15]:
# Model with 4 input features and 3 output features, trained with mean squared error.
model = LinearRegressionModel(input_dim=4, output_dim=3)
loss_function = nn.MSELoss()

# Toy data: one all-ones input vector and one all-zeros target vector.
x, y = torch.ones(4), torch.zeros(3)

In [16]:
# Hyperparameters for plain SGD on the (x, y) pair defined in the setup cell.
learning_rate = 0.01
nb_epochs = 1000
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

# Perform exactly nb_epochs parameter updates.
for epoch in range(nb_epochs):
    y_pred = model(x)
    loss = loss_function(y_pred, y)

    optimizer.zero_grad()  # clear gradients left over from the previous step
    loss.backward()
    optimizer.step()

In [17]:
# Loss and prediction as last assigned by the training loop, then each learnable tensor.
print(loss, y_pred)
for parameter in model.parameters():
    print(parameter)

tensor(2.5165e-14, grad_fn=<MseLossBackward0>) tensor([5.9605e-08, 0.0000e+00, 2.6822e-07], grad_fn=<ReluBackward0>)
Parameter containing:
tensor([[ 0.2050, -0.2040,  0.2042, -0.0086],
        [-0.4367, -0.0392, -0.1299, -0.4748],
        [ 0.1947,  0.0783, -0.5155,  0.0492]], requires_grad=True)
Parameter containing:
tensor([-0.1967, -0.1552,  0.1932], requires_grad=True)


# 다층 레이어 구현

In [19]:
import torch
import torch.nn as nn

class LinearRegressionModel(nn.Module):
    """Four stacked linear layers with an activation after the first three.

    Args:
        input_dim: Number of input features of the first layer.
        output_dim: Number of output features of the last layer.
        hidden_dim: Width shared by the three hidden layers. Defaults to 10.
        activation: Optional module applied after each hidden layer. When
            omitted, ``nn.LeakyReLU(0.1)`` is used. Pass e.g. ``nn.Sigmoid()``
            or ``nn.ReLU()`` to try another non-linearity.
    """

    def __init__(self, input_dim, output_dim, hidden_dim=10, activation=None):
        super().__init__()

        self.linear1 = nn.Linear(input_dim, hidden_dim)
        self.linear2 = nn.Linear(hidden_dim, hidden_dim)
        self.linear3 = nn.Linear(hidden_dim, hidden_dim)
        self.linear4 = nn.Linear(hidden_dim, output_dim)
        # Build the default inside __init__ so instances never share one module object.
        self.activation = nn.LeakyReLU(0.1) if activation is None else activation

    def forward(self, x):
        """Run ``x`` through the three activated hidden layers and the linear output layer."""
        hidden = self.activation(self.linear1(x))
        hidden = self.activation(self.linear2(hidden))
        hidden = self.activation(self.linear3(hidden))
        # It is common practice not to apply an activation to the final output.
        y = self.linear4(hidden)
        return y

In [20]:
# Model with 4 input features and 3 output features, trained with mean squared error.
model = LinearRegressionModel(input_dim=4, output_dim=3)
loss_function = nn.MSELoss()

# Toy data: one all-ones input vector and one all-zeros target vector.
x, y = torch.ones(4), torch.zeros(3)

In [21]:
# Hyperparameters for plain SGD on the (x, y) pair defined in the setup cell.
learning_rate = 0.01
nb_epochs = 1000
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

# Perform exactly nb_epochs parameter updates.
for epoch in range(nb_epochs):
    y_pred = model(x)
    loss = loss_function(y_pred, y)

    optimizer.zero_grad()  # clear gradients left over from the previous step
    loss.backward()
    optimizer.step()

In [22]:
# Loss and prediction as last assigned by the training loop, then each learnable tensor.
print(loss, y_pred)
for parameter in model.parameters():
    print(parameter)

tensor(7.9134e-11, grad_fn=<MseLossBackward0>) tensor([ 3.3528e-08, -6.6573e-06, -1.3895e-05], grad_fn=<ViewBackward0>)
Parameter containing:
tensor([[ 0.4082, -0.2242,  0.3183,  0.4643],
        [-0.3961,  0.4747, -0.4720,  0.1354],
        [-0.4439,  0.4988, -0.4460,  0.3742],
        [ 0.3248, -0.0241, -0.4255, -0.3969],
        [ 0.2883, -0.4850, -0.4940,  0.2846],
        [ 0.0863, -0.0959, -0.4824,  0.2551],
        [-0.0040,  0.2634, -0.1946, -0.2441],
        [ 0.4320, -0.1047, -0.1741,  0.0495],
        [ 0.2812,  0.2909,  0.0019, -0.1788],
        [ 0.2893,  0.0153,  0.0577, -0.1594]], requires_grad=True)
Parameter containing:
tensor([ 0.3903, -0.3262,  0.2574, -0.3179,  0.2655, -0.2225,  0.2896, -0.2406,
        -0.4350,  0.1507], requires_grad=True)
Parameter containing:
tensor([[-0.2061,  0.2750, -0.3079, -0.2681,  0.2034, -0.2610, -0.2402,  0.2362,
         -0.1808, -0.1721],
        [ 0.1829, -0.2048,  0.0766, -0.0930, -0.2549,  0.2891,  0.1830, -0.2838,
          0.0435

In [24]:
# Toy data: one all-ones input vector and one all-zeros target vector.
x = torch.ones(4)
y = torch.zeros(3)

# Take the layer widths from the data rather than repeating the literals.
input_dim = x.shape[0]
output_dim = y.shape[0]

# Three hidden layers of width 10 with LeakyReLU(0.1) after each; the output layer is left linear.
model = nn.Sequential(
    nn.Linear(in_features=input_dim, out_features=10),
    nn.LeakyReLU(negative_slope=0.1),
    nn.Linear(in_features=10, out_features=10),
    nn.LeakyReLU(negative_slope=0.1),
    nn.Linear(in_features=10, out_features=10),
    nn.LeakyReLU(negative_slope=0.1),
    nn.Linear(in_features=10, out_features=output_dim),
)

In [25]:
# Loss and plain SGD configuration for the model built in the previous cell.
loss_function = nn.MSELoss()
learning_rate = 0.01
nb_epochs = 1000
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

# Perform exactly nb_epochs parameter updates.
for epoch in range(nb_epochs):
    y_pred = model(x)
    loss = loss_function(y_pred, y)

    optimizer.zero_grad()  # clear gradients left over from the previous step
    loss.backward()
    optimizer.step()

# loss and y_pred were computed before the final optimizer step was applied.
print(loss, y_pred)
for param in model.parameters():
    print(param)

tensor(5.0484e-13, grad_fn=<MseLossBackward0>) tensor([-6.9663e-07, -4.0233e-07,  9.3132e-07], grad_fn=<ViewBackward0>)
Parameter containing:
tensor([[-0.1316,  0.3044, -0.2506,  0.4329],
        [-0.0340, -0.2885, -0.3310, -0.3434],
        [-0.0438, -0.0910, -0.4147,  0.1563],
        [ 0.1273, -0.3213, -0.0744,  0.3222],
        [-0.0434,  0.2286, -0.4020,  0.4764],
        [-0.1578, -0.3858,  0.0248, -0.0901],
        [-0.2886,  0.4622,  0.2869, -0.1040],
        [-0.3060,  0.1483, -0.3616,  0.0512],
        [ 0.1856,  0.4484,  0.4565, -0.4846],
        [ 0.2898,  0.0970, -0.2105, -0.1609]], requires_grad=True)
Parameter containing:
tensor([ 0.1966, -0.2068,  0.0364,  0.3109, -0.0359,  0.1440,  0.4814, -0.2367,
        -0.1686,  0.4594], requires_grad=True)
Parameter containing:
tensor([[-0.2925, -0.3080, -0.1044, -0.1796, -0.1254, -0.1158,  0.0774,  0.2063,
         -0.2110,  0.1901],
        [ 0.2637,  0.2183,  0.0645, -0.1972, -0.1961, -0.0828,  0.2845,  0.1709,
          0.0260

# SGD 방식 구현

In [26]:
# Random 3x4 matrix used to demonstrate index_select.
data1 = torch.randn((3, 4))
print(data1)

indices = torch.tensor([1, 2])
print(indices)

# dim=0 keeps rows 1 and 2; dim=1 keeps columns 1 and 2.
print(data1.index_select(0, indices))
print(data1.index_select(1, indices))

tensor([[ 2.1894, -0.2765,  0.8382, -1.9304],
        [ 0.0274,  1.0628,  0.7160, -1.1517],
        [-0.5813,  1.1983,  2.5135, -0.7826]])
tensor([1, 2])
tensor([[ 0.0274,  1.0628,  0.7160, -1.1517],
        [-0.5813,  1.1983,  2.5135, -0.7826]])
tensor([[-0.2765,  0.8382],
        [ 1.0628,  0.7160],
        [ 1.1983,  2.5135]])


In [27]:
# Training hyperparameters for the mini-batch experiment.
learning_rate = 0.01
nb_epochs = 1000
minibatch_size = 256

# Synthetic dataset: 5000 rows of 10 all-ones features, each with a single zero target.
x = torch.ones((5000, 10))
y = torch.zeros((5000, 1))

In [28]:
# Layer widths follow the data: the last dimension of x goes in, the last dimension of y comes out.
input_dim = x.shape[-1]
output_dim = y.shape[-1]

# Hidden widths 10 -> 8 -> 6 with LeakyReLU(0.1) after each; the output layer is left linear.
model = nn.Sequential(
    nn.Linear(in_features=input_dim, out_features=10),
    nn.LeakyReLU(negative_slope=0.1),
    nn.Linear(in_features=10, out_features=8),
    nn.LeakyReLU(negative_slope=0.1),
    nn.Linear(in_features=8, out_features=6),
    nn.LeakyReLU(negative_slope=0.1),
    nn.Linear(in_features=6, out_features=output_dim),
)

optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
loss_function = nn.MSELoss()

In [39]:
# Random permutation of the row indices of x (one index per row).
indices = torch.randperm(x.shape[0])
print(indices)

# Reorder inputs and targets with the same permutation so (x, y) rows stay paired.
x_batch_list = x.index_select(0, indices)
y_batch_list = y.index_select(0, indices)

tensor([3550, 4356, 3043,  ..., 3480, 1415, 2163])


In [40]:
y_batch_list[0].shape  # evaluated only; not the last expression, so nothing is displayed

# Cut both shuffled tensors along the row axis into chunks of minibatch_size rows.
# NOTE: the names are rebound from tensors to the split results, so this cell
# expects the shuffle cell to be re-run before it is executed again.
x_batch_list = torch.split(x_batch_list, minibatch_size, dim=0)
y_batch_list = torch.split(y_batch_list, minibatch_size, dim=0)
print(len(x_batch_list), len(y_batch_list))

type(y_batch_list)
type(y_batch_list[0])
x_batch_list[0]  # last expression of the cell: this is what gets displayed

20 20


tensor([[1., 1., 1.,  ..., 1., 1., 1.],
        [1., 1., 1.,  ..., 1., 1., 1.],
        [1., 1., 1.,  ..., 1., 1., 1.],
        ...,
        [1., 1., 1.,  ..., 1., 1., 1.],
        [1., 1., 1.,  ..., 1., 1., 1.],
        [1., 1., 1.,  ..., 1., 1., 1.]])

In [43]:
for epoch in range(nb_epochs):
    # Reshuffle every epoch, using one permutation for both tensors so (x, y) rows stay paired.
    indices = torch.randperm(x.shape[0])

    # Shuffle, then cut along the row axis into minibatches.
    x_batch_list = x.index_select(0, indices).split(minibatch_size, dim=0)
    y_batch_list = y.index_select(0, indices).split(minibatch_size, dim=0)

    for x_minibatch, y_minibatch in zip(x_batch_list, y_batch_list):
        optimizer.zero_grad()  # clear gradients left over from the previous minibatch

        y_minibatch_pred = model(x_minibatch)
        loss = loss_function(y_minibatch_pred, y_minibatch)

        loss.backward()
        optimizer.step()

# loss holds only the value from the last minibatch of the last epoch.
print(loss)
for parameter in model.parameters():
    print(parameter)

tensor(3.1974e-14, grad_fn=<MseLossBackward0>)
Parameter containing:
tensor([[-0.2409,  0.3024,  0.0587, -0.0520,  0.1150,  0.0610, -0.1726,  0.3086,
         -0.1511, -0.0019],
        [ 0.1817, -0.0070, -0.1652, -0.1036, -0.1939, -0.1362,  0.2584,  0.2275,
          0.2478,  0.2951],
        [-0.0504, -0.2033, -0.1345, -0.0958,  0.3141, -0.2102,  0.0101,  0.1960,
         -0.1381, -0.2024],
        [-0.2614, -0.1803, -0.0842, -0.1633, -0.2948, -0.2351, -0.1535,  0.0328,
          0.1041, -0.2271],
        [-0.0740, -0.1019, -0.1200, -0.2486, -0.2470,  0.1968,  0.1455,  0.1978,
          0.0940, -0.1829],
        [ 0.1720, -0.0774,  0.0181, -0.3142,  0.1556,  0.0076, -0.2390, -0.1603,
         -0.1067,  0.2032],
        [-0.0384,  0.2354, -0.0733, -0.1160, -0.0572, -0.2430,  0.1680,  0.0472,
          0.1486, -0.1304],
        [ 0.2892, -0.0399,  0.0911, -0.2712,  0.2287,  0.2710, -0.1434,  0.0028,
         -0.0032, -0.1079],
        [ 0.1450, -0.1553,  0.1262,  0.0956, -0.2930,  0.27