In [None]:
# gpu사용
import torch

# Report CUDA availability and choose the compute device, falling back to CPU.
print(torch.cuda.is_available())  # True means a GPU can be used
if torch.cuda.is_available():
    # get_device_name raises when no CUDA device is present, so only query it
    # when CUDA is actually available; otherwise `device` below would never be set.
    print(torch.cuda.get_device_name(0))  # name of GPU 0
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
x = torch.tensor([1.0, 2.0]).to(device)  # small tensor moved to the chosen device

In [1]:
import torch
import torch.nn as nn

class LinearRegressionModel(nn.Module):
    """A single linear layer followed by a ReLU activation.

    Args:
        input_dim: number of input features of the linear layer.
        output_dim: number of output features of the linear layer.
    """

    def __init__(self, input_dim, output_dim):
        super().__init__()
        # nn.Linear(in_features, out_features), given positionally.
        self.linear = nn.Linear(input_dim, output_dim)
        # ReLU activation (nn.Sigmoid() was the alternative the author tried).
        self.activation = nn.ReLU()

    def forward(self, x):
        """Run x through the linear layer, then through the activation."""
        projected = self.linear(x)
        return self.activation(projected)

In [2]:
# Toy task: fit a length-4 input of ones to a length-3 target of zeros.
model = LinearRegressionModel(input_dim=4, output_dim=3)
loss_function = nn.MSELoss()
x = torch.ones(4)   # input
y = torch.zeros(3)  # target output

In [3]:
# Optimisation settings for plain SGD on the single (x, y) pair.
learning_rate, nb_epochs = 0.01, 1000
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

# range(nb_epochs + 1) performs nb_epochs + 1 parameter updates.
for epoch in range(nb_epochs + 1):
    # Clear the gradients of the previous step before computing new ones.
    optimizer.zero_grad()

    y_pred = model(x)
    loss = loss_function(y_pred, y)

    loss.backward()
    optimizer.step()


In [4]:
# Author's observation: ReLU reduced the error far more than sigmoid did.
# NOTE(review): the target is all zeros and ReLU can output exactly zero, so a
# loss of exactly 0 may not indicate a better fit in general — confirm.
print(loss)
# One parameter tensor per line (weight, then bias).
print(*model.parameters(), sep="\n")

tensor(0., grad_fn=<MseLossBackward0>)
Parameter containing:
tensor([[ 0.0442, -0.4486,  0.1079, -0.1035],
        [ 0.0588,  0.2621, -0.4610, -0.0590],
        [ 0.1966,  0.0600,  0.1223, -0.0958]], requires_grad=True)
Parameter containing:
tensor([-0.4836,  0.0704, -0.4542], requires_grad=True)


## 다층 레이어 구현

In [5]:
import torch
import torch.nn as nn

class LinearRegressionModel(nn.Module):
    """MLP with three hidden Linear layers of width 10 (LeakyReLU) and a linear output layer.

    Args:
        input_dim: size of the last dimension of the input tensor.
        output_dim: size of the last dimension of the output tensor.
    """

    def __init__(self, input_dim, output_dim):
        super().__init__()
        self.linear1 = nn.Linear(input_dim, 10)
        self.linear2 = nn.Linear(10, 10)
        self.linear3 = nn.Linear(10, 10)
        self.linear4 = nn.Linear(10, output_dim)
        # nn.LeakyReLU's positional parameters are (negative_slope, inplace), so
        # `LeakyReLU(0, 1)` would mean slope 0 with in-place enabled, i.e. a plain
        # ReLU. The slope is passed as the float 0.1 so negative inputs leak.
        self.activation = nn.LeakyReLU(0.1)

    def forward(self, x):
        """Map x of shape (..., input_dim) to a tensor of shape (..., output_dim)."""
        hidden = self.activation(self.linear1(x))
        hidden = self.activation(self.linear2(hidden))
        hidden = self.activation(self.linear3(hidden))
        # No activation on the final layer, which is the usual choice for the output.
        y = self.linear4(hidden)
        return y  # kept as a separate statement for readability

In [6]:
# Same toy task for the multi-layer model: ones of length 4 -> zeros of length 3.
x, y = torch.ones(4), torch.zeros(3)
model = LinearRegressionModel(input_dim=4, output_dim=3)
loss_function = nn.MSELoss()

In [7]:
# Optimisation settings for plain SGD on the single (x, y) pair.
learning_rate, nb_epochs = 0.01, 1000
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

# range(nb_epochs + 1) performs nb_epochs + 1 parameter updates.
for epoch in range(nb_epochs + 1):
    # Reset gradients first, then forward, backward and update.
    optimizer.zero_grad()
    y_pred = model(x)
    loss = loss_function(y_pred, y)
    loss.backward()
    optimizer.step()

In [8]:
# Author's observation: ReLU reduced the error far more than sigmoid did.
# NOTE(review): this remark also appears on the single-layer experiment, while
# the model in this section uses LeakyReLU — confirm it applies here.
print(loss)
# One parameter tensor per line, in registration order.
print(*model.parameters(), sep="\n")

tensor(7.1288e-09, grad_fn=<MseLossBackward0>)
Parameter containing:
tensor([[-0.2797, -0.1138,  0.3802, -0.0796],
        [-0.3490, -0.4342,  0.2225, -0.2356],
        [-0.4482,  0.1348, -0.0949, -0.1305],
        [-0.1516, -0.1979,  0.2130,  0.1586],
        [-0.0760,  0.3355, -0.2252, -0.3781],
        [ 0.2506,  0.3882,  0.2106,  0.3340],
        [ 0.4139,  0.4783, -0.4661,  0.3483],
        [-0.2251,  0.3306,  0.4859,  0.3596],
        [-0.4595, -0.1920,  0.0952,  0.4251],
        [-0.2760, -0.2601,  0.2324, -0.4446]], requires_grad=True)
Parameter containing:
tensor([-0.2905,  0.2244, -0.0686,  0.3783,  0.3194,  0.4856, -0.4331, -0.2762,
        -0.4506,  0.4404], requires_grad=True)
Parameter containing:
tensor([[ 0.1812,  0.1498, -0.2104, -0.3030,  0.1367, -0.0837, -0.2702,  0.0624,
          0.2621, -0.2911],
        [ 0.1741, -0.0866, -0.2045, -0.0576, -0.1272, -0.0456,  0.2864, -0.2122,
         -0.1366,  0.0176],
        [-0.2956,  0.1266,  0.2424,  0.2591, -0.1698, -0.1345

## nn.Sequential

In [9]:
# nn.Sequential builds the same stack of layers without a custom nn.Module subclass.
x = torch.ones(4)
y = torch.zeros(3)

# Layer sizes are derived from the data instead of being hard-coded.
input_dim = x.size(0)
output_dim = y.size(0)

# nn.LeakyReLU's positional parameters are (negative_slope, inplace), so
# `LeakyReLU(0, 1)` would mean slope 0 with in-place enabled, i.e. a plain ReLU.
# The slope is passed as the float 0.1 so negative inputs leak.
model = nn.Sequential(
    nn.Linear(input_dim, 10),
    nn.LeakyReLU(0.1),
    nn.Linear(10, 10),
    nn.LeakyReLU(0.1),
    nn.Linear(10, 10),
    nn.LeakyReLU(0.1),
    nn.Linear(10, output_dim)  # no activation on the output layer
)

In [10]:
# Loss and SGD settings for the nn.Sequential model.
learning_rate, nb_epochs = 0.01, 1000
loss_function = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

In [11]:
# Full-batch training on the single (x, y) pair; nb_epochs + 1 updates in total.
for epoch in range(nb_epochs + 1):
    # Reset gradients first, then forward, backward and update.
    optimizer.zero_grad()
    y_pred = model(x)
    loss = loss_function(y_pred, y)
    loss.backward()
    optimizer.step()

In [12]:
# Show the loss from the last training step, then one parameter tensor per line.
print(loss)
print(*model.parameters(), sep="\n")

tensor(6.6500e-10, grad_fn=<MseLossBackward0>)
Parameter containing:
tensor([[-0.4482, -0.3585, -0.1033,  0.1997],
        [ 0.4611, -0.4423,  0.1648,  0.2349],
        [-0.2190,  0.2594,  0.4981, -0.0503],
        [-0.2283,  0.1484, -0.3257, -0.1747],
        [ 0.3029, -0.1750,  0.2513,  0.1611],
        [-0.2855,  0.2584, -0.4564,  0.2334],
        [-0.4482, -0.4303,  0.1178, -0.0039],
        [ 0.2350,  0.1737, -0.3141, -0.3255],
        [-0.3692, -0.4436,  0.1716,  0.3662],
        [ 0.0715, -0.1749, -0.0785, -0.1780]], requires_grad=True)
Parameter containing:
tensor([-0.1899, -0.1265,  0.3498, -0.0259,  0.0619,  0.1063, -0.1283, -0.2750,
        -0.3074,  0.3633], requires_grad=True)
Parameter containing:
tensor([[ 1.0052e-01,  2.4639e-01,  1.3499e-01, -2.2844e-01,  2.1062e-01,
          1.5670e-01,  4.8534e-02,  1.8237e-01, -2.9702e-01,  9.8999e-02],
        [ 1.3333e-01,  2.8756e-01, -4.2289e-02, -2.8050e-01, -4.3295e-03,
         -2.8505e-01, -1.3696e-01, -1.0937e-05, -1.1120e

In [1]:
# SGD 방식 구현
# 랜덤하게 데이터를 섞기 위한 함수
# torch.randperm(n): 0 ~ n-1 까지의 정수를 랜덤하게 섞어서 순열(배열)을 만들어 준다.
# torch.index_select(텐서객체, 차원번호, 인덱스텐서), 차원번호 = axis를 의미
# 지정한 차원에서 인덱스 텐서에 나열된 번호 순서대로 데이터를 골라(재배열해) 새 텐서를 만든다.

In [7]:
import torch
import torch.nn as nn

# index_select demo on a random 3x4 tensor.
data1 = torch.randn(3, 4)
print(data1)

indices = torch.tensor([1, 2])
print(indices)

# dim 0 picks rows 1 and 2; dim 1 picks columns 1 and 2.
print(data1.index_select(0, indices))
print(data1.index_select(1, indices))

tensor([[-0.1511, -0.5594,  0.2022,  1.9242],
        [ 1.1306,  0.6468, -0.5747,  1.7867],
        [-0.3386,  0.2676, -0.1767, -0.6475]])
tensor([1, 2])
tensor([[ 1.1306,  0.6468, -0.5747,  1.7867],
        [-0.3386,  0.2676, -0.1767, -0.6475]])
tensor([[-0.5594,  0.2022],
        [ 0.6468, -0.5747],
        [ 0.2676, -0.1767]])


In [69]:
# Mini-batch experiment setup: 5000 samples with 10 features and 1 target each.
# Relies on `device` having been defined by an earlier cell.
learning_rate, nb_epochs, minibatch_size = 0.01, 1000, 256
x = torch.ones(5000, 10, device=device)
y = torch.zeros(5000, 1, device=device)

In [73]:
# Feature and target widths are read from the last dimension of the data.
input_dim = x.size(-1)
output_dim = y.size(-1)

# Hidden widths shrink toward the output (10 -> 8 -> 6); the author notes this
# is a common design choice.
# nn.LeakyReLU's positional parameters are (negative_slope, inplace), so
# `LeakyReLU(0, 1)` would mean slope 0 with in-place enabled, i.e. a plain ReLU.
# The slope is passed as the float 0.1 so negative inputs leak.
model = nn.Sequential(
    nn.Linear(input_dim, 10),
    nn.LeakyReLU(0.1),
    nn.Linear(10, 8),
    nn.LeakyReLU(0.1),
    nn.Linear(8, 6),
    nn.LeakyReLU(0.1),
    nn.Linear(6, output_dim)  # no activation on the output layer
).to(device)

loss_function = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)


In [74]:
# One shuffle-and-split pass, shown to illustrate how the mini-batches are built.
indices = torch.randperm(x.size(0), device=device)  # random permutation of row indices
print(indices)

# Reorder x and y with the same permutation so rows stay paired, then cut along
# dim 0 into chunks of minibatch_size.
x_batch_list = x.index_select(0, indices).split(minibatch_size, dim=0)
y_batch_list = y.index_select(0, indices).split(minibatch_size, dim=0)
print(len(x_batch_list), len(y_batch_list))

tensor([2766,  503,  707,  ..., 2367, 1627, 2916], device='cuda:0')
20 20


In [75]:
# Mini-batch SGD: reshuffle every epoch, then take one optimizer step per mini-batch.
for index in range(nb_epochs):
    # A fresh permutation each epoch, applied identically to inputs and targets.
    indices = torch.randperm(x.size(0), device=device)
    x_batch_list = x.index_select(0, indices).split(minibatch_size, 0)
    y_batch_list = y.index_select(0, indices).split(minibatch_size, 0)

    for x_minibatch, y_minibatch in zip(x_batch_list, y_batch_list):
        # Reset gradients first, then forward, backward and update.
        optimizer.zero_grad()
        y_minibatch_pred = model(x_minibatch)
        loss = loss_function(y_minibatch_pred, y_minibatch)
        loss.backward()
        optimizer.step()

# `loss` holds only the value from the last mini-batch of the last epoch.
print(loss)
for param in model.parameters():
    print(param)

tensor(8.6736e-15, device='cuda:0', grad_fn=<MseLossBackward0>)
Parameter containing:
tensor([[-0.1305, -0.1369,  0.0398, -0.0706,  0.2254, -0.0169, -0.3097, -0.2033,
          0.0706,  0.0916],
        [ 0.0955, -0.1266,  0.1437,  0.2260, -0.0416,  0.2009,  0.2622, -0.1392,
          0.0106,  0.2474],
        [-0.0610, -0.1722,  0.2486,  0.2016, -0.0188,  0.2550, -0.1352, -0.0380,
          0.1016,  0.1776],
        [ 0.1927,  0.1325,  0.1567, -0.2462,  0.2293, -0.0229,  0.1546,  0.2015,
         -0.1022, -0.2912],
        [ 0.0740,  0.1997, -0.1856,  0.2691,  0.0416,  0.0753, -0.0446, -0.2937,
          0.2463,  0.1638],
        [ 0.2751,  0.2136,  0.2546,  0.1048,  0.1698,  0.1316, -0.2045,  0.1996,
          0.1780, -0.1922],
        [ 0.1074, -0.0976, -0.0408, -0.1969,  0.3005, -0.0718,  0.2840, -0.2194,
          0.1256,  0.1147],
        [ 0.2191, -0.0606,  0.0261, -0.2185,  0.2693, -0.1868,  0.0330,  0.1075,
         -0.0313, -0.0686],
        [-0.1162, -0.0597, -0.1418,  0.191

In [None]:
# Choose CUDA when available, otherwise CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
# NOTE(review): this rebinds the global `x` that the mini-batch cells use as the
# training input — confirm that is intended.
x = torch.tensor([1.0, 2.0], device=device)

In [76]:
# 다양한 실험 결과와 그때의 설정값을 관리해주는 프레임워크
# MLflow, WandB(Weights & Biases)가 있다.

# 주요 Optimizer
# 딥러닝의 학습은 결국 손실값을 가장 작게 모델을 만드는 것
# 즉, 학습은 손실함수(loss function)의 최소값을 찾아가는 과정이며 이 과정을 최적화(Optimization)라고 한다.
# 대표적인게 SGD임
# 이 외에도 다양한 Optimizer가 제안되고 있다. ex) adam, momentum(sgd에 관성을 추가한거임), 