### torch.nn.Module
- 딥러닝을 구성한든 Layer의 base class
- input, output, forward, backward(AutoGrad) 정의
- 학습의 대상이 되는 parameter(tensor) 정의

## nn.Parameter
- weigh 값을 정의하는 class
- Tensor 객체의 상속 객체
- nn.Module 내에 attribute가 될 때는 required_grad = True으로 자동 지정되어, AutoGrad의 대상이되어 학습 대상이 되는 Tensor
    - 또한 자동으로 module의 parameter의 list에 속하게되어 parameters() iterator에서 등장하게 됨
    - 또한 module.state_dict() 에 자동 저장.


#### Example ; linear function (xw+b)

In [3]:
import torch
from torch import nn
from torch import Tensor

# layer를 구성하는 base class인 nn.Module을 상속
class MyLiner(nn.Module):
    # 'in_features'개의 features를 'out_features'개의 feature로.
    # bias는 기본적으로 존재한다.
    def __init__(self, in_features, out_features, bias = True):
        # nn.Modeule의 init을 기본적으로 상속
        super().__init__()
        self.in_features = in_features
        self.out_features = out_features
        
        # weight는 보통 이렇게 까지 직접 지정해주지 않는다.
        # weight을 정의하는 nn.Parameter   
        # nn.Parameter가 nn.Module 안에 속성이 됐으므로 자동으로 학습대상으로 지정됨(AutoGrad의 대상이 됨). required_grad = True. 
        self.weights = nn.Parameter(
                # input features 개수 X output features의 크기를 갖는 정규분포 난수 weight 생성
                torch.randn(in_features,out_features)
        )

        # Bias 가 True일 때, output feature 개수만큼의 bias 가짐
        self.bias = nn.Parameter(torch.randn(out_features))

    # linear function 
    def forward(self, x : Tensor):
        return x @ self.weights + self.bias


In [4]:
# 5개의 data, 7 features
x = torch.randn(5,7)

In [5]:
x

tensor([[ 0.1239, -1.2761, -0.3707,  2.0381, -0.0659,  0.3456,  0.2790],
        [-0.1320, -1.2445, -0.3816,  0.8433,  0.2786, -0.7823, -0.6969],
        [-0.3531,  0.8200, -1.6397,  1.0144,  0.2942,  0.0257, -0.1580],
        [ 0.5962,  0.1025,  0.6492,  0.0306, -0.5311,  0.7606, -1.4780],
        [ 0.3175, -0.7066, -1.2907, -0.8492,  1.5602,  0.0400,  2.1103]])

In [1]:
# feature를 7에서 12개로 바꿔주고 싶다.
# MyLiner에 value를 넣어주면 forward가 실행됨
layer = MyLiner(7,12)
layer(x).shape

NameError: name 'MyLiner' is not defined

#### .parameter()
- 미분 대상이 되는 parameter 값들 확인
    - nn.Module 안에 nn.Parameter로 생성한 값들 확인
- weight, bias 값 확인

In [10]:
# weight and bias
for value in layer.parameters():
    print(value)

Parameter containing:
tensor([[ 1.3592, -1.0374, -1.7271,  0.6607,  0.3371, -1.5515, -0.4676, -0.6223,
          1.9363, -0.8209,  0.3722,  0.1109],
        [ 1.5672, -0.4507, -1.2125,  0.3652,  0.0824, -1.4784, -2.4415,  0.0743,
          0.1778,  0.0105, -0.5882, -1.0168],
        [ 0.3382, -1.0327,  0.6512, -1.1582,  0.2984, -0.4147, -0.0923, -0.6165,
         -1.0578,  0.4718,  0.9239,  0.3562],
        [-0.5480,  0.5679,  0.1860,  2.2391,  0.5212, -0.8554, -0.1859, -1.2038,
          0.4902,  0.5797,  0.5584,  1.2825],
        [-0.6254,  0.1903,  0.4651,  1.8050, -0.6152,  0.2653,  0.1783,  0.4419,
         -2.1835, -0.6811, -1.1891,  0.2289],
        [ 0.5388,  1.5717,  1.2475, -0.7371, -1.7043,  0.4437, -0.2624,  1.1333,
         -0.5116, -0.7372, -1.9843,  0.9637],
        [ 0.4704, -0.2824, -0.5290, -0.3489, -1.1780,  0.4061, -0.1251,  0.7267,
         -1.3462,  0.7394,  1.5415,  0.4353]], requires_grad=True)
Parameter containing:
tensor([ 1.3415,  2.1047,  1.8075, -2.1859, -0

nn.Parameter 아닌 그냥 Tensor로 wieght 생성했을 때는
parameter로 호출 안된다. 그리고 weight가 미분대상으로 지정되지 않는다.

In [8]:
import torch
from torch import Tensor
from torch import nn
class MyLiner_tensor(nn.Module):
    def __init__(self, in_features, out_features, bias=True):
        super().__init__()
        self.in_features = in_features
        self.out_features = out_features
        
        self.weights = Tensor(
                torch.randn(in_features, out_features))
        
        self.bias = Tensor(torch.randn(out_features))
        self.register_buffer('weight',self.weights)
    def forward(self, x : Tensor):
        return x @ self.weights + self.bias


layer = MyLiner_tensor(7, 12)
x = torch.randn(5,7)
layer(x).shape


torch.Size([5, 12])

In [9]:
# parameter로 호출 안된다
for value in layer.parameters():
    print(value)

### register_buffer(name,tensor,persistent=True)
- Parameter가 아니어서 자동으로 module에 저장되지는 않지만 따로 저장하고싶은 tensor(module state 등)를 저장
- module.name 으로 접근 가능
- persistent = True 이면 state_dict()에 저장됨. False면 안저장됨

buffer에 추가한 weight가 parameter에는 없지만 layer의 buffer로서 존재<br>state_dict에도 존재

In [14]:
layer.weight

tensor([[ 1.2949, -0.7042,  1.2433,  0.0889,  0.7479,  0.7894,  0.1380, -0.6044,
         -0.7085, -1.3978,  0.2104, -0.0603],
        [-0.7519,  0.0176,  2.5754,  1.8879,  0.5698,  0.9581,  0.6077,  0.0663,
          1.3105,  1.5825,  0.7167, -0.1105],
        [-0.2787, -0.4755, -0.5045, -1.2384,  0.5735, -0.4494,  0.1217, -0.0380,
         -1.9169, -1.2761,  0.7776, -0.6836],
        [-0.0784, -1.6489,  0.6718, -0.0531, -0.9055, -0.7005,  0.2103, -0.6881,
         -0.0070, -1.1194,  0.8467, -0.4971],
        [ 0.0262,  0.7233, -2.9718,  0.1400,  0.4020,  1.6114,  2.2128, -0.6321,
          0.2116,  0.5547,  0.8738,  0.3467],
        [ 0.3371,  0.2767,  1.2433, -0.9884,  0.0419, -0.6650, -1.0619,  0.1339,
         -0.4209,  0.0396, -0.7020,  0.7804],
        [-0.5884, -1.0025,  0.3156, -0.7788,  0.4656, -1.0590,  1.3068,  0.0381,
         -1.0292,  0.5219, -0.0239, -0.5336]])

In [15]:
layer.state_dict()

OrderedDict([('weight',
              tensor([[ 1.2949, -0.7042,  1.2433,  0.0889,  0.7479,  0.7894,  0.1380, -0.6044,
                       -0.7085, -1.3978,  0.2104, -0.0603],
                      [-0.7519,  0.0176,  2.5754,  1.8879,  0.5698,  0.9581,  0.6077,  0.0663,
                        1.3105,  1.5825,  0.7167, -0.1105],
                      [-0.2787, -0.4755, -0.5045, -1.2384,  0.5735, -0.4494,  0.1217, -0.0380,
                       -1.9169, -1.2761,  0.7776, -0.6836],
                      [-0.0784, -1.6489,  0.6718, -0.0531, -0.9055, -0.7005,  0.2103, -0.6881,
                       -0.0070, -1.1194,  0.8467, -0.4971],
                      [ 0.0262,  0.7233, -2.9718,  0.1400,  0.4020,  1.6114,  2.2128, -0.6321,
                        0.2116,  0.5547,  0.8738,  0.3467],
                      [ 0.3371,  0.2767,  1.2433, -0.9884,  0.0419, -0.6650, -1.0619,  0.1339,
                       -0.4209,  0.0396, -0.7020,  0.7804],
                      [-0.5884, -1.0025,  0.31

### Parameter 미분
- 'Forward의 결과값(예측치,y^)과 실제값 간의 차이', Loss에 대해 미분을 수행 -> Autograd, backward 함수 
- Parameter 업데이트


In [None]:
#epoch : batch를 한번 돌리는것
for epoch in range(epochs):
    # 앞 epoch의 grad가 지금 학습에 영향을 주면 안된다. 
    # gradient 초기화
    optimizer.zero_grad()

    #y^, 예측치, 결과값
    outputs = model(input)

    # get loss
    #y^: output과 y:lable 간의 loss 값 
    loss = creterion(outputs,labels)
    print(loss)
    # get gradients w.r.t to parameters
    # weight에 대해 loss를 미분간 값을 보여줌
    loss.backward()

    #epoch 마다 update
    #update parameters
    #loss를 줄이는 방향으로 weight 업데이트
    optimizer.step()

## AutoGrad for Linear Regression

In [14]:
import numpy as np
# create dummy data for training
x_values = [i for i in range(11)]
x_train = np.array(x_values, dtype=np.float32)
x_train = x_train.reshape(-1, 1)

y_values = [2*i + 1 for i in x_values]
y_train = np.array(y_values, dtype=np.float32)
y_train = y_train.reshape(-1, 1)

In [15]:
x_train

array([[ 0.],
       [ 1.],
       [ 2.],
       [ 3.],
       [ 4.],
       [ 5.],
       [ 6.],
       [ 7.],
       [ 8.],
       [ 9.],
       [10.]], dtype=float32)

In [16]:
y_train

array([[ 1.],
       [ 3.],
       [ 5.],
       [ 7.],
       [ 9.],
       [11.],
       [13.],
       [15.],
       [17.],
       [19.],
       [21.]], dtype=float32)

In [17]:
import torch
from torch.autograd import Variable
class LinearRegression(torch.nn.Module):
    def __init__(self, inputSize, outputSize):
        super(LinearRegression, self).__init__()
        self.linear = torch.nn.Linear(inputSize, outputSize)

    def forward(self, x):
        out = self.linear(x)
        return out

In [18]:
inputDim = 1        # takes variable 'x' 
outputDim = 1       # takes variable 'y'
learningRate = 0.01 
epochs = 100

model = LinearRegression(inputDim, outputDim)
# ##### For GPU #######
# if torch.cuda.is_available():
#     model.cuda()

loss값, optimizer 설정
- optimize의 대상, learning rate 설정

In [20]:
# loss 는 mse 채택
criterion = torch.nn.MSELoss() 
# optimizer 는 SGD채택, optimize의 대상은 model.parameters() 입니다. lr 설정.
optimizer = torch.optim.SGD(model.parameters(), lr=learningRate)

학습
1. output (model에 input 넣기)
2. loss 값 도출
3. 미분(backward)
4. step(업데이트)

In [None]:
for epoch in range(epochs):
    # Converting inputs and labels to Variable
    if torch.cuda.is_available():
        inputs = Variable(torch.from_numpy(x_train).cuda())
        labels = Variable(torch.from_numpy(y_train).cuda())
    else:
        inputs = Variable(torch.from_numpy(x_train))
        labels = Variable(torch.from_numpy(y_train))

    # Clear gradient buffers because we don't want any gradient from previous epoch to carry forward, dont want to cummulate gradients
    optimizer.zero_grad()

    # get output from the model, given the inputs
    # forward 값 도출
    outputs = model(inputs)

    # get loss for the predicted output
    loss = criterion(outputs, labels)
    print(loss)
    # get gradients w.r.t to parameters
    loss.backward()

    # update parameters
    # all optimizers implement a step() method, that updates the parameters.
    optimizer.step()

    print('epoch {}, loss {}'.format(epoch, loss.item()))

test : 모델 학습 후 예측값 도출

In [25]:
# grad 없이 간다.
# we don't need gradients in the testing phase
with torch.no_grad():
    if torch.cuda.is_available():
        predicted = model(Variable(torch.from_numpy(x_train).cuda())).cpu().data.numpy()
    else:
        predicted = model(Variable(torch.from_numpy(x_train))).data.numpy()
    print(predicted)


[[ 1.1247633]
 [ 3.1067963]
 [ 5.088829 ]
 [ 7.070862 ]
 [ 9.052895 ]
 [11.034928 ]
 [13.016961 ]
 [14.998994 ]
 [16.981026 ]
 [18.96306  ]
 [20.945093 ]]


In [26]:
y_train

array([[ 1.],
       [ 3.],
       [ 5.],
       [ 7.],
       [ 9.],
       [11.],
       [13.],
       [15.],
       [17.],
       [19.],
       [21.]], dtype=float32)

Parameter 확인(witght, bias)

In [27]:
for p in model.parameters():
    if p.requires_grad:
         print(p.name, p.data)

None tensor([[1.9820]])
None tensor([1.1248])


## Logistic Regression

In [None]:
##logistic regression
class LR(nn.Module):
    def __init__(self, dim, lr=torch.scalar_tensor(0.01)):
        super(LR, self).__init__()
        # intialize parameters
        # 직접 미분을 아래에서 해줄거라 requires_grad = True 해줄 필요 없다.
        self.w = torch.zeros(dim, 1, dtype=torch.float).to(device)
        self.b = torch.scalar_tensor(0).to(device)
        self.grads = {"dw": torch.zeros(dim, 1, dtype=torch.float).to(device),
                      "db": torch.scalar_tensor(0).to(device)}
        self.lr = lr.to(device)

    def forward(self, x):
        ## compute forward
        z = torch.mm(self.w.T, x) + self.b
        a = self.sigmoid(z)
        return a

    def sigmoid(self, z):
        return 1/(1 + torch.exp(-z))

    def backward(self, x, yhat, y):
        ## compute backward
        # loss function에 대해, 각 변수로 미분하고, value를 넣어준 것을 각 grad에 할당
        self.grads["dw"] = (1/x.shape[1]) * torch.mm(x, (yhat - y).T)
        self.grads["db"] = (1/x.shape[1]) * torch.sum(yhat - y)
    
    def optimize(self):
        ## optimization step
        # 미분값grad 만큼 업데이트 해줌
        self.w = self.w - self.lr * self.grads["dw"]
        self.b = self.b - self.lr * self.grads["db"]

## utility functions
def loss(yhat, y):
    m = y.size()[1]
    return -(1/m)* torch.sum(y*torch.log(yhat) + (1 - y)* torch.log(1-yhat))

def predict(yhat, y):
    y_prediction = torch.zeros(1, y.size()[1])
    for i in range(yhat.size()[1]):
        if yhat[0, i] <= 0.5:
            y_prediction[0, i] = 0
        else:
            y_prediction[0, i] = 1
    return 100 - torch.mean(torch.abs(y_prediction - y)) * 100

In [None]:
## model pretesting
x, y = next(iter(train_dataset))

## flatten/transform the data
x_flatten = x.T
y = y.unsqueeze(0) 

## num_px is the dimension of the images
dim = x_flatten.shape[0]

## model instance
model = LR(dim)
model.to(device)
yhat = model.forward(x_flatten.to(device))
yhat = yhat.data.cpu()

## calculate loss
cost = loss(yhat, y)
prediction = predict(yhat, y)
print("Cost: ", cost)
print("Accuracy: ", prediction)

## backpropagate
model.backward(x_flatten.to(device), yhat.to(device), y.to(device))
model.optimize()