<a href="https://colab.research.google.com/github/21400126/Ionic/blob/master/1_NeuralNetworks.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

nn.Module: a container for the network's layers.

forward(input): the method of the module that returns the output.

Training procedure:
Define the neural network that has some learnable parameters (or weights) -> Iterate over a dataset of inputs -> Process each input through the network
-> Compute the loss (how far the output is from being correct) -> Propagate gradients back into the network’s parameters
-> Update the weights of the network, typically using a simple update rule: weight = weight - learning_rate * gradient

In [0]:
#Define the network
import torch
import torch.nn as nn
import torch.nn.functional as F    #Neural Network 만들때 필요함

class Net(nn.Module):
  """Small convolutional network: two conv/pool stages followed by three linear layers.

  nn.Conv2d takes a 4D tensor laid out as nSamples x nChannels x height x width.
  fc1 expects 16*6*6 = 576 input features: with 3x3 convolutions (stride 1, no
  padding) and 2x2 max pooling, a 32x32 input shrinks 32 -> 30 -> 15 -> 13 -> 6,
  leaving 16 channels of 6x6 values.
  """

  def __init__(self):
    super().__init__()
    # Convolutional stages: 1 input image channel -> 6 channels -> 16 channels,
    # each using a 3x3 square kernel.
    self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=3)
    self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=3)

    # Fully connected stages; arguments are (input_dim, output_dim).
    self.fc1 = nn.Linear(in_features=16 * 6 * 6, out_features=120)
    self.fc2 = nn.Linear(in_features=120, out_features=84)
    self.fc3 = nn.Linear(in_features=84, out_features=10)

  def forward(self, x):
    """Run x through the network and return the 10 output values per sample."""
    # Max pooling over a (2, 2) window after each ReLU-activated convolution.
    # A single number may be given instead of a tuple when the window is square.
    pooled = F.max_pool2d(F.relu(self.conv1(x)), kernel_size=(2, 2))
    pooled = F.max_pool2d(F.relu(self.conv2(pooled)), kernel_size=2)

    # Collapse every dimension except the first into one row per sample.
    flat = pooled.view(-1, self.num_flat_features(pooled))

    hidden = F.relu(self.fc1(flat))
    hidden = F.relu(self.fc2(hidden))
    return self.fc3(hidden)

  def num_flat_features(self, x):
    """Return the product of all dimensions of x except the first one."""
    return x.size()[1:].numel()
    
# Instantiate the network and print its layer structure.
net = Net()
print(net)

params = list(net.parameters())            # the network's learnable parameters, collected into a list
print(len(params))
print(params[0].size())                     # size of the first parameter tensor (conv1's weight)

input = torch.randn(1,1,32,32)              # random input: nSamples=1, nChannels=1, height=32, width=32 (NOTE: this name shadows the builtin input())
out = net(input)                              # forward pass through the network
print(out)

# The bare string below is a no-op expression used as a block comment; it sketches the computation graph.
'''
input -> conv2d -> relu -> maxpool2d -> conv2d -> relu -> maxpool2d
      -> view -> linear -> relu -> linear -> relu -> linear
      -> MSELoss
      -> loss
'''

net.zero_grad() # zero the gradient buffers of all parameters before the backward pass below
out.backward(torch.randn(1,10)) # backpropagate from `out`, passing a random (1, 10) tensor as the upstream gradient

# Loss function
output = net(input)
target = torch.randn(10).view(1, -1)  # dummy target, reshaped to one row so it lines up with `output`
criterion = nn.MSELoss()

loss = criterion(output, target)  # the criterion computes the loss value from output and target
print(loss)

# Follow the autograd graph backwards from the loss, one node at a time.
grad_node = loss.grad_fn
print(grad_node)  # MSELoss node
grad_node = grad_node.next_functions[0][0]
print(grad_node)  # the Linear layer feeding the loss (recorded output shows AddmmBackward)
grad_node = grad_node.next_functions[0][0]
print(grad_node)  # first input of that node; the recorded output shows AccumulateGrad here rather than ReLU

# Backprop
net.zero_grad() # zero the gradient buffers so the values printed below come only from this loss

print('conv1.bias.grad before backward')
print(net.conv1.bias.grad)

loss.backward()             # backpropagate the loss: this fills in the parameters' .grad; it does not update the weights

print('conv1.bias.grad after backward')
print(net.conv1.bias.grad)

# Update the weights

# The simple update rule (weight = weight - learning_rate * gradient),
# written by hand in plain Python, would look like this:
#
#   learning_rate = 0.01
#   for f in net.parameters():
#     f.data.sub_(f.grad.data * learning_rate)
#
# The optimizer below is used instead of the hand-written loop.
import torch.optim as optim

optimizer = optim.SGD(net.parameters(), lr=0.01)  # create optimizer

# in training loop
optimizer.zero_grad()   # zero the gradient buffers
output = net(input)
loss = criterion(output, target)
loss.backward()
optimizer.step()  # update: each parameter is moved against its gradient, scaled by the learning rate

Net(
  (conv1): Conv2d(1, 6, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(3, 3), stride=(1, 1))
  (fc1): Linear(in_features=576, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)
10
torch.Size([6, 1, 3, 3])
tensor([[ 0.0201, -0.0864,  0.0156, -0.0673, -0.0786, -0.0141, -0.0091,  0.0640,
         -0.0345, -0.0115]], grad_fn=<AddmmBackward>)
tensor(0.8450, grad_fn=<MseLossBackward>)
<MseLossBackward object at 0x7f678a9b6c18>
<AddmmBackward object at 0x7f678a9b6dd8>
<AccumulateGrad object at 0x7f678a8f6fd0>
conv1.bias.grad before backward
tensor([0., 0., 0., 0., 0., 0.])
conv1.bias.grad after backward
tensor([ 0.0036, -0.0081, -0.0060,  0.0121, -0.0029,  0.0072])
