<a href="https://colab.research.google.com/github/2018007956/HYU/blob/main/Deep_Learning%5C5_Regularization.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

1. Overfitting
2. L1 L2 Regularization
3. Dropout
4. Normalization

# 1. Overfitting
Sol) Training data 늘림, Regularization, Dropout

# 2. L1 L2 Regularization

In [3]:
import torch
import torch.nn as nn
import torch.optim as optim

class LogisticRegression(nn.Module):
  def __init__(self, x_in, x_out):
    super(LogisticRegression, self).__init__()
    self.linear = nn.Linear(x_in, x_out)
    self.activation = nn.Sigmoid()
  def forward(self, x):
    z = self.linear(x)
    a = self.activation(z)
    return a

model = LogisticRegression(2, 1)

In [4]:
for name, param in model.named_parameters():
  print('========================')
  print(name)
  print(param.shape)
  print(param)

linear.weight
torch.Size([1, 2])
Parameter containing:
tensor([[-0.3049,  0.6672]], requires_grad=True)
linear.bias
torch.Size([1])
Parameter containing:
tensor([-0.3871], requires_grad=True)


## L1 loss in LogisticRegression
reg = model.linear.weight.abs().sum()

In [5]:
print(model.linear.weight)
print(model.linear.weight.abs().sum())

Parameter containing:
tensor([[-0.3049,  0.6672]], requires_grad=True)
tensor(0.9720, grad_fn=<SumBackward0>)


## L2 loss in LogisticRegression
reg = model.linear.weight.pow(2.0).sum()

In [6]:
print(model.linear.weight)
print(model.linear.weight.pow(2.0).sum())

Parameter containing:
tensor([[-0.3049,  0.6672]], requires_grad=True)
tensor(0.5381, grad_fn=<SumBackward0>)


## Random Seed 고정
- 모델 weight가 생성될 때마다 random하게 생성됨
- 성능 비교를 하기위해 값을 고정하는게 좋다
  - 어떤 random한 값에서는 좋게 나오고 다른 값에서는 나쁘게 나올 수 있음

In [8]:
import torch
import torch.nn as nn
import torch.optim as optim

torch.manual_seed(0)
torch.cuda.manual_seed(0)
torch.cuda.manual_seed_all(0)

In [10]:
print(model.linear.weight[0,1])

tensor(0.6672, grad_fn=<SelectBackward0>)


## Cifar-10

In [7]:
import torchvision
import torchvision.transforms as transforms

train_dataset = torchvision.datasets.CIFAR10(root='CIFAR10/',
                                             train=True,
                                             transform=transforms.ToTensor(),
                                             download=True)
test_dataset = torchvision.datasets.CIFAR10(root='CIFAR10/',
                                             train=False,
                                             transform=transforms.ToTensor(),
                                             download=True)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to CIFAR10/cifar-10-python.tar.gz


  0%|          | 0/170498071 [00:00<?, ?it/s]

Extracting CIFAR10/cifar-10-python.tar.gz to CIFAR10/
Files already downloaded and verified


## Model Structure
Input size: 32*32*3  
Output size: 10

In [16]:
if torch.cuda.is_available():
  device = torch.device('cuda')
else:
  device = torch.device('cpu')

In [14]:
batch_size = 128

train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size)

In [12]:
class Model(nn.Module):
  def __init__(self):
    super(Model, self).__init__()
    self.linear1 = nn.Linear(32*32*3, 256)
    self.linear2 = nn.Linear(256, 128)
    self.linear3 = nn.Linear(128, 10)

    self.activation = nn.Sigmoid()

  def forward(self, x):
    z1 = self.linear1(x)
    a1 = self.activation(z1)

    z2 = self.iinear2(a1)
    a2 = self.activation(z2)

    z3 = self.lienar3(a2)

    return z3

In [18]:
model = Model().to(device).train()

In [19]:
optimizer = optim.SGD(model.parameters(), lr=1)

In [20]:
criterion = nn.CrossEntropyLoss()

In [17]:
epochs = 70
lmbd = 0.003

train_avg_costs = []
test_avg_costs = []

test_total_batch = len(test_dataloader)
total_batch_num = len(train_dataloader)

for epoch in range(epochs):
  avg_cost = 0
  model.train()
  for b_x, b_y in train_dataloader:
    b_x = b_x.view(-1, 32*32*3).to(device)
    logits = model(b_x) # forward propagation
    loss = criterion(logits, b_y.to(device)) # get cost

    # L2 Regularization
    reg = model.linear1.weight.pow(2.0).sum()
    reg += model.linear2.weight.pow(2.0).sum()
    reg += model.lienar3.weight.pow(2.0).sum()
    loss += lmbd*reg/len(b_x)/2.

    optimizer.zero_grad()
    loss.backward() # backward propagation
    optimizer.step() # update parameters

    avg_cost += loss/total_batch_num
  train_avg_costs.append(avg_cost.detach().cpu())
  print('Epoch : {} / {}, cost: {}'.format(epoch+1, epochs, avg_cost))

  test_avg_cost=0
  model.eval()
  for b_x, b_y in test_dataloader:
    b_x = b_x.view(-1, 32*32*3).to(device)
    with torch.no_grad():
      logits = model(b_x)
      test_loss = criterion(logits, b_y.to(device)) # get cost 
    test_avg_cost += test_loss / test_total_batch 

  test_avg_costs.append(test_avg_cost.detach().cpu())

RuntimeError: ignored

In [None]:
import matplotlib.pyplot as plt
import numpy as np
epoch = range(epochs)
plt.plot(epoch.train_avg_costs, 'r-')
plt.plot(epoch, test_avg_costs, 'b-')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend(['train','test'])
plt.show()

#3. Dropout

In [21]:
class Model(nn.Module):
  def __init__(self, drop_prob):
    super(Model, self).__init__()
    self.linear1 = nn.Linear(32*32*3, 256)
    self.linear2 = nn.Linear(256, 128)
    self.linear3 = nn.Linear(128, 10)

    self.dropout = nn.Dropout(drop_prob)
    self.activation = nn.Sigmoid()

  def forward(self, x):
    z1 = self.linear1(x)
    a1 = self.activation(z1)
    a1 = self.dropout(a1)

    z2 = self.iinear2(a1)
    a2 = self.activation(z2)
    a2 = self.dropout(a2)

    z3 = self.lienar3(a2)

    return z3

In [22]:
model = Model(0.1).to(device).train()

#4. Normalization

In [None]:
import torchvision
import torchvision.transforms as train

transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261))]
)

train_dataset = torchvision.datasets.CIFAR10(root='CIFAR10/',
                                             train=True,
                                             transform=transforms.ToTensor(),
                                             download=True)
test_dataset = torchvision.datasets.CIFAR10(root='CIFAR10/',
                                             train=False,
                                             transform=transforms.ToTensor(),
                                             download=True)