<a href="https://colab.research.google.com/github/Jee-9/Study/blob/main/ResNet101_notworked.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchsummary import summary
from torch import optim
from torch.optim.lr_scheduler import StepLR

import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
import os

from torchvision import utils
import matplotlib.pyplot as plt
%matplotlib inline

import numpy as np
import time
import copy

import json

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
train_transformation = transforms.Compose([
                                           transforms.Resize((224,224)),
                                           transforms.ToTensor(),
                                           transforms.Normalize([0.5,0.5,0.5], [0.5,0.5,0.5])
])

dataset = torchvision.datasets.ImageFolder(root = '/content/drive/MyDrive/PROJECT SHARING FILES/imageData', transform = train_transformation)
# torchvision is not from Facebook

# train test split
from torchvision import datasets, models, transforms

train_size = int(0.8*len(dataset))
test_size = len(dataset) - train_size
train_dataset, val_dataset = torch.utils.data.random_split(dataset, [train_size, test_size])

train_dl = torch.utils.data.DataLoader(train_dataset, batch_size = 16, shuffle=True)
val_dl = torch.utils.data.DataLoader(val_dataset, batch_size = 16, shuffle=True)

In [None]:
for i, l in dataset:
  print(i.shape)
  print(l)
  break

torch.Size([3, 224, 224])
0


In [None]:
for i, l in train_dl:
  print(i.shape)
  print(l)
  break

torch.Size([16, 3, 224, 224])
tensor([2, 1, 3, 2, 0, 2, 1, 2, 2, 0, 0, 3, 3, 0, 0, 2])


In [None]:
# from torchvision import datasets, models, transforms

# train_size = int(0.8*len(dataset))
# test_size = len(dataset) - train_size

# train_dataset, val_dataset = torch.utils.data.random_split(dataset, [train_size, test_size])

In [None]:
print(len(dataset))
print(len(val_dl)) # batch size

49836
623


In [None]:
print(train_dl.dataset)

<torch.utils.data.dataset.Subset object at 0x7fd979978c50>


In [None]:
class BottleNeck(nn.Module):
  expansion = 4 # BottleNeck을 사용하는 50 레이어부터는 * 4 로 채널 올려줌
  def __init__(self, in_channels, out_channels, stride=1):
    super().__init__()

    self.residual_function = nn.Sequential(
        nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=1, bias=False), # bias = False 주는 이유는 BatchNorm에서 bias 들어가서
        nn.BatchNorm2d(out_channels),
        nn.ReLU(),
        nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False), ####
        # padding=1이 output size = output size 아닌가? 여기 padding = 'same' 안주고 1 주는 이유는 뭘까 ㅠㅠ 
        nn.BatchNorm2d(out_channels),
        nn.ReLU(),
        nn.Conv2d(out_channels, out_channels * BottleNeck.expansion, kernel_size=1, stride=1, bias=False),
        nn.BatchNorm2d(out_channels * BottleNeck.expansion), # 64 * 4 = 256 , 128 * 4 = 512, 256 * 4 = 1024
    )

    self.shortcut = nn.Sequential() ## identity shortcut
    self.relu = nn.ReLU()

    if stride != 1 or in_channels != out_channels * BottleNeck.expansion: ## 가운데 conv2d가 stride != 1이라 변화하지 않는 경우를 말하는 듯
      self.shortcut = nn.Sequential(
          nn.Conv2d(in_channels, out_channels * BottleNeck.expansion, kernel_size=1, stride=stride, bias=False), # 위와 같은 경우에는 shortcut 인자에 다음과 같은 연산을 ++++ 해준다
          nn.BatchNorm2d(out_channels*BottleNeck.expansion) # BatchNorm 적용 out channels는 X4해서 내보냄.
      ) ## projection shortcut
      '''
      projection shortcut 기법을 사용한 걸로 보이는데 대부분의 경우 identity shortcut보다 projection shortcut 기법을 사용하는 것이 더 정확하지만,
      사실 모델의 복잡도가 크게 증가하는 방식이기 때문에 쓰지 않는 경우도 많이 있다고 합니다.
      '''

  def forward(self, x):
    x = self.residual_function(x) + self.shortcut(x)
    x = self.relu(x)
    return x

In [None]:
class ResNet(nn.Module):
  def __init__(self, block, num_block, num_classes=4, init_weights=True): 
    # init_weights : w의 초기값을 카이밍 히 normalization 사용하겠다는 뜻인거 같음, ResNet에서는 weight initialization 방법으로 he normalization 사용함.
    '''
    Tensorflow ResNet 모델 구현할 때 
      x = Conv2D(filters=filter2, kernel_size=middle_kernel_size, padding='same', kernel_initializer='he_normal', name=conv_name_base+'2b')(x)
      x = BatchNormalization(axis=3, name=bn_name_base+'2b')(x)
      x = Activation('relu')(x)
    다음과 같이 레이어 쌓는 것과 같은 원리이고 kernel_initializer = 'he_normal'을 여기서는 init_weights로 사용.
    '''
    super().__init__()

    self.in_channels=64
    # 그림 보면 알겠지만 무조건 첫채널 64 !!

    self.conv1 = nn.Sequential(
        nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False),
        nn.BatchNorm2d(64),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
    ) # 여기는 모든 크기의 ResNet에서 공통적인 시작 부분 , kernel_size = 7로 주는 게 특징임.

    # Stacking layers
    self.conv2_x = self._make_layer(block, 64, num_block[0], 1)
    self.conv3_x = self._make_layer(block, 128, num_block[1], 2)
    self.conv4_x = self._make_layer(block, 256, num_block[2], 2)
    self.conv5_x = self._make_layer(block, 512, num_block[3], 2)

    self.avg_pool = nn.AdaptiveAvgPool2d((1,1))
    self.fc = nn.Linear(512*block.expansion, num_classes)

    if init_weights: # initialize weights : w 초기값 주는 방법
      self._initialize_weights()

  def _make_layer(self, block, out_channels, num_blocks, stride):
    strides = [stride] + [1] * (num_blocks-1) # 이거 무슨 문법,,,,? ㅠ
    layers=[]
    for stride in strides:
      layers.append(block(self.in_channels, out_channels, stride))
      self.in_channels = out_channels * block.expansion
    return nn.Sequential(*layers)

  def forward(self, x):
    output = self.conv1(x)
    output = self.conv2_x(output)
    x = self.conv3_x(output)
    x = self.conv4_x(x)
    x = self.conv5_x(x)
    x = self.avg_pool(x)
    x = x.view(x.size(0), -1)
    '''
    view 사용문법!
    view(-1,3) -> (?,3) 행렬로 바꿔달라.
    size(0) -> 행
    -> 행 갯수를 그대로 유지하면서 flatten 시키는 건가?
    '''
    x = self.fc(x)
    return x

  def _initialize_weights(self):
    for m in self.modules():
      if isinstance(m, nn.Conv2d):
        nn.init.kaiming_normal_(m.weight, mode = 'fan_out', nonlinearity='relu')
        #### init.kaiming_normal_  = he_normalization
        if m.bias is not None:
          nn.init.constant_(m.bias, 0)
      elif isinstance(m, nn.BatchNorm2d):
        nn.init.constant_(m.weight, 1)
        nn.init.constant_(m.bias, 0)
      elif isinstance(m, nn.Linear):
        nn.init.normal_(m.weight, 0, 0.01)
        nn.init.constant_(m.bias, 0)
    # 그러니까 resnet이 weight 처음에 줄 때 카이밍 히 방법을 쓰는 건 알겠는데,,문법이ㅠㅎ (tensorflow에서 'he_normalize' 같은 거인듯) 

def resnet50():
  return ResNet(BottleNeck, [3,4,6,3])

def resnet101():
  return ResNet(BottleNeck, [3,4,23,3])

def resnet152():
  return ResNet(BottleNeck, [3,8,36,3])

In [None]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = resnet101().to(device)
x = torch.randn(3,3,224,224).to(device)
output = model(x)
print(output.size())

torch.Size([3, 4])


  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)


In [None]:
!nvidia-smi

Fri Sep 24 05:42:37 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 470.63.01    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   34C    P0    31W /  70W |   1840MiB / 15109MiB |     23%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [None]:
summary(model, (3,224,224), device = device.type)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 112, 112]           9,408
       BatchNorm2d-2         [-1, 64, 112, 112]             128
              ReLU-3         [-1, 64, 112, 112]               0
         MaxPool2d-4           [-1, 64, 56, 56]               0
            Conv2d-5           [-1, 64, 56, 56]           4,096
       BatchNorm2d-6           [-1, 64, 56, 56]             128
              ReLU-7           [-1, 64, 56, 56]               0
            Conv2d-8           [-1, 64, 56, 56]          36,864
       BatchNorm2d-9           [-1, 64, 56, 56]             128
             ReLU-10           [-1, 64, 56, 56]               0
           Conv2d-11          [-1, 256, 56, 56]          16,384
      BatchNorm2d-12          [-1, 256, 56, 56]             512
           Conv2d-13          [-1, 256, 56, 56]          16,384
      BatchNorm2d-14          [-1, 256,

In [None]:
# 손실함수, optimizer, lr_scheduler 정의
loss_func = nn.CrossEntropyLoss(reduction = 'sum')
opt = optim.Adam(model.parameters(), lr=0.001)

from torch.optim.lr_scheduler import ReduceLROnPlateau
lr_scheduler = ReduceLROnPlateau(opt, mode='min', factor=0.1, patience=10)

# 현재 lr을 계산하는 함수
def get_lr(opt):
  for param_group in opt.param_groups:
    return param_group['lr']

# 배치당 loss와 metric을 계산하는 함수
def metric_per_batch(output, target):
  pred = output.argmax(1, keepdim=True)
  corrects = pred.eq(target.view_as(pred)).sum().item()
  return corrects

def loss_per_batch(loss_func, output, target, opt=None):
  loss = loss_func(output, target)
  metric_b = metric_per_batch(output, target)

  if opt is not None:
    opt.zero_grad()
    loss.backward()
    opt.step()

  return loss, metric_b


# 에폭당 loss를 정의하는 함수
def loss_per_epoch(model, loss_func, dataset_dl, sanity_check=False, opt=None):
  running_loss = 0.0
  running_metric = 0.0
  len_data = len(dataset_dl.dataset)
  # dataloader -> next(iter(dataloader))

  for xb, yb in dataset_dl:
    # xb = xb.to(device)
    # yb = yb.to(device)
    # output = model(xb)
    print('output')

    loss_b, metric_b = loss_per_batch(loss_func, output, yb, opt)

    running_loss += loss_b

    if metric_b is not None:
      running_metric += metric_b

    if sanity_check is True:
      break

  loss = running_loss / len_data
  metric = running_metric / len_data
  print('loss, metric')

  return loss, metric

In [None]:
len(train_dl)

2492

In [None]:
def last_per_batch(model, loss_func, dataset_dl, opt=None):
  running_loss = 0.0
  running_metric = 0.0
  len_data = len(dataset_dl.dataset)

  for batch_idx, (data, target) in enumerate(dataset_dl):
    data = data.cuda()
    target = target.cuda()
    output = model(data)

    loss_b, metric_b = loss_per_batch(loss_func, output, target, opt)

    running_loss += loss_b

    if (batch_idx+1) % 200 :
      loss_batch = running_loss / len_data
      print('loss in 200 batches turn: ', loss_batch)
      print(loss_b)

    if metric_b is not None:
      running_metric += metric_b



  loss = running_loss / len_data
  metric = running_metric / len_data
  print('metric', metric)
  print('epoch loss', loss)

  return loss, metric

In [None]:
#!pi

In [None]:
def train_func(model, params):

  num_epochs = params['num_epochs']
  loss_func = params['loss_func']
  opt = params['optimizer']
  train_dl = params['train_dl']
  val_dl = params['val_dl']
  sanity_check = params['sanity_check']
  lr_scheduler = params['lr_scheduler']
  path2weights = params['path2weights']

  loss_history = { 'train': [], 'val': [] }
  metric_history = { 'train': [], 'val': [] } 

  start_time = time.time()

  for epoch in range(num_epochs):
    current_lr = get_lr(opt)
    print('Epoch {}/{}, current lr = {}'.format(epoch, num_epochs-1, current_lr))

    model.train()
    print('model trained')

    train_loss, train_metric = last_per_batch(model, loss_func, train_dl, opt)
    loss_history['train'].append(train_loss)

    # import json
    # if len(loss_history["train"]) > 100000
    # with open("file_name", "w") as file:
    #   json.dump(file, loss_history["train"])
    # loss_historty["train"].clear()

    if len(loss_history['train']) > 100000:
      with open("file_name", "w") as file:
      # the best way to open a file as read/write if it exists, or if it does not, then create it and open it as read/write
        json.dump(file, loss_history['train'])
        print('Train Loss : {}, Train Metric: {}'.format(train_loss, train_metric))
      loss_history['train'].clear


    # print('Train Loss : {}, Train Metric: {}'.format(train_loss, train_metric))


  return loss_history, train_loss, train_metric

In [None]:
params_train = {
    'num_epochs': 30,
    'optimizer' : opt,
    'loss_func' : loss_func,
    'train_dl' : train_dl,
    'val_dl' : val_dl,
    'sanity_check' : False,
    'lr_scheduler' : lr_scheduler,
    'path2weights' : './models/weights.pt',
}

In [None]:
train_func(model, params_train)

Epoch 0/29, current lr = 0.001
model trained
loss in 200 batches turn:  tensor(0.0007, device='cuda:0', grad_fn=<DivBackward0>)
tensor(27.3697, device='cuda:0', grad_fn=<NllLossBackward>)
loss in 200 batches turn:  tensor(0.0015, device='cuda:0', grad_fn=<DivBackward0>)
tensor(30.9919, device='cuda:0', grad_fn=<NllLossBackward>)
loss in 200 batches turn:  tensor(0.0025, device='cuda:0', grad_fn=<DivBackward0>)
tensor(39.9773, device='cuda:0', grad_fn=<NllLossBackward>)
loss in 200 batches turn:  tensor(0.0034, device='cuda:0', grad_fn=<DivBackward0>)
tensor(35.6813, device='cuda:0', grad_fn=<NllLossBackward>)
loss in 200 batches turn:  tensor(0.0043, device='cuda:0', grad_fn=<DivBackward0>)
tensor(38.2967, device='cuda:0', grad_fn=<NllLossBackward>)
loss in 200 batches turn:  tensor(0.0050, device='cuda:0', grad_fn=<DivBackward0>)
tensor(26.2492, device='cuda:0', grad_fn=<NllLossBackward>)
loss in 200 batches turn:  tensor(0.0054, device='cuda:0', grad_fn=<DivBackward0>)
tensor(15.3509

KeyboardInterrupt: ignored

- reference : https://tutorials.pytorch.kr/beginner/former_torchies/nnft_tutorial.html

In [None]:
for batch_idx, (data, target) in enumerate(train_dl):
  print(batch_idx)
  print(data)
  print(target)
  break

0
tensor([[[[-0.9838, -0.9782, -0.9843,  ..., -0.9059, -0.9059, -0.9059],
          [-0.9739, -0.9826, -0.9903,  ..., -0.9059, -0.9059, -0.9059],
          [-0.9663, -0.9863, -0.9944,  ..., -0.9059, -0.9059, -0.9059],
          ...,
          [-1.0000, -1.0000, -1.0000,  ..., -1.0000, -1.0000, -1.0000],
          [-1.0000, -1.0000, -1.0000,  ..., -1.0000, -1.0000, -1.0000],
          [-1.0000, -1.0000, -1.0000,  ..., -1.0000, -1.0000, -1.0000]],

         [[-0.9759, -0.9720, -0.9843,  ..., -0.9451, -0.9451, -0.9451],
          [-0.9660, -0.9764, -0.9903,  ..., -0.9451, -0.9451, -0.9451],
          [-0.9585, -0.9801, -0.9944,  ..., -0.9451, -0.9451, -0.9451],
          ...,
          [-1.0000, -1.0000, -1.0000,  ..., -1.0000, -1.0000, -1.0000],
          [-1.0000, -1.0000, -1.0000,  ..., -1.0000, -1.0000, -1.0000],
          [-1.0000, -1.0000, -1.0000,  ..., -1.0000, -1.0000, -1.0000]],

         [[-1.0000, -1.0000, -1.0000,  ..., -0.9686, -0.9686, -0.9686],
          [-0.9966, -1.0000,

In [None]:
def train_val(model, params):
  # wandb.init(project='test', entity='jeeeeeeeee') # 추가된 코드 1
  # wandb.watch(ResNet, log='all', log_freq=10) # 추가된 코드 2


  num_epochs = params['num_epochs']
  loss_func = params['loss_func']
  opt = params['optimizer']
  train_dl = params['train_dl']
  val_dl = params['val_dl']
  sanity_check = params['sanity_check']
  lr_scheduler = params['lr_scheduler']
  path2weights = params['path2weights']

  loss_history = { 'train': [], 'val': [] }
  metric_history = { 'train': [], 'val': [] } 


  # GPU out of memory error
  # best_model_wts = oopy.deepoopy(model.state_dict())

  # best_loss = float('inf')

  start_time = time.time()

  for epoch in range(num_epochs): # epoch 안에 
    current_lr = get_lr(opt)
    print('Epoch {}/{}, current lr = {}'.format(epoch, num_epochs-1, current_lr))

    model.train()
    
    print('model training finished') ############################################ print
    
    # loss_arr += [loss.item()]

    # print("Batch Loss : %.4f" % np.mean(loss_arr))

    # wandb.log({'Epoch': epoch, 'loss': np.mean(loss_arr)}) # 추가된 코드 3

    # train_loss, train_metric = loss_per_epoch(model, loss_func, train_dl, sanity_check, opt)
    # train_loss = loss_per_batch(loss_func, train_dl, opt)
    # print('loss_per_epoch finished') ########################################## print
    # loss_history['train'].append(train_loss)
    # metric_history['train'].append(train_metric)

    # model.eval()
    # print('model.eval() finished') ############################################### print

    # with torch.no_grad():
    #   val_loss, val_metric = loss_per_epoch(model, loss_func, val_dl, sanity_check)
    # loss_history['val'].append(val_loss)
    # metric_history['val'].append(val_metric)

    # if val_loss < best_loss:
    #   best_loss = val_loss
    #   # best_model_wts = copy.deepcopy(model.state_dict())

    #   # torch.save(model.state_dict(), path2weights)
    #   # print('Copied best model weights!')
    #   print('Got best val_loss')

    # lr_scheduler.step(val_loss)
    # # wandb.log({'Epoch': epoch, 'loss': np.mean(loss_arr)}) # 추가된 코드 3
    # print('train loss: %.6f, val loss: %.6f, accuracy: %.2f, time: %.4f min' %(train_loss, val_loss, 100*val_metric, (time.time()-start_time)/60))
    # print('-'*10)

  # return model, loss_history, metric_history
    return model, loss_history

In [None]:
model, loss_hist, metric_hist = train_val(model, params_train)

Epoch 0/1, current lr = 0.001
model training finished


NameError: ignored

In [None]:
# 하이퍼 파라미터 정의

params_train = {
    'num_epochs': 2,
    'optimizer' : opt,
    'loss_func' : loss_func,
    'train_dl' : train_dl,
    'val_dl' : val_dl,
    'sanity_check' : False,
    'lr_scheduler' : lr_scheduler,
    'path2weights' : './models/weights.pt',
}

def createFolder(directory):
  try:
    if not os.path.exists(directory):
      os.makedirs(directory)
  except OSerror:
    print('Error')

createFolder('./models')

In [None]:
model, loss_hist, metric_hist = train_val(model, params_train)

Epoch 0/1, current lr = 0.001
model training finished


KeyboardInterrupt: ignored

# 데이터 넣으면 왜 안될까

1. dataset 불러오고 이거 training 시키는 건 ㄹㅇ 10초..
2. dataset 불러오고 세가지 정도의 방법으로 train test split 해봤는데 자꾸 에러뜸 -> 해결!
- 자꾸 사이즈가 안맞대요!! Resize가 일을 안하는 것 같은데 왜 안하는지 모르겠어요 ㅠㅠㅠ
3. 그냥 loss만 찍으면 무한루프 돌기 시작함
4. STL 은 도대체 왜 되는 거임?ㅎ