https://github.com/dansuh17/alexnet-pytorch/blob/d0c1b1c52296ffcbecfbf5b17e1d1685b4ca6744/model.py#L40

In [1]:
!pip install tensorboardx

Collecting tensorboardx
  Downloading tensorboardX-2.6.2.2-py2.py3-none-any.whl.metadata (5.8 kB)
Downloading tensorboardX-2.6.2.2-py2.py3-none-any.whl (101 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m101.7/101.7 kB[0m [31m3.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: tensorboardx
Successfully installed tensorboardx-2.6.2.2


In [2]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# 패키지 설치

import os
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils import data
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from tensorboardX import SummaryWriter
# define pytorch device - useful for device-agnostic execution
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# define model parameters
NUM_EPOCHS = 90
BATCH_SIZE = 128
MOMENTUM = 0.9
LR_DECAY = 0.0005
LR_INIT = 0.01
IMAGE_DIM = 227
NUM_CLASSES = 1000
DEVICE_IDS = [0,1,2,3] # GPUs to use
# modify this to point to your data directory
INPUT_ROOT_DIR = 'alexnet_data_in'
TRAIN_IMG_DIR = 'alexnet_data_in/imagenet'
OUTPUT_DIR = 'alexnet_data_out'
LOG_DIR = OUTPUT_DIR + '/tblogs' # tensorboard logs
CHECKPOINT_DIR = OUTPUT_DIR + '/models' # model checkpoints
PATH = './drive/MyDrive/Colab Notebooks'
os.chdir(PATH)
os.makedirs(CHECKPOINT_DIR, exist_ok = True) # alexnet_data_out/models 파일을 만듬, exist_ok = True는 이미 폴더가 만들어져있어도 오류 X


In [None]:
os.getcwd()

'/content/drive/MyDrive/Colab Notebooks'

In [None]:
class AlexNet(nn.Module):

  def __init__(self, num_classes=1000): # 1000개의 클래스를 예측
    super().__init__()
    # ouput_dim(i,j) = np.floor((input_dim(i,j) - kernel_size + 2 x padding_size ) / stride) + 1
    self.net = nn.Sequential( # (batch_size x 3 x 227 x 227)
        nn.Conv2d(in_channels=3, out_channels=96, kernel_size=11, stride=4), # (batch_size x 96 x 55 x 55)
        nn.Relu(), # max(0,x)
        nn.LocalResponseNorm(size=5, alpha=0.0001, beta=0.75, k=2), # ouput feature map에 있는 픽셀 A(x,y)의 값을 인접한 픽셀들의 값을 합한 값으로 나눠줌
        nn.MaxPool2d(kernel_size=3, stride=2), # (b x 96 x 27 x 27), 커널안에 있는 최대값만 뽑아냄
        nn.Conv2d(96,256,5,padding=2),  # (b x 256 x 27 x 27), kernel_size = 2 x padding_size + 1 이고 stride = 1 이라 same padding임
        nn.Relu(),
        nn.LocalResponseNorm(size=5, alpha=0.0001, beta=0.75, k=2),
        nn.MaxPool2d(kernel_size=3, stride=2), # (b x 256 x 13 x 13)
        nn.Conv2d(256, 384, 3, padding=1), # (b x 384 x 13 x 13), same padding
        nn.Relu(),
        nn.Conv2d(384, 384, 3, padding=1), # (b x 384 x 13 x 13), same padding
        nn.Relu(),
        nn.Conv2d(384, 256, 3, padding=1), # (b x 256 x 13 x 13), same padding
        nn.Relu(),
        nn.MaxPool2d(kernel_size=3, stride=2),  # (b x 256 x 6 x 6)
    )
    self.classifie = nn.Sequential(
        nn.Dropout(p=0.5, inplace=True), # inplace란 원본데이터에도 Dropout을 반영하는지 여부
        nn.Linear(in_features=(256 * 6 * 6), out_features=4096), # (b x 4096), 여기선 keras와 달리 flatten을 어떻게 해주는지 모르겠음)
        nn.Relu(),
        nn.Dropout(p=0.5, inplace=True),
        nn.Linear(in_features=4096, out_features=4096),
        nn.Relu(),
        nn.Linear(in_features=4096, out_features=num_classes),
    )
    self.init_bias()

  def init_bias(self):
    for layer in self.net:
      if isinstance(layer, nn.Conv2d): # isinstance(인스턴스, 데이터나 클래스 타입)
        nn.init.normal_(layer.weight, mean=0, std=0.01)
        nn.init.constant_(layer.bias, 0) # constant_(텐서, float), float으로 텐서를 채움
  # original paper = 1 for Conv2d layers 2nd, 4th, and 5th conv layers

    nn.init.constant_(self.net[4].bias, 1)
    nn.init.constant_(self.net[10].bias, 1)
    nn.init.constant_(self.net[12].bias, 1)

  def forward(self, x):
    # 정방향패스

    x = self.net(x)
    x = x.view(-1, 256*6*6)
    return self.classifer(x)


if __name__ == '__main__':
  seed = torch.inital_seed()
  print('Used seed : {}'.format(seed))

  tbwriter = SummaryWriter(log_dir=LOG_DIR)
  print('TensorboardX summary writer created')

  alexnet = AlexNet(num_classes=NUM_CLASSES).to(device) # 모델을 device(cuda)에 넣음
 # train on multiple GPUs
  alexnet = torch.nn.parallel.DataParallel(alexnet, device_ids =DEVICE_IDS)

  print(alexnet)
  print('Alexnet created')

# create dataset and data loader
  dataset = datasets.ImageFolder(TRAIN_IMG_DIR, transforms.Compose([
       # transforms.RandomResizedCrop(IMAGE_DIM, scale=(0.9, 1.0), ratio=(0.9, 1.1)),
       transforms.CenterCrop(IMAGE_DIM),
       # transforms.RandomHorizontalFlip(),
       transforms.ToTensor(),
       transforms.Normalize(mean=[0.485,0.456,0.406], std=[0.229,0.224,0.225]),
  ]))
  print('Dataset created')
  dataloader = data.DataLoader(
      dataset,
      shuffle=True,
      pin_memory=True,
      num_workers=8,
      drop_last=True,
      batch_size=BATCH_SIZE)
  print('Dataloader created')

  # create optimizer
  # the one that WORKS

  optimizer = optim.Adam(params=alexnet.parameters(), lr=0.0001)
  ### BELOW is the setting proposed by the original paper - which doesn't train....
    # optimizer = optim.SGD(
    #     params=alexnet.parameters(),
    #     lr=LR_INIT,
    #     momentum=MOMENTUM,
    #     weight_decay=LR_DECAY)
  print('Optimizer created')
  # multiply LR by 1 / 10 after every 30 epochs

  lr_scheduler = optim.lr_scheduler.StepLR(optimizier, step_size=30, gamma=0.1)






NameError: name 'x' is not defined