In [1]:
import os
import random
import sys

import numpy as np
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
from tqdm import tqdm

# Project-local modules live in ../modules; extend the search path before importing them.
sys.path.append('../modules')

from model import Resnet, Bottleneck
from utils import TinyImageNetDataset

In [2]:
# Tiny Imagenet
#
# Compute per-channel mean / std of the training images. The dataset is loaded with
# ToTensor only, so the statistics are taken on un-normalized tensors and can later be
# fed to transforms.Normalize.

transform = transforms.Compose([transforms.ToTensor()])
trainset = TinyImageNetDataset('./data/tiny_imagenet/tiny-imagenet-200', mode = 'train', transform=transform)

# Materialize the image stack once and reuse it for both statistics; building it separately
# for mean and std iterates the whole dataset twice and allocates the full stack twice.
train_images = torch.stack([data for data, label in trainset])

# Reduce over every dim except dim 1, leaving one value per channel
# (assumes each sample is a (C, H, W) tensor -- TODO confirm against TinyImageNetDataset).
train_mean = train_images.mean(dim=(0,2,3))
train_std = train_images.std(dim=(0,2,3))

# The stacked copy is only needed for the statistics; release it so it does not stay
# resident in the kernel for the rest of the session.
del train_images

Preloading train data...:   0%|          | 0/100000 [00:00<?, ?it/s]

In [3]:
# Training-time pipeline: convert to tensor, apply a random horizontal flip, then normalize
# with the per-channel statistics computed from the training images.
transform_train = transforms.Compose([
    transforms.ToTensor(),
    transforms.RandomHorizontalFlip(),
    transforms.Normalize(train_mean, train_std),
])

# Rebuild the training dataset so that it uses the augmented / normalized pipeline
# (this rebinds `trainset`, replacing the ToTensor-only dataset).
trainset = TinyImageNetDataset(
    './data/tiny_imagenet/tiny-imagenet-200',
    mode='train',
    transform=transform_train,
)

Preloading train data...:   0%|          | 0/100000 [00:00<?, ?it/s]

In [33]:
# NOTE(review): disabled one-off script, kept for reference only.
# For every label it samples 50 indices into a test split, puts the remaining indices into
# the train split, and saves both with np.save -- presumably this produced the
# `tiny_imagenet_train_idx.npy` file that a later cell loads (verify).
# `label_idx_list` is not defined anywhere in the visible notebook, so this cannot be
# re-enabled as-is.
# test_set_idx = []
# train_set_idx = []

# for key in label_idx_list.keys():
#     samples = random.sample(label_idx_list[key], 50)
#     test_set_idx += samples
#     train_set_idx += list(set(label_idx_list[key]) - set(samples))

# print(f'# Train Set : {len(train_set_idx)}\n# Test Set : {len(test_set_idx)}')

# np.save('tiny_imagenet_train_idx',train_set_idx)
# np.save('tiny_imagenet_test_idx', test_set_idx)

In [4]:
# Training hyperparameters

# Number of full passes over the training sampler.
num_epoch = 200

# Mini-batch size handed to the DataLoader (alternative tried earlier: batch_size = 128).
batch_size = 100

In [5]:
# Train only on the dataset indices stored on disk; the sampler draws from that subset
# in random order.
train_idx = np.load('./tiny_imagenet_train_idx.npy')
train_random_sampler = torch.utils.data.SubsetRandomSampler(train_idx)

# `shuffle` stays False because the ordering is delegated to the sampler.
train_dataloader = torch.utils.data.DataLoader(
    trainset,
    batch_size=batch_size,
    shuffle=False,
    sampler=train_random_sampler,
)

# Alternative without the index split (uses every sample of `trainset`):
#train_dataloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True)

In [6]:
# Primary device: GPU index 4 when CUDA is available, otherwise the CPU.
device = torch.device("cuda:4" if torch.cuda.is_available() else "cpu")

# Classification loss.
criterion = nn.CrossEntropyLoss()

# 200-class network built from Bottleneck blocks ([3, 4, 6, 3] is presumably the per-stage
# block count -- confirm in model.py), placed on the primary device and then wrapped in
# DataParallel over GPUs 4, 6 and 7.
net = torch.nn.DataParallel(
    Resnet(Bottleneck, [3, 4, 6, 3], num_classes=200).to(device),
    device_ids=[4,6,7],
).to(device)

# SGD with momentum and weight decay over all parameters of the wrapped network.
optimizer = torch.optim.SGD(
    net.parameters(),
    lr=0.001,
    momentum=0.9,
    weight_decay=0.0001,
)

In [7]:
# Per-epoch loss log that the training loop appends to, and the directory it writes
# checkpoints into.
loss_history = list()
model_save_path = './model/tiny_imagenet'

# Switch the network to training mode; as the last expression of the cell this also
# echoes the module structure.
net.train()

DataParallel(
  (module): Resnet(
    (conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (layer1): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (shortcut): Sequential(
          (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_st

In [None]:
# Train for `num_epoch` epochs and write a checkpoint after every epoch.

# Create the checkpoint directory up front. Without this, a missing directory only surfaces
# as FileNotFoundError from open() below, after an entire epoch has already been trained.
os.makedirs(model_save_path, exist_ok=True)

for epoch in range(num_epoch):
    # Running sum of the per-batch loss values for this epoch (a total, not an average).
    epoch_loss = 0

    for train_x, train_y in tqdm(train_dataloader):
        optimizer.zero_grad()
        preds = net(train_x.to(device))

        loss = criterion(preds, train_y.to(device))
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()

    loss_history.append(epoch_loss)

    print(f'epoch : {epoch}, total train loss : {epoch_loss}')

    # Checkpoint: weights, optimizer state, epoch index and the loss log so far.
    # NOTE: `net` is the DataParallel wrapper, so the saved state-dict keys carry the
    # `module.` prefix; load into a DataParallel model or strip the prefix.
    with open(f'{model_save_path}/model_epoch_{epoch}.pt','wb') as f:
        torch.save({
            'state' : net.state_dict(),
            'optimizer' : optimizer.state_dict(),
            'epoch' : epoch,
            'loss_history' : loss_history,
        },f)

100%|██████████| 900/900 [04:10<00:00,  3.60it/s]


epoch : 0, total train loss : 4436.966950893402


100%|██████████| 900/900 [04:01<00:00,  3.73it/s]


epoch : 1, total train loss : 3842.150247335434


100%|██████████| 900/900 [04:20<00:00,  3.45it/s]


epoch : 2, total train loss : 3453.583591222763


100%|██████████| 900/900 [04:14<00:00,  3.53it/s]


epoch : 3, total train loss : 3164.1179797649384


100%|██████████| 900/900 [04:10<00:00,  3.59it/s]


epoch : 4, total train loss : 2936.4319825172424


100%|██████████| 900/900 [04:11<00:00,  3.57it/s]


epoch : 5, total train loss : 2748.3675723075867


100%|██████████| 900/900 [04:12<00:00,  3.56it/s]


epoch : 6, total train loss : 2577.4986493587494


100%|██████████| 900/900 [04:13<00:00,  3.56it/s]


epoch : 7, total train loss : 2421.428902864456


100%|██████████| 900/900 [04:12<00:00,  3.56it/s]


epoch : 8, total train loss : 2285.3879841566086


100%|██████████| 900/900 [04:10<00:00,  3.60it/s]


epoch : 9, total train loss : 2158.7090443372726


100%|██████████| 900/900 [04:11<00:00,  3.58it/s]


epoch : 10, total train loss : 2043.302451133728


100%|██████████| 900/900 [04:14<00:00,  3.54it/s]


epoch : 11, total train loss : 1924.014215350151


100%|██████████| 900/900 [04:10<00:00,  3.60it/s]


epoch : 12, total train loss : 1822.1546511650085


100%|██████████| 900/900 [04:14<00:00,  3.53it/s]


epoch : 13, total train loss : 1712.1200459003448


100%|██████████| 900/900 [04:12<00:00,  3.57it/s]


epoch : 14, total train loss : 1610.7243068218231


100%|██████████| 900/900 [04:09<00:00,  3.61it/s]


epoch : 15, total train loss : 1511.7720203399658


100%|██████████| 900/900 [04:10<00:00,  3.59it/s]


epoch : 16, total train loss : 1415.0721681118011


100%|██████████| 900/900 [04:11<00:00,  3.58it/s]


epoch : 17, total train loss : 1317.4662239551544


100%|██████████| 900/900 [04:13<00:00,  3.55it/s]


epoch : 18, total train loss : 1219.5159745812416


100%|██████████| 900/900 [04:09<00:00,  3.61it/s]


epoch : 19, total train loss : 1129.8396399021149


100%|██████████| 900/900 [04:12<00:00,  3.57it/s]


epoch : 20, total train loss : 1033.8261704444885


100%|██████████| 900/900 [04:10<00:00,  3.60it/s]


epoch : 21, total train loss : 941.727307498455


100%|██████████| 900/900 [04:11<00:00,  3.58it/s]


epoch : 22, total train loss : 855.0724087357521


100%|██████████| 900/900 [04:12<00:00,  3.57it/s]


epoch : 23, total train loss : 766.5878249406815


100%|██████████| 900/900 [04:11<00:00,  3.58it/s]


epoch : 24, total train loss : 688.9983425438404


100%|██████████| 900/900 [04:11<00:00,  3.58it/s]


epoch : 25, total train loss : 617.822217643261


 16%|█▌        | 143/900 [00:40<03:22,  3.73it/s]

In [4]:
# Display the per-channel training mean that is passed to transforms.Normalize.
train_mean

tensor([0.3975, 0.4481, 0.4802])

In [5]:
# Display the per-channel training standard deviation that is passed to transforms.Normalize.
train_std

tensor([0.2816, 0.2689, 0.2764])