In [1]:
# Colab setup: mount Google Drive so files stored there are reachable from
# this runtime. The call prompts for an authorization code before mounting.
from google.colab import drive
drive.mount('/content/gdrive/')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly&response_type=code

Enter your authorization code:
··········
Mounted at /content/gdrive/


In [2]:
# Dataset root on the mounted Drive. Later cells build file and folder
# locations by plain string concatenation (path + 'train', path + 'train.7z',
# path + 'cifar10-cnn.pth'), so the trailing slash is required.
path = "/content/gdrive/My Drive/cifar-10/"

In [3]:
import torch
import torch.nn.functional as F
from torchvision import datasets,transforms
import torch.nn as nn

In [4]:
# If you haven't unpacked the 7z archives yet, run the code below (remove the
# surrounding triple quotes first). Extraction takes a long time here, so
# unpacking the archives on your local machine is recommended instead.
# The snippet is kept inside a string literal so that it does not execute;
# the cell merely echoes the string.
'''
! pip install py7zr

import py7zr
with py7zr.SevenZipFile(path + 'train.7z', mode='r') as z:
    z.extractall(path)

with py7zr.SevenZipFile(path + 'test.7z', mode='r') as z:
    z.extractall(path)
'''

"\n! pip install py7zr\n\nimport py7zr\nwith py7zr.SevenZipFile(path + 'train.7z', mode='r') as z:\n    z.extractall(path)\n\nwith py7zr.SevenZipFile(path + 'test.7z', mode='r') as z:\n    z.extractall(path)\n"

In [5]:
# SEED SETTING 
import random
import os 
import numpy as np

def seed_everything(seed):
    """Seed every random number generator this notebook relies on.

    Covers Python's ``random`` module, NumPy's global RNG and PyTorch (CPU
    and all CUDA devices), and configures cuDNN for repeatable results.

    Args:
        seed: Integer seed shared by all generators.
    """
    random.seed(seed)
    # The interpreter reads PYTHONHASHSEED at start-up, so this only affects
    # Python processes launched after this point.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed every visible GPU, not just the current one; this is a no-op when
    # CUDA is unavailable.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # Auto-tuning may pick a different convolution algorithm on each run,
    # which defeats the deterministic flag above.
    torch.backends.cudnn.benchmark = False

# Fix the seed once, up front, before later cells shuffle the training data
# and initialise the model weights.
SEED = 0
seed_everything(SEED)

In [31]:
import pandas as pd 
from torch.utils.data import Dataset as BaseDataset
from PIL import Image

class CIFARDataset(BaseDataset):
    """CIFAR-10 images stored as ``<id>.png`` files plus a CSV of labels.

    Each item is ``(image, label)``: ``image`` is the float tensor produced by
    ``transforms.ToTensor()`` after resizing to 32x32, and ``label`` is an
    integer class index, so batches can be fed straight to
    ``nn.CrossEntropyLoss``.
    """

    IMAGE_SIZE = 32

    def __init__(self, path, transform = False, labels_csv = None):
        """
        path : directory containing the ``<id>.png`` image files
        transform : True to apply random-crop / horizontal-flip augmentation
        labels_csv : CSV whose first column is the image id and whose second
            column is the label; defaults to ``trainLabels.csv`` located next
            to ``path``
        """
        self.path = path
        if labels_csv is None:
            # Look for the label file beside the image folder instead of
            # relying on one hard-coded absolute location.
            parent_dir = os.path.dirname(os.path.normpath(path))
            labels_csv = os.path.join(parent_dir, 'trainLabels.csv')
        self.labels = pd.read_csv(labels_csv)
        self.transform = transform
        self._index_classes()
        self._pipeline = self._build_pipeline(transform)

    def _index_classes(self):
        """Build the class-name -> index mapping from the label column."""
        column = self.labels.iloc[:, 1]
        if pd.api.types.is_integer_dtype(column):
            # Labels are already class indices; nothing to encode.
            self.classes = None
            self.class_to_idx = None
        else:
            # Presumably the CSV stores class names (e.g. 'frog') — TODO
            # confirm. Sorting makes the name -> index assignment stable.
            self.classes = sorted(column.astype(str).unique())
            self.class_to_idx = {name: i for i, name in enumerate(self.classes)}

    def _label_at(self, idx):
        """Return the integer class index of row ``idx``."""
        raw = self.labels.iloc[idx, 1]
        if self.class_to_idx is None:
            return int(raw)
        return self.class_to_idx[str(raw)]

    def _build_pipeline(self, augment):
        """Compose the torchvision transforms applied to every image."""
        size = self.IMAGE_SIZE
        steps = [transforms.Resize((size, size))]
        if augment:
            # Pad-then-crop keeps the output at 32x32 (a crop larger than the
            # image would raise), followed by a random horizontal flip.
            steps.append(transforms.RandomCrop(size, padding=4))
            steps.append(transforms.RandomHorizontalFlip())
        # Always convert to a tensor so the default collate can batch items,
        # with or without augmentation.
        steps.append(transforms.ToTensor())
        return transforms.Compose(steps)

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        # Build the image path from the id column (the image with id 1 is
        # stored as 1.png).
        img_name = os.path.join(self.path, str(self.labels.iloc[idx, 0]))
        # convert() returns an independent copy, so the file handle can be
        # closed immediately.
        with Image.open(img_name + '.png') as img:
            image = img.convert('RGB')
        image = self._pipeline(image)

        labels = self._label_at(idx)

        return image, labels

In [32]:
from torch.utils.data import Dataset, DataLoader

# Wrap each image folder in a dataset first, then batch it. Only the training
# side requests the transform and shuffles.
BATCH_SIZE = 64
train_dataset = CIFARDataset(path + 'train', transform = True)
test_dataset = CIFARDataset(path + 'test', transform = False)

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=0)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=0)

In [33]:
class DeepCNN(nn.Module):
    """Four 3x3 convolution blocks followed by a three-layer classifier head.

    The two 2x2 max-pools halve the spatial size twice (32 -> 16 -> 8), which
    is what the ``8 * 8 * 128`` input width of the first linear layer assumes,
    so inputs must have shape (N, 3, 32, 32). The output is raw logits of
    shape (N, 10); no softmax is applied.
    """

    def __init__(self):
        super().__init__()
        self.layer1 = self._conv_block(3, 64)
        self.layer2 = self._conv_block(64, 128, pool = True)
        self.layer3 = self._conv_block(128, 128)
        self.layer4 = self._conv_block(128, 128, pool = True)

        # 32 -> max pooling 2 times -> 8
        self.layer5 = nn.Flatten()

        self.layer6 = nn.Sequential(nn.Linear(8 * 8 * 128, 256),
                            nn.ReLU(),
                            nn.Dropout(0.5))

        self.layer7 = nn.Sequential(nn.Linear(256, 256),
                            nn.ReLU())

        self.fc = nn.Linear(256, 10)

    @staticmethod
    def _conv_block(in_channels, out_channels, pool = False):
        """Return Conv2d(3x3, stride 1, padding 1) -> ReLU [-> MaxPool2d(2)]."""
        modules = [nn.Conv2d(in_channels = in_channels,
                             out_channels = out_channels,
                             kernel_size = 3,
                             stride = 1,
                             padding = 1),
                   nn.ReLU()]
        if pool:
            modules.append(nn.MaxPool2d(kernel_size = 2, stride = 2))
        return nn.Sequential(*modules)

    def forward(self, inputs):
        """Map a batch of images to class logits.

        Args:
            inputs: Tensor of shape (N, 3, 32, 32).

        Returns:
            Tensor of shape (N, 10).
        """
        # Start from the actual argument; the previous code read an
        # unassigned local and raised on every call.
        x = inputs
        for stage in (self.layer1, self.layer2, self.layer3, self.layer4,
                      self.layer5, self.layer6, self.layer7, self.fc):
            x = stage(x)
        return x

In [34]:
# Use the first CUDA device when one is available, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

# Build the network and move its parameters to the chosen device; the bare
# name on the last line displays the architecture.
model = DeepCNN()
model = model.to(device)
model

DeepCNN(
  (layer1): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
  )
  (layer2): Sequential(
    (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (layer3): Sequential(
    (0): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
  )
  (layer4): Sequential(
    (0): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (layer5): Flatten()
  (layer6): Sequential(
    (0): Linear(in_features=8192, out_features=256, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.5, inplace=False)
  )
  (layer7): Sequential(
    (0): Linear(in_features=256, out_features=256, bias=True)
    (1): ReLU()
  )
  (fc): Linear(in_features=256, out_features=10, bias=True)
)

In [35]:
# Cross-entropy loss for the classifier's outputs, optimised with Adam.
LEARNING_RATE = 0.001
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params = model.parameters(), lr = LEARNING_RATE)

In [36]:
# Show up to 16 images from one training batch in a 4x4 grid.
# matplotlib is used by this cell but is not imported anywhere else in the
# notebook, so import it here.
import matplotlib.pyplot as plt

dataiter = iter(train_loader)      # the loader is iterable, not an iterator
images, labels = next(dataiter)    # first batch (batch size is set on the loader)

# Prefer a class list exposed by the dataset; otherwise fall back to the
# standard CIFAR-10 names.
# NOTE(review): the fallback assumes integer labels follow this (alphabetical)
# order — confirm against the label file.
classes = getattr(train_loader.dataset, 'classes', None) or [
    'airplane', 'automobile', 'bird', 'cat', 'deer',
    'dog', 'frog', 'horse', 'ship', 'truck']

fig = plt.figure(figsize=(16, 16))

for idx in range(min(16, len(images))):
    ax = fig.add_subplot(4, 4, idx+1, xticks=[], yticks=[])
    # imshow expects H x W x C; image tensors are C x H x W.
    ax.imshow(images[idx].permute(1, 2, 0).cpu().numpy())
    label = labels[idx]
    # Integer (tensor) labels are looked up by index; anything else is shown
    # as-is.
    ax.set_title(classes[label.item()] if torch.is_tensor(label) else str(label))

FileNotFoundError: ignored

In [19]:
def train(net=None, loader=None, loss_fn=None, optim=None, target_device=None):
    """Run one training epoch and return the mean loss per batch.

    Every argument is optional. When omitted, the notebook-level ``model``,
    ``train_loader``, ``criterion``, ``optimizer`` and ``device`` are used, so
    a bare ``train()`` call keeps working.

    Args:
        net: Module to train.
        loader: Iterable yielding ``(inputs, targets)`` batches.
        loss_fn: Callable ``loss_fn(predictions, targets)`` returning a scalar
            tensor.
        optim: Optimizer that updates ``net``'s parameters.
        target_device: Device the batches are moved to before the forward pass.

    Returns:
        float: Mean of the per-batch losses, or 0.0 if ``loader`` yields no
        batches.
    """
    net = model if net is None else net
    loader = train_loader if loader is None else loader
    loss_fn = criterion if loss_fn is None else loss_fn
    optim = optimizer if optim is None else optim
    target_device = device if target_device is None else target_device

    net.train()
    total_loss = 0.0
    num_batches = 0
    for batch_x, batch_y in loader:
        batch_x = batch_x.to(target_device)
        batch_y = batch_y.to(target_device)
        optim.zero_grad()

        prediction = net(batch_x)
        # Compare predictions with the targets, not with the inputs.
        loss = loss_fn(prediction, batch_y)
        loss.backward()
        optim.step()

        total_loss += loss.item()
        num_batches += 1

    # Guard against an empty loader instead of failing on an unbound counter.
    return total_loss / num_batches if num_batches else 0.0

In [22]:
from tqdm.notebook import tqdm as tqdm_notebook

nb_epochs = 10
train_losses = []
# test_losses = []
# Lowest mean training loss seen so far; None until the first epoch finishes.
best_valid = None

for epoch in tqdm_notebook(range(0, nb_epochs)):
    model.train()
    running_loss = 0.0
    num_batches = 0
    for train_batch_x, train_batch_y in train_loader:
        train_batch_x = train_batch_x.to(device)
        train_batch_y = train_batch_y.to(device)
        optimizer.zero_grad()

        prediction = model(train_batch_x)
        # The loss compares predictions with the targets, not with the inputs.
        loss = criterion(prediction, train_batch_y)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        num_batches += 1

    # Report the mean batch loss so the value does not scale with the number
    # of batches.
    train_loss = running_loss / max(num_batches, 1)

    # No test loss is computed in this loop, so only the training loss is shown.
    print('Epoch {:4d}/{} Train Loss: {:.6f}'.format(epoch+1, nb_epochs, train_loss))
    train_losses.append(train_loss)

    # Keep the checkpoint of the best model so far.
    # Ideally this would be judged on a validation set; for convenience the
    # training loss is used here, so "best" means the lowest training loss.
    if best_valid is None or train_loss < best_valid:
        best_valid = train_loss
        torch.save(model.state_dict(), path + 'cifar10-cnn.pth')

HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))

Exception ignored in: <generator object tqdm_notebook.__iter__ at 0x7fdfe0c52af0>
Traceback (most recent call last):
  File "/usr/local/lib/python3.6/dist-packages/tqdm/notebook.py", line 217, in __iter__
    yield obj
KeyboardInterrupt: 


OSError: ignored

In [None]:
# Rebuild the architecture and restore the checkpoint written during training.
final_model = DeepCNN()
# map_location lets a checkpoint saved from a GPU run load on a CPU-only
# runtime (and vice versa).
final_model.load_state_dict(torch.load(path + 'cifar10-cnn.pth', map_location=device))
final_model.to(device)
# Switch to inference mode so the Dropout layer is disabled. eval() returns
# the module, so the architecture is still displayed as the cell output.
final_model.eval()

In [None]:
# Score the restored model on the test loader and display the result.
# NOTE(review): `evaluate` is not defined in any visible cell of this notebook;
# it must be defined or imported before this cell can run — confirm its
# signature and what it returns.
result = evaluate(final_model, test_loader)
result