In [4]:
# Colab setup: mount Google Drive (this triggers the Drive access authorization)
# so the files stored on the drive are reachable under /content/gdrive/.
from google.colab import drive

drive.mount(mountpoint='/content/gdrive/')

ValueError: ignored

In [None]:
# Path setting: root folder of the CIFAR-10 data on the mounted Google Drive.
# Later cells build sub-paths by plain string concatenation (e.g. `path + 'train'`),
# so the trailing slash is required.
path = "/content/gdrive/My Drive/cifar-10/"

In [None]:
import torch
import torch.nn.functional as F
from torchvision import datasets,transforms
import torch.nn as nn

In [None]:
# SEED SETTING 
import random
import os 
import numpy as np

def seed_everything(seed):
    """Seed every random number generator this notebook relies on.

    Seeds PyTorch (CPU and CUDA), NumPy and Python's `random`, exports the
    seed as `PYTHONHASHSEED`, and turns on cuDNN's deterministic mode.

    seed : integer seed shared by all generators
    """
    # PyTorch side: deterministic cuDNN kernels plus CPU / CUDA generators.
    torch.backends.cudnn.deterministic = True
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)

    # NumPy and the Python standard library.
    np.random.seed(seed)
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)


SEED = 0
seed_everything(seed=SEED)

In [None]:
import pandas as pd 
from torch.utils.data import Dataset as BaseDataset
from PIL import Image

class CIFARDataset(BaseDataset):
    """CIFAR-10 images stored as `<id>.png` files, indexed by a labels CSV.

    Every item is an `(image, label)` pair: `image` is a 3-channel float
    tensor produced by `transforms.ToTensor()`, `label` is an integer class
    index, so batches can be collated by a DataLoader and passed to
    `nn.CrossEntropyLoss`.
    """

    # CSV used when `labels_csv` is not given (same file the notebook always read).
    DEFAULT_LABELS_CSV = "/content/gdrive/My Drive/cifar-10/" + 'trainLabels.csv'

    def __init__(self, path, transform = False, tr_index = None, labels_csv = None):
        """
        path       : folder that contains the `<id>.png` image files
        transform  : True applies the random training augmentation,
                     False only resizes and converts the image to a tensor
        tr_index   : optional positional row indices selecting a subset of the
                     CSV rows (e.g. one side of a K-fold split); None keeps all rows
        labels_csv : optional path of the labels CSV (first column: image id,
                     second column: label); None falls back to DEFAULT_LABELS_CSV
        """
        self.path = path
        self.transform = transform

        # Read the CSV once, then optionally narrow it down to the requested rows.
        csv_path = self.DEFAULT_LABELS_CSV if labels_csv is None else labels_csv
        all_labels = pd.read_csv(csv_path)

        # If the label column is not numeric (presumably class names — confirm
        # against the CSV), map each distinct value to an integer index. The
        # mapping is built from the full CSV so every fold shares the same indices.
        label_column = all_labels.iloc[:, 1]
        if pd.api.types.is_numeric_dtype(label_column):
            self.class_to_idx = None
        else:
            class_names = sorted(label_column.unique())
            self.class_to_idx = {name: index for index, name in enumerate(class_names)}

        if tr_index is None:
            self.trainLabels = all_labels
        else:
            self.trainLabels = all_labels.iloc[tr_index]

        # Build the pipeline once instead of on every __getitem__ call.
        # The network expects 32x32 inputs, so the random crop keeps that size.
        if self.transform:
            self._pipeline = transforms.Compose([
                transforms.Resize((32, 32)),
                transforms.RandomCrop(32, padding=4),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
            ])
        else:
            self._pipeline = transforms.Compose([
                transforms.Resize((32, 32)),
                transforms.ToTensor(),
            ])

    def __len__(self):
        """Number of rows (images) selected for this dataset."""
        return len(self.trainLabels)

    def __getitem__(self, idx):
        """Return the `(image_tensor, label_index)` pair stored at row `idx`."""
        # The image file is named after the id in the first CSV column
        # (the image with id 1 is stored as 1.png inside `self.path`).
        img_name = os.path.join(self.path, str(self.trainLabels.iloc[idx, 0]))

        # Load fully and close the file handle right away; force 3 channels.
        with Image.open(img_name + '.png') as img_file:
            image = img_file.convert('RGB')
        image = self._pipeline(image)

        labels = self.trainLabels.iloc[idx, 1]
        if self.class_to_idx is not None:
            labels = self.class_to_idx[labels]

        return image, int(labels)

In [None]:
class DeepCNN(nn.Module):
    """Four-convolution CNN for 3x32x32 images followed by a small MLP head.

    Two of the convolution blocks end with a 2x2 max-pool, so a 32x32 input
    is reduced to 8x8 with 128 channels before being flattened.

    num_classes : size of the output layer (defaults to 10)
    """

    @staticmethod
    def _conv_block(in_channels, out_channels, pool = False):
        """3x3 conv (stride 1, padding 1) + ReLU, optionally followed by a 2x2 max-pool."""
        modules = [nn.Conv2d(in_channels = in_channels,
                             out_channels = out_channels,
                             kernel_size = 3,
                             stride = 1,
                             padding = 1),
                   nn.ReLU()]
        if pool:
            modules.append(nn.MaxPool2d(kernel_size = 2, stride = 2))
        return nn.Sequential(*modules)

    def __init__(self, num_classes = 10):
        super().__init__()
        # Attribute names and sub-module order are unchanged, so the
        # state_dict keys stay the same as before.
        self.layer1 = self._conv_block(3, 64)
        self.layer2 = self._conv_block(64, 128, pool = True)
        self.layer3 = self._conv_block(128, 128)
        self.layer4 = self._conv_block(128, 128, pool = True)

        # 32 -> max pooling 2 times -> 8
        self.layer5 = nn.Flatten()

        self.layer6 = nn.Sequential(nn.Linear(8 * 8 * 128, 256),
                                    nn.ReLU(),
                                    nn.Dropout(0.5))

        self.layer7 = nn.Sequential(nn.Linear(256, 256),
                                    nn.ReLU())

        self.fc = nn.Linear(256, num_classes)

    def forward(self, inputs):
        """Map a batch of shape (N, 3, 32, 32) to raw class scores of shape (N, num_classes)."""
        # Start from `inputs` (the previous code read an undefined `x` here).
        x = self.layer1(inputs)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.layer5(x)
        x = self.layer6(x)
        x = self.layer7(x)
        return self.fc(x)

In [None]:
# Run on the first CUDA device when one is available, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

# Build the network on that device; the bare name below displays its layers.
model = DeepCNN().to(device)
model

In [None]:
# Cross-entropy loss on the raw class scores, optimised with Adam over all model weights.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params = model.parameters(), lr = 1e-3)

In [None]:
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import KFold 
from tqdm.notebook import tqdm as tqdm_notebook

kf = KFold(5, shuffle=True, random_state=0)
nb_epochs = 10

# KFold.split only needs the number of samples; it yields positional row
# indices, which is what CIFARDataset's `tr_index` expects.
n_samples = len(pd.read_csv(path + 'trainLabels.csv'))

# Make sure the checkpoint folder exists before the first torch.save.
save_dir = os.path.join(path, 'savedmodel')
os.makedirs(save_dir, exist_ok=True)

for i, (tr_idx, val_idx) in enumerate(kf.split(np.arange(n_samples))):
    train_losses = []
    valid_losses = []
    best_valid = None  # lowest validation loss seen so far in this fold

    # Start every fold from a fresh model/optimizer; reusing the previous
    # fold's weights would mean this fold's validation images were already
    # trained on.
    model = DeepCNN().to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr = 0.001)

    train_loader = DataLoader(CIFARDataset(path + 'train', transform = True, tr_index = tr_idx),
                              batch_size=64, shuffle=True, num_workers=0)
    valid_loader = DataLoader(CIFARDataset(path + 'train', transform = False, tr_index = val_idx),
                              batch_size=64, shuffle=False, num_workers=0)

    for epoch in tqdm_notebook(range(0, nb_epochs)):
        # Training pass
        train_loss = 0
        model.train()
        for train_batch, label in train_loader:
            train_batch, label = train_batch.to(device), label.to(device)
            optimizer.zero_grad()

            prediction = model(train_batch)
            loss = criterion(prediction, label)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
        # Mean of the per-batch losses for this epoch.
        train_loss /= len(train_loader)

        # Validation pass (no gradients, no parameter updates)
        valid_loss = 0
        model.eval()
        with torch.no_grad():
            for valid_batch, label in valid_loader:
                valid_batch, label = valid_batch.to(device), label.to(device)

                prediction = model(valid_batch)
                loss = criterion(prediction, label)
                valid_loss += loss.item()
        valid_loss /= len(valid_loader)

        print('Epoch {:4d}/{} Train Loss: {:.6f} Test Loss: {:.6f}'.format(epoch+1, nb_epochs, train_loss, valid_loss))

        train_losses.append(train_loss)
        valid_losses.append(valid_loss)

        # Keep the checkpoint with the LOWEST validation loss of this fold
        # (the score is a loss, so smaller is better).
        if best_valid is None or valid_loss < best_valid:
            best_valid = valid_loss
            torch.save(model.state_dict(), os.path.join(save_dir, '{}_cifar10-cnn_v2.pth'.format(i+1)))

In [None]:
# Loader over the `test` image folder: one image per batch, in dataset order.
# NOTE(review): CIFARDataset takes its ids/labels from trainLabels.csv by default,
# so the labels yielded here come from the training CSV — confirm that is intended.
test_loader = DataLoader(
    CIFARDataset(path + 'test', transform = False),
    batch_size = 1,
    shuffle = False,
    num_workers = 0,
)

In [None]:
# Checkpoint file name of the best model of each fold.
torch_kfold_model ={1:'1_cifar10-cnn_v2.pth',
                    2:'2_cifar10-cnn_v2.pth',
                    3:'3_cifar10-cnn_v2.pth',
                    4:'4_cifar10-cnn_v2.pth',
                    5:'5_cifar10-cnn_v2.pth'}

# One array of class probabilities, shape (n_images, n_classes), per fold.
preds_all = []
for i in range(1,6):
    print(f"{i} FOLD Predict")
    model_name = torch_kfold_model[i]

    # The checkpoints were written with torch.save(model.state_dict(), ...),
    # so rebuild the network and load the weights into it. They live in the
    # `savedmodel` folder under `path`, not in the current working directory.
    best_model = DeepCNN().to(device)
    state_dict = torch.load(os.path.join(path, 'savedmodel', model_name), map_location=device)
    best_model.load_state_dict(state_dict)
    best_model.eval()

    results = []
    with torch.no_grad():
        for feature, target in tqdm_notebook(test_loader):
            # The model returns raw class scores; softmax turns them into
            # per-class probabilities that can be averaged across folds.
            logits = best_model(feature.to(device))
            results.append(F.softmax(logits, dim=1).cpu().numpy())
    preds = np.concatenate(results,axis=0)
    preds_all.append(preds)