In [1]:
'''
# Colab Setting 
# Google Drive Access Authorization  
from google.colab import drive
drive.mount('/content/gdrive/')

# Path Setting
path = "/content/gdrive/My Drive/cifar-10/"
'''

'\n# Colab Setting \n# Google Drive Access Authorization  \nfrom google.colab import drive\ndrive.mount(\'/content/gdrive/\')\n\n# Path Setting\npath = "/content/gdrive/My Drive/cifar-10/"\n'

In [2]:
filepath = 'D:/Google Drive/cifar-10/'

In [3]:
import torch
import torch.nn.functional as F
from torchvision import datasets,transforms
import torch.nn as nn

In [4]:
# SEED SETTING 
import random
import os 
import numpy as np

def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True

SEED = 0
seed_everything(SEED)

In [24]:
import pandas as pd 
from torch.utils.data import Dataset as BaseDataset
from PIL import Image
import albumentations as A
from torch.autograd import Variable
from torch.utils.data import DataLoader


class CIFARDataset(BaseDataset):
    def __init__(self, path, data, transform = False, train = True):
        """
        train_files : train file list 
        is_test_or_not : test or not 
        is_transform : True augmentation 
        """
        self.path = path 
        self.train = train
        self.labels = data                  
        self.transform = transform
        
    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        # 이미지가 있는 파일의 경로를 설정하고 불러올 이미지의 이름을 저장 (id가 1번인 이미지의 파일명은 1.jpg)
        img_name = self.path + str(self.labels.iloc[idx, 0])
        # 이미지를 열어서 
        image_file = Image.open(img_name + '.png')

        if self.transform:
            torchvision_transform = transforms.Compose([
                transforms.Resize((32, 32)), 
                # transforms.RandomCrop(224),
                # transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                      std=[0.229, 0.224, 0.225])
            ])
            image_file = torchvision_transform(image_file)
        else:
            torchvision_transform = transforms.Compose([
                transforms.Resize((32, 32)), 
                # transforms.RandomCrop(224),
                # transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                      std=[0.229, 0.224, 0.225])
            ])
            image_file = torchvision_transform(image_file)
            
        if self.train:
            label = self.labels.iloc[idx, 1]
            return image_file, label
        else:
            return image_file

In [35]:
class DeepCNN(nn.Module):
    def __init__(self):
        super(DeepCNN, self).__init__()
        self.layer1 = nn.Sequential(nn.Conv2d(in_channels = 3,
                                        out_channels = 64,
                                        kernel_size = 3,
                                        stride = 1,
                                        padding = 1),
                            nn.BatchNorm2d(64),
                            nn.ReLU())
        
        self.layer2 = nn.Sequential(nn.Conv2d(in_channels = 64,
                                        out_channels = 128,
                                        kernel_size = 3,
                                        stride = 1,
                                        padding = 1),
                            nn.ReLU(),
                            nn.MaxPool2d(kernel_size = 2, stride = 2))
        
        self.layer3 = nn.Sequential(nn.Conv2d(in_channels = 128,
                                        out_channels = 128,
                                        kernel_size = 3,
                                        stride = 1,
                                        padding = 1),
                            nn.ReLU())

        self.layer4 = nn.Sequential(nn.Conv2d(in_channels = 128,
                                        out_channels = 128,
                                        kernel_size = 3,
                                        stride = 1,
                                        padding = 1),
                            nn.ReLU(),
                            nn.MaxPool2d(kernel_size = 2, stride = 2), 
                            )   
        
        # 32 -> max pooling 2 times -> 8 
        self.layer5 = nn.Flatten()
    
        self.layer6 = nn.Sequential(nn.Linear(8 * 8 * 128, 256),
                            nn.ReLU(),
                            nn.Dropout(0.5))
                  
        self.layer7 = nn.Sequential(nn.Linear(256, 256),
                            nn.ReLU()
                            )
              
        self.fc = nn.Linear(256, 10)
    
    def forward(self, x):
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.layer5(x)
        x = self.layer6(x)
        x = self.layer7(x)
        x = self.fc(x)
        return x

In [36]:
dev = torch.cuda.set_device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")

In [37]:
from torch import optim

criterion = nn.CrossEntropyLoss()

In [38]:
data = pd.read_csv(filepath + 'trainLabels.csv')

# data의 값이 문자열로 되어있기에 수치형으로 변경 
class2idx = {}
for i, j in enumerate(data['label'].unique()):
    class2idx[j] = i

idx2class = {}
for i, j in class2idx.items():
    idx2class[j] = i

data['label'] = data['label'].apply(lambda x: class2idx[x])

In [39]:
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import KFold 
from tqdm.notebook import tqdm as tqdm_notebook

kf = KFold(5, shuffle=True, random_state=0)
nb_epochs = 10


for i, (tr_idx, val_idx) in enumerate(kf.split(range(0, 50000))):
    train_accuracy = []
    valid_accuracy = []
    best_valid = 0
    
    train_loader = DataLoader(CIFARDataset(filepath + 'train/',data = data.iloc[tr_idx] , transform = True, train = True), batch_size=64, shuffle=True, num_workers=0)
    valid_loader = DataLoader(CIFARDataset(filepath + 'train/',data = data.iloc[val_idx] , transform = False, train = True), batch_size=64, shuffle=False, num_workers=0)
    
    model = DeepCNN()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    for epoch in tqdm_notebook(range(0, nb_epochs)):
        # train 학습 
        train_loss = 0
        correct_tr = 0
        correct_val = 0
        
        model.train()
        for idx, (train_batch, label) in enumerate(train_loader):
            train_batch, label = train_batch.to(dev), label.to(dev)
            optimizer.zero_grad()
            
            prediction = model(train_batch)
            loss = criterion(prediction, label)    
            loss.backward()
            train_loss += loss.item() / (idx+1)
            optimizer.step()
            _, predicted = torch.max(prediction, 1)
            correct_tr += (predicted == label).sum()
            
        # valid 검증 
        valid_loss = 0
        model.eval()
        with torch.no_grad():
            for idx, (valid_batch, label) in enumerate(valid_loader):
                valid_batch, label = valid_batch.to(dev), label.to(dev)
                optimizer.zero_grad()
                
                prediction = model(valid_batch)
                loss = criterion(prediction, label)    
                valid_loss += loss.item() / (idx+1)
                _, predicted = torch.max(prediction, 1)
                correct_val += (predicted == label).sum()

        
        if epoch % 1 == 0: 
            print('Epoch {:4d}/{} Train Loss: {:.6f} Train Accuracy: {:.6f} Valid Accuracy: {:.6f}'.format(epoch+1, nb_epochs, train_loss, 
                                                                                correct_tr.detach().numpy() / len(train_loader.dataset),
                                                                                correct_val.detach().numpy() / len(valid_loader.dataset)))
        train_accuracy.append(correct_tr.detach().numpy() / len(train_loader.dataset))
        valid_accuracy.append(correct_val.detach().numpy() / len(valid_loader.dataset))
        
        # 모델의 스코어가 가장 높은 모델을 저장 
        # 단, 원래는 Validation set으로 진행해야 하지만 분석의 편의상 Train으로 진행 
        if (best_valid == None or best_valid < correct_val):
                best_valid = correct_val
                torch.save(model, filepath + '/savedmodel/{}_cifar10-cnn_v2.pth'.format(i+1))

HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))

Epoch    1/10 Train Loss: 14.425495 Train Accuracy: 0.420625 Valid Accuracy: 0.591000
Epoch    2/10 Train Loss: 7.934278 Train Accuracy: 0.612075 Valid Accuracy: 0.685800
Epoch    3/10 Train Loss: 6.232967 Train Accuracy: 0.686825 Valid Accuracy: 0.714800
Epoch    4/10 Train Loss: 5.453641 Train Accuracy: 0.735900 Valid Accuracy: 0.725700
Epoch    5/10 Train Loss: 4.680569 Train Accuracy: 0.771950 Valid Accuracy: 0.751000
Epoch    6/10 Train Loss: 3.955085 Train Accuracy: 0.796425 Valid Accuracy: 0.743700
Epoch    7/10 Train Loss: 3.465817 Train Accuracy: 0.818475 Valid Accuracy: 0.753700
Epoch    8/10 Train Loss: 2.924621 Train Accuracy: 0.838600 Valid Accuracy: 0.753700
Epoch    9/10 Train Loss: 2.856680 Train Accuracy: 0.854000 Valid Accuracy: 0.757800
Epoch   10/10 Train Loss: 2.331471 Train Accuracy: 0.868500 Valid Accuracy: 0.765100



HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))

Epoch    1/10 Train Loss: 14.292286 Train Accuracy: 0.432475 Valid Accuracy: 0.561800
Epoch    2/10 Train Loss: 8.210514 Train Accuracy: 0.621000 Valid Accuracy: 0.681400
Epoch    3/10 Train Loss: 6.293219 Train Accuracy: 0.692500 Valid Accuracy: 0.722200
Epoch    4/10 Train Loss: 5.543118 Train Accuracy: 0.734300 Valid Accuracy: 0.740000
Epoch    5/10 Train Loss: 4.858045 Train Accuracy: 0.772650 Valid Accuracy: 0.739200
Epoch    6/10 Train Loss: 3.929193 Train Accuracy: 0.797025 Valid Accuracy: 0.756500
Epoch    7/10 Train Loss: 3.447669 Train Accuracy: 0.822025 Valid Accuracy: 0.758000
Epoch    8/10 Train Loss: 3.145616 Train Accuracy: 0.838300 Valid Accuracy: 0.767700
Epoch    9/10 Train Loss: 2.701858 Train Accuracy: 0.852225 Valid Accuracy: 0.761600
Epoch   10/10 Train Loss: 2.449179 Train Accuracy: 0.869675 Valid Accuracy: 0.765300



HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))

Epoch    1/10 Train Loss: 14.689694 Train Accuracy: 0.406175 Valid Accuracy: 0.567300
Epoch    2/10 Train Loss: 8.503443 Train Accuracy: 0.616025 Valid Accuracy: 0.693000
Epoch    3/10 Train Loss: 6.058511 Train Accuracy: 0.701900 Valid Accuracy: 0.729600
Epoch    4/10 Train Loss: 5.206500 Train Accuracy: 0.749125 Valid Accuracy: 0.737400
Epoch    5/10 Train Loss: 4.319948 Train Accuracy: 0.780975 Valid Accuracy: 0.754700
Epoch    6/10 Train Loss: 3.809959 Train Accuracy: 0.811450 Valid Accuracy: 0.769200
Epoch    7/10 Train Loss: 3.181496 Train Accuracy: 0.832825 Valid Accuracy: 0.775400
Epoch    8/10 Train Loss: 2.744412 Train Accuracy: 0.856425 Valid Accuracy: 0.768800
Epoch    9/10 Train Loss: 2.391010 Train Accuracy: 0.867625 Valid Accuracy: 0.758800
Epoch   10/10 Train Loss: 2.143937 Train Accuracy: 0.883825 Valid Accuracy: 0.765300



HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))

Epoch    1/10 Train Loss: 14.204767 Train Accuracy: 0.438325 Valid Accuracy: 0.608700
Epoch    2/10 Train Loss: 7.966471 Train Accuracy: 0.629675 Valid Accuracy: 0.665600
Epoch    3/10 Train Loss: 5.974961 Train Accuracy: 0.701425 Valid Accuracy: 0.726000
Epoch    4/10 Train Loss: 4.972634 Train Accuracy: 0.745050 Valid Accuracy: 0.731400
Epoch    5/10 Train Loss: 4.498719 Train Accuracy: 0.777475 Valid Accuracy: 0.755100
Epoch    6/10 Train Loss: 3.953755 Train Accuracy: 0.805875 Valid Accuracy: 0.760300
Epoch    7/10 Train Loss: 3.283971 Train Accuracy: 0.831275 Valid Accuracy: 0.765900
Epoch    8/10 Train Loss: 2.879429 Train Accuracy: 0.848700 Valid Accuracy: 0.766100
Epoch    9/10 Train Loss: 2.501077 Train Accuracy: 0.866350 Valid Accuracy: 0.759000
Epoch   10/10 Train Loss: 2.314623 Train Accuracy: 0.883475 Valid Accuracy: 0.758900



HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))

Epoch    1/10 Train Loss: 14.314605 Train Accuracy: 0.432150 Valid Accuracy: 0.575700
Epoch    2/10 Train Loss: 7.728228 Train Accuracy: 0.627250 Valid Accuracy: 0.687500
Epoch    3/10 Train Loss: 6.105800 Train Accuracy: 0.694700 Valid Accuracy: 0.697100
Epoch    4/10 Train Loss: 5.482239 Train Accuracy: 0.736300 Valid Accuracy: 0.739200
Epoch    5/10 Train Loss: 4.814955 Train Accuracy: 0.771400 Valid Accuracy: 0.759300
Epoch    6/10 Train Loss: 3.964044 Train Accuracy: 0.795750 Valid Accuracy: 0.733300
Epoch    7/10 Train Loss: 3.715030 Train Accuracy: 0.820250 Valid Accuracy: 0.764400
Epoch    8/10 Train Loss: 3.253132 Train Accuracy: 0.835250 Valid Accuracy: 0.766900
Epoch    9/10 Train Loss: 2.747804 Train Accuracy: 0.854725 Valid Accuracy: 0.767300
Epoch   10/10 Train Loss: 2.314112 Train Accuracy: 0.868625 Valid Accuracy: 0.762700



In [41]:
data = pd.read_csv(filepath + 'sampleSubmission.csv')

test_dataloader = DataLoader(CIFARDataset(filepath + 'test/', data = data, transform = False, train=False), batch_size=1, shuffle=False, num_workers=0)    

In [99]:
torch_kfold_model ={1:'1_cifar10-cnn_v2.pth',
                    2:'2_cifar10-cnn_v2.pth',
                    3:'3_cifar10-cnn_v2.pth',
                    4:'4_cifar10-cnn_v2.pth',
                    5:'5_cifar10-cnn_v2.pth'}
preds_all = []
for i in range(1,6):
    print(f"{i} FOLD Predict")
    model_name = torch_kfold_model[i]
    best_model = torch.load(filepath + f'/savedmodel/{model_name}')
    best_model.eval()
    results = []
    with torch.no_grad():
        for batch_idx, feature in tqdm_notebook(enumerate(test_dataloader)):
            # 32*32 : image size 
            predict = best_model(feature.to(dev))
            _, predict = torch.max(predict, 1)
            results.append(idx2class[predict.detach().numpy()[0]])
    # preds = np.concatenate(results,axis=0)
    preds_all.append(results.copy())

1 FOLD Predict


HBox(children=(FloatProgress(value=1.0, bar_style='info', layout=Layout(width='20px'), max=1.0), HTML(value=''…


2 FOLD Predict


HBox(children=(FloatProgress(value=1.0, bar_style='info', layout=Layout(width='20px'), max=1.0), HTML(value=''…


3 FOLD Predict


HBox(children=(FloatProgress(value=1.0, bar_style='info', layout=Layout(width='20px'), max=1.0), HTML(value=''…


4 FOLD Predict


HBox(children=(FloatProgress(value=1.0, bar_style='info', layout=Layout(width='20px'), max=1.0), HTML(value=''…


5 FOLD Predict


HBox(children=(FloatProgress(value=1.0, bar_style='info', layout=Layout(width='20px'), max=1.0), HTML(value=''…




In [105]:
sampleSubmission = pd.read_csv(filepath + 'sampleSubmission.csv')
sampleSubmission['label'] = results

label = pd.DataFrame(np.array(preds_all)).mode()
sampleSubmission['label'] = label.loc[0]
sampleSubmission.to_csv(filepath + "submission.csv", index=False)