In [58]:
import numpy as np 
import pandas
import matplotlib.pyplot as plt
from tqdm import tqdm
# from google.colab import files
import zipfile
import io
import os
import random
from sklearn.metrics import accuracy_score as ACC
from PIL import Image

In [59]:
import torch 
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms

In [60]:
train_dir='./Train/Train/'
print(len(os.listdir(train_dir)))

45


In [61]:
cls_name_list=sorted(os.listdir(train_dir))
cls_name_dict={cls_name_list[i]:i for i in range(len(cls_name_list))}

In [62]:
cls_name_dict

{'Alfa Romeo Stelvio': 0,
 'Aston Martin DB11': 1,
 'Aston Martin DBS': 2,
 'Aston Martin Valkyrie': 3,
 'Aston Martin Vantage': 4,
 'Aston Martin Vulcan': 5,
 'Audi A3': 6,
 'Audi A6': 7,
 'Audi E-tron GT': 8,
 'Audi R8': 9,
 'BMW 3-series': 10,
 'BMW 7-series': 11,
 'BMW x7': 12,
 'Bentley Bentayga': 13,
 'Bentley Continental': 14,
 'Bugatti Centidieci': 15,
 'Bugatti Chiron': 16,
 'Bugatti Divo': 17,
 'Bugatti La Voiture Noire': 18,
 'Buggati Veyron': 19,
 'Cadillac Escalade': 20,
 'Corvette ZR': 21,
 'Ferrari 458': 22,
 'Ferrari FF': 23,
 'Ferrari Pininfarina': 24,
 'Jaguar F-type': 25,
 'Jaguar XJ': 26,
 'Koenigsegg CC8S': 27,
 'Koenigsegg CCX': 28,
 'La Ferrari': 29,
 'Lamborghini Gallardo': 30,
 'Lamborghini Murceilago': 31,
 'Lamborghini Veneno': 32,
 'Mustang GT': 33,
 'Pagani Zonda': 34,
 'Porsche 911': 35,
 'Porsche Cayenne': 36,
 'Range Rover Discovery': 37,
 'Renault Duster': 38,
 'Rolls Royce Ghost': 39,
 'Rolls Royce Phantom': 40,
 'Tata Tiago': 41,
 'Toyota Fortuner': 4

In [63]:
image_items=[]
for folder in os.listdir(train_dir):
    for image in os.listdir(os.path.join(train_dir, folder)):
        image_items.append({'image_path':os.path.join(train_dir, folder, image),
                           'image_cls_gt': cls_name_dict[folder]})

In [64]:
image_items[0]

{'image_path': './Train/Train/Ferrari 458/image7.jpg', 'image_cls_gt': 22}

In [65]:
#data split
random.shuffle(image_items)
split_idx = int(len(image_items)*0.75) # 비율은 얼마나 generalize 성능을 평가하느냐에 따라 또 다를 수 있어
train_data, test_data = image_items[:split_idx], image_items[split_idx:]

In [66]:
len(train_data), len(test_data), len(train_data) + len(test_data)

(3037, 1013, 4050)

In [98]:
# hyperparameters
EPOCHS = 100
batch_size = 16 # 한번에 16장을 보고 모델을 한번 업데이트 하겠다 의미
learning_rate =  1e-4 # 1e-4 # 이게 크면 한번에 크게 업데이트를 한다 의미 

# augmentation
train_aug = transforms.Compose([
     transforms.RandomResizedCrop(224, scale=(0.08, 1.0), ratio=(0.75, 1.33)),
     transforms.RandomHorizontalFlip(p=0.5), # 여기 확률을 바꿔본다거나 하는 식으로 과제 수행 가능. 아래 컬러지터 조정, 빼도 되고. 
     transforms.ColorJitter(brightness=0.1, contrast=0.1),
     transforms.Resize((224, 224)),
     transforms.ToTensor(),
     transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
 ])
test_aug = transforms.Compose([
     # 학습 때는 ColorJitter 까지의 내용을 넣지 않는다.     
     transforms.Resize((224, 224)),
     transforms.ToTensor(),
     transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
 ])

In [99]:
class imageDataset(torch.utils.data.Dataset):
    def __init__(self, data, augmentation):
        self.data = data
        self.augmentation = augmentation
        
    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        image_path = self.data[idx]['image_path']
        gt = self.data[idx]['image_cls_gt']
        
        image = Image.open(image_path)
        
        # augmentation
        image = self.augmentation(image)
        gt = torch.tensor(gt).long()
        return image, gt  # 각각 X_batch, y_batch 로 될 것 

In [100]:
# Data set
train_dataset = imageDataset(train_data, train_aug)
test_dataset = imageDataset(test_data, test_aug)

# Data loader
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size, 
                                           shuffle=True,
                                           num_workers=32)

test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=batch_size, 
                                          shuffle=False,
                                          num_workers=32)

In [101]:
class pretrained_model(nn.Module):
    def __init__(self, cls_size=45, pretrained=False): # True로 바꾸자 
        super().__init__()
#         self.model = torchvision.models.vgg19_bn(pretrained=pretrained)
#         vgg19_bn.classifier
        self.model = torchvision.models.resnet18(pretrained=pretrained) 

        self.model.fc = nn.Linear(self.model.fc.in_features, cls_size, bias=False)
        
    def forward(self, x):
        out = self.model(x)
        return out

In [102]:
class myModel(nn.Module):
    def __init__(self, num_classes=45):
        super(myModel, self).__init__()
        self.block1 = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=5, stride=1, padding=2),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2))
        self.block2 = nn.Sequential(
            nn.Conv2d(64, 128, kernel_size=5, stride=1, padding=2),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2))
        self.fc = nn.Linear(128*56*56, num_classes)
        
    def forward(self, x):
        out = self.block1(x)
        out = self.block2(out)
        out = out.reshape(out.size(0), -1)
        out = self.fc(out)
        return out

In [103]:
model = pretrained_model(cls_size=45, pretrained=True)
# model = pretrained_model(cls_size=45, pretrained=False)
model = model.cuda()
criterion = nn.CrossEntropyLoss() # https://pytorch.org/docs/stable/nn.html
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [104]:
loss_list = []
acc_list = []
for epoch in range(EPOCHS):
    for i, (X_batch, y_batch) in enumerate(train_loader):
        X_batch = X_batch.cuda()
        y_batch = y_batch.cuda()
        #Forward 
        y_output = model(X_batch)
        loss = criterion(y_output, y_batch) #CELoss: The input is expected to contain raw, unnormalized scores for each class.
        
        #Backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        #misc (acc 계산, etc) 
        y_pred = torch.max(y_output, 1)[1]
        acc = ACC(y_batch.data.cpu(), y_pred.data.cpu())
        loss_list.append(loss.item())
        acc_list.append(acc)

        if (i+1) % 10 == 0:
            print('Epoch [{}/{}] Step [{}/{}] Loss: [{:.4f}] Train ACC [{:.2f}%]'.format(epoch+1, EPOCHS, \
                                                                                       i+1, len(train_loader), loss.item(), acc*100))

Epoch [1/100] Step [10/190] Loss: [3.8429] Train ACC [0.00%]
Epoch [1/100] Step [20/190] Loss: [3.1533] Train ACC [31.25%]
Epoch [1/100] Step [30/190] Loss: [3.8012] Train ACC [6.25%]
Epoch [1/100] Step [40/190] Loss: [3.3604] Train ACC [12.50%]
Epoch [1/100] Step [50/190] Loss: [3.6202] Train ACC [6.25%]
Epoch [1/100] Step [60/190] Loss: [3.2828] Train ACC [25.00%]
Epoch [1/100] Step [70/190] Loss: [3.2205] Train ACC [25.00%]
Epoch [1/100] Step [80/190] Loss: [3.0102] Train ACC [25.00%]
Epoch [1/100] Step [90/190] Loss: [2.8529] Train ACC [25.00%]
Epoch [1/100] Step [100/190] Loss: [3.0592] Train ACC [18.75%]
Epoch [1/100] Step [110/190] Loss: [2.5316] Train ACC [50.00%]
Epoch [1/100] Step [120/190] Loss: [2.5301] Train ACC [25.00%]
Epoch [1/100] Step [130/190] Loss: [2.7949] Train ACC [25.00%]
Epoch [1/100] Step [140/190] Loss: [2.4100] Train ACC [31.25%]
Epoch [1/100] Step [150/190] Loss: [2.3904] Train ACC [50.00%]
Epoch [1/100] Step [160/190] Loss: [2.3668] Train ACC [37.50%]
Epoc

In [105]:
test_acc_list = []
with torch.no_grad():
    model.eval()
    for X_batch, y_batch in test_loader:    
        X_batch = X_batch.cuda()
        y_batch = y_batch.cuda()
        
        y_output = model(X_batch)
        y_pred = torch.max(y_output, 1)[1]
        
        acc = ACC(y_batch.data.cpu(), y_pred.data.cpu())
        test_acc_list.append(acc)
    test_acc = np.mean(test_acc_list)
print('Test ACC: [{:.2f}%]'.format(test_acc*100))

Test ACC: [82.01%]


In [None]:
# 테스트 성능을 78.11% 이상 얻어봐라가 과제임 

In [None]:
company = "케이존"
name = "천재홍"

In [None]:
print(f'{company}의 {name} 개발자 9회차 accuracy 성능 향상 과제 수행 후 제출합니다. 감사합니다. ')