<a href="https://colab.research.google.com/github/HyeJin816/ESAA_22/blob/main/0624_CNN2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **2. 전이학습(transfer learning)을 통한 이미지 분류**

In [1]:
import os 
os.environ['CUDA_VISIBLE_DEVICES']='0' 

import random
import numpy as np
from tqdm import tqdm

import torch
from torchvision import datasets,models,transforms
import torch.nn as nn
from torch.nn import functional as F
import torch.optim as optim
import torchvision.transforms as transforms

from multiprocessing import cpu_count
from torch.utils.data import Dataset,DataLoader,SubsetRandomSampler
from torch.nn import CrossEntropyLoss
from torchvision.models import efficientnet_b3 as efficientnet
from sklearn.model_selection import train_test_split

## **데이터 load, 전처리**

In [2]:
transform = transforms.Compose([transforms.ToTensor(),
                                transforms.Resize([224,224]), 
                                transforms.Normalize(mean=(0.5,0.5,0.5), std=(0.5,0.5,0.5)) 
])

In [3]:
train_data = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)

Files already downloaded and verified


In [4]:
train_idx, valid_idx = train_test_split(np.arange(len(train_data)), test_size=0.2, 
                                        random_state=42, shuffle=True, stratify=train_data.targets)

In [5]:
batch_size = 32
num_workers = int(cpu_count() / 2)

In [6]:
train_loader = DataLoader(train_data, batch_size=batch_size, 
                          sampler=SubsetRandomSampler(train_idx), num_workers=num_workers)
valid_loader = DataLoader(train_data, batch_size=batch_size, 
                          sampler=SubsetRandomSampler(valid_idx), num_workers=num_workers)

In [7]:
train_total = len(train_idx)
valid_total = len(valid_idx)

train_batches = len(train_loader)
valid_batches = len(valid_loader)

In [8]:
print('total train imgs :',train_total,'/ total train batches :', train_batches)
print('total valid imgs :',valid_total, '/ total valid batches :', valid_batches)

total train imgs : 40000 / total train batches : 1250
total valid imgs : 10000 / total valid batches : 313


## **Device 설정**

In [9]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
torch.cuda.is_available()

False

## **모델 불러오기 / 파라미터 설정**

In [11]:
net = models.efficientnet_b3(pretrained=False)
net.classifier

Sequential(
  (0): Dropout(p=0.3, inplace=True)
  (1): Linear(in_features=1536, out_features=1000, bias=True)
)

In [12]:
net.fc = nn.Linear(1000, 10)
net = net.to(device) 

In [13]:
criterion = CrossEntropyLoss()
optimizer = optim.Adam(params=net.parameters(), lr=0.001)
epochs = 10

## **학습**

In [14]:
for epoch in range(epochs):
    net.train()

    train_loss = 0
    train_correct = 0 
    tqdm_dataset = tqdm(train_loader)
    for x,y in tqdm_dataset:
        x = x.to(device)
        y = y.to(device)
        outputs = net(x)
        loss = criterion(outputs, y)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        _, predicted = outputs.max(1)
        train_correct += predicted.eq(y).sum().item()

        tqdm_dataset.set_postfix({
            'Epoch': epoch + 1,
            'Loss': '{:06f}'.format(loss.item()),
        })

    train_loss = train_loss / train_batches
    train_acc = train_correct / train_total

    net.eval()

    valid_loss = 0
    valid_correct = 0 
    tqdm_dataset = tqdm(valid_loader)
    with torch.no_grad():
        for x,y in tqdm_dataset:
            x = x.to(device)
            y = y.to(device)
            
            outputs = net(x)
            loss = criterion(outputs, y)
            valid_loss += loss.item()
            _, predicted = outputs.max(1)
            valid_correct += predicted.eq(y).sum().item()

            tqdm_dataset.set_postfix({
                'Epoch': epoch + 1,
                'Loss': '{:06f}'.format(loss.item()),
            })

    valid_loss = valid_loss / valid_batches
    valid_acc = valid_correct / valid_total

    print('epochs', epoch+1, 'train_loss', train_loss, 'train_acc', train_acc, 'valid loss', valid_loss, 'valid acc', valid_acc)

  0%|          | 6/1250 [01:46<6:09:40, 17.83s/it, Epoch=1, Loss=5.716373]


KeyboardInterrupt: ignored

## **모델 저장 / 불러오기**

In [15]:
path = './model.pth'
torch.save(net.state_dict(),path)

In [16]:
path = './model.pth'
net.load_state_dict(torch.load(path))

<All keys matched successfully>

## **추론**

In [17]:
from glob import glob
import PIL.Image
import numpy as np

test_images = []

path = './data/'
for filename in sorted(glob(path + "test/*.jpg")):
    an_img = PIL.Image.open(filename) 
    img_array = np.array(an_img) 
    test_images.append(img_array) 

test_images = np.array(test_images)

In [18]:
class CustomDataset(Dataset):
    def __init__(self, transform):
        self.transform = transform 
        self.img_list = test_images
        self.img_labels = [0] * 10000 

    def __len__(self):
        return len(self.img_list)

    def __getitem__(self, idx):
        return self.transform(self.img_list[idx]), self.img_labels[idx]

In [19]:
test_set = CustomDataset(transform)

In [20]:
test_loader = DataLoader(test_set, batch_size = batch_size, num_workers=num_workers)