In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import torchvision
from torchvision import datasets, models, transforms
import time
import copy
from PIL import Image
import os
import matplotlib.pyplot as plt 
import numpy as np 
import pandas as pd 
import random

from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, \
                            roc_auc_score, confusion_matrix, classification_report, \
                            matthews_corrcoef, cohen_kappa_score, log_loss

from tqdm import tqdm

In [2]:
# 완벽한 실험 재현성을 위한 랜덤제어
random_seed = 28
random.seed(random_seed)
np.random.seed(random_seed)
torch.manual_seed(random_seed)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
torch.cuda.manual_seed(random_seed)
torch.cuda.manual_seed_all(random_seed) # if use multi-GPU

In [3]:
if torch.cuda.is_available():       
    device = torch.device("cuda")
    print(f'There are {torch.cuda.device_count()} GPU(s) available.')
    print('Device name:', torch.cuda.get_device_name(0))

else:
    print('No GPU available, using the CPU instead.')
    device = torch.device("cpu")

There are 1 GPU(s) available.
Device name: NVIDIA GeForce RTX 3060


In [4]:
data_transforms = {
    'train': transforms.Compose([
        #transforms.RandomResizedCrop(224),
        transforms.Resize(256),
        transforms.CenterCrop(224),
#         transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'val': transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'test': transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),    
}

In [5]:
image_path = "C:/Users/ANDlab3/Desktop/paper/fashion-dataset/data/"
image_datasets = {x: datasets.ImageFolder(os.path.join(image_path, x),
                                          data_transforms[x])
                  for x in ['train', 'val','test']}

dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=16,
                                             shuffle=True, num_workers=4)
              for x in ['train', 'val','test']}

dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val', 'test']}
class_names = image_datasets['train'].classes

In [6]:
dataset_sizes

{'train': 6475, 'val': 925, 'test': 1850}

In [7]:
class_num = len(class_names)
class_num

37

In [8]:
def train_model(model, criterion, optimizer, exp_lr_scheduler, num_epochs=25):
    since = time.time()

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0

            # Iterate over data.
            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward
                # track history if only in train
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # statistics
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)
#             if phase == 'train':
#                 scheduler.step()

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects.double() / dataset_sizes[phase]

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                phase, epoch_loss, epoch_acc))

            # deep copy the model
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model

In [9]:
pre_model = models.resnet50(pretrained=True)
num_ftrs = pre_model.fc.in_features

#Changing the number of outputs in the last layer to the number of different item types
pre_model.fc = nn.Linear(num_ftrs, 500)

# classifier 제외하고 Freeze

In [10]:
for param in pre_model.parameters():
    param.requires_grad = False

In [11]:
class vision_module(nn.Module):
    def __init__(self, pre_model):
        super(vision_module, self).__init__()
    
        D_in, H, D_out = 1000, 500, 37
        self.resnet50 = pre_model
        
        self.classifier = nn.Sequential(
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(H, D_out)
        )
    
    def forward(self, image):
        outputs = self.resnet50(image)
        
        fc = self.classifier(outputs)
        
        return  fc

In [12]:
model_ft = vision_module(pre_model)
model_ft.to(device)

vision_module(
  (resnet50): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (downsample): Sequential(
   

In [13]:
criterion = nn.CrossEntropyLoss()

# Observe that all parameters are being optimized
optimizer_ft = optim.Adam(model_ft.parameters(), lr=0.001)

# Decay LR by a factor of 0.1 every 7 epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)

In [14]:
model_ft = train_model(model_ft, criterion, optimizer_ft, exp_lr_scheduler,
                       num_epochs=20)

Epoch 0/19
----------
train Loss: 2.7427 Acc: 0.4843
val Loss: 1.8413 Acc: 0.7373

Epoch 1/19
----------
train Loss: 1.6915 Acc: 0.7175
val Loss: 1.2081 Acc: 0.7751

Epoch 2/19
----------
train Loss: 1.2576 Acc: 0.7598
val Loss: 0.9583 Acc: 0.7881

Epoch 3/19
----------
train Loss: 1.0466 Acc: 0.7751
val Loss: 0.7911 Acc: 0.8173

Epoch 4/19
----------
train Loss: 0.9178 Acc: 0.7773
val Loss: 0.6966 Acc: 0.8205

Epoch 5/19
----------
train Loss: 0.8323 Acc: 0.7952
val Loss: 0.6701 Acc: 0.8119

Epoch 6/19
----------
train Loss: 0.7782 Acc: 0.8029
val Loss: 0.6235 Acc: 0.8227

Epoch 7/19
----------
train Loss: 0.7293 Acc: 0.8023
val Loss: 0.5900 Acc: 0.8216

Epoch 8/19
----------
train Loss: 0.7014 Acc: 0.8117
val Loss: 0.5590 Acc: 0.8281

Epoch 9/19
----------
train Loss: 0.6673 Acc: 0.8159
val Loss: 0.5633 Acc: 0.8227

Epoch 10/19
----------
train Loss: 0.6483 Acc: 0.8128
val Loss: 0.5481 Acc: 0.8303

Epoch 11/19
----------
train Loss: 0.6148 Acc: 0.8252
val Loss: 0.5284 Acc: 0.8324

Ep

In [15]:
# SAVE_PATH = "C:/Users/ANDlab3/Desktop/paper/fashion-dataset/model/"
# torch.save(model_ft.state_dict(), SAVE_PATH + 'model_fine_tuned_v2.pt')

# Test

In [16]:
correct = 0
total = 0

true = []
pred = []

with torch.no_grad():
    model_ft.eval() #현재는 모델에 드랍아웃이나, 패딩이 없어서 필요없지만 만약 사용된 경우에는 eval을 통해서 평가시에는 꼭 비활성화 시켜야한다.
    f1_score = 0
    for data in dataloaders['test']:
        images, labels = data[0].to(device), data[1].to(device)
        
        outputs =  model_ft(images)
        _, predicted = torch.max(outputs.data, 1)
        
        
        total += labels.size(0) # 개수 누적(총 개수)
        correct += (predicted == labels).sum().item() # 누적(맞으면 1, 틀리면 0으로 합산)
  
        true += labels.cpu().numpy().tolist()
        pred += predicted.cpu().numpy().tolist()

print('done')

done


In [17]:
accuracy = accuracy_score(true, pred)
round((accuracy * 100), 3)

84.432

In [18]:
print('전체 데이터 수 : ', total)
print('Accuracy of the network on the 10000 test images: %d %%' % (100 * correct / total))

전체 데이터 수 :  1850
Accuracy of the network on the 10000 test images: 84 %


In [19]:
# Classification Report 저장
# REPORT_PATH = "C:/Users/ANDlab3/Desktop/multimodal/visionAndNLPmodel/result/"
# CL_REPORT_FILE = REPORT_PATH + "cl_report.csv"

cl_report = classification_report(true, pred, output_dict = True)
cl_report_df = pd.DataFrame(cl_report).transpose()
cl_report_df = cl_report_df.round(3)
cl_report_df.to_csv('./cl_report_2')
print(cl_report_df)

              precision  recall  f1-score   support
0                 0.943   1.000     0.971    50.000
1                 0.980   0.960     0.970    50.000
2                 0.980   1.000     0.990    50.000
3                 1.000   0.920     0.958    50.000
4                 0.961   0.980     0.970    50.000
5                 0.697   0.460     0.554    50.000
6                 0.800   0.720     0.758    50.000
7                 0.942   0.980     0.961    50.000
8                 0.800   0.800     0.800    50.000
9                 0.909   1.000     0.952    50.000
10                0.471   0.480     0.475    50.000
11                0.870   0.800     0.833    50.000
12                0.731   0.980     0.838    50.000
13                0.956   0.860     0.905    50.000
14                0.468   0.440     0.454    50.000
15                0.646   0.620     0.633    50.000
16                0.957   0.880     0.917    50.000
17                0.824   0.840     0.832    50.000
18          