<a href="https://colab.research.google.com/github/NANNDA3463/ossp_ai2/blob/main/k_fashion_baseline_ipynb%EC%9D%98_%EC%82%AC%EB%B3%B8.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!wget https://kr.object.ncloudstorage.com/aihub-competition/dataset/K-Fashion_Train.zip
!unzip -qq K-Fashion_Train.zip 

--2023-01-09 13:11:30--  https://kr.object.ncloudstorage.com/aihub-competition/dataset/K-Fashion_Train.zip
Resolving kr.object.ncloudstorage.com (kr.object.ncloudstorage.com)... 61.97.180.104
Connecting to kr.object.ncloudstorage.com (kr.object.ncloudstorage.com)|61.97.180.104|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 432747336 (413M) [application/zip]
Saving to: ‘K-Fashion_Train.zip’


2023-01-09 13:12:11 (10.5 MB/s) - ‘K-Fashion_Train.zip’ saved [432747336/432747336]



In [2]:
from __future__ import print_function, division

import torch
import torch.utils as utils
import torch.nn as nn
import torch.optim as optim
from tqdm import tqdm
from PIL import Image

from torch.optim import lr_scheduler
import torch.backends.cudnn as cudnn
import numpy as np
import pandas as pd
import torchvision
from tqdm import tqdm
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import os
import copy

In [3]:
data_dir = '/content/Train/'
data_transform = transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])

image_dataset =  datasets.ImageFolder(data_dir, data_transform)

train_split = 0.9
split_size = int(len(image_dataset) * train_split)
batch_size = 64
num_workers=16

train_set, valid_set = torch.utils.data.random_split(image_dataset, [split_size, len(image_dataset) - split_size])
tr_loader = utils.data.DataLoader(dataset=train_set,
                            batch_size=batch_size,
                            shuffle=True,
                            num_workers=num_workers)
val_loader = utils.data.DataLoader(dataset=valid_set,
                              batch_size=batch_size,
                              shuffle=False,
                              num_workers=num_workers)
dataloaders = {'train': tr_loader, 'val':val_loader}
dataset_sizes = {}
dataset_sizes['train'] = split_size
dataset_sizes['val'] = len(image_dataset) -split_size 
class_names = image_dataset.classes

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")



In [17]:
def train_model(model, criterion, optimizer, scheduler, num_epochs=2):
    since = time.time()

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print(f'Epoch {epoch}/{num_epochs - 1}')
        print('-' * 10)

        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()  
            else:
                model.eval()   

            running_loss = 0.0
            running_corrects = 0

            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                optimizer.zero_grad()

                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)
            if phase == 'train':
                scheduler.step()

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects.double() / dataset_sizes[phase]

            print(f'{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')

            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

        print()

    time_elapsed = time.time() - since
    print(f'Training complete in {time_elapsed // 60:.0f}m {time_elapsed % 60:.0f}s')
    print(f'Best val Acc: {best_acc:4f}')

    model.load_state_dict(best_model_wts)
    return model

In [24]:
model_ft = models.resnet18(pretrained=True)
num_ftrs = model_ft.fc.in_features

model_ft.fc = nn.Linear(num_ftrs, len(image_dataset.classes))
model_ft = model_ft.to(device)

criterion = nn.CrossEntropyLoss()
optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.001, momentum=0.5)
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=64, gamma=0.1)



In [25]:
model_ft = train_model(model_ft, criterion, optimizer_ft, exp_lr_scheduler, num_epochs=20)

Epoch 0/19
----------




train Loss: 3.2139 Acc: 0.0467
val Loss: 3.1785 Acc: 0.0420

Epoch 1/19
----------
train Loss: 3.1131 Acc: 0.0670
val Loss: 3.0896 Acc: 0.0754

Epoch 2/19
----------
train Loss: 3.0456 Acc: 0.0931
val Loss: 2.9844 Acc: 0.1304

Epoch 3/19
----------
train Loss: 2.9700 Acc: 0.1290
val Loss: 2.9622 Acc: 0.1290

Epoch 4/19
----------
train Loss: 2.9119 Acc: 0.1546
val Loss: 2.8893 Acc: 0.1696

Epoch 5/19
----------
train Loss: 2.8415 Acc: 0.1768
val Loss: 2.8353 Acc: 0.1725

Epoch 6/19
----------
train Loss: 2.7977 Acc: 0.1882
val Loss: 2.7833 Acc: 0.1841

Epoch 7/19
----------
train Loss: 2.7435 Acc: 0.1998
val Loss: 2.7519 Acc: 0.2145

Epoch 8/19
----------
train Loss: 2.6990 Acc: 0.2151
val Loss: 2.7119 Acc: 0.2087

Epoch 9/19
----------
train Loss: 2.6554 Acc: 0.2219
val Loss: 2.6789 Acc: 0.2174

Epoch 10/19
----------
train Loss: 2.6233 Acc: 0.2403
val Loss: 2.6365 Acc: 0.2101

Epoch 11/19
----------
train Loss: 2.5885 Acc: 0.2411
val Loss: 2.6313 Acc: 0.2087

Epoch 12/19
----------
t

In [7]:
!wget https://kr.object.ncloudstorage.com/aihub-competition/dataset/K-Fashion_Test.zip
!unzip -qq K-Fashion_Test.zip 

--2023-01-09 13:31:29--  https://kr.object.ncloudstorage.com/aihub-competition/dataset/K-Fashion_Test.zip
Resolving kr.object.ncloudstorage.com (kr.object.ncloudstorage.com)... 1.255.58.138
Connecting to kr.object.ncloudstorage.com (kr.object.ncloudstorage.com)|1.255.58.138|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 7374788914 (6.9G) [application/zip]
Saving to: ‘K-Fashion_Test.zip’


2023-01-09 13:42:52 (10.3 MB/s) - ‘K-Fashion_Test.zip’ saved [7374788914/7374788914]



In [9]:
class TestDataset(utils.data.Dataset):
  def __init__(self, root, transform=None):
    self.root = root
    self.image_list = os.listdir(root)
    self.transform = transform
  
  def __len__(self):
    return len(self.image_list)

  def __getitem__(self, index):
    image_path = os.path.join(self.root, self.image_list[index])
    image = np.array(Image.open(image_path))
    image = self.transform(image)
    return self.image_list[index], image

In [10]:
test_transform = transforms.Compose([
      transforms.ToPILImage(),
      transforms.Resize((224,224)),
      transforms.ToTensor(),
      transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ])

In [11]:
test_dataset = TestDataset('Test',transform=test_transform)
test_dataloader = utils.data.DataLoader(test_dataset, batch_size=256, num_workers=16)

In [12]:
result = []
for fnames, data in tqdm(test_dataloader):
  data = data.to(device)
  output = model_ft(data)
  _,pred = torch.max(output,1)
  for j in range(len(fnames)):
    result.append(
        {
          'filename':fnames[j],
          'style':pred.cpu().detach().numpy()[j]
        }
    )

  image = np.array(Image.open(image_path))
  image = np.array(Image.open(image_path))
  image = np.array(Image.open(image_path))
  image = np.array(Image.open(image_path))
  image = np.array(Image.open(image_path))
  image = np.array(Image.open(image_path))
  image = np.array(Image.open(image_path))
  image = np.array(Image.open(image_path))

  image = np.array(Image.open(image_path))
  image = np.array(Image.open(image_path))
  image = np.array(Image.open(image_path))


KeyboardInterrupt: ignored

In [None]:
pd.DataFrame(sorted(result,key=lambda x:x['filename'])).to_csv('fashion_submission.csv',index=None)