# Library

In [17]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns


import os, sys
import skimage.io
from skimage.transform import resize
#!pip install six numpy scipy Pillow matplotlib scikit-image opencv-python imageio
#!pip install --no-dependencies imgaug
from imgaug import augmenters as iaa
from tqdm import tqdm
import PIL
from PIL import Image, ImageOps
import cv2
from sklearn.utils import class_weight, shuffle
from sklearn.metrics import f1_score, fbeta_score
from sklearn.model_selection import train_test_split

WORKERS = 2
CHANNEL = 3

import warnings
warnings.filterwarnings("ignore")
IMG_SIZE = 512
NUM_CLASSES = 18
SEED = 77
TRAIN_NUM = 1000 # use 1000 when you just want to explore new idea, use -1 for full train


In [2]:
df_train = pd.read_csv('/opt/ml/input/data/train/train.csv')

In [3]:
df_train_class = pd.DataFrame(columns = ['id', 'per_id', 'gender', 'age', 'mask', 'class', 'path'])
df_train_class.set_index('id', inplace=True)

In [6]:
def return_class_simple(row, mask):
    # Assuming the mask is already labeled as 0,1,2
    # Each of them is 'wear', 'incorrect' and 'not wear'
    gender = 0 if row["gender"] == "male" else 3
    age = min(2, row["age"] // 30)

    # Print the class number
    return mask*6 + gender + age, age

In [5]:
path = '../../input/data/train/images'

!rm -rf ./data/train/.DS_Store
!rm -rf ./data/train/images/.DS_Store
folders = sorted([f for f in os.listdir(path) if "._" not in f])

In [6]:
idx = 0
mask_dict = {0: 'wear', 1: 'not wear', 2: 'incorrect'}
age_dict = {0: 'under 30', 1: '30 to 60', 2: 'over 60'}

for i in df_train.index:
    row = df_train.loc[i]
    imgs_path = os.path.join(path, row['path'])
    images = sorted([f for f in os.listdir(imgs_path) if "._" not in f])
    for img in images:
        #print(img)
        if img[:-4] == 'incorrect_mask':
            mask = 2 # incorrect
        elif img[:-4] == 'normal':
            mask = 1 # not wear
        else:
            mask = 0 # wear

        classnum, age = return_class_simple(row, mask)
        

        df_train_class.loc[idx] = [row['id'], row['gender'], age_dict[age], mask_dict[mask], classnum, os.path.join(imgs_path, img)]
        idx += 1


In [7]:
df_train_class.to_csv("./train_with_class.csv", encoding="utf-8")

# Data Preprocessing & Dataloader

In [7]:
df_train_class = pd.read_csv("./train_with_class.csv")

In [8]:
train_df = df_train_class[:11340]
valid_df = df_train_class[11341:15120]
test_df = df_train_class[15121:]

In [9]:
train_df

Unnamed: 0,id,per_id,gender,age,mask,class,path
0,0,000001,female,30 to 60,incorrect,16,../../input/data/train/images/000001_female_As...
1,1,000001,female,30 to 60,wear,4,../../input/data/train/images/000001_female_As...
2,2,000001,female,30 to 60,wear,4,../../input/data/train/images/000001_female_As...
3,3,000001,female,30 to 60,wear,4,../../input/data/train/images/000001_female_As...
4,4,000001,female,30 to 60,wear,4,../../input/data/train/images/000001_female_As...
...,...,...,...,...,...,...,...
11335,11335,003785,female,30 to 60,wear,4,../../input/data/train/images/003785_female_As...
11336,11336,003785,female,30 to 60,wear,4,../../input/data/train/images/003785_female_As...
11337,11337,003785,female,30 to 60,wear,4,../../input/data/train/images/003785_female_As...
11338,11338,003785,female,30 to 60,wear,4,../../input/data/train/images/003785_female_As...


In [12]:
# 출처: https://github.com/utkuozbulak/pytorch-custom-dataset-examples/blob/master/src/custom_dataset_from_file.py
import numpy as np
from PIL import Image
import glob
from torchvision import transforms

import torch
from torch.utils.data.dataset import Dataset  # For custom datasets
from torchvision.transforms import Resize, ToTensor, Normalize


transform = transforms.Compose([Resize((512, 384), Image.BILINEAR),
                                ToTensor(),
                                Normalize(mean=(0.5, 0.5, 0.5), std=(0.2, 0.2, 0.2))])

class CustomDataset(Dataset):
    def __init__(self, df_train, transform, train=True):
        # Get image list
        self.image_list = df_train['path'].tolist()
        self.target = df_train['class'].tolist()
        # Calculate len
        self.data_len = len(self.image_list)

        self.transform = transform
        self.train = train

    def __getitem__(self, index):
        # Get image name from the pandas df
        single_image_path = self.image_list[index]
        # Open image
        # Open image
        image = Image.open(single_image_path)

        if self.transform:
            img = self.transform(image)
    
        if self.train:
            label = self.target[index]
            
            return (img, torch.tensor(label))
        else:
            return img

    def __len__(self):
        return self.data_len

# Model

In [20]:
from torchvision import models
import torch

resnet18_pretrained = models.resnet18(pretrained=True)

num_classes = 18
num_ftrs = resnet18_pretrained.fc.in_features
resnet18_pretrained.fc = nn.Linear(num_ftrs, num_classes)

print(resnet18_pretrained)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [24]:
device = 'cuda'
   
import torch
import torch.nn as nn
from torch.autograd import Variable
from torchvision import transforms
from torch.utils.data.dataset import Dataset  # For custom datasets



if __name__ == "__main__":

    # Dataset variant 3:
    # Read images from a folder, image classes are embedded in file names
    # No csv is used whatsoever
    # No torch transformations are used
    # Preprocessing operations are defined inside the dataset
    custom_mnist_from_file = CustomDataset(train_df, transform = transform)

    mn_dataset_loader = torch.utils.data.DataLoader(dataset=custom_mnist_from_file,
                                                    batch_size=2,
                                                    shuffle=False)


    model = resnet18_pretrained
    model = model.to(device)
    #model = torch.nn.DataParallel(model)
    
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

    running_loss = 0
    for epoch in range(2): 
        for i, (images, labels) in enumerate(mn_dataset_loader):
            images = Variable(images).to(device)
            labels = Variable(labels).to(device)
            
            # Clear gradients
            optimizer.zero_grad()
            # Forward pass
            outputs = model(images)
            # Calculate loss
            loss = criterion(outputs, labels)

            # Backward pass
            loss.backward()
            # Update weights
            optimizer.step()
            
            running_loss += loss.item()
            
            if i % 2000 == 1999:
                print('Epoch: %d Batch ID:%d Loss:%f' %(epoch, i, running_loss/2000))
            

    print('A single forward-backward pass is done!')

# Predictions

In [93]:
valid_dataset = CustomDatasetFromFile(valid_df)

valid_dataloader = torch.utils.data.DataLoader(dataset=valid_dataset,
                                            batch_size=10,
                                            shuffle=False)

In [100]:
targets = []
all_predictions = []
for images, labels in valid_dataloader:
    with torch.no_grad():
        #images = images.to(device)
        pred = model(images.float())
        pred = pred.argmax(dim=-1)
        targets.extend(labels.numpy())
        all_predictions.extend(pred.cpu().numpy())



In [112]:
import sklearn.metrics as metrics

print('accuracy', metrics.accuracy_score(targets, all_predictions) )
print('f1', np.mean(metrics.f1_score(targets, all_predictions, average=None)))




accuracy 0.5992945326278659
f1 0.33533191609193297
