# Library

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import random


import os, sys
import skimage.io
from skimage.transform import resize
#!pip install six numpy scipy Pillow matplotlib scikit-image opencv-python imageio
#!pip install --no-dependencies imgaug
from imgaug import augmenters as iaa
from tqdm import tqdm
import PIL
from PIL import Image, ImageOps
import cv2
from sklearn.utils import class_weight, shuffle
from sklearn.metrics import f1_score, fbeta_score
from sklearn.model_selection import train_test_split

WORKERS = 2
CHANNEL = 3

import warnings
warnings.filterwarnings("ignore")
IMG_SIZE = 512
NUM_CLASSES = 18
SEED = 42
TRAIN_NUM = 1000 # use 1000 when you just want to explore new idea, use -1 for full train

torch.cuda.manual_seed(SEED)
torch.manual_seed(SEED)
random.seed(SEED)
np.random.seed(SEED) 


In [4]:
df_train = pd.read_csv('/opt/ml/code/level1-image-classification-level1-nlp-6/train.csv')
df_valid = pd.read_csv('/opt/ml/code/level1-image-classification-level1-nlp-6/valid.csv')

In [3]:
df_train_class = pd.DataFrame(columns = ['id', 'per_id', 'gender', 'age', 'mask', 'class', 'path'])
df_train_class.set_index('id', inplace=True)

In [6]:
def return_class_simple(row, mask):
    # Assuming the mask is already labeled as 0,1,2
    # Each of them is 'wear', 'incorrect' and 'not wear'
    gender = 0 if row["gender"] == "male" else 3
    age = min(2, row["age"] // 30)

    # Print the class number
    return mask*6 + gender + age, age

In [5]:
path = '../../input/data/train/images'

!rm -rf ./data/train/.DS_Store
!rm -rf ./data/train/images/.DS_Store
folders = sorted([f for f in os.listdir(path) if "._" not in f])

In [6]:
idx = 0
mask_dict = {0: 'wear', 1: 'not wear', 2: 'incorrect'}
age_dict = {0: 'under 30', 1: '30 to 60', 2: 'over 60'}

for i in df_train.index:
    row = df_train.loc[i]
    imgs_path = os.path.join(path, row['path'])
    images = sorted([f for f in os.listdir(imgs_path) if "._" not in f])
    for img in images:
        #print(img)
        if img[:-4] == 'incorrect_mask':
            mask = 2 # incorrect
        elif img[:-4] == 'normal':
            mask = 1 # not wear
        else:
            mask = 0 # wear

        classnum, age = return_class_simple(row, mask)
        

        df_train_class.loc[idx] = [row['id'], row['gender'], age_dict[age], mask_dict[mask], classnum, os.path.join(imgs_path, img)]
        idx += 1


In [7]:
df_train_class.to_csv("./train_with_class.csv", encoding="utf-8")

# Data Preprocessing & Dataloader

In [26]:
train_df = pd.read_csv('/opt/ml/code/level1-image-classification-level1-nlp-6/train.csv')
valid_df = pd.read_csv('/opt/ml/code/level1-image-classification-level1-nlp-6/valid.csv')
test_df = pd.read_csv('/opt/ml/code/level1-image-classification-level1-nlp-6/test.csv')

In [27]:
train_df

Unnamed: 0.1,Unnamed: 0,per_id,class,path
0,0,000225,15,../../input/data/train/images/000225_female_As...
1,1,000225,3,../../input/data/train/images/000225_female_As...
2,2,000225,3,../../input/data/train/images/000225_female_As...
3,3,000225,3,../../input/data/train/images/000225_female_As...
4,4,000225,3,../../input/data/train/images/000225_female_As...
...,...,...,...,...
11286,11286,004096,2,../../input/data/train/images/004096_male_Asia...
11287,11287,004096,2,../../input/data/train/images/004096_male_Asia...
11288,11288,004096,2,../../input/data/train/images/004096_male_Asia...
11289,11289,004096,2,../../input/data/train/images/004096_male_Asia...


In [28]:
# 출처: https://github.com/utkuozbulak/pytorch-custom-dataset-examples/blob/master/src/custom_dataset_from_file.py
import numpy as np
from PIL import Image
import glob
from torchvision import transforms

import torch
from torch.utils.data.dataset import Dataset  # For custom datasets
from torchvision.transforms import Resize, ToTensor, Normalize


transform = transforms.Compose([Resize((512, 384), Image.BILINEAR),
                                ToTensor(),
                                Normalize(mean=(0.5, 0.5, 0.5), std=(0.2, 0.2, 0.2))])

class CustomDataset(Dataset):
    def __init__(self, df_train, transform, train=True):
        # Get image list
        self.image_list = df_train['path'].tolist()
        self.target = df_train['class'].tolist()
        # Calculate len
        self.data_len = len(self.image_list)

        self.transform = transform
        self.train = train

    def __getitem__(self, index):
        # Get image name from the pandas df
        single_image_path = self.image_list[index]
        # Open image
        # Open image
        image = Image.open(single_image_path)

        if self.transform:
            img = self.transform(image)
    
        if self.train:
            label = self.target[index]
            
            return (img, torch.tensor(label))
        else:
            return img

    def __len__(self):
        return self.data_len

# Model

In [29]:
from torchvision import models
import torch

resnet18_pretrained = models.resnet18(pretrained=True)

#for para in resnet18_pretrained.parameters():
#    para.requires_grad = False
    
num_classes = 18
num_ftrs = resnet18_pretrained.fc.in_features
resnet18_pretrained.fc = nn.Linear(num_ftrs, num_classes)
#resnet18_pretrained.fc.

#print(resnet18_pretrained)

In [30]:

   
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable

class FocalLoss(nn.Module):
    def __init__(self, gamma=0, alpha=None, size_average=True):
        super(FocalLoss, self).__init__()
        self.gamma = gamma
        self.alpha = alpha
        if isinstance(alpha,(float,int)): self.alpha = torch.Tensor([alpha,1-alpha])
        if isinstance(alpha,list): self.alpha = torch.Tensor(alpha)
        self.size_average = size_average

    def forward(self, input, target):
        if input.dim()>2:
            input = input.view(input.size(0),input.size(1),-1)  # N,C,H,W => N,C,H*W
            input = input.transpose(1,2)    # N,C,H*W => N,H*W,C
            input = input.contiguous().view(-1,input.size(2))   # N,H*W,C => N*H*W,C
        target = target.view(-1,1)

        logpt = F.log_softmax(input)
        logpt = logpt.gather(1,target)
        logpt = logpt.view(-1)
        pt = Variable(logpt.data.exp())

        if self.alpha is not None:
            if self.alpha.type()!=input.data.type():
                self.alpha = self.alpha.type_as(input.data)
            at = self.alpha.gather(0,target.data.view(-1))
            logpt = logpt * Variable(at)

        loss = -1 * (1-pt)**self.gamma * logpt
        if self.size_average: return loss.mean()
        else: return loss.sum()

In [39]:
device = 'cuda'
   
import torch
import torch.nn as nn
from torch.autograd import Variable
from torchvision import transforms
from torch.utils.data.dataset import Dataset  # For custom datasets

report_every = 100

def eval(model,data_iter,criterion,epoch):
    model.eval()
    total_loss = 0
    batch_num = 0
    
    for i, (images, labels) in enumerate(data_iter):
        images = Variable(images).to(device)
        labels = Variable(labels).to(device)

        # Forward pass
        outputs = model(images)
        
        # Calculate loss
        loss = criterion(outputs, labels)

        total_loss += loss.data
        batch_num += 1
        
    loss = total_loss / batch_num
    model.train()
    
    return loss

if __name__ == "__main__":

    # Dataset variant 3:
    # Read images from a folder, image classes are embedded in file names
    # No csv is used whatsoever
    # No torch transformations are used
    # Preprocessing operations are defined inside the dataset
    train_dataset = CustomDataset(train_df, transform = transform)
    valid_dataset = CustomDataset(valid_df, transform = transform)

    train_dataloader = torch.utils.data.DataLoader(dataset=train_dataset,
                                                    batch_size=8,
                                                    shuffle=False)
    valid_dataloader = torch.utils.data.DataLoader(dataset=valid_dataset,
                                                    batch_size=8,
                                                    shuffle=False)
    


    
    model = MyCustomModel()
    model = model.to(device)
    model = torch.nn.DataParallel(model)
    
    criterion = FocalLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

    running_loss = 0
    min_loss = float('inf')
    
    for epoch in range(100): 
        for i, (images, labels) in enumerate(train_dataloader):
            images = Variable(images).to(device)
            labels = Variable(labels).to(device)
            
            # Clear gradients
            optimizer.zero_grad()
            # Forward pass
            outputs = model(images)
            # Calculate loss
            loss = criterion(outputs, labels)

            # Backward pass
            loss.backward()
            # Update weights
            optimizer.step()
            
            running_loss += loss.item()
            
            if i % report_every == 0:
                eval_loss = eval(model, valid_dataloader, criterion, epoch)
                if eval_loss < min_loss:
                    min_loss = eval_loss
                    torch.save(model, "./model/epoch_%d_loss_%6f.pt" % (epoch, min_loss))
                print('Epoch: %d - Batch ID:%d - Min Loss:%f' %(epoch, i, min_loss))
            

    print('done!')

NameError: name 'math' is not defined

In [33]:
# 모델의 state_dict 출력
print("Model's state_dict:")
for param_tensor in model.state_dict():
    print(param_tensor, "\t", model.state_dict()[param_tensor].size())
  
torch.save(model, "./model/baseline_resnet+focal_loss.pt")

Model's state_dict:
module.conv1.weight 	 torch.Size([64, 3, 7, 7])
module.bn1.weight 	 torch.Size([64])
module.bn1.bias 	 torch.Size([64])
module.bn1.running_mean 	 torch.Size([64])
module.bn1.running_var 	 torch.Size([64])
module.bn1.num_batches_tracked 	 torch.Size([])
module.layer1.0.conv1.weight 	 torch.Size([64, 64, 3, 3])
module.layer1.0.bn1.weight 	 torch.Size([64])
module.layer1.0.bn1.bias 	 torch.Size([64])
module.layer1.0.bn1.running_mean 	 torch.Size([64])
module.layer1.0.bn1.running_var 	 torch.Size([64])
module.layer1.0.bn1.num_batches_tracked 	 torch.Size([])
module.layer1.0.conv2.weight 	 torch.Size([64, 64, 3, 3])
module.layer1.0.bn2.weight 	 torch.Size([64])
module.layer1.0.bn2.bias 	 torch.Size([64])
module.layer1.0.bn2.running_mean 	 torch.Size([64])
module.layer1.0.bn2.running_var 	 torch.Size([64])
module.layer1.0.bn2.num_batches_tracked 	 torch.Size([])
module.layer1.1.conv1.weight 	 torch.Size([64, 64, 3, 3])
module.layer1.1.bn1.weight 	 torch.Size([64])
module.

# Predictions

In [34]:
model = torch.load("./model/epoch_16_loss_3.823638.pt")

In [35]:
test_dataset = CustomDataset(test_df, transform)

test_dataloader = torch.utils.data.DataLoader(dataset=test_dataset,
                                            batch_size=8,
                                            shuffle=False)

In [36]:
targets = []
all_predictions = []
for images, labels in test_dataloader:
    with torch.no_grad():
        #images = images.to(device)
        images = Variable(images).to(device)
        labels = Variable(labels).to(device)
        pred = model(images)
        pred = pred.argmax(dim=-1)
        targets.extend(labels.cpu().numpy())
        all_predictions.extend(pred.cpu().numpy())



In [37]:
import sklearn.metrics as metrics

print('accuracy', metrics.accuracy_score(targets, all_predictions) )
print('f1', np.mean(metrics.f1_score(targets, all_predictions, average=None)))




accuracy 0.2153558052434457
f1 0.0689292044797463


# 데이터 분리

In [11]:
df_train = pd.read_csv('/opt/ml/input/data/train/train.csv')

In [12]:
df_train['age_class'] = 0

# 30 미만: 0 / 30 ~ 60: 1 / 60 이상: 2
df_train.loc[df_train['age'] < 30, 'age_class'] = 0
df_train.loc[(df_train['age'] < 60) & (df_train['age'] >= 30), 'age_class'] = 1
df_train.loc[df_train['age'] >= 60, 'age_class'] = 2


In [13]:
df_train = df_train.drop(['race'], axis=1)
df_train = df_train.drop(['age'], axis=1)

In [14]:
df_train = df_train.sort_values(by=['gender', 'age_class'])
df_train.reset_index(drop=True, inplace=True)
df_train['index'] = df_train.index

* 여성 0 ~ 1657
    * under 30: 0 ~ 731
        * train: 0 ~ 438
        * valid: 439 ~ 585
        * test: 586 ~ 731
    * 30 to 60: 732 ~ 1548
        * train: 732 ~ 1221
        * valid: 1222 ~ 1384
        * test: 1385 ~ 1548
    * over 60: 1549 ~ 1657
        * train: 1549 ~ 1613
        * valid: 1614 ~ 1635
        * test: 1636 ~ 1657
* 남성 1658 ~ 2699
    * under 30: 1658 ~ 2206
        * train: 1658 ~ 1986
        * valid: 1987 ~ 2097
        * test: 2098 ~ 2206
    * 30 to 60: 2207 ~ 2616
        * train: 2207 ~ 2452
        * valid: 2453 ~ 2534
        * test: 2535 ~ 2616
    * over 60: 2617 ~ 2699
        * train: 2617 ~ 2666
        * valid: 2667 ~ 2682
        * test: 2683 ~ 2699

* train 60% / valid 20% / test 20%

In [15]:
train_df = pd.concat([df_train[0:438], df_train[732:1221], df_train[1549:1613], df_train[1658:1986], df_train[2207:2452], df_train[2617:2666]])
valid_df = pd.concat([df_train[429:585], df_train[1222:1384], df_train[1614:1635], df_train[1987:2097], df_train[2453:2534], df_train[2667:2682]])
test_df = pd.concat([df_train[586:731], df_train[1385:1548], df_train[1636:1657], df_train[2098:2206], df_train[2535:2616], df_train[2683:2699]])


In [16]:
valid_df

Unnamed: 0,id,gender,path,age_class,index
429,003326,female,003326_female_Asian_20,0,429
430,003328,female,003328_female_Asian_19,0,430
431,003329,female,003329_female_Asian_19,0,431
432,003330,female,003330_female_Asian_19,0,432
433,003331,female,003331_female_Asian_20,0,433
...,...,...,...,...,...
2677,004283,male,004283_male_Asian_60,2,2677
2678,004284,male,004284_male_Asian_60,2,2678
2679,004285,male,004285_male_Asian_60,2,2679
2680,004286,male,004286_male_Asian_60,2,2680


In [17]:
df_train_class = pd.DataFrame(columns = ['per_id', 'class', 'path'])
#df_train_class.set_index('id', inplace=True)

In [18]:
def return_class_simple(row, mask):
    # Assuming the mask is already labeled as 0,1,2
    # Each of them is 'wear', 'incorrect' and 'not wear'
    gender = 0 if row["gender"] == "male" else 3
    age = row["age_class"]

    # Print the class number
    return mask*6 + gender + age


In [19]:
path = '../../input/data/train/images'
idx = 0
mask_dict = {0: 'wear', 1: 'not wear', 2: 'incorrect'}
age_dict = {0: 'under 30', 1: '30 to 60', 2: 'over 60'}

for i in train_df.index:
    row = train_df.loc[i]

    
    imgs_path = os.path.join(path, row['path'])
    images = sorted([f for f in os.listdir(imgs_path) if "._" not in f])
    for img in images:
        if img[:-4] == 'incorrect_mask':
            mask = 2 # incorrect
        elif img[:-4] == 'normal':
            mask = 1 # not wear
        else:
            mask = 0 # wear

        classnum = return_class_simple(row, mask)
        

        df_train_class.loc[idx] = [row['id'], classnum, os.path.join(imgs_path, img)]
        idx += 1


In [20]:
df_valid_class = pd.DataFrame(columns = ['per_id', 'class', 'path'])
#df_train_class.set_index('id', inplace=True)

In [21]:
path = '../../input/data/train/images'
idx = 0
mask_dict = {0: 'wear', 1: 'not wear', 2: 'incorrect'}
age_dict = {0: 'under 30', 1: '30 to 60', 2: 'over 60'}

for i in valid_df.index:
    row = valid_df.loc[i]

    
    imgs_path = os.path.join(path, row['path'])
    images = sorted([f for f in os.listdir(imgs_path) if "._" not in f])
    for img in images:
        if img[:-4] == 'incorrect_mask':
            mask = 2 # incorrect
        elif img[:-4] == 'normal':
            mask = 1 # not wear
        else:
            mask = 0 # wear

        classnum = return_class_simple(row, mask)
        

        df_valid_class.loc[idx] = [row['id'], classnum, os.path.join(imgs_path, img)]
        idx += 1


In [22]:
df_test_class = pd.DataFrame(columns = ['per_id', 'class', 'path'])
#df_train_class.set_index('id', inplace=True)

In [23]:
path = '../../input/data/train/images'
idx = 0
mask_dict = {0: 'wear', 1: 'not wear', 2: 'incorrect'}
age_dict = {0: 'under 30', 1: '30 to 60', 2: 'over 60'}

for i in test_df.index:
    row = test_df.loc[i]

    
    imgs_path = os.path.join(path, row['path'])
    images = sorted([f for f in os.listdir(imgs_path) if "._" not in f])
    for img in images:
        if img[:-4] == 'incorrect_mask':
            mask = 2 # incorrect
        elif img[:-4] == 'normal':
            mask = 1 # not wear
        else:
            mask = 0 # wear

        classnum = return_class_simple(row, mask)
        

        df_test_class.loc[idx] = [row['id'], classnum, os.path.join(imgs_path, img)]
        idx += 1


In [24]:
df_train_class.to_csv("./train.csv")
df_valid_class.to_csv("./valid.csv")
df_test_class.to_csv("./test.csv")