## Import

In [5]:
# !pip3 install albumentations ;!pip3 install opencv-python ; !pip3 install tqdm

In [1]:
import random
import pandas as pd
import numpy as np
import os
import re
import glob
import cv2

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler

import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2
import torchvision.models as models

from sklearn.model_selection import train_test_split
from sklearn import preprocessing
from sklearn.metrics import f1_score
from sklearn.metrics import classification_report
from tqdm.auto import tqdm

# torch.multiprocessing import
from torch import multiprocessing

import warnings
warnings.filterwarnings(action='ignore') 

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# 경로지정
import os
os.chdir('../DATA')
os.getcwd()

'c:\\Users\\DoSungjin\\Documents\\GitHub\\Dacon_papering_classification\\DATA'

In [3]:
import torch
# Set the device to CPU or GPU depending on availability
device = torch.device('cuda:0') if torch.cuda.is_available() else torch.device('cpu')
torch.cuda.is_available()

True

## Hyperparameter Setting

In [4]:
# hyperparameter
CFG = {
    'IMG_SIZE':224,
    'EPOCHS':25,
    'LEARNING_RATE':0.0001,
    'BATCH_SIZE': 1,
    'SEED':42
}

## Fixed RandomSeed

In [5]:
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True

seed_everything(CFG['SEED']) # Seed 고정

## Data Pre-processing

In [6]:
os.getcwd()

'c:\\Users\\DoSungjin\\Documents\\GitHub\\Dacon_papering_classification\\DATA'

In [7]:
import os

# 데이터셋 디렉토리 경로
dataset_dir = "new_train"

# 모든 이미지 파일 경로 리스트
all_img_list = []
folder_list = []
train_file_list = os.listdir(dataset_dir)
for item in train_file_list:
    item_path = os.path.join(dataset_dir, item)
    for file in os.listdir(item_path):
        all_img_list.append(os.path.join(item_path, file))
        folder_list.append(item)

In [8]:
# # folder name list
ori_names = os.listdir('ori_train')

new_names = ['furniture_repair', 'cleaning_mop_holder_repair', 'mold', 'twist', 'rust_contamination', 'wobbling', 'fabric_defect', 'molding_repair', 'stain', 'plaster_repair', 'pollution', 'typographical_error', 'crying', 'defective_joint', 'windowdoor_frame_repair', 'cracking', 'excessive_gap', 'piece', 'damage']

# print(len(ori_names), len(new_names))
# os.chdir('new_train')
# # # folder rename
# for ori_name, new_name in zip(ori_names, new_names):
#     os.rename(ori_name, new_name)

# print(os.listdir())
# os.chdir('../')

In [9]:
for a,b in zip(ori_names, new_names):
    len_a = len(os.listdir(f'ori_train/{a}'))
    len_b = len(os.listdir(f'new_train/{b}'))
    if len_a == len_b:
        print('go on')
    else: 
        print('stop')

go on
go on
go on
go on
go on
go on
go on
go on
go on
go on
go on
go on
go on
go on
go on
go on
go on
go on
go on


In [10]:
df = pd.DataFrame(columns=['img_path', 'label'])
df['img_path'] = all_img_list
df['label'] = df['img_path'].apply(lambda x : str(x).split('\\')[1])

In [11]:
df

Unnamed: 0,img_path,label
0,new_train\cleaning_mop_holder_repair\0.png,cleaning_mop_holder_repair
1,new_train\cleaning_mop_holder_repair\1.png,cleaning_mop_holder_repair
2,new_train\cleaning_mop_holder_repair\10.png,cleaning_mop_holder_repair
3,new_train\cleaning_mop_holder_repair\100.png,cleaning_mop_holder_repair
4,new_train\cleaning_mop_holder_repair\101.png,cleaning_mop_holder_repair
...,...,...
3452,new_train\wobbling\53.png,wobbling
3453,new_train\wobbling\6.png,wobbling
3454,new_train\wobbling\7.png,wobbling
3455,new_train\wobbling\8.png,wobbling


In [12]:
from sklearn.model_selection import train_test_split

train, val, _, _ = train_test_split(df, df['label'], test_size=0.2, stratify=df['label'], random_state=CFG['SEED'])


## Label-Encoding

In [13]:
le = preprocessing.LabelEncoder()
train['label'] = le.fit_transform(train['label'])
val['label'] = le.transform(val['label'])
train

Unnamed: 0,img_path,label
1182,new_train\damage\356.png,3
2455,new_train\pollution\137.png,12
3226,new_train\twist\92.png,15
2742,new_train\pollution\396.png,12
2134,new_train\mold\63.png,8
...,...,...
112,new_train\cleaning_mop_holder_repair\2.png,0
315,new_train\cracking\105.png,1
1932,new_train\fabric_defect\21.png,6
1089,new_train\damage\272.png,3


## CustomDataset

In [14]:
class CustomDataset(Dataset):
    def __init__(self, img_path_list, label_list, transforms=None):
        self.img_path_list = img_path_list
        self.label_list = label_list
        self.transforms = transforms
        
    def __getitem__(self, index):
        img_path = self.img_path_list[index]
        
        image = cv2.imread(img_path)
        
        if self.transforms is not None:
            image = self.transforms(image=image)['image']
        
        if self.label_list is not None:
            label = self.label_list[index]
            return image, label
        else:
            return image
        
    def __len__(self):
        return len(self.img_path_list)

In [15]:
import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2

train_transform = A.Compose([
    A.Resize(CFG['IMG_SIZE'], CFG['IMG_SIZE']),
    A.HorizontalFlip(p=0.5),  # Flip the image horizontally with probability 0.5
    A.Rotate(limit=10, p=0.5),  # Rotate the image by -10 to 10 degrees with probability 0.5
    A.RandomBrightnessContrast(p=0.2),  # Add random brightness and contrast with probability 0.2
    A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
    ToTensorV2()
])

test_transform = A.Compose([
    A.Resize(CFG['IMG_SIZE'], CFG['IMG_SIZE']),
    A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
    ToTensorV2()
])


In [16]:
train

Unnamed: 0,img_path,label
1182,new_train\damage\356.png,3
2455,new_train\pollution\137.png,12
3226,new_train\twist\92.png,15
2742,new_train\pollution\396.png,12
2134,new_train\mold\63.png,8
...,...,...
112,new_train\cleaning_mop_holder_repair\2.png,0
315,new_train\cracking\105.png,1
1932,new_train\fabric_defect\21.png,6
1089,new_train\damage\272.png,3


In [17]:
train['img_path'].values

array(['new_train\\damage\\356.png', 'new_train\\pollution\\137.png',
       'new_train\\twist\\92.png', ...,
       'new_train\\fabric_defect\\21.png', 'new_train\\damage\\272.png',
       'new_train\\crying\\5.png'], dtype=object)

In [18]:
train_dataset = CustomDataset(train['img_path'].values, train['label'].values, train_transform)
train_loader = DataLoader(train_dataset, batch_size = CFG['BATCH_SIZE'], shuffle=False, num_workers=0)

val_dataset = CustomDataset(val['img_path'].values, val['label'].values, test_transform)
val_loader = DataLoader(val_dataset, batch_size=CFG['BATCH_SIZE'], shuffle=False, num_workers=0)

## Model Define

In [19]:
#Creating a class for extracting embeddings via Patches of images
class ImagePatcher(nn.Module):
    def __init__(self,input_channels=3, patch_size=16, embed_dim=768):
        super().__init__()
        self.patch_size = patch_size
        # creating the embedding layers
        self.img_cropper_layer = nn.Conv2d(in_channels=input_channels, out_channels=embed_dim, kernel_size=patch_size, stride=patch_size, padding=0)
        self.linear_layer = nn.Flatten(start_dim=2, end_dim=3)

    def forward(self,x):
        image_res = x.shape[-1]
        assert image_res% self.patch_size==0 #divisible  by 16

        cropped_img_features = self.img_cropper_layer(x)
        flattened_img_features = self.linear_layer(cropped_img_features)
        flattened_img_features = flattened_img_features.permute(0,2,1)# as per pytorch

        return flattened_img_features

In [20]:
encoder_layer = nn.TransformerEncoderLayer(d_model=768,nhead=12, dim_feedforward=2048, dropout=0.1, activation='gelu', batch_first=True, norm_first=True)
encoder = nn.TransformerEncoder(encoder_layer=encoder_layer,num_layers=12)

In [21]:
class ViT(nn.Module):
    def __init__(self, img_size=224, num_channel=3, patch_size=16, embed_dim=768, p=0.1, num_layers=12,num_heads=12, hidden_dim=2048, num_classes=18):
        super().__init__()
        assert img_size%patch_size==0
        #embedding
        self.cropping_layer = ImagePatcher(input_channels=num_channel,patch_size=patch_size, embed_dim=embed_dim)

        # class token - *
        self.classtoken = nn.Parameter(torch.randn(1,1,embed_dim), requires_grad=True)

        #positional embedding for each
        num_patches = (img_size*img_size) //patch_size**2

        self.positional_embedding = nn.Parameter(torch.randn(1,num_patches+1, embed_dim))

        #dropout
        self.dropout = nn.Dropout(p=0.1) 

        #encoder 
        self.encoders = nn.TransformerEncoder(encoder_layer= nn.TransformerEncoderLayer(d_model=768,nhead=12, dim_feedforward=2048, dropout=0.1, activation='gelu', batch_first=True, norm_first=True), num_layers=num_layers)

        #mlp
        self.mlp = nn.Sequential(nn.LayerNorm(normalized_shape=embed_dim),
                                 nn.Linear(in_features=embed_dim,out_features=num_classes))
        
    
    def forward(self,x):
        batch_size = x.shape[0]
        x = self.cropping_layer(x)
        cls_token = self.classtoken.expand(batch_size,-1,-1)
        x = torch.cat((cls_token,x), dim=1)
        x = self.positional_embedding + x #similar to NLP [CLS] [0][f1] [1][f2]
        x = self.dropout(x)
        x = self.encoders(x)
        x = self.mlp(x[:,0])

        return x


## Train

In [26]:
def fit(model, loss_criterion, optimizer, epochs=25):

    history = []
    best_acc = 0.0

    for epoch in range(epochs):
        print("Epoch: {}/{}".format(epoch+1, epochs))
        
        # Set to training mode
        model.train()
        
        # Loss and Accuracy within the epoch
        train_loss = 0.0
        train_acc = 0.0
        
        valid_loss = 0.0
        valid_acc = 0.0
        
        for i, (inputs, labels) in enumerate(train_loader):

            inputs = inputs.to(device)
            labels = labels.to(device)
            
            # Clean existing gradients
            optimizer.zero_grad()
            
            # Forward pass - compute outputs on input data using the model
            outputs = model(inputs)
            
            # Compute loss
            loss = loss_criterion(outputs, labels)
            
            # Backpropagate the gradients
            loss.backward()
            
            # Update the parameters
            optimizer.step()
            
            # Compute the total loss for the batch and add it to train_loss
            train_loss += loss.item() * inputs.size(0)
            
            # Compute the accuracy
            ret, predictions = torch.max(outputs.data, 1)
            correct_counts = predictions.eq(labels.data.view_as(predictions))
            
            # Convert correct_counts to float and then compute the mean
            acc = torch.mean(correct_counts.type(torch.FloatTensor))
            
            # Compute total accuracy in the whole batch and add to train_acc
            train_acc += acc.item() * inputs.size(0)
            

            
        # Validation - No gradient tracking needed
        with torch.no_grad():

            # Set to evaluation mode
            model.eval()

            # Validation loop
            for j, (inputs, labels) in enumerate(val_loader):
                inputs = inputs.to(device)
                labels = labels.to(device)

                # Forward pass - compute outputs on input data using the model
                outputs = model(inputs)

                # Compute loss
                loss = loss_criterion(outputs, labels)

                # Compute the total loss for the batch and add it to valid_loss
                valid_loss += loss.item() * inputs.size(0)

                # Calculate validation accuracy
                ret, predictions = torch.max(outputs.data, 1)
                correct_counts = predictions.eq(labels.data.view_as(predictions))

                # Convert correct_counts to float and then compute the mean
                acc = torch.mean(correct_counts.type(torch.FloatTensor))

                # Compute total accuracy in the whole batch and add to valid_acc
                valid_acc += acc.item() * inputs.size(0)

            
        # Find average training loss and training accuracy
        avg_train_loss = train_loss/len(train) 
        avg_train_acc = train_acc/len(train) 

        # Find average training loss and training accuracy
        avg_valid_loss = valid_loss/len(val)  
        avg_valid_acc = valid_acc/len(val) 

        history.append([avg_train_loss, avg_valid_loss, avg_train_acc, avg_valid_acc])
                
    
        print("Epoch : {:03d}, Training: Loss: {:.4f}, Accuracy: {:.4f}%, \n\t\tValidation : Loss : {:.4f}, Accuracy: {:.4f}%".format(epoch+1, avg_train_loss, avg_train_acc*100, avg_valid_loss, avg_valid_acc*100))
        
            
    return model, history

In [30]:
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
model,history = fit(ViT, criterion,optimizer)

Epoch: 1/25


TypeError: train() missing 1 required positional argument: 'self'

In [22]:
def train(model, optimizer, train_loader, val_loader, scheduler, device):
    model.to(device)
    
    
    best_score = 0
    best_model = None
    
    for epoch in range(1, CFG['EPOCHS']+1):
        model.train()
        train_loss = []
        for imgs, labels in tqdm(iter(train_loader)):
            imgs = imgs.float().to(device)
            labels = labels.type(torch.long).to(device)
        
            
            optimizer.zero_grad()
            
            output = model(imgs)
            loss = criterion(output, labels)
            
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1)
            optimizer.step()
            
            train_loss.append(loss.item())
                    
        _val_loss, _val_score = validation(model, criterion, val_loader, device)
        _train_loss = np.mean(train_loss)
        print(f'Epoch [{epoch}], Train Loss : [{_train_loss:.5f}] Val Loss : [{_val_loss:.5f}] Val Weighted F1 Score : [{_val_score:.5f}]')
       
        if scheduler is not None:
            scheduler.step(_val_score)
            
        if best_score < _val_score:
            best_score = _val_score
            best_model = model
    
    return best_model

In [23]:
def validation(model, criterion, val_loader, device):
    model.eval()
    val_loss = []
    preds, true_labels = [], []

    with torch.no_grad():
        for imgs, labels in tqdm(iter(val_loader)):
            imgs = imgs.float().to(device)
            labels = labels.type(torch.long).to(device)
            
            pred = model(imgs)
            
            loss = criterion(pred, labels)
            
            preds += pred.argmax(1).detach().cpu().numpy().tolist()
            true_labels += labels.detach().cpu().numpy().tolist()
            
            val_loss.append(loss.item())
        
        _val_loss = np.mean(val_loss)
        _val_score = f1_score(true_labels, preds, average='weighted')
    
    return _val_loss, _val_score

## Run!!

In [24]:
import os
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"

In [25]:
model = ViT(num_classes=len(df.columns)).to(device)
model.eval()
optimizer = torch.optim.AdamW(params = model.parameters(), lr = CFG["LEARNING_RATE"])
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=0.5, patience=2, threshold_mode='abs', min_lr=1e-8, verbose=True)

infer_model = train(model, optimizer, train_loader, val_loader, scheduler, device)

  0%|          | 0/2765 [00:03<?, ?it/s]


RuntimeError: CUDA error: device-side assert triggered

## Inference

In [43]:
test = pd.read_csv('test.csv')
test

Unnamed: 0,id,img_path
0,TEST_000,./test/000.png
1,TEST_001,./test/001.png
2,TEST_002,./test/002.png
3,TEST_003,./test/003.png
4,TEST_004,./test/004.png
...,...,...
787,TEST_787,./test/787.png
788,TEST_788,./test/788.png
789,TEST_789,./test/789.png
790,TEST_790,./test/790.png


In [44]:
test_dataset = CustomDataset(test['img_path'].values, None, test_transform)
test_loader = DataLoader(test_dataset, batch_size=CFG['BATCH_SIZE'], shuffle=False, num_workers=0)

In [45]:
def inference(model, test_loader, device):
    model.eval()
    preds = []
    with torch.no_grad():
        for imgs in tqdm(iter(test_loader)):
            imgs = imgs.float().to(device)
            
            pred = model(imgs)
            
            preds += pred.argmax(1).detach().cpu().numpy().tolist()
    
    preds = le.inverse_transform(preds)
    return preds

In [46]:
preds = inference(infer_model, test_loader, device)

NameError: name 'infer_model' is not defined

## Submission

In [None]:
submit = pd.read_csv('sample_submission.csv')

In [None]:
submit['label'] = preds

In [None]:
submit

Unnamed: 0,id,label
0,TEST_000,damage
1,TEST_001,pollution
2,TEST_002,damage
3,TEST_003,molding_repair
4,TEST_004,pollution
...,...,...
787,TEST_787,damage
788,TEST_788,damage
789,TEST_789,pollution
790,TEST_790,pollution


In [None]:
# result folder 생성
from datetime import datetime, timezone, timedelta

kst = timezone(timedelta(hours=9))
train_serial =  datetime.now(kst).strftime('%Y%m%d_%H%M%S')

Record_path = os.path.join('result', train_serial)

os.makedirs(Record_path, exist_ok=True)




In [None]:
# Dictionary to map old column names to new column names
name_map = dict(zip(new_names, ori_names))
name_map


# Rename columns using the rename() method
submit_test = submit.replace(name_map)
submit_test

Unnamed: 0,id,label
0,TEST_000,훼손
1,TEST_001,오염
2,TEST_002,훼손
3,TEST_003,몰딩수정
4,TEST_004,오염
...,...,...
787,TEST_787,훼손
788,TEST_788,훼손
789,TEST_789,오염
790,TEST_790,오염


In [None]:
submit_test.to_csv(os.path.join(Record_path,'submission.csv'), index=False)

In [None]:
submit_test

Unnamed: 0,id,label
0,TEST_000,훼손
1,TEST_001,오염
2,TEST_002,훼손
3,TEST_003,몰딩수정
4,TEST_004,오염
...,...,...
787,TEST_787,훼손
788,TEST_788,훼손
789,TEST_789,오염
790,TEST_790,오염


In [None]:
# model information 저장
import json
model_info = {'CFG' : CFG, 'model' : str(model), 'optimizer' : str(optimizer), 'scheduler' : str(scheduler), 'train_serial' : train_serial}
with open(os.path.join(Record_path,'model_info.json'), 'w') as f:
    json.dump(model_info, f)