## Make Dataset

In [1]:
# # 이미지 Crop 후 저장하기
# import glob
# import os
# from tqdm.auto import tqdm
# import cv2

# data_dir = '/custom_dataset/'
# number = 0
# img_path = os.path.join(os.getcwd()+data_dir, 'train')
# img_folder = os.path.join(os.getcwd()+data_dir)
# for image in tqdm(glob.glob(img_path+'/*.png')):
#     original_image = cv2.imread(image)
#     image_name = image.split('\\')[-1].split('.')[0]
#     txt_path = os.path.join(img_path, image_name + '.txt')
#     tree = open(txt_path)
#     for line in tree.readlines():
#         scores = line.split(' ')
#         class_id = int(float(scores[0]))
#         x1, y1, x2, y2 = int(scores[1]), int(scores[2]), int(scores[5]), int(scores[6])
#         crop_img = original_image[y1:y2 , x1:x2]
#         crop_img = crop_img[(y2 - y1)//2:-1, :]
#         try:
#             if not os.path.exists(os.path.join(img_folder, 'croped_imgs', str(class_id))):
#                 os.makedirs(os.path.join(img_folder, 'croped_imgs', str(class_id)))
#         except OSError:
#             print("Error: Failed to create the directory.")
#         save_path = os.path.join(img_folder, 'croped_imgs', str(class_id), str(number).zfill(6)+'.png')
#         cv2.imwrite(save_path, crop_img)
#         number += 1

## Import

In [2]:
import random
import pandas as pd
import numpy as np
import os
import re
import glob
import cv2
import timm
import torch
import torch.nn as nn
import torch.optim as optim

import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler
import torchvision.transforms as transforms
import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2
import torchvision.models as models

from sklearn.model_selection import train_test_split
from sklearn import preprocessing
from sklearn.metrics import f1_score
from sklearn.metrics import classification_report
from tqdm.auto import tqdm

import warnings
warnings.filterwarnings(action='ignore') 

In [3]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

## Hyperparameter Setting

In [4]:
CFG = {
    'IMG_SIZE':640,
    'EPOCHS':5,
    'LEARNING_RATE':1e-4,
    'BATCH_SIZE':32,
    'SEED':41
}

## Fixed RandomSeed

In [5]:
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True

seed_everything(CFG['SEED']) # Seed 고정

## Data Pre-processing

In [6]:
all_img_list = glob.glob('./custom_dataset/croped_imgs/*/*')

In [7]:
df = pd.DataFrame(columns=['img_path', 'label'])
df['img_path'] = all_img_list
df['label'] = df['img_path'].apply(lambda x : str(x).split('\\')[1])

In [8]:
train, val, _, _ = train_test_split(df, df['label'], test_size=0.1, stratify=df['label'], random_state=CFG['SEED'])

## CustomDataset

In [9]:
class CustomDataset(Dataset):
    def __init__(self, img_path_list, label_list, transforms=None):
        self.img_path_list = img_path_list
        self.label_list = label_list
        self.transforms = transforms
        
    def __getitem__(self, index):
        img_path = self.img_path_list[index]
        
        image = cv2.imread(img_path)
        
        if self.transforms is not None:
            image = self.transforms(image)
        
        if self.label_list is not None:
            label = self.label_list[index]
            return image, int(label)
        else:
            return image
        
    def __len__(self):
        return len(self.img_path_list)

In [10]:
mean = [0.485, 0.456, 0.406] 
std = [0.229, 0.224, 0.225]

train_transform = transforms.Compose([transforms.ToTensor(), 
                                    transforms.Resize((224,224)),
                                    transforms.Normalize(mean, std)])

In [11]:
train_dataset = CustomDataset(train['img_path'].values, train['label'].values, train_transform)
train_loader = DataLoader(train_dataset, batch_size = CFG['BATCH_SIZE'], shuffle=True, num_workers=0)

val_dataset = CustomDataset(val['img_path'].values, val['label'].values, train_transform)
val_loader = DataLoader(val_dataset, batch_size=CFG['BATCH_SIZE'], shuffle=False, num_workers=0)

## Model Define

In [12]:
class BaseModel(nn.Module):
    def __init__(self, num_classes=34):
        super(BaseModel, self).__init__()
        self.backbone = models.maxvit_t(pretrained=True)
        # self.backbone = timm.create_model('densenet201', pretrained=True, num_classes = 1000)
        self.classifier = nn.Linear(1000, num_classes)
        
    def forward(self, x):
        x = self.backbone(x)
        x = self.classifier(x)
        return x

## Train

In [13]:
def train(model, optimizer, train_loader, val_loader, scheduler, device):
    model.to(device)
    criterion = nn.CrossEntropyLoss().to(device)
    
    best_score = 0
    best_model = None
    
    for epoch in range(1, CFG['EPOCHS']+1):
        model.train()
        train_loss = []
        running_corrects = 0
        total = 0
        
        for imgs, labels in tqdm(iter(train_loader)):
            imgs = imgs.float().to(device)
            labels = torch.LongTensor(labels).to(device)      # ADDED .type(torch.LongTensor)
            
            optimizer.zero_grad()
            
            output = model(imgs)
            _, preds = torch.max(output, 1)
            loss = criterion(output, labels)
            
            loss.backward()
            optimizer.step()
            
            running_corrects += torch.sum(preds == labels.data)
            train_loss.append(loss.item())
            total += preds.size(0)   
                 
        _val_loss = validation(model, criterion, val_loader, device)
        _train_loss = np.mean(train_loss)
        epoch_acc = 100 * (running_corrects.double() / total)
        
        print(f'Epoch [{epoch}], Train Loss : [{_train_loss:.5f}] Val Loss : [{_val_loss:.5f}] Train Acc : [{epoch_acc:.3f} %]')
       
        if scheduler is not None:
            scheduler.step(_val_loss)
            
        if best_score < _val_loss:
            best_score = _val_loss
            best_model = model
    
    return best_model

In [14]:
def validation(model, criterion, val_loader, device):
    model.eval()
    val_loss = []
    preds, true_labels = [], []

    with torch.no_grad():
        for imgs, labels in tqdm(iter(val_loader)):
            imgs = imgs.float().to(device)
            labels = torch.LongTensor(labels).to(device)        # ADDED .type(torch.LongTensor)
            
            pred = model(imgs)
            
            loss = criterion(pred, labels)
            
            preds += pred.argmax(1).detach().cpu().numpy().tolist()
            true_labels += labels.detach().cpu().numpy().tolist()
            
            val_loss.append(loss.item())
        
        _val_loss = np.mean(val_loss)
    
    return _val_loss

## Run!!

In [15]:
import timm
model_names = timm.list_models(pretrained=True)
print(model_names)

['adv_inception_v3', 'bat_resnext26ts', 'beit_base_patch16_224', 'beit_base_patch16_224_in22k', 'beit_base_patch16_384', 'beit_large_patch16_224', 'beit_large_patch16_224_in22k', 'beit_large_patch16_384', 'beit_large_patch16_512', 'beitv2_base_patch16_224', 'beitv2_base_patch16_224_in22k', 'beitv2_large_patch16_224', 'beitv2_large_patch16_224_in22k', 'botnet26t_256', 'cait_m36_384', 'cait_m48_448', 'cait_s24_224', 'cait_s24_384', 'cait_s36_384', 'cait_xs24_384', 'cait_xxs24_224', 'cait_xxs24_384', 'cait_xxs36_224', 'cait_xxs36_384', 'coat_lite_mini', 'coat_lite_small', 'coat_lite_tiny', 'coat_mini', 'coat_tiny', 'coatnet_0_rw_224', 'coatnet_1_rw_224', 'coatnet_bn_0_rw_224', 'coatnet_nano_rw_224', 'coatnet_rmlp_1_rw_224', 'coatnet_rmlp_nano_rw_224', 'convit_base', 'convit_small', 'convit_tiny', 'convmixer_768_32', 'convmixer_1024_20_ks9_p14', 'convmixer_1536_20', 'convnext_atto', 'convnext_atto_ols', 'convnext_base', 'convnext_base_384_in22ft1k', 'convnext_base_in22ft1k', 'convnext_base

In [16]:
# model = BaseModel()
model = timm.create_model('resnext101_64x4d', pretrained=True, num_classes=34)
optimizer = torch.optim.Adam(params = model.parameters(), lr = CFG["LEARNING_RATE"])
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=0.5, patience=2, threshold_mode='abs', min_lr=1e-8, verbose=True)

infer_model = train(model, optimizer, train_loader, val_loader, scheduler, device)

torch.save(infer_model.state_dict(), './CNN_checkpoint.pth')

  0%|          | 0/479 [00:00<?, ?it/s]

  0%|          | 0/54 [00:00<?, ?it/s]

Epoch [1], Train Loss : [0.20110] Val Loss : [0.02697] Train Acc : [96.046 %]


  0%|          | 0/479 [00:00<?, ?it/s]

  0%|          | 0/54 [00:00<?, ?it/s]

Epoch [2], Train Loss : [0.01015] Val Loss : [0.00070] Train Acc : [99.817 %]


  0%|          | 0/479 [00:00<?, ?it/s]

  0%|          | 0/54 [00:00<?, ?it/s]

Epoch [3], Train Loss : [0.00287] Val Loss : [0.00002] Train Acc : [99.935 %]


  0%|          | 0/479 [00:00<?, ?it/s]

  0%|          | 0/54 [00:00<?, ?it/s]

Epoch [4], Train Loss : [0.00010] Val Loss : [0.00001] Train Acc : [100.000 %]
Epoch 00004: reducing learning rate of group 0 to 5.0000e-05.


  0%|          | 0/479 [00:00<?, ?it/s]

  0%|          | 0/54 [00:00<?, ?it/s]

Epoch [5], Train Loss : [0.00006] Val Loss : [0.00001] Train Acc : [100.000 %]


In [None]:
# infer_model = timm.create_model('tf_efficientnet_b0_ns', pretrained=True, num_classes=34)
# infer_model.load_state_dict(torch.load('./CNN_checkpoint.pth'))
# infer_model.cuda()

## Inference

In [17]:
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader,Dataset
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np
softmax = nn.Softmax(dim=-1)
def pre_image(image_path, model, bbox):
   img = cv2.imread(image_path)
   x1, y1, x2, y2 = int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3])
   img = img[y1:y2 , x1:x2, :]
   # img = img[(y2 - y1)//2:-1 , :, :]
   mean = [0.485, 0.456, 0.406] 
   std = [0.229, 0.224, 0.225]
   transform_norm = transforms.Compose([transforms.ToTensor(), 
   transforms.Resize((CFG['IMG_SIZE'], CFG['IMG_SIZE'])),transforms.Normalize(mean, std)])
   # get normalized image
   img_normalized = transform_norm(img).float()
   img_normalized = img_normalized.unsqueeze_(0)
   # input = Variable(image_tensor)
   img_normalized = img_normalized.to(device)
   # print(img_normalized.shape)
   with torch.no_grad():
      model.eval()  
      output = model(img_normalized)
      _, index = torch.max(output.data, 1)
      score = softmax(output.data.squeeze(0))[index]
   return index.cpu().item(), score.cpu().item()

In [18]:
classes = ["chevrolet_malibu_sedan_2012_2016", "chevrolet_malibu_sedan_2017_2019", 
               "chevrolet_spark_hatchback_2016_2021", "chevrolet_trailblazer_suv_2021_", 
               "chevrolet_trax_suv_2017_2019", "genesis_g80_sedan_2016_2020", 
               "genesis_g80_sedan_2021_", "genesis_gv80_suv_2020_", 
               "hyundai_avante_sedan_2011_2015", "hyundai_avante_sedan_2020_", 
               "hyundai_grandeur_sedan_2011_2016", "hyundai_grandstarex_van_2018_2020", 
               "hyundai_ioniq_hatchback_2016_2019", "hyundai_sonata_sedan_2004_2009", 
               "hyundai_sonata_sedan_2010_2014", "hyundai_sonata_sedan_2019_2020", 
               "kia_carnival_van_2015_2020", "kia_carnival_van_2021_", 
               "kia_k5_sedan_2010_2015", "kia_k5_sedan_2020_", 
               "kia_k7_sedan_2016_2020", "kia_mohave_suv_2020_", 
               "kia_morning_hatchback_2004_2010", "kia_morning_hatchback_2011_2016", 
               "kia_ray_hatchback_2012_2017", "kia_sorrento_suv_2015_2019", 
               "kia_sorrento_suv_2020_", "kia_soul_suv_2014_2018", 
               "kia_sportage_suv_2016_2020", "kia_stonic_suv_2017_2019", 
               "renault_sm3_sedan_2015_2018", "renault_xm3_suv_2020_", 
               "ssangyong_korando_suv_2019_2020", "ssangyong_tivoli_suv_2016_2020"]

In [None]:
# with torch.no_grad():
#     image_path = './custom_dataset/croped_imgs/32/016981.png'
#     index , score = pre_image(image_path, infer_model, None)
#     print(index, score)

In [19]:
import pandas as pd
import numpy as np
from tqdm.auto import tqdm
import cv2
import torch
import glob
from mmdet.apis import init_detector, inference_detector

results = pd.read_csv('./custom_dataset/sample_submission.csv') 
img_path = './custom_dataset/test'
# Specify the path to model config and checkpoint file
config_file = 'configs/faster_rcnn/faster_rcnn_x101_64x4d_fpn_1x_coco.py'
checkpoint_file = 'checkpoints/faster_rcnn_x101_64x4d_fpn_1x_coco_20200204-833ee192.pth'

# build the model from a config file and a checkpoint file
model = init_detector(config_file, checkpoint_file, device='cuda:0')
infer_model.eval()

for img in tqdm(glob.glob(img_path+'/*.png')):
    filename = img.split('/')[-1].split('\\')[-1]
    result = inference_detector(model, img)
    bboxes = result[2]
    for bbox in bboxes:
        if bbox[4] < 0.5: continue
        x1, y1, x2, y2 = bbox[0], bbox[1], bbox[2],bbox[3]
        class_id, score = pre_image(img, infer_model, [x1, y1, x2, y2])
        results = results.append({
            "file_name" : filename,               
            "class_id" : class_id,                                                        
            "confidence" : score,                     
            "point1_x" : x1, "point1_y" : y1,
            "point2_x" : x2, "point2_y" : y1,
            "point3_x" : x2, "point3_y" : y2,
            "point4_x" : x1, "point4_y" : y2,
        }, ignore_index = True)
        image = cv2.imread(img)
        cv2.rectangle(image, (int(x1), int(y1)), (int(x2), int(y2)), (0, 255, 0), 2)
        label = "{}".format(classes[class_id]) + ' ' + str(score)
        print(label)
        cv2.imshow("Cars", image)
        cv2.waitKey()
        cv2.destroyAllWindows()
results.to_csv('submit.csv', index = False)
print(len(results))

load checkpoint from local path: checkpoints/faster_rcnn_x101_64x4d_fpn_1x_coco_20200204-833ee192.pth


  0%|          | 0/3400 [00:00<?, ?it/s]

kia_k5_sedan_2020_ 0.06156598776578903
kia_sportage_suv_2016_2020 0.04622223600745201
kia_morning_hatchback_2004_2010 0.07954694330692291
genesis_g80_sedan_2021_ 0.05211656913161278
kia_morning_hatchback_2004_2010 0.0732710212469101
genesis_g80_sedan_2021_ 0.04432202875614166
kia_morning_hatchback_2004_2010 0.08088451623916626
genesis_g80_sedan_2021_ 0.04532833397388458
genesis_g80_sedan_2021_ 0.04433555528521538
genesis_g80_sedan_2021_ 0.06349968165159225
genesis_g80_sedan_2021_ 0.05634152144193649
genesis_g80_sedan_2021_ 0.04603038355708122
genesis_g80_sedan_2021_ 0.05924428626894951
genesis_g80_sedan_2021_ 0.05166201665997505
genesis_g80_sedan_2021_ 0.04949759319424629
genesis_g80_sedan_2021_ 0.04452462121844292
genesis_g80_sedan_2021_ 0.05399024114012718
kia_sportage_suv_2016_2020 0.06806442886590958
genesis_g80_sedan_2021_ 0.05741387978196144
genesis_g80_sedan_2021_ 0.085752472281456
genesis_g80_sedan_2021_ 0.05464678630232811


KeyboardInterrupt: 

In [None]:
import pandas as pd
import numpy as np
from tqdm.auto import tqdm
import os
import cv2
from torchvision.models import resnet50, ResNet50_Weights
import torchvision.models as models
import torch
from torchvision.models.detection import fasterrcnn_resnet50_fpn_v2, FasterRCNN_ResNet50_FPN_V2_Weights
from torchvision.utils import draw_bounding_boxes
from torchvision.transforms.functional import to_pil_image
from torchvision.io.image import read_image
import glob

results = pd.read_csv('./custom_dataset/sample_submission.csv') 
pred = pd.read_csv('Results/Val_1%_Mixup_distortion_24.csv')
img_path = './custom_dataset/test'

# Step 1: Initialize model with the best available weights
weights = FasterRCNN_ResNet50_FPN_V2_Weights.DEFAULT
model = fasterrcnn_resnet50_fpn_v2(weights=weights, box_score_thresh=0.9)
model.eval()
model.cuda()
# Step 2: Initialize the inference transforms
preprocess = weights.transforms()

for image in tqdm(glob.glob(img_path+'/*.png')):
    
    img = read_image(image)
    img = img[0:3, :, :]
    
    # Step 3: Apply inference preprocessing transforms
    batch = torch.cuda.FloatTensor(preprocess(img).cuda().unsqueeze(0))
    
    filename = image.split('/')[-1].split('\\')[-1]  
    iter_preds = pred.loc[pred['file_name'] == filename]
    # Step 4: Use the model and visualize the prediction
    prediction = model(batch)[0]
    labels = [weights.meta["categories"][i] for i in prediction["labels"]]
    bboxes = prediction["boxes"]
    
    for i, label in enumerate(labels):
        if label == 'car':
            bbox = bboxes[i].detach().cpu().numpy()
            X1, Y1, X2, Y2 = bbox[0], bbox[1], bbox[2],bbox[3]
            # box = draw_bounding_boxes(img, boxes=prediction["boxes"],
            #                     labels=labels,
            #                     colors="red",
            #                     width=4, font_size=30)
            # im = to_pil_image(box.detach())
            # im.show()
            for iter in iter_preds.values:
                x1, y1, x2, y2 = iter[3], iter[4], iter[7], iter[8]
                if x1 <= X1 + 100 and x1 >= X1 - 100 and y1 <= Y1 + 100 and y1 >= Y1 - 100 and x2 <= X2 + 100 and x2 >= X2 - 100 and y2 <= Y2 + 100 and y2 >= Y2 - 100:
                    results = results.append({
                    "file_name" : iter[0],
                    "class_id" : iter[1],
                    "confidence" : iter[2],
                    "point1_x" : x1, "point1_y" : y1,
                    "point2_x" : x2, "point2_y" : y1,
                    "point3_x" : x2, "point3_y" : y2,
                    "point4_x" : x1, "point4_y" : y2,
                }, ignore_index = True)
                # else:
                #     img = cv2.imread(image)
                #     cv2.rectangle(img, (int(x1), int(y1)), (int(x2), int(y2)), (0, 255, 0), 2)
                #     cv2.imshow("Cars", img)
                #     cv2.waitKey()
                #     cv2.destroyAllWindows()

print(len(results))
results.to_csv('Stacked_del_submit.csv', index = False)

In [None]:
from mmdet.apis import init_detector, inference_detector
import mmcv
from tqdm.auto import tqdm
import glob
import pandas as pd

# Specify the path to model config and checkpoint file
config_file = 'configs/faster_rcnn/faster_rcnn_x101_64x4d_fpn_1x_coco.py'
checkpoint_file = 'checkpoints/faster_rcnn_x101_64x4d_fpn_1x_coco_20200204-833ee192.pth'
results = pd.read_csv('./custom_dataset/sample_submission.csv') 
pred = pd.read_csv('del_submit.csv')
img_path = './custom_dataset/test'

# build the model from a config file and a checkpoint file
model = init_detector(config_file, checkpoint_file, device='cuda:0')

for img in tqdm(glob.glob(img_path+'/*.png')):
    filename = img.split('/')[-1].split('\\')[-1]  
    iter_preds = pred.loc[pred['file_name'] == filename]
    result = inference_detector(model, img)
    bboxes = result[2]
    for bbox in bboxes:
        if bbox[4] < 0.5: continue
        X1, Y1, X2, Y2 = bbox[0], bbox[1], bbox[2],bbox[3]
        for iter in iter_preds.values:
            x1, y1, x2, y2 = iter[3], iter[4], iter[7], iter[8]
            if x1 <= X1 + 100 and x1 >= X1 - 100 and y1 <= Y1 + 100 and y1 >= Y1 - 100 and x2 <= X2 + 100 and x2 >= X2 - 100 and y2 <= Y2 + 100 and y2 >= Y2 - 100:
                results = results.append({
                "file_name" : iter[0],
                "class_id" : iter[1],
                "confidence" : iter[2],
                "point1_x" : x1, "point1_y" : y1,
                "point2_x" : x2, "point2_y" : y1,
                "point3_x" : x2, "point3_y" : y2,
                "point4_x" : x1, "point4_y" : y2,
            }, ignore_index = True)

print(len(results))
results.to_csv('Stacked_del_submit.csv', index = False) # 9138

In [None]:
import os
import cv2
import pandas as pd
from tqdm.auto import tqdm
pred = pd.read_csv('Stacked_del_submit.csv')
img_path = './custom_dataset/test'
classes = ["chevrolet_malibu_sedan_2012_2016", "chevrolet_malibu_sedan_2017_2019", 
               "chevrolet_spark_hatchback_2016_2021", "chevrolet_trailblazer_suv_2021_", 
               "chevrolet_trax_suv_2017_2019", "genesis_g80_sedan_2016_2020", 
               "genesis_g80_sedan_2021_", "genesis_gv80_suv_2020_", 
               "hyundai_avante_sedan_2011_2015", "hyundai_avante_sedan_2020_", 
               "hyundai_grandeur_sedan_2011_2016", "hyundai_grandstarex_van_2018_2020", 
               "hyundai_ioniq_hatchback_2016_2019", "hyundai_sonata_sedan_2004_2009", 
               "hyundai_sonata_sedan_2010_2014", "hyundai_sonata_sedan_2019_2020", 
               "kia_carnival_van_2015_2020", "kia_carnival_van_2021_", 
               "kia_k5_sedan_2010_2015", "kia_k5_sedan_2020_", 
               "kia_k7_sedan_2016_2020", "kia_mohave_suv_2020_", 
               "kia_morning_hatchback_2004_2010", "kia_morning_hatchback_2011_2016", 
               "kia_ray_hatchback_2012_2017", "kia_sorrento_suv_2015_2019", 
               "kia_sorrento_suv_2020_", "kia_soul_suv_2014_2018", 
               "kia_sportage_suv_2016_2020", "kia_stonic_suv_2017_2019", 
               "renault_sm3_sedan_2015_2018", "renault_xm3_suv_2020_", 
               "ssangyong_korando_suv_2019_2020", "ssangyong_tivoli_suv_2016_2020"]

for i in tqdm(range(len(pred))):
    scores = pred.iloc[i, :]
    label = "{}".format(classes[scores[1]]) + ' ' + str(scores[2])
    image_path = os.getcwd()+'/custom_dataset/test/'+scores[0]
    image = cv2.imread(image_path)
    cv2.rectangle(image, (int(scores[3]), int(scores[4])), (int(scores[7]), int(scores[8])), (0, 255, 0), 2)
    cv2.putText(image, label, (int(scores[3]), int(scores[4])-4), cv2.FONT_HERSHEY_COMPLEX_SMALL,
                        2 * 0.75, (0, 255, 0), 2, lineType=cv2.LINE_AA)
    cv2.imshow("Cars", image)
    cv2.waitKey()
    cv2.destroyAllWindows()