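**About**: This notebook runs a trained detection model to generate the similarity-augmented dataset (`{VERSION}_sim`) that is then used to train detection models.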

In [None]:
# %load_ext nb_black
# Reload imported modules automatically before each cell is executed, so
# edits to the project's .py files are picked up without a kernel restart.
%load_ext autoreload
%autoreload 2

In [None]:
# Render matplotlib figures inline, below the cell that produces them.
%matplotlib inline

In [None]:
cd ../src/

## Initialization

### Imports

In [None]:
import os

# Expose only GPU "0" to this process (set before the heavier imports below).
os.environ.update(CUDA_VISIBLE_DEVICES="0")

In [None]:
import os
import re
import cv2
import sys
import ast
import glob
import json
import yaml
import shutil
import warnings
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from tqdm import tqdm
from distutils.dir_util import copy_tree

# Hide UserWarning messages and widen the pandas text display.
warnings.filterwarnings(action="ignore", category=UserWarning)
pd.options.display.width = 500
pd.options.display.max_colwidth = 100

In [None]:
from params import *
from util.plots import *
from inference.yolo import *
from util.metrics import *

from post_process.similarity import extract_similarities
from post_process.ticks import restrict_on_line, assign
from post_process.in_graph import post_process_preds

### Load data

In [None]:
# Read the four input tables: train metadata, texts, targets and elements.
df, df_text, df_target, df_elt = map(
    pd.read_csv,
    (
        '../input/df_train.csv',
        '../input/texts.csv',
        '../input/y_train.csv',
        '../input/elements.csv',
    ),
)

In [None]:
# Remove the rows whose id is listed in ANOMALIES
# (presumably provided by the `params` star import - TODO confirm).
is_anomaly = df['id'].isin(ANOMALIES)
df = df.loc[~is_anomaly].reset_index(drop=True)

In [None]:
# Attach the split information; with no `on=` given, pandas joins (inner)
# on the columns the two frames have in common.
df_split = pd.read_csv('../input/df_split.csv')
df = pd.merge(df, df_split)

In [None]:
# Chart types to keep. "dot" and "line" are currently switched off.
CLASSES = [
    "scatter",
    # "dot",
    # "line",
]

keep_rows = df['chart-type'].isin(CLASSES)
df = df[keep_rows].reset_index(drop=True)

### Model

In [None]:
class ConfigMarker:
    """
    Settings of the YOLO detector used in this notebook.

    Only class attributes are defined. The class object itself (not an
    instance) is what gets passed around as the config.
    """
    # Model type and box formats
    selected_model = "yolo"
    bbox_format = "yolo"
    pred_format = "pascal_voc"

    # Checkpoint to load
    weights = "/workspace/kaggle_benetech/logs/yolov7x-w6-v8.2/weights/best.pt"
    # Alternative, exported weights + config:
    #     weights = '../output/weights/det_1/yolov7x-w6-v8.2_weights.pt'
    #     cfg = "../output/weights/det_1/yolov7x-w6-v8.2_cfg.yml"

    # Model version and the classes it detects
    version = "v5"
    labels = ['chart', 'text', 'tick', 'point']
    # Alternatives:
    #     version = "v6"  -> labels = ['chart', 'text', 'tick']
    #     version = "v11" -> labels = ["point"]

    # Input size. Alternatives: (512, 512), (1024, 1024)
    size = (640, 640)

    # NMS thresholds - presumably one value per entry of `labels` (TODO confirm)
    # Alternatives:
    #     conf_thresh = 0.001                   iou_thresh = 0.5
    #     conf_thresh = [0.1, 0.4, 0.2, 0.001]  iou_thresh = [0.5, 0.25, 0.25, 0.5]
    conf_thresh = [0.1, 0.4, 0.2, 0.1]
    iou_thresh = [0.5, 0.25, 0.25, 0.1]

    # Bounds on the number of detections per image
    max_per_img = 500
    min_per_img = 1

    # Inference batch size and device
    val_bs = 16
    device = "cuda"


# The class is used directly as the config object
config_marker = ConfigMarker
VERSION = config_marker.version

In [None]:
# Stop early if the checkpoint is missing, then build the detector.
weights_path = config_marker.weights
assert os.path.exists(weights_path), "Weights do not exist"
model_marker = retrieve_model(config_marker)

In [None]:
# from models.yolo import Model
# from inference.yolo import YoloWrapper
# from utils.general import non_max_suppression

# def retrieve_model_robust(config):
#     model = Model(config.cfg)
#     model.load_state_dict(torch.load(config.weights), strict=True)
#     model_marker = YoloWrapper(model, config_marker).cuda()
#     model_marker.non_max_suppression = non_max_suppression
#     model_marker.eval()
    
#     return model_marker

In [None]:
# assert os.path.exists(config_marker.weights), "Weights do not exist"
# model_marker = retrieve_model_robust(config_marker)

In [None]:
# FOLDER = "../output/weights/det_1/"
# name = config_marker.weights.split('/')[-3]

# cp = torch.load(config_marker.weights)

# import yaml
# with open(FOLDER + name + '_cfg.yml', 'w') as outfile:
#     yaml.dump(cp['model'].yaml, outfile)
    
# from util.torch import save_model_weights
# torch.save(cp['model'].state_dict(), FOLDER + name + "_weights.pt")

# print('-> Saved config to', FOLDER + name + '_cfg.yml')
# print('-> Saved weight to', FOLDER + name + '_weights.pt')

### Data

In [None]:
# Dataset version processed below. This overrides the VERSION that was set
# from config_marker.version in the model section.
VERSION = "v12"

In [None]:
# One row per .jpg found in any sub-folder of the dataset's images/ folder.
image_paths = glob.glob(f'../input/{VERSION}/images/*/*.jpg')
df = pd.DataFrame({"path": image_paths})

In [None]:
# id = file name without its folder and without the 4-character ".jpg" suffix
df['id'] = df['path'].map(lambda path: path[:-4].rsplit('/', 1)[-1])

In [None]:
# Label file of each image: same path with /images/ -> /labels/ and .jpg -> .txt
df['gt_path'] = df['path'].map(
    lambda path: re.sub('/images/', '/labels/', path[:-4]) + '.txt'
)

### Main

In [None]:
# SAVE  : create the output folders / data config and write the generated images
# PLOT  : show the similarity map, the input image and the final image
# DEBUG : same plots as PLOT, and stop after the first image
SAVE = True
PLOT = False
DEBUG = False

In [None]:
# Row positions of df, cut into consecutive groups of at most `chunk_size`.
chunk_size = 100
ids = np.arange(len(df))

chunk_starts = range(0, len(ids), chunk_size)
chunks = [ids[start: start + chunk_size] for start in chunk_starts]

In [None]:
if SAVE:
    SAVE_FOLDER = f"../input/{VERSION}_sim/"

    # Output tree: root, image folders for both splits, and the labels folder
    for subfolder in ("", "images/", "images/train/", "images/valid/", "labels/"):
        os.makedirs(SAVE_FOLDER + subfolder, exist_ok=True)

    # Labels are reused as they are: copy them over from the source dataset
    for split in ("train", "valid"):
        _ = copy_tree(
            f"../input/{VERSION}/labels/{split}",
            f"../input/{VERSION}_sim/labels/{split}",
        )

In [None]:
if SAVE:
    # Build the data config of the "_sim" dataset from the base one: class
    # count and names are copied, image folders point inside SAVE_FOLDER.
    # VERSION[1:] drops the leading character of VERSION ("v12" -> "12").
    # The file is opened in a `with` block so the handle is always closed.
    with open(f'../yolov7/data_{VERSION[1:]}.yaml', 'r') as infile:
        cfg = yaml.safe_load(infile)

    data_yaml = dict(
        train=SAVE_FOLDER + "images/train/",
        val=SAVE_FOLDER + "images/valid/",
        nc=cfg['nc'],
        names=cfg['names'],
    )

    print(data_yaml)

    with open(f'../yolov7/data_{VERSION[1:]}_sim.yaml', 'w') as outfile:
        yaml.dump(data_yaml, outfile, default_flow_style=True)

In [None]:
# Same inline backend as selected at the top of the notebook, re-issued
# right before the main loop.
%matplotlib inline

In [None]:
# For every image: run the detector, post-process its boxes, build the
# similarity map from the extracted features and, if SAVE is set, write a
# 3-channel image to the "_sim" dataset.
for chunk in tqdm(chunks):

    df_val = df.iloc[chunk].reset_index(drop=True)

    # Detector pass on the transformed images; `fts` is what predict returns
    # in addition to the predictions when extract_fts=True.
    transforms = get_transfos(size=config_marker.size)
    dataset = InferenceDataset(df_val, transforms)

    meter_marker, fts = predict(model_marker, dataset, config_marker, extract_fts=True)

    # Same rows without transforms, used to read the images back below.
    dataset = InferenceDataset(df_val, None)

    for idx in range(len(dataset)):
        img, gt, shape = dataset[idx]

        preds = meter_marker.preds[idx]
        preds.update_shape(shape)

        # One array of pascal_voc boxes per class, in config_marker.labels order
        preds = [
            preds['pascal_voc'][meter_marker.labels[idx] == label_idx]
            for label_idx in range(len(config_marker.labels))
        ]
        preds = post_process_preds(preds)

        try:
            sim_img = extract_similarities(fts, idx, preds, img, verbose=0)
        except Exception as err:
            # Best effort: keep going with an all-zero map (the two similarity
            # channels then end up at 255), but report which image failed and why.
            print(f"Error extracting similarities for {df_val['id'][idx]}: {err!r}")
            sim_img = np.zeros(img.shape)

        # Channel 0: mean of the image over its last axis.
        # Channels 1-2: (1 - sim) * 255 for the first two channels of sim_img
        # (assumes sim_img values lie in [0, 1] - TODO confirm).
        img_final = np.concatenate([
            img.mean(-1, keepdims=True),
            (1 - sim_img[:, :, :2]) * 255
        ], -1).astype(np.uint8)

        if SAVE:
            # Same relative path, inside the "_sim" dataset. Plain string
            # replacement: VERSION is a literal, not a regex pattern.
            out_path = df_val['path'][idx].replace(f'/{VERSION}/', f'/{VERSION}_sim/')
            # cv2.imwrite reports failure through its return value
            if not cv2.imwrite(out_path, img_final):
                print(f'Could not write {out_path}')

        if PLOT or DEBUG:
            plt.figure(figsize=(15, 7))
            plt.subplot(1, 2, 1)
            plt.imshow(sim_img)
            plt.axis(False)
            plt.subplot(1, 2, 2)
            plt.imshow(img)
            plt.axis(False)
            plt.show()

            plt.figure(figsize=(15, 7))
            plt.imshow(img_final)
            plt.show()

        # In DEBUG mode only the first image of the first chunk is processed
        if DEBUG:
            break
    if DEBUG:
        break

Done!