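**About** : This notebook evaluates trained detection models on the validation set and runs the scatter-plot extraction pipeline (detection, post-processing, OCR, regression).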

In [None]:
# %load_ext nb_black
# Reload edited project modules automatically, so changes in ../src are picked
# up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [None]:
cd ../src/

## Initialization

### Imports

In [None]:
import os
# Expose only GPU "0" to CUDA for this process; set before the project modules
# are imported in the cells below.
os.environ['CUDA_VISIBLE_DEVICES'] = "0"

In [None]:
import os
import cv2
import sys
import ast
import glob
import json
import yaml
import shutil
import warnings
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from tqdm import tqdm

# Hide UserWarning messages in the cell outputs.
warnings.filterwarnings("ignore", category=UserWarning)

# Wider DataFrame display. Full option names are used on purpose: abbreviated
# keys such as 'max_colwidth' rely on pandas' pattern matching and can become
# ambiguous if pandas adds another option with a similar name.
pd.set_option('display.width', 500)
pd.set_option('display.max_colwidth', 100)

In [None]:
from params import *
from util.plots import *
from inference.yolo import *
from util.metrics import *

from post_process.retrieve import retrieve_missing_boxes
from post_process.reg import rounding, linear_regression
from post_process.ticks import restrict_on_line, assign
from post_process.in_graph import post_process_preds

In [None]:
# Dataset version: selects the ../input/{VERSION}/ image and label folders and
# the class list used by the evaluation cells below.
VERSION = "v3"

### Load data

In [None]:
# Load the four input tables in one pass: training metadata, text annotations,
# target series and chart elements.
df, df_text, df_target, df_elt = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../input/df_train.csv',
        '../input/texts.csv',
        '../input/y_train.csv',
        '../input/elements.csv',
    )
)

In [None]:
# Discard the samples whose id is listed in ANOMALIES, then renumber the rows.
is_anomaly = df['id'].isin(ANOMALIES)
df = df[~is_anomaly].reset_index(drop=True)

In [None]:
# Attach the split assignment to every sample.
# No `on=` is given, so pandas joins on all columns the two frames share, and
# the default inner join drops samples that are absent from df_split.
df_split = pd.read_csv('../input/df_split.csv')
df = pd.merge(df, df_split)

In [None]:
# Chart types kept for this run; rows of any other chart type are dropped.
CLASSES = ["dot", "line", "scatter"]

keep_chart_type = df['chart-type'].isin(CLASSES)
df = df[keep_chart_type].reset_index(drop=True)

### Model

In [None]:
class Config:
    """
    Inference settings handed to `retrieve_model` and `predict`.
    """
    # Model and box formats
    selected_model = "yolo"
    bbox_format = "yolo"
    pred_format = "pascal_voc"

    # Checkpoint to load.
    # Earlier checkpoint: "/workspace/kaggle_benetech/logs/yolov7x-w6-v2.5/weights/best.pt"
    # NOTE(review): the directory name ends with "v4." - confirm the trailing dot is intended.
    weights = "/workspace/kaggle_benetech/logs/yolov7x-w6-v4./weights/last.pt"

    # Input resolution (an earlier setting was (512, 512)).
    size = (640, 640)

    # NMS
    # Both threshold lists hold 4 values - presumably one per detection class,
    # matching the 4-entry class list; confirm in the inference code.
    conf_thresh = [0.1, 0.4, 0.2, 0.5]  # todo : per class
    iou_thresh = [0.5, 0.25, 0.25, 0.75]
    min_per_img = 0
    max_per_img = 500

    # Runtime
    device = "cuda"
    val_bs = 16

In [None]:
# Instantiate the detection model from the settings in Config.
model = retrieve_model(Config)

### Evaluate

In [None]:
# Chart types to evaluate. Other available values:
# "dot", "line", "vertical_bar", "horizontal_bar".
chart_types = ["scatter"]

# Detection class names, in label-index order (index i <-> classes[i] in the
# evaluation loop). The "v1" labels use a different, 6-class set.
classes = (
    ["x_text", "y_text", "x_tick", "y_tick", "point", "bar"]
    if VERSION == "v1"
    else ['chart', 'text', 'tick', 'point']
)

In [None]:
# Validation subset, with image / label paths for the current dataset VERSION.
df_val = df[df['split'] == "val"].reset_index(drop=True)
df_val['path'] = f'../input/{VERSION}/images/valid/' + df_val['id'] + '.jpg'
df_val['gt_path'] = f'../input/{VERSION}/labels/valid/' + df_val['id'] + '.txt'
df_val_ = df_val.copy()  # untouched copy; df_val is narrowed per chart type below

n_classes = len(classes)

for chart_type in chart_types:
    print(f'\n-> Chart type : {chart_type}\n')
    df_val = df_val_[df_val_['chart-type'] == chart_type].reset_index(drop=True)

    transforms = get_transfos(size=Config.size)
    dataset = InferenceDataset(df_val, transforms)

    meter = predict(model, dataset, Config)
    # Give each prediction the (height, width) stored in df_val for its image.
    for row, pred_boxes in enumerate(meter.preds):
        pred_boxes.update_shape((df_val.loc[row, 'img_h'], df_val.loc[row, 'img_w']))

    # Per-class list of per-image F1 scores.
    scores = dict((class_name, []) for class_name in classes)
    for idx in tqdm(range(len(dataset))):
        img, gt_yolo, shape = dataset[idx]

        # Ground truth: convert from yolo format, then split per class index.
        gt_boxes = Boxes(gt_yolo, (shape[0], shape[1]), bbox_format="yolo")['pascal_voc']
        gt_labels = dataset.classes[idx]
        gt_per_class = [gt_boxes[gt_labels == c] for c in range(n_classes)]

        # Predictions: split per class index, then post-process.
        det_boxes = meter.preds[idx]['pascal_voc']
        det_labels = meter.labels[idx]
        preds = [det_boxes[det_labels == c] for c in range(n_classes)]
        preds = post_process_preds(preds)

        for class_name, gt_c, pred_c in zip(classes, gt_per_class, preds):
            metrics = compute_metrics(pred_c, gt_c)
            scores[class_name].append(metrics['f1_score'])

    # Mean F1 and share of images with a perfect F1, per class.
    for k, v in scores.items():
        print(f'{k} \t Avg F1: {np.mean(v):.3f}  \t Avg F1==1: {np.mean(np.array(v) == 1):.3f}')

In [None]:
# NOTE(review): this cell performs the same evaluation as the preceding cell
# (same inputs, same printed metrics) - consider deleting one of the two.
#
# Per chart type: run the detector on the validation images, then print the
# per-class mean F1 and the share of images with F1 == 1.
df_val = df[df['split'] == "val"].reset_index(drop=True)
df_val['path'] = f'../input/{VERSION}/images/valid/' + df_val['id'] + '.jpg'
df_val['gt_path'] = f'../input/{VERSION}/labels/valid/' + df_val['id'] + '.txt'
df_val_ = df_val.copy()

for t in chart_types:
    print(f'\n-> Chart type : {t}\n')
    df_val = df_val_[df_val_['chart-type'] == t].reset_index(drop=True)

    transforms = get_transfos(size=Config.size)
    dataset = InferenceDataset(df_val, transforms)
    
    meter = predict(model, dataset, Config)
    # Give each prediction the (height, width) stored in df_val for its image.
    for i, p in enumerate(meter.preds):
        p.update_shape((df_val['img_h'][i], df_val['img_w'][i]))

    scores = {c: [] for c in classes}
    for idx in tqdm(range(len(dataset))):
        img, gt, shape = dataset[idx]

        # Convert the yolo-format ground truth, then split boxes per class index.
        gt = Boxes(gt, (shape[0], shape[1]), bbox_format="yolo")['pascal_voc']
        gt = [gt[dataset.classes[idx] == i] for i in range(len(classes))]
        preds = [meter.preds[idx]['pascal_voc'][meter.labels[idx] == i] for i in range(len(classes))]
        
        preds = post_process_preds(preds)

        # Caution: `t` and `p` are rebound here and shadow the outer loop variables.
        for i, (t, p) in enumerate(zip(gt, preds)):
            metrics = compute_metrics(p, t)
            scores[classes[i]].append(metrics['f1_score'])
    #         print(classes[i], metrics['f1_score'])
    #     print()
    #     if idx == 1:
    #         break
    for k, v in scores.items():
        print(f'{k} \t Avg F1: {np.mean(v):.3f}  \t Avg F1==1: {np.mean(np.array(v) == 1):.3f}')
#         break
#     break

### Predict
- IoU per class
- merge xticks and yticks (/labels)
- train without bars

In [None]:
# Validation subset for the prediction pipeline.
# Paths follow VERSION, as in the evaluation cells; they were previously
# hard-coded to the "v1" folders, which does not match the configured version.
df_val = df[df['split'] == "val"].reset_index(drop=True)
df_val['path'] = f'../input/{VERSION}/images/valid/' + df_val['id'] + '.jpg'
df_val['gt_path'] = f'../input/{VERSION}/labels/valid/' + df_val['id'] + '.txt'

In [None]:
# Chart types to run the pipeline on. Other available values:
# "dot", "line", "vertical_bar", "horizontal_bar".
TYPES = ["scatter"]

is_selected_type = df_val['chart-type'].isin(TYPES)
df_val = df_val[is_selected_type].reset_index(drop=True)
# Optional extra filter on the data source:
# df_val = df_val[df_val['source'] == "extracted"].reset_index(drop=True)

In [None]:
# Dataset used for detection: validation rows with the transforms built for
# the configured input size.
transforms = get_transfos(size=Config.size)
dataset = InferenceDataset(df_val, transforms)

In [None]:
%%time
meter = predict(model, dataset, Config)

for i, p in enumerate(meter.preds):
    p.update_shape((df_val['img_h'][i], df_val['img_w'][i]))

In [None]:
# Rebuild the dataset without transforms (None); the main loop below reads its
# images from this dataset.
dataset = InferenceDataset(df_val, None)

### OCR

In [None]:
import transformers
# Only show transformers log messages at error level.
transformers.utils.logging.set_verbosity_error()

from transformers import TrOCRProcessor
from transformers import VisionEncoderDecoderModel

from util.boxes import expand_boxes
from util.ocr import *

In [None]:
# Pretrained TrOCR checkpoint; the processor and the model are loaded from the
# same name.
name = "microsoft/trocr-base-stage1"

processor = TrOCRProcessor.from_pretrained(name)

# NOTE(review): the device is hard-coded to CUDA here while the detector uses
# Config.device - consider aligning the two.
ocr_model = VisionEncoderDecoderModel.from_pretrained(name)
ocr_model = ocr_model.cuda()

In [None]:
# x = np.exp(np.arange(-3, 3))
# plt.plot(np.arange(-3, 3), x)
# plt.yscale('log')
# plt.grid()

### Main
- verify sim between dets
- conv sim not robust to col  (#26)
- 21 26 56 80 93 94

In [None]:
# When True, the main loop below plots intermediate detections and prints /
# displays the OCR labels and the GT / PRED tables.
PLOT = False

In [None]:
# Run the full extraction pipeline on every chart of `dataset` and score the
# recovered x / y series against the target table.
#
# NOTE(review): the positions used on `preds` below (1 / 2 -> boxes sent to OCR
# for the x / y labels, 3 / 4 -> boxes passed to the x / y regressions,
# -1 -> point boxes) follow the layout returned by post_process_preds -
# confirm against that function.
scores = []
for idx in range(len(dataset)):
    # Only the image is needed; the ground truth is read from df_target.
    # (Indexing instead of unpacking into `_` keeps IPython's `_` intact.)
    img = dataset[idx][0]

    id_ = df_val.id[idx]

    print(idx, id_, end="\t")
    title = f"{id_} - {df_val.source[idx]} {df_val['chart-type'][idx]}"

    # Split the detections per class index, then post-process them.
    preds = [meter.preds[idx]['pascal_voc'][meter.labels[idx] == i] for i in range(len(classes))]
    preds = post_process_preds(preds)

    if PLOT:
        plot_results(img, preds, figsize=(12, 7), title=title)

    # Margin = 1/20 of the mean image side.
    margin = (img.shape[0] + img.shape[1]) / (2 * 20)
    preds = restrict_on_line(preds, margin=margin)

    # Append any boxes recovered by retrieve_missing_boxes to the point boxes.
    retrieved_boxes = retrieve_missing_boxes(preds, img, verbose=PLOT)
    if len(retrieved_boxes):
        preds[-1] = np.concatenate([preds[-1], retrieved_boxes])

    if PLOT:
        plot_results(img, preds, figsize=(12, 7), title=title)

    # OCR
    x_texts = ocr(ocr_model, processor, img, preds[1], margin=1, plot=PLOT)
    x_values, x_errors = post_process_texts(x_texts)

    if PLOT:
        print("x labels :", x_values, " - errors:", x_errors)

    # Target rows for this chart. Selected before branching so that the debug
    # display at the end of the iteration always shows the current chart.
    gt = df_target[df_target['id'] == id_].reset_index(drop=True)

    if len(preds[-1]):
        reg_x = linear_regression(preds[3], x_values, x_errors, preds[-1], mode="x", verbose=PLOT)

        y_texts = ocr(ocr_model, processor, img, preds[2], margin=3, plot=PLOT)
        y_values, y_errors = post_process_texts(y_texts)

        if PLOT:
            print("y labels :", y_values, " - errors:", y_errors)

        reg_y = linear_regression(preds[4], y_values, y_errors, preds[-1], mode="y", verbose=PLOT)

        gt[["x", "y"]] = gt[["x", "y"]].astype(float)
        gt = gt.sort_values(['x', 'y'], ignore_index=True)

        # NOTE(review): only x is rounded here - confirm y is meant to stay unrounded.
        reg_x = np.round(reg_x, rounding(np.max(reg_x)))
        pred = pd.DataFrame({"x": reg_x, "y": reg_y})
        pred = pred.sort_values(['x', 'y'], ignore_index=True)

        score_x = score_series(gt['x'].values, pred['x'].values)
        score_y = score_series(gt['y'].values, pred['y'].values)
    else:
        # No point box: nothing to regress, both axes score 0. `pred` is reset
        # to an empty table so the debug display below neither raises a
        # NameError nor shows the previous chart's prediction.
        pred = pd.DataFrame({"x": [], "y": []})
        score_x, score_y = 0, 0

    print(f"Scores  -  x: {score_x:.3f}  - y: {score_y:.3f}")

    scores += [score_x, score_y]

    if PLOT:
        print('GT')
        display(gt)
        print('PRED')
        display(pred)

In [None]:
# Mean over all per-chart x and y scores collected by the main loop.
print(f'Scatter CV : {np.mean(scores) :.3f}')

In [None]:
# NOTE(review): same statement as the preceding cell - one of the two can be removed.
print(f'Scatter CV : {np.mean(scores) :.3f}')

Done ! 