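**About** : This notebook is used to evaluate pretrained detection models and the scatter-chart extraction pipeline (detection, OCR, axis regression) on the validation split.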

In [None]:
# %load_ext nb_black
%load_ext autoreload
%autoreload 2

In [None]:
%matplotlib inline

In [None]:
cd ../src/

## Initialization

### Imports

In [None]:
import os
# Restrict this kernel to GPU 0.
# NOTE(review): presumably has to run before any CUDA-using library is imported - confirm.
os.environ['CUDA_VISIBLE_DEVICES'] = "0"

In [None]:
import os
import cv2
import sys
import ast
import glob
import json
import yaml
import shutil
import warnings
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from tqdm import tqdm

# Silence every UserWarning to keep cell outputs readable.
warnings.filterwarnings("ignore", category=UserWarning)

# Widen pandas' text output. Options are addressed by their full `display.*`
# names: the bare 'max_colwidth' shorthand is resolved by pattern matching and
# can break if pandas adds another option with a similar name.
pd.set_option("display.width", 500)
pd.set_option("display.max_colwidth", 100)

In [None]:
from params import *
from util.plots import *
from inference.yolo import *
from util.metrics import *

from post_process.retrieve import retrieve_missing_boxes
from post_process.reg import rounding, linear_regression
from post_process.ticks import restrict_on_line, assign
from post_process.in_graph import post_process_preds

### Load data

In [None]:
# Input tables (paths are relative to the src/ working directory).
df = pd.read_csv('../input/df_train.csv')  # image-level table, filtered on `id` / `chart-type` below
df_text = pd.read_csv('../input/texts.csv')  # not used in the cells visible in this notebook
df_target = pd.read_csv('../input/y_train.csv')  # ground-truth series; the main loop reads its id / x / y columns
df_elt = pd.read_csv('../input/elements.csv')  # not used in the cells visible in this notebook

In [None]:
# Drop the images whose id is listed in ANOMALIES (brought in by one of the star imports above).
df = df[~df['id'].isin(ANOMALIES)].reset_index(drop=True)

In [None]:
# Attach the train / val assignment; the `split` column is filtered on in later cells.
df_split = pd.read_csv('../input/df_split.csv')
# No `on=` is given, so pandas joins on every column name shared by the two frames,
# and the default inner join drops rows missing from df_split.
df = df.merge(df_split)

In [None]:
# Chart types kept for this run; uncomment an entry to include it.
CLASSES = [
#     "dot",
#     "line",
    "scatter",
]

# Keep only the rows whose chart type is selected above.
df = df[df['chart-type'].isin(CLASSES)].reset_index(drop=True)

### Model

In [None]:
class ConfigChart:
    """
    Inference settings of the detector used for the chart elements.

    The class itself (not an instance) is handed to `retrieve_model` and
    `predict` as the configuration object for `model_chart`.
    """

    # ---- Model & box conventions ----
    selected_model = "yolo"
    bbox_format = "yolo"
    pred_format = "pascal_voc"

    # ---- Data version & detected classes ----
    version = "v8"
    labels = ["chart", "text", "tick", "point"]
    # Other setups tried:
    #   version = "v6" ; labels = ['chart', 'text', 'tick']
    #   version = "v7" ; labels = ["dots"]

    # ---- Checkpoint ----
    weights = "/workspace/kaggle_benetech/logs/yolov7x-w6-v8.2/weights/last.pt"
    # Previously tried checkpoints:
    #   "/workspace/kaggle_benetech/logs/yolov7x-w6-v2.5/weights/best.pt"
    #   "/workspace/kaggle_benetech/logs/yolov7x-w6-v6.6/weights/best.pt"  # detect only labels & ticks
    #   "/workspace/kaggle_benetech/logs/yolov7x-w6-v7./weights/best.pt"  # detect only markers
    #   "/workspace/kaggle_benetech/logs/yolov7x-w6-v4./weights/last.pt"
    #   "/workspace/kaggle_benetech/logs/yolov7x-w6-v8./weights/best.pt"

    # ---- Input resolution ----
    size = (640, 640)  # also tried: (512, 512), (1024, 1024)

    # ---- NMS ----
    # One threshold per entry of `labels` (presumably index-aligned - confirm in the NMS code).
    conf_thresh = [0.1, 0.4, 0.2, 0.2]
    iou_thresh = [0.5, 0.25, 0.25, 0.25]
    # Alternative tried:
    #   conf_thresh = [0.1, 0.4, 0.2, 0.001]
    #   iou_thresh = [0.5, 0.25, 0.25, 0.5]

    max_per_img = 500
    min_per_img = 0

    # ---- Runtime ----
    val_bs = 16
    device = "cuda"


config_chart = ConfigChart

In [None]:
# Fail early with a clear message if the checkpoint file is missing, then build the chart detector.
assert os.path.exists(config_chart.weights), "Weights do not exist"
model_chart = retrieve_model(config_chart)

In [None]:
class ConfigMarker:
    """
    Inference settings of the detector used for the markers (points).

    The class itself (not an instance) is handed to `retrieve_model` and
    `predict` as the configuration object for `model_marker`. Its `version`
    also becomes the notebook-wide `VERSION`, which selects the
    `../input/{VERSION}/` folder the validation images and labels are read from.
    """

    # ---- Model & box conventions ----
    selected_model = "yolo"
    bbox_format = "yolo"
    pred_format = "pascal_voc"

    # ---- Data version & detected classes ----
    # NOTE(review): the checkpoint below comes from run v8.2 while version is "v5" -
    # confirm this is the intended dataset folder.
    version = "v5"
    labels = ["chart", "text", "tick", "point"]
    # Other setups tried:
    #   version = "v6" ; labels = ['chart', 'text', 'tick']
    #   version = "v11" ; labels = ["point"]

    # ---- Checkpoint ----
    weights = "/workspace/kaggle_benetech/logs/yolov7x-w6-v8.2/weights/best.pt"
    # Previously tried checkpoints:
    #   "/workspace/kaggle_benetech/logs/yolov7x-w6-v2.5/weights/best.pt"
    #   "/workspace/kaggle_benetech/logs/yolov7x-w6-v6.6/weights/best.pt"  # detect only labels & ticks
    #   "/workspace/kaggle_benetech/logs/yolov7x-w6-v7./weights/best.pt"  # detect only markers
    #   "/workspace/kaggle_benetech/logs/yolov7x-w6-v4./weights/last.pt"
    #   "/workspace/kaggle_benetech/logs/yolov7x-w6-v8./weights/best.pt"
    #   "/workspace/kaggle_benetech/logs/yolov7x-w6-v7.20/weights/best.pt"
    #   "/workspace/kaggle_benetech/logs/yolov7x-w6-v7.21/weights/best.pt"
    #   "/workspace/kaggle_benetech/logs/yolov7x-w6-v10./weights/last.pt"
    #   "/workspace/kaggle_benetech/logs/yolov7x-w6-v11./weights/last.pt"
    #   "/workspace/kaggle_benetech/logs/yolov7x-w6-v11.5/weights/last.pt"

    # ---- Input resolution ----
    size = (640, 640)  # also tried: (512, 512), (1024, 1024)

    # ---- NMS ----
    # One threshold per entry of `labels` (presumably index-aligned - confirm in the NMS code).
    conf_thresh = [0.1, 0.4, 0.2, 0.2]
    iou_thresh = [0.5, 0.25, 0.25, 0.25]
    # Alternatives tried:
    #   conf_thresh = 0.001  # [0.1, 0.4, 0.2, 0.2]
    #   iou_thresh = 0.5  # [0.5, 0.25, 0.25, 0.25]
    #   conf_thresh = [0.1, 0.4, 0.2, 0.001]
    #   iou_thresh = [0.5, 0.25, 0.25, 0.5]

    max_per_img = 200
    min_per_img = 0

    # ---- Runtime ----
    val_bs = 16
    device = "cuda"


config_marker = ConfigMarker
VERSION = config_marker.version

In [None]:
# Fail early with a clear message if the checkpoint file is missing, then build the marker detector.
assert os.path.exists(config_marker.weights), "Weights do not exist"
model_marker = retrieve_model(config_marker)

### Evaluate

In [None]:
# Chart types the evaluation loop below iterates over (one metrics report per type);
# uncomment an entry to include it.
chart_types = [
#     "dot",
#     "line",
#     "vertical_bar",
#     "horizontal_bar",
    "scatter",
]

In [None]:
# from models.yolo import Model
# model_marker = retrieve_model(
#     config_marker,
#     Model("../yolov7/cfg/training/yolov7-w6.yaml", nc=len(config_marker.labels))
# )

# state_dict = torch.load(config_marker.weights)
# state_dict = state_dict['model'].state_dict()

# model_marker.model.load_state_dict(state_dict)

In [None]:
# Validation subset, with the image / label paths of the selected dataset version.
df_val = df[df['split'] == "val"].reset_index(drop=True)  # .head(10)
df_val['path'] = f'../input/{VERSION}/images/valid/' + df_val['id'] + '.jpg'
df_val['gt_path'] = f'../input/{VERSION}/labels/valid/' + df_val['id'] + '.txt'
df_val_ = df_val.copy()

class_names = config_marker.labels

# Per chart type: run the marker detector, then report per-class detection metrics.
for chart_type in chart_types:
    print(f'\n-> Chart type : {chart_type}\n')
    df_val = df_val_[df_val_['chart-type'] == chart_type].reset_index(drop=True)  # .head(8)

    if not len(df_val):
        # Nothing to evaluate; averaging empty metric lists would only print NaNs.
        print('No validation image for this chart type, skipping.')
        continue

    transforms = get_transfos(size=config_marker.size)
    dataset = InferenceDataset(df_val, transforms)

    # Run inference exactly once and accept both return conventions of `predict`
    # (meter alone, or a (meter, features) pair). The previous bare `except:`
    # retried the whole inference on any error and hid genuine failures.
    outputs = predict(model_marker, dataset, config_marker)
    if isinstance(outputs, (tuple, list)):
        meter, fts = outputs
    else:
        meter = outputs

    # Give each prediction the (height, width) of its image.
    for i, p in enumerate(meter.preds):
        p.update_shape((df_val['img_h'][i], df_val['img_w'][i]))

    f1s = {c: [] for c in class_names}
    recalls = {c: [] for c in class_names}
    for idx in tqdm(range(len(dataset))):
        img, gt, shape = dataset[idx]

        # Split ground truth and predictions into one pascal_voc box array per class.
        gt = Boxes(gt, (shape[0], shape[1]), bbox_format="yolo")['pascal_voc']
        gt = [gt[dataset.classes[idx] == i] for i in range(len(class_names))]
        preds = [
            meter.preds[idx]['pascal_voc'][meter.labels[idx] == i]
            for i in range(len(class_names))
        ]

#         preds = post_process_preds(preds)

        # Distinct names here: the loop variable used to shadow the chart type.
        for name, gt_boxes, pred_boxes in zip(class_names, gt, preds):
            metrics = compute_metrics(pred_boxes, gt_boxes)
            f1s[name].append(metrics['f1_score'])
            recalls[name].append(metrics['recall'])

    # Per class: mean F1, share of images with perfect F1, share with perfect recall.
    for k, v in f1s.items():
        print(f'{k} \t Avg F1: {np.mean(v):.3f}  \t Avg F1==1: {np.mean(np.array(v) == 1):.3f}', end="\t")
        print(f'Avg Recall==1: {np.mean(np.array(recalls[k]) == 1):.3f}')

- 0.001 - Avg F1: 0.699 - Avg Recall==1: 0.764
- 0.010  - Avg F1: 0.842 - Avg Recall==1: 0.733

### Predict
- IoU per class
- merge xticks and yticks (/labels)
- train without bars

In [None]:
# Validation rows, completed with the image and label file paths of the
# dataset version selected by VERSION.
df_val = df[df['split'] == "val"].reset_index(drop=True)
df_val = df_val.assign(
    path=f'../input/{VERSION}/images/valid/' + df_val['id'] + '.jpg',
    gt_path=f'../input/{VERSION}/labels/valid/' + df_val['id'] + '.txt',
)

In [None]:
# No-op as written; append .head(n) to run the pipeline on a small subset while debugging.
df_val = df_val  # .head(10)

In [None]:
# Chart types the prediction pipeline below is run on; uncomment an entry to include it.
TYPES = [
#     "dot",
#     "line",
#     "vertical_bar",
#     "horizontal_bar",
    "scatter",
]

# Keep only the selected chart types. The index is reset because later cells
# look rows up by position (df_val[...][i]).
df_val = df_val[df_val['chart-type'].isin(TYPES)].reset_index(drop=True)
# Optional extra filter on the image source:
# df_val = df_val[df_val['source'] == "extracted"].reset_index(drop=True)

In [None]:
# Dataset fed to both detectors, built with transforms for the marker config's input size.
transforms = get_transfos(size=config_marker.size)
dataset = InferenceDataset(df_val, transforms)

In [None]:
%%time
# Marker detector predictions for every validation image.
# Variant for a `predict` that also returns features:
# meter_marker, fts = predict(model_marker, dataset, config_marker)
meter_marker = predict(model_marker, dataset, config_marker)


# Give each prediction the (height, width) of its image, read from df_val by row position.
# NOTE(review): presumably rescales the boxes to the original resolution - confirm in update_shape.
for i, p in enumerate(meter_marker.preds):
    p.update_shape((df_val['img_h'][i], df_val['img_w'][i]))

In [None]:
%%time
# Chart detector predictions for every validation image (same dataset as the marker model).
# Variant for a `predict` that also returns features:
# meter_chart, _ = predict(model_chart, dataset, config_chart)
meter_chart = predict(model_chart, dataset, config_chart)


# Give each prediction the (height, width) of its image, read from df_val by row position.
# NOTE(review): presumably rescales the boxes to the original resolution - confirm in update_shape.
for i, p in enumerate(meter_chart.preds):
    p.update_shape((df_val['img_h'][i], df_val['img_w'][i]))

In [None]:
# Rebuild the dataset without transforms; the main loop reads its images for plotting, OCR and box retrieval.
# NOTE(review): presumably yields the untransformed image when transforms is None - confirm in InferenceDataset.
dataset = InferenceDataset(df_val, None)

### OCR

In [None]:
import transformers
transformers.utils.logging.set_verbosity_error()

from transformers import TrOCRProcessor
from transformers import VisionEncoderDecoderModel

from util.boxes import expand_boxes
from util.ocr import *

In [None]:
# TrOCR checkpoint used to read the axis labels.
name = "microsoft/trocr-base-stage1"

# Processor and encoder-decoder model are both loaded from the same checkpoint name;
# the model is moved to the GPU.
processor = TrOCRProcessor.from_pretrained(name)
ocr_model = VisionEncoderDecoderModel.from_pretrained(name).cuda()

### Main
- Enforce sim between dets
- conv sim not robust to col  (#26)
- Make sure

In [None]:
# plt.imshow(img)

In [None]:
# feats = fts[idx]

In [None]:
# feats = feats / ((feats ** 2).sum(0, keepdims=True) + 1e-6).sqrt()

In [None]:
# min_sim = 0.7

# sims = []
# for box in preds[-1][:5]:
# #     print(box)
#     y = (box[0] + box[2]) / 2
#     y = int(y / img.shape[1] * feats.size(2))
#     x = (box[1] + box[3]) / 2
#     x = int(x / img.shape[0] * feats.size(1))
    
# #     print(x, y)
    
# #     plt.imshow(img[box[1]: box[3], box[0]: box[2]])
# #     plt.show()
    
#     vec = feats[:, x, y][:, None, None]
    
# #     sim = ((feats - vec) ** 2).mean(0, keepdims=True)
# #     sim = 1 / (sim + 1)

#     sim = (feats * vec).sum(0, keepdims=True)
    
# #     sim = torch.clamp(sim, torch.quantile(sim, 0.5) * 1.05, 1)
# #     sim = (sim - sim.min()) / (sim.max() - sim.min())

#     sim = torch.where(sim < min_sim, 0, sim)
#     sims.append(sim)
    
# #     plt.imshow(sim[0].cpu().numpy())
# #     plt.colorbar()
# #     plt.show()
    
# #     break

In [None]:
%matplotlib inline

In [None]:
# PLOT: show the final detections of every image in the main loop.
PLOT = False
# DEBUG: verbose helpers, intermediate plots and GT / prediction tables; the main loop stops after the first image.
DEBUG = False

In [None]:
# Full scatter pipeline on every validation image:
# detections -> post-processing -> OCR of the axis labels -> axis regression -> scoring.
# `scores` receives two entries per image (x score, then y score).
scores = []
for idx in range(len(dataset)):
    # Debug helpers (uncomment as needed):
    # if recalls['point'][idx] >= 1:  # only look at charts where points were missed
    #     continue
    # idx = 0
    # DEBUG = True

    img, _, _ = dataset[idx]  # only the image is needed from the dataset here

    id_ = df_val.id[idx]

    print(idx, id_, end="\t")
    title = f"{id_} - {df_val.source[idx]} {df_val['chart-type'][idx]}"

    # Target series of this chart. Loaded before the detection branch so that the
    # DEBUG display at the end always shows the target table, even when no point
    # is detected.
    gt = df_target[df_target['id'] == id_].reset_index(drop=True)

    # One pascal_voc box array per class, for each detector.
    preds = [
        meter_chart.preds[idx]['pascal_voc'][meter_chart.labels[idx] == i]
        for i in range(len(config_chart.labels))
    ]
    preds_marker = [
        meter_marker.preds[idx]['pascal_voc'][meter_marker.labels[idx] == i]
        for i in range(len(config_marker.labels))
    ]
    # Kept for inspection; not consumed by the pipeline below.
    confidences_marker = [
        meter_marker.confidences[idx][meter_marker.labels[idx] == i]
        for i in range(len(config_marker.labels))
    ]

    # Experiment kept for reference - filter the points on their size:
    # n = 10
    # widths = preds[-1][:, 2] - preds[-1][:, 0]
    # heights = preds[-1][:, 3] - preds[-1][:, 1]
    # preds[-1] = preds[-1][
    #     (widths < widths[:5].mean() * 2) & (heights < heights[:5].mean() * 2)
    # ]
    # preds[-1] = preds[-1][:n]
    # confidences[-1] = confidences[-1][:n]

    # The last class always comes from the marker model.
    if len(preds) == 4:  # Replace
        preds[-1] = preds_marker[-1]
    elif len(preds) == 3:  # Append
        preds.append(preds_marker[-1])

    # After this call `preds` is indexed as: [1] / [2] boxes OCR'd for the x / y labels,
    # [3] / [4] boxes used for the x / y regression, [-1] the points (see the usages below).
    preds = post_process_preds(preds)

    if DEBUG:
        plot_results(img, preds, figsize=(12, 7), title=title)

    # Margin = 5% of the mean image side.
    margin = (img.shape[0] + img.shape[1]) / (2 * 20)
    preds = restrict_on_line(preds, margin=margin)

    retrieved_boxes = retrieve_missing_boxes(preds, img, verbose=DEBUG)

    if len(retrieved_boxes):
        preds[-1] = np.concatenate([preds[-1], retrieved_boxes])

    if PLOT:
        plot_results(img, preds, figsize=(12, 7), title=title)

    # OCR
    x_texts = ocr(ocr_model, processor, img, preds[1], margin=1, plot=DEBUG)
    x_values, x_errors = post_process_texts(x_texts)

    if DEBUG:
        print("x labels :", x_values, " - errors:", x_errors)

    if len(preds[-1]):
        reg_x = linear_regression(preds[3], x_values, x_errors, preds[-1], mode="x", verbose=DEBUG)

        y_texts = ocr(ocr_model, processor, img, preds[2], margin=3, plot=DEBUG)
        y_values, y_errors = post_process_texts(y_texts)

        if DEBUG:
            print("y labels :", y_values, " - errors:", y_errors)

        reg_y = linear_regression(preds[4], y_values, y_errors, preds[-1], mode="y", verbose=DEBUG)

        gt[["x", "y"]] = gt[["x", "y"]].astype(float)
        gt = gt.sort_values(['x', 'y'], ignore_index=True)

        reg_x = np.round(reg_x, rounding(np.max(reg_x)))
        pred = pd.DataFrame({"x": reg_x, "y": reg_y})
        pred = pred.sort_values(['x', 'y'], ignore_index=True)

        score_x = score_series(gt['x'].values, pred['x'].values)
        score_y = score_series(gt['y'].values, pred['y'].values)
    else:
        # No point detected: both axes score 0. An empty prediction table is still
        # defined so that the DEBUG display below cannot hit an undefined or stale `pred`.
        score_x, score_y = 0, 0
        pred = pd.DataFrame(columns=["x", "y"])

    print(f"Scores  -  x: {score_x:.3f}  - y: {score_y:.3f}")

    scores += [score_x, score_y]

    # if score_x == 0 and score_y == 0:
    #     plot_results(img, preds, figsize=(12, 7), title=title)

    if DEBUG:
        print('GT')
        display(gt)
        print('PRED')
        display(pred)
        break  # debug mode inspects a single image

In [None]:
# `scores` holds the x score and the y score of every image, so this is the mean over both axes.
print(f'Scatter CV : {np.mean(scores) :.3f}')

Done ! 