**About**: This notebook rotates vertical bar charts (images and annotations) to generate additional chart images for training detection models.

In [None]:
# %load_ext nb_black
# autoreload mode 2: re-import edited modules before each cell execution, so changes
# to the project's .py files are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
cd ../src/

## Initialization

### Imports

In [None]:
import os
# Hide every CUDA device from libraries imported after this point (CPU-only run).
os.environ['CUDA_VISIBLE_DEVICES'] = "-1"

In [None]:
import os
import cv2
import sys
import ast
import glob
import json
import yaml
import shutil
import warnings
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from tqdm import tqdm
from PIL import Image, ImageDraw, ImageOps

# Wider console output and longer cell contents when displaying dataframes.
# Full option names are used: abbreviated names (e.g. 'max_colwidth') rely on pattern
# matching and can break if pandas adds another option with a similar name.
pd.set_option('display.width', 500)
pd.set_option('display.max_colwidth', 100)

In [None]:
from params import *
from util.plots import plot_annotated_image, plot_sample
from util.torch import seed_everything
from util.yolo import *

### Load data

In [None]:
# Read the four input tables, in order: chart metadata, texts, targets and elements.
df, df_text, df_target, df_elt = map(
    pd.read_csv,
    (
        '../input/df_train.csv',
        '../input/texts.csv',
        '../input/y_train.csv',
        '../input/elements.csv',
    ),
)

## EDA

In [None]:
# Chart ids excluded from the dataset, grouped by the reason for exclusion.
# Each id is listed once ('3ef41bbc82c3' used to appear twice in the last group).
ANOMALIES = [
    # DUPLICATED STUFF
    'ae686738e744', 'c76f6d0d5239', '760c3fa4e3d9', 'c0c1f4046222', '3e568d136b85', '913447978a74', '2ff071a45cce', 'a9a07d74ee31',
    # MISSING or MISLABELED TICKS ANNOTS
    "36079df3b5b2", "3968efe9cbfc", "6ce4bc728dd5", "733b9b19e09a", "aa9df520a5f2", "d0cf883b1e13", "fa91f20f277d",
    # WEIRD
    "9f6b7c57e6cd", "e1034ff92655", "e796b10718bd", "f8bdbaf0b97d", "3ef41bbc82c3", "73cfbba65962", "872d1be39bae",
]

In [None]:
# Drop the charts flagged in ANOMALIES and renumber the remaining rows.
df = df.loc[~df['id'].isin(ANOMALIES)].reset_index(drop=True)

In [None]:
# Keep vertical bar charts only and renumber the remaining rows.
df = df.loc[df['chart-type'].eq("vertical_bar")].reset_index(drop=True)

### Tools

In [None]:
from PIL import Image, ImageDraw, ImageOps

def remove_polygon(image, x, y):
    """
    Hides a quadrilateral by filling it with the color found next to it.

    The fill color is sampled at the top-left corner of the polygon's bounding box,
    i.e. at (min(x), min(y)), clamped to the image bounds.

    Args:
        image (np.ndarray or PIL.Image.Image): Image to edit. Arrays are converted to a
            PIL image; a PIL image is drawn on in place.
        x (sequence of 4 ints): x coordinates of the polygon corners.
        y (sequence of 4 ints): y coordinates of the polygon corners.

    Returns:
        PIL.Image.Image: Image with the polygon painted over.
        int or tuple: Sampled color that was used as fill.
    """
    # Explicit type check instead of a bare `except: pass`, so that real conversion
    # errors (e.g. unsupported dtype) are reported where they happen.
    if not isinstance(image, Image.Image):
        image = Image.fromarray(image)

    # Clamp the sampling point: a polygon touching the border would otherwise make
    # `getpixel` raise an IndexError.
    sample_x = min(max(int(np.min(x)), 0), image.width - 1)
    sample_y = min(max(int(np.min(y)), 0), image.height - 1)
    background = image.getpixel((sample_x, sample_y))

    # Paint over the polygon
    draw = ImageDraw.Draw(image)
    draw.polygon([(x[i], y[i]) for i in range(4)], fill=background)

    return image, background

In [None]:
from PIL import Image, ImageDraw, ImageOps

def flip_polygon_vertically(image, x, y, image_before):
    """
    Replaces a quadrilateral with its vertically mirrored content.

    The rows spanned by the polygon, [min(y), max(y)), are reversed in a copy of
    `image_before` and in the polygon mask. The polygon is painted over in `image`
    with a color sampled at (min(x), min(y)), and the mirrored pixels are then
    composited on top wherever the mirrored mask is set.

    Args:
        image (np.ndarray or PIL.Image.Image): Image to edit. Arrays are converted to a
            PIL image; a PIL image is drawn on in place.
        x (np.ndarray of 4 ints): x coordinates of the polygon corners.
        y (np.ndarray of 4 ints): y coordinates of the polygon corners.
        image_before (PIL.Image.Image or np.ndarray): Image the mirrored pixels are read
            from. Must have the same size as `image`.

    Returns:
        PIL.Image.Image: Image with the mirrored polygon.
        int or tuple: Color used to paint over the original polygon.
    """
    # Explicit type check instead of a bare `except: pass`.
    if not isinstance(image, Image.Image):
        image = Image.fromarray(image)

    corners = [(x[i], y[i]) for i in range(4)]

    # Mask of the polygon, same size as the image
    mask = Image.new('L', image.size, 0)
    ImageDraw.Draw(mask).polygon(corners, fill=255)

    # Mirror the rows spanned by the polygon, in the pixels and in the mask
    start_y, end_y = np.min(y), np.max(y)

    flipped = np.array(image_before)
    flipped[start_y: end_y] = flipped[start_y: end_y][::-1]
    flipped = Image.fromarray(flipped)

    mask_flipped = np.array(mask)
    mask_flipped[start_y: end_y] = mask_flipped[start_y: end_y][::-1]
    mask_flipped = Image.fromarray(mask_flipped)

    # Hide the original polygon
    background = image.getpixel((np.min(x), start_y))
    ImageDraw.Draw(image).polygon(corners, fill=background)

    # Keep `image` outside of the mirrored mask, use the mirrored pixels inside
    combined_image = Image.composite(image, flipped, ImageOps.invert(mask_flipped))
    return combined_image, background

In [None]:
def move_polygon_up(image, x, y, pixels):
    """
    Moves the content of a quadrilateral up by a number of pixels.

    The polygon is first painted over with a color sampled at (max(x), max(y)), then
    its original content is pasted `pixels` rows higher. Both pastes are restricted to
    the polygon shape.

    Args:
        image (np.ndarray or PIL.Image.Image): Source image. It is not modified.
        x (sequence of 4 ints): x coordinates of the polygon corners.
        y (sequence of 4 ints): y coordinates of the polygon corners.
        pixels (int): Upward displacement in pixels.

    Returns:
        PIL.Image.Image: New RGB image with the polygon moved.
        int or tuple: Color used to paint over the original location.
    """
    # Explicit type check instead of a bare `except: pass`.
    if not isinstance(image, Image.Image):
        image = Image.fromarray(image)

    # Work on an RGB copy of the input
    moved_image = Image.new('RGB', image.size)
    moved_image.paste(image, (0, 0))

    # Mask of the polygon, same size as the image
    mask = Image.new('L', image.size, 0)
    ImageDraw.Draw(mask).polygon([(x[i], y[i]) for i in range(4)], fill=255)

    # Bounding box of the polygon; pixels and mask are cropped once and reused
    left, top = min(x), min(y)
    bbox = (left, top, max(x), max(y))
    region = image.crop(bbox)
    region_mask = mask.crop(bbox)

    # Hide the polygon at its original location
    background = image.getpixel((np.max(x), np.max(y)))
    filler = Image.new('RGB', region.size, background)
    moved_image.paste(filler, (left, top), mask=region_mask)

    # Paste the content `pixels` rows higher
    moved_image.paste(region, (left, top - pixels), mask=region_mask)
    return moved_image, background

In [None]:
from PIL import Image, ImageDraw, ImageOps

def copy_polygon(image, x, y, new_x, image_before):
    """
    Replaces a quadrilateral with its vertically mirrored content.

    NOTE(review): despite its name, this function does not copy the polygon to a new
    location. `new_x` is accepted but never used, and the body performs the same
    vertical flip as `flip_polygon_vertically`. Kept as is for backward compatibility.

    Args:
        image (PIL.Image.Image): Image to edit. It is drawn on in place.
        x (np.ndarray of 4 ints): x coordinates of the polygon corners.
        y (np.ndarray of 4 ints): y coordinates of the polygon corners.
        new_x (any): Unused.
        image_before (PIL.Image.Image or np.ndarray): Image the mirrored pixels are read
            from. Must have the same size as `image`.

    Returns:
        PIL.Image.Image: Image with the mirrored polygon.
        int or tuple: Color used to paint over the original polygon.
    """
    corners = [(x[i], y[i]) for i in range(4)]

    # Mask of the polygon, same size as the image
    mask = Image.new('L', image.size, 0)
    ImageDraw.Draw(mask).polygon(corners, fill=255)

    # Mirror the rows spanned by the polygon, in the pixels and in the mask
    start_y, end_y = np.min(y), np.max(y)

    flipped = np.array(image_before)
    flipped[start_y: end_y] = flipped[start_y: end_y][::-1]
    flipped = Image.fromarray(flipped)

    mask_flipped = np.array(mask)
    mask_flipped[start_y: end_y] = mask_flipped[start_y: end_y][::-1]
    mask_flipped = Image.fromarray(mask_flipped)

    # Hide the original polygon
    background = image.getpixel((np.min(x), start_y))
    ImageDraw.Draw(image).polygon(corners, fill=background)

    # Keep `image` outside of the mirrored mask, use the mirrored pixels inside
    combined_image = Image.composite(image, flipped, ImageOps.invert(mask_flipped))
    return combined_image, background

In [None]:
from PIL import Image, ImageDraw, ImageOps

def flip_polygon_vertically_and_shift(image, x, y, image_before, shift=0):
    """
    Pastes the vertically mirrored content of a quadrilateral onto an image.

    The rows spanned by the polygon, [min(y), max(y)), are reversed in a copy of
    `image_before` and in the polygon mask. The mirrored pixels are composited onto
    `image` wherever the mirrored mask is set. With a non-zero `shift`, both the
    mirrored pixels and their mask are first moved up by `shift` rows.

    Unlike the other polygon helpers, the original polygon is not painted over and
    only the image is returned.

    Args:
        image (PIL.Image.Image): Image to paste onto. It is not modified.
        x (np.ndarray of 4 ints): x coordinates of the polygon corners.
        y (np.ndarray of 4 ints): y coordinates of the polygon corners.
        image_before (PIL.Image.Image or np.ndarray): Image the mirrored pixels are read
            from. Must have the same size as `image`.
        shift (int, optional): Number of rows to move the mirrored content up
            (a negative value moves it down). The shifted path builds RGB buffers,
            so it assumes RGB images. Defaults to 0.

    Returns:
        PIL.Image.Image: Image with the mirrored (and possibly shifted) polygon.
    """
    # Mask of the polygon, same size as the image
    mask = Image.new('L', image.size, 0)
    ImageDraw.Draw(mask).polygon([(x[i], y[i]) for i in range(4)], fill=255)

    # Mirror the rows spanned by the polygon, in the pixels and in the mask
    start_y, end_y = np.min(y), np.max(y)

    flipped = np.array(image_before)
    flipped[start_y: end_y] = flipped[start_y: end_y][::-1]
    flipped = Image.fromarray(flipped)

    mask_flipped = np.array(mask)
    mask_flipped[start_y: end_y] = mask_flipped[start_y: end_y][::-1]
    mask_flipped = Image.fromarray(mask_flipped)

    # No shift: composite directly (keeps the mode of the inputs)
    if not shift:
        return Image.composite(image, flipped, ImageOps.invert(mask_flipped))

    # Move pixels and mask up by `shift` rows; uncovered rows stay black / unmasked
    shifted_flipped_image = Image.new('RGB', flipped.size)
    shifted_flipped_image.paste(flipped, (0, -shift))

    shifted_mask_flipped = Image.new('L', mask_flipped.size)
    shifted_mask_flipped.paste(mask_flipped, (0, -shift))

    # Keep `image` outside of the shifted mask, use the mirrored pixels inside
    combined_image = Image.composite(
        image, shifted_flipped_image, ImageOps.invert(shifted_mask_flipped)
    )

    return combined_image

In [None]:
def flip_vertical_bar(image, annot):
    """
    Rotates a vertical bar chart and re-lays it out as a wider image.

    Steps, in order:
      1. Rotate the image with `cv2.rotate(image, 0)` and reverse the row order.
      2. Remap bars, tick points, text polygons and the plot bounding box to the new
         frame; the x-axis and y-axis tick lists are swapped.
      3. Erase every text polygon, then paste each text back mirrored vertically.
         Texts matching a data-series 'x' value may also be shifted up towards their tick.
      4. Stretch the plot area 3x horizontally on a white canvas and move the texts
         located right of the plot start accordingly.
      5. Rotate each of those texts (except the ones with a title role) inside a square
         patch with `np.rot90`.
      6. Cut out the columns of the first text (presumably the chart title - TODO
         confirm), rotate them and stack them as a strip on top of the image.

    Args:
        image (np.ndarray): Chart image. Assumed to be [H x W x 3] uint8 - TODO confirm.
        annot (dict): Annotation with the keys 'visual-elements', 'axes', 'text',
            'data-series' and 'plot-bb'. It is MODIFIED IN PLACE, including when the
            function returns None part-way through.

    Returns:
        PIL.Image.Image or None: The transformed image, or None when a text lies on a
            dark background, when a label shift is too large, or when rotated text
            patches overlap.
    """
#     issue = False
    # Rotate 90
    # rotateCode 0 is cv2.ROTATE_90_CLOCKWISE; the rows are then reversed.
    image = cv2.rotate(image, 0)
    image = image[::-1]

    # Size of the rotated image. NOTE: `w` and `h` are reused for polygon sizes further down.
    h, w = image.shape[:2]

    # image_old is the untouched reference the text pixels are read from,
    # image_nopoly is the working copy texts are erased from and pasted onto.
    image_old = Image.fromarray(image)
    image_nopoly = Image.fromarray(image)
    image = Image.fromarray(image)

    # Bars
    # Swap width/height, then map the corner: new x0 = w - y0 - width, new y0 = h - x0 - height
    # (the right-hand side already uses the swapped width/height).
    new_bars = []
    for bar in annot['visual-elements']['bars']:
        bar['height'], bar['width'] = bar['width'], bar['height']
        bar['x0'], bar['y0'] = w - bar['y0'] - bar['width'], h - bar['x0'] - bar['height']
        new_bars.append(bar)
    annot['visual-elements']['bars'] = new_bars

    # xticks
    # Points are mapped with (x, y) -> (w - y, h - x).
    new_xticks = []
    for ticks in annot['axes']['x-axis']['ticks']:
        ticks['tick_pt']['x'], ticks['tick_pt']['y'] = w - ticks['tick_pt']['y'], h - ticks['tick_pt']['x']
        new_xticks.append(ticks)

    # yticks
    new_yticks = []
    for ticks in annot['axes']['y-axis']['ticks']:
        ticks['tick_pt']['x'], ticks['tick_pt']['y'] = w - ticks['tick_pt']['y'], h - ticks['tick_pt']['x']
        new_yticks.append(ticks)

    # The axes are exchanged: former x ticks become the y-axis ticks and vice versa.
    annot['axes']['y-axis']['ticks'] = new_xticks
    annot['axes']['x-axis']['ticks'] = new_yticks

    # Image with no texts
    # First pass (reverse order): erase every text from image_nopoly. The annotation
    # polygons are not updated here, only a local mapped copy is used.
    for txt in annot["text"][::-1]:
        poly = {
            'x0': w - txt['polygon']['y0'],
            'x1': w - txt['polygon']['y1'],
            'x2': w - txt['polygon']['y2'],
            'x3': w - txt['polygon']['y3'],
            'y0': h - txt['polygon']['x0'],
            'y1': h - txt['polygon']['x1'],
            'y2': h - txt['polygon']['x2'],
            'y3': h - txt['polygon']['x3'],
        }
        xs = np.array([poly['x0'], poly['x1'], poly['x2'], poly['x3']])
        ys = np.array([poly['y0'], poly['y1'], poly['y2'], poly['y3']])

        image_nopoly, bg = remove_polygon(image_nopoly, xs, ys)

        # Give up when the color sampled next to the text is not light.
        if np.mean(bg) < 200:
            return None
#             issue = True

    # Second pass: map the annotation polygons and paste each text back, mirrored vertically.
    new_txts = []
    for txt in annot["text"]:
        txt['polygon'] = {
            'x0': w - txt['polygon']['y0'],
            'x1': w - txt['polygon']['y1'],
            'x2': w - txt['polygon']['y2'],
            'x3': w - txt['polygon']['y3'],
            'y0': h - txt['polygon']['x0'],
            'y1': h - txt['polygon']['x1'],
            'y2': h - txt['polygon']['x2'],
            'y3': h - txt['polygon']['x3'],
        }
        xs = np.array([txt['polygon']['x0'], txt['polygon']['x1'], txt['polygon']['x2'], txt['polygon']['x3']])
        ys = np.array([txt['polygon']['y0'], txt['polygon']['y1'], txt['polygon']['y2'], txt['polygon']['y3']])

        delta = 0
        # Texts equal to a data-series 'x' value (presumably category labels - TODO confirm)
        if txt["text"] in [k['x'] for k in annot['data-series']]:  # SHIFT
            center = int((ys.max() + ys.min()) / 2)
            ys_after = center - (ys - center)  # ys once mirrored around the center

            # Tick sharing the text id; raises IndexError if there is none.
            tick = [t for t in new_xticks if t["id"] == txt["id"]][0]
            tgt_y = tick['tick_pt']['y']

            # The polygon counts as rotated when its four x coordinates are all distinct.
            rotated = len(np.unique(xs)) == 4
            if not rotated:
                delta = 0
            else:
                # Distance between the two lowest mirrored corners and the tick
                y_avg = np.sort(ys_after)[-2:].mean()
                delta = int(y_avg - tgt_y)
                if delta > ys.min() + 5:
                    return None

#         print(delta)

        image_nopoly = flip_polygon_vertically_and_shift(image_nopoly, xs, ys, image_old, shift=delta)

        # Apply the same mirror + shift to the annotated polygon.
        center = int((ys.max() + ys.min()) / 2)
        txt['polygon'] = {
            'x0': txt['polygon']['x0'],
            'x1': txt['polygon']['x1'],
            'x2': txt['polygon']['x2'],
            'x3': txt['polygon']['x3'],
            'y0': center - (txt['polygon']['y0'] - center) - delta,
            'y1': center - (txt['polygon']['y1'] - center) - delta,
            'y2': center - (txt['polygon']['y2'] - center) - delta,
            'y3': center - (txt['polygon']['y3'] - center) - delta,
        }
        new_txts.append(txt)
    annot['text'] = new_txts
    image = image_nopoly

    # Chart
    # Same mapping as for the bars, applied to the plot bounding box.
    bar = annot['plot-bb']
    bar['height'], bar['width'] = bar['width'], bar['height']
    bar['x0'], bar['y0'] = w - bar['y0'] - bar['width'], h - bar['x0'] - bar['height']
    annot['plot-bb'] = bar

    # Upscale
    # White canvas three times as wide; `m` is the margin kept around the plot area.
    img = np.array(image)
    img_upscale = 255 * np.ones((h, w * 3, 3), dtype=np.uint8)
    m = 5
#     print(img.shape, img_upscale.shape)
    # Everything left of the plot area is copied unchanged.
    img_upscale[:, :bar['x0'] - m] = img[:, :bar['x0'] - m]

    # The plot area (with margin) is stretched 3x horizontally and pasted at the same origin.
    chart = img[bar['y0'] - m: bar['y0'] + bar['height'] + m:, bar['x0'] - m: bar['x0'] + bar['width']+ m:]
    chart = cv2.resize(
        chart,
        (3 * (2 * m + bar['width']), 2 * m + bar['height'])
    )
    img_upscale[bar['y0'] - m : bar['y0'] - m + chart.shape[0], bar['x0'] - m: bar['x0'] - m + chart.shape[1]] = chart

    # Texts right of the plot start are copied unscaled to their new location:
    # the offset of the polygon center from `start` is multiplied by 3.
    x_max = 0
    start = bar['x0'] - m
    for txt in annot["text"]:
        xs = np.array([txt['polygon']['x0'], txt['polygon']['x1'], txt['polygon']['x2'], txt['polygon']['x3']])
        ys = np.array([txt['polygon']['y0'], txt['polygon']['y1'], txt['polygon']['y2'], txt['polygon']['y3']])

        x = xs.min()
        if xs.mean() > start:
            new_x = int(xs.min() - xs.mean() + start + (xs.mean() - start) * 3)
            w = xs.max() - xs.min()  # polygon width; overwrites the image width
            img_upscale[np.min(ys): np.max(ys), new_x : new_x + w] = img[np.min(ys): np.max(ys), x : x + w]

            x_max = max(new_x + w, x_max)
            txt["polygon"].update({
                'x0': txt['polygon']['x0'] - x + new_x,
                'x1': txt['polygon']['x1'] - x + new_x,
                'x2': txt['polygon']['x2'] - x + new_x,
                'x3': txt['polygon']['x3'] - x + new_x,
            })

    # Crop the canvas 20 columns after the right-most moved text.
    img_upscale = img_upscale[:, :x_max + 20]

    # `taken` counts how many rotated patches cover each pixel.
    taken = np.zeros(img_upscale.shape)
    for txt in annot["text"]:
        xs = np.array([txt['polygon']['x0'], txt['polygon']['x1'], txt['polygon']['x2'], txt['polygon']['x3']])
        ys = np.array([txt['polygon']['y0'], txt['polygon']['y1'], txt['polygon']['y2'], txt['polygon']['y3']])

        if xs.mean() > start:
            if "title" in txt['role']:
                continue

            # Square patch of side `hw` anchored at the top-right corner of the text.
            x1, w = xs.max() + 1, xs.max() - xs.min()
            y0, h = ys.min() - 1, ys.max() - ys.min()
            hw = int(max(w, h)) + 2

            # Rotate the patch in place. NOTE(review): the assignment needs a square crop,
            # so a patch clipped by the image border would presumably fail - confirm.
            crop = img_upscale[y0: y0 + hw, x1 - hw: x1]
            crop = np.rot90(crop)
            img_upscale[y0: y0 + hw, x1 - hw: x1] = crop
            taken[y0: y0 + hw, x1 - hw: x1] += 1

    # Overlapping patches would corrupt each other: give up.
    if taken.max() > 1:
        return None
#     print(taken.max())

    # Title
    # The first text is used as the title (assumption - TODO confirm).
    txt = annot["text"][0]
    xs = np.array([txt['polygon']['x0'], txt['polygon']['x1'], txt['polygon']['x2'], txt['polygon']['x3']])

    # Cut the title columns out; the canvas keeps only what is left of them.
    title = img_upscale[:, xs.min() - m : xs.max() + m]
    img_upscale = img_upscale[:, :xs.min() - m]

    title = np.rot90(title)
#     print(title.shape, img_upscale.shape)

    # White strip as wide as the canvas, with the rotated title centered horizontally.
    title_ph = 255 * np.ones((title.shape[0], img_upscale.shape[1], 3), dtype=np.uint8)

    mid = title_ph.shape[1] // 2
    title_ph[:, mid - title.shape[1] // 2: mid - title.shape[1] // 2 + title.shape[1]] = title

    # Stack the title strip on top of the chart.
    img_upscale = np.concatenate([title_ph, img_upscale], 0)
    img_upscale = Image.fromarray(img_upscale)
#     img_upscale.show()

    return img_upscale  # , annot, issue

### Example

In [None]:
# Id of the chart used in this example; it selects the image and annotation files.
file = "00261ed70def"

In [None]:
# Load the annotation and the image of the example chart.
with open(f"../input/train/annotations/{file}.json") as annotation_f:
    annot = json.load(annotation_f)
img = Image.open(f"../input/train/images/{file}.jpg")

In [None]:
# fig = plot_annotated_image(file, img=img, annot=annot)

In [None]:
# `annot` is modified in place and `img` is rebound to the result (None if the chart
# is rejected), so this cell is not idempotent: reload the example before re-running it.
img = flip_vertical_bar(np.array(img), annot)

In [None]:
# fig = plot_annotated_image(file, img=img, annot=annot)

### Flip

In [None]:
PLOT = True  # show the original and transformed images for every processed chart
SAVE = False  # not read by any of the visible cells

In [None]:
labels = ["chart", "text", "tick", "point"]

# The loop runs once per chart id, so the progress-bar total is the number of groups:
# `len(df)` would overcount whenever `df` holds several rows for the same id.
for i, (id_, dfg) in tqdm(enumerate(df.groupby('id')), total=df['id'].nunique()):
    img = Image.open(f"../input/train/images/{id_}.jpg")
    with open(f"../input/train/annotations/{id_}.json") as annotation_f:
        annot = json.load(annotation_f)

    # Returns None when the chart cannot be converted; `annot` is modified in place.
    img_r = flip_vertical_bar(np.array(img), annot)

    # Show the original and the transformed chart side by side (external viewer).
    if (PLOT or not (i % 10000)) and img_r is not None:
        print(id_)
        img.show()
        img_r.show()

    # Preview only: stop after the first 11 charts.
    if i >= 10:
        break

Done!