In [1]:
import sys
sys.path.append('../../30_data_tools')

In [2]:
from helper import load_dotenv
from get_labelstudio_data import get_tasks, get_results_of_project
from file_interaction import open_img

In [3]:
import pandas as pd
import requests
from PIL import Image
from datetime import datetime
from tqdm.auto import tqdm
import sqlite3
import re

In [4]:
# Disable Pillow's decompression-bomb pixel limit so that very large images can be opened.
# NOTE(review): only appropriate while every image comes from a trusted source.
Image.MAX_IMAGE_PIXELS = None

In [5]:
# Project configuration; later cells read config['DB_PATH'] and config['LABEL_STUDIO_TOKEN'].
config = load_dotenv()

In [6]:
# SQLite connection to the database at config['DB_PATH']; used below to read the masks.
# NOTE(review): the connection is not closed anywhere in the visible cells.
con = sqlite3.connect(config['DB_PATH'])

In [7]:
# Single source of truth for the id that was previously repeated as a bare `2`.
# NOTE(review): presumably the Label Studio project id for both calls — confirm.
LABEL_STUDIO_PROJECT_ID = 2

tasks = get_tasks(LABEL_STUDIO_PROJECT_ID)
results = get_results_of_project(LABEL_STUDIO_PROJECT_ID)

In [8]:
def transform_result( r ):
    """
    Parse the image name of a result into the key columns of a mask.

    The name is expected to look like
    ``<job>.<pdf_filename>.halftone<N>dpi.<method>.<idx>.4c_<rest>`` where
    ``<pdf_filename>`` ends in ``.p<digits>``. Names without the
    ``halftone<N>dpi`` segment are accepted as a fallback.

    Parameters
    ----------
    r : mapping
        Result entry; only ``r['img_name']`` is read.

    Returns
    -------
    dict or None
        Dict with the keys ``job``, ``pdf_filename``, ``variant_name``,
        ``method`` and ``idx`` (``idx`` as ``int``), or ``None`` when ``r``
        has no usable ``img_name`` or the name matches neither pattern.
    """
    # Only the failures that mean "no usable image name" are turned into None;
    # anything else (e.g. KeyboardInterrupt) is allowed to propagate.
    try:
        img_name = r['img_name']
    except (KeyError, IndexError, TypeError):
        return None

    if not isinstance(img_name, str):
        return None

    res = re.match(r'(.+?)\.(.+\.p\d+)\.halftone\d+dpi\.(.+?)\.(\d+)\.4c_.+', img_name)

    if res is None:
        # Fallback: same layout without the halftone segment.
        res = re.match(r'(.+?)\.(.+\.p\d+)\.(.+?)\.(\d+)\.4c_.+', img_name)

    if res is None:
        return None

    job, pdf_filename, method, idx = res.groups()

    return {
        'job' : job,
        'pdf_filename' : pdf_filename,
        # NOTE(review): always 'halftone600dpi', even if the name carries a
        # different dpi value — confirm this is intended.
        'variant_name' : 'halftone600dpi',
        'method' : method,
        'idx' : int(idx)
    }

In [9]:
# Masks with an SSIM below 0.95, each joined with the timestamp of its generic image.
masks = pd.read_sql(
    '''
        SELECT m.*, gi.timestamp FROM mask m
        LEFT JOIN generic_image gi
        ON
            m.pdf_filename=gi.pdf_filename AND
            m.job=gi.job AND
            m.type=gi.type AND
            m.variant_name=gi.variant_name AND
            m.method=gi.method AND
            m.idx=gi.idx
        WHERE ssim < 0.95
    ''',
    con
)

# Columns that identify the image a mask belongs to.
mask_key_columns = ['job','pdf_filename','variant_name','method','idx']

# Parse every result exactly once and drop the ones whose name could not be parsed.
parsed_results = [parsed for parsed in map(transform_result, results) if parsed is not None]

if len(parsed_results) > 0:
    # One row per image: repeated results for the same image would otherwise
    # multiply the mask rows in the left merge below.
    result_data = pd.DataFrame(parsed_results).drop_duplicates(subset=mask_key_columns)
    result_data.loc[:,'is_processed'] = True
    
    masks = pd.merge(
        masks,
        result_data,
        how="left",
        on=mask_key_columns
    )
    # Rows without a matching result come out of the merge as NaN; since every
    # matched row carries True, "not NaN" is exactly "processed". Assigning the
    # column (instead of a chained in-place fillna) also yields a real bool dtype.
    masks['is_processed'] = masks['is_processed'].notna()
else:
    # No usable result at all: nothing has been processed yet.
    masks.loc[:,'is_processed'] = False

In [10]:
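# Optional dry run: uncomment to work on a random sample of 100 masks and
# treat all of them as unprocessed.
#masks = masks.sample(n=100)
#masks.loc[:,'is_processed'] = False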

In [11]:
def create_annotation( task, masks, base_url='http://localhost:8080', timeout=30 ):
    """
    Post one annotation with a rectangle per mask for the given task.

    The image behind ``task['data_path']`` is opened only to read its size;
    every mask is converted from absolute values to percentages of the image
    width / height and labelled ``unchecked_moire``.

    Parameters
    ----------
    task : mapping
        Task entry; ``data_path``, ``id`` and ``project_id`` are read.
    masks : iterable of sequences
        One ``[x, y, width, height]`` entry per rectangle
        (presumably in pixels of the task image — confirm).
    base_url : str, optional
        Root URL of the Label Studio server.
    timeout : float, optional
        Seconds to wait for the server before ``requests`` raises a timeout
        error, so that an unresponsive server cannot block forever.

    Returns
    -------
    int
        HTTP status code of the response.
    """
    with open_img( (task['data_path'],'azure') ) as img:
        img_size = img.size

    img_width, img_height = img_size[0], img_size[1]

    headers = {'Authorization': f'Token { config["LABEL_STUDIO_TOKEN"] }'}
    json_data = {
        "completed_by": 1,
        "result": [
            {
                "original_width": img_width,
                "original_height": img_height,
                "image_rotation": 0,
                "value": {
                    # Coordinates are sent as percentages of the image size.
                    "x": m[0] / img_width * 100,
                    "y": m[1] / img_height * 100,
                    "width": m[2] / img_width * 100,
                    "height": m[3] / img_height * 100,
                    "rotation": 0,
                    "rectanglelabels": [
                        "unchecked_moire"
                    ]
                },
                "from_name": "label",
                "to_name": "image",
                "type": "rectanglelabels",
                "origin": "manual"
            } for m in masks
        ],
        "was_cancelled": False,
        "ground_truth": False,
        "draft_created_at": datetime.now().isoformat(),
        "task": task["id"],
        "project": task["project_id"],
        "updated_by": 1,
        "parent_prediction": None,
        "parent_annotation": None
    }

    response = requests.post(
        f'{ base_url.rstrip("/") }/api/tasks/{ task["id"] }/annotations/',
        headers=headers,
        json=json_data,
        timeout=timeout
    )

    return response.status_code

In [12]:
# Upload one annotation per image: repeatedly pick a random unprocessed mask,
# gather every mask of the same image, post them together and mark them done.
# Failures are collected here instead of being silently dropped.
failed_annotations = []

with tqdm(total=masks.loc[masks.is_processed == False].shape[0]) as pbar:
    while True:
        pending = masks.loc[masks.is_processed == False]
        if pending.shape[0] == 0:
            break

        mask = pending.sample(n=1).iloc[0]
        
        # All masks that belong to the same image as the sampled one.
        same_image = (
            (masks.job == mask.job) &
            (masks.pdf_filename == mask.pdf_filename) &
            (masks.variant_name == mask.variant_name) &
            (masks.method == mask.method) &
            (masks.idx == mask.idx)
        )
        # A row with a missing key value never equals itself; always include the
        # sampled row so it gets marked as processed and the loop terminates.
        same_image.loc[mask.name] = True
        
        relevant_tasks = [t for t in tasks if f'{ mask.job }.{ mask.pdf_filename }.' in t['data_path'] and f'.{ mask.method }.{ mask.idx }.' in t['data_path']]
        
        if len(relevant_tasks) > 0:
            task = relevant_tasks[0]
            
            # bbox is stored as a ';'-separated list of integers.
            masks_for_img = [
                [int(val) for val in m.split(';')]
                for m in
                masks.loc[same_image, 'bbox'].values
            ]

            try:
                return_code = create_annotation( task, masks_for_img )
            except Exception as exc:
                # Best effort: keep going, but make the failure visible.
                failed_annotations.append((task['id'], repr(exc)))
                pbar.write(f'annotation for task { task["id"] } failed: { exc !r}')
            else:
                if not 200 <= return_code < 300:
                    failed_annotations.append((task['id'], f'HTTP { return_code }'))
                    pbar.write(f'annotation for task { task["id"] } returned HTTP { return_code }')
    
        # Masks are marked as processed even when no task was found or the
        # upload failed, so that every image is attempted exactly once.
        masks.loc[same_image, 'is_processed'] = True
        pbar.update(int(same_image.sum()))

  0%|          | 0/1221 [00:00<?, ?it/s]