In [1]:
import sys
sys.path.append('../../30_data_tools/')

In [2]:
import pandas as pd
import sqlite3
import re
from tqdm import tqdm
import requests

In [3]:
from helper import load_dotenv
from get_labelstudio_data import get_results_of_project

In [4]:
import plotly.express as px

In [5]:
# Load the project configuration through the project's `load_dotenv` helper.
# Later cells read the keys 'DB_PATH' and 'LABEL_STUDIO_TOKEN' from this mapping.
dotenv = load_dotenv()

In [6]:
# Open the SQLite database configured under 'DB_PATH'. This one connection is
# reused by the cells and functions below that read or modify the `mask` table.
con = sqlite3.connect(dotenv['DB_PATH'])

In [None]:
def delete_duplicated_masks( con ):
    """Remove duplicated rows from the `mask` table, keeping one row per group.

    Rows count as duplicates when they agree on pdf_filename, job, type,
    variant_name, method, idx and bbox (NULL values compare equal to each
    other). Within each group the first mask_id that does not match
    ``^temp_.+`` is kept; when every id of the group matches that pattern the
    first id is kept. Every other mask_id of the group is deleted.

    All deletes run inside a single transaction that is committed on success
    and rolled back if a statement fails.

    Parameters
    ----------
    con : sqlite3.Connection
        Open connection to the database that contains the `mask` table.

    Returns
    -------
    None
        The number of rows actually deleted is printed.
    """
    key_columns = ['pdf_filename','job','type','variant_name','method','idx','bbox']

    def to_sql_value( value ):
        # sqlite3 cannot bind NaN-as-NULL or numpy scalars, so normalise both
        if pd.isna(value):
            return None
        return value.item() if hasattr(value, 'item') else value

    masks = pd.read_sql(
        '''
            SELECT * FROM mask m
        ''',
        con
    )

    # every member of every duplicate group, in table order
    duplicated_rows = masks.loc[
        masks.duplicated(subset=key_columns, keep=False),
        key_columns + ['mask_id']
    ]

    # group the mask ids by their (normalised) key so each group is handled once
    mask_ids_by_key = {}
    for *key_values, mask_id in duplicated_rows.itertuples(index=False, name=None):
        key = tuple(to_sql_value(value) for value in key_values)
        mask_ids_by_key.setdefault(key, []).append(mask_id)

    delete_params = []
    for key, mask_ids in mask_ids_by_key.items():
        # keep
        non_temp_ids = [m_id for m_id in mask_ids if re.match(r'^temp_.+', m_id) is None]
        keep_id = non_temp_ids[0] if non_temp_ids else mask_ids[0]

        # dict.fromkeys drops repeated ids while preserving their order
        obsolete_ids = dict.fromkeys(m_id for m_id in mask_ids if m_id != keep_id)
        delete_params.extend(key + (m_id,) for m_id in obsolete_ids)

    # `IS` is SQLite's NULL-safe equality, so groups with NULL key columns match too;
    # placeholders keep quotes inside file names from breaking the statement
    sql = '''
        DELETE FROM mask
        WHERE (
            pdf_filename IS ? AND
            job IS ? AND
            "type" IS ? AND
            variant_name IS ? AND
            method IS ? AND
            idx IS ? AND
            bbox IS ? AND
            mask_id = ?
        )
    '''

    delete_count = 0
    if delete_params:
        with con:
            for params in tqdm(delete_params):
                # rowcount reports what the database really removed
                delete_count += con.execute(sql, params).rowcount

    print(f'{ delete_count } masks deleted')

In [None]:
# Annotations of one specific image whose mask_id does not occur in the mask table.
# NOTE(review): `results` and `masks` are only assigned in a later cell of this
# notebook, so this cell needs that cell to have run first.
results[
    results.img_name.eq('140233.39_DO01175010_Spielplaene_S_001-240.p89.soft_light.1.4c_300.jpg') &
    ~results.mask_id.isin(masks.mask_id)
]

In [None]:
# Per image: number of annotations whose mask_id is missing from the mask table.
results[
    ~results.mask_id.isin(masks.mask_id)
].groupby('img_name').count()

In [None]:
# Run the de-duplication on the shared connection. This permanently deletes rows
# from the `mask` table and prints how many masks were deleted.
delete_duplicated_masks(con)

In [None]:
# Reload the complete mask table (replaces any earlier `masks` frame).
masks = pd.read_sql(
    '''
        SELECT * FROM mask m
    ''',
    con
)

# Masks that no annotation in `results` refers to and that still carry a temp_ id.
masks[
    ~masks.mask_id.isin(results.mask_id) &
    masks.mask_id.str.match('^temp_.+')
]

In [None]:
# One row per result of Label Studio project 2; results without an 'id' get '-'.
# NOTE(review): the rectangle is read from r['value'] here, whereas `_masks` reads
# r['bbox'] for results of the same project — confirm which key the helper returns.
results = pd.DataFrame(
    [
        (
            r['img_name'],
            r['value']['x'],
            r['value']['y'],
            r['value']['width'],
            r['value']['height'],
            r['id'] if 'id' in r else '-',
            r['rectanglelabels'][0],
        )
        for r in get_results_of_project(2)
    ],
    columns=['img_name', 'x', 'y', 'width', 'height', 'mask_id', 'label']
)

# Only the ids are needed from the mask table for the membership checks below.
masks = pd.read_sql(
    '''
        SELECT mask_id FROM mask m
    ''',
    con
)

# Image name plus rectangle identify an annotation; label and mask_id are ignored.
relevant_selection = results.drop(columns=['label', 'mask_id'])

# Index labels of every repeated occurrence (the first occurrence is not flagged).
relevant_idx = results.index[relevant_selection.duplicated(keep='first')]

In [None]:
# For every repeated annotation, report how many rows share its image and rectangle
# and how many of those carry a mask_id that exists in the mask table.
for idx in relevant_idx:
    row = results.loc[idx]

    # all annotations with the same image name and rectangle as this row
    selection = results[
        results['img_name'].eq(row['img_name'])
        & results['x'].eq(row['x'])
        & results['y'].eq(row['y'])
        & results['width'].eq(row['width'])
        & results['height'].eq(row['height'])
    ]

    # the subset whose mask_id is present in the mask table
    crossover_selection = selection[selection.mask_id.isin(masks.mask_id)]

    if not crossover_selection.empty:
        print(idx, len(selection), len(crossover_selection))

In [None]:
# Send a DELETE request for the annotation with this hard-coded id.
# NOTE(review): `delete_annotation` is defined in a later cell of this notebook, so
# on a fresh kernel this cell fails unless that cell is run first.
delete_annotation( "jP4gjyRy_5" )

In [None]:
# Show `selection` as left behind by the last iteration of the duplicate-check loop.
selection

In [None]:
# Image name of the first row of `selection`.
selection.img_name.iloc[0]

In [None]:
# Show the mask_id column of the currently loaded `masks` frame.
masks.mask_id

In [None]:
# Annotation frame reduced to image name and rectangle (label and mask_id removed).
results.drop(columns=['label', 'mask_id'])

In [None]:
# Display the currently loaded `masks` frame.
masks

In [None]:
def delete_annotation( mask_id, base_url='http://localhost:8080', timeout=30 ):
    """Send a DELETE request for one annotation to the Label Studio REST API.

    The request is authorised with the 'LABEL_STUDIO_TOKEN' entry of the
    module-level `dotenv` mapping.

    Parameters
    ----------
    mask_id : str
        Identifier that is placed into the ``/api/annotations/<id>/`` path.
    base_url : str, optional
        Root URL of the Label Studio server; defaults to the local instance.
    timeout : float or tuple, optional
        Timeout in seconds handed to `requests`, so an unreachable or stalled
        server raises instead of blocking the kernel indefinitely.

    Returns
    -------
    requests.Response
        The unmodified response; the status code is not checked here.
    """
    headers = {'Authorization': f'Token { dotenv["LABEL_STUDIO_TOKEN"] }'}

    return requests.delete(
        f'{ base_url.rstrip("/") }/api/annotations/{ mask_id }/',
        headers=headers,
        timeout=timeout,
    )

In [52]:
def _masks():
    """Replace temporary mask ids with the ids of matching Label Studio results.

    Reads every row of the `mask` table through the module-level connection
    `con` and fetches the results of Label Studio project 2. Results without an
    'id', or whose id already occurs in the mask table, are ignored. A mask
    whose mask_id matches ``^temp_.+`` is paired with the first remaining
    result whose image name contains ``<job>.<pdf_filename>`` as well as
    ``<method>.<idx>.4c_600.jpg`` and whose bbox (x, y, width, height) equals
    the mask's ``;``-separated bbox. For each pair the mask row's mask_id is
    overwritten with the result id.

    Updates rejected by the database with an IntegrityError are skipped; their
    number is printed after the loop. Every update is committed individually.

    Returns
    -------
    None
    """
    def to_sql_value( value ):
        # sqlite3 cannot bind NaN-as-NULL or numpy scalars, so normalise both
        if pd.isna(value):
            return None
        return value.item() if hasattr(value, 'item') else value

    # load masks
    masks = pd.read_sql(
        '''
            SELECT * FROM mask m
        ''',
        con
    )

    # build the id lookup once instead of once per result
    known_mask_ids = set(masks.mask_id)

    relevant_results = [
        r for r in
        get_results_of_project(2)
        if 'id' in r and r['id'] not in known_mask_ids
    ]

    # na=False: rows without a mask_id are simply not temporary
    temp_masks = masks.loc[
        masks.mask_id.str.match('^temp_.+', na=False)
    ]

    # find the masks that need an update
    requires_update = []

    for _, row in temp_masks.iterrows():
        bbox = [int(val) for val in row.bbox.split(";")]
        name_prefix = f'{ row.job }.{ row.pdf_filename }'
        name_suffix = f'{ row.method }.{ row.idx }.4c_600.jpg'

        # the name checks come first so bbox is only read for results of this image
        match = next(
            (
                r for r in relevant_results
                if name_prefix in r['img_name'] and
                    name_suffix in r['img_name'] and
                    r['bbox']['x'] == bbox[0] and
                    r['bbox']['y'] == bbox[1] and
                    r['bbox']['width'] == bbox[2] and
                    r['bbox']['height'] == bbox[3]
            ),
            None
        )

        if match is not None:
            requires_update.append((row, match['id']))

    # placeholders keep quotes in the values from breaking the statement;
    # `IS` is SQLite's NULL-safe equality, so rows with NULL columns match as well
    sql = '''
        UPDATE mask
        SET mask_id = ?
        WHERE (
            pdf_filename IS ? AND
            job IS ? AND
            "type" IS ? AND
            variant_name IS ? AND
            idx IS ? AND
            mask_id IS ?
        )
    '''

    skipped = 0
    for row, new_mask_id in tqdm(requires_update):
        params = [
            to_sql_value(value) for value in (
                new_mask_id,
                row.pdf_filename,
                row.job,
                row['type'],
                row.variant_name,
                row.idx,
                row.mask_id,
            )
        ]

        try:
            con.execute( sql, params )
        except sqlite3.IntegrityError:
            # presumably the new id collides with an existing row — the temporary
            # id is left in place; TODO confirm against the table constraints
            skipped += 1

        con.commit()

    if skipped:
        print(f'{ skipped } updates skipped (IntegrityError)')

In [53]:
# Replace temp_ mask ids with the ids of matching Label Studio results; the
# progress bar in the output counts the UPDATE statements that were issued.
_masks()

100%|██████████| 864/864 [00:01<00:00, 662.62it/s]
