In [None]:
import os
import sqlite3
from contextlib import closing

import pandas as pd

In [None]:
# Relative path to the SQLite database that the query cells connect to.
DATABASE: str = "../data/database/annotations.sqlite"

# Table names that the SQL queries are built against.
training_table: str = "training"
validation_table: str = "validation"

In [None]:
# Load one annotation row per file from the training table, skipping rows
# whose name equals the excluded value below.
#
# NOTE(review): the window orders each partition by its own partition key
# (file), so the query itself does not define which of a file's rows gets
# rn = 1. Add a real tie-breaker to ORDER BY if a specific row is wanted.
excluded_name = "ristiinluokittelu"

# The table name must be interpolated (identifiers cannot be bound), but the
# excluded value is passed as a bound parameter: a double-quoted token in
# SQLite is an identifier first and only falls back to a string literal.
query = f"""
WITH RankedFiles AS (
    SELECT
        file AS filename,
        class,
        "x-center",
        "y-center",
        width,
        height,
        ROW_NUMBER() OVER (PARTITION BY file ORDER BY file) AS rn
    FROM
        {training_table}
    WHERE
        name != ?
)
SELECT
    filename,
    class,
    "x-center",
    "y-center",
    width,
    height
FROM
    RankedFiles
WHERE
    rn = 1;
"""

# sqlite3's own connection context manager only commits or rolls back;
# closing() is what actually releases the connection when the block exits.
with closing(sqlite3.connect(DATABASE)) as conn:
    df = pd.read_sql_query(query, conn, params=(excluded_name,))

len(df)

In [None]:
# Let's save these as files: one text file per filename in df, where every
# line has the form "<class> <x-center> <y-center> <width> <height>".
output_dir = "../data/training_dataset"

# exist_ok avoids the check-then-create race and keeps the cell re-runnable.
os.makedirs(output_dir, exist_ok=True)

label_columns = ["class", "x-center", "y-center", "width", "height"]

for filename, group_df in df.groupby('filename'):
    # NOTE(review): the file is written under the name stored in the
    # database, unchanged. If that name carries a non-.txt extension and a
    # .txt label file is expected, convert it here — TODO confirm.
    txt_filepath = os.path.join(output_dir, filename)
    with open(txt_filepath, 'w', encoding='utf-8') as f:
        # Iterate the needed columns in lockstep instead of iterrows(), which
        # builds a full Series object for every row.
        rows = zip(*(group_df[column] for column in label_columns))
        for class_id, x_center, y_center, width, height in rows:
            # The class is written as an integer id; the remaining fields are
            # written with their default string formatting.
            f.write(f"{int(class_id)} {x_center} {y_center} {width} {height}\n")


In [None]:
# List the files in the silver table that are annotated with more than one
# distinct class, together with the comma-separated set of those classes.
#
# COUNT(DISTINCT class) > 1 already implies the file has more than one row,
# so no separate COUNT(*) check is needed.
#
# NOTE(review): this cell rebinds the notebook-level name df. Any cell that
# expects the training annotations in df must be run before this one.
query = """
SELECT
    file AS filename,
    GROUP_CONCAT(DISTINCT class) AS classes
FROM
    silver
GROUP BY
    file
HAVING
    COUNT(DISTINCT class) > 1;
"""

# sqlite3's own connection context manager only commits or rolls back;
# closing() is what actually releases the connection when the block exits.
with closing(sqlite3.connect(DATABASE)) as conn:
    df = pd.read_sql_query(query, conn)

print(len(df))
df.head(10)