In [None]:
import sqlite3
from contextlib import closing

import pandas as pd
from sklearn.model_selection import train_test_split

In [None]:
# Notebook-wide configuration: source table and SQLite database location.
TABLE = "silver"
DATABASE = "../data/database/annotations.sqlite"

# Preview query: every column of TABLE, skipping rows whose class is 1.
# TABLE is a constant defined above; SQL identifiers cannot be bound as
# parameters, which is why it is interpolated with an f-string.
query = f"SELECT * FROM {TABLE} WHERE class != 1"

# sqlite3's own context manager only commits / rolls back the transaction and
# leaves the connection open, so closing() is used to release the handle.
with closing(sqlite3.connect(DATABASE)) as conn:
    df = pd.read_sql_query(query, conn)

df.head(10)

In [None]:
# Display the column labels of the currently loaded DataFrame.
df.columns

In [None]:
# Keep one annotation row per distinct (non-NULL) file:
#   * the innermost query finds the file's most frequent class; ties go to the
#     class whose earliest row has the smallest rowid, so the result is
#     deterministic;
#   * the middle query picks the earliest row (smallest rowid) of that class;
#   * a file with a single annotation trivially resolves to that one row.
# The class comparison is done row-wise in WHERE rather than in a HAVING clause
# on a bare column: HAVING is evaluated once per group and sees only one
# representative row, which would discard every file whose first row does not
# belong to the majority class.
# NOTE(review): unlike the `class != 1` preview query, class 1 rows are not
# excluded here -- confirm that this is intended.
sql_query = f"""
SELECT name, file, class, `x-center`, `y-center`, width, height
FROM {TABLE} AS t
WHERE t.rowid = (
    SELECT MIN(m.rowid)
    FROM {TABLE} AS m
    WHERE m.file = t.file
      AND m.class = (
        SELECT c.class
        FROM {TABLE} AS c
        WHERE c.file = t.file
        GROUP BY c.class
        ORDER BY COUNT(*) DESC, MIN(c.rowid) ASC
        LIMIT 1
    )
);
"""

# sqlite3's own context manager does not close the connection; closing() does.
with closing(sqlite3.connect(DATABASE)) as conn:
    df = pd.read_sql_query(sql_query, conn)

df.head(10)


In [None]:
# Report how many rows the current DataFrame holds.
print(df.shape[0])

In [None]:
from matplotlib import pyplot as plt
import seaborn as sns

# Class id -> display label table used for the plot's tick labels.
# "Teräs" is stored as proper UTF-8 so the label renders correctly instead of
# showing mis-decoded bytes.
categories_data = {
    "categories": [
        {"id": 0, "name": "Betoni"},
        {"id": 1, "name": "Ei materiala"},
        {"id": 2, "name": "Materiaali ei tiedossa"},
        {"id": 3, "name": "Muovi"},
        {"id": 4, "name": "Teräs"},
    ]
}

# Lookup from numeric class id to its label.
class_id_to_name = {cat['id']: cat['name'] for cat in categories_data['categories']}

# Add a label column next to the numeric class id; ids missing from the lookup
# map to NaN and are therefore left out of the counts below.
df['class_name'] = df['class'].map(class_id_to_name)

# Number of rows per class label, most frequent first.
class_counts = df['class_name'].value_counts()

# Draw on an explicit Axes instead of relying on pyplot's implicit
# "current figure" state.
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(x=class_counts.index, y=class_counts.values, ax=ax)
ax.set_title('Class Distribution')
ax.set_xlabel('Class Name')
ax.set_ylabel('Number of Occurrences')
# Slant the tick labels and right-align them so they end under their bar.
plt.setp(ax.get_xticklabels(), rotation=45, ha="right")
fig.tight_layout()
plt.show()