<h4 style="text-align: center;">NOTEBOOK</h4>
<p style="text-align: center; font-style: italic">~~ TRAINING AI MODEL ~~</p>
<br>

<p style="text-align: justify; margin-left: 80px; margin-right: 80px;">
Ce notebook a pour objectif de regrouper les différentes architectures de modèles d'intelligence artificielle pour la détection des glyphes cunéiformes.</p>

<br>

<h3 style="text-align: justify; margin-left: 40px; margin-right: 80px;">
<b>Sommaire</b>
</h3>

<span style="margin-left: 80px;">- 1</span><br>
<span style="margin-left: 80px;">- 2</span><br>
<span style="margin-left: 80px;">- 3</span><br>

<br>

---
<br>

In [11]:
import base64
import dagshub
import mlflow
import numpy as np
import os
import pandas as pd
import sys

from ast import literal_eval
from io import BytesIO
from PIL import Image
from tqdm import tqdm

# - Import Personal Library
sys.path.append("../utils/")    ## - db_data_fetch
import db_data_fetch as dbdf

## Get data from database (bbox and images)

In [2]:
# Names of the dataset splits that the fetch cells iterate over.
SPLIT_SET = ['train', 'test']

# Empty placeholder frames for the bbox annotations of each split.
df_train, df_test = pd.DataFrame(), pd.DataFrame()

# Empty placeholder frames for the image records of each split.
train_images, test_images = pd.DataFrame(), pd.DataFrame()

#### Fetch Data Bbox Annotation

In [3]:
# Query the bbox annotations once per split, in SPLIT_SET order, and keep
# each result under its split name.
bbox_by_split = {
    split_name: dbdf.fetch_data_bbox_annotation(split_name)
    for split_name in SPLIT_SET
}

df_train = bbox_by_split['train']
df_test = bbox_by_split['test']

#### Fetch Image

In [4]:
# Query the image records once per split, in SPLIT_SET order, and keep
# each result under its split name.
images_by_split = {
    split_name: dbdf.fetch_image(split_name)
    for split_name in SPLIT_SET
}

train_images = images_by_split['train']
test_images = images_by_split['test']

#### Save the dataframes locally

In [5]:
# to_csv does not create missing parent folders, so make sure 'data/'
# exists before writing (no-op when it is already there).
os.makedirs('data', exist_ok=True)

# Persist the bbox annotations of each split, without the index column.
df_train.to_csv('data/train.csv', index=False)
df_test.to_csv('data/test.csv', index=False)

#### Decode tablet_picture, crop it to get each view, and save the views locally in two folders (train and test)

In [7]:
def crop_img(img: Image.Image, bbox: np.ndarray) -> Image.Image:
    """Return the rectangular region of ``img`` delimited by ``bbox``.

    Args:
        img: Source image; its ``crop`` method is called with the box.
        bbox: Indexable holding at least four values laid out as
            (x_min, y_min, x_max, y_max). Only the first four are read.

    Returns:
        The result of ``img.crop`` for that 4-tuple box.
    """
    x_min, y_min, x_max, y_max = (bbox[i] for i in range(4))
    return img.crop((x_min, y_min, x_max, y_max))

##### Train

In [15]:
directory = 'data/train_images/'

# Create the output folder once, up front, instead of checking on every row.
os.makedirs(directory, exist_ok=True)

## - Convert the bbox_segment to a numpy array for the crop_img function.
## Only string values are parsed: literal_eval raises on an ndarray, so
## without this guard the cell would fail when run a second time.
train_images['bbox_segment'] = train_images['bbox_segment'].apply(
    lambda bbox: np.array(literal_eval(bbox)) if isinstance(bbox, str)
    else np.asarray(bbox)
)

for _, row in tqdm(train_images.iterrows(), total=train_images.shape[0]):
    # tablet_picture is base64 text; decode it back to the raw image bytes.
    binary_data = base64.b64decode(row['tablet_picture'])

    # The context manager releases the decoded source image as soon as the
    # cropped view has been written to disk.
    with Image.open(BytesIO(binary_data)) as image:
        tablet_view = crop_img(image, row['bbox_segment'])
        # NOTE(review): the file name depends only on tablet_name, so rows
        # sharing a tablet_name would overwrite each other - confirm unique.
        tablet_view.save(os.path.join(directory, f'{row["tablet_name"]}.jpg'))

100%|██████████| 132/132 [00:14<00:00,  9.04it/s]


##### Test

In [17]:
directory = 'data/test_images/'

# Create the output folder once, up front, instead of checking on every row.
os.makedirs(directory, exist_ok=True)

## - Convert the bbox_segment to a numpy array for the crop_img function.
## Only string values are parsed: literal_eval raises on an ndarray, so
## without this guard the cell would fail when run a second time.
test_images['bbox_segment'] = test_images['bbox_segment'].apply(
    lambda bbox: np.array(literal_eval(bbox)) if isinstance(bbox, str)
    else np.asarray(bbox)
)

for _, row in tqdm(test_images.iterrows(), total=test_images.shape[0]):
    # tablet_picture is base64 text; decode it back to the raw image bytes.
    binary_data = base64.b64decode(row['tablet_picture'])

    # The context manager releases the decoded source image as soon as the
    # cropped view has been written to disk.
    with Image.open(BytesIO(binary_data)) as image:
        tablet_view = crop_img(image, row['bbox_segment'])
        # NOTE(review): the file name depends only on tablet_name, so rows
        # sharing a tablet_name would overwrite each other - confirm unique.
        tablet_view.save(os.path.join(directory, f'{row["tablet_name"]}.jpg'))

100%|██████████| 112/112 [00:09<00:00, 11.51it/s]


------------
## NAIVE AI

In [None]:
# Initialise the DagsHub client for this repository with its MLflow
# integration switched on (presumably this points MLflow tracking at
# DagsHub - confirm against the dagshub client docs).
dagshub.init(repo_name="my-first-repo", repo_owner="CharleyDL", mlflow=True)

# Open an MLflow run; it is left active here, and no matching end_run()
# is visible in this part of the notebook.
mlflow.start_run()