# Training Model with V2 Datas


### Initializing GPU

In [15]:
pip install faiss-cpu 

Collecting faiss-cpu
  Downloading faiss_cpu-1.12.0-cp312-cp312-win_amd64.whl.metadata (5.2 kB)
Downloading faiss_cpu-1.12.0-cp312-cp312-win_amd64.whl (18.2 MB)
   ---------------------------------------- 0.0/18.2 MB ? eta -:--:--
   -------------------------------------- - 17.3/18.2 MB 99.1 MB/s eta 0:00:01
   ---------------------------------------- 18.2/18.2 MB 88.2 MB/s eta 0:00:00
Installing collected packages: faiss-cpu
Successfully installed faiss-cpu-1.12.0
Note: you may need to restart the kernel to use updated packages.


In [1]:
import tensorflow as tf
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        # Currently, memory growth needs to be the same across GPUs
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as e:
        # Memory growth must be set before GPUs have been initialized
        print(e)


### Imports

In [None]:

from tensorflow import keras
import numpy as np
import json
from sklearn.model_selection import train_test_split
import os
import  random
import numpy as np
from tensorflow.keras import layers
import keras as keras
from keras.applications import  Xception
from PIL import Image
import random
print(tf.__version__)
print("Num GPUs Available: ", len(tf.config.experimental.list_physical_devices('GPU')))
tf.config.threading.set_intra_op_parallelism_threads(4)
tf.config.threading.set_inter_op_parallelism_threads(4)
np.set_printoptions(precision=3, suppress=True)




2.18.0
Num GPUs Available:  0


### Global variables

In [12]:

IS_WSL = False
WINDOWS_DATA_FOLDER="/mnt/e/code/ai/data/tcg/"
GENERATED_DATAS_V2_FOLDER_PATH = "../data/generated_datas_v2/"
if IS_WSL :
    GENERATED_DATAS_V2_FOLDER_PATH = WINDOWS_DATA_FOLDER + GENERATED_DATAS_V2_FOLDER_PATH

JSONS_FOLDER=GENERATED_DATAS_V2_FOLDER_PATH+"json/"
IMAGES_FOLDER=GENERATED_DATAS_V2_FOLDER_PATH+"images/"

RANDOM_STATE=42
RESIZE_SIZE=224
RGB = True
EPOCHS = 10
BATCH_SIZE = 28
TEST_SIZE = 0.25
MIN_ASSETS=0
MAX_ASSETS=72 #35000
if RGB:
    RGB_VALUE = 3
else:
    RGB_VALUE = 1



In [13]:

def load_files_list():

    json_files_list = []
    for json_file in os.listdir(JSONS_FOLDER):
        if json_file.endswith(".json"):
            json_files_list.append(json_file)
           

    random.Random(RANDOM_STATE).shuffle(json_files_list)
    json_files_list = json_files_list[MIN_ASSETS:MAX_ASSETS]
    print("Total files : ",json_files_list.__len__())
    print("First file : ",json_files_list[0])
    return json_files_list



### Loading files in generator

In [40]:
def read_json_from_s3(json_key):
    with open(JSONS_FOLDER + json_key) as json_data:
        raw_json = json.load(json_data)
        image_key = IMAGES_FOLDER + raw_json["asset"]["name"]
        points = raw_json["regions"][0]["points"]
        x0 = points[0]["x"]
        y0 = points[0]["y"] 
        x1 = points[1]["x"] 
        y1 = points[1]["y"] 
        x2 = points[2]["x"]
        y2 = points[2]["y"]
        x3 = points[3]["x"]
        y3 = points[3]["y"]
        new_json = {
            "image_key": image_key,
            "bounding_boxes": [x0, y0, x1, y1, x2, y2, x3, y3]
        }
        return new_json
    return None
# Fonction pour lire les images depuis S3
def read_image_from_s3(image_key):
    try:
        img = Image.open(image_key)
        img = img.resize((RESIZE_SIZE, RESIZE_SIZE))
        img = np.asarray( img, dtype="int32" )
        np.array(img)
        img = img/255
        return img
    except Exception as e:
        print(e)
        return None
# Fonction pour créer un dataset TensorFlow
def create_dataset(json_keys, batch_size=32):
    def generator():
        for json_key in json_keys:
            annotations = read_json_from_s3(json_key)
            if annotations == None or annotations["image_key"] == None :
                print("File : " + json_key + " is Malformated, delete it")
                continue
            img = read_image_from_s3(annotations["image_key"])
            if img is None:
                print("Image : " + annotations["image_key"] + " is Malformated, delete it")
                continue
            yield img, {"bb_head": annotations['bounding_boxes']}
    dataset = tf.data.Dataset.from_generator(generator,
                                             output_signature=(
                                                 tf.TensorSpec(shape=(RESIZE_SIZE, RESIZE_SIZE, RGB_VALUE), dtype=tf.float32),
                                                    {
                                                     "bb_head": tf.TensorSpec(shape=(8,), dtype=tf.float32)
                                                 }
                                             ))
    dataset = dataset.batch(batch_size)
    dataset = dataset.prefetch(buffer_size=tf.data.AUTOTUNE)
    return dataset

### Load datasets

In [41]:
json_files_list = load_files_list()

train_files, val_files = train_test_split(json_files_list, shuffle=True, random_state=42, test_size=TEST_SIZE)
print("Train files : ", len(train_files))
print("Val files : ", len(val_files))
train_dataset = create_dataset(train_files, BATCH_SIZE)
val_dataset = create_dataset(val_files, BATCH_SIZE)


Total files :  72
First file :  base1-7_1762430565.json
Train files :  54
Val files :  18


In [42]:

# Définir explicitement la couche d'entrée
input_layer = layers.Input(shape=(RESIZE_SIZE, RESIZE_SIZE, RGB_VALUE))
xception = Xception(weights="imagenet", include_top=False)(input_layer)

# Ajouter une couche dense pour les embeddings
embedding = layers.GlobalAveragePooling2D()(xception)
embedding = layers.Dense(256, activation='relu', name='embedding')(embedding)


# Construire le reste du modèle
flatten = layers.Flatten(name='flatten')(xception)
locator_branch = layers.Dense(128, activation='relu', name='bb_1')(flatten)
locator_branch = layers.Dense(64, activation='relu', name='bb_2')(locator_branch)
locator_branch = layers.Dense(32, activation='relu', name='bb_3')(locator_branch)
locator_branch = layers.Dense(8, activation='sigmoid', name='bb_head')(locator_branch)

# Créer le modèle multi-tâches
model = tf.keras.Model(
    inputs=input_layer,
    outputs={
        "bb_head": locator_branch,
        "embedding": embedding
    }
)
# Définir les pertes et métriques
losses = {
    "bb_head": tf.keras.losses.MeanSquaredError()
}

metrics = {
    "bb_head": ["accuracy"]
}

# Compiler le modèle
model.compile(
    optimizer='Adam',
    loss=losses,
    metrics=metrics
)


In [None]:

# Callbacks
checkpoint = keras.callbacks.ModelCheckpoint(
    filepath='../checkpoints/model_checkpoint_{epoch:02d}.keras',
    save_best_only=True,
    monitor='val_bb_head_accuracy',
    mode='max',
    save_weights_only=False,
    verbose=1
)

early_stopping = keras.callbacks.EarlyStopping(
    monitor='val_bb_head_accuracy',
    patience=3,
    mode='max',
    verbose=1
)

In [44]:

# Entraînement du modèle
history = model.fit(
    train_dataset,
    validation_data=val_dataset,
    epochs=3,
    callbacks=[checkpoint,early_stopping]
)


Epoch 1/3
      2/Unknown [1m37s[0m 4s/step - bb_head_accuracy: 0.1733 - loss: 0.0825 
Epoch 1: val_bb_head_accuracy improved from -inf to 0.22222, saving model to checkpoints/model_checkpoint_01.keras
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m43s[0m 10s/step - bb_head_accuracy: 0.1834 - loss: 0.0798 - val_bb_head_accuracy: 0.2222 - val_loss: 0.1751
Epoch 2/3
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4s/step - bb_head_accuracy: 0.0827 - loss: 0.0875
Epoch 2: val_bb_head_accuracy improved from 0.22222 to 0.38889, saving model to checkpoints/model_checkpoint_02.keras
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 8s/step - bb_head_accuracy: 0.0983 - loss: 0.0855 - val_bb_head_accuracy: 0.3889 - val_loss: 0.0679
Epoch 3/3
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4s/step - bb_head_accuracy: 0.2361 - loss: 0.0792
Epoch 3: val_bb_head_accuracy did not improve from 0.38889
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[

In [None]:
model.save("model.keras")