# In [None]:
import os
import numpy as np
import pandas as pd
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

# Read the dataset index (CSV) into a DataFrame.
df = pd.read_csv(filepath_or_buffer='../data/datas.csv')

# Display the column names found in the CSV.
print(df.columns)

# Image loading helper
def load_image(image_path, target_size=(128, 128)):
    """Load one image in grayscale mode and scale its pixel values by 1/255.

    Args:
        image_path: Location of the image file; forwarded unchanged to
            ``load_img``.
        target_size: Forwarded to ``load_img`` as ``target_size``.
            Defaults to ``(128, 128)``.

    Returns:
        The output of ``img_to_array`` divided by 255.0 (presumably values
        in [0, 1] for 8-bit images -- confirm against the Keras docs).
    """
    grayscale = load_img(image_path, color_mode='grayscale', target_size=target_size)
    pixels = img_to_array(grayscale)
    return pixels / 255.0  # pixel normalisation

# Load every image referenced by the CSV together with its label.
# A row whose image cannot be loaded is reported and skipped; the label is
# only appended when the image was, so `images` and `labels` stay aligned.
images = []
labels = []

# Walk the two needed columns directly rather than using df.iterrows():
# iterrows() builds a full Series for each row (slow on large tables) and
# may upcast values to a common dtype across the row.
for img_path, label in zip(df['Image Path'], df['Finding Labels']):
    try:
        # Only the call that can actually fail is guarded.
        img = load_image(img_path)
    except Exception as e:  # deliberate best-effort: skip unreadable files
        print(f"Error loading image {img_path}: {e}")
    else:
        images.append(img)
        labels.append(label)

# Stack the per-image arrays / labels into single NumPy arrays.
images = np.array(images)
labels = np.array(labels)

# Encode every distinct value in `labels` as an integer class id.
label_encoder = LabelEncoder()
labels_encoded = label_encoder.fit_transform(y=labels)

# Two-stage split with a fixed seed for reproducibility: first hold out 40 %
# of the samples, then cut that held-out part in half to obtain the
# validation and test sets (roughly 60 / 20 / 20 overall).
X_train, X_temp, y_train, y_temp = train_test_split(
    images,
    labels_encoded,
    random_state=42,
    test_size=0.4,
)
X_val, X_test, y_val, y_test = train_test_split(
    X_temp,
    y_temp,
    random_state=42,
    test_size=0.5,
)

# Persist the train / validation / test splits as .npy files under ./data
# (relative to the current working directory).
#
# np.save does not create missing parent directories, so make sure the
# output folder exists first; otherwise the script would fail with
# FileNotFoundError only after all images have been loaded and split.
#
# NOTE(review): the CSV is read from '../data/' while the arrays are written
# to 'data/' -- confirm that two different locations are intended.
# NOTE(review): label_encoder.classes_ is not saved here; it is needed to map
# the integer labels back to their original values -- confirm whether it
# should be persisted as well.
os.makedirs('data', exist_ok=True)

np.save('data/X_train.npy', X_train)
np.save('data/X_val.npy', X_val)
np.save('data/X_test.npy', X_test)
np.save('data/y_train.npy', y_train)
np.save('data/y_val.npy', y_val)
np.save('data/y_test.npy', y_test)