In [16]:
import os 
import matplotlib.pyplot as plt 
import numpy as np 
from tqdm import tqdm 
import cv2
# Input folders: the raw images and their per-attribute segmentation masks.
image_dir = "data/raw/images"
masks_dir = "data/raw/masks"

# Attribute names. Each one is used verbatim as the suffix of a mask file name,
# "<image id>_attribute_<name>.png", so the spelling must match the files on disk:
# a misspelt name (e.g. "steaks" instead of "streaks") never matches a file and
# the loading step then silently substitutes an all-zero mask for that attribute.
attributes = ["globules", "streaks", "negative_network", "milia_like_cyst", "pigment_network"]

### This step pairs each image with its 5 attribute masks.

In [17]:
# Build the dataset: one normalised image and one 5-channel binary mask per sample.
img_size = (256, 256)  # (width, height) -- the order cv2.resize expects
max_images = 200       # only the first `max_images` files (sorted by name) are loaded

X_data = []
y_data = []

image_files = sorted(os.listdir(image_dir))

for img_file in tqdm(image_files[:max_images]):
    img_id = img_file.split(".")[0]
    img_path = os.path.join(image_dir, img_file)

    # Read the image. cv2.imread returns None (it does not raise) when the file
    # is not a readable image, e.g. a stray text file in the folder; skip those
    # instead of crashing later inside cv2.resize.
    img = cv2.imread(img_path)
    if img is None:
        continue

    # OpenCV loads channels in BGR order; convert so the stored data is RGB.
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = cv2.resize(img, img_size)
    img = img / 255.0  # scale pixel values to [0, 1]

    # Build one binary mask per attribute, in the order of `attributes`.
    mask_stack = []
    for attr in attributes:
        mask_path = os.path.join(masks_dir, f"{img_id}_attribute_{attr}.png")
        if os.path.exists(mask_path):
            mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
            if mask is None:
                # The file exists but cannot be decoded: fail with a clear message.
                raise OSError(f"Could not read mask file: {mask_path}")
            mask = cv2.resize(mask, img_size)
            # Binarise: pixels brighter than 127 become 1, everything else 0.
            mask = (mask > 127).astype(np.uint8)
        else:
            # No mask file for this attribute: use an empty mask.
            # NumPy shapes are (rows, cols) = (height, width), i.e. img_size reversed.
            mask = np.zeros(img_size[::-1], dtype=np.uint8)

        mask_stack.append(mask)

    mask_stack = np.stack(mask_stack, axis=-1)  # (height, width, number of attributes)

    # Append image and mask together, only once both are ready, so that
    # X_data[i] and y_data[i] always describe the same sample.
    X_data.append(img)
    y_data.append(mask_stack)

  0%|                                                                                                        | 0/200 [00:00<?, ?it/s]

100%|██████████████████████████████████████████████████████████████████████████████████████████████| 200/200 [00:13<00:00, 14.89it/s]


In [18]:
### Quick sanity check of the loading step above

# Convert the Python lists to arrays so their overall shapes can be inspected.
X_data_arr = np.array(X_data)
y_data_arr = np.array(y_data)

# Print the image array shape first, then the mask array shape.
for arr in (X_data_arr, y_data_arr):
    print(arr.shape)

(200, 256, 256, 3)
(200, 256, 256, 5)


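### Each image is 256x256 with 3 colour channels; each mask stack has 5 channels, one per attribute.
### Note: cv2.imread returns the colour channels in BGR order unless they are explicitly converted to RGB.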

In [20]:
### Save the processed data to "data/processed"
# np.save does not create missing folders, so make sure the target directory exists.
processed_dir = "data/processed"
os.makedirs(processed_dir, exist_ok=True)

# The lists are converted to arrays by np.save before being written.
np.save(os.path.join(processed_dir, "X_data.npy"), X_data)
np.save(os.path.join(processed_dir, "y_data.npy"), y_data)

In [23]:
from sklearn.model_selection import train_test_split

# First hold out 20 % of the samples, then cut that held-out part in half,
# giving an 80 % / 10 % / 10 % train / validation / test split.
X_train, X_temp, y_train, y_temp = train_test_split(
    X_data, y_data, test_size=0.2, random_state=42
)
X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp, test_size=0.5, random_state=42
)

# Report the shape of the image part of each split.
for split_name, split_images in (("train", X_train), ("val", X_val), ("test", X_test)):
    print(f"{split_name} size :", np.array(split_images).shape)

train size : (160, 256, 256, 3)
val size : (20, 256, 256, 3)
test size : (20, 256, 256, 3)
