Generar el split train/val/test (70/10/20) para mendeley_dataset

In [1]:
import os
import cv2
import shutil
import numpy as np
import matplotlib.pyplot as plt

import torch
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

from tensorflow.keras.preprocessing.image import ImageDataGenerator # type: ignore
from sklearn.model_selection import train_test_split

from src.config import *
from src.data import *
from src.models.efficientnet import EfficientNetB5Custom
from src.utils import *
from src.data import OriginalOAIDataset
from src.train import train, train_model
from src.trainers.classification import Classification


# Seed NumPy and every PyTorch generator (CPU, current CUDA device, all CUDA
# devices) with the same project-wide seed. The seeders are called in the
# order listed.
for _seed_fn in (
    np.random.seed,
    torch.manual_seed,
    torch.cuda.manual_seed,
    torch.cuda.manual_seed_all,
):
    _seed_fn(RANDOM_SEED)




In [None]:
# Root of the source dataset, expected to contain one sub-folder per class.
# MENDELEY_EXPERT1_PATH is not defined in this notebook; presumably it comes
# from `from src.config import *` -- TODO confirm.
ORIGINAL_PATH = MENDELEY_EXPERT1_PATH
# Root under which the train/val/test folders are created. This is a relative
# path, so it is resolved against the notebook's current working directory.
NEW_MENDELEY_EXPERT1_PATH = 'dataset/mendeley_dataset/medical_expert_1_split'

In [None]:

# Create the directory skeleton for the 70/10/20 split:
#   <NEW_MENDELEY_EXPERT1_PATH>/{train,val,test}/<class>/

# exist_ok=True makes this cell idempotent (safe under Restart & Run All) and
# avoids the check-then-create race of `if not exists: makedirs`.
os.makedirs(NEW_MENDELEY_EXPERT1_PATH, exist_ok=True)

NEW_TRAIN_PATH = os.path.join(NEW_MENDELEY_EXPERT1_PATH, 'train')
NEW_VAL_PATH = os.path.join(NEW_MENDELEY_EXPERT1_PATH, 'val')
NEW_TEST_PATH = os.path.join(NEW_MENDELEY_EXPERT1_PATH, 'test')

# Only sub-directories of ORIGINAL_PATH are classes. Stray files at the top
# level (e.g. .DS_Store, Thumbs.db) must not get a class folder of their own.
class_names = sorted(
    entry for entry in os.listdir(ORIGINAL_PATH)
    if os.path.isdir(os.path.join(ORIGINAL_PATH, entry))
)

for split_dir in (NEW_TRAIN_PATH, NEW_VAL_PATH, NEW_TEST_PATH):
    # Create the split folder itself even when there are no classes yet.
    os.makedirs(split_dir, exist_ok=True)
    for class_name in class_names:
        os.makedirs(os.path.join(split_dir, class_name), exist_ok=True)



In [None]:
# Split every class 70/10/20 into train/val/test and move its images into
# <NEW_MENDELEY_EXPERT1_PATH>/<split>/<class>/.
#
# WARNING: shutil.move is destructive -- the images leave ORIGINAL_PATH. Once
# this cell has run, the source class folders are empty; re-running it then
# skips every class instead of crashing.

# Only sub-directories are classes (a stray file would make os.listdir(class_dir)
# fail). sorted() is required for reproducibility: os.listdir returns entries in
# arbitrary, filesystem-dependent order, so the same random_state would
# otherwise yield different splits on different machines.
classes = sorted(
    entry for entry in os.listdir(ORIGINAL_PATH)
    if os.path.isdir(os.path.join(ORIGINAL_PATH, entry))
)

for class_name in classes:
    class_dir = os.path.join(ORIGINAL_PATH, class_name)

    # Regular files only, in a deterministic order (see note above).
    images = sorted(
        entry for entry in os.listdir(class_dir)
        if os.path.isfile(os.path.join(class_dir, entry))
    )
    n_images = len(images)
    if n_images == 0:
        # Nothing to split (empty class, or the cell was already executed).
        print("Clase", class_name, "sin imágenes; se omite")
        continue

    n_train = int(n_images * 0.7)
    n_val = int(n_images * 0.1)
    n_test = n_images - n_train - n_val  # the remainder goes to test

    print("Generando split para la clase", class_name)
    print("Train:", n_train)
    print("Val:", n_val)
    print("Test:", n_test)

    # train_test_split rejects a split that would leave either side empty, so
    # the degenerate cases for very small classes are handled explicitly:
    #   n_train == 0  (fewer than 2 images)  -> everything is held out
    #   n_val   == 0  (fewer than 10 images) -> all held-out images go to test
    if n_train == 0:
        train_images, val_test_images = [], images
    else:
        train_images, val_test_images = train_test_split(
            images, test_size=n_val + n_test, random_state=RANDOM_SEED
        )
    if n_val == 0:
        val_images, test_images = [], val_test_images
    else:
        val_images, test_images = train_test_split(
            val_test_images, test_size=n_test, random_state=RANDOM_SEED
        )

    # Invariant: every image is assigned to exactly one split.
    assert len(train_images) + len(val_images) + len(test_images) == n_images

    for split_name, split_images in (
        ('train', train_images),
        ('val', val_images),
        ('test', test_images),
    ):
        dest_dir = os.path.join(NEW_MENDELEY_EXPERT1_PATH, split_name, class_name)
        # Create the destination here so this cell does not depend on another
        # cell having built the directory skeleton first.
        os.makedirs(dest_dir, exist_ok=True)
        for img in split_images:
            shutil.move(os.path.join(class_dir, img), os.path.join(dest_dir, img))


In [None]:
# Inspect the generated split directory. explorar_split_data is not defined in
# this notebook; presumably it comes from one of the `src.*` star imports and
# summarises the train/val/test contents -- TODO confirm its return type.
data = explorar_split_data(NEW_MENDELEY_EXPERT1_PATH)