In [7]:
import os
import cv2
import shutil
import numpy as np
import matplotlib.pyplot as plt

import torch
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

from tensorflow.keras.preprocessing.image import ImageDataGenerator # type: ignore
from sklearn.model_selection import train_test_split

from src.config import *
from src.data import *
from src.models.efficientnet import EfficientNetB5Custom
from src.utils import *
from src.data import OriginalOAIDataset, dataset_augmentation
from src.train import train, train_model
from src.trainers.classification import Classification


# Seed NumPy and PyTorch (including the CUDA generators) with the shared
# RANDOM_SEED so the stochastic steps of this notebook are repeatable.
for _seed_rng in (
    np.random.seed,
    torch.manual_seed,
    torch.cuda.manual_seed,
    torch.cuda.manual_seed_all,
):
    _seed_rng(RANDOM_SEED)




In [2]:
# Source datasets that are merged into a single mixed dataset.
DATASET_PATH_1 = MENDELEY_OAI_BRIGHT_200_PATH
DATASET_PATH_2 = MENDELEY_EXPERT1_PATH  # images of this set are resized to 224x224 on merge

# The merged dataset is also the "original" input of the train/val/test split.
NEW_DATASET_PATH = ORIGINAL_PATH = 'dataset/OAI_mix_B200_E1'
# Root folder of the split version of the merged dataset.
NEW_MIX_SPLIT_PATH = f'{NEW_DATASET_PATH}_split'

In [None]:
# Start the merged dataset as a full copy of dataset 1. The copy is skipped
# when the destination path already exists.
merged_dataset_exists = os.path.exists(NEW_DATASET_PATH)
if not merged_dataset_exists:
    shutil.copytree(src=DATASET_PATH_1, dst=NEW_DATASET_PATH)


In [None]:
# Merge dataset 2 into the mixed dataset: every image is resized to 224x224
# and written into the folder of the same class under NEW_DATASET_PATH.
# NOTE(review): an image whose file name already exists in the destination
# class folder is overwritten — confirm the two datasets never share names.
target_size = (224, 224)
unreadable_images = []

for class_name in sorted(os.listdir(DATASET_PATH_2)):
    class_path = os.path.join(DATASET_PATH_2, class_name)
    if not os.path.isdir(class_path):
        # Stray files next to the class folders are not classes.
        continue
    print(class_path)

    # cv2.imwrite does not create missing folders (it only returns False),
    # so make sure the destination class folder exists first.
    os.makedirs(os.path.join(NEW_DATASET_PATH, class_name), exist_ok=True)

    for img_name in sorted(os.listdir(class_path)):
        img_path = os.path.join(class_path, img_name)
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread returns None for files it cannot decode; passing
            # that to cv2.resize would abort the whole merge.
            unreadable_images.append(img_path)
            continue
        img = cv2.resize(img, target_size)
        new_img_path = os.path.join(NEW_DATASET_PATH, class_name, img_name)
        if not cv2.imwrite(new_img_path, img):
            raise IOError(f"Could not write image: {new_img_path}")

if unreadable_images:
    print(f"Skipped {len(unreadable_images)} unreadable file(s):")
    for skipped_path in unreadable_images:
        print("  ", skipped_path)

In [None]:
# Inspect the merged dataset after adding dataset 2. explorar_data is a
# project helper; presumably it prints a per-class image count table, as seen
# in the outputs of later cells — confirm against its definition.
explorar_data(NEW_DATASET_PATH)

In [None]:
# Create the empty folder skeleton for the 70/10/20 split of the dataset:
#   <NEW_MIX_SPLIT_PATH>/{train,val,test}/<class>/
ORIGINAL_PATH = NEW_DATASET_PATH
NEW_MIX_SPLIT_PATH = 'dataset/OAI_mix_B200_E1_split'

NEW_TRAIN_PATH = os.path.join(NEW_MIX_SPLIT_PATH, 'train')
NEW_VAL_PATH = os.path.join(NEW_MIX_SPLIT_PATH, 'val')
NEW_TEST_PATH = os.path.join(NEW_MIX_SPLIT_PATH, 'test')

for split in ['train', 'val', 'test']:
    split_dir = os.path.join(NEW_MIX_SPLIT_PATH, split)
    # exist_ok=True also creates NEW_MIX_SPLIT_PATH itself and makes the cell
    # safe to re-run without a separate existence check.
    os.makedirs(split_dir, exist_ok=True)
    for c in os.listdir(ORIGINAL_PATH):
        if not os.path.isdir(os.path.join(ORIGINAL_PATH, c)):
            # Only sub-folders are classes; ignore stray files.
            continue
        class_split_dir = os.path.join(split_dir, c)
        os.makedirs(class_split_dir, exist_ok=True)



In [None]:
# Move every image of the merged dataset into the train/val/test folders
# (70% / 10% / remainder), one class at a time.
# NOTE: shutil.move empties ORIGINAL_PATH, so this is a one-shot step; classes
# that are already empty are skipped instead of raising on a re-run.
ORIGINAL_PATH = NEW_DATASET_PATH
NEW_MIX_SPLIT_PATH = 'dataset/OAI_mix_B200_E1_split'
# Only sub-folders are classes; sorted for a deterministic processing order.
classes = sorted(
    entry for entry in os.listdir(ORIGINAL_PATH)
    if os.path.isdir(os.path.join(ORIGINAL_PATH, entry))
)

for c in classes:
    class_dir = os.path.join(ORIGINAL_PATH, c)

    # os.listdir returns entries in arbitrary order; sorting makes the split
    # depend only on RANDOM_SEED and not on the filesystem.
    images = sorted(os.listdir(class_dir))
    n_images = len(images)
    if n_images == 0:
        print("No hay imágenes para la clase", c)
        continue

    n_train = int(n_images * 0.7)
    n_val = int(n_images * 0.1)
    n_test = n_images - n_train - n_val

    print("Generando split para la clase", c)
    print("Train:", n_train)
    print("Val:", n_val)
    print("Test:", n_test)

    # train_test_split rejects an integer test_size that is 0 or that equals
    # the number of samples, so the degenerate cases of very small classes
    # are resolved by hand.
    if n_train == 0:
        train_images, val_test_images = [], images
    else:
        train_images, val_test_images = train_test_split(
            images, test_size=n_val + n_test, random_state=RANDOM_SEED
        )
    if n_val == 0:
        val_images, test_images = [], val_test_images
    else:
        val_images, test_images = train_test_split(
            val_test_images, test_size=n_test, random_state=RANDOM_SEED
        )

    for split_name, split_images in (
        ('train', train_images),
        ('val', val_images),
        ('test', test_images),
    ):
        dest_dir = os.path.join(NEW_MIX_SPLIT_PATH, split_name, c)
        # Do not rely on another cell having created the destination folder.
        os.makedirs(dest_dir, exist_ok=True)
        for img_name in split_images:
            shutil.move(os.path.join(class_dir, img_name), os.path.join(dest_dir, img_name))


In [14]:
# Total image count of each source dataset (Expert1 first, then Bright-200);
# explorar_data is called once per dataset, in that order.
n1, n2 = (
    sum(explorar_data(source_path))
    for source_path in (MENDELEY_EXPERT1_PATH, MENDELEY_OAI_BRIGHT_200_PATH)
)
# Summary returned by the project helper for the split version of the mix.
n3 = explorar_split_data('dataset/OAI_mix_B200_E1_split')
print(n1, n2)

╒═════════╤════════════╕
│   Clase │   Cantidad │
╞═════════╪════════════╡
│       0 │        514 │
├─────────┼────────────┤
│       1 │        477 │
├─────────┼────────────┤
│       2 │        232 │
├─────────┼────────────┤
│       3 │        221 │
├─────────┼────────────┤
│       4 │        206 │
╘═════════╧════════════╛
╒═════════╤════════════╕
│   Clase │   Cantidad │
╞═════════╪════════════╡
│       0 │       3115 │
├─────────┼────────────┤
│       1 │       1432 │
├─────────┼────────────┤
│       2 │       2056 │
├─────────┼────────────┤
│       3 │        998 │
├─────────┼────────────┤
│       4 │        225 │
╘═════════╧════════════╛
╒═════════╤══════╤══════╤══════╤═════╤═════╕
│ Clase   │    0 │    1 │    2 │   3 │   4 │
╞═════════╪══════╪══════╪══════╪═════╪═════╡
│ train   │ 2540 │ 1336 │ 1601 │ 853 │ 301 │
├─────────┼──────┼──────┼──────┼─────┼─────┤
│ val     │  362 │  190 │  228 │ 121 │  43 │
├─────────┼──────┼──────┼──────┼─────┼─────┤
│ test    │  727 │  383 │  459 │ 24

In [18]:
# Total number of images in the split dataset. n3 holds the value returned by
# explorar_split_data, which has to be summed per entry to get one number
# (same reduction as used for the augmented datasets in this notebook).
sum(sum(n3[x]) for x in n3)

9476

In [21]:
# Summarise the augmented mixed dataset and add up the counts of every entry
# returned by explorar_split_data to get the overall number of images.
n1 = explorar_split_data('dataset/OAI_mix_B200_E1_augmented')
num_total = sum(sum(n1[split_key]) for split_key in n1.keys())
print(num_total)

╒═════════╤══════╤══════╤══════╤══════╤══════╕
│ Clase   │    0 │    1 │    2 │    3 │    4 │
╞═════════╪══════╪══════╪══════╪══════╪══════╡
│ train   │ 3000 │ 3000 │ 3000 │ 3000 │ 3000 │
├─────────┼──────┼──────┼──────┼──────┼──────┤
│ val     │  362 │  190 │  228 │  121 │   43 │
├─────────┼──────┼──────┼──────┼──────┼──────┤
│ test    │  727 │  383 │  459 │  245 │   87 │
╘═════════╧══════╧══════╧══════╧══════╧══════╛
17845


In [22]:
# Same summary for the augmented Bright-200 dataset: explore it, then reduce
# the per-entry counts to a single overall total.
n1 = explorar_split_data('dataset/mendeleyOAI_dataset/brightness_200_split_aug_3k')
num_total = sum(map(sum, (n1[split_key] for split_key in n1)))
print(num_total)

╒═════════╤══════╤══════╤══════╤══════╤══════╕
│ Clase   │    0 │    1 │    2 │    3 │    4 │
╞═════════╪══════╪══════╪══════╪══════╪══════╡
│ train   │ 3000 │ 3000 │ 3000 │ 3000 │ 3000 │
├─────────┼──────┼──────┼──────┼──────┼──────┤
│ val     │  311 │  143 │  205 │   99 │   22 │
├─────────┼──────┼──────┼──────┼──────┼──────┤
│ test    │  624 │  287 │  412 │  201 │   46 │
╘═════════╧══════╧══════╧══════╧══════╧══════╛
17350


In [10]:
# Display the root folder of the split dataset (sanity check of the path that
# is passed to dataset_augmentation).
NEW_MIX_SPLIT_PATH

'dataset/OAI_mix_B200_E1_split'

In [3]:
# Build the augmented dataset under 'dataset/OAI_mix_B200_E1_augmented' from
# the split dataset. The third argument (3000) is presumably the target number
# of training images per class — the log of this cell reports 3000 images per
# class; confirm against dataset_augmentation in src.data.
# NOTE(review): the log prints a "probability" above 1 (1.2955...) for class 1;
# worth checking how dataset_augmentation computes that value.
# NOTE(review): a cell placed earlier in the notebook reads the augmented
# folder, so on a fresh "Run All" this cell has to be executed before it.
dataset_augmentation(NEW_MIX_SPLIT_PATH, 'dataset/OAI_mix_B200_E1_augmented', 3000)

Copying original images...
Se han copiado 2540 imágenes de la clase 0
Generando 460 imágenes aumentadas para la clase 0...
Probabilidad de que se genere una imagen aumentada: 0.2311023622047244
Se han generado 3000 imágenes aumentadas para la clase 0
-----------------------------------

Copying original images...
Se han copiado 1336 imágenes de la clase 1
Generando 1664 imágenes aumentadas para la clase 1...
Probabilidad de que se genere una imagen aumentada: 1.2955089820359282
Probabilidad de que se genere una imagen aumentada: 0.3486526946107784
Probabilidad de que se genere una imagen aumentada: 0.0687125748502994
Se han generado 3000 imágenes aumentadas para la clase 1
-----------------------------------

Copying original images...
Se han copiado 1601 imágenes de la clase 2
Generando 1399 imágenes aumentadas para la clase 2...
Probabilidad de que se genere una imagen aumentada: 0.9238288569643973
Probabilidad de que se genere una imagen aumentada: 0.05687070580886946
Se han generad