In [26]:
import os
import random
from collections import Counter
from PIL import Image

# Pytorch
import torch
from torch import nn
from torch.utils.data import DataLoader, Dataset, random_split
from torchvision import datasets, transforms

# Pytorch Lightning
import pytorch_lightning as pl
from pytorch_lightning import Trainer
from pytorch_lightning.callbacks.early_stopping import EarlyStopping

# Hydra
import hydra
from omegaconf import DictConfig

In [27]:
DATASET_DIR = 'data/train'

In [28]:
class CustomLoader(pl.LightningDataModule):
    def __init__(self, dataset, batch_size=32, labeled_percentage=0.3):
        super().__init__()
        self.dataset = dataset
        self.batch_size = batch_size
        self.labeled_percentage = labeled_percentage

    def setup(self):
        total_size = len(self.dataset)
        labeled_size = int(total_size * self.labeled_percentage)
        unlabeled_size = total_size - labeled_size
        self.labeled_data, self.unlabeled_data = random_split(self.dataset, [labeled_size, unlabeled_size])

    def train_dataloader(self):
        return DataLoader(self.labeled_data, batch_size=self.batch_size, shuffle=True)

    def val_dataloader(self):
        return DataLoader(self.labeled_data, batch_size=self.batch_size)

    def test_dataloader(self):
        return DataLoader(self.labeled_data, batch_size=self.batch_size)

In [29]:
# (U Net Autoencoder)
class Autoencoder(pl.LightningModule):
    def __init__(self):
        super(Autoencoder, self).__init__()
        # Define las capas del encoder y del decoder con skip connections
        self.encoder = nn.Sequential
        (
            nn.Conv2d(1, 64, 3, padding=1),
            nn.ReLU(),
            # Agrega más capas de encoder
        )
        self.decoder = nn.Sequential
        (
            nn.ConvTranspose2d(64, 1, 3, padding=1),
            nn.ReLU(),
            # Agrega más capas de decoder
        )

    def forward(self, x):
        encoded = self.encoder(x)
        decoded = self.decoder(encoded)
        return decoded

    def training_step(self, batch, batch_idx):
        x, _ = batch
        reconstruction = self(x)
        loss = nn.MSELoss()(reconstruction, x)
        return loss

    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters(), lr=1e-3)


In [30]:
class Classifier_A(pl.LightningModule):
    def __init__(self):
        super(Classifier_A, self).__init__()
        self.model = nn.Sequential
        (
            nn.Flatten(),
            nn.Linear(512, 128),  # Ajusta según sea necesario
            nn.ReLU(),
            nn.Linear(128, 10)
        )

    def forward(self, x):
        return self.model(x)

    def training_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self(x)
        loss = nn.CrossEntropyLoss()(y_hat, y)
        return loss

    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters(), lr=1e-3)


class Classifier_B(pl.LightningModule):
    def __init__(self, encoder, fine_tune=False):
        super(Classifier_B, self).__init__()
        self.encoder = encoder

        if not fine_tune:
            for param in self.encoder.parameters():
                param.requires_grad = False  # Congelar el encoder si es necesario
        
        self.classifier = nn.Sequential
        (
            nn.Flatten(),
            nn.Linear(512, 128),
            nn.ReLU(),
            nn.Linear(128, 10)
        )

    def forward(self, x):
        x = self.encoder(x)
        return self.classifier(x)

    def training_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self(x)
        loss = nn.CrossEntropyLoss()(y_hat, y)
        return loss

    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters(), lr=1e-3)


In [31]:
early_stop_callback = EarlyStopping \
(
    monitor='val_loss',
    patience=3,
    verbose=True,
    mode='min'
)

In [None]:
# Paso 1: Cargar y preprocesar el dataset
transform = transforms.Compose
(
    [
        transforms.Resize((128, 128)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
)

# Cargar el dataset completo (sin filtrar aún por las 20 especies)
full_dataset = datasets.ImageFolder(root=DATASET_DIR, transform=transform)

# Paso 2: Contar las imágenes por especie
species_counts = Counter([sample[1] for sample in full_dataset.samples])

# Seleccionar las 20 especies con más muestras
top_20_species = [species for species, count in species_counts.most_common(20)]

# Filtrar el dataset solo con las 20 especies seleccionadas
filtered_samples = [sample for sample in full_dataset.samples if sample[1] in top_20_species]
filtered_dataset = torch.utils.data.DatasetFolder(root=DATASET_DIR, samples=filtered_samples, loader=full_dataset.loader, extensions=full_dataset.extensions, transform=transform)

# Paso 3: Dividir los datos de entrenamiento y prueba
train_samples = []
test_samples = []

# Para cada especie seleccionada, mover 20 muestras de entrenamiento a prueba
for species in top_20_species:
    # Obtener todas las imágenes de una especie
    species_samples = [sample for sample in filtered_samples if sample[1] == species]
    
    # Dividir 20 muestras para testing
    random.shuffle(species_samples)
    train_samples_species = species_samples[:-20]
    test_samples_species = species_samples[-20:]

    train_samples.extend(train_samples_species)
    test_samples.extend(test_samples_species)

# Crear datasets de entrenamiento y prueba
train_dataset = torch.utils.data.DatasetFolder(root=DATASET_DIR, samples=train_samples, loader=full_dataset.loader, extensions=full_dataset.extensions, transform=transform)
test_dataset = torch.utils.data.DatasetFolder(root=DATASET_DIR, samples=test_samples, loader=full_dataset.loader, extensions=full_dataset.extensions, transform=transform)

# Paso 4: Crear DataLoaders
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

print(f'Training samples: {len(train_samples)}')
print(f'Testing samples: {len(test_samples)}')


AttributeError: module 'torch.utils.data' has no attribute 'DatasetFolder'

In [None]:
trainer = Trainer(callbacks=[early_stop_callback], max_epochs=10)
trainer.fit(model, datamodule)


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
c:\Users\aleja\.conda\envs\env_IC6200_AI_P2_pytorch\lib\site-packages\pytorch_lightning\trainer\connectors\logger_connector\logger_connector.py:75: Starting from v1.9.0, `tensorboardX` has been removed as a dependency of the `pytorch_lightning` package, due to potential conflicts with other packages in the ML ecosystem. For this reason, `logger=True` will use `CSVLogger` as the default logger, unless the `tensorboard` or `tensorboardX` packages are found. Please `pip install lightning[extra]` or one of them to enable TensorBoard support by default


NameError: name 'model' is not defined

In [None]:
@hydra.main(config_path=".", config_name="config")
def main(cfg: DictConfig):
    # Aquí usas los valores de cfg para ajustar los hiperparámetros
    print(cfg.model.lr)