In [1]:
pip install torch torchvision matplotlib


Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch)
  Downloading nvidia_curand_cu12-10.3.5

In [2]:
import torch
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import matplotlib.pyplot as plt
from torchvision.utils import make_grid


In [3]:
# Baseline preprocessing: fixed 150x150 resize, tensor conversion, then
# per-channel normalisation with the mean/std values listed below.
transform = transforms.Compose(
    [
        transforms.Resize(size=(150, 150)),
        transforms.ToTensor(),
        transforms.Normalize(
            [0.485, 0.456, 0.406],  # mean (R, G, B)
            [0.229, 0.224, 0.225],  # std  (R, G, B)
        ),
    ]
)


In [None]:
# Build the image dataset from the folder tree, applying the baseline preprocessing.
dataset = datasets.ImageFolder('/sample_data', transform=transform)


In [None]:
# Serve the dataset in shuffled mini-batches of four images.
dataloader = DataLoader(dataset=dataset, batch_size=4, shuffle=True)


In [None]:
# Fetch one batch of images.
images, labels = next(iter(dataloader))

# Undo transforms.Normalize so the images are displayable.
# Normalize computes x_norm = (x - mean) / std per channel, so the inverse is
# x = x_norm * std + mean, using the same mean/std passed to Normalize.
# The tensors are shaped (1, 3, 1, 1) to broadcast over a (batch, channel, H, W) batch.
norm_mean = torch.tensor([0.485, 0.456, 0.406]).view(1, 3, 1, 1)
norm_std = torch.tensor([0.229, 0.224, 0.225]).view(1, 3, 1, 1)
# Clamp to [0, 1] so imshow does not have to clip out-of-range floats.
images = (images * norm_std + norm_mean).clamp(0.0, 1.0)

# Arrange the batch into a single image grid.
grid_img = make_grid(images)

# Display the grid; imshow expects (H, W, C), so move the channel axis last.
plt.figure(figsize=(10, 10))
plt.imshow(grid_img.permute(1, 2, 0))
plt.axis('off')  # Hide the axes
plt.show()


In [None]:
import torchvision.transforms as transforms

# Data-augmentation pipeline: random geometric and colour perturbations,
# followed by tensor conversion and per-channel normalisation.
transform_augment = transforms.Compose(
    [
        # Random rotation of up to 30 degrees in either direction.
        transforms.RandomRotation(30),
        # Horizontal flip applied with probability 0.5.
        transforms.RandomHorizontalFlip(p=0.5),
        # Random crop covering 80-100% of the image area, resized to 150x150.
        transforms.RandomResizedCrop((150, 150), scale=(0.8, 1.0)),
        # Random brightness change.
        transforms.ColorJitter(brightness=0.5),
        # Convert to grayscale with probability 0.3.
        transforms.RandomGrayscale(p=0.3),
        # PIL image -> float tensor.
        transforms.ToTensor(),
        # Per-channel normalisation (mean, std).
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ]
)

In [None]:
from torchvision import datasets
from torch.utils.data import DataLoader

# Same image folder as before, but read through the augmentation pipeline.
dataset_augmented = datasets.ImageFolder('/sample_data', transform=transform_augment)

# Shuffled mini-batches of four augmented images.
dataloader_augmented = DataLoader(
    dataset=dataset_augmented,
    batch_size=4,
    shuffle=True,
)

In [None]:
import matplotlib.pyplot as plt
from torchvision.utils import make_grid

# Fetch one batch of augmented images.
images_aug, labels_aug = next(iter(dataloader_augmented))

# Undo transforms.Normalize for display.
# Normalize computes x_norm = (x - mean) / std per channel, so the inverse is
# x = x_norm * std + mean, using the same mean/std passed to Normalize.
# The tensors are shaped (1, 3, 1, 1) to broadcast over a (batch, channel, H, W) batch.
norm_mean = torch.tensor([0.485, 0.456, 0.406]).view(1, 3, 1, 1)
norm_std = torch.tensor([0.229, 0.224, 0.225]).view(1, 3, 1, 1)
# Clamp to [0, 1] so imshow does not have to clip out-of-range floats.
images_aug = (images_aug * norm_std + norm_mean).clamp(0.0, 1.0)

# Arrange the batch into a single image grid.
grid_img_aug = make_grid(images_aug)

# Display the augmented images; imshow expects (H, W, C).
plt.figure(figsize=(10, 10))
plt.imshow(grid_img_aug.permute(1, 2, 0))
plt.axis('off')
plt.show()


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import matplotlib.pyplot as plt


In [None]:
class CNN(nn.Module):
    """Small convolutional classifier for RGB images.

    Architecture: two 3x3 convolutions (3 -> 32 -> 64 channels, padding 1),
    each followed by ReLU and a 2x2 max-pool, then two fully connected
    layers (flattened features -> 128 -> ``num_classes``).

    Args:
        num_classes: Number of output logits. Defaults to 6
            (buildings, forest, glacier, mountain, sea, street).
        input_size: Spatial size of the input images, either a single int
            for square images or a ``(height, width)`` pair. Defaults to 150.

    Raises:
        ValueError: If the input is so small that nothing is left after the
            two pooling stages.
    """

    def __init__(self, num_classes=6, input_size=150):
        super().__init__()

        if isinstance(input_size, int):
            height, width = input_size, input_size
        else:
            height, width = input_size

        # Convolutional layers; padding=1 with a 3x3 kernel keeps the spatial size.
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1)

        # 2x2 max-pooling with stride 2 halves each spatial dimension (floor division).
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)

        # Feature count entering the dense layers: 64 channels times the
        # spatial size after the two pooling stages.
        self.flatten_size = 64 * (height // 4) * (width // 4)
        if self.flatten_size <= 0:
            raise ValueError(
                f"input_size {input_size!r} is too small for two 2x2 pooling stages"
            )

        # Fully connected layers.
        self.fc1 = nn.Linear(self.flatten_size, 128)
        self.fc2 = nn.Linear(128, num_classes)

    def forward(self, x):
        """Return raw class logits of shape ``(batch, num_classes)``.

        Args:
            x: Image batch of shape ``(batch, 3, height, width)`` matching the
                ``input_size`` given at construction.
        """
        x = self.pool(torch.relu(self.conv1(x)))  # Block 1
        x = self.pool(torch.relu(self.conv2(x)))  # Block 2
        # Flatten everything except the batch dimension. The batch size is
        # always preserved, and a wrong input resolution surfaces as a shape
        # error in fc1 instead of being reshaped across samples.
        x = torch.flatten(x, start_dim=1)
        x = torch.relu(self.fc1(x))
        # No activation here: the output is raw logits (nn.CrossEntropyLoss
        # applies log-softmax internally).
        x = self.fc2(x)
        return x


In [None]:
# Training-time preprocessing: random augmentation, tensor conversion and
# per-channel normalisation.
transform_train = transforms.Compose(
    [
        transforms.RandomRotation(30),
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.RandomResizedCrop((150, 150), scale=(0.8, 1.0)),
        transforms.ColorJitter(brightness=0.5),
        transforms.RandomGrayscale(p=0.3),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ]
)

# Training dataset and loader (shuffled batches of 32 images).
# NOTE(review): the root path looks like a placeholder — confirm the real image folder.
dataset_train = datasets.ImageFolder('ruta/a/tu/carpeta/images', transform=transform_train)
dataloader_train = DataLoader(
    dataset=dataset_train,
    batch_size=32,
    shuffle=True,
)


In [None]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Six-class network, moved to the selected device.
model = CNN(num_classes=6).to(device)

# Cross-entropy loss on the model outputs; Adam with a small learning rate and L2 weight decay.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(
    model.parameters(),
    lr=1e-4,
    weight_decay=1e-4,
)


In [None]:
num_epochs = 30

for epoch in range(num_epochs):
    model.train()  # Switch the model to training mode
    running_loss = 0.0  # Sum of per-batch losses for this epoch

    for images, labels in dataloader_train:
        # Move the batch to the same device as the model.
        images = images.to(device)
        labels = labels.to(device)

        # Clear the gradients left over from the previous step.
        optimizer.zero_grad()

        # Forward pass and loss.
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward pass and parameter update.
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    # Mean of the per-batch losses over the epoch.
    avg_loss = running_loss / len(dataloader_train)
    print(f"Época [{epoch+1}/{num_epochs}], Pérdida: {avg_loss:.4f}")

print("Entrenamiento finalizado 🚀")
