In [3]:
import torch
import torchvision.transforms as transforms
from torchvision import models, io
from torch.utils.data import DataLoader, Dataset
import pandas as pd
import os
from sklearn.model_selection import train_test_split
from torch import nn, optim
from sklearn.preprocessing import LabelEncoder
from PIL import Image

print("PyTorch version:", torch.__version__)
print("GPU available:", torch.cuda.is_available())

labels_df = pd.read_csv('labels/labels.csv')

label_encoder = LabelEncoder()
labels_df['label'] = label_encoder.fit_transform(labels_df['label'])

base_dir = 'images/'

labels_df['image_path'] = labels_df['image_name'].apply(lambda x: os.path.join(base_dir, x))

train_df, test_df = train_test_split(labels_df, test_size=0.2, random_state=42)

train_transform = transforms.Compose([
    transforms.RandomResizedCrop(299),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(30),
    transforms.RandomAffine(degrees=30, translate=(0.1, 0.1), scale=(0.8, 1.2)),
    transforms.RandomPerspective(distortion_scale=0.3, p=0.1),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

test_transform = transforms.Compose([
    transforms.Resize(299),
    transforms.CenterCrop(299),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

class CustomImageDataset(Dataset):
    def __init__(self, dataframe, transform=None):
        self.df = dataframe
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        img_name = self.df.iloc[idx]['image_path']
        image = Image.open(img_name).convert('RGB')
        label = self.df.iloc[idx]['label']
        label = torch.tensor(label, dtype=torch.long)
        if self.transform:
            image = self.transform(image)
        return image, label

train_dataset = CustomImageDataset(train_df, transform=train_transform)
test_dataset = CustomImageDataset(test_df, transform=test_transform)

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

# Cargar el modelo InceptionV3 preentrenado
model = models.inception_v3(weights=models.Inception_V3_Weights.IMAGENET1K_V1)
model.aux_logits = True
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, len(label_encoder.classes_))

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-5)

scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=5, verbose=True)

train_losses = []
train_accuracy = []
val_accuracy = []

best_acc = 0.0

# Entrenamiento
for epoch in range(70):
    model.train()
    model.aux_logits = True
    running_loss = 0.0
    correct = 0
    total = 0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs, aux_outputs = model(images)
        loss1 = criterion(outputs, labels)
        loss2 = criterion(aux_outputs, labels)
        loss = loss1 + 0.4 * loss2
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    train_losses.append(running_loss / len(train_loader))
    train_accuracy.append(100 * correct / total)

    # Evaluación
    model.eval()
    model.aux_logits = False
    val_correct = 0
    val_total = 0
    val_loss = 0.0
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            if outputs.dim() == 1:
                outputs = outputs.unsqueeze(0)
            loss = criterion(outputs, labels)
            val_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            val_total += labels.size(0)
            val_correct += (predicted == labels).sum().item()

    val_acc = 100 * val_correct / val_total
    val_accuracy.append(val_acc)

    print(f'Epoch {epoch+1}, Loss: {running_loss/len(train_loader)}, Train Accuracy: {100 * correct / total}%, Val Accuracy: {val_acc}%')

    scheduler.step(val_loss / len(test_loader))

    # Checkpointing
    if val_acc > best_acc:
        best_acc = val_acc
        torch.save(model.state_dict(), 'best_model.pth')


PyTorch version: 2.1.2+cu121
GPU available: True
Epoch 1, Loss: 4.850700923374721, Train Accuracy: 29.325842696629213%, Val Accuracy: 49.327354260089685%
Epoch 2, Loss: 4.566884585789272, Train Accuracy: 41.17977528089887%, Val Accuracy: 50.44843049327354%
Epoch 3, Loss: 4.290960907936096, Train Accuracy: 46.01123595505618%, Val Accuracy: 50.672645739910315%
Epoch 4, Loss: 4.046322797025953, Train Accuracy: 47.41573033707865%, Val Accuracy: 51.12107623318386%
Epoch 5, Loss: 3.795798029218401, Train Accuracy: 48.03370786516854%, Val Accuracy: 51.79372197309417%
Epoch 6, Loss: 3.5325985295431956, Train Accuracy: 48.48314606741573%, Val Accuracy: 52.690582959641254%
Epoch 7, Loss: 3.240062747682844, Train Accuracy: 50.842696629213485%, Val Accuracy: 54.48430493273543%
Epoch 8, Loss: 3.112003445625305, Train Accuracy: 53.59550561797753%, Val Accuracy: 56.053811659192824%
Epoch 9, Loss: 2.922296941280365, Train Accuracy: 54.943820224719104%, Val Accuracy: 56.72645739910314%
Epoch 10, Loss: 

KeyboardInterrupt: 

In [None]:
import torch
import pandas as pd
import os
from torchvision import io, transforms
from torch.utils.data import Dataset, DataLoader
from torchvision.models import inception_v3
from sklearn.preprocessing import LabelEncoder

model_path = 'best_model.pth'
model = inception_v3(weights=None)
num_ftrs = model.fc.in_features
model.fc = torch.nn.Linear(num_ftrs, len(label_encoder.classes_))
model.load_state_dict(torch.load(model_path))
model.eval()

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

test_df = pd.read_csv('labels/test.csv')
base_dir = 'images/'
test_df['image_path'] = test_df['image_name'].apply(lambda x: os.path.join(base_dir, x))


class TestImageDataset(Dataset):
    def __init__(self, dataframe, transform=None):
        self.df = dataframe
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        img_name = self.df.iloc[idx]['image_path']
        image = io.read_image(img_name).float() / 255.0
        if self.transform:
            image = self.transform(image)
        return image, self.df.iloc[idx]['image_name'], self.df.iloc[idx]['place'], self.df.iloc[idx]['date']


test_transform = transforms.Compose([
    transforms.Resize((299, 299)),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])


test_dataset = TestImageDataset(test_df, transform=test_transform)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)


results = []
with torch.no_grad():
    for images, names, places, dates in test_loader:
        images = images.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)
        labels = [label_encoder.inverse_transform([pred.item()])[0] for pred in predicted]

        for name, place, date, label in zip(names, places, dates, labels):
            results.append([name, place, date, label])


results_df = pd.DataFrame(results, columns=['image_name', 'place', 'date', 'label'])
results_df.to_csv('resultado.csv', index=False)

