In [None]:
from torchvision import transforms, models
from torch.utils.data import Dataset, DataLoader
import torch
import numpy as np
import pandas as pd
import pyarrow.parquet as pq
from PIL import Image
import io
import json
import time

In [None]:
# Source of the shards: https://huggingface.co/datasets/huggan/wikiart/tree/main/data
parquets = [
    "./data/train-00000-of-00072.parquet",
    "./data/train-00001-of-00072.parquet",
    "./data/train-00002-of-00072.parquet",
]

# Read every shard first and concatenate once: calling pd.concat inside the
# loop re-copies all previously loaded rows on each iteration.
shard_frames = [pq.read_table(shard_path).to_pandas() for shard_path in parquets]
# ignore_index=True gives the combined frame a fresh 0..n-1 index.
df_dataset = pd.concat(shard_frames, ignore_index=True) if shard_frames else pd.DataFrame([])
df_dataset

In [None]:
# Load the dataset card metadata; it holds the human-readable class names
# for the integer-coded artist / genre / style columns.
with open("./data/dataset_infos.json", 'r') as file:
    dataset_info = json.load(file)

_wikiart_features = dataset_info["huggan--wikiart"]["features"]
artists, genres, styles = (
    _wikiart_features[feature_key]["names"]
    for feature_key in ("artist", "genre", "style")
)

In [None]:
# Sanity check: decode the first sample and print its artist, genre and style names.
first_sample_bytes = df_dataset["image"][0]["bytes"]
image = Image.open(io.BytesIO(first_sample_bytes))
first_sample_names = (
    artists[df_dataset["artist"][0]],
    genres[df_dataset["genre"][0]],
    styles[df_dataset["style"][0]],
)
print(*first_sample_names)
image

In [None]:
# Artist frequency table. value_counts() sorts by descending frequency, so the
# positional column added by the second reset_index ("new_artist_label") is a
# dense 0..k-1 relabeling with 0 = most frequent artist present in the data.
# NOTE(review): relies on value_counts().reset_index() producing an "artist"
# column (pandas >= 2.0 naming) — confirm the pandas version.
artist_counts = df_dataset["artist"].value_counts()
dataset_artist_values = artist_counts.reset_index().reset_index(names="new_artist_label")
dataset_artist_values["artist_name"] = dataset_artist_values["artist"].map(
    lambda artist_code: artists[artist_code]
)
dataset_artist_values

In [None]:
# Genre frequency table; "new_genre_label" is the dense 0..k-1 relabeling
# in descending-frequency order (same construction as the artist table).
genre_counts = df_dataset["genre"].value_counts()
dataset_genres_values = genre_counts.reset_index().reset_index(names="new_genre_label")
dataset_genres_values["genre_name"] = dataset_genres_values["genre"].map(
    lambda genre_code: genres[genre_code]
)
dataset_genres_values

In [None]:
# Style frequency table; "new_style_label" is the dense 0..k-1 relabeling
# in descending-frequency order (same construction as the artist table).
style_counts = df_dataset["style"].value_counts()
dataset_style_values = style_counts.reset_index().reset_index(names="new_style_label")
dataset_style_values["style_name"] = dataset_style_values["style"].map(
    lambda style_code: styles[style_code]
)
dataset_style_values

In [None]:
# Attach the dense training labels to every row. Left joins keep the row
# count and order of df_dataset; each lookup table has one row per original code.
df_dataset_labeled = (
    df_dataset
    .merge(dataset_artist_values[["artist", "new_artist_label"]], on="artist", how="left")
    .merge(dataset_genres_values[["genre", "new_genre_label"]], on="genre", how="left")
    .merge(dataset_style_values[["style", "new_style_label"]], on="style", how="left")
)
df_dataset_labeled

In [None]:
class CustomImageDataset(Dataset):
    """Map-style dataset that decodes images stored as encoded bytes in a DataFrame.

    Args:
        df_dataset: DataFrame with an "image" column whose cells are mappings
            holding the encoded image file under the "bytes" key, plus the
            column named by ``target_name``.
        target_name: Name of the column used as the label.
        transform: Optional callable applied to the decoded PIL image.
        target_transform: Optional callable applied to the label.
    """

    def __init__(self, df_dataset, target_name, transform=None, target_transform=None):
        self.img_labels = df_dataset[target_name]
        self.img_bytes = df_dataset["image"]
        self.transform = transform
        self.target_transform = target_transform

    def __len__(self):
        """Return the number of samples (rows of the source frame)."""
        return len(self.img_labels)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the sample at position ``idx``.

        ``idx`` is a position in ``range(len(self))`` (what samplers produce),
        so rows are fetched with ``.iloc``; a plain ``series[idx]`` is a label
        lookup and only works when the frame has a default 0..n-1 index.
        """
        encoded = self.img_bytes.iloc[idx]["bytes"]
        # Force three channels: grayscale / RGBA / CMYK files would otherwise
        # produce tensors with a different channel count and break batching.
        image = Image.open(io.BytesIO(encoded)).convert("RGB")
        label = self.img_labels.iloc[idx]
        if self.transform:
            image = self.transform(image)
        if self.target_transform:
            label = self.target_transform(label)
        return image, label

In [None]:
# Preprocessing shared by every dataset below: resize to 224x224, then
# convert the PIL image to a float tensor.
_resize_to_model_input = transforms.Resize((224, 224))
data_transforms = transforms.Compose([_resize_to_model_input, transforms.ToTensor()])

train_dataset = CustomImageDataset(
    df_dataset_labeled,
    "new_artist_label",
    transform=data_transforms,
)

In [None]:
# Small shuffled loader over the whole labeled frame, used only for the preview cell below.
train_dataloader = DataLoader(
    dataset=train_dataset,
    batch_size=8,
    shuffle=True,
)

In [None]:
# Pull one batch to verify tensor shapes and eyeball the first image/label pair.
first_batch = next(iter(train_dataloader))
train_features, train_labels = first_batch
print(f"Feature batch shape: {train_features.size()}")
print(f"Labels batch shape: {train_labels.size()}")
label = train_labels[0]
print(f"Label: {label}")
to_pil = transforms.ToPILImage()
img = to_pil(train_features[0])
img


In [None]:
from sklearn.model_selection import train_test_split

# 80/20 split over positional row indices of df_dataset_labeled.
# random_state pins the shuffle so the same rows land in the validation set on
# every run; without it, checkpoints reloaded in a later session would be
# evaluated on rows they may have been trained on.
train_idx, valid_idx = train_test_split(
    np.arange(len(df_dataset_labeled["new_artist_label"])),
    test_size=0.2,
    shuffle=True,
    random_state=42,
    stratify=None,
)

In [None]:
# Display the positional row indices assigned to the train and validation splits.
train_idx, valid_idx

In [None]:
# Per-class sample counts over the full labeled frame.
df_dataset_labeled["new_artist_label"].value_counts()

In [None]:
# Per-class sample counts restricted to the training rows, to compare
# against the full distribution (the split is not stratified).
df_dataset_labeled["new_artist_label"].iloc[train_idx].value_counts()

In [None]:
# 80% and 20% of 339, matching the test_size=0.2 split ratio.
# NOTE(review): 339 is presumably a class count read off the value_counts
# output above — confirm, and prefer deriving it from the data.
339 * 0.8, 339 * 0.2

In [None]:
batch_size = 32

# One dataset over the whole frame; the two samplers restrict each loader to
# its own index subset and reshuffle that subset on every pass.
dataset = CustomImageDataset(df_dataset_labeled, target_name="new_artist_label", transform=data_transforms)

train_sampler = torch.utils.data.SubsetRandomSampler(train_idx)
valid_sampler = torch.utils.data.SubsetRandomSampler(valid_idx)

train_loader, valid_loader = (
    torch.utils.data.DataLoader(dataset, batch_size=batch_size, sampler=split_sampler)
    for split_sampler in (train_sampler, valid_sampler)
)

In [None]:
# Pull one batch from the training split to verify shapes and preview a sample.
first_batch = next(iter(train_loader))
train_features, train_labels = first_batch
print(f"Feature batch shape: {train_features.size()}")
print(f"Labels batch shape: {train_labels.size()}")
label = train_labels[0]
print(f"Label: {label}")
to_pil = transforms.ToPILImage()
img = to_pil(train_features[0])
img


In [None]:
# Check the dtype of the collated label batch before passing it to the loss.
train_labels.dtype

In [None]:
# ImageNet-pretrained ResNet-18. The `weights=` enum replaces the deprecated
# `pretrained=True` flag and selects the same IMAGENET1K_V1 checkpoint.
model = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)
device = torch.device("cpu")

# Optional: freeze the backbone so only the new classifier layer is trained.
# for param in model.parameters():
#     param.requires_grad = False

# Replace the 1000-way ImageNet classifier with one output per artist class.
num_ftrs = model.fc.in_features
model.fc = torch.nn.Linear(num_ftrs, len(dataset_artist_values))
model = model.to(device)

criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Module-level state read by train_model: one loader and one sample count per phase.
dataloaders = {
    'train': train_loader,
    'val': valid_loader
}

dataset_lens = {
    'train': len(train_idx),
    'val': len(valid_idx)
}


In [None]:
def train_model(model, criterion, optimizer, num_epochs=3):
    """Fine-tune a single-head classifier, validating after every epoch.

    Reads the module-level ``dataloaders`` (phase -> loader), ``dataset_lens``
    (phase -> sample count) and ``device``. After each training pass the model
    state dict is written to
    ``./data/resnet18_artwiki_model_state_epoch_<n>.pth``.

    Args:
        model: Network mapping an image batch to class logits.
        criterion: Loss called as ``criterion(logits, labels)``.
        optimizer: Optimizer over the model parameters.
        num_epochs: Number of train + validation passes.

    Returns:
        None. Per-phase loss and accuracy are printed.
    """
    since = time.time()
    for epoch in range(num_epochs):
        print(f"\n\n-------\nEpoch {epoch+1}/{num_epochs}")
        for phase in ['train', 'val']:
            # Checkpoint the weights produced by this epoch's training pass
            # before validation starts.
            if phase == 'val':
                torch.save(model.state_dict(), f"./data/resnet18_artwiki_model_state_epoch_{epoch+1}.pth")

            is_training = phase == 'train'
            model.train(is_training)
            running_loss = 0.0
            # Plain Python int: stays valid even if the loader yields no
            # batches (a tensor-only `.double()` call would fail on it).
            running_corrects = 0

            for inputs, labels in dataloaders[phase]:
                inputs, labels = inputs.to(device), labels.to(device)
                optimizer.zero_grad()

                with torch.set_grad_enabled(is_training):
                    outputs = model(inputs)
                    preds = outputs.argmax(dim=1)
                    loss = criterion(outputs, labels)

                    if is_training:
                        loss.backward()
                        optimizer.step()

                # The criterion returns a batch mean; weight it by batch size
                # so the epoch figure is a per-sample average.
                running_loss += loss.item() * inputs.size(0)
                running_corrects += (preds == labels).sum().item()

            # Guard against a zero-sized phase.
            n_samples = max(dataset_lens[phase], 1)
            epoch_loss = running_loss / n_samples
            epoch_acc = running_corrects / n_samples

            print(f"{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}")
    print(f"Training complete in {(time.time() - since):.0f}s")

In [None]:
# Fine-tune the artist classifier; train_model writes one checkpoint per epoch.
num_epochs = 10
train_model(model, criterion, optimizer, num_epochs=num_epochs)

# Load model from previous checkpoint state

In [None]:
# Rebuild the architecture only. weights=None skips the ImageNet download:
# load_state_dict (strict by default) overwrites every parameter and buffer.
model = models.resnet18(weights=None)

num_ftrs = model.fc.in_features
model.fc = torch.nn.Linear(num_ftrs, len(dataset_artist_values))  # one output per artist class
model = model.to(device)

# NOTE(review): train_model saves checkpoints as
# resnet18_artwiki_model_state_epoch_<n>.pth; this un-suffixed file is
# presumably a manually renamed copy of one of them — confirm it exists.
model.load_state_dict(torch.load("./data/resnet18_artwiki_model_state.pth", weights_only=True, map_location=torch.device('cpu')))

model.eval()


In [None]:
def predict(model, image_path, device='cpu', input_size=(224, 224)):
    """
    Run a single image through the model and return class probabilities.

    Args:
        model: The trained PyTorch model. It is moved to ``device`` and put in
            evaluation mode as a side effect.
        image_path: Path to the image file.
        device: Device to run the model on ('cpu' or 'cuda').
        input_size: Tuple specifying the target size for the image (default is (224, 224)).

    Returns:
        A tuple ``(probabilities, outputs)``: ``outputs`` is the raw model
        output for the one-image batch, and ``probabilities`` is the softmax
        of its first row.
    """
    # Same preprocessing as training: resize, then convert to a tensor.
    preprocess = transforms.Compose([
        transforms.Resize(input_size),
        transforms.ToTensor()
    ])

    # Convert inside the context manager so pixel data is loaded before the
    # underlying file is closed.
    with Image.open(image_path) as raw_image:
        image = raw_image.convert('RGB')
    image = preprocess(image).unsqueeze(0)  # Add batch dimension

    # Move the model and image to the specified device
    model = model.to(device)
    image = image.to(device)

    # Set the model to evaluation mode
    model.eval()

    # Perform inference without tracking gradients
    with torch.no_grad():
        outputs = model(image)
        probabilities = torch.nn.functional.softmax(outputs[0], dim=0)
    return probabilities, outputs

In [None]:
# Classify a sample painting and show the lookup row of the most likely artist.
probabilities, outputs = predict(model, "./data/beach-monet.png")
predicted_class = probabilities.argmax().item()
print(f"Predicted class probability: {probabilities.max() * 100}")
is_predicted_artist = dataset_artist_values["new_artist_label"] == predicted_class
dataset_artist_values.loc[is_predicted_artist].head(1)


In [None]:
# Five most likely artists with their probabilities.
values, indices = torch.topk(probabilities, k=5)

print("Top 5 predicted classes probabilities:")
# new_artist_label is unique per row, so it can serve as the lookup index.
artist_name_by_label = dataset_artist_values.set_index("new_artist_label")["artist_name"]
for probability, class_idx in zip(values.tolist(), indices.tolist()):
    print(f"{probability * 100:.2f}% - {artist_name_by_label.loc[class_idx]}")

# Multilabel

# Dos tipos:

## 1. Múltiples labels representan múltiples objetos en una sola imagen:

![multilabel-1](./images/multilabel.png)

## 2. Múltiples labels representan un mismo objeto en una **jerarquía** de labels:


<img src="./images/multilabel-jerarquia.png" alt="drawing" width="500"/>

In [None]:
class MultiLabelResNet(torch.nn.Module):
    """Shared backbone feeding three independent linear classification heads.

    Args:
        backbone_model: Module exposing an ``fc`` layer with ``in_features``.
            It is modified in place: ``fc`` is replaced by an identity so the
            backbone returns its feature vector.
        num_features: Indexable of output sizes, read at positions 0, 1 and 2
            for the artist, genre and style heads respectively.
    """

    def __init__(self, backbone_model, num_features):
        super().__init__()
        self.backbone_model = backbone_model

        # Remember the feature width before dropping the original classifier.
        feature_dim = self.backbone_model.fc.in_features
        self.backbone_model.fc = torch.nn.Identity()

        # Heads are created in artist, genre, style order.
        self.head_artist = torch.nn.Linear(feature_dim, num_features[0])
        self.head_genre = torch.nn.Linear(feature_dim, num_features[1])
        self.head_style = torch.nn.Linear(feature_dim, num_features[2])

    def forward(self, x):
        """Return the ``(artist, genre, style)`` head outputs for batch ``x``."""
        shared = self.backbone_model(x)
        return self.head_artist(shared), self.head_genre(shared), self.head_style(shared)

In [None]:
# Copy of the labeled frame with one extra column holding the
# [artist, genre, style] label triple of each row as a list.
df_dataset_multilabeled = df_dataset_labeled.assign(
    multilabels=lambda frame: frame.apply(
        lambda row: [row['new_artist_label'], row['new_genre_label'], row['new_style_label']],
        axis=1,
    )
)
df_dataset_multilabeled

In [None]:
batch_size = 32

# Each label is now the [artist, genre, style] list, converted to an int64
# tensor per sample by the target transform.
dataset = CustomImageDataset(
    df_dataset_multilabeled,
    target_name="multilabels",
    transform=data_transforms,
    target_transform=lambda x: torch.tensor(x, dtype=torch.int64),
)

# Reuse the train/validation index split computed earlier.
train_sampler = torch.utils.data.SubsetRandomSampler(train_idx)
valid_sampler = torch.utils.data.SubsetRandomSampler(valid_idx)

train_loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, sampler=train_sampler)
valid_loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, sampler=valid_sampler)

# Rebind the module-level state read by train_model to the multi-label loaders.
dataloaders = dict(train=train_loader, val=valid_loader)
dataset_lens = dict(train=len(train_idx), val=len(valid_idx))

In [None]:
# Pull one multi-label batch to verify shapes and preview a sample.
first_batch = next(iter(train_loader))
train_features, train_labels = first_batch
print(f"Feature batch shape: {train_features.size()}")
print(f"Labels batch shape: {train_labels.size()}")
label = train_labels[0]
print(f"Label: {label}")
to_pil = transforms.ToPILImage()
img = to_pil(train_features[0])
img

In [None]:
# Display the collated label batch from the preview above.
train_labels

In [None]:
# Column 0 of the label batch: the artist label of every sample
# (the multilabels list is built in artist, genre, style order).
train_labels[:, 0]

In [None]:
# Number of distinct artist, genre and style classes present in the loaded data.
[len(dataset_artist_values), len(dataset_genres_values), len(dataset_style_values)]

In [None]:
# ImageNet-pretrained ResNet-18 used as the shared backbone. The `weights=`
# enum replaces the deprecated `pretrained=True` flag and selects the same
# IMAGENET1K_V1 checkpoint.
resnet_model = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)
device = torch.device("cpu")

# One output size per head, in (artist, genre, style) order.
model = MultiLabelResNet(resnet_model, [len(dataset_artist_values), len(dataset_genres_values), len(dataset_style_values)])

# Optional: freeze every parameter before training.
# for param in model.parameters():
#     param.requires_grad = False

model = model.to(device)

# One cross-entropy criterion per head; a single optimizer updates the
# backbone and all three heads together.
criterion_artists = torch.nn.CrossEntropyLoss()
criterion_genres = torch.nn.CrossEntropyLoss()
criterion_styles = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [None]:
def train_model(model, criterion_artists, criterion_genres, criterion_styles, optimizer, num_epochs=3):
    """Train the three-head model, validating after every epoch.

    Reads the module-level ``dataloaders`` (phase -> loader), ``dataset_lens``
    (phase -> sample count) and ``device``. Label batches are indexed as
    ``labels[:, 0]`` (artist), ``labels[:, 1]`` (genre) and ``labels[:, 2]``
    (style). After each training pass the model state dict is written to
    ``./data/resnet18_artwiki_model_state_multilabel_epoch_<n>.pth``.

    Args:
        model: Network returning ``(artist, genre, style)`` logits.
        criterion_artists: Loss for the artist head.
        criterion_genres: Loss for the genre head.
        criterion_styles: Loss for the style head.
        optimizer: Optimizer over the model parameters.
        num_epochs: Number of train + validation passes.

    Returns:
        None. Per-phase, per-head loss and accuracy are printed.
    """
    head_names = ("Artist", "Genre", "Style")
    criteria = (criterion_artists, criterion_genres, criterion_styles)

    since = time.time()
    for epoch in range(num_epochs):
        print(f"\n\n-------\nEpoch {epoch+1}/{num_epochs}")
        for phase in ['train', 'val']:
            # Checkpoint the weights produced by this epoch's training pass
            # before validation starts.
            if phase == 'val':
                torch.save(model.state_dict(), f"./data/resnet18_artwiki_model_state_multilabel_epoch_{epoch+1}.pth")

            is_training = phase == 'train'
            model.train(is_training)
            # Plain Python accumulators, one slot per head: they stay valid
            # even if the loader yields no batches.
            running_loss = [0.0, 0.0, 0.0]
            running_corrects = [0, 0, 0]

            for inputs, labels in dataloaders[phase]:
                inputs, labels = inputs.to(device), labels.to(device)
                optimizer.zero_grad()

                with torch.set_grad_enabled(is_training):
                    out_artist, out_genre, out_style = model(inputs)
                    head_outputs = (out_artist, out_genre, out_style)
                    losses = [
                        head_criterion(head_out, labels[:, head_idx])
                        for head_idx, (head_criterion, head_out) in enumerate(zip(criteria, head_outputs))
                    ]
                    # Unweighted sum: every head contributes equally to the gradient.
                    loss_total = losses[0] + losses[1] + losses[2]

                    if is_training:
                        loss_total.backward()
                        optimizer.step()

                batch_len = inputs.size(0)
                for head_idx, (head_out, head_loss) in enumerate(zip(head_outputs, losses)):
                    # Criteria return batch means; weight by batch size so the
                    # epoch figure is a per-sample average.
                    running_loss[head_idx] += head_loss.item() * batch_len
                    running_corrects[head_idx] += (head_out.argmax(dim=1) == labels[:, head_idx]).sum().item()

            # Guard against a zero-sized phase.
            n_samples = max(dataset_lens[phase], 1)
            for head_name, loss_sum, correct in zip(head_names, running_loss, running_corrects):
                print(f"{phase} {head_name} Loss: {loss_sum / n_samples:.4f} Acc: {correct / n_samples:.4f}")

    print(f"Training complete in {(time.time() - since):.0f}s")

In [None]:
# Train the three-head model; train_model writes one checkpoint per epoch.
num_epochs = 10
train_model(model, criterion_artists, criterion_genres, criterion_styles, optimizer, num_epochs=num_epochs)