In [None]:
import random

import matplotlib.pyplot as plt
import numpy as np
import torch
from PIL import Image
from sklearn.metrics import precision_score, recall_score
from torch.utils.data import DataLoader, ConcatDataset
from torchvision import datasets, transforms, models
from tqdm import tqdm
from transformers import BlipProcessor, BlipForConditionalGeneration, pipeline

In [None]:
# Root folder handed to torchvision for the Oxford-IIIT Pet files.
data_dir = './data'

# Both splits are loaded without a transform (transform=None) and with the
# category target; they differ only in the split name, so build them in one pass
# (trainval first, then test).
train_dataset_pil, test_dataset_pil = (
    datasets.OxfordIIITPet(
        root=data_dir,
        split=split_name,
        download=True,
        transform=None,
        target_types='category',
    )
    for split_name in ('trainval', 'test')
)

In [None]:
def show_images(dataset, n=5):
    """Display ``n`` randomly chosen samples of ``dataset`` side by side.

    Args:
        dataset: Indexable collection supporting ``len()`` whose items unpack
            as ``(image, label)``; the image is passed to ``Axes.imshow``.
        n: Number of samples to draw. Indices are drawn independently, so the
            same sample may appear more than once.

    Returns:
        None. The figure is rendered through ``plt.show()``.
    """
    # Nothing to draw: avoid asking matplotlib for a zero-column grid or
    # sampling an index from an empty dataset.
    if n < 1 or len(dataset) == 0:
        return

    # squeeze=False always yields a 2-D grid of axes, so the n == 1 case
    # (which would otherwise return a single, non-indexable Axes) works too.
    fig, axs = plt.subplots(1, n, figsize=(15, 5), squeeze=False)
    for ax in axs[0]:
        img, label = dataset[random.randrange(len(dataset))]
        ax.imshow(img)
        ax.set_title(f"Label: {label}")
        ax.axis('off')
    plt.show()

# Preview five random training images (five is also the function's default).
show_images(train_dataset_pil, n=5)

In [None]:
# Random augmentation pipeline applied to PIL images; the last step converts
# the result to a tensor.
aug_transforms = transforms.Compose([
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3),
    transforms.RandomRotation(20),
    transforms.ToTensor(),
])

# Augment the first five training images (labels are not needed for the preview).
augmented_imgs = []
for sample_idx in range(5):
    pil_img = train_dataset_pil[sample_idx][0]
    augmented_imgs.append(aug_transforms(pil_img))

fig, axs = plt.subplots(nrows=1, ncols=5, figsize=(15, 5))
for ax, tensor_img in zip(axs, augmented_imgs):
    # Move the first tensor dimension last before handing it to imshow.
    ax.imshow(tensor_img.permute(1, 2, 0))
    ax.axis('off')
plt.suptitle("Esempi di Data Augmentation")
plt.show()

In [None]:
# The processor and the captioning model are loaded from the same checkpoint.
blip_checkpoint = "Salesforce/blip-image-captioning-base"
processor = BlipProcessor.from_pretrained(blip_checkpoint)
blip_model = BlipForConditionalGeneration.from_pretrained(blip_checkpoint)

def generate_caption(image):
    """Produce a text caption for ``image`` with the module-level BLIP model.

    Args:
        image: Image accepted by the module-level ``processor``.

    Returns:
        The decoded first generated sequence, with special tokens removed.
    """
    model_inputs = processor(image, return_tensors="pt")
    # Only the first returned sequence is decoded.
    first_sequence = blip_model.generate(**model_inputs)[0]
    return processor.decode(first_sequence, skip_special_tokens=True)

# Caption the first three training images; numbering in the output starts at 1.
for caption_number in range(1, 4):
    img = train_dataset_pil[caption_number - 1][0]
    caption = generate_caption(img)
    print(f"Caption {caption_number}: {caption}")

In [None]:
# Text-to-text pipeline used below to rephrase captions.
text_gen = pipeline(
    task="text2text-generation",
    model="google/flan-t5-small",
)

def generate_text_variants(caption, n=3):
    """Ask the module-level ``text_gen`` pipeline for ``n`` rewordings of ``caption``.

    Args:
        caption: Text to rephrase; it is embedded in a fixed instruction prompt.
        n: Number of variants to return.

    Returns:
        A list of ``n`` generated strings, or an empty list when ``n`` is
        below 1.
    """
    if n < 1:
        return []

    prompt = f"Riscrivi la seguente descrizione in modo diverso: {caption}"
    # Sending the identical prompt n times only yields n different texts if
    # decoding is stochastic; with deterministic decoding every call returns
    # the same string. Request n sampled sequences in a single call instead.
    outputs = text_gen(
        prompt,
        max_length=50,
        do_sample=True,
        num_return_sequences=n,
    )
    return [item['generated_text'] for item in outputs]

# Caption the first training image, then ask for three rewordings of it.
first_image = train_dataset_pil[0][0]
caption = generate_caption(first_image)
variants = generate_text_variants(caption, n=3)
print("Caption originale:", caption)
print("Varianti:", variants)

In [None]:
# Placeholder cell: no image synthesis is performed here, only a notice is printed.
print(
    "Qui si genererebbero immagini sintetiche a partire dalle caption variate "
    "(richiede API o modelli esterni)."
)

In [None]:
# Build a fixed list of (augmented tensor, label) pairs from the PIL training set.
# Taking the first 100 indices augments one contiguous slice only; if the split
# is stored grouped by class (NOTE(review): appears to be the case for this
# dataset - confirm), that slice covers very few classes. Evenly spaced indices
# spread the augmented samples across the whole split, and the min() guard
# avoids an IndexError on datasets with fewer than 100 items.
num_augmented = min(100, len(train_dataset_pil))
stride = len(train_dataset_pil) // num_augmented if num_augmented else 1

augmented_dataset = []
for i in range(0, num_augmented * stride, stride):
    img, label = train_dataset_pil[i]
    aug_img = aug_transforms(img)
    augmented_dataset.append((aug_img, label))

class TensorDataset(torch.utils.data.Dataset):
    """Dataset wrapper around an in-memory sequence of ready-made samples.

    Each item is returned exactly as stored, so the sequence is expected to
    already hold whatever the consumer needs (here: ``(tensor, label)`` pairs).
    """

    def __init__(self, tensor_tuples):
        # Stored by reference; no copy is made.
        self.data = tensor_tuples

    def __getitem__(self, idx):
        sample = self.data[idx]
        return sample

    def __len__(self):
        n_samples = len(self.data)
        return n_samples

# Deterministic preprocessing for the baseline training set: fixed 224x224
# resize followed by tensor conversion.
train_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])

train_dataset_tensor = datasets.OxfordIIITPet(
    root=data_dir,
    split='trainval',
    target_types='category',
    transform=train_transform,
    download=True,
)

# Extended set = baseline training set followed by the pre-computed augmented samples.
augmented_subset = TensorDataset(augmented_dataset)
extended_train_dataset = ConcatDataset([train_dataset_tensor, augmented_subset])

In [None]:
batch_size = 32

# Both training loaders reshuffle; they differ only in the dataset they wrap.
train_loader = DataLoader(
    train_dataset_tensor,
    batch_size=batch_size,
    shuffle=True,
)
extended_train_loader = DataLoader(
    extended_train_dataset,
    batch_size=batch_size,
    shuffle=True,
)

# Test images get the same fixed resize + tensor conversion as the baseline
# training set and are iterated in order.
test_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])
test_dataset = datasets.OxfordIIITPet(
    root=data_dir,
    split='test',
    target_types='category',
    transform=test_transform,
    download=True,
)
test_loader = DataLoader(
    test_dataset,
    batch_size=batch_size,
    shuffle=False,
)

In [None]:
# Use the GPU when one is available, otherwise fall back to the CPU.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda") if use_cuda else torch.device("cpu")

# Pretrained ResNet-18 whose final fully connected layer is replaced by a new
# 37-way linear head (presumably one output per pet category - confirm).
model = models.resnet18(pretrained=True)
head_in_features = model.fc.in_features
model.fc = torch.nn.Linear(in_features=head_in_features, out_features=37)
model = model.to(device)

In [None]:
# Classification loss shared by both training runs.
criterion = torch.nn.CrossEntropyLoss()
# Adam over every parameter of the current model.
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-3)

In [None]:
def train_one_epoch(model, loader, criterion, optimizer, device):
    """Run one optimisation pass over ``loader`` and return the mean loss.

    Args:
        model: Module to train; it is switched to training mode.
        loader: Iterable yielding ``(images, labels)`` tensor batches.
        criterion: Callable computing the loss from ``(outputs, labels)``.
        optimizer: Optimizer updating ``model``'s parameters.
        device: Device the batches are moved to before the forward pass.

    Returns:
        The per-sample average of the batch losses, weighted by batch size.
        ``nan`` is returned when the loader yields no samples.
    """
    model.train()
    running_loss = 0.0
    seen_samples = 0
    for images, labels in loader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        # Weight each batch loss by its size (assumes the criterion returns a
        # per-batch mean) so a smaller final batch is not over-represented.
        batch_count = images.size(0)
        running_loss += loss.item() * batch_count
        seen_samples += batch_count
    # Divide by the samples actually processed rather than len(loader.dataset):
    # the two differ with drop_last or a subset sampler, and plain iterables of
    # batches have no .dataset attribute at all.
    if seen_samples == 0:
        return float("nan")
    return running_loss / seen_samples

In [None]:
def evaluate(model, loader, device):
    """Score ``model`` on ``loader`` and print/return accuracy, precision, recall.

    Args:
        model: Module to evaluate; it is switched to evaluation mode.
        loader: Iterable yielding ``(images, labels)`` tensor batches.
        device: Device the batches are moved to before the forward pass.

    Returns:
        ``(accuracy, precision, recall)``. Precision and recall are
        macro-averaged with ``zero_division=0``. When the loader yields no
        samples all three values are ``0.0``.
    """
    model.eval()
    correct = 0
    total = 0
    y_true, y_pred = [], []
    with torch.no_grad():
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)
            # Predicted class = index of the largest output along dim 1.
            predicted = model(images).argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            y_true.extend(labels.cpu().tolist())
            y_pred.extend(predicted.cpu().tolist())

    if total == 0:
        # Nothing was evaluated: report zeros instead of dividing by zero.
        accuracy = precision = recall = 0.0
    else:
        accuracy = correct / total
        precision = precision_score(y_true, y_pred, average='macro', zero_division=0)
        recall = recall_score(y_true, y_pred, average='macro', zero_division=0)
    print(f"Accuracy: {accuracy:.3f}, Precision: {precision:.3f}, Recall: {recall:.3f}")
    return accuracy, precision, recall

In [None]:
# Baseline run: a single epoch on the original training set, then scoring on
# the test loader.
print("Training su dataset originale...")
for epoch_number in range(1, 2):
    epoch_loss = train_one_epoch(
        model=model,
        loader=train_loader,
        criterion=criterion,
        optimizer=optimizer,
        device=device,
    )
    print(f"Epoca {epoch_number}, Loss: {epoch_loss:.4f}")

print("Valutazione su dataset originale:")
evaluate(model, test_loader, device)

In [None]:
# Start again from a fresh pretrained ResNet-18 with a new 37-way head and a
# new optimizer, so this run does not reuse the weights trained above.
model = models.resnet18(pretrained=True)
head_in_features = model.fc.in_features
model.fc = torch.nn.Linear(in_features=head_in_features, out_features=37)
model = model.to(device)
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-3)

# Second run: a single epoch on the extended training set, scored on the same
# test loader as the baseline.
print("Training su dataset esteso...")
for epoch_number in range(1, 2):
    epoch_loss = train_one_epoch(
        model=model,
        loader=extended_train_loader,
        criterion=criterion,
        optimizer=optimizer,
        device=device,
    )
    print(f"Epoca {epoch_number}, Loss: {epoch_loss:.4f}")

print("Valutazione su dataset esteso:")
evaluate(model, test_loader, device)