In [3]:
import pandas as pd

csv_path = '/content/annotations.csv'  # Update this path if needed
df = pd.read_csv(csv_path)

# Filter only training partition
train_df = df[df['partition'] == 'train']


In [4]:
from torch.utils.data import Dataset

class CustomDataset(Dataset):
    def __init__(self, dataframe, root_dir, transform=None):
        self.data = dataframe.reset_index(drop=True)
        self.root_dir = root_dir
        self.transform = transform
        self.class2idx = {k: i for i, k in enumerate(self.data['class'].unique())}
        self.subclass2idx = {k: i for i, k in enumerate(self.data['sub_class'].unique())}

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        path = self.data.iloc[idx]['path_rgb_original']
        if path.startswith("RGB/"):
            path = path[4:]
        img_path = os.path.join(self.root_dir, path)
        image = Image.open(img_path).convert('RGB')
        label_class = self.class2idx[self.data.iloc[idx]['class']]
        label_subclass = self.subclass2idx[self.data.iloc[idx]['sub_class']]
        if self.transform:
            image = self.transform(image)
        return image, label_class, label_subclass


In [5]:
from torchvision import transforms
from torch.utils.data import DataLoader

root_dir = '/content/drive/MyDrive/dataset'
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])

# Use only training rows
dataset = CustomDataset(dataframe=train_df, root_dir=root_dir, transform=transform)
dataloader = DataLoader(dataset, batch_size=32, shuffle=True, num_workers=2)


In [6]:
import torchvision.models as models
import torch.nn as nn

class MultiOutputResNet(nn.Module):
    def __init__(self, num_classes, num_subclasses):
        super().__init__()
        self.base = models.resnet18(weights='IMAGENET1K_V1')
        self.base.fc = nn.Identity()
        self.fc_class = nn.Linear(512, num_classes)
        self.fc_subclass = nn.Linear(512, num_subclasses)

    def forward(self, x):
        features = self.base(x)
        out_class = self.fc_class(features)
        out_subclass = self.fc_subclass(features)
        return out_class, out_subclass

num_classes = len(dataset.class2idx)
num_subclasses = len(dataset.subclass2idx)
model = MultiOutputResNet(num_classes, num_subclasses)


Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 121MB/s]


In [8]:
import torch.nn.functional as F
import os

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
num_epochs = 10  # adjust as needed

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    num_batches = 0
    for images, labels_class, labels_subclass in dataloader:
        images = images.to(device)
        labels_class = labels_class.to(device)
        labels_subclass = labels_subclass.to(device)

        optimizer.zero_grad()
        out_class, out_subclass = model(images)
        loss_class = F.cross_entropy(out_class, labels_class)
        loss_subclass = F.cross_entropy(out_subclass, labels_subclass)
        loss = loss_class + loss_subclass
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        num_batches += 1

    avg_loss = running_loss / num_batches
    print(f"Epoch {epoch+1}/{num_epochs} completed - Loss: {avg_loss:.4f}")

# Save the model weights
torch.save(model.state_dict(), '/content/drive/MyDrive/dataset/model_weights2.pth')
print("Model weights saved!")



Epoch 1/10 completed - Loss: 2.4642
Epoch 2/10 completed - Loss: 1.5350
Epoch 3/10 completed - Loss: 0.7390
Epoch 4/10 completed - Loss: 0.2636
Epoch 5/10 completed - Loss: 0.1220
Epoch 6/10 completed - Loss: 0.0669
Epoch 7/10 completed - Loss: 0.0513
Epoch 8/10 completed - Loss: 0.0739
Epoch 9/10 completed - Loss: 0.1037
Epoch 10/10 completed - Loss: 0.0871
Model weights saved!


In [10]:
import torch
from torchvision import transforms
from PIL import Image
import pandas as pd
import torch

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# 1. Get image path
#csv = pd.read_csv('/content/annotations.csv')
#row = csv[(csv['partition'] == 'test')].iloc[0]
#img_path = '/content/drive/MyDrive/dataset/' + row['path_rgb_original'][6:]
img_path = '/content/sofia.jpg'
print("Testing image path:", img_path)

# 2. Define transform
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])

# 3. Load and preprocess image
image = Image.open(img_path).convert('RGB')
image_tensor = transform(image).unsqueeze(0).to(device)

# 4. Load model
model = MultiOutputResNet(num_classes, num_subclasses)
model.load_state_dict(torch.load('/content/drive/MyDrive/dataset/model_weights.pth', map_location=device))
model.to(device)
model.eval()

# 5. Inference
with torch.no_grad():
    out_class, out_subclass = model(image_tensor)
    pred_class = torch.argmax(out_class, dim=1).item()
    pred_subclass = torch.argmax(out_subclass, dim=1).item()

# 6. Map to names
idx2class = {v: k for k, v in dataset.class2idx.items()}
idx2subclass = {v: k for k, v in dataset.subclass2idx.items()}

print("Predicted class:", idx2class[pred_class])
print("Predicted subclass:", idx2subclass[pred_subclass])


Testing image path: /content/sofia.jpg
Predicted class: autunno
Predicted subclass: soft


In [None]:
# Make sure your model class and mappings are defined as during training
model = MultiOutputResNet(num_classes, num_subclasses)
model.load_state_dict(torch.load('/content/drive/MyDrive/dataset/model_weights.pth', map_location=device))
model.to(device)
model.eval()


MultiOutputResNet(
  (base): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, tr

In [None]:
with torch.no_grad():
    out_class, out_subclass = model(image_tensor)
    pred_class_idx = torch.argmax(out_class, dim=1).item()
    pred_subclass_idx = torch.argmax(out_subclass, dim=1).item()


Predicted class: autunno
Predicted subclass: deep
