In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
from torchvision import transforms
import torchvision.models as models
from torchvision.transforms import ToTensor, Resize
import os
from torch.utils.data import DataLoader, random_split
from torchvision.datasets import ImageFolder



The first model, "ConceptModel" (image (x) -> concepts (c)), can simply be a pretrained ResNet. It should take an image as input and output a vector of size 112 representing the concepts (binary attributes).

In [None]:
class ConceptModel(nn.Module):
    """Image -> concept network built on a ResNet-50 backbone.

    The backbone's final fully connected layer is replaced by a linear layer
    with ``num_concepts`` outputs, and a sigmoid is applied on top so that
    every output lies in (0, 1) and can be read as a per-concept probability.

    Args:
        num_concepts: Length of the concept vector produced per image.
            Defaults to 112.
        pretrained: Forwarded to ``models.resnet50``. ``True`` (the default)
            requests the pre-trained backbone weights; pass ``False`` to
            build the architecture with freshly initialised weights.

    Note:
        Because ``forward`` already applies the sigmoid, a concept loss
        attached to this output should operate on probabilities
        (e.g. ``nn.BCELoss``), not on logits.
    """

    def __init__(self, num_concepts=112, pretrained=True):
        super(ConceptModel, self).__init__()
        self.num_concepts = num_concepts
        # ResNet-50 backbone.
        self.base_model = models.resnet50(pretrained=pretrained)
        # Swap the classification head for a concept head.
        backbone_features = self.base_model.fc.in_features
        self.base_model.fc = nn.Linear(backbone_features, num_concepts)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return per-concept probabilities for a batch of images.

        Args:
            x: Input batch fed to the ResNet backbone
                (presumably shaped (N, 3, H, W) -- TODO confirm against the
                data loader).

        Returns:
            Tensor with ``num_concepts`` sigmoid-activated values per sample.
        """
        concept_logits = self.base_model(x)
        return self.sigmoid(concept_logits)

The second part of the model, "PredictionModel" (concepts (c) -> prediction (y)), should take the output vector of the ConceptModel as the input to its first layer.

In [None]:
class PredictionModel(nn.Module):
    """Concept -> class network: a two-layer MLP over the concept vector.

    ``forward`` returns raw, unnormalised class scores (logits).
    ``nn.CrossEntropyLoss`` applies log-softmax internally, so feeding it
    already-softmaxed outputs would normalise twice and flatten the
    gradients. Use ``predict_proba`` when class probabilities are needed.

    Args:
        num_concepts: Size of the incoming concept vector. Defaults to 112.
        hidden_dim: Width of the hidden layer. Defaults to 256.
        num_classes: Number of output classes. Defaults to 200
            (bird species).
    """

    def __init__(self, num_concepts=112, hidden_dim=256, num_classes=200):
        super(PredictionModel, self).__init__()
        self.fc1 = nn.Linear(num_concepts, hidden_dim)  # Concept vector as input in the first layer
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_dim, num_classes)  # Output layer, one score per class
        # Kept as an attribute for `predict_proba`; deliberately NOT used in `forward`.
        self.softmax = nn.Softmax(dim=1)

    def forward(self, c):
        """Map a batch of concept vectors to class logits.

        Args:
            c: Tensor of shape (batch, num_concepts).

        Returns:
            Tensor of shape (batch, num_classes) holding unnormalised logits.
        """
        hidden = self.relu(self.fc1(c))
        return self.fc2(hidden)

    def predict_proba(self, c):
        """Return class probabilities (softmax over the logits of ``forward``).

        Args:
            c: Tensor of shape (batch, num_concepts).

        Returns:
            Tensor of shape (batch, num_classes) whose rows sum to 1.
        """
        return self.softmax(self.forward(c))

## Bottleneck model (the two combined in one module)

In [None]:
class BottleneckModel(nn.Module):
    """End-to-end bottleneck network: image -> concepts -> prediction.

    Chains a ``ConceptModel`` and a ``PredictionModel`` so that the class
    prediction is computed only from the intermediate concept vector.
    """

    def __init__(self):
        super().__init__()
        # Stage 1: image -> concept vector.
        self.concept_model = ConceptModel()
        # Stage 2: concept vector -> class prediction.
        self.prediction_model = PredictionModel()

    def forward(self, x):
        """Run both stages and return the prediction model's output.

        Args:
            x: Input batch passed to the concept model.

        Returns:
            The output of ``prediction_model`` evaluated on the concepts
            predicted for ``x``. The concept vector itself is not returned.
        """
        return self.prediction_model(self.concept_model(x))

### Insert * NICE DATALOADER *

## Loss Function and Optimizer

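I was thinking of using CrossEntropyLoss (note that `nn.CrossEntropyLoss` expects raw, unnormalized logits as input). Since the bottleneck model includes pre-trained components, we might want to use different learning rates for different parts of the model. This should be possible with PyTorch optimizers via per-parameter options (parameter groups).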

In [None]:
# Multi-class classification loss applied to the model's predictions.
criterion = nn.CrossEntropyLoss()

# End-to-end bottleneck network (image -> concepts -> prediction).
model = BottleneckModel()

# One Adam optimizer over every parameter; all of them share a single learning rate.
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

In [None]:
num_epochs = 10

for epoch in range(num_epochs):
    # Put the model in training mode before iterating over the batches.
    model.train()
    running_loss = 0.0

    # NOTE: `train_loader` is a placeholder; it must be defined by the data-loading cell.
    for images, labels in train_loader:
        # Forward pass.
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward pass: clear stale gradients, backpropagate, update weights.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Weight the batch loss by the batch size so the epoch figure below is
        # a per-sample average (assumes the criterion averages over the batch,
        # which is its default reduction).
        running_loss += images.size(0) * loss.item()

    epoch_loss = running_loss / len(train_loader.dataset)
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}')