In [1]:
from preprocessing.preprocessing import image_scaling, convert_image, label_conversion
import torch
import torchinfo
import torchvision.transforms as transforms
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms, models
from torch.utils.data import DataLoader, Dataset
import os
import json
import gc
from PIL import Image
import numpy as np
import timm
import pandas as pd

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

  from .autonotebook import tqdm as notebook_tqdm


Using device: cuda


In [2]:
image_folder = 'images'
BATCH_SIZE = 32
# Load calorie database (for food labels)
CALORIE_DB_FILE = os.path.join('preprocessing', "calories_database.json")
with open(CALORIE_DB_FILE, "r") as f:
    calorie_db = json.load(f)

FOOD_LABELS = sorted(list(calorie_db.keys()))
NUM_CLASSES = len(FOOD_LABELS)


In [4]:
# Define Custom Dataset
class FoodPortionDataset(Dataset):
    def __init__(self, json_path, img_dir, transform=None):
        with open(json_path, "r") as f:
            self.data = json.load(f)
        self.img_dir = img_dir
        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        item = self.data[idx]
        img_path = os.path.join(self.img_dir, item["name"] + ".png")
        image = Image.open(img_path).convert("RGB")

        # Convert food types to a one-hot encoded vector
        food_vector = torch.zeros(NUM_CLASSES)
        portion_vector = torch.zeros(NUM_CLASSES)

        for food, portion in zip(item["food type"], item["portion"]):
            if food in FOOD_LABELS:
                food_idx = FOOD_LABELS.index(food)
                food_vector[food_idx] = 1  # Detected food category
                portion_vector[food_idx] = float(portion)  # Ground truth portion size

        if self.transform:
            image = self.transform(image)

        return image, food_vector, portion_vector  # Return image, detected foods, and portion sizes

In [None]:
# Define Data Transforms
transform = transforms.Compose([
    transforms.Resize((400, 400)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
])

# Load Datasets
train_dataset = FoodPortionDataset(json_path=os.path.join('preprocessing', "train.json"), 
                            img_dir="images_resized",
                            transform=transform)
test_dataset = FoodPortionDataset(json_path=os.path.join('preprocessing', "test.json"), 
                          img_dir="images_resized",
                          transform=transform)

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

In [None]:
class CustomModel(nn.Module):
    def __init__(self, model, num_classes):
        super(CustomModel, self).__init__()
        self.model = timm.create_model('resnet34', pretrained=True, num_classes=32)
        self.fc_food = nn.Linear(num_classes, 16)
        self.fc_combine = model
    
    def forward(self, img, food_vec):
        model_out = self.model(img)
        food_vec_embed = self.fc_food(food_vec)
        concat = torch.cat((model_out, food_vec_embed), dim=1)
        return self.fc_combine(concat)

In [None]:
def train(model, trainloader, optimizer, criterion, device, epochs=10):
    min_loss = np.inf
    for epoch in range(epochs):
        running_loss = 0
        model.train()
        for images, labels, portions in trainloader:
            images, labels, portions = images.to(device), labels.to(device).float(), portions.to(device).float()
            optimizer.zero_grad()
            out = model(images, labels)
            loss = criterion(out, portions)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()

            # delete from memory
            del images
            del labels
            torch.cuda.empty_cache()
            gc.collect()
        loss = running_loss/len(train_loader)
        if loss < min_loss: min_loss = loss
        print(f"Epoch {epoch+1}/{epochs}, Loss: {loss}")
    return min_loss

In [8]:
def evaluate(model, testloader, device):
    model.eval() # Set the model to evaluation mode
    mse_loss = 0
    criterion = nn.MSELoss()
    total_size = 0
    with torch.no_grad():
        for images, labels, portions in testloader:
            images, labels, portions = images.to(device), labels.to(device).float(), portions.to(device).float()
            outputs = model(images, labels)
            print(f"real out: {portions}")
            print(f"model out: {outputs}")
            loss = criterion(outputs, portions)
            mse_loss += loss.item() * images.size(0)  # batch size
            total_size += images.size(0)

            del images
            del labels
            torch.cuda.empty_cache()
            gc.collect()

    return mse_loss / total_size

In [9]:
results = []

fc_models = [
    nn.Sequential(
        nn.Linear(16 + 32, 128),  # Image features + food class vector
        nn.ReLU(),
        nn.Linear(128, 64),
        nn.ReLU(),
        nn.Linear(64, 32),
        nn.ReLU(),
        nn.Linear(32, NUM_CLASSES),  # Predict portion sizes for each food type
        nn.ReLU()
    ),
    nn.Sequential(
        nn.Linear(16 + 32, 256),  # Image features + food class vector
        nn.ReLU(),
        nn.Linear(256, 128),
        nn.ReLU(),
        nn.Linear(128, 64),
        nn.ReLU(),
        nn.Linear(64, NUM_CLASSES),  # Predict portion sizes for each food type
        nn.ReLU()
    ),
    nn.Sequential(
        nn.Linear(16 + 32, 256),  # Image features + food class vector
        nn.ReLU(),
        nn.Linear(256, 64),
        nn.ReLU(),
        nn.Linear(64, 32),
        nn.ReLU(),
        nn.Linear(32, NUM_CLASSES),  # Predict portion sizes for each food type
        nn.ReLU()
    ),
    nn.Sequential(
        nn.Linear(16 + 32, 32),  # Image features + food class vector
        nn.ReLU(),
        nn.Linear(32, 32),
        nn.ReLU(),
        nn.Linear(32, NUM_CLASSES),  # Predict portion sizes for each food type
        nn.ReLU()
    ),
    nn.Sequential(
        nn.Linear(16 + 32, 32),  # Image features + food class vector
        nn.ReLU(),
        nn.Linear(32, 64),
        nn.ReLU(),
        nn.Linear(64, NUM_CLASSES),  # Predict portion sizes for each food type
        nn.ReLU()
    ),
    nn.Sequential(
        nn.Linear(16 + 32, 32),  # Image features + food class vector
        nn.ReLU(),
        nn.Linear(32, 128),
        nn.ReLU(),
        nn.Linear(128, NUM_CLASSES),  # Predict portion sizes for each food type
        nn.ReLU()
    ),
    nn.Sequential(
        nn.Linear(16 + 32, 64),  # Image features + food class vector
        nn.ReLU(),
        nn.Linear(64, 32),
        nn.ReLU(),
        nn.Linear(32, NUM_CLASSES),  # Predict portion sizes for each food type
        nn.ReLU()
    ),
    nn.Sequential(
        nn.Linear(16 + 32, 64),  # Image features + food class vector
        nn.ReLU(),
        nn.Linear(64, 64),
        nn.ReLU(),
        nn.Linear(64, NUM_CLASSES),  # Predict portion sizes for each food type
        nn.ReLU()
    ),
    nn.Sequential(
        nn.Linear(16 + 32, 64),  # Image features + food class vector
        nn.ReLU(),
        nn.Linear(64, 128),
        nn.ReLU(),
        nn.Linear(128, NUM_CLASSES),  # Predict portion sizes for each food type
        nn.ReLU()
    ),
    nn.Sequential(
        nn.Linear(16 + 32, 128),  # Image features + food class vector
        nn.ReLU(),
        nn.Linear(128, 32),
        nn.ReLU(),
        nn.Linear(32, NUM_CLASSES),  # Predict portion sizes for each food type
        nn.ReLU()
    ),
    nn.Sequential(
        nn.Linear(16 + 32, 128),  # Image features + food class vector
        nn.ReLU(),
        nn.Linear(128, 64),
        nn.ReLU(),
        nn.Linear(64, NUM_CLASSES),  # Predict portion sizes for each food type
        nn.ReLU()
    ),
    nn.Sequential(
        nn.Linear(16 + 32, 128),  # Image features + food class vector
        nn.ReLU(),
        nn.Linear(128, 128),
        nn.ReLU(),
        nn.Linear(128, NUM_CLASSES),  # Predict portion sizes for each food type
        nn.ReLU()
    ),
    nn.Sequential(
        nn.Linear(16 + 32, 256),  # Image features + food class vector
        nn.ReLU(),
        nn.Linear(256, 32),
        nn.ReLU(),
        nn.Linear(32, NUM_CLASSES),  # Predict portion sizes for each food type
        nn.ReLU()
    ),
    nn.Sequential(
        nn.Linear(16 + 32, 256),  # Image features + food class vector
        nn.ReLU(),
        nn.Linear(256, 64),
        nn.ReLU(),
        nn.Linear(64, NUM_CLASSES),  # Predict portion sizes for each food type
        nn.ReLU()
    ),
    nn.Sequential(
        nn.Linear(16 + 32, 256),  # Image features + food class vector
        nn.ReLU(),
        nn.Linear(256, 128),
        nn.ReLU(),
        nn.Linear(128, NUM_CLASSES),  # Predict portion sizes for each food type
        nn.ReLU()
    ),
    nn.Sequential(
        nn.Linear(16 + 32, 32),  # Image features + food class vector
        nn.ReLU(),
        nn.Linear(32, NUM_CLASSES),  # Predict portion sizes for each food type
        nn.ReLU()
    ),
    nn.Sequential(
        nn.Linear(16 + 32, 64),  # Image features + food class vector
        nn.ReLU(),
        nn.Linear(64, NUM_CLASSES),  # Predict portion sizes for each food type
        nn.ReLU()
    ),
    nn.Sequential(
        nn.Linear(16 + 32, 128),  # Image features + food class vector
        nn.ReLU(),
        nn.Linear(128, NUM_CLASSES),  # Predict portion sizes for each food type
        nn.ReLU()
    ),
    nn.Sequential(
        nn.Linear(16 + 32, 256),  # Image features + food class vector
        nn.ReLU(),
        nn.Linear(256, NUM_CLASSES),  # Predict portion sizes for each food type
        nn.ReLU()
    )
]

for fc_model in fc_models:
    model = CustomModel(fc_model, NUM_CLASSES).to(device)
    criterion = nn.MSELoss()
    optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=0.001)
    print(f"Training model with default ingredient vector")
    train_loss = train(model, train_loader, optimizer, criterion, device, epochs=20)
    print("Training complete")
    avg_loss = evaluate(model, test_loader, device)
    print(f"Average Loss: {avg_loss}")
    results.append({
        # 'model embed features': img_features,
        'best training loss': train_loss,
        'test avg loss': avg_loss
    })
    del model
    torch.cuda.empty_cache()
    gc.collect()

print('refer to index for type of model used')
results_df = pd.DataFrame(results)
results_df

Training model with default ingredient vector
Epoch 1/20, Loss: 726.9807281494141
Epoch 2/20, Loss: 696.6320223127093
Epoch 3/20, Loss: 681.3351015363421
Epoch 4/20, Loss: 687.3224051339286
Epoch 5/20, Loss: 676.5740214756557
Epoch 6/20, Loss: 671.793705531529
Epoch 7/20, Loss: 664.473870413644
Epoch 8/20, Loss: 642.718242100307
Epoch 9/20, Loss: 636.6130599975586
Epoch 10/20, Loss: 613.1898640223911
Epoch 11/20, Loss: 613.068728855678
Epoch 12/20, Loss: 591.5949358258929
Epoch 13/20, Loss: 558.6756627219064
Epoch 14/20, Loss: 525.68259538923
Epoch 15/20, Loss: 541.0177890232632
Epoch 16/20, Loss: 522.2557819911411
Epoch 17/20, Loss: 504.57474681309293
Epoch 18/20, Loss: 514.3509712219238
Epoch 19/20, Loss: 479.5446444920131
Epoch 20/20, Loss: 458.5270554678781
Training complete
real out: tensor([[  0.,   0.,   0.,   0.,  38.,   0.,   0.,  35.,   0.,   0.,   5.,  35.,
           0.,   0.,   0.,   0.,  16.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,
           0.,   0.],
        [  0.,  

Unnamed: 0,best training loss,test avg loss
0,458.527055,467.887873
1,260.589862,235.049065
2,287.82192,291.630475
3,305.132613,267.239719
4,390.142278,404.205053
5,318.987287,269.071572
6,262.692546,228.649336
7,332.335957,294.518729
8,394.77683,351.811865
9,389.149558,308.630373
