In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torchvision import models, transforms
from PIL import Image
import os
import pandas as pd

# Preprocessing applied to every image before it is fed to the network.
# NOTE(review): the mean/std values look like the per-channel ImageNet
# statistics torchvision documents for its pretrained models -- confirm.
INPUT_SIZE = (224, 224)
CHANNEL_MEAN = [0.485, 0.456, 0.406]
CHANNEL_STD = [0.229, 0.224, 0.225]

_preprocessing_steps = [
    transforms.Resize(INPUT_SIZE),   # force a fixed spatial size
    transforms.ToTensor(),           # PIL image -> tensor
    transforms.Normalize(mean=CHANNEL_MEAN, std=CHANNEL_STD),
]
transform = transforms.Compose(_preprocessing_steps)


In [None]:
class NutritionNet(nn.Module):
    """ResNet-50 backbone with an MLP regression head.

    The backbone's final ``fc`` layer is replaced by
    ``Linear -> ReLU -> Dropout -> Linear -> ReLU -> Dropout -> Linear``.

    Args:
        num_outputs: Number of values predicted per image. The default of 5
            corresponds to calories, mass, fat, carbs and protein.
        dropout: Dropout probability applied after each hidden layer.
        pretrained: Whether to initialise the backbone from pretrained
            weights (``True`` keeps the original behaviour).

    Raises:
        ValueError: If ``num_outputs`` is smaller than 1.
    """

    def __init__(self, num_outputs=5, dropout=0.5, pretrained=True):
        super().__init__()
        if num_outputs < 1:
            raise ValueError(f'num_outputs must be >= 1, got {num_outputs}')

        self.resnet = self._build_backbone(pretrained)

        # Swap the classification layer for a regression head; the input
        # width is read from the layer being replaced.
        in_features = self.resnet.fc.in_features
        self.resnet.fc = nn.Sequential(
            nn.Linear(in_features, 1024),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(512, num_outputs),
        )

    @staticmethod
    def _build_backbone(pretrained):
        """Create the ResNet-50 backbone using whichever torchvision API exists."""
        # Newer torchvision releases deprecate ``pretrained=`` in favour of
        # ``weights=``; IMAGENET1K_V1 is the weight set the legacy
        # ``pretrained=True`` flag maps to. Fall back to the legacy keyword
        # when the weights enum is not available.
        if hasattr(models, 'ResNet50_Weights'):
            weights = models.ResNet50_Weights.IMAGENET1K_V1 if pretrained else None
            return models.resnet50(weights=weights)
        return models.resnet50(pretrained=pretrained)

    def forward(self, x):
        """Run ``x`` through the backbone and head; returns the head's output."""
        return self.resnet(x)

In [18]:
from torch.utils.data import DataLoader, Dataset, TensorDataset

# Step 3: Preprocess the Data
image_path = '../data/nutrition5k_revised/images/'
meta_path = '../data/nutrition5k_revised/metadata.csv'
nutrition_label_path = '../data/nutrition5k_revised/labels/nutrition.csv'

# Number of leading samples used for training; the remainder is held out.
TRAIN_SIZE = 2500

# Load metadata: one sample identifier per line. Blank lines (e.g. a trailing
# newline) are skipped so they are not turned into a bogus image path.
with open(meta_path, 'r') as f:
    data = [line.strip() for line in f if line.strip()]

nutrition_labels = pd.read_csv(nutrition_label_path)

# Load and preprocess each sample's RGB image. The file handle is closed as
# soon as the image has been converted into a tensor, so handles do not
# accumulate while the whole dataset is being read.
images = []
for idx in data:
    with Image.open(os.path.join(image_path, idx, 'rgb.png')) as raw_img:
        img = transform(raw_img.convert('RGB'))
    images.append(img)

# Every column after the first is used as a regression target.
# NOTE(review): label rows are paired with images purely by position; this
# assumes nutrition.csv lists samples in the same order as metadata.csv --
# confirm, or align on the identifier column.
labels = nutrition_labels.iloc[:, 1:].values.astype(float)

# Fail loudly on a count mismatch instead of letting the split below hide it.
if len(images) != len(labels):
    raise ValueError(
        f'Found {len(images)} images but {len(labels)} label rows; '
        'metadata and nutrition labels are out of sync.'
    )

# Convert to tensor
images = torch.stack(images)
labels = torch.tensor(labels, dtype=torch.float32)

# Split data into training and testing
train_images, test_images = images[:TRAIN_SIZE], images[TRAIN_SIZE:]
train_labels, test_labels = labels[:TRAIN_SIZE], labels[TRAIN_SIZE:]

# Data Loader
train_dataset = TensorDataset(train_images, train_labels)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

In [21]:
# Step 5: Train the model
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Move the model to its device *before* building the optimizer, as the
# torch.optim documentation recommends.
model = NutritionNet()
model.to(device)

criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    epoch_loss = 0.0
    # Batch variables get their own names so the loop does not overwrite the
    # module-level `images` / `labels` dataset tensors from the previous cell.
    for i, (batch_images, batch_labels) in enumerate(train_loader):
        batch_images = batch_images.to(device)
        batch_labels = batch_labels.to(device)

        optimizer.zero_grad()
        outputs = model(batch_images)
        loss = criterion(outputs, batch_labels)
        loss.backward()
        optimizer.step()

        step_loss = loss.item()
        epoch_loss += step_loss
        print(f'Epoch [{epoch + 1}/{num_epochs}], Step [{i + 1}/{len(train_loader)}], Loss: {step_loss}')

    # Per-batch losses are noisy; the epoch mean makes the trend readable.
    print(f'Epoch [{epoch + 1}/{num_epochs}] mean loss: {epoch_loss / max(len(train_loader), 1)}')


Epoch [1/10], Step [1/79], Loss: 27345.541015625
Epoch [1/10], Step [2/79], Loss: 62603.8125
Epoch [1/10], Step [3/79], Loss: 27436.15234375
Epoch [1/10], Step [4/79], Loss: 32733.009765625
Epoch [1/10], Step [5/79], Loss: 40739.6640625
Epoch [1/10], Step [6/79], Loss: 16034.5263671875
Epoch [1/10], Step [7/79], Loss: 14706.298828125
Epoch [1/10], Step [8/79], Loss: 35345.19921875


KeyboardInterrupt: 