In [6]:
!pip install torchvision

60.88s - pydevd: Sending message related to process being replaced timed-out after 5 seconds


Collecting torchvision
  Downloading torchvision-0.21.0-cp310-cp310-macosx_11_0_arm64.whl.metadata (6.1 kB)
Downloading torchvision-0.21.0-cp310-cp310-macosx_11_0_arm64.whl (1.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.8/1.8 MB[0m [31m5.6 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hInstalling collected packages: torchvision
Successfully installed torchvision-0.21.0


In [53]:
import os
from PIL import Image
from torchvision import transforms
from torch.utils.data import Dataset, random_split, DataLoader
from sklearn.preprocessing import LabelEncoder

In [54]:
using_pretrained_model = False

# Data Pre Processing


In [55]:
transform = transforms.Compose([
    transforms.ToTensor(),
])

all_labels = [os.path.basename(os.path.dirname(os.path.dirname(p))) for p in image_paths]
label_encoder = LabelEncoder()
label_encoder.fit(all_labels)

class NutritionDataset(Dataset):
    def __init__(self, image_paths, transform=None):
        self.image_paths = image_paths
        self.transform = transform
        all_labels = [os.path.basename(os.path.dirname(os.path.dirname(p))) for p in image_paths]
        label_encoder = LabelEncoder()
        label_encoder.fit(all_labels)
        self.encoder = label_encoder

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        img_path = self.image_paths[idx]
        image = Image.open(img_path).convert('RGB')
        label_str = os.path.basename(os.path.dirname(os.path.dirname(img_path)))
        label = self.encoder.transform([label_str])[0]

        if self.transform:
            image = self.transform(image)
        else:
            image = transform(image)

        return image, label

In [70]:
# Create images paths
print("Creating image paths list...")
dir_path = 'raw_images'

image_paths = []

for dish_dir in os.listdir(dir_path):
    img_dir = os.path.join(dir_path, dish_dir, 'frames_sampled30')

    if not os.path.exists(img_dir):
        continue
    
    for image in os.listdir(img_dir):
        img_path = os.path.join(img_dir, image)
        image_paths.append(img_path)
import random

# Reduce dataset size for quicker experimentation
# random.seed(42)
# image_paths = random.sample(image_paths, min(10000, len(image_paths)))  # adjust 1000 to a smaller number if needed

print(len(image_paths))

Creating image paths list...
54845


In [63]:
# Calculating statistics (DO NOT RUN! WILL TAKE FOREVER)
# stats_dataset = NutritionDataset(image_paths)
# data_loader = DataLoader(stats_dataset, batch_size=32, shuffle=False)

# mean = 0.0
# std = 0.0
# total_images_count = 0

# for images, _ in data_loader:
#     batch_samples = images.size(0)
#     images = images.view(batch_samples, images.size(1), -1)
#     mean += images.mean(dim=2).sum(dim=0)
#     std += images.std(dim=2).sum(dim=0)
#     total_images_count += batch_samples

# mean /= total_images_count
# std /= total_images_count

# print(f"Mean: {mean}")
# print(f"Std: {std}")

In [64]:
# Transforms/Data Augmentation

input_size = (225, 225)

if using_pretrained_model:
    # Replace with the pretrained model's stats
    data_normals = {
        'mean': [0.485, 0.456, 0.406],
        'std': [0.229, 0.224, 0.225]
    }
else:
    # mean and std of the entire dataset
    data_normals = {
        'mean': [0.5005, 0.4726, 0.3732],
        'std': [0.2193, 0.2296, 0.2398]
    }
    

train_transform = transforms.Compose([
    transforms.RandomResizedCrop(input_size),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(15),
    transforms.ColorJitter(
        brightness=0.2,
        contrast=0.2,
        saturation=0.2,
        hue=0.1
    ),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=data_normals['mean'],
        std=data_normals['std']
    )
])

test_transform = transforms.Compose([
    transforms.Resize(input_size),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=data_normals['mean'],
        std=data_normals['std']
    )
])

In [65]:
train_size = int(0.7 * len(image_paths))
test_size = len(image_paths) - train_size
train_imgs, test_imgs = random_split(image_paths, [train_size, test_size])

train_set = NutritionDataset(train_imgs, transform=train_transform)
test_set = NutritionDataset(test_imgs, transform=test_transform)

train_loader = DataLoader(train_set, batch_size=32, shuffle=True)
test_loader = DataLoader(test_set, batch_size=32, shuffle=False)

# Checking Train loader
print("Train Loader...")
for i, (images, labels) in enumerate(train_loader):
    if i == 5:
        break
    print(images.shape, len(labels))

# Checking Test loader
print("Test Loader...")
for i, (images, labels) in enumerate(test_loader):
    if i == 5:
        break
    print(images.shape, len(labels))




Train Loader...
torch.Size([32, 3, 225, 225]) 32
torch.Size([32, 3, 225, 225]) 32
torch.Size([32, 3, 225, 225]) 32
torch.Size([32, 3, 225, 225]) 32
torch.Size([32, 3, 225, 225]) 32
Test Loader...
torch.Size([32, 3, 225, 225]) 32
torch.Size([32, 3, 225, 225]) 32
torch.Size([32, 3, 225, 225]) 32
torch.Size([32, 3, 225, 225]) 32
torch.Size([32, 3, 225, 225]) 32


In [66]:
import torch
import numpy as np
from tqdm import tqdm
def extract_features(dataloader):
    features = []
    labels = []
    with torch.no_grad():
        for images, lbls in tqdm(dataloader):
            flat_imgs = images.view(images.size(0), -1)
            features.append(flat_imgs.numpy())
            labels.append(lbls.numpy())
    features = np.concatenate(features)
    labels = np.concatenate(labels)
    return features, labels

In [42]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

train_features, train_labels = extract_features(train_loader)
test_features, test_labels = extract_features(test_loader)
knn = KNeighborsClassifier(n_neighbors=3)
knn.fit(train_features, train_labels)
test_preds = knn.predict(test_features)
acc = accuracy_score(test_labels, test_preds)

print(f"Test Accuracy with kNN: {acc:.4f}")


100%|██████████| 1200/1200 [09:31<00:00,  2.10it/s]
100%|██████████| 515/515 [04:19<00:00,  1.99it/s]


Test Accuracy with kNN: 0.0009


In [43]:
import pickle

with open('knn.pkl', 'wb') as loc:
    pickle.dump(knn, loc)

KeyboardInterrupt: 

In [None]:
import torch.nn as nn
import torch.optim as optim
from torchvision import models

device = torch.device("mps" if torch.backends.mps.is_available() else "cuda" if torch.cuda.is_available() else "cpu")
temp = set()
i = 1
for val, label in train_set:
    if i % 1000 == 0:
        print(i)
    i += 1
    temp.add(label)

num_classes = len(temp)
model = models.resnet18(pretrained=True)
model.fc = nn.Linear(model.fc.in_features, num_classes)
model = model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)

epochs = 20
for epoch in range(epochs):
    model.train()
    running_loss = 0.0
    correct = 0
    i = 0
    for images, labels in tqdm(train_loader):
        images, labels = images.to(device), labels.to(device)
        i += 1
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels.long())
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        temp, preds = outputs.max(1)
        correct += (preds == labels).sum().item()

    train_acc = correct / len(train_set)
    print(f"Epoch {epoch+1}/{epochs}, Loss: {running_loss:.4f}, Train Accuracy: {train_acc:.4f}")

model.eval()
correct = 0
with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, preds = outputs.max(1)
        correct += (preds == labels).sum().item()

test_acc = correct / len(test_set)
print(f"Test Accuracy with ResNet: {test_acc:.4f}")


In [None]:
import pickle

with open('resnet.pkl', 'wb') as loc:
    pickle.dump(model, loc)

7000
