# Data Preprocessing
This notebook is used to preprocess the data (e.g.: extracting golden labels, generating image captions)

## Food101

In [38]:
import pandas as pd
import json
import os
import time
import random
import numpy as np
import matplotlib.pyplot as plt
import torch
import torchvision
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as data

In [39]:
import skimage.io as io
from PIL import Image 

In [40]:
food101_imgpath = 'Food101/food-101/images'
food101_train_json = 'Food101/food-101/meta/train.json'
food101_test_json = 'Food101/food-101/meta/test.json'



In [41]:
import json

with open(food101_train_json, 'r') as f:
    train_data_ann = json.load(f)

with open(food101_test_json, 'r') as f:
    test_data_ann = json.load(f)

print("Number of classes in train and test dataset")
print(len(train_data_ann.keys()))
print(len(test_data_ann.keys()))

print("Number of images in train and test dataset")
num_train = 0
num_test = 0

for k in train_data_ann.keys():
    num_train += len(train_data_ann[k])

for k in test_data_ann.keys():
    num_test += len(test_data_ann[k])

print(num_train)
print(num_test)


Number of classes in train and test dataset
101
101
Number of images in train and test dataset
75750
25250


In [42]:
# What is stored in the ann file:
print(train_data_ann.keys())
print(test_data_ann['churros'])

dict_keys(['churros', 'hot_and_sour_soup', 'samosa', 'sashimi', 'pork_chop', 'spring_rolls', 'panna_cotta', 'beef_tartare', 'greek_salad', 'foie_gras', 'tacos', 'pad_thai', 'poutine', 'ramen', 'pulled_pork_sandwich', 'bibimbap', 'beignets', 'apple_pie', 'crab_cakes', 'risotto', 'paella', 'steak', 'baby_back_ribs', 'miso_soup', 'frozen_yogurt', 'club_sandwich', 'carrot_cake', 'falafel', 'bread_pudding', 'chicken_wings', 'gnocchi', 'caprese_salad', 'creme_brulee', 'escargots', 'chocolate_cake', 'tiramisu', 'spaghetti_bolognese', 'mussels', 'scallops', 'baklava', 'edamame', 'macaroni_and_cheese', 'pancakes', 'garlic_bread', 'beet_salad', 'onion_rings', 'red_velvet_cake', 'grilled_salmon', 'chicken_curry', 'deviled_eggs', 'caesar_salad', 'hummus', 'fish_and_chips', 'lasagna', 'peking_duck', 'guacamole', 'strawberry_shortcake', 'clam_chowder', 'croque_madame', 'french_onion_soup', 'beef_carpaccio', 'fried_rice', 'donuts', 'gyoza', 'ravioli', 'fried_calamari', 'spaghetti_carbonara', 'french_

In [43]:
for churros in test_data_ann['churros'][:10]:
    print(io.imread(f'Food101/food-101/images/{churros}.jpg'))

[[[203 239 253]
  [205 241 255]
  [207 243 255]
  ...
  [ 71  80 137]
  [ 73  79 137]
  [ 77  83 141]]

 [[203 239 253]
  [204 240 254]
  [205 241 255]
  ...
  [ 75  84 141]
  [ 78  84 142]
  [ 81  87 145]]

 [[203 239 253]
  [203 239 253]
  [202 238 252]
  ...
  [ 77  87 146]
  [ 79  88 147]
  [ 82  91 150]]

 ...

 [[229 220 225]
  [228 219 224]
  [226 220 222]
  ...
  [ 44  61 105]
  [ 42  57 100]
  [ 41  54  98]]

 [[222 216 228]
  [223 217 229]
  [224 218 230]
  ...
  [ 48  68 119]
  [ 45  63 113]
  [ 44  60 109]]

 [[238 234 249]
  [239 235 250]
  [241 237 252]
  ...
  [ 53  74 129]
  [ 49  69 122]
  [ 48  65 117]]]
[[[187 105   3]
  [188 106   4]
  [188 106   6]
  ...
  [100  47   3]
  [101  48   4]
  [101  48   4]]

 [[188 106   4]
  [190 108   6]
  [191 109   9]
  ...
  [ 99  46   2]
  [100  47   3]
  [101  48   4]]

 [[187 106   1]
  [188 107   2]
  [188 109   6]
  ...
  [ 98  45   1]
  [ 99  46   2]
  [100  47   3]]

 ...

 [[174 108  11]
  [163  98   0]
  [162  97   0]
  ..

In [70]:
# Check an actual image
img = Image.open(f'{os.path.join(food101_imgpath, 'samosa/1027474.jpg')}')
img.show()

#### Create new annotations with captions

annotation format: <br>
{<br>
  image_id (name of image without .jpg): <br>
      { <br>
    'label': the image category, <br>
    'filepath': the filepath of the image <br>
    'caption': generated caption to describe the image <br>
   } <br>
   ...<br>
}

In [45]:
train_full_annotation = dict()

for label, filepaths in train_data_ann.items():
    for filepath in filepaths:
        img_id = filepath.replace(f"{label}/", "")
        img_id = int(img_id)

        train_full_annotation[img_id] = {
            "img_id": img_id,
            "label": label,
            "filepath": f"{food101_imgpath}/{filepath}.jpg",
            "caption": None
        }

In [46]:
test_full_annotation = dict()

for label, filepaths in test_data_ann.items():
    for filepath in filepaths:
        img_id = filepath.replace(f"{label}/", "")
        img_id = int(img_id)
        
        test_full_annotation[img_id] = {
            "img_id": img_id,
            "label": label,
            "filepath": f"{food101_imgpath}/{filepath}.jpg",
            "caption": None
        }

In [47]:
for example_img in [k for k in train_full_annotation.keys()][:10]:
    print(train_full_annotation[example_img])

{'img_id': 1004234, 'label': 'churros', 'filepath': 'Food101/food-101/images/churros/1004234.jpg', 'caption': None}
{'img_id': 1013460, 'label': 'churros', 'filepath': 'Food101/food-101/images/churros/1013460.jpg', 'caption': None}
{'img_id': 1016791, 'label': 'churros', 'filepath': 'Food101/food-101/images/churros/1016791.jpg', 'caption': None}
{'img_id': 102100, 'label': 'churros', 'filepath': 'Food101/food-101/images/churros/102100.jpg', 'caption': None}
{'img_id': 1025494, 'label': 'churros', 'filepath': 'Food101/food-101/images/churros/1025494.jpg', 'caption': None}
{'img_id': 1029245, 'label': 'churros', 'filepath': 'Food101/food-101/images/churros/1029245.jpg', 'caption': None}
{'img_id': 1029669, 'label': 'churros', 'filepath': 'Food101/food-101/images/churros/1029669.jpg', 'caption': None}
{'img_id': 1031729, 'label': 'churros', 'filepath': 'Food101/food-101/images/churros/1031729.jpg', 'caption': None}
{'img_id': 1037793, 'label': 'churros', 'filepath': 'Food101/food-101/imag

In [48]:
print(len(train_full_annotation))
print(len(test_full_annotation))

75750
25250


In [49]:
json.dump(train_full_annotation, open('Food101/food-101/meta/train_full_annotation.json', 'w'))
json.dump(test_full_annotation, open('Food101/food-101/meta/test_full_annotation.json', 'w'))


#### Generate captions for each image

In [50]:
from transformers import BlipProcessor, BlipForConditionalGeneration

In [51]:
processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")