# Preprocessing image for Food classification training

In [1]:
import shutil # copy, move file
import os # miscellaneous operation system interfaces
import pathlib
import pandas as pd
from tqdm import tqdm
import cv2
import pandas as pd
import numpy as np


In [2]:
!pip install pandarallel
from pandarallel import pandarallel
pandarallel.initialize(progress_bar=True,verbose=0,nb_workers=3)




In [3]:
%cd ../

/home/jupyter/FoodDetector


In [4]:
print(os.getcwd())
root_path = os.getcwd()

/home/jupyter/FoodDetector


# Download image from OpenImage

Enter a script below to terminal in same folder of this file.

``python openimages/src/openimages/download.py --csv_dir csv_dir --base_dir images --format darknet --labels Apple Beer Cucumber Radish Pancake Waffle Bagel Popcorn Burrito Cheese Muffin Snack Juice Cookie Cocktail Guacamole Coffee Food Fruit Grape Milk Mushroom Pizza Seafood Sushi Tea Taco Strawberry Tomato Wine Cream Bread Lemon Banana Hamburger Orange Peach Coconut Vegetable Cabbage Carrot Mango Pineapple Cake Honeycomb Candy Salad Grapefruit Turkey Doughnut Sandwich Watermelon 'Fast food' 'Hot dog' 'French fries' 'Ice cream' 'Egg (Food)' 'Pizza cutter' 'Submarine sandwich' 'Bell pepper'
``

In [33]:
open_images_directory = 'datasets/images'
open_images_path = os.path.join(root_path,open_images_directory)
p = pathlib.Path(open_images_path)

labels = [x.name for x in p.iterdir() if x.is_dir()]

In [34]:
data_labels = []
data_paths = []
for label in labels:
    p_label = pathlib.Path(os.path.join(open_images_path,label))
    paths = list(p_label.glob('**/*.jpg'))
    for path in paths:
        data_labels.append(label)
        data_paths.append(path)

data = {'Label': data_labels, 'Path': data_paths}
data_df = pd.DataFrame(data, columns=['Label','Path'])
print(data_df.sample(5))


           Label                                               Path
3115    cocktail  /home/jupyter/FoodDetector/datasets/images/coc...
9200      tomato  /home/jupyter/FoodDetector/datasets/images/tom...
23992  pineapple  /home/jupyter/FoodDetector/datasets/images/pin...
19549      bread  /home/jupyter/FoodDetector/datasets/images/bre...
5393      carrot  /home/jupyter/FoodDetector/datasets/images/car...


In [35]:
data_df.shape[0]

24055

In [36]:
data_df['Label'].nunique()

46

In [37]:
data_df['Label'].value_counts()

beer            3399
cocktail        1973
cake            1910
bread           1708
ice cream       1066
coffee           930
orange           883
juice            847
tomato           759
wine             686
strawberry       684
candy            682
sushi            663
banana           589
pizza            588
lemon            540
cookie           532
carrot           499
mushroom         471
cucumber         468
hamburger        443
apple            393
sandwich         335
tea              292
cheese           284
grapefruit       249
doughnut         198
bagel            155
french fries     144
watermelon       131
turkey           129
pineapple        119
honeycomb        116
mango            115
pancake          113
taco             111
grape            108
milk             107
peach             99
waffle            97
burrito           82
cream             79
cabbage           76
coconut           75
hot dog           72
popcorn           56
Name: Label, dtype: int64

# Preprocessing UECFOOD256 dataset

In [38]:
uecfood256_directory = 'datasets/UECFOOD256crop'
uecfood256_path = os.path.join(root_path,uecfood256_directory)
print(uecfood256_path)

/home/jupyter/FoodDetector/datasets/UECFOOD256crop


In [39]:
# !unzip -qq datasets/UECFOOD256crop.zip > UECFOOD256crop

In [40]:
uecfood256_zip_path = os.path.join(root_path,'datasets/UECFOOD256crop.zip')
if not os.path.isdir(uecfood256_path):
    print(f"{uecfood256_path} does not exist!")
    !unzip -qq $uecfood256_zip_path -d $uecfood256_path
else:
    print(f"{uecfood256_path} exist!")

/home/jupyter/FoodDetector/datasets/UECFOOD256crop exist!


In [41]:
root_uecfood256_directory = os.path.join(uecfood256_path,'UECFOOD256')
print(root_uecfood256_directory)

/home/jupyter/FoodDetector/datasets/UECFOOD256crop/UECFOOD256


In [42]:
root_uecfood256_directory_path = pathlib.Path(root_uecfood256_directory)
root_uecfood256_directory_path

PosixPath('/home/jupyter/FoodDetector/datasets/UECFOOD256crop/UECFOOD256')

In [43]:
category_path_txt = os.path.join(root_path, 'datasets/UECFOOD256/category.txt')
category_path_txt

'/home/jupyter/FoodDetector/datasets/UECFOOD256/category.txt'

In [44]:
category_df = pd.read_csv(category_path_txt, sep='	', header=0, names=['id', 'name'])
category_df.nunique()

id      256
name    255
dtype: int64

In [45]:
def get_path(row):
    id = str(row[0])
    path = os.path.join(root_uecfood256_directory_path, str(id))
    p_label = pathlib.Path(path)
    paths = list(p_label.glob('**/*.jpg'))
    
    return paths

category_df["Path"] =  category_df.parallel_apply(get_path, axis=1)


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=86), Label(value='0 / 86'))), HBox…

In [46]:
category_df.sample(5)

Unnamed: 0,id,name,Path
2,3,pilaf,[/home/jupyter/FoodDetector/datasets/UECFOOD25...
15,16,chip butty,[/home/jupyter/FoodDetector/datasets/UECFOOD25...
123,124,samul,[/home/jupyter/FoodDetector/datasets/UECFOOD25...
33,34,sauteed spinach,[/home/jupyter/FoodDetector/datasets/UECFOOD25...
30,31,sauteed vegetables,[/home/jupyter/FoodDetector/datasets/UECFOOD25...


In [47]:
category_df['name'].value_counts()

omelet                    2
bean curd family style    1
waffle                    1
lemon fig jelly           1
bagel                     1
                         ..
malasada                  1
natto                     1
coconut milk soup         1
rice gratin               1
teriyaki grilled fish     1
Name: name, Length: 255, dtype: int64

In [48]:
def unnest(df, col, reset_index=False):
    import pandas as pd
    col_flat = pd.DataFrame([[i, x] 
                       for i, y in df[col].apply(list).iteritems() 
                           for x in y], columns=['I', col])
    col_flat = col_flat.set_index('I')
    df = df.drop(col, 1)
    df = df.merge(col_flat, left_index=True, right_index=True)
    if reset_index:
        df = df.reset_index(drop=True)
    return df

category_df = unnest(category_df,'Path',True)
category_df.head(10)

Unnamed: 0,id,name,Path
0,1,rice,/home/jupyter/FoodDetector/datasets/UECFOOD256...
1,1,rice,/home/jupyter/FoodDetector/datasets/UECFOOD256...
2,1,rice,/home/jupyter/FoodDetector/datasets/UECFOOD256...
3,1,rice,/home/jupyter/FoodDetector/datasets/UECFOOD256...
4,1,rice,/home/jupyter/FoodDetector/datasets/UECFOOD256...
5,1,rice,/home/jupyter/FoodDetector/datasets/UECFOOD256...
6,1,rice,/home/jupyter/FoodDetector/datasets/UECFOOD256...
7,1,rice,/home/jupyter/FoodDetector/datasets/UECFOOD256...
8,1,rice,/home/jupyter/FoodDetector/datasets/UECFOOD256...
9,1,rice,/home/jupyter/FoodDetector/datasets/UECFOOD256...


In [49]:
category_df.shape[0]

31402

In [50]:
category_df.rename(columns={'name':'Label'}, inplace=True)
category_df.drop(columns=['id'], inplace=True)


In [51]:
category_df.tail(5)

Unnamed: 0,Label,Path
31397,hot & sour soup,/home/jupyter/FoodDetector/datasets/UECFOOD256...
31398,hot & sour soup,/home/jupyter/FoodDetector/datasets/UECFOOD256...
31399,hot & sour soup,/home/jupyter/FoodDetector/datasets/UECFOOD256...
31400,hot & sour soup,/home/jupyter/FoodDetector/datasets/UECFOOD256...
31401,hot & sour soup,/home/jupyter/FoodDetector/datasets/UECFOOD256...


In [52]:
category_df['Label'].value_counts()

miso soup          728
rice               627
ramen noodle       353
green salad        342
beef curry         246
                  ... 
parfait            100
chop suey          100
green curry        100
chicken rice       100
Pork with lemon    100
Name: Label, Length: 255, dtype: int64

In [54]:
category_df.to_csv(os.path.join(root_path,'FoodClassification/UECFOOD256crop_food_labels_paths.csv'), index=False)

In [55]:
data_df = pd.concat([data_df,category_df],ignore_index = True)
# data_df = category_df
data_df.nunique()

Label      291
Path     55457
dtype: int64

In [56]:
data_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 55457 entries, 0 to 55456
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Label   55457 non-null  object
 1   Path    55457 non-null  object
dtypes: object(2)
memory usage: 866.6+ KB


# Preprocessing food-101 

In [15]:
print(os.getcwd())

/home/jupyter/FoodDetector


In [16]:
food101_directory = 'datasets/food-101'
food101_path = os.path.join(root_path,food101_directory)
print(food101_path)
if not os.path.isdir(food101_path):
    print(f"{food101_path} does not exist!")
    !tar -xf food-101.tar.gz
else:
    print(f"{food101_path} exist!")

/home/jupyter/FoodDetector/datasets/food-101
/home/jupyter/FoodDetector/datasets/food-101 exist!


In [17]:
food101_meta_path = os.path.join(food101_path,'meta')
food101_classes_path = os.path.join(food101_meta_path,'classes.txt')
food101_labels_path = os.path.join(food101_meta_path,'labels.txt')
food101_images_path = os.path.join(food101_path,'images')
print(food101_classes_path)
print(food101_labels_path)
print(food101_images_path)



/home/jupyter/FoodDetector/datasets/food-101/meta/classes.txt
/home/jupyter/FoodDetector/datasets/food-101/meta/labels.txt
/home/jupyter/FoodDetector/datasets/food-101/images


In [30]:
labels_df = pd.read_csv(food101_labels_path,header=0,names=['Label'])
print(labels_df.shape[0])
classes_df = pd.read_csv(food101_classes_path,header=0,names=['Classes'])
print(classes_df.shape[0])

food101_df = pd.concat([labels_df,classes_df],axis=1)
food101_df.sample(5)

100
100


Unnamed: 0,Label,Classes
92,Steak,steak
26,Creme brulee,creme_brulee
5,Beignets,beignets
50,Guacamole,guacamole
31,Dumplings,dumplings


In [31]:
def get_path(row):
    classes = str(row[1])
    path = os.path.join(food101_images_path, classes)
    p_label = pathlib.Path(path)
    paths = list(p_label.glob('**/*.jpg'))
    return paths

food101_df["Classes"] =  food101_df.parallel_apply(get_path, axis=1)
food101_df = unnest(food101_df,"Classes",True)

VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=34), Label(value='0 / 34'))), HBox…

In [32]:
food101_df.rename(columns={'Classes':'Path'}, inplace=True)
food101_df.tail(5)

Unnamed: 0,Label,Path
99995,Waffles,/home/jupyter/FoodDetector/datasets/food-101/i...
99996,Waffles,/home/jupyter/FoodDetector/datasets/food-101/i...
99997,Waffles,/home/jupyter/FoodDetector/datasets/food-101/i...
99998,Waffles,/home/jupyter/FoodDetector/datasets/food-101/i...
99999,Waffles,/home/jupyter/FoodDetector/datasets/food-101/i...


In [33]:
print(food101_df.shape[0])
print(data_df.shape[0])
data_df = pd.concat([data_df,food101_df],ignore_index=True)
print(data_df.shape[0])

100000
31395
131395


In [34]:
data_df.tail(5)

Unnamed: 0,Label,Path
131390,Waffles,/home/jupyter/FoodDetector/datasets/food-101/i...
131391,Waffles,/home/jupyter/FoodDetector/datasets/food-101/i...
131392,Waffles,/home/jupyter/FoodDetector/datasets/food-101/i...
131393,Waffles,/home/jupyter/FoodDetector/datasets/food-101/i...
131394,Waffles,/home/jupyter/FoodDetector/datasets/food-101/i...


In [57]:
data_df.to_csv(os.path.join(root_path,'FoodClassification/food_labels_paths.csv'), index=False)