In [1]:
import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt
import csv
import ast

In [2]:
# Parse the label file, which holds a Python dict literal mapping class index -> label string.
# literal_eval only accepts literals, so the file content is never executed as code.
# The encoding is pinned so decoding does not depend on the platform's default locale.
with open('imagenet1000_clsidx_to_labels.txt', 'r', encoding='utf-8') as f:
    imagenet_classes = ast.literal_eval(f.read())

In [3]:
# Render the class-index -> label mapping as the cell output (the whole dict is shown).
imagenet_classes

{0: 'tench, Tinca tinca',
 1: 'goldfish, Carassius auratus',
 2: 'great white shark, white shark, man-eater, man-eating shark, Carcharodon carcharias',
 3: 'tiger shark, Galeocerdo cuvieri',
 4: 'hammerhead, hammerhead shark',
 5: 'electric ray, crampfish, numbfish, torpedo',
 6: 'stingray',
 7: 'cock',
 8: 'hen',
 9: 'ostrich, Struthio camelus',
 10: 'brambling, Fringilla montifringilla',
 11: 'goldfinch, Carduelis carduelis',
 12: 'house finch, linnet, Carpodacus mexicanus',
 13: 'junco, snowbird',
 14: 'indigo bunting, indigo finch, indigo bird, Passerina cyanea',
 15: 'robin, American robin, Turdus migratorius',
 16: 'bulbul',
 17: 'jay',
 18: 'magpie',
 19: 'chickadee',
 20: 'water ouzel, dipper',
 21: 'kite',
 22: 'bald eagle, American eagle, Haliaeetus leucocephalus',
 23: 'vulture',
 24: 'great grey owl, great gray owl, Strix nebulosa',
 25: 'European fire salamander, Salamandra salamandra',
 26: 'common newt, Triturus vulgaris',
 27: 'eft',
 28: 'spotted salamander, Ambystoma 

In [4]:
# Report the index of every class whose label is exactly 'banana'.
banana_indices = [idx for idx, label in imagenet_classes.items() if label == 'banana']
for idx in banana_indices:
    print(idx)

954


In [5]:
import warnings
# Ignore every warning category from this point on.
# NOTE(review): a blanket filter also hides deprecation and pandas indexing warnings
# raised by later cells — consider narrowing it to the specific category being silenced.
warnings.filterwarnings('ignore')

## Food names from NLTK

In [6]:
from nltk.corpus import wordnet as wn

# Start from the WordNet synset 'food.n.02' and walk its full hyponym closure,
# gathering the lemma names of every synset reached.
food = wn.synset('food.n.02')
food_lemmas = {
    lemma
    for synset in food.closure(lambda s: s.hyponyms())
    for lemma in synset.lemma_names()
}
# De-duplicated lemma names; the order comes from set iteration and is not meaningful.
food_list = list(food_lemmas)

In [7]:
# Render the collected lemma names as the cell output (the whole list is shown).
food_list

['Armerican_cheddar',
 'shrimp',
 'sparerib',
 'side',
 'green_pepper',
 'cooky',
 'pot_roast',
 'blade_roast',
 'striped_bass',
 'oyster',
 'drop_biscuit',
 'English_sole',
 'grey_mullet',
 'blue_point',
 'scallop',
 'sorrel',
 'freshwater_bass',
 'oysters_Rockefeller',
 'hot_cake',
 'leaf_lettuce',
 'halibut',
 'journey_cake',
 'yellow_bean',
 'cushaw',
 'veg',
 'red_salmon',
 'honey_cake',
 'boeuf',
 'corn_muffin',
 'brook_trout',
 'sweetbreads',
 'haricot',
 'sapsago',
 'poulet',
 'penne',
 'papaw',
 'gingerbread_man',
 'whiting',
 'bing_cherry',
 'Rock_Cornish_hen',
 'pinwheel_roll',
 'chicken',
 'string_bean',
 'shellfish',
 'American_smelt',
 'rollmops',
 'beurre_noisette',
 'quahog',
 'sea_biscuit',
 'winter_melon',
 'bay_scallop',
 'aguacate',
 'twister',
 'flapjack',
 'sausage_meat',
 'pea_bean',
 'polony',
 'Stayman',
 'smoked_eel',
 'bully_beef',
 'cut_of_veal',
 'kipper',
 'Parker_House_roll',
 'cowberry',
 'longanberry',
 'chocolate',
 'jack',
 'vol-au-vent',
 'sapodilla'

## Filter foods out of ImageNet list

In [8]:
# Lower-case each lemma and break it into its underscore-separated words.
# Note: food_list is rebound here from a list of strings to a list of word lists.
food_list = [lemma.lower().split("_") for lemma in food_list]
# Concatenate the per-lemma word lists into one flat list (duplicates are kept).
flat_food_list = []
for words in food_list:
    flat_food_list.extend(words)
flat_food_list[:10]

['armerican',
 'cheddar',
 'shrimp',
 'sparerib',
 'side',
 'green',
 'pepper',
 'cooky',
 'pot',
 'roast']

In [9]:
# Collect the ImageNet classes whose label shares at least one whole name with the food words.
# The food words are converted to a set once, outside the loop: intersecting against the raw
# list would rescan every food word for each class.
food_vocabulary = set(flat_food_list)
imagenet_food_classes = {}
for k, v in imagenet_classes.items():
    # A label is a comma-separated string of names; each name is lower-cased and stripped of
    # surrounding spaces, then compared as a whole (multi-word names are not split further).
    imagenet_class_set = {name.strip(" ") for name in v.lower().split(",")}
    if imagenet_class_set & food_vocabulary:
        imagenet_food_classes[k] = v
len(imagenet_food_classes)

49

In [10]:
# ImageNet class indices that are removed from the keyword matches in the following cell
# (presumably hand-checked false positives — confirm with the author).
non_food_classes = [
    457, 494, 567, 626, 723,
    738, 760, 923, 972, 976,
]

In [11]:
# Keep only the keyword matches whose class index is not on the exclusion list.
filtered_imagenet = {
    class_idx: label
    for class_idx, label in imagenet_food_classes.items()
    if class_idx not in non_food_classes
}
len(filtered_imagenet)

39

## Get list of food and non-food classes from ImageNet

In [12]:
# Class indices of the retained food classes, in the mapping's own order.
food_class_keys = [class_idx for class_idx in filtered_imagenet]
food_class_keys[:10]

[8, 82, 85, 86, 99, 113, 123, 124, 331, 339]

In [13]:
# Everything in the ImageNet mapping whose index is not among the food class keys.
imagenet_non_food_classes = {
    class_idx: label
    for class_idx, label in imagenet_classes.items()
    if class_idx not in food_class_keys
}
imagenet_non_food_classes

{0: 'tench, Tinca tinca',
 1: 'goldfish, Carassius auratus',
 2: 'great white shark, white shark, man-eater, man-eating shark, Carcharodon carcharias',
 3: 'tiger shark, Galeocerdo cuvieri',
 4: 'hammerhead, hammerhead shark',
 5: 'electric ray, crampfish, numbfish, torpedo',
 6: 'stingray',
 7: 'cock',
 9: 'ostrich, Struthio camelus',
 10: 'brambling, Fringilla montifringilla',
 11: 'goldfinch, Carduelis carduelis',
 12: 'house finch, linnet, Carpodacus mexicanus',
 13: 'junco, snowbird',
 14: 'indigo bunting, indigo finch, indigo bird, Passerina cyanea',
 15: 'robin, American robin, Turdus migratorius',
 16: 'bulbul',
 17: 'jay',
 18: 'magpie',
 19: 'chickadee',
 20: 'water ouzel, dipper',
 21: 'kite',
 22: 'bald eagle, American eagle, Haliaeetus leucocephalus',
 23: 'vulture',
 24: 'great grey owl, great gray owl, Strix nebulosa',
 25: 'European fire salamander, Salamandra salamandra',
 26: 'common newt, Triturus vulgaris',
 27: 'eft',
 28: 'spotted salamander, Ambystoma maculatum',

## ImageNet class names

In [14]:
# Load the ImageNet class table from the ImageNet-Datasets-Downloader repository.
CLASSES_CSV_URL = 'https://raw.githubusercontent.com/mf1024/ImageNet-Datasets-Downloader/master/classes_in_imagenet.csv'
df = pd.read_csv(CLASSES_CSV_URL)
# Lower-case the class names so they compare equal to the lower-cased food words.
df = df.assign(class_name=df["class_name"].str.lower())
df.head()

Unnamed: 0,synid,class_name,urls,flickr_urls
0,n00004475,organism,8,6
1,n00005787,benthos,1264,626
2,n00006024,heterotroph,1,0
3,n00006484,cell,1251,628
4,n00007846,person,1242,1138


In [15]:
# Discard every row that has a missing value in any column.
df = df.dropna()

In [16]:
# Class names that are excluded from the food classes in a later cell
# (presumably words that usually name non-food things — confirm with the author).
not_food_list = [
    "ball", "puppy", "dog", "bar", "blade", "garden",
    "hand", "head", "jacket", "junk", "key", "oven",
    "pin", "leg", "pinwheel", "pot", "plate", "rack",
    "refrigerator", "saddle", "shank", "spring", "steamer", "stick",
    "temple", "truck", "turban", "ring", "cup", "rock",
    "shell", "pilot", "runner", "smith", "ash", "sand",
]

In [17]:
# Split the class table by whether the class name is one of the food words.
is_food_word = df["class_name"].isin(flat_food_list)
df_non_food = df[~is_food_word]
df_food = df[is_food_word]
# Drop the excluded names from the food side. The mask is computed from df_food itself so
# that it has exactly the index of the frame being filtered; a mask taken from the larger
# df has to be realigned by pandas, which raises a "Boolean Series key will be reindexed"
# UserWarning.
# NOTE(review): rows removed here are not added to df_non_food, so they end up in neither
# frame — confirm that this is intended.
df_food = df_food[~df_food["class_name"].isin(not_food_list)]

In [21]:
# Pull the synset ids and class names of the food rows out as parallel lists,
# then pair them into an id -> name lookup (a repeated id keeps its last name).
imagenet_food_class_ids = df_food["synid"].tolist()
imagenet_food_class_names = df_food["class_name"].tolist()
imagenet_food_class_ids_and_names_dict = {
    synid: class_name
    for synid, class_name in zip(imagenet_food_class_ids, imagenet_food_class_names)
}
len(imagenet_food_class_ids_and_names_dict)

862

In [22]:
# Pull the synset ids and class names of the non-food rows out as parallel lists,
# then pair them into an id -> name lookup (a repeated id keeps its last name).
imagenet_non_food_class_ids = df_non_food["synid"].tolist()
imagenet_non_food_class_names = df_non_food["class_name"].tolist()
imagenet_non_food_class_ids_and_names_dict = {
    synid: class_name
    for synid, class_name in zip(imagenet_non_food_class_ids, imagenet_non_food_class_names)
}
len(imagenet_non_food_class_ids_and_names_dict)

20898