In [6]:
import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt
import csv
import ast

In [7]:
# Read the label file and parse its text as a Python literal; the parsed
# object is bound to imagenet_classes and used as a dict by later cells.
with open('imagenet1000_clsidx_to_labels.txt', 'r') as labels_file:
    labels_source = labels_file.read()
imagenet_classes = ast.literal_eval(labels_source)

In [8]:
# Display the parsed index -> label mapping to sanity-check the load.
imagenet_classes

{0: 'tench, Tinca tinca',
 1: 'goldfish, Carassius auratus',
 2: 'great white shark, white shark, man-eater, man-eating shark, Carcharodon carcharias',
 3: 'tiger shark, Galeocerdo cuvieri',
 4: 'hammerhead, hammerhead shark',
 5: 'electric ray, crampfish, numbfish, torpedo',
 6: 'stingray',
 7: 'cock',
 8: 'hen',
 9: 'ostrich, Struthio camelus',
 10: 'brambling, Fringilla montifringilla',
 11: 'goldfinch, Carduelis carduelis',
 12: 'house finch, linnet, Carpodacus mexicanus',
 13: 'junco, snowbird',
 14: 'indigo bunting, indigo finch, indigo bird, Passerina cyanea',
 15: 'robin, American robin, Turdus migratorius',
 16: 'bulbul',
 17: 'jay',
 18: 'magpie',
 19: 'chickadee',
 20: 'water ouzel, dipper',
 21: 'kite',
 22: 'bald eagle, American eagle, Haliaeetus leucocephalus',
 23: 'vulture',
 24: 'great grey owl, great gray owl, Strix nebulosa',
 25: 'European fire salamander, Salamandra salamandra',
 26: 'common newt, Triturus vulgaris',
 27: 'eft',
 28: 'spotted salamander, Ambystoma 

In [9]:
# Print the index of every class whose full label string is exactly 'banana'.
for class_idx, label in imagenet_classes.items():
    if label != 'banana':
        continue
    print(class_idx)

954


In [14]:
import warnings
warnings.filterwarnings('ignore')

## Food names from NLTK

In [32]:
from nltk.corpus import wordnet as wn

# Start from the 'food.n.02' synset and follow hyponym links via closure();
# collect the lemma names of every synset reached. A set removes duplicate
# names, so the order of the resulting list is not meaningful.
food = wn.synset('food.n.02')
food_list = list({
    lemma_name
    for synset in food.closure(lambda s: s.hyponyms())
    for lemma_name in synset.lemma_names()
})

In [33]:
# Display the collected WordNet lemma names (multi-word names use underscores).
food_list

['tangelo',
 'Winesap',
 'ecrevisse',
 "Bramley's_Seedling",
 'swede',
 'whelk',
 'ananas',
 'vegetable_marrow',
 'side',
 'butter_bean',
 'Bermuda_onion',
 'Empire',
 'bird',
 'fricandeau',
 'guava',
 'kidney_bean',
 'julienne',
 'chocolate_candy',
 'bagel',
 'ricotta',
 'linguini',
 'long-neck_clam',
 'quark_cheese',
 'sausage',
 'pork_belly',
 'ravioli',
 'crawfish',
 'sand_dab',
 'onion_bagel',
 'tunny',
 'crescent_roll',
 'turtle_bean',
 'pattypan_squash',
 'romaine',
 'seckel_pear',
 'pullet',
 'cherimoya',
 'celery_root',
 'taro',
 'pepper',
 'summer_crookneck',
 'citron',
 'Chinese_cabbage',
 'stockfish',
 'spoon_bread',
 'pufferfish',
 'romaine_lettuce',
 'stew_meat',
 'petit_four',
 'winkle',
 'stone_crab',
 'shortbread_cookie',
 'tenderloin',
 'courgette',
 'buttercrunch',
 'nan',
 'dove',
 'skillet_cake',
 'coral',
 'Mexican_husk_tomato',
 'salmon_trout',
 'melon_ball',
 'date_bar',
 'juneberry',
 'escarole',
 'liver',
 'carp',
 'ratafia',
 'weenie',
 'pea',
 'brandysnap',


## Filter foods out of ImageNet list

In [34]:
# Lower-case every lemma name and split it on underscores, so a multi-word
# name such as 'vegetable_marrow' contributes each of its words separately.
# Other punctuation (apostrophes, hyphens) is left untouched.
# NOTE: food_list is rebound from a list of strings to a list of word lists,
# so re-running this cell without re-running the cell that builds food_list
# fails (lists have no .lower()).
food_list = [lemma_name.lower().split("_") for lemma_name in food_list]

# Flatten the per-name word lists into one list of single words.
flat_food_list = []
for name_words in food_list:
    flat_food_list.extend(name_words)
flat_food_list[:10]

['tangelo',
 'winesap',
 'ecrevisse',
 "bramley's",
 'seedling',
 'swede',
 'whelk',
 'ananas',
 'vegetable',
 'marrow']

In [48]:
# Keep every ImageNet class for which at least one of its comma-separated,
# lower-cased synonyms is exactly equal to a word in flat_food_list.
# The food words are turned into a set once, so each class is checked with
# hash lookups instead of re-scanning the whole word list per class.
food_words = set(flat_food_list)
imagenet_food_classes = {
    class_idx: label
    for class_idx, label in imagenet_classes.items()
    if not food_words.isdisjoint(
        synonym.strip(" ") for synonym in label.lower().split(",")
    )
}
len(imagenet_food_classes)

49

In [44]:
# ImageNet class indices to drop from the word-matched food classes.
# Presumably picked by inspecting the matches by hand -- TODO confirm.
non_food_classes = [
    457, 494, 567, 626, 723,
    738, 760, 923, 972, 976,
]

In [47]:
# Copy the word-matched classes, then remove every index listed in
# non_food_classes (indices that are not present are simply ignored).
filtered_imagenet = dict(imagenet_food_classes)
for excluded_idx in non_food_classes:
    filtered_imagenet.pop(excluded_idx, None)
len(filtered_imagenet)

39

## Get list of food and non-food classes from ImageNet

In [49]:
# Class indices of the remaining food classes, in the dict's iteration order.
food_class_keys = [class_idx for class_idx in filtered_imagenet]
food_class_keys[:10]

[8, 82, 85, 86, 99, 113, 123, 124, 331, 339]

In [55]:
# Every ImageNet class whose index is not one of the food class indices.
imagenet_non_food_classes = {
    class_idx: label
    for class_idx, label in imagenet_classes.items()
    if class_idx not in food_class_keys
}
imagenet_non_food_classes

{0: 'tench, Tinca tinca',
 1: 'goldfish, Carassius auratus',
 2: 'great white shark, white shark, man-eater, man-eating shark, Carcharodon carcharias',
 3: 'tiger shark, Galeocerdo cuvieri',
 4: 'hammerhead, hammerhead shark',
 5: 'electric ray, crampfish, numbfish, torpedo',
 6: 'stingray',
 7: 'cock',
 9: 'ostrich, Struthio camelus',
 10: 'brambling, Fringilla montifringilla',
 11: 'goldfinch, Carduelis carduelis',
 12: 'house finch, linnet, Carpodacus mexicanus',
 13: 'junco, snowbird',
 14: 'indigo bunting, indigo finch, indigo bird, Passerina cyanea',
 15: 'robin, American robin, Turdus migratorius',
 16: 'bulbul',
 17: 'jay',
 18: 'magpie',
 19: 'chickadee',
 20: 'water ouzel, dipper',
 21: 'kite',
 22: 'bald eagle, American eagle, Haliaeetus leucocephalus',
 23: 'vulture',
 24: 'great grey owl, great gray owl, Strix nebulosa',
 25: 'European fire salamander, Salamandra salamandra',
 26: 'common newt, Triturus vulgaris',
 27: 'eft',
 28: 'spotted salamander, Ambystoma maculatum',

## ImageNet class names

In [59]:
# Download the class table and lower-case its class_name column so it can be
# compared against the lower-cased food words.
classes_csv_url = 'https://raw.githubusercontent.com/mf1024/ImageNet-Datasets-Downloader/master/classes_in_imagenet.csv'
df = pd.read_csv(classes_csv_url).assign(
    class_name=lambda frame: frame["class_name"].str.lower()
)
df.head()

Unnamed: 0,synid,class_name,urls,flickr_urls
0,n00004475,organism,8,6
1,n00005787,benthos,1264,626
2,n00006024,heterotroph,1,0
3,n00006484,cell,1251,628
4,n00007846,person,1242,1138


In [60]:
# Drop every row that has a missing value in any column.
df = df.dropna()

In [69]:
# Class names that appear among the food words but are excluded from the
# food frame further down.
not_food_list = [
    "ball", "puppy", "dog", "bar", "blade", "garden",
    "hand", "head", "jacket", "junk", "key", "oven",
    "pin", "leg", "pinwheel", "pot", "plate", "rack",
    "refrigerator", "saddle", "shank", "spring", "steamer", "stick",
    "temple", "truck", "turban", "ring", "cup", "rock",
    "shell", "pilot", "runner", "smith", "ash", "sand",
]

In [71]:
# Split the class table by whether class_name is one of the food words.
# Both masks are computed on df itself so they share its index; indexing the
# already-filtered food frame with a mask built from df would force pandas to
# reindex the mask and emit a "Boolean Series key will be reindexed" warning.
is_food_word = df["class_name"].isin(flat_food_list)
is_excluded = df["class_name"].isin(not_food_list)

df_non_food = df[~is_food_word]
# Food rows are those matching a food word, minus the manual exclusions.
df_food = df[is_food_word & ~is_excluded]
# NOTE(review): rows that match a food word AND are in not_food_list end up
# in neither frame -- confirm whether they should be added to df_non_food.

In [None]:
# stopped at 2:40 in video