In [23]:
# Upgrade (-U) the notebook's dependencies quietly (-qq).
# NOTE(review): versions are not pinned, so a re-run may pull different releases.
%pip install -Uqq fastai duckduckgo_search ipywidgets

Note: you may need to restart the kernel to use updated packages.


Install up-to-date versions of `fastai`, `duckduckgo_search`, and `ipywidgets`.

In [24]:
from duckduckgo_search import DDGS
from fastcore.all import *

# Search terms grouped by category label. Each key becomes a class label
# (and a sub-folder name); each value lists the terms searched for that class.
food_list = {
    'normal_food': [
        'fried chicken', 'french fries', 'pizza', 'chocolate', 'sushi roll',
        'nigiri', 'sashimi', 'peking duck', 'baklava', 'burger',
        'donut', 'dumpling', 'pasta', 'sandwich', 'hot dog',
        'cake', 'pie', 'burrito', 'taco', 'chicken wings',
    ],
    'indian_food': [
        'indian curry', 'butter chicken', 'chicken tikka masala',
        'tandoori chicken', 'biryani', 'dal makhni', 'samosa',
        'pani puri', 'palak paneer', 'naan bread',
    ],
}

def search_images(term, max_images=30):
    """Run an image search for `term` and return the 'image' field of each hit.

    Prints the term being searched, then queries `DDGS().images` with
    `max_results=max_images` and collects the results into an `L`.
    """
    print(f"Searching for {term}")
    with DDGS() as client:
        # Materialise the results while the client is still open.
        hits = L(client.images(term, max_results=max_images))
        return hits.itemgot('image')


# Testing
# urls = search_images('daily training running shoe photos', max_images=1)
# urls[0]

Searches for and downloads training images

In [25]:
from fastdownload import download_url
from fastai.vision.all import *
from time import sleep

# Root folder for the dataset, relative to the notebook's working directory.
path = Path("food_type")

def get_images(search_topics):
    """Download and resize training images for every category in `search_topics`.

    Args:
        search_topics: Mapping of category label -> list of search terms.
            Each label becomes a sub-folder of the global `path`, and each
            term is searched as '<term> photos'.

    Images are downloaded into `path/<label>` and then resized in place with
    `max_size=400`.
    """
    # `label` instead of `type`, which would shadow the builtin.
    for label, topic_list in search_topics.items():
        dest = path/label
        dest.mkdir(exist_ok=True, parents=True)
        for topic in topic_list:
            download_images(dest, urls=search_images(f'{topic} photos'))
        # Resize once per category, after all of its downloads, rather than
        # re-processing the whole folder after every single search term.
        resize_images(dest, max_size=400, dest=dest)
    
# Download and resize images for every category/term in food_list.
# NOTE(review): the saved output below ends in KeyboardInterrupt, so the
# committed run did not finish -- re-run to completion before training.
get_images(food_list)

Searching for fried chicken photos
Searching for french fries photos
Searching for pizza photos
Searching for chocolate photos
Searching for sushi roll photos
Searching for nigiri photos


KeyboardInterrupt: 

Some photos might not download correctly which could cause our model training to fail, so we'll remove them:

In [None]:
# Collect the images under `path` that fail verification, delete each one,
# and show how many were removed.
failed = verify_images(get_image_files(path))
for broken_file in failed:
    broken_file.unlink()
len(failed)

To train a model, we'll need `DataLoaders`, which is an object that contains a *training set* (the images used to create a model) and a *validation set* (the images used to check the accuracy of a model -- not used during training). In `fastai` we can create that easily using a `DataBlock`, and view sample images from it:

In [None]:
# Describe how the image folders become training data:
#   - inputs are images, targets are categories
#   - items are the image files found under `path`
#   - 20% of the items are held out for validation (fixed seed for repeatability)
#   - each item's label comes from `parent_label`
#   - every image is resized to 128 with the Squish method
# The resize transform is passed directly here; previously a plain Resize(128)
# was set and then immediately replaced via `food.new(...)`.
food = DataBlock(
    blocks=(ImageBlock, CategoryBlock),
    get_items=get_image_files,
    splitter=RandomSplitter(valid_pct=0.2, seed=42),
    get_y=parent_label,
    item_tfms=Resize(128, ResizeMethod.Squish))

dls = food.dataloaders(path)

# Preview a few training samples with their labels.
dls.train.show_batch(max_n=8, nrows=2)

Trains an image recognition model using the data and the pretrained `resnet18` model.

`fastai` comes with a helpful `fine_tune()` method which automatically uses best practices for fine tuning a pre-trained model, so we'll use that.

In [None]:
# Build a learner from the dataloaders and resnet18, reporting error_rate as the metric.
learn = vision_learner(dls, resnet18, metrics=error_rate)
# Fine-tune the model; 10 is the number of epochs passed to fine_tune.
learn.fine_tune(10)

Shows common errors in training

In [None]:
# Build an interpretation object from the trained learner and plot its confusion matrix.
interp = ClassificationInterpretation.from_learner(learn)
interp.plot_confusion_matrix()

Shows the images with the top losses: the most confidently wrong predictions, or the least confident correct predictions.

In [None]:
# Plot the 5 highest-loss images in a single row.
interp.plot_top_losses(5, nrows=1)

Clean the data based on the `top_losses` images:

The model is created first, with possible problems in the data in mind, so that data cleaning can be partly automated.

In [None]:
from fastai.vision.widgets import *

# Build the cleaning widget from the trained learner.
cleaner = ImageClassifierCleaner(learn)
# NOTE(review): the loops below run in the same cell, immediately after the
# widget is created, and `cleaner` is never the cell's last expression.
# Presumably the widget should be displayed and used first, with these loops
# run from a later cell -- confirm and split the cell if so.
# Delete every file the cleaner reports as marked for deletion.
for idx in cleaner.delete(): cleaner.fns[idx].unlink()
# Move every file the cleaner reports as re-labelled into path/<new category>.
for idx,cat in cleaner.change(): shutil.move(str(cleaner.fns[idx]), path/cat)
     

Check downloaded images with model

In [None]:
def print_results(image_name, food_type, probability):
    """Print the predicted food type for an image and the model's confidence.

    Args:
        image_name: File name (or path) of the image; its extension is
            dropped for display.
        food_type: Predicted category label.
        probability: Per-class probabilities for the image (any iterable of
            numbers, e.g. a list or a 1-D tensor).
    """
    from os.path import splitext  # local import keeps the function self-contained

    # Strip the real extension, whatever its length ('.png', '.jpeg', ...),
    # instead of blindly chopping the last four characters.
    display_name = splitext(image_name)[0]
    # The predicted class is the most probable one, so report the largest
    # probability rather than whichever class happens to sit at index 0.
    confidence = float(max(probability))
    print(f"{display_name} is a: {food_type}")
    print(f"Probability: {confidence:.4f}")

# Local sample images to run through the trained model.
sample_images = ['cuban_sandwhich.png', 'pizza_slice.png']

for image_name in sample_images:
    # predict() yields three values; the middle one is not needed here.
    food_type, _, probs = learn.predict(image_name)
    print_results(image_name, food_type, probs)
        
