# Imports

In [None]:
# Re-import edited modules automatically before each cell is executed
%reload_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the notebook
%matplotlib inline

In [None]:
import json
from glob import glob
from pathlib import Path
from PIL import Image as Image_PIL
import imghdr # built in module
import numpy as np
from fastai import *
from fastai.vision import *
from fastai.metrics import error_rate, accuracy
import pandas as pd

In [None]:
# Show which `python` executable the shell resolves first on PATH
# (this may differ from the interpreter the kernel itself is running)
!which python

# Paths

In [None]:
# Image data: everything lives under '<home>/Google Drive/mushrooms'
path_gdrive = Path.home().joinpath('Google Drive', 'mushrooms')
data_folder = path_gdrive.joinpath('data')

# Project checkout: '<home>/repos_github/mushroom-identifier', with the app
# and its models folder nested below it
project_path = Path.home().joinpath('repos_github', 'mushroom-identifier')
app_path = project_path.joinpath('app')
model_path = app_path.joinpath('models')

## Load Mushroom Info

In [None]:
# Read the mushroom spreadsheet (its first column is consumed as the initial
# index) and then re-index the frame by its 'Index' column, which is dropped
# from the regular columns.
df_mushrooms = (
    pd.read_excel(project_path / 'data/mushrooms.xlsx', index_col=0)
    .set_index('Index', drop=True)
)
df_mushrooms.head()

## Create `labels.csv` file

Settings to create dataset:

In [None]:
# Mushroom types with fewer images than this are left out of the dataset
min_n_img = 25

# Cap on the number of images taken per mushroom type
n_img = 250

In [None]:
# Build `labels.csv`: one row per selected image, with columns
#   name   -> image file name (without directory)
#   labels -> '<latin>;<poisonous>' taken from the matching row of df_mushrooms
#
# Records are collected in a plain list and turned into a DataFrame once at the
# end: growing a DataFrame row by row is quadratic, and `DataFrame.append` was
# removed in pandas 2.0.
label_records = []
for mushroom_type in df_mushrooms.index.tolist():

    # Get the list of images for this type of mushroom. Sorted so that the
    # subset kept when truncating to n_img does not depend on the arbitrary
    # order in which the filesystem returns the files.
    # NOTE(review): this is a prefix match, so a type whose name is a prefix of
    # another type's name would also pick up that type's files - confirm the
    # file naming scheme rules this out.
    imgs_mushroom_type = sorted(glob(f'{data_folder}/{mushroom_type}*.jpg'))
    nbr_images = len(imgs_mushroom_type)

    if nbr_images < min_n_img:
        print(f'- Skipping {mushroom_type} because has a number of available images '
              f'({nbr_images}), lower than {min_n_img} ...')
        continue

    print(f'- Adding {mushroom_type} images to df_label, ({nbr_images} available)...')

    # The label string is the same for every image of this type, so compute it
    # once. `.iloc[0]` takes the first matching row explicitly by position.
    mushroom_info = df_mushrooms.loc[df_mushrooms.index == mushroom_type]
    labels = f'{mushroom_info["latin"].iloc[0]};{mushroom_info["poisonous"].iloc[0]}'

    # Keep at most n_img images (the previous counter-based loop stopped one
    # image early, at n_img - 1).
    label_records.extend(
        {'name': Path(image).name, 'labels': labels}
        for image in imgs_mushroom_type[:n_img]
    )

# Passing `columns` keeps the expected header even when no type qualified.
df_labels = pd.DataFrame(label_records, columns=['name', 'labels'])
df_labels.to_csv(path_gdrive / 'labels.csv', index=False)

In [None]:
# Total number of labelled images written to labels.csv
len(df_labels)

In [None]:
# len(df_labels['labels'].unique()) * n_img

In [None]:
# Preview the first rows (columns: 'name', 'labels')
df_labels.head()

# Train the model

## Generate databunch

Add some data augmentation with `get_transforms`:

In [None]:
# Data-augmentation transforms passed later to `src.transform(...)`.
# Each keyword below sets an explicit value for a `get_transforms` option.
# NOTE(review): max_warp=0.5 looks high compared with the documented fastai
# default (0.2) - confirm this amount of perspective warping is intended.
tfms = get_transforms(
    flip_vert=True,
    max_lighting=0.1,
    max_zoom=1.05,
    max_warp=0.5,
)

Define batch size, image size:

In [None]:
# Seed NumPy's global RNG so that we always get the same validation set
np.random.seed(42)

size = 128  # image size
bs = 32     # batch size (alternative: 64)

Create an `ImageDataBunch` from `path` by splitting the data in folder and labelled in a file `csv_labels` between a training and validation set.

Use `valid_pct` to indicate the percentage of the total images to use as the validation set. An optional test folder contains unlabelled data and suffix contains an optional suffix to add to the filenames in `csv_labels` (such as '.jpg').
* `fn_col` is the index (or the name) of the column containing the filenames, and
* `label_col` is the index (indices) (or the name(s)) of the column(s) containing the labels.
* Use `header` to specify the format of the csv header, and
* `delimiter` to specify a non-standard csv-field separator.

In case your csv has no header, column parameters can only be specified as indices. If `label_delim` is passed, split what's in the label column according to that separator.

In [None]:
# Labelled item list built from <path_gdrive>/labels.csv; image files are looked
# up in the 'data' sub-folder and each label cell is split on ';'.
# The 80/20 split is seeded explicitly so this cell yields the same validation
# set every time it runs, independently of how much of NumPy's global random
# state has been consumed since the seed was last set.
src = (ImageList.from_csv(path_gdrive, 'labels.csv', folder='data')
       .split_by_rand_pct(0.2, seed=42)
       .label_from_df(label_delim=';'))

## Train resnet34; image size=128

In [None]:
# Build the databunch from the configured `size` and `bs` settings instead of a
# hard-coded image size and the library's default batch size, then normalise
# with the ImageNet statistics.
img_data = (src.transform(tfms, size=size)
            .databunch(bs=bs)
            .normalize(imagenet_stats))

First image of the training dataset:

First image of the validation dataset:

Show some images from the dataset:

In [None]:
# Visual sanity check of images and their labels (rows=3)
img_data.show_batch(rows=3, figsize=(10,8))

Number of classes in the dataset

In [None]:
# Number of categories
img_data.c

Set a learner with resnet34 architecture

In [None]:
# Thresholded metrics: both are evaluated with thresh=0.2
f_score = partial(fbeta, thresh=0.2)
acc_02 = partial(accuracy_thresh, thresh=0.2)

# resnet34-based learner on the current databunch
model = cnn_learner(
    img_data,
    models.resnet34,
    metrics=[acc_02, f_score],
)

In [None]:
# First epoch of one-cycle training on the newly created learner
model.fit_one_cycle(1)

In [None]:
# Checkpoint after the first epoch.
# NOTE(review): this save passes an explicit path under path_gdrive, while the
# later saves pass a bare name - confirm both are meant to end up in the same
# folder.
model.save(path_gdrive / 'mushrooms_1_cycle')

In [None]:
# Two more epochs (3 in total so far)
model.fit_one_cycle(2)

In [None]:
# Checkpoint after 1 + 2 = 3 epochs
model.save('mushrooms_3_cycles')

In [None]:
# Interpretation helper built from the current learner; used by the plots below
interpret = ClassificationInterpretation.from_learner(model)

In [None]:
# Show the 4 samples with the highest loss
interpret.plot_top_losses(4, figsize=(20, 25))

In [None]:
# NOTE(review): the labels are multi-label (label_delim=';' when building
# `src`), whereas a confusion matrix assumes exactly one class per image -
# confirm this cell runs and that its output is meaningful here.
interpret.plot_confusion_matrix(figsize=(20,20), dpi=60)

In [None]:
# Run the learning-rate finder; its results are plotted in the next cell
model.lr_find()

In [None]:
# Plot what the recorder captured during lr_find
model.recorder.plot()

In [None]:
# Unfreeze the learner, then train 3 more epochs with a learning-rate range
# of 1e-3 .. 5e-2 passed as a slice
model.unfreeze()
model.fit_one_cycle(3, max_lr=slice(1e-03, 5e-02))

In [None]:
# Checkpoint after 1 + 2 + 3 = 6 epochs at image size 128
model.save('stage-1-128-rn34_lr_6_cycles')

In [None]:
# Five more epochs with a narrower learning-rate range (1e-3 .. 1e-2).
# NOTE(review): no checkpoint is saved after this run before the data is
# switched to size 256 - confirm that is intended.
model.fit_one_cycle(5, max_lr=slice(1e-03, 1e-02))

## Train resnet34; image size=256

In [None]:
# Same pipeline as the 128px stage but at image size 256, using the configured
# batch size `bs` rather than the library's default.
img_data = (src.transform(tfms, size=256)
            .databunch(bs=bs)
            .normalize(imagenet_stats))

In [None]:
# Point the existing learner at the size-256 databunch; the learner object
# (and therefore its trained weights) is reused, only its data is replaced
model.data = img_data

In [None]:
# See around minute 51 of the Lesson 3 video:
# do some data augmentation and
# create a new databunch with images of size=256.

In [None]:
# Freeze the learner again, rerun the learning-rate finder on the size-256
# data and plot the result
model.freeze()
model.lr_find()
model.recorder.plot()

In [None]:
# lr = 5e-4; `slice(lr)` has only a stop value. Train 2 epochs at size 256.
lr = 1e-3/2
model.fit_one_cycle(2, slice(lr))

In [None]:
# Three more epochs with the same learning rate (5 in total at size 256)
model.fit_one_cycle(3, slice(lr))

In [None]:
# Checkpoint after the 2 + 3 = 5 epochs trained at size 256
model.save('train_final5_cycles')

In [None]:
# Reload the checkpoint saved in the previous cell
model.load('train_final5_cycles')

In [None]:
# Export the learner to 'export_resnet34_model.pkl'
model.export('export_resnet34_model.pkl')

# Test model with other images:

In [None]:
# Load an exported learner from model_path (no file name is passed here).
# NOTE(review): the export cell writes 'export_resnet34_model.pkl' and nothing
# in this notebook writes into model_path - confirm the file this call expects
# has been copied (and named appropriately) into model_path beforehand.
learn = load_learner(model_path)

In [None]:
# Test image located directly in the user's home directory
image_path = Path.home() / 'rovello.jpg'

In [None]:
# Open the test image so it can be passed to `learn.predict`
img = open_image(image_path)

In [None]:
# Display the resolved test-image path (same value as image_path)
Path.home() / 'rovello.jpg'

In [None]:
# Run the loaded learner on the test image and display the prediction
learn.predict(img)

In [None]:
# Display the current user's home directory
Path.home()

# Deploy Web Page with Elastic Beanstalk

Tutorial Client: https://docs.aws.amazon.com/elasticbeanstalk/latest/dg/eb-cli3.html

Tutorial Bundle: https://docs.aws.amazon.com/elasticbeanstalk/latest/dg/applications-sourcebundle.html

# References

* [Fastai multi label](https://gilberttanner.com/blog/fastai-multi-label-image-classification)
* [Google images scraping](https://medium.com/@intprogrammer/how-to-scrape-google-for-images-to-train-your-machine-learning-classifiers-on-565076972ce)
* [Basic mushroom info](https://bolets.info/)
* [Mushroom names in Latin and additional info](https://ca.wikipedia.org/)
* [Instagram @natros56](https://www.instagram.com/explore/tags/indexboletsnatros56_cat/)




# Not Used