In [None]:
from fastai.vision import *

# DATA INSPECTION

In [None]:
# List the class folders of the dataset.
# The original `[1:]` slice presumably meant to skip a hidden entry (e.g. a
# .DS_Store file), but os.listdir returns entries in arbitrary order, so the
# slice could just as well drop a real class. Keep only non-hidden
# sub-directories instead, sorted for a stable listing.
# NOTE: a `models` checkpoint folder created under this directory later in the
# notebook is a directory too and will therefore be listed here.
data_dir = "data/mountains"
classes = sorted(
    entry for entry in os.listdir(data_dir)
    if not entry.startswith(".") and os.path.isdir(os.path.join(data_dir, entry))
)
print(classes)

## View/Load data

In [None]:
# Root folder of the dataset; later cells pass it to the DataBunch builders,
# ImageCleaner and load_learner.
path = 'data/mountains'

In [None]:
# Seed NumPy first so the random validation split below is the same on every run.
np.random.seed(42)
# There is no separate train/valid/test layout on disk: train="." says that
# `path` itself contains the training data, and valid_pct=0.2 randomly sets
# aside 20% of the images for validation.
data = ImageDataBunch.from_folder(
    path,
    train=".",
    valid_pct=0.2,
    ds_tfms=get_transforms(),
    size=224,
    num_workers=4,
    bs=32,
)
# Normalize the inputs with the ImageNet channel statistics.
data = data.normalize(imagenet_stats)

In [None]:
# Display a 3-row sample of the data to sanity-check images and labels.
data.show_batch(rows=3, figsize=(7,8))

In [None]:
# Summarize the DataBunch: class names, number of classes and split sizes.
summary = (
    ("classes: ", data.classes),
    ("nb classes: ", data.c),
    ("nb training examples: ", len(data.train_ds)),
    ("nb val examples", len(data.valid_ds)),
)
for label, value in summary:
    print(label, value)

# TRAINING

Create models folder and grant access if not already done

In [None]:
#! sudo mkdir /home/jupyter/tutorials/fastai/course-v3/nbs/dl1/data/mountains/models
#! sudo chmod -R 777 /home/jupyter/tutorials/fastai/course-v3/nbs/dl1/data/mountains/models

Load a pre-trained imagenet RN-50 model and train on the mountains dataset.

In [None]:
# Build a ResNet-50 learner on `data`, reporting error_rate as the metric.
learn = cnn_learner(data, models.resnet50, metrics=error_rate)

Load model trained on raw data for 40 epochs.

In [None]:
# Restore the weights saved under the name "body-40-epochs".
# The trailing ';' keeps the notebook from echoing the returned object.
learn.load("body-40-epochs");

In [None]:
# Run the learning-rate finder to help choose max_lr for the next fit.
learn.lr_find()

In [None]:
# Plot what the recorder captured during lr_find.
learn.recorder.plot()

In [None]:
# Train for 1 epoch with the one-cycle policy; the slice gives the range of
# maximum learning rates (4e-6 up to 1e-3) spread over the layer groups.
learn.fit_one_cycle(1, max_lr=slice(4e-6,1e-3))

In [None]:
# Checkpoint the weights after this extra epoch.
learn.save("head-41-epochs")

In [None]:
# One more one-cycle epoch, this time with a lower learning-rate range
# (1e-6 up to 1e-4).
learn.fit_one_cycle(1, max_lr=slice(1e-6,1e-4))

In [None]:
# Checkpoint the weights again; later cells reload "head-42-epochs".
learn.save("head-42-epochs")

In [None]:
# Reload the checkpoint saved in the previous cell (presumably so the notebook
# can be resumed from here without retraining). ';' suppresses the cell output.
learn.load("head-42-epochs");

In [None]:
# Make all layer groups of the model trainable.
learn.unfreeze()

In [None]:
# Freeze the model again.
# NOTE(review): no training cell runs between the preceding unfreeze() and this
# freeze(), so the pair has no net effect on the weights.
learn.freeze()

## Interpretation

In [None]:
# Load the checkpoint to interpret. ';' suppresses the cell output.
# NOTE(review): this notebook only saves "head-41-epochs" and "head-42-epochs";
# 'body-42-epochs' must already exist from another session — confirm, or load
# 'head-42-epochs' instead.
learn.load('body-42-epochs');

In [None]:
# Build the interpretation helper from the learner; the following cells use it
# for the confusion matrix, most-confused pairs and top losses.
interp = ClassificationInterpretation.from_learner(learn)

In [None]:
# Plot the confusion matrix of the predictions.
interp.plot_confusion_matrix(figsize=(8,8))

In [None]:
# List the class pairs the model confuses, keeping only those that occur at
# least min_val=2 times.
interp.most_confused(min_val=2)

In [None]:
# Show the 9 highest-loss examples, with the heatmap overlay enabled.
interp.plot_top_losses(9, heatmap=True, figsize=(12,12))

## Cleaning Up

In [None]:
from fastai.widgets import *

First we need to get the file paths of the images with the highest losses. We can do this with `.from_toplosses`. We then feed the resulting top-loss indexes and the corresponding dataset to `ImageCleaner`.

Notice that the widget will not delete images directly from disk. Instead, it creates a new csv file, `cleaned.csv`, from which you can create a new ImageDataBunch with the corrected labels to continue training your model.

In [None]:
# Build a DataBunch over *all* images (split_none: no validation hold-out) so
# the cleaning widgets can inspect every file in the dataset.
db = (ImageList.from_folder(path)
                   .split_none()
                   .label_from_folder()
                   .transform(get_transforms(), size=224)
                   .databunch()
                   # The weights loaded for cleaning were trained on inputs
                   # normalized with imagenet_stats; without the same
                   # normalization here the losses used to rank images would
                   # be computed on differently-scaled inputs.
                   .normalize(imagenet_stats)
     )

In [None]:
# Create a ResNet-50 learner on the full, unsplit DataBunch used for cleaning.
learn_cln = cnn_learner(db, models.resnet50, metrics=error_rate)

# Restore the 'head-42-epochs' checkpoint into it; ';' suppresses the output.
learn_cln.load('head-42-epochs');

In [None]:
# Get the dataset and the indexes of its highest-loss images, in the form
# expected by ImageCleaner below.
ds, idxs = DatasetFormatter().from_toplosses(learn_cln)

In [None]:
#losses,idxs = interp.top_losses(k=200)

In [None]:
#! sudo chmod -R 777 /home/jupyter/tutorials/fastai/course-v3/nbs/dl1/data/mountains

In [None]:
# `losses` was never assigned (the cell computing it is commented out), so the
# print below raised a NameError. Compute the top losses from `interp` here.
# The returned indexes go to a throwaway name so that the `idxs` produced by
# DatasetFormatter for ImageCleaner is not overwritten.
# NOTE(review): `interp` was built from `learn` (validation split), whereas
# `ds`/`idxs` come from `learn_cln` (all images) — these values are only an
# indication of the loss magnitudes, not a one-to-one match with `idxs`.
losses, _loss_idxs = interp.top_losses(k=200)
print(losses[:10])

In [None]:
# Launch the relabel/delete widget on the highest-loss images. Per the notes
# above, choices are written to cleaned.csv rather than applied to the files.
ImageCleaner(ds, idxs, path)

In [None]:
# Recompute `ds`/`idxs`, this time for similar-looking images (candidate
# duplicates), replacing the top-loss selection from the earlier cell.
ds, idxs = DatasetFormatter().from_similars(learn_cln)

In [None]:
# Launch the widget in duplicates mode to review the similar images.
ImageCleaner(ds, idxs, path, duplicates=True)

Remember to recreate your ImageDataBunch from your `cleaned.csv` to include the changes you made in your data!

## Clean data

In [None]:
# Rebuild the DataBunch from the labels written by ImageCleaner.
df = pd.read_csv(path + '/cleaned.csv')
# from_df holds out a random validation subset; seed NumPy first (as was done
# for the original DataBunch) so the split, and therefore the reported metrics,
# are reproducible from run to run.
np.random.seed(42)
data_cln = ImageDataBunch.from_df(path, df=df, ds_tfms=get_transforms(), size=224, bs=32).normalize(imagenet_stats)

In [None]:
# Display a 3-row sample of the cleaned data to sanity-check it.
data_cln.show_batch(rows=3, figsize=(7,8))

In [None]:
# Rebind learn_cln to a new ResNet-50 learner built on the cleaned DataBunch.
learn_cln = cnn_learner(data_cln, models.resnet50, metrics=error_rate)

In [None]:
# Start from the 'head-42-epochs' checkpoint saved before cleaning.
# ';' suppresses the cell output.
learn_cln.load('head-42-epochs');

In [None]:
# Run the learning-rate finder on the cleaned data.
learn_cln.lr_find()

In [None]:
# Plot what the recorder captured during lr_find.
learn_cln.recorder.plot()

In [None]:
# One one-cycle epoch on the cleaned data, with maximum learning rates ranging
# from 3e-4 to 1e-3.
learn_cln.fit_one_cycle(1, max_lr=slice(3e-4,1e-3))

In [None]:
# A second epoch with a lower learning-rate range (1e-6 up to 1e-4).
learn_cln.fit_one_cycle(1, max_lr=slice(1e-6,1e-4))

In [None]:
# Make all layer groups trainable before the next fit.
learn_cln.unfreeze()

In [None]:
# Re-run the learning-rate finder now that the model is unfrozen.
learn_cln.lr_find()

In [None]:
# Plot what the recorder captured during this lr_find run.
learn_cln.recorder.plot()

In [None]:
# One one-cycle epoch on the unfrozen model, with maximum learning rates
# ranging from 4e-5 to 2e-4.
learn_cln.fit_one_cycle(1, max_lr=slice(4e-5,2e-4))

In [None]:
# Checkpoint the weights trained on the cleaned data.
learn_cln.save("body-46-epochs-cln")

In [None]:
# Freeze the model again now that the unfrozen epoch has been saved.
learn_cln.freeze()

## Interpretation

In [None]:
# Reload the checkpoint saved after training on the cleaned data.
# ';' suppresses the cell output.
learn_cln.load("body-46-epochs-cln");

In [None]:
# Rebind `interp` to an interpretation of the learner trained on cleaned data.
interp = ClassificationInterpretation.from_learner(learn_cln)

In [None]:
# Plot the confusion matrix for the model trained on cleaned data.
interp.plot_confusion_matrix(figsize=(8,8))

In [None]:
# List the class pairs still confused at least min_val=2 times.
interp.most_confused(min_val=2)

In [None]:
# Show the 9 highest-loss examples after cleaning, with the heatmap overlay.
interp.plot_top_losses(9, heatmap=True, figsize=(12,12))

## Putting your model in production

First things first, let's export the content of our `Learner` object for production:

In [None]:
# Serialize the learner for deployment (written as 'export.pkl', as described
# in the text that follows).
learn_cln.export()

This will create a file named 'export.pkl' in the directory where we were working that contains everything we need to deploy our model (the model, the weights but also some metadata like the classes or the transforms/normalization used).

You probably want to use CPU for inference, except at massive scale (and you almost certainly don't need to train in real-time). If you don't have a GPU that happens automatically. You can test your model on CPU like so:

In [None]:
# Make CPU the default device so inference can be tested without a GPU.
defaults.device = torch.device('cpu')
# Print the setting to confirm it took effect.
print(defaults.device)

In [None]:
# Open one sample image from the dataset to use as a prediction input.
sample_file = f"{path}/Grand_Combin/00000038.jpg"
img = open_image(sample_file)
# A bare expression as the cell's last line makes the notebook display it.
img

We create our `Learner` in the production environment like this; just make sure that `path` contains the file 'export.pkl' from before.

In [None]:
# Rebuild the learner from the exported file expected under `path`; this
# rebinds `learn`, replacing the training-time learner.
learn = load_learner(path)

In [None]:
# Run the model on the single image; predict returns three values, unpacked
# here as the predicted class, its index and the raw per-class outputs.
pred_class,pred_idx,outputs = learn.predict(img)
# Display the `obj` attribute of the predicted class as the cell output.
pred_class.obj

So you might create a route something like this ([thanks](https://github.com/simonw/cougar-or-not) to Simon Willison for the structure of this code):

```python
@app.route("/classify-url", methods=["GET"])
async def classify_url(request):
    """Classify the image found at the `url` query parameter.

    Fetches the image bytes, runs them through `learner` and returns a JSON
    response whose "predictions" entry lists (class name, score) pairs sorted
    from the highest score to the lowest.
    """
    # Named `img_bytes` rather than `bytes` so the builtin type is not shadowed.
    img_bytes = await get_bytes(request.query_params["url"])
    img = open_image(BytesIO(img_bytes))
    # predict returns (predicted class, class index, per-class outputs).
    _, _, outputs = learner.predict(img)
    return JSONResponse({
        "predictions": sorted(
            # Read the class names from the same `learner` that produced the
            # outputs; the snippet previously used an undefined `cat_learner`.
            zip(learner.data.classes, map(float, outputs)),
            key=lambda pair: pair[1],
            reverse=True,
        )
    })
```

(This example is for the [Starlette](https://www.starlette.io/) web app toolkit.)