#**INFORMAÇÕES DO ALUNO:**

###**NOME: Iago Akio Saito**
###**RGA: 2020.1906.060-1**
###**CURSO: Engenharia de software**

In [None]:
!pip install -q kaggle

from google.colab import files 
files.upload() # upload kaggle.json file to colab environment

!mkdir ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

# download the dataset
!kaggle datasets download -d iagoasaito/animais
!unzip -q animais.zip -d dataset
!rm animais.zip

Saving kaggle.json to kaggle.json
Downloading animais.zip to /content
 86% 41.0M/47.4M [00:00<00:00, 57.7MB/s]
100% 47.4M/47.4M [00:00<00:00, 64.9MB/s]


In [None]:
def pil_loader(path):
    """Open the image file at `path` and return it converted to RGB mode."""
    with open(path, 'rb') as handle:
        # convert() is called while the file is still open
        return Image.open(handle).convert('RGB')


## Analyzing the data

Let's fine-tune MobileNetV3 (Large) to train our model. We're going to use the pretrained implementation from [torchvision](https://pytorch.org/vision/stable/models.html).


In [None]:
import os
import time
import torch
import numpy as np
import pandas as pd

from PIL import Image
from torchvision import models, transforms
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import StratifiedKFold, StratifiedShuffleSplit


In [None]:
def collect_samples(roots):
  """Index every file found under the given root folders.

  Each folder that directly contains files is treated as one class: the
  folder's base name becomes the label of all files inside it.

  Args:
    roots: iterable of directory paths to walk (missing directories yield nothing).

  Returns:
    dict with two parallel lists, 'filepaths' and 'labels'.
  """
  samples = {'filepaths': [], 'labels': []}
  for root in roots:
    # `filenames` (not `files`) so the google.colab `files` module is not overwritten
    for curpath, _dirnames, filenames in os.walk(root):
      if not filenames:
        continue
      label = os.path.basename(curpath)
      samples['labels'].extend([label] * len(filenames))
      samples['filepaths'].extend(os.path.join(curpath, name) for name in filenames)
  return samples


# train and valid are merged here; the splits are re-drawn later by cross-validation
dataset = collect_samples(['dataset/train', 'dataset/valid'])
data = pd.DataFrame(dataset)

test_dataset = collect_samples(['dataset/test'])
test_data = pd.DataFrame(test_dataset)

In [None]:
#removing wrong data
# NOTE: despite its name, `birds` lists every class label that is kept.
birds = ('Cats', 'Dogs', 'MaleLions', 'Sparrows')

# Keep only rows whose label is a known class. A boolean mask plus
# reset_index(drop=True) makes this cell safe to re-run: no extra 'index'
# column is added, so the frame keeps its two columns (filepaths, labels).
data = data[data['labels'].isin(birds)].reset_index(drop=True)
data.tail(15)

Unnamed: 0,index,filepaths,labels
1303,1303,dataset/valid/Cats/27131803876_9af3edf5c7_n.jpg,Cats
1304,1304,dataset/valid/Cats/33522734484_655cb6c80c_n.jpg,Cats
1305,1305,dataset/valid/Cats/24119958367_69117845b1_n.jpg,Cats
1306,1306,dataset/valid/Cats/32862691805_6efa2d6284_n.jpg,Cats
1307,1307,dataset/valid/Cats/31389231292_e2444d0260_m.jpg,Cats
1308,1308,dataset/valid/Cats/30038389868_bc56a9057c_n.jpg,Cats
1309,1309,dataset/valid/Cats/29191475061_e2415892d0_n.jpg,Cats
1310,1310,dataset/valid/Cats/26853880879_3f3de6ba26_n.jpg,Cats
1311,1311,dataset/valid/Cats/25161907065_81766634dc_n.jpg,Cats
1312,1312,dataset/valid/Cats/38785964744_4eab8c0319_n.jpg,Cats


In [None]:
# converting label to number
# Sort the class names: iterating a bare set of strings can yield a different
# order in every kernel session, which would make labels.txt and previously
# saved checkpoints disagree about which index means which class.
index_to_label = sorted(set(data['labels'].values))
label_to_index = {label: i for i, label in enumerate(index_to_label)}

# generate labels file (one class name per line, line number == class index)
with open("labels.txt", "w") as f:
  for label in index_to_label:
    f.write(label+'\n')

In [None]:
# We must create a custom Dataset

class AnimaisDataset(Dataset):
    """Dataset pairing image file paths with their class labels.

    Items are `(image, class_index)` tuples: the image is loaded with
    `pil_loader` and optionally transformed, the index comes from the
    module-level `label_to_index` mapping.
    """

    def __init__(self, filepaths, labels, transform=None):
        self.filepaths, self.labels, self.transform = filepaths, labels, transform

    def __len__(self):
        # one sample per label entry
        return len(self.labels)

    def __getitem__(self, idx):
        image = pil_loader(self.filepaths[idx])
        image = self.transform(image) if self.transform else image
        return image, label_to_index[self.labels[idx]]

In [None]:
# define the image transformations 

# Channel-wise normalisation statistics shared by both pipelines
# (the ImageNet mean/std expected by torchvision's pretrained models).
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD = [0.229, 0.224, 0.225]

# Training pipeline: same geometry as evaluation (resize, then centre crop) so the
# network sees objects at the same scale in both phases; without the resize,
# CenterCrop would zero-pad images smaller than 224 px. Only the flip is random.
train_tfms = transforms.Compose([
                            transforms.Resize((256, 256)),
                            transforms.CenterCrop((224, 224)),
                            transforms.RandomHorizontalFlip(p=0.5),
                            transforms.ToTensor(),
                            transforms.Normalize(IMAGENET_MEAN, IMAGENET_STD)
             ])

# Evaluation pipeline: deterministic resize + centre crop.
test_tfms = transforms.Compose([
                            transforms.Resize((256, 256)),
                            transforms.CenterCrop((224, 224)),
                            transforms.ToTensor(),
                            transforms.Normalize(IMAGENET_MEAN, IMAGENET_STD)
])


## Training with Cross-validation

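Training with 3-fold cross-validation over the merged train and valid datasets.

Our stopping criterion is early stopping: training halts after 10 epochs without an improvement in the loss.

We monitor accuracy on the validation split during training, and report precision, recall, F1-score and the confusion matrix on each test fold.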

In [None]:
lr = 0.0004  # learning rate passed to the optimizer
n_splits = 3  # number of cross-validation folds
STOP = 10 # early stopping after STOP epochs without val loss improvement

filepaths = data['filepaths'].values
labels = data['labels'].values

num_classes = len(set(data['labels'].values))
# use the first GPU when one is available, otherwise fall back to the CPU
# instead of failing on the first .to(device) call
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
# outer split: stratified k-fold, each fold's held-out part is the test set
skf = StratifiedKFold(n_splits=n_splits, shuffle = True, random_state = 42)
# inner split: a single stratified 90/10 shuffle split
sss = StratifiedShuffleSplit(n_splits=1, test_size=0.1, random_state=0)

In [None]:
# evaluate model with accuracy
import sklearn.metrics as metrics

# Lookup table: metric name -> scoring function from sklearn.metrics.
metric2func = dict(
    accuracy=metrics.accuracy_score,
    confusion=metrics.confusion_matrix,
    f1_score=metrics.f1_score,
    precision=metrics.precision_score,
    recall=metrics.recall_score,
)

def evaluate(model, dataloader, metrics = ['accuracy']):
    """Run `model` over `dataloader` and compute the mean loss and the requested metrics.

    Relies on the module-level `device`, `loss_func` and `metric2func`.

    Args:
        model: network to evaluate; it is switched to eval mode.
        dataloader: iterable yielding `(inputs, targets)` batches.
        metrics: names of the metrics to compute; each must be a key of
            `metric2func`. 'f1_score', 'precision' and 'recall' are
            micro-averaged.

    Returns:
        `(test_loss, computed_metric)`: the mean loss per sample and a dict
        mapping each requested metric name to its value.

    Raises:
        ValueError: if `dataloader` yields no samples.
        KeyError: if a requested metric name is not in `metric2func`.
    """
    # metrics that need an explicit averaging strategy for multi-class targets
    # NOTE: with single-label multi-class targets, micro-averaged precision,
    # recall and F1 are all equal to accuracy.
    averaged = ('f1_score', 'precision', 'recall')

    test_loss = 0
    n_samples = 0
    real = []
    pred = []

    model.eval()
    with torch.no_grad():
        for x, y in dataloader:
            # store plain numpy scalars rather than 0-d tensors
            real.extend(y.cpu().numpy())

            x = x.to(device)
            y = y.to(device)

            outputs = model(x)
            _, predicted = torch.max(outputs, 1)

            loss = loss_func(outputs, y)

            # loss_func is assumed to return the batch mean (the default reduction
            # of CrossEntropyLoss); weight it by the batch size to get a sum
            test_loss += loss.item() * x.size(0)
            n_samples += x.size(0)
            pred.extend(predicted.cpu().numpy())

    if n_samples == 0:
        raise ValueError('evaluate() received a dataloader with no samples')

    # divide the per-sample sum by the number of samples (not the number of
    # batches) so the result is a true mean
    test_loss /= n_samples

    computed_metric = {}
    for metric in metrics:
        if metric in averaged:
            computed_metric[metric] = metric2func[metric](real, pred, average='micro')
        else:
            computed_metric[metric] = metric2func[metric](real, pred)

    return test_loss, computed_metric

In [None]:
def train_model(model, train_dl, val_dl, epochs, loss_func, optimizer, show_info=5, thresh = 1e-3, patience=None):
  """Train `model` with early stopping and return it.

  Relies on the module-level `device` (and on `evaluate` when `val_dl` is given).

  Args:
    model: network to train (already on `device`).
    train_dl: iterable yielding `(inputs, targets)` training batches.
    val_dl: validation batches, or None. When given, early stopping monitors
      the validation loss; otherwise it monitors the training loss.
    epochs: maximum number of epochs to run (all of them, inclusive).
    loss_func: callable `(outputs, targets) -> loss tensor`.
    optimizer: optimizer bound to the model's parameters.
    show_info: print a progress line every `show_info` epochs.
    thresh: minimum decrease of the monitored loss that counts as an improvement.
    patience: stop after this many consecutive epochs without improvement;
      defaults to the module-level `STOP`.

  Returns:
    The same `model` object, with the weights of the last epoch that ran.

  Raises:
    ValueError: if `train_dl` yields no samples.
  """
  if patience is None:
    patience = STOP

  last_best = -1          # epoch at which the monitored loss last improved
  last_best_loss = None   # monitored loss at that epoch
  start = time.time()

  # range(1, epochs + 1) so that exactly `epochs` epochs can run
  for epoch in range(1, epochs + 1):
    train_loss = 0
    values = 0

    model.train()
    for x, y in train_dl:
      x, y = x.to(device), y.to(device)

      optimizer.zero_grad()

      output = model(x)
      current_loss = loss_func(output, y)

      current_loss.backward()
      optimizer.step()

      # accumulate a per-sample sum so the epoch loss is a true mean
      train_loss += current_loss.item() * x.size(0)
      values += x.size(0)

    if values == 0:
      raise ValueError('train_model() received a train_dl with no samples')

    train_loss /= values
    elapsed = int(time.time()-start)

    if val_dl is not None:
      val_loss, val_metrics = evaluate(model, val_dl)
      val_acc = val_metrics['accuracy']
      monitored = val_loss

      if epoch%show_info == 0:
          print(f"\tEpoch {epoch}/{epochs}: train_loss = {train_loss:.3f} "\
            f"val_loss {val_loss:.3f} val_acc {val_acc:.2f} "\
            f"time elapsed = {elapsed//60}m:{elapsed%60}s")
    else:
      monitored = train_loss

      if epoch%show_info == 0:
          print(f"\tEpoch {epoch}/{epochs}: train_loss = {train_loss:.3f} "\
            f"time elapsed = {elapsed//60}m:{elapsed%60}s")

    # Record an improvement BEFORE testing the stop condition, so an epoch that
    # improves the loss can never be the one that triggers early stopping.
    if last_best == -1 or monitored + thresh < last_best_loss:
      last_best = epoch
      last_best_loss = monitored
    elif epoch - last_best >= patience:
      print('Early stopping...')
      break

  return model

In [None]:
# training with MobileNet V3 Large

# per-fold test results, filled in by the loop below
confusion_matrix = []
precision = []
recall = []
f1_score = []

for fold, (train_val, test) in enumerate(skf.split(filepaths, labels)):
  print(f'Fold {fold+1}/{n_splits}\n')

  # carve a stratified validation split out of the non-test part of this fold;
  # sss returns positions inside train_val, so map them back to dataset indices
  train, val = next(sss.split(np.zeros(len(train_val)), labels[train_val]))
  train = train_val[train]
  val = train_val[val]

  X_train, X_val, X_test = filepaths[train], filepaths[val], filepaths[test]
  y_train, y_val, y_test = labels[train]   , labels[val]   , labels[test]

  # datasets (fold_test_dataset: avoids overwriting the earlier `test_dataset` dict)
  train_dataset = AnimaisDataset(X_train, y_train, train_tfms)
  val_dataset   = AnimaisDataset(X_val, y_val, test_tfms)
  fold_test_dataset = AnimaisDataset(X_test, y_test, test_tfms)

  # dataloaders
  train_dl = DataLoader(train_dataset, batch_size=128, shuffle=True)
  val_dl = DataLoader(val_dataset, batch_size=128)
  test_dl = DataLoader(fold_test_dataset, batch_size=128)

  # getting model: pretrained backbone with the last classifier layer replaced
  # by a fresh one sized for our classes
  model = models.mobilenet_v3_large(pretrained = True)
  model.classifier[3] = torch.nn.Linear(model.classifier[3].in_features, num_classes)
  model = model.to(device)

  # loss (module-level on purpose: evaluate() reads the global `loss_func`)
  loss_func = torch.nn.CrossEntropyLoss()

  # optimizer
  optimizer = torch.optim.Adam(model.parameters(), lr)

  # training
  model = train_model(model, train_dl, val_dl, 500, loss_func, optimizer, 1)

  # metrics (fold_metrics: do not rebind `metrics`, which is the sklearn.metrics alias)
  _, fold_metrics = evaluate(model, test_dl, metrics=['confusion', 'precision', 'recall', 'f1_score'])
  test_confusion, test_precision, test_recall, test_f1 = fold_metrics['confusion'], fold_metrics['precision'], fold_metrics['recall'], fold_metrics['f1_score']

  # saving model (before the report below, so "Model saved" is true when printed)
  torch.save(model.state_dict(), f'mobilenetv3_large_f{fold}.pt')

  print(f'Model saved - fold {fold}: '\
        f'test_confusion = \n{test_confusion}\ntest_precision = {test_precision:.2f}\ntest_recall = {test_recall:.2f}\ntest_f1_score = {test_f1:.2f}\n')

  confusion_matrix.append(test_confusion)
  precision.append(test_precision)
  recall.append(test_recall)
  f1_score.append(test_f1)

# aggregate the per-fold scores (mean and standard deviation across folds)
precision = np.array(precision)
recall = np.array(recall)
f1_score = np.array(f1_score)
print(f'MobileNet V3 Large - precision: {precision.mean():.3f} += {precision.std():.3f} \nrecall: {recall.mean():.3f} += {recall.std():.3f} \nf1_score: {f1_score.mean():.3f} += {f1_score.std():.3f}')

# one confusion matrix per fold, numbered from 1
print(f'Confusion Matrix, por época: ')
for n, i in enumerate(confusion_matrix, start=1):
  print(f'{n}:\n {i}\n')

Fold 1/3



Downloading: "https://download.pytorch.org/models/mobilenet_v3_large-8738ca79.pth" to /root/.cache/torch/hub/checkpoints/mobilenet_v3_large-8738ca79.pth


  0%|          | 0.00/21.1M [00:00<?, ?B/s]

	Epoch 1/500: train_loss = 0.746 val_loss 22.304 val_acc 0.89 time elapsed = 0m:8s
	Epoch 2/500: train_loss = 0.066 val_loss 8.649 val_acc 0.97 time elapsed = 0m:16s
	Epoch 3/500: train_loss = 0.010 val_loss 3.885 val_acc 0.99 time elapsed = 0m:25s
	Epoch 4/500: train_loss = 0.002 val_loss 4.770 val_acc 0.97 time elapsed = 0m:33s
	Epoch 5/500: train_loss = 0.001 val_loss 6.068 val_acc 0.97 time elapsed = 0m:42s
	Epoch 6/500: train_loss = 0.001 val_loss 5.551 val_acc 0.98 time elapsed = 0m:50s
	Epoch 7/500: train_loss = 0.001 val_loss 11.042 val_acc 0.94 time elapsed = 0m:59s
	Epoch 8/500: train_loss = 0.001 val_loss 5.830 val_acc 0.95 time elapsed = 1m:7s
	Epoch 9/500: train_loss = 0.001 val_loss 4.931 val_acc 0.95 time elapsed = 1m:16s
	Epoch 10/500: train_loss = 0.001 val_loss 4.011 val_acc 0.98 time elapsed = 1m:24s
	Epoch 11/500: train_loss = 0.001 val_loss 3.750 val_acc 0.99 time elapsed = 1m:33s
	Epoch 12/500: train_loss = 0.001 val_loss 4.514 val_acc 0.98 time elapsed = 1m:42s
	

In [None]:
# Print the stored confusion matrices one by one, numbered from 1.
print(f'Confusion Matrix, por época: ')
for position, matrix in enumerate(confusion_matrix, start=1):
  print(f'{position}:\n {matrix}\n')

Confusion Matrix, por época: 
1:
 [[151   0   1   1]
 [  1  89   1   2]
 [  1   0  93   0]
 [  8   0   0  92]]

2:
 [[143   4   3   3]
 [  0  89   2   2]
 [  0   0  93   0]
 [  1   0   0  99]]

3:
 [[152   0   1   0]
 [  7  85   1   0]
 [  1   0  92   0]
 [  4   2   1  93]]



## Deploying

After analyzing the metrics from the previous section, we have to train a final model to be used in our Android app.

In [None]:
## Training final model with MobileNet V3 Large
# This run uses every sample in `filepaths`/`labels`; no validation or test split is held out.

# datasets
train_dataset = AnimaisDataset(filepaths, labels, train_tfms)

# dataloaders
train_dl = DataLoader(train_dataset, batch_size=128, shuffle=True)

# getting model: pretrained backbone, last classifier layer replaced by one sized for our classes
model = models.mobilenet_v3_large(pretrained = True)
model.classifier[3] = torch.nn.Linear(model.classifier[3].in_features, num_classes)
model = model.to(device)

# loss
loss_func = torch.nn.CrossEntropyLoss()

# optimizer
optimizer = torch.optim.Adam(model.parameters(), lr)

# training: val_dl is None, so train_model applies early stopping on the training loss
model = train_model(model, train_dl, None, 20, loss_func, optimizer, 1)
# move the model to the CPU before saving its weights
model.to(torch.device('cpu'))

# saving model (state dict only)
torch.save(model.state_dict(), f'mobilenetv3_large_deploy.pt')

	Epoch 1/20: train_loss = 0.495 time elapsed = 0m:13s
	Epoch 2/20: train_loss = 0.021 time elapsed = 0m:27s
	Epoch 3/20: train_loss = 0.002 time elapsed = 0m:41s
	Epoch 4/20: train_loss = 0.001 time elapsed = 0m:55s
	Epoch 5/20: train_loss = 0.001 time elapsed = 1m:8s
	Epoch 6/20: train_loss = 0.001 time elapsed = 1m:22s
	Epoch 7/20: train_loss = 0.002 time elapsed = 1m:36s
	Epoch 8/20: train_loss = 0.001 time elapsed = 1m:50s
	Epoch 9/20: train_loss = 0.003 time elapsed = 2m:3s
	Epoch 10/20: train_loss = 0.008 time elapsed = 2m:17s
	Epoch 11/20: train_loss = 0.027 time elapsed = 2m:31s
	Epoch 12/20: train_loss = 0.009 time elapsed = 2m:45s
	Epoch 13/20: train_loss = 0.007 time elapsed = 2m:58s
	Epoch 14/20: train_loss = 0.006 time elapsed = 3m:12s
	Epoch 15/20: train_loss = 0.001 time elapsed = 3m:26s
Early stopping...


# Converting to ONNX

In [None]:
# Install the ONNX and ONNX Runtime packages (imported in the next cell).
!pip install onnx onnxruntime

Collecting onnx
  Downloading onnx-1.10.2-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (12.7 MB)
[K     |████████████████████████████████| 12.7 MB 5.0 MB/s 
[?25hCollecting onnxruntime
  Downloading onnxruntime-1.9.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (4.8 MB)
[K     |████████████████████████████████| 4.8 MB 35.8 MB/s 
Installing collected packages: onnxruntime, onnx
Successfully installed onnx-1.10.2 onnxruntime-1.9.0


In [None]:
import onnx
import onnxruntime

In [None]:
# Rebuild the architecture without downloading the ImageNet weights: every
# parameter is overwritten by the fine-tuned state dict loaded just below.
model_pytorch = models.mobilenet_v3_large(pretrained = False)
model_pytorch.classifier[3] = torch.nn.Linear(model_pytorch.classifier[3].in_features, num_classes)
model_pytorch.load_state_dict(torch.load('mobilenetv3_large_deploy.pt', map_location='cpu'))
# switch to eval mode before exporting
model_pytorch.eval()

# example input used to trace the model: one 3-channel 224x224 image
dummy_input = torch.randn(1, 3, 224, 224, requires_grad=True)

torch.onnx.export(model_pytorch,                                 # model being run
                  dummy_input,                                   # model input (or a tuple for multiple inputs)
                  "mobilenetv3_large_deploy.onnx",               # where to save the model (can be a file or file-like object)
                  export_params=True,                            # store the trained parameter weights inside the model file
                  opset_version=12,                              # the ONNX opset version to export the model to
                  do_constant_folding=True,                      # whether to execute constant folding for optimization
                  input_names = ['input'],                       # the model's input names
                  output_names = ['output'],                     # the model's output names
                  dynamic_axes={'input' : {0 : 'batch_size'},    # variable length axes
                                'output' : {0 : 'batch_size'}})

We must convert the ONNX model to the ORT format.

In [None]:
# Convert the ONNX models found in the current directory (.) to ORT format, using the "basic" optimization level.
!python -m onnxruntime.tools.convert_onnx_models_to_ort . --optimization_level basic

Converting optimized ONNX model /content/mobilenetv3_large_deploy.onnx to ORT format model /content/mobilenetv3_large_deploy.basic.ort
Converted 1 models. 0 failures.
2021-11-30 13:13:59,090 ort_format_model.utils [INFO] - Processed /content/mobilenetv3_large_deploy.basic.ort
2021-11-30 13:13:59,096 ort_format_model.utils [INFO] - Created config in /content/required_operators.config


In [None]:
# List the files in the current working directory.
%ls 

[0m[01;34mdataset[0m/                            mobilenetv3_large_f0.pt
kaggle.json                         mobilenetv3_large_f1.pt
labels.txt                          mobilenetv3_large_f2.pt
mobilenetv3_large_deploy.basic.ort  required_operators.config
mobilenetv3_large_deploy.onnx       [01;34msample_data[0m/
mobilenetv3_large_deploy.pt
