# IMPORT PACKAGES

In [17]:
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.models import model_from_config
import itertools 

import json
import random
random.seed(1)
from collections import Counter

import numpy as np
import pandas as pd

from tqdm import tqdm
import pickle

import matplotlib.pyplot as plt

from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report

from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle

# OPEN FILES

In [18]:
def load_data(data_path):
    with open(data_path, 'rb') as handle:
        m, mf, ml = pickle.load(handle)
        
    #index_puaioh = [i for i in range(len(m)) if ml[i] == 'puaioh']
    
    #for idx in index_puaioh:
     #   m.append(m[idx])
     #   mf.append(mf[idx])
     #   ml.append(ml[idx])
        
    temp = list(zip(m, mf, ml))
    random.Random(1).shuffle(temp)
    res1, res2, res3 = zip(*temp)
    res1, res2, res3 = list(res1), list(res2), list(res3)
    
    res1b, res2b, res3b = [], [], []
    train_size = 100
    
    for bird in tqdm(list(set(res3))):
        l1 = [] #mels
        l2 = [] #mels filename
        l3 = [] #mels label
        
        for i, lab in enumerate(res3):
            if lab == bird:
                l1.append(res1[i])
                l2.append(res2[i])
                l3.append(res3[i])
        
        if len(l3) > train_size:
            l1, _, l2, _, l3, _ = train_test_split(l1, l2, l3, train_size=train_size)
        
        res1b += l1
        res2b += l2
        res3b += l3
        
    
    print(Counter(res3b))
    
    return res1b, res2b, res3b

# LOAD FEATURE MODEL

In [19]:
def load_feature_model():
    model_path = '../input/dataset-private'

    model = model_from_config(json.load(open(model_path + '/cmi_mbam01.json', 'r')))
    model.load_weights(model_path + '/cmi_mbam01.h5')

    feature_layers = [layer.output for layer in model.layers[:-4]]
    feature_model = tf.keras.Model(inputs=[model.input], outputs=feature_layers)
    
    return feature_model

# GET FEATURES

In [20]:
def get_features(data, feature_model):

    X_all = np.array(data)
    scale = 33.15998

    X_train_all = X_all[:,:40,:] / scale

    batch_all = X_train_all.reshape(X_train_all.shape[0],
                                         X_train_all.shape[1],
                                         X_train_all.shape[2],
                                         1)
    
    
    embeddings_all = []

    for idx in tqdm(range(len(batch_all)//64+1)):
        embeddings_all += list(feature_model(batch_all[idx*64:(idx+1)*64])[-1].numpy())

    embeddings_all = np.array(embeddings_all)

    return embeddings_all

# GET CLASSIFIER PYTORCH

In [21]:
import torch
import torch.nn as nn
from sklearn.metrics import classification_report

class ClassifierNetwork(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super(ClassifierNetwork, self).__init__()

        self.fc1 = nn.Linear(input_size, hidden_size)
        self.tanh = nn.Tanh()
        self.fc2 = nn.Linear(hidden_size, num_classes)


    def forward(self, x):
        out = self.fc1(x)
        out = self.tanh(out)
        out = self.fc2(out)
        return out
    
    
    
def train_model(model, features, labels, criterion, optimizer, num_epochs=50, use_tqdm=False):
    
    # Train the model
    
    x = torch.tensor(features, dtype=torch.float32, device=device)
    y = torch.tensor(labels, dtype=torch.long, device=device)
    
    if use_tqdm:
        for epoch in tqdm(range(num_epochs)):

            outputs = model(x)
            loss = criterion(outputs, y)

            c = torch.tensor(gamma, device=device)
            l2_reg = torch.tensor(0., device=device)

            for name, param in model.named_parameters():
                if 'weight' in name:
                    l2_reg += torch.norm(param)

            loss += c * l2_reg

            # Backward and optimize
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
    else:
        for epoch in range(num_epochs):

            outputs = model(x)
            loss = criterion(outputs, y)

            c = torch.tensor(gamma, device=device)
            l2_reg = torch.tensor(0., device=device)

            for name, param in model.named_parameters():
                if 'weight' in name:
                    l2_reg += torch.norm(param)

            loss += c * l2_reg

            # Backward and optimize
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        
        
def test_model(model, features, labels):
    x = torch.tensor(features, dtype=torch.float32, device=device)
    y = torch.tensor(labels, dtype=torch.long, device=device)
    with torch.no_grad():
        correct = 0
        total = 0
        outputs = model(x)
        _, predicted = torch.max(outputs.data, 1)
        total += y.size(0)
        correct += (predicted==y).sum().item()
    
    return 100 * correct / total, predicted

In [22]:
def get_all_embeddings(data_path):

    mels, mels_fname, mels_label = load_data(data_path)
    feature_model = load_feature_model()
    embeddings_all = get_features(mels, feature_model)

    return embeddings_all, mels, mels_fname, mels_label

In [23]:
def get_proper_split(embeddings_all, mels, mels_fname, mels_label, test_size, random_seed, keep_amount=1e6):
    
    dict_label = {l:i for i, l in enumerate(sorted(set(mels_label)))}
    mels_label_new = [dict_label[m] for m in mels_label]
    
    random.seed(1)
    
    if keep_amount != 1e6:
        
        print('changement')
        
        idx_all = []
        mels_label_new_F = []
        
        for b in dict_label.keys():
            total = min(keep_amount, mels_label_new.count(dict_label[b]))
            idx_b = [i for i in range(len(mels)) if mels_label_new[i] == dict_label[b]][:total]
            idx_all += idx_b
            
        idx_all = sorted(idx_all)
        
        for i in idx_all:
            mels_label_new_F.append(mels_label_new[i])
        
    else:
        idx_all = [i for i in range(len(mels_fname))]
        mels_label_new_F = mels_label_new
    
    print(Counter(mels_label_new_F))
    
    train_idx, test_idx = train_test_split(idx_all, test_size=test_size, random_state=random_seed, shuffle=True, stratify=mels_label_new_F)
    
    X_train = np.array([embeddings_all[idx] for idx in train_idx])
    y_train = np.array([mels_label_new[idx] for idx in train_idx])
    
    X_test = np.array([embeddings_all[idx] for idx in test_idx])
    y_test = np.array([mels_label_new[idx] for idx in test_idx])
    
    
    return X_train, y_train, X_test, y_test, train_idx, test_idx, dict_label, mels_label_new


data_path = '../input/pretrained-create-data-pytorch/ALL_DATA.pickle'

test_size = 0.5
random_seed = 101  
qte_keep = 1e6

embeddings_all, mels, mels_fname, mels_label = get_all_embeddings(data_path=data_path)                          
X_train, y_train, X_test, y_test, index_train, index_test, dict_label, mels_label_new = get_proper_split(embeddings_all,  mels, mels_fname, mels_label, test_size, random_seed, keep_amount=qte_keep)

100%|██████████| 22/22 [00:00<00:00, 221.59it/s]


Counter({'omao': 100, 'apapan': 100, 'akiapo': 100, 'iiwi': 100, 'hawama': 100, 'jabwar': 100, 'houfin': 100, 'hawhaw': 100, 'elepai': 100, 'aniani': 100, 'maupar': 100, 'zVIDE': 100, 'yefcan': 100, 'warwhe1': 100, 'hawcre': 100, 'hawgoo': 100, 'skylar': 100, 'barpet': 100, 'crehon': 57, 'ercfra': 30, 'hawpet1': 24, 'puaioh': 9})


100%|██████████| 31/31 [00:00<00:00, 86.32it/s]

Counter({16: 100, 2: 100, 0: 100, 13: 100, 7: 100, 14: 100, 12: 100, 10: 100, 5: 100, 1: 100, 15: 100, 21: 100, 20: 100, 19: 100, 8: 100, 9: 100, 18: 100, 3: 100, 4: 57, 6: 30, 11: 24, 17: 9})





In [29]:
gamma = 0.5
lr = 0.01
num_epochs = 50
hl = 1024

gammas = [-0.1, 0] #Positive Gamma is shit
lrs = [1, 0.1, 0.01, 0.001]
num_epochs_s = [10, 30, 50, 100]
hidden_layers = [64, 128, 256, 512]
device = 'cuda:0'

a = [gammas, lrs, num_epochs_s, hidden_layers]
grid = list(itertools.product(*a))
print(len(grid))
accuracies = []


#RIGHT VALUES :

if True:
    gammas = [0] #Positive Gamma is shit
    lrs = [0.01]
    num_epochs_s = [50]
    hidden_layers = [64]
    device = 'cuda:0'
    a = [gammas, lrs, num_epochs_s, hidden_layers]
    grid = list(itertools.product(*a))


for gamma, lr, num_epochs, hl in tqdm(grid):
    
    
    model = ClassifierNetwork(512, hl, 22).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    train_model(model, X_train, y_train, criterion, optimizer, num_epochs)
    
    accuracy_test, outputs = test_model(model, X_test, y_test)
    accuracies.append(round(accuracy_test, 2))
    
    model_all = ClassifierNetwork(512, hl, 22).to(device)
    criterion_all = nn.CrossEntropyLoss()
    optimizer_all = torch.optim.Adam(model.parameters(), lr=lr)
    train_model(model_all, embeddings_all, mels_label_new, criterion_all, optimizer_all, num_epochs)
    #_, outputs_all = test_model(model_all, X_test, y_test)
    
    
df_summary = pd.DataFrame(grid, columns=['Gamma', 'Learning Rate', 'Num Epochs', 'Hidden Layers'])
df_summary['Accuracy'] = accuracies

128


100%|██████████| 1/1 [00:00<00:00,  6.27it/s]


In [30]:
df_summary.sort_values('Accuracy', ascending=False)

Unnamed: 0,Gamma,Learning Rate,Num Epochs,Hidden Layers,Accuracy
0,0,0.01,50,64,41.88


In [31]:
classification = classification_report(y_test, outputs.cpu().numpy(), target_names=dict_label)
print(classification)

              precision    recall  f1-score   support

      akiapo       0.30      0.28      0.29        50
      aniani       0.45      0.54      0.49        50
      apapan       0.10      0.10      0.10        50
      barpet       0.85      0.80      0.82        50
      crehon       0.38      0.39      0.39        28
      elepai       0.22      0.24      0.23        50
      ercfra       0.43      0.40      0.41        15
      hawama       0.37      0.36      0.36        50
      hawcre       0.31      0.32      0.31        50
      hawgoo       0.70      0.56      0.62        50
      hawhaw       0.84      0.94      0.89        50
     hawpet1       0.56      0.42      0.48        12
      houfin       0.38      0.34      0.36        50
        iiwi       0.33      0.30      0.31        50
      jabwar       0.40      0.38      0.39        50
      maupar       0.34      0.42      0.38        50
        omao       0.29      0.32      0.30        50
      puaioh       0.00    

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


# SAVE DICT and MODEL

In [32]:
with open("dico_birds.json", "w") as fp:
    json.dump(dict_label, fp, indent = 4)

PATH = 'model_pytorch.pt'
model_scripted = torch.jit.script(model.cpu()) # Export to TorchScript
model_scripted.save(PATH) # Save

PATH_ALL = 'model_pytorch_all.pt'
model_scripted_all = torch.jit.script(model_all.cpu()) # Export to TorchScript
model_scripted_all.save(PATH_ALL) # Save

# TRY RELOAD

In [33]:
model_reloaded = torch.jit.load(PATH)
print(model_reloaded)

x_load = torch.tensor(X_test, dtype=torch.float32, device='cpu')
outputs_loaded = model_reloaded(x_load)
_, predicted_loaded = torch.max(outputs_loaded.data, 1)

print(classification_report(y_test, predicted_loaded.numpy(), target_names=dict_label))

RecursiveScriptModule(
  original_name=ClassifierNetwork
  (fc1): RecursiveScriptModule(original_name=Linear)
  (tanh): RecursiveScriptModule(original_name=Tanh)
  (fc2): RecursiveScriptModule(original_name=Linear)
)
              precision    recall  f1-score   support

      akiapo       0.30      0.28      0.29        50
      aniani       0.45      0.54      0.49        50
      apapan       0.10      0.10      0.10        50
      barpet       0.85      0.80      0.82        50
      crehon       0.38      0.39      0.39        28
      elepai       0.22      0.24      0.23        50
      ercfra       0.43      0.40      0.41        15
      hawama       0.37      0.36      0.36        50
      hawcre       0.31      0.32      0.31        50
      hawgoo       0.70      0.56      0.62        50
      hawhaw       0.84      0.94      0.89        50
     hawpet1       0.56      0.42      0.48        12
      houfin       0.38      0.34      0.36        50
        iiwi       0.33   

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
