# Inference for Model Testing

### INSTRUCTIONS

In [None]:
!pip install pyngrok
from pyngrok import ngrok

1. Install all required libraries.
2. Download the weights of the pre-trained model used to extract feature representations of audio files.
3. Load the weights for the provided rnn_epoch_100_R2 model.
4. Load all .npy files. This is required to map the predicted genre id to a genre class as defined in the other notebook.
5. Ensure the flask_app folder provided in the zip folder is available on your Google Drive.
6. Sign up for ngrok and provide your authentication token below.
7. Run python app.py.
8. You can provide the Flask app with either a .WAV file or a YouTube link as input.
9. A YouTube link takes a few minutes, because the song is downloaded first and inference is run afterwards.

In [None]:
!ngrok authtoken #add token

In [None]:
!pip install mirdata
!pip install essentia-tensorflow
!pip install youtube_dl
!pip install pydub

Collecting mirdata
  Downloading mirdata-0.3.6-py3-none-any.whl (13.1 MB)
[K     |████████████████████████████████| 13.1 MB 4.3 MB/s 
Collecting jams
  Downloading jams-0.3.4.tar.gz (51 kB)
[K     |████████████████████████████████| 51 kB 72 kB/s 
Collecting pretty-midi>=0.2.8
  Downloading pretty_midi-0.2.9.tar.gz (5.6 MB)
[K     |████████████████████████████████| 5.6 MB 29.2 MB/s 
[?25hCollecting Deprecated>=1.2.13
  Downloading Deprecated-1.2.13-py2.py3-none-any.whl (9.6 kB)
Collecting mido>=1.1.16
  Downloading mido-1.2.10-py2.py3-none-any.whl (51 kB)
[K     |████████████████████████████████| 51 kB 5.5 MB/s 
Collecting jsonschema>=3.0.0
  Downloading jsonschema-4.2.1-py3-none-any.whl (69 kB)
[K     |████████████████████████████████| 69 kB 6.5 MB/s 
[?25hCollecting mir_eval>=0.5
  Downloading mir_eval-0.6.tar.gz (87 kB)
[K     |████████████████████████████████| 87 kB 6.2 MB/s 
Building wheels for collected packages: pretty-midi, jams, mir-eval
  Building wheel for pretty-midi

In [None]:
# Mount Google Drive into the Colab filesystem at /content/gdrive so that
# files stored on Drive can be reached through ordinary paths.
from google.colab import drive

drive.mount('/content/gdrive')

In [None]:
import torch

import torchaudio
import torch.nn.functional as F
from torch.utils.data import Dataset
from torchaudio.datasets import GTZAN
from torchaudio.datasets.utils import download_url
from torch.utils.data import DataLoader
import torchaudio.transforms as tt
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable

In [5]:
import os
from youtube_dl import YoutubeDL
import IPython
from pydub import AudioSegment

# Module-level youtube_dl client, created once with the 'bestaudio' format
# selector. NOTE(review): the name is misspelled ("downloder"); it is kept
# as-is because other cells refer to it by this exact name.
audio_downloder = YoutubeDL({'format':'bestaudio'})

In [None]:
import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import sklearn


import essentia.standard as es
import mirdata
import numpy as np

import json

from collections import Counter
from sklearn import preprocessing

In [None]:
!curl -SLO https://essentia.upf.edu/models/classifiers/genre_tzanetakis/genre_tzanetakis-musicnn-msd-1.json
!curl -SLO https://essentia.upf.edu/models/classifiers/genre_tzanetakis/genre_tzanetakis-musicnn-msd-1.pb

# Base name shared by the two downloaded artefacts: the JSON metadata file
# and the frozen TensorFlow graph (.pb).
MODEL_NAME = 'genre_tzanetakis-musicnn-msd-1'
MODEL_JSON = f'{MODEL_NAME}.json'
MODEL_PB = f'{MODEL_NAME}.pb'

# Read the metadata through a context manager so the file handle is closed
# deterministically, and decode it as UTF-8 regardless of the platform default.
with open(MODEL_JSON, 'r', encoding='utf-8') as metadata_file:
    musicnn_metadata = json.load(metadata_file)

# Print every metadata field as "key: value" for a quick sanity check.
for key, value in musicnn_metadata.items():
    print(f'{key}: {value}')

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  2092  100  2092    0     0    835      0  0:00:02  0:00:02 --:--:--   835
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 3166k  100 3166k    0     0   331k      0  0:00:09  0:00:09 --:--:--  289k
name: genre GTZAN
type: multi-class classifier
link: https://essentia.upf.edu/models/classifiers/genre_tzanetakis/genre_tzanetakis-musicnn-msd-1.pb
version: 1
description: classification of music by genre
author: Pablo Alonso
email: pablo.alonso@upf.edu
release_date: 2020-03-31
framework: tensorflow
framework_version: 1.15.0
classes: ['blu', 'cla', 'cou', 'dis', 'hip', 'jaz', 'met', 'pop', 'reg', 'roc']
model_types: ['frozen_model']
dataset: {'name': 'the GTZAN Genre Collection', 'citation': '@article{tzanetakis2002music

In [None]:
MUSICNN_SR = 16000  # Sample rate (Hz) every file is resampled to; the MusiCNN model requires 16 kHz input.
def extract_mean_embedding(filename):
  """
  Compute a single time-averaged MusiCNN embedding for one audio file.

  Args:
    filename (str): Path of the audio file to analyse

  Return:
    The frame-wise embeddings averaged over axis 0, with a leading axis
    added so the result is one row per song (1x200 per the original note).
  """

  # Decode the file to mono and resample it to the rate MusiCNN expects.
  load_audio = es.MonoLoader(filename=filename, sampleRate=MUSICNN_SR)
  samples = load_audio()

  # Run the frozen graph and read the 'model/dense/BiasAdd' layer output.
  embed = es.TensorflowPredictMusiCNN(graphFilename=MODEL_PB, output='model/dense/BiasAdd')
  frame_embeddings = embed(samples)

  # Collapse the frame axis, then restore a leading axis to get a row vector.
  song_embedding = np.mean(frame_embeddings, axis=0)
  return song_embedding[np.newaxis, :]

In [None]:
class RNN(nn.Module):
    """LSTM followed by a linear layer that scores each input row per class.

    Args:
        input_size: Number of features in each input vector.
        hidden_size: Size of the LSTM hidden state.
        num_layers: Number of stacked LSTM layers.
        num_classes: Number of output scores produced per input vector.
    """

    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(RNN, self).__init__()
        self.num_layers = num_layers
        self.hidden_size = hidden_size
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, dropout=0.4)
        # NOTE(review): fc1 and relu are registered but never applied in
        # forward(). They are kept so the module's parameter set stays the
        # same as before (presumably what saved checkpoints contain - confirm
        # before removing).
        self.fc1 = nn.Linear(hidden_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Score a batch of feature vectors.

        Args:
            x: Tensor expected to be shaped (batch, input_size).

        Returns:
            Tensor of shape (1, batch, num_classes) with the raw class scores.
        """
        # The LSTM is not batch_first, so the new leading axis is a sequence
        # of length 1 and the original rows become the batch dimension.
        x = x.unsqueeze(0)
        # Initial states must be (num_layers, batch, hidden). The batch lives
        # on dim 1 after the unsqueeze; sizing by dim 0 (always 1) broke every
        # input with more than one row. Allocate on x's device so the module
        # also works when it has been moved off the CPU.
        state_shape = (self.num_layers, x.size(1), self.hidden_size)
        h0 = torch.zeros(state_shape, dtype=torch.float32, device=x.device)
        c0 = torch.zeros(state_shape, dtype=torch.float32, device=x.device)
        out, _ = self.lstm(x, (h0, c0))
        return self.fc(out)

In [None]:
# SECURITY: torch.load unpickles the file, which can execute arbitrary code.
# Only load checkpoints that come from a trusted source.
# The file is used as a callable model later on, so it holds a whole pickled
# module; the RNN class must already be defined when this cell runs.
# NOTE(review): PyTorch releases where torch.load defaults to
# weights_only=True will need weights_only=False here - confirm against the
# installed version.
model = torch.load('/content/rnn_epoch_100_R2',map_location=torch.device('cpu'))
# Put the network in inference mode so dropout is disabled and repeated
# predictions for the same input are deterministic.
model = model.eval()

In [None]:
# Arrays saved by the other notebook. np.load opens and closes each file
# itself when given a path.
embeddings = np.load('/content/embeddings.npy')
labels = np.load('/content/labels.npy')
labels_decoded = np.load('/content/labels_decoded.npy')
track_ids = np.load('/content/track_ids.npy')

# Lookup table from encoded genre id to its decoded genre name, built by
# pairing the two label arrays position by position.
genres = dict(zip(labels, labels_decoded))

In [None]:
def get_genre(wav_file=None,youtube_link=None):
  """
  Predict and print the genre of a song given as a file or a YouTube link.

  Args:
    wav_file (str): Path of a local audio file. Used only when no
      youtube_link is given.
    youtube_link (str): URL of a video whose audio is downloaded, converted
      to WAV and classified. Takes precedence over wav_file.

  Return:
    The predicted genre label (it is also printed).

  Raises:
    ValueError: If neither wav_file nor youtube_link is provided.
  """
  if not youtube_link and not wav_file:
    raise ValueError("Provide either wav_file or youtube_link.")

  if youtube_link:
    info = audio_downloder.extract_info(url=youtube_link, download=True)
    # Ask youtube_dl for the path it actually wrote instead of rebuilding it
    # from the title: titles are sanitised on disk, so a hand-built name does
    # not match when the title contains characters such as '/' or ':'.
    downloaded_path = audio_downloder.prepare_filename(info)
    wav = AudioSegment.from_file(downloaded_path)
    try:
      # export() returns the open output file; close it before reading the
      # WAV back so the handle is not leaked.
      wav.export("temp.wav", format="wav").close()
      features = extract_mean_embedding("temp.wav")
    finally:
      # Remove the intermediate WAV even when conversion or extraction fails.
      # The downloaded source file is left in place, as before.
      if os.path.exists("temp.wav"):
        os.remove("temp.wav")
  else:
    features = extract_mean_embedding(wav_file)

  feature_tensor = torch.from_numpy(features)
  # Inference only: skip autograd bookkeeping.
  with torch.no_grad():
    outputs = model(feature_tensor).squeeze(0)
  _, predicted = torch.max(outputs, 1)
  genre = genres[predicted.item()]
  print(genre)
  return genre

In [None]:
# Example run: classify a track directly from a YouTube URL. The audio is
# downloaded first, so this cell needs network access and takes a while.
get_genre(youtube_link="https://www.youtube.com/watch?v=qyYmS_iBcy4")

[youtube] qyYmS_iBcy4: Downloading webpage
[download] Destination: LORNA SHORE - To the Hellfire (OFFICIAL VIDEO)-qyYmS_iBcy4.m4a
[download] 100% of 5.64MiB in 01:12
[ffmpeg] Correcting container in "LORNA SHORE - To the Hellfire (OFFICIAL VIDEO)-qyYmS_iBcy4.m4a"
metal


### Flask Application

In [None]:
cd /content/gdrive/MyDrive/machine-learning-deployment/flask_app

/content/gdrive/MyDrive/machine-learning-deployment/predict sales


In [None]:
!python app.py

2021-12-05 22:10:16.181069: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0
[0;32m[   INFO   ] [0mMusicExtractorSVM: no classifier models were configured by default
2021-12-05 22:10:16.901406: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE3 SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2021-12-05 22:10:16.903151: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcuda.so.1
2021-12-05 22:10:16.916527: E tensorflow/stream_executor/cuda/cuda_driver.cc:328] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected
2021-12-05 22:10:16.916600: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel d