In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
! unzip /content/drive/MyDrive/CS2/train.zip -d train

Archive:  /content/drive/MyDrive/CS2/train.zip
  inflating: train/00204008d.flac    
  inflating: train/003b04435.flac    
  inflating: train/003bec244.flac    
  inflating: train/005f1f9a5.flac    
  inflating: train/006ab765f.flac    
  inflating: train/0072f0839.flac    
  inflating: train/0079ff47b.flac    
  inflating: train/007f87ba2.flac    
  inflating: train/00834f88e.flac    
  inflating: train/008c9c381.flac    
  inflating: train/0099c367b.flac    
  inflating: train/009aa1c8f.flac    
  inflating: train/009b760e6.flac    
  inflating: train/009fbc7b4.flac    
  inflating: train/00ad36516.flac    
  inflating: train/00b404881.flac    
  inflating: train/00d442df7.flac    
  inflating: train/00e3525a2.flac    
  inflating: train/00f3b3c1e.flac    
  inflating: train/011f25080.flac    
  inflating: train/013716dbf.flac    
  inflating: train/015113cad.flac    
  inflating: train/0151b7d20.flac    
  inflating: train/015aa6c7c.flac    
  inflating: train/016240b9a.flac    
  i

In [None]:
! pip install -q tensorflow-io

[K     |████████████████████████████████| 25.3MB 138kB/s 
[?25h

## 1. Importing necessary libraries

In [None]:
import tensorflow as tf
import numpy as np 
import tensorflow_io as tfio
import pandas as pd
from datetime import datetime,timedelta
import os

In [None]:
# Loading the dataset 
train = pd.read_csv('/content/drive/MyDrive/CS2/train_tp.csv')
train

Unnamed: 0,recording_id,species_id,songtype_id,t_min,f_min,t_max,f_max
0,003bec244,14,1,44.5440,2531.250,45.1307,5531.25
1,006ab765f,23,1,39.9615,7235.160,46.0452,11283.40
2,007f87ba2,12,1,39.1360,562.500,42.2720,3281.25
3,0099c367b,17,4,51.4206,1464.260,55.1996,4565.04
4,009b760e6,10,1,50.0854,947.461,52.5293,10852.70
...,...,...,...,...,...,...,...
1211,fe8d9ac40,13,1,53.4720,93.750,54.0960,843.75
1212,fea6b438a,4,1,43.5787,2531.250,45.7653,4031.25
1213,ff2eb9ce5,0,1,15.2267,5906.250,16.0213,8250.00
1214,ffb8d8391,5,1,14.3467,4781.250,16.6987,10406.20


## 2. Defining the functions

In [None]:
def final_fun_unquantized(file):
    # Saving the starting time
    start1 = datetime.now()

    # Creating mel-spectrogram
    audio = tfio.audio.AudioIOTensor('train/'+file['recording_id']+'.flac')
    audio_slice = audio[100:]

    # remove last dimension
    audio_tensor = tf.squeeze(audio_slice, axis=[-1])
    audio_tensor = tf.cast(audio_tensor, tf.float32)

    # Convert to spectrogram
    spectrogram = tfio.experimental.audio.spectrogram(audio_tensor, nfft=2048, window=2048, stride=512)

    # Convert to mel-spectrogram
    mel_spectrogram = tfio.experimental.audio.melscale(spectrogram, rate=48000, mels=384, fmin=40, fmax=24000)

    # Convert to db scale mel-spectrogram
    mel_spectrogram = tfio.experimental.audio.dbscale(mel_spectrogram, top_db=80)

    # Expanding the dimensions of spectrograms by 1 
    image = tf.expand_dims(mel_spectrogram, axis= -1)
    # Resizing the spectrogram
    image = tf.image.resize(image, [384, 768])
    # Converting the spectrogram to rgb
    image = tf.image.grayscale_to_rgb(image)
    # Expanding the dims for input data
    image = tf.expand_dims(image,axis = 0)

    start2 = datetime.now()
    # Creating the model
    backbone = tf.keras.applications.DenseNet121(include_top = False,input_shape = (384,768,3), weights="imagenet")

    for layer in backbone.layers[:0]:
        layer.trainable = False

    model = tf.keras.Sequential([
                backbone,
                tf.keras.layers.GlobalAveragePooling2D(),
                tf.keras.layers.Dense(128, activation='relu', kernel_initializer=tf.keras.initializers.he_normal()),
                tf.keras.layers.Dropout(0.3),
                tf.keras.layers.BatchNormalization(),
                tf.keras.layers.Dense(24,bias_initializer=tf.keras.initializers.Constant(-2.))])
    
    # Loading the model weights and predicting the ouput
    model.load_weights('/content/drive/MyDrive/CS2/Models/Saved_model.h5')
    output = tf.sigmoid(model(image)).numpy()

    # Printing the time taken 
    end = datetime.now()
    print('Time taken to load the model and predict the value :',end - start2) 
    print('Time taken including preprocessing the audio files :',end - start1) 

    # Returning the output
    return output[0], (end - start1)

In [None]:
def final_fun_float16(file):
    # Saving the starting time
    start1 = datetime.now()

    # Creating mel-spectrogram
    audio = tfio.audio.AudioIOTensor('train/'+file['recording_id']+'.flac')
    audio_slice = audio[100:]

    # remove last dimension
    audio_tensor = tf.squeeze(audio_slice, axis=[-1])
    audio_tensor = tf.cast(audio_tensor, tf.float32)

    # Convert to spectrogram
    spectrogram = tfio.experimental.audio.spectrogram(audio_tensor, nfft=2048, window=2048, stride=512)

    # Convert to mel-spectrogram
    mel_spectrogram = tfio.experimental.audio.melscale(spectrogram, rate=48000, mels=384, fmin=40, fmax=24000)

    # Convert to db scale mel-spectrogram
    mel_spectrogram = tfio.experimental.audio.dbscale(mel_spectrogram, top_db=80)

    # Expanding the dimensions of spectrograms by 1 
    image = tf.expand_dims(mel_spectrogram, axis= -1)
    # Resizing the spectrogram
    image = tf.image.resize(image, [384, 768])
    # Converting the spectrogram to rgb
    image = tf.image.grayscale_to_rgb(image)
    # Expanding the dims for input data
    image = tf.expand_dims(image,axis = 0)

    start2 = datetime.now()
    # Load the TFLite model and allocate tensors.
    interpreter = tf.lite.Interpreter(model_path='/content/drive/MyDrive/CS2/Models/float16_quantization.tflite')
    interpreter.allocate_tensors()

    # Get input and output tensors.
    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()

    interpreter.set_tensor(input_details[0]['index'], image)
    interpreter.invoke()

    # The function `get_tensor()` returns a copy of the tensor data.
    # Use `tensor()` in order to get a pointer to the tensor.
    pred = interpreter.get_tensor(output_details[0]['index'])
    output = tf.sigmoid(pred).numpy()

    # Printing the time taken 
    end = datetime.now()
    print('Time taken to load the model and predict the value :',end - start2) 
    print('Time taken including preprocessing the audio files :',end - start1) 

    return output[0], (end - start1)

In [None]:
def final_fun_dynamic(file):
    # Saving the starting time
    start1 = datetime.now()

    # Creating mel-spectrogram
    audio = tfio.audio.AudioIOTensor('train/'+file['recording_id']+'.flac')
    audio_slice = audio[100:]

    # remove last dimension
    audio_tensor = tf.squeeze(audio_slice, axis=[-1])
    audio_tensor = tf.cast(audio_tensor, tf.float32)

    # Convert to spectrogram
    spectrogram = tfio.experimental.audio.spectrogram(audio_tensor, nfft=2048, window=2048, stride=512)

    # Convert to mel-spectrogram
    mel_spectrogram = tfio.experimental.audio.melscale(spectrogram, rate=48000, mels=384, fmin=40, fmax=24000)

    # Convert to db scale mel-spectrogram
    mel_spectrogram = tfio.experimental.audio.dbscale(mel_spectrogram, top_db=80)

    # Expanding the dimensions of spectrograms by 1 
    image = tf.expand_dims(mel_spectrogram, axis= -1)
    # Resizing the spectrogram
    image = tf.image.resize(image, [384, 768])
    # Converting the spectrogram to rgb
    image = tf.image.grayscale_to_rgb(image)
    # Expanding the dims for input data
    image = tf.expand_dims(image,axis = 0)

    start2 = datetime.now()
    # Load the TFLite model and allocate tensors.
    interpreter = tf.lite.Interpreter(model_path='/content/drive/MyDrive/CS2/Models/dynamic_quantization.tflite')
    interpreter.allocate_tensors()

    # Get input and output tensors.
    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()

    interpreter.set_tensor(input_details[0]['index'], image)
    interpreter.invoke()

    # The function `get_tensor()` returns a copy of the tensor data.
    # Use `tensor()` in order to get a pointer to the tensor.
    pred = interpreter.get_tensor(output_details[0]['index'])
    output = tf.sigmoid(pred).numpy()

    # Printing the time taken 
    end = datetime.now()
    print('Time taken to load the model and predict the value :',end - start2) 
    print('Time taken including preprocessing the audio files :',end - start1) 

    return output[0], (end - start1)

## 3.1 Running on CPU

### Unquantized model

In [None]:
with tf.device('/cpu:0'):
    time_list = [] 
    for i in range(5):
        index = np.random.randint(0,1216)
        val,time = final_fun_unquantized(train.iloc[index])
        print('actual value:', train.iloc[index]['species_id'])
        print('top 3 predicted values according to probabilities:', val.argsort()[-3:][::-1])
        print('-'*100)
        time_list.append(time)

    mysum = timedelta()
    for i in time_list:
        mysum += i
    print('Average time taken for prediction: ',str(mysum/5))

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/densenet/densenet121_weights_tf_dim_ordering_tf_kernels_notop.h5
Time taken to load the model and predict the value : 0:00:05.175729
Time taken including preprocessing the audio files : 0:00:07.079130
actual value: 14
top 3 predicted values according to probabilities: [14 21  8]
----------------------------------------------------------------------------------------------------
Time taken to load the model and predict the value : 0:00:03.897237
Time taken including preprocessing the audio files : 0:00:05.323181
actual value: 18
top 3 predicted values according to probabilities: [18  4 12]
----------------------------------------------------------------------------------------------------
Time taken to load the model and predict the value : 0:00:04.082207
Time taken including preprocessing the audio files : 0:00:05.464689
actual value: 17
top 3 predicted values according to probabilities: [17 11 23]
-----

### float16 quantized model

In [None]:
with tf.device('/cpu:0'):
    time_list = [] 
    for i in range(5):
        index = np.random.randint(0,1216)
        val,time = final_fun_float16(train.iloc[index])
        print('actual value:', train.iloc[index]['species_id'])
        print('top 3 predicted values according to probabilities:', val.argsort()[-3:][::-1])
        print('-'*100)
        time_list.append(time)

    mysum = timedelta()
    for i in time_list:
        mysum += i
    print('Average time taken for prediction: ',str(mysum/5))

Time taken to load the model and predict the value : 0:00:01.144907
Time taken including preprocessing the audio files : 0:00:02.510704
actual value: 22
top 3 predicted values according to probabilities: [23 22  9]
----------------------------------------------------------------------------------------------------
Time taken to load the model and predict the value : 0:00:00.652231
Time taken including preprocessing the audio files : 0:00:02.019836
actual value: 18
top 3 predicted values according to probabilities: [ 0 18  8]
----------------------------------------------------------------------------------------------------
Time taken to load the model and predict the value : 0:00:00.638172
Time taken including preprocessing the audio files : 0:00:02.005120
actual value: 3
top 3 predicted values according to probabilities: [2 3 0]
----------------------------------------------------------------------------------------------------
Time taken to load the model and predict the value : 0:0

### Dynamic Quantized model

In [None]:
with tf.device('/cpu:0'):
    time_list = [] 
    for i in range(5):
        index = np.random.randint(0,1216)
        val,time = final_fun_dynamic(train.iloc[index])
        print('actual value:', train.iloc[index]['species_id'])
        print('top 3 predicted values according to probabilities:', val.argsort()[-3:][::-1])
        print('-'*100)
        time_list.append(time)

    mysum = timedelta()
    for i in time_list:
        mysum += i
    print('Average time taken for prediction: ',str(mysum/5))

Time taken to load the model and predict the value : 0:00:43.578719
Time taken including preprocessing the audio files : 0:00:44.940260
actual value: 11
top 3 predicted values according to probabilities: [11  7  5]
----------------------------------------------------------------------------------------------------
Time taken to load the model and predict the value : 0:00:43.149642
Time taken including preprocessing the audio files : 0:00:44.554716
actual value: 21
top 3 predicted values according to probabilities: [21  8  4]
----------------------------------------------------------------------------------------------------
Time taken to load the model and predict the value : 0:00:43.045146
Time taken including preprocessing the audio files : 0:00:44.427445
actual value: 20
top 3 predicted values according to probabilities: [11 16  3]
----------------------------------------------------------------------------------------------------
Time taken to load the model and predict the value :

## 3.2 Running on GPU

### Unquantized model

In [None]:
with tf.device('/gpu:0'):
    time_list = [] 
    for i in range(5):
        index = np.random.randint(0,1216)
        val,time = final_fun_unquantized(train.iloc[index])
        print('actual value:', train.iloc[index]['species_id'])
        print('top 3 predicted values according to probabilities:', val.argsort()[-3:][::-1])
        print('-'*100)
        time_list.append(time)

    mysum = timedelta()
    for i in time_list:
        mysum += i
    print('Average time taken for prediction: ',str(mysum/5))

Time taken to load the model and predict the value : 0:00:09.089723
Time taken including preprocessing the audio files : 0:00:11.896693
actual value: 9
top 3 predicted values according to probabilities: [23 12 18]
----------------------------------------------------------------------------------------------------
Time taken to load the model and predict the value : 0:00:03.562916
Time taken including preprocessing the audio files : 0:00:03.673237
actual value: 23
top 3 predicted values according to probabilities: [11 20 23]
----------------------------------------------------------------------------------------------------
Time taken to load the model and predict the value : 0:00:03.413133
Time taken including preprocessing the audio files : 0:00:03.513629
actual value: 7
top 3 predicted values according to probabilities: [15  7 18]
----------------------------------------------------------------------------------------------------
Time taken to load the model and predict the value : 0

### float16 quantized model

In [None]:
with tf.device('/gpu:0'):
    time_list = [] 
    for i in range(5):
        index = np.random.randint(0,1216)
        val,time = final_fun_float16(train.iloc[index])
        print('actual value:', train.iloc[index]['species_id'])
        print('top 3 predicted values according to probabilities:', val.argsort()[-3:][::-1])
        print('-'*100)
        time_list.append(time)

    mysum = timedelta()
    for i in time_list:
        mysum += i
    print('Average time taken for prediction: ',str(mysum/5))

Time taken to load the model and predict the value : 0:00:00.711897
Time taken including preprocessing the audio files : 0:00:00.839132
actual value: 14
top 3 predicted values according to probabilities: [14 21  8]
----------------------------------------------------------------------------------------------------
Time taken to load the model and predict the value : 0:00:00.654630
Time taken including preprocessing the audio files : 0:00:00.764751
actual value: 3
top 3 predicted values according to probabilities: [16  3  7]
----------------------------------------------------------------------------------------------------
Time taken to load the model and predict the value : 0:00:00.647121
Time taken including preprocessing the audio files : 0:00:00.744098
actual value: 14
top 3 predicted values according to probabilities: [14 21  1]
----------------------------------------------------------------------------------------------------
Time taken to load the model and predict the value : 

### Dynamic Quantized Model

In [None]:
with tf.device('/gpu:0'):
    time_list = [] 
    for i in range(5):
        index = np.random.randint(0,1216)
        val,time = final_fun_dynamic(train.iloc[index])
        print('actual value:', train.iloc[index]['species_id'])
        print('top 3 predicted values according to probabilities:', val.argsort()[-3:][::-1])
        print('-'*100)
        time_list.append(time)

    mysum = timedelta()
    for i in time_list:
        mysum += i
    print('Average time taken for prediction: ',str(mysum/5))

Time taken to load the model and predict the value : 0:00:43.279466
Time taken including preprocessing the audio files : 0:00:43.388766
actual value: 23
top 3 predicted values according to probabilities: [23  9  3]
----------------------------------------------------------------------------------------------------
Time taken to load the model and predict the value : 0:00:43.201110
Time taken including preprocessing the audio files : 0:00:43.301465
actual value: 11
top 3 predicted values according to probabilities: [11 10  7]
----------------------------------------------------------------------------------------------------
Time taken to load the model and predict the value : 0:00:43.160758
Time taken including preprocessing the audio files : 0:00:43.254544
actual value: 5
top 3 predicted values according to probabilities: [23 22 12]
----------------------------------------------------------------------------------------------------
Time taken to load the model and predict the value : 