# Dependencies

In [1]:
# Download the song and speech archives from Zenodo record 1188976.
# wget keeps the '?download=1' query string in the saved file name, which is why
# the unzip/rm commands below refer to that literal name.
!wget 'https://zenodo.org/record/1188976/files/Audio_Song_Actors_01-24.zip?download=1'
!wget 'https://zenodo.org/record/1188976/files/Audio_Speech_Actors_01-24.zip?download=1'
# Extract both archives into /content/data.
!mkdir '/content/data'
!unzip '/content/Audio_Song_Actors_01-24.zip?download=1' -d '/content/data/'
!unzip '/content/Audio_Speech_Actors_01-24.zip?download=1' -d '/content/data/'
# Delete the archives once extracted.
# NOTE(review): these are relative paths -- assumes the working directory is /content.
!rm 'Audio_Song_Actors_01-24.zip?download=1'
!rm 'Audio_Speech_Actors_01-24.zip?download=1'
# Root directory of the extracted audio; handed to feature_extractor further down.
data_dir = '/content/data'

--2020-12-13 13:32:38--  https://zenodo.org/record/1188976/files/Audio_Song_Actors_01-24.zip?download=1
Resolving zenodo.org (zenodo.org)... 137.138.76.77
Connecting to zenodo.org (zenodo.org)|137.138.76.77|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 225505317 (215M) [application/octet-stream]
Saving to: ‘Audio_Song_Actors_01-24.zip?download=1’


2020-12-13 13:33:16 (5.95 MB/s) - ‘Audio_Song_Actors_01-24.zip?download=1’ saved [225505317/225505317]

--2020-12-13 13:33:16--  https://zenodo.org/record/1188976/files/Audio_Speech_Actors_01-24.zip?download=1
Resolving zenodo.org (zenodo.org)... 137.138.76.77
Connecting to zenodo.org (zenodo.org)|137.138.76.77|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 208468073 (199M) [application/octet-stream]
Saving to: ‘Audio_Speech_Actors_01-24.zip?download=1’


2020-12-13 13:33:48 (6.61 MB/s) - ‘Audio_Speech_Actors_01-24.zip?download=1’ saved [208468073/208468073]

Archive:  /content/Audio_Son

In [51]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from keras import Sequential,Model
from keras.layers import merge,concatenate,Activation, Dense, Dropout, Conv2D, Flatten, MaxPooling2D, GlobalMaxPooling2D, GlobalAveragePooling1D, AveragePooling2D, Input, Add, BatchNormalization
from keras.preprocessing.image import ImageDataGenerator
from sklearn.preprocessing import normalize
from keras.models import model_from_json
from sklearn.metrics import roc_curve
from sklearn.metrics import roc_auc_score
from sklearn.metrics import auc
from keras.callbacks import Callback
#from tensorflow.keras.optimizers import Adam
from keras.utils import np_utils
import matplotlib.pyplot as plt
from tensorflow.python.keras import backend as K
from sklearn.preprocessing import StandardScaler
from imgaug import augmenters as iaa
import imgaug as ia
from scipy.io.wavfile import write
from scipy import interp
import random
from tqdm import tqdm
import pandas as pd
import numpy as np
import librosa 
import pickle
import librosa.display
import uuid
import json
import pylab
import cv2
import os
import gc
import io
import functools 

import sys
import warnings
warnings.simplefilter("ignore")

In [3]:
# Directory that receives the rendered spectrogram PNGs (used as `savedir` below).
!mkdir '/content/melspecs'

# GET MELSPECS

In [124]:
class feature_extractor:
  '''
  Indexes RAVDESS-style audio files under a directory and extracts features from them.

  navigate() fills self.data with one metadata dict per matching audio file;
  extract() then renders a mel-spectrogram PNG and/or computes an MFCC matrix
  for every entry of self.data.
  '''
  def __init__(self,datadir,savedir,song_speech ='02',splitnotsplit = True):
    '''
    takes:
    1- datadir       : root directory searched (recursively) for audio files
    2- savedir       : directory where the spectrogram PNGs are written
    3- song_speech   : vocal-channel code to keep -- '02' for song, '01' for speech
    4- splitnotsplit : True to trim silence and cut/pad every clip to `chunk` seconds
    '''
    self.data   = {}
    self.sdir   = savedir
    self.dir    = datadir
    self.sns    = splitnotsplit
    self.sp     = song_speech
    # emotion code -> [name, class index]
    self.labels = {'01':['neutral',0],'02':['calm',1],'03':['happy',2],'04':['sad',3],'05':['angry',4],'06':['fearful',5],'07':['disgust',6],'08':['surprised',7]}


  def navigate(self,):
    ''' 
    ** Only with sound files
    Walks self.dir recursively and registers every file whose name decodes to
    the requested vocal channel (self.sp).

    This returns/formulates:
    1- self.data: {short uuid key -> metadata dict}; metadata['Dir'] holds the audio path
    '''
    for dirname, _, filenames in os.walk(self.dir):
      for filename in filenames:
        meta = self._splitter(filename)
        if meta is None:
          continue

        meta['Dir'] = os.path.join(dirname,filename)

        # Only the first uuid1 field is used as key; redraw on the (unlikely)
        # collision so an earlier entry is never overwritten.
        key = str (uuid.uuid1()).split('-')[0]
        while key in self.data:
          key = str (uuid.uuid1()).split('-')[0]
        self.data[key] = meta 

  def _splitter(self,filename,):
    '''
    this decodes file name into:
    1- VocalChannel : 'song' for code 02 -- 'speech' otherwise
    2- Class [name, number from 0 to 7]
    3- Intensity level: 0 for non intense -- 1 for intense
    4- WhichStatement : 0 for kids -- 1 for dogs
    5- Is it a repeated record: 0 for no -- 1 for yes
    6- Gender: 0 for male -- 1 for female

    returns None when the name does not have the seven dash-separated fields,
    belongs to the other vocal channel, or carries an unknown emotion/actor code.
    '''
    codes = filename.split('.')[0].split('-')

    # Stray files (too few fields) and the other vocal channel are skipped.
    if len(codes) < 7 or codes[1] != self.sp:
      return None
    if codes[2] not in self.labels or not codes[6].isdigit():
      return None
    
    meta = {}
    # Compare against the literal song code: comparing with self.sp labelled
    # every kept file 'song', even when speech ('01') was requested.
    meta['VocalChannel']   = 'song' if codes[1] == '02' else 'speech'
    meta['Class']          = self.labels[codes[2]]
    meta['IntenseVoice']   = 0 if codes[3] == '01' else 1
    meta['WhichStatement'] = 0 if codes[4] == '01' else 1
    meta['Repeated?']      = 0 if codes[5] == '01' else 1
    meta['gender']         = 0 if (int(codes[6])%2 != 0) else 1
  
    return meta
  
  def extract(self,removeaudio=False,whichto = 'both',chunk=3):
    '''
    takes: 
    1-removeaudio: True in case deletion of audio file after extracting features
    2-whichto    : 'both' for melspec+mfccs -- 'mel' for mel only -- 'mfcc' for mfccs only
    3-chunk      : length in seconds every clip is cut/padded to (when splitting is on)

    Entries whose audio is rejected by _process_cough_file (too short / too long)
    are removed from self.data so that only processed clips remain.
    '''
    # Iterate over a snapshot of the keys because rejected entries are deleted.
    for key in tqdm(list(self.data.keys())):
      audio,sr,hop_length,win_length  = self._process_cough_file(self.data[key]['Dir'],self.sns,removeaudio,chunk=chunk)
      if audio is None:
        del self.data[key]
        continue

      if (whichto != 'mfcc'):
        # 'Dir' is repointed from the audio file to the rendered PNG.
        self.data[key]['Dir']  = self._get_melspec(self.data[key]['Dir'],audio,sr,key)

      if (whichto != 'mel'):
        self.data[key]['Mfcc'] = self._get_MFCCS(audio,sr,hop_length,win_length)

  def _process_cough_file(self,path, splitnotsplit,removeaudio=False ,chunk=3):
    '''
    Loads `path` at 48 kHz and optionally trims/pads it to `chunk` seconds.

    returns (audio, sr, hop_length, win_length) on every path;
    (None, None, None, None) when the clip is shorter than 0.3 s or longer than 30 s.
    '''
    x,sr = librosa.load(path, sr=48000)        
    duration = len(x)/sr
    if duration < 0.3 or duration > 30:
      return None,None,None,None
    
    if removeaudio:
      os.remove(path)

    hop_length = np.floor(0.010*sr).astype(int) #10ms
    win_length = np.floor(0.020*sr).astype(int) #20ms

    if (splitnotsplit == False):
      return x,sr,hop_length,win_length
    
    x = self._trim_silence(x, pad=0.25*sr, db_max=50) 
    x = x[:np.floor(chunk*sr).astype(int)]
    
    #pads to chunk size if smaller
    x_pad = np.zeros(int(sr*chunk))
    n_copy = min(len(x_pad), len(x))
    x_pad[:n_copy] = x[:n_copy]
    
    return x_pad,sr,hop_length,win_length

  # Trims leading and trailing silence
  def _trim_silence(self,x, pad=0, db_max=50):
    '''Cuts x to the non-silent interval found by librosa.effects.trim, widened by `pad` samples per side.'''
    _, ints = librosa.effects.trim(x, top_db=db_max, frame_length=256, hop_length=64)
    start   = int(max(ints[0]-pad, 0))
    end     = int(min(ints[1]+pad, len(x)))
    return x[start:end]

  def _save_specshow(self,matrix,name):
    '''Draws `matrix` with librosa.display.specshow and writes the canvas to <savedir>/<name>.png; returns that path.'''
    plt.ioff()
    fig = plt.figure()
    try:
      librosa.display.specshow(matrix)
      fig.canvas.draw()
      # np.frombuffer replaces the deprecated binary mode of np.fromstring; it
      # yields a read-only view, hence the copy after reshaping.
      img = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)
      img = img.reshape(fig.canvas.get_width_height()[::-1] + (3,)).copy()
    finally:
      # Always release the figure, even when rendering fails.
      plt.close(fig=fig)

    # NOTE(review): the canvas bytes are RGB while cv2.imwrite presumably treats
    # the array as BGR, so red/blue end up swapped in the PNG (unchanged from before).
    savepath = os.path.join(self.sdir,name+'.png')
    cv2.imwrite(savepath,img)

    return savepath

  def _get_chromaCQT(self,dir,audio,sr,name):
    '''Saves a chroma-CQT image of `audio` and returns the PNG path (`dir` is unused).'''
    chroma = librosa.feature.chroma_cqt(y=audio,sr=sr)
    return self._save_specshow(chroma,name)

  def _get_melspec(self,dir,audio,sr,name):
    '''Saves a dB-scaled mel-spectrogram image of `audio` and returns the PNG path (`dir` is unused).'''
    #Mel Spectogram
    melspec  = librosa.feature.melspectrogram(y=audio,sr=sr)
    s_db     = librosa.power_to_db(melspec, ref=np.max)
    return self._save_specshow(s_db,name)

    
  def _get_MFCCS(self,audio,sr,hop_length,win_length,final_dim=(300,200)):
    '''
    Computes 200 MFCCs per frame and returns them as a (frames, coefficients)
    matrix cropped to at most `final_dim`. `win_length` is accepted but not used.
    '''
    #For MFCCS 
    mfcc = librosa.feature.mfcc(y=audio, sr=sr, n_mels=200, n_mfcc=200, n_fft=2048, 
                                hop_length=hop_length)
    mfcc = np.swapaxes(mfcc, 0, 1)
    mfcc = mfcc[:final_dim[0], :final_dim[1]]
    return mfcc

In [125]:
# Extractor over the unzipped audio; defaults keep the song channel ('02') and trim/pad every clip.
Master = feature_extractor(data_dir,savedir='/content/melspecs')

In [126]:
# Index every matching audio file under data_dir into Master.data.
Master.navigate()

In [127]:
# Default whichto='both': write a mel-spectrogram PNG and store an MFCC matrix for each indexed clip.
Master.extract()


  0%|          | 0/1012 [00:00<?, ?it/s][A
  0%|          | 1/1012 [00:00<08:40,  1.94it/s][A
  0%|          | 2/1012 [00:01<08:47,  1.91it/s][A
  0%|          | 3/1012 [00:01<09:03,  1.86it/s][A
  0%|          | 4/1012 [00:02<09:02,  1.86it/s][A
  0%|          | 5/1012 [00:02<09:03,  1.85it/s][A
  1%|          | 6/1012 [00:03<09:03,  1.85it/s][A
  1%|          | 7/1012 [00:03<09:06,  1.84it/s][A
  1%|          | 8/1012 [00:04<09:02,  1.85it/s][A
  1%|          | 9/1012 [00:04<09:04,  1.84it/s][A
  1%|          | 10/1012 [00:05<09:03,  1.84it/s][A
  1%|          | 11/1012 [00:05<09:06,  1.83it/s][A
  1%|          | 12/1012 [00:06<09:14,  1.80it/s][A
  1%|▏         | 13/1012 [00:07<09:12,  1.81it/s][A
  1%|▏         | 14/1012 [00:07<09:11,  1.81it/s][A
  1%|▏         | 15/1012 [00:08<09:10,  1.81it/s][A
  2%|▏         | 16/1012 [00:08<09:08,  1.82it/s][A
  2%|▏         | 17/1012 [00:09<09:11,  1.80it/s][A
  2%|▏         | 18/1012 [00:09<09:09,  1.81it/s][A
  2%|▏    

In [128]:
# Alias the extractor's metadata dict (same object, not a copy).
Data = Master.data
# NOTE(review): echoing the whole dict prints every MFCC matrix into the cell output.
Data

{'8fe7612a': {'Class': ['fearful', 5],
  'Dir': '/content/melspecs/8fe7612a.png',
  'IntenseVoice': 0,
  'Mfcc': array([[-8.36504838e+02,  0.00000000e+00,  0.00000000e+00, ...,
           0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
         [-8.36504838e+02,  0.00000000e+00,  0.00000000e+00, ...,
           0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
         [-8.36504838e+02,  0.00000000e+00,  0.00000000e+00, ...,
           0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
         ...,
         [-5.43770511e+02,  2.21891866e+02, -2.68621626e+00, ...,
           8.93588909e-01, -1.00147993e+00, -1.54949020e-01],
         [-5.55323260e+02,  2.15963884e+02, -8.89991327e-02, ...,
           3.76125560e-01, -3.74775655e+00, -3.90112734e+00],
         [-4.94707986e+02,  2.76318226e+02,  2.40514101e+01, ...,
           2.10740550e+00, -3.97435861e-01, -3.78873635e-01]]),
  'Repeated?': 1,
  'VocalChannel': 'song',
  'WhichStatement': 0,
  'gender': 0},
 '8fe76440': {'Class':

In [129]:
# NOTE(review): repeats the full-dictionary display of the previous cell.
Data

{'8fe7612a': {'Class': ['fearful', 5],
  'Dir': '/content/melspecs/8fe7612a.png',
  'IntenseVoice': 0,
  'Mfcc': array([[-8.36504838e+02,  0.00000000e+00,  0.00000000e+00, ...,
           0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
         [-8.36504838e+02,  0.00000000e+00,  0.00000000e+00, ...,
           0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
         [-8.36504838e+02,  0.00000000e+00,  0.00000000e+00, ...,
           0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
         ...,
         [-5.43770511e+02,  2.21891866e+02, -2.68621626e+00, ...,
           8.93588909e-01, -1.00147993e+00, -1.54949020e-01],
         [-5.55323260e+02,  2.15963884e+02, -8.89991327e-02, ...,
           3.76125560e-01, -3.74775655e+00, -3.90112734e+00],
         [-4.94707986e+02,  2.76318226e+02,  2.40514101e+01, ...,
           2.10740550e+00, -3.97435861e-01, -3.78873635e-01]]),
  'Repeated?': 1,
  'VocalChannel': 'song',
  'WhichStatement': 0,
  'gender': 0},
 '8fe76440': {'Class':

# Generate Reusable Data

In [130]:
names  = ['UIID','VocalChannel','IntenseVoice','Gender','Statement#','Repeated?','ClassMax','ClassName','Dir']

# One flat record per clip, turned into a DataFrame in a single call.
# This replaces row-by-row pd.concat (quadratic in the number of clips) and no
# longer writes a self-named key into each metadata dict of `Data`.
# Data[key]['Class'] is a [name, index] pair: index -> 'ClassMax', name -> 'ClassName'.
records = [
  {'UIID':key
   ,'VocalChannel':meta['VocalChannel']
   ,'IntenseVoice':meta['IntenseVoice']
   ,'Gender':meta['gender']
   ,'Statement#':meta['WhichStatement']
   ,'Repeated?':meta['Repeated?']
   ,'ClassMax':meta['Class'][1]
   ,'ClassName':meta['Class'][0]
   ,'Dir':meta['Dir']}
  for key,meta in Data.items()
]
# Rows keep the dict's iteration order and get a 0..n-1 index, as before.
RAVDESS_SONGs = pd.DataFrame(records,columns=names)


In [26]:
# Persist the metadata table next to the spectrogram images.
RAVDESS_SONGs.to_csv('/content/RAVDESS-Songs-Meta.csv')

# Colab-only helper used for the browser downloads below.
from google.colab import files
# Bundle all rendered PNGs into one archive.
!zip -r RAVDESS_SONGs.zip '/content/melspecs'

#Download files
# NOTE(review): both names are relative while the CSV was written under /content --
# assumes the working directory is /content.
files.download('RAVDESS_SONGs.zip')
files.download('RAVDESS-Songs-Meta.csv')

  adding: content/melspecs/ (stored 0%)
  adding: content/melspecs/0ef6c0b0.png (deflated 5%)
  adding: content/melspecs/0ef527fa.png (deflated 5%)
  adding: content/melspecs/014abde8.png (deflated 4%)
  adding: content/melspecs/0ef4d3c2.png (deflated 5%)
  adding: content/melspecs/014926d6.png (deflated 6%)
  adding: content/melspecs/0ef669d0.png (deflated 4%)
  adding: content/melspecs/0ef60d14.png (deflated 3%)
  adding: content/melspecs/014a47b4.png (deflated 6%)
  adding: content/melspecs/0ef5f914.png (deflated 6%)
  adding: content/melspecs/0ef84d72.png (deflated 4%)
  adding: content/melspecs/0ef81f00.png (deflated 6%)
  adding: content/melspecs/014a7ad6.png (deflated 6%)
  adding: content/melspecs/01490b06.png (deflated 3%)
  adding: content/melspecs/0149e0a8.png (deflated 6%)
  adding: content/melspecs/014a0dee.png (deflated 3%)
  adding: content/melspecs/014abcee.png (deflated 4%)
  adding: content/melspecs/0ef5942e.png (deflated 4%)
  adding: content/melspecs/014af754.png (d

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

# Split

In [70]:
def index_to_list(elem,n_classes=6):
  '''
  One-hot encodes a class index.

  takes:
  1- elem      : integer class index, expected in [0, n_classes)
  2- n_classes : width of the encoding (was previously ignored and fixed at 6)

  returns: float array of shape (1, n_classes) with a single 1 at column `elem`
  '''
  arr = np.zeros(shape = (1,n_classes))
  arr[0,elem] = 1
  return arr

In [185]:
# All clips are kept; the commented filter would restrict to non-intense recordings.
Chosen = RAVDESS_SONGs#.loc[RAVDESS_SONGs['IntenseVoice'] == 0] 
# Shuffle the rows before the positional train/val/test split. The fixed seed makes
# the shuffle, and therefore the split, identical on every run.
Chosen = Chosen.sample(frac=1,random_state=42).reset_index(drop=True)

In [200]:
# Keep only first-take recordings ('Repeated?' == 0): image paths become the
# inputs, one-hot encoded class indices become the targets.
first_take = Chosen['Repeated?'] == 0
Datax = Chosen.loc[first_take,'Dir'].tolist()
Datay = np.squeeze(np.array([index_to_list(label) for label in Chosen.loc[first_take,'ClassMax'].tolist()]))

'''
ValTestx = Chosen['Dir'].loc[Chosen['Repeated?'] == 1].tolist()
ValTesty = Chosen['ClassMax'].loc[Chosen['Repeated?']==1].tolist()
ValTesty = np.squeeze(np.array(list(map(index_to_list,ValTesty))))
'''

"\nValTestx = Chosen['Dir'].loc[Chosen['Repeated?'] == 1].tolist()\nValTesty = Chosen['ClassMax'].loc[Chosen['Repeated?']==1].tolist()\nValTesty = np.squeeze(np.array(list(map(index_to_list,ValTesty))))\n"

In [201]:
#Trainx= Trainx + ValTestx[140:]
#Trainy= np.append(Trainy,ValTesty[140:],axis=0)

# Positional 80% / 10% / 10% split of the (already shuffled) samples.
train_end = int(0.8*len(Datax))
val_end   = int(0.9*len(Datax))

Trainx, Valx, Testx = Datax[:train_end], Datax[train_end:val_end], Datax[val_end:]
Trainy, Valy, Testy = Datay[:train_end], Datay[train_end:val_end], Datay[val_end:]

# Pipeline + Logs 

In [135]:
class image_aug:
  '''
  Random image augmentations for H x W x C arrays.

  The shift/zoom helpers crop a random region and resize it back to the input's
  height and width. brightness relies on OpenCV colour conversion and is written
  for 8-bit images.
  '''
  def vertical_shift(self,img, ratio=0.0):
    '''Crops a random fraction (up to `ratio`, 0..1) off the top or bottom and resizes back to the input size.'''
    if ratio > 1 or ratio < 0:
        print('Value should be less than 1 and greater than 0')
        return img
    ratio = random.uniform(-ratio, ratio)
    h, w = img.shape[:2]
    to_shift = h*ratio
    # Always keep at least one row so the crop handed to cv2.resize is never empty.
    if ratio > 0:
        img = img[:max(int(h-to_shift), 1), :, :]
    if ratio < 0:
        img = img[min(int(-1*to_shift), h-1):, :, :]
    # dsize is (width, height); the interpolation flag must be passed by keyword
    # because the third positional argument of cv2.resize is `dst`.
    img = cv2.resize(img, (w, h), interpolation=cv2.INTER_CUBIC)
    return img

  def brightness(self,img, value):
    '''Raises the HSV value channel by `value` (non-negative int), saturating at 255.'''
    hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    h, s, v = cv2.split(hsv)

    # Decide which pixels would overflow before modifying anything.
    lim = 255 - value
    saturated = v > lim
    v[saturated] = 255
    v[~saturated] += value

    final_hsv = cv2.merge((h, s, v))
    img = cv2.cvtColor(final_hsv, cv2.COLOR_HSV2BGR)
    return img

  def horizontal_shift(self,img, ratio=0.0):
    '''Crops a random fraction (up to `ratio`, 0..1) off the left or right and resizes back to the input size.'''
    if ratio > 1 or ratio < 0:
        print('Value should be less than 1 and greater than 0')
        return img
    ratio = random.uniform(-ratio, ratio)
    h, w = img.shape[:2]
    to_shift = w*ratio
    # Always keep at least one column so the crop handed to cv2.resize is never empty.
    if ratio > 0:
        img = img[:, :max(int(w-to_shift), 1), :]
    if ratio < 0:
        img = img[:, min(int(-1*to_shift), w-1):, :]
    img = cv2.resize(img, (w, h), interpolation=cv2.INTER_CUBIC)
    return img
    
  def channel_shift(self,img, value):
    '''Adds one random integer offset in [-value, value] to every pixel and returns a uint8 image clipped to 0..255.'''
    value = int(random.uniform(-value, value))
    # Add in a wider dtype first: adding directly to a uint8 array wraps around
    # (or fails for negative offsets) before the clipping could take effect.
    shifted = img.astype(np.float64) + value
    return np.clip(shifted, 0, 255).astype(np.uint8)
    
  def rotation(self,img, angle):
    '''Rotates the image about its centre by a random whole number of degrees in [-angle, angle].'''
    angle = int(random.uniform(-angle, angle))
    h, w = img.shape[:2]
    M = cv2.getRotationMatrix2D((int(w/2), int(h/2)), angle, 1)
    img = cv2.warpAffine(img, M, (w, h))
    return img
  
  def zoom(self,img, value):
    '''Crops a random window covering a fraction in [value, 1] of each side and resizes it back to the input size.'''
    if value > 1 or value < 0:
        print('Value for zoom should be less than 1 and greater than 0')
        return img
    value = random.uniform(value, 1)
    h, w = img.shape[:2]
    # At least one pixel per side, otherwise a draw close to 0 gives an empty crop.
    h_taken = max(int(value*h), 1)
    w_taken = max(int(value*w), 1)
    h_start = random.randint(0, h-h_taken)
    w_start = random.randint(0, w-w_taken)
    img = img[h_start:h_start+h_taken, w_start:w_start+w_taken, :]
    img = cv2.resize(img, (w, h), interpolation=cv2.INTER_CUBIC)
    return img
    
class MasterPipeline(tf.keras.utils.Sequence):
  '''
  Keras Sequence that reads image files from disk, resizes them to `target_size`,
  divides them by `scale` and yields (X, y) batches.

  takes:
  1- imgfiles    : indexable collection of image paths
  2- labels      : indexable collection of per-sample label rows (length n_classes)
  3- batch_size  : samples per batch; a trailing partial batch is dropped (see __len__)
  4- target_size : (height, width) every image is resized to
  5- shuffle     : reshuffle the sample order at the start and after every epoch
  6- scale       : divisor applied to pixel values
  7- n_classes / n_channels : widths of the label rows / image channels
  8- augment     : route selected batches through the augmentation hook
  9- aseed       : [max batch index, how many indices to draw] used to pick those batches
  '''
  def __init__(self,imgfiles,labels,batch_size,target_size=(64,64),shuffle=False,scale=255,n_classes=2,n_channels=3,augment=False,aseed=[650,250]):
    self.batch_size = batch_size
    self.dim        = target_size
    self.labels     = labels
    self.imgfiles   = imgfiles
    self.n_classes  = n_classes
    self.shuffle    = shuffle
    self.n_channels = n_channels
    self.scale      = scale
    
    self.augment    = augment
    self.augmentinfo= aseed   # only read, never mutated

    self.on_epoch_end()
  
  def __augment(self,img):
    '''
    Augmentation hook -- currently a pass-through that returns `img` unchanged.

    The earlier body returned `img` unconditionally before a random dispatch to
    image_aug (brightness / rotation / horizontal_shift / zoom / vertical_shift),
    so that dispatch never ran. It is not re-enabled here because this hook
    receives images that were already divided by `scale`, while
    image_aug.brightness is written for 8-bit images.
    '''
    return img

  def __len__(self):
    '''Number of full batches per epoch.'''
    return int(np.floor(len(self.imgfiles) / self.batch_size))

  def __getitem__(self, index):
    '''Returns batch number `index` as (X, y).'''
    indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]
    X, y = self.__data_generation(indexes)
    return X, y

  def on_epoch_end(self):
    '''Resets the batch counter, redraws the batches to augment and rebuilds the sample order.'''
    self.iaa        = 0
    self.aseed      = [random.randint(0,self.augmentinfo[0]) for _ in range(self.augmentinfo[1])]

    self.indexes = np.arange(len(self.imgfiles))
    if self.shuffle == True:
      np.random.shuffle(self.indexes)
  
  def __data_generation(self, list_IDs_temp):
    '''Loads the samples listed in `list_IDs_temp` and returns them as (X, y) arrays.'''
    # Size the buffers by the actual number of ids so that a short batch can
    # never expose uninitialised rows of np.empty.
    n_samples = len(list_IDs_temp)
    X = np.empty((n_samples, *self.dim, self.n_channels))
    y = np.empty((n_samples,self.n_classes))

    # Generate data
    for i, ID in enumerate(list_IDs_temp):
      #Mel Spectogram
      path  = self.imgfiles[ID]
      img   = cv2.imread(path)
      if img is None:
        # cv2.imread signals a missing/unreadable file by returning None.
        raise FileNotFoundError('Could not read image: {}'.format(path))
      # cv2.resize expects (width, height), hence the reversed (height, width) tuple.
      img   = cv2.resize(img,tuple(reversed(self.dim)),interpolation = cv2.INTER_CUBIC)
      X[i,] = img / self.scale
      
      # Store class
      y[i]  = self.labels[ID]
    
    # Batches whose running index was drawn in on_epoch_end go through the hook.
    if ((self.augment == True) and (self.iaa in self.aseed)):
      X = np.array(list(map(self.__augment, X))).astype(np.float32)
    
    self.iaa += 1
    return X, y

In [None]:
class model_logs:
  '''
  Saves a trained model and computes its micro-averaged ROC/AUC on a test set.

  takes:
  1- model     : trained model; it is kept by reference and used for predict/save
  2- path      : directory where the model files and the ROC plot are written
  3- name      : base file name (without extension) for everything that is saved
  4- labels    : one-hot ground truth of the test set, indexed as [sample, class]
  5- test_gen  : data passed to model.predict; predictions are computed once, here
  6- n_classes : number of classes evaluated by calc_auc
  '''
  def __init__(self,model,path,name,labels,test_gen,n_classes=6):
    self.model = model #Pointer to the model
    self.path  = path
    self.name  = name

    self.preds = self.model.predict(test_gen)
    self.labels= labels
    # Honour the argument (it used to be overridden by a hard-coded 6).
    self.n_classes = n_classes

  def Save_ROC(self,info,name,ROCdir='/content'):
    '''Plots one ROC curve from info = [fpr, tpr, auc] and saves it as <ROCdir>/<name>.png.'''
    plt.figure()
    plt.plot([0, 1], [0, 1], 'k--')
    #FPR - TPR - AUC
    plt.plot(info[0], info[1], label='area = {:.3f}'.format(info[2]))
    plt.legend(loc='best')
    plt.xlabel('False positive rate')
    plt.ylabel('True positive rate')
    plt.title('ROC curve')
    plt.savefig(os.path.join(ROCdir,name+'.png'))
    plt.close()

  def _save(self,):
    '''Writes the architecture to <path>/<name>.json and the weights to <path>/<name>.h5.'''
    Model_JSON = self.model.to_json()
    with open(os.path.join(self.path,self.name+'.json'), "w") as json_file:
        json_file.write(Model_JSON)

    self.model.save_weights(os.path.join(self.path,self.name+'.h5'))

  def calc_auc(self,):
    '''
    Computes a ROC curve per class and the micro-average over all classes,
    saves the micro-average plot and stores its area in self.AUC_MICRO.
    '''
    preds  = np.asarray(self.preds)
    # A generator that drops a trailing partial batch yields fewer predictions
    # than labels; compare only the rows that were actually predicted.
    labels = np.asarray(self.labels)[:len(preds)]

    fpr = dict()
    tpr = dict()
    roc_auc = dict()
    for i in range(self.n_classes):
        fpr[i], tpr[i], _ = roc_curve(labels[:, i], preds[:, i])
        roc_auc[i] = auc(fpr[i], tpr[i])

    # Compute micro-average ROC curve and ROC area
    fpr["micro"], tpr["micro"], _ = roc_curve(labels.ravel(), preds.ravel())
    roc_auc["micro"] = auc(fpr["micro"], tpr["micro"])

    self.Save_ROC([fpr["micro"],tpr["micro"],roc_auc["micro"]],name=self.name,ROCdir=self.path)
    self.AUC_MICRO = roc_auc["micro"]

# Train Resnet50

In [44]:
class Evaluation(keras.callbacks.Callback):
  '''
  Callback that prints the ROC-AUC on the validation and test data after every epoch.

  takes:
  1- val_data_gen  / val_labels  : validation inputs for model.predict and their labels
  2- test_data_gen / test_labels : test inputs for model.predict and their labels
  '''
  def __init__(self, val_data_gen, val_labels, test_data_gen, test_labels):
    # super(Callback, self) started the lookup *after* Callback and therefore
    # skipped the base-class initialiser; plain super() runs it.
    super().__init__()
    self.test_data = test_data_gen
    self.val_labels = val_labels
    self.val_data = val_data_gen
    self.test_labels = test_labels

  def on_epoch_end(self, epoch, logs=None):
    # Labels are cut to the number of predictions, which can be smaller when the
    # generator drops a trailing partial batch.
    y_preds = self.model.predict(self.val_data)
    print(' | val_auc:', roc_auc_score(self.val_labels[:len(y_preds)], y_preds))

    y_preds = self.model.predict(self.test_data)
    print(' | test_auc:', roc_auc_score(self.test_labels[:len(y_preds)], y_preds))

In [223]:
from keras.applications import resnet50
def MODEL_RESNET50(input_shape=(224,224,3),n_classes=80,act='softmax',weights=None,loss_weight=None):
  '''
  Builds and compiles a ResNet50 backbone with a small classification head.

  takes:
  1- input_shape : shape of one input sample
  2- n_classes   : number of units of the output layer
  3- act         : activation of the output layer
  4- weights     : forwarded to ResNet50 (e.g. None or 'imagenet')
  5- loss_weight : accepted for compatibility; not used

  returns: the compiled model (Adam optimizer, categorical cross-entropy loss,
  categorical accuracy and multi-label AUC metrics)
  '''
  base_model = resnet50.ResNet50(input_shape=input_shape,weights=weights, include_top=False)
  # Average- and max-pooled backbone features are concatenated.
  x1         = keras.layers.GlobalAveragePooling2D()(base_model.output)
  x2         = keras.layers.GlobalMaxPooling2D()(base_model.output)
  x          = concatenate([x1,x2])

  x = BatchNormalization()(x)
  x = Dropout(0.5)(x)
  
  x = Dense(256, activation='relu')(x)
  x = BatchNormalization()(x)
  x = Dropout(0.5)(x)

  output = keras.layers.Dense(n_classes, activation=act)(x)
  RESNET = keras.models.Model(inputs=[base_model.input], outputs=[output])

  # The SGD optimizer and exponential-decay schedule that used to be built here
  # were never passed to compile() and have been dropped; training keeps using
  # the default Adam optimizer, exactly as before.
  RESNET.compile(optimizer='Adam',loss='CategoricalCrossentropy',
                 metrics=['CategoricalAccuracy',tf.keras.metrics.AUC(multi_label=True)])

  return RESNET

In [224]:
# ImageNet-initialised ResNet50 with a 6-way softmax head.
BASE = MODEL_RESNET50(n_classes=6,act='softmax',weights='imagenet')

# Batch generators over the spectrogram PNGs; augmentation is switched off for all three.
TRAIN  = MasterPipeline(Trainx,Trainy,batch_size=8,target_size=(224,224),n_classes=6,augment=False,aseed=[50,25])
VAL    = MasterPipeline(Valx,Valy,batch_size=8,target_size=(224,224),n_classes=6,augment=False)
TEST   = MasterPipeline(Testx,Testy,batch_size=8,target_size=(224,224),n_classes=6,augment=False)

# Train for 20 epochs, then score the held-out test batches.
BASE.fit(TRAIN,epochs=20,validation_data=VAL)
Record = BASE.evaluate(TEST)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [234]:
# batch_size=1 so that no test sample is dropped by the generator's full-batch rounding
# and the predictions line up one-to-one with Testy.
TEST    = MasterPipeline(Testx,Testy,batch_size=1,target_size=(224,224),n_classes=6,augment=False)
# model_logs runs the predictions in its constructor.
# NOTE(review): the target path is under /content/drive -- presumably requires Google Drive to be mounted.
Blogger = model_logs(BASE,'/content/drive/MyDrive/Pretraining',name='Pretrained_on_RAVDS',labels=Testy,test_gen=TEST)
# Write architecture (.json) and weights (.h5), then the micro-average ROC plot.
Blogger._save()
Blogger.calc_auc()

# Train on MFCC

In [286]:
class ExtrPipeline(tf.keras.utils.Sequence):
  '''
  Keras Sequence over in-memory 2-D feature matrices (one per sample).

  Each batch stacks the selected matrices, with a trailing channel axis added,
  into X and the matching label rows into y. `scale` is stored but not applied.
  '''
  def __init__(self,imgfiles,labels,batch_size,target_size=(64,64),shuffle=False,scale=255,n_classes=2,n_channels=1):
    self.imgfiles, self.labels       = imgfiles, labels
    self.batch_size                  = batch_size
    self.dim                         = target_size
    self.n_classes, self.n_channels  = n_classes, n_channels
    self.shuffle                     = shuffle
    self.scale                       = scale

    # Build the initial sample order.
    self.on_epoch_end()

  def __len__(self):
    '''Number of full batches per epoch.'''
    return int(np.floor(len(self.imgfiles) / self.batch_size))

  def __getitem__(self, index):
    '''Returns batch number `index` as (X, y).'''
    first     = index*self.batch_size
    batch_ids = self.indexes[first:first+self.batch_size]
    return self.__data_generation(batch_ids)

  def on_epoch_end(self):
    '''Rebuilds the sample order, shuffled when requested.'''
    self.indexes = np.arange(len(self.imgfiles))
    if self.shuffle == True:
      np.random.shuffle(self.indexes)

  def __data_generation(self, list_IDs_temp):
    '''Copies the samples listed in `list_IDs_temp` into freshly allocated (X, y) arrays.'''
    X = np.empty((self.batch_size,) + tuple(self.dim) + (self.n_channels,))
    y = np.empty((self.batch_size,self.n_classes))

    for row, sample_id in enumerate(list_IDs_temp):
      # Feature matrix with a trailing channel axis
      X[row] = self.imgfiles[sample_id][:,:,np.newaxis]
      # Label row of the same sample
      y[row] = self.labels[sample_id]

    return X, y

In [260]:
# Accumulators filled by the next cell: MFCC matrices and their class indices.
MFCC  = []
labels= []

In [261]:
# Gather the MFCC matrix and class index of every first-take clip ('Repeated?' == 0).
# The lists are appended to, so re-running this cell without resetting them duplicates entries.
for record in Data.values():
  if (record['Repeated?'] == 0):
    MFCC.append(record['Mfcc'])
    labels.append(record['Class'][1])

In [262]:
# One-hot encode the class indices (index_to_list gives (1, 6) rows; squeeze drops the singleton axis).
labels = np.squeeze(np.array(list(map(index_to_list,labels))))
# Stack the per-clip matrices into one array.
# NOTE(review): presumably all matrices share one shape, giving (n_samples, frames, coefficients) -- confirm.
MFCC   = np.array(MFCC)

In [265]:
# Positional 80% / 10% / 10% split of the MFCC samples (order as collected, no shuffle).
mfcc_train_end = int(0.8*len(MFCC))
mfcc_val_end   = int(0.9*len(MFCC))

Trainx, Valx, Testx = MFCC[:mfcc_train_end], MFCC[mfcc_train_end:mfcc_val_end], MFCC[mfcc_val_end:]
Trainy, Valy, Testy = labels[:mfcc_train_end], labels[mfcc_train_end:mfcc_val_end], labels[mfcc_val_end:]

In [274]:
# Append a trailing channel axis to the stacked MFCC array.
# NOTE(review): Trainx/Valx/Testx are sliced from MFCC in the cell above, and ExtrPipeline adds
# its own channel axis per sample; also, re-running this cell fails because the array is then 4-D.
MFCC = MFCC[:,:,:,np.newaxis]

In [288]:
# Randomly initialised ResNet50 (weights=None) on single-channel 300x200 MFCC inputs.
MFCC_B = MODEL_RESNET50(input_shape=(300,200,1),n_classes=6,act='softmax',weights=None)

# Batch generators over the in-memory MFCC matrices.
TRAIN  = ExtrPipeline(Trainx,Trainy,batch_size=8,target_size=(300,200),n_classes=6)
VAL    = ExtrPipeline(Valx,Valy,batch_size=8,target_size=(300,200),n_classes=6)
TEST   = ExtrPipeline(Testx,Testy,batch_size=8,target_size=(300,200),n_classes=6)

# Train for up to 30 epochs (the recorded run below was interrupted manually), then score the test batches.
MFCC_B.fit(TRAIN,epochs=30,validation_data=VAL)
Record = MFCC_B.evaluate(TEST)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30

KeyboardInterrupt: ignored

In [290]:
# batch_size=1 so that every test sample is predicted and the outputs line up with Testy.
TEST    = ExtrPipeline(Testx,Testy,batch_size=1,target_size=(300,200),n_classes=6)
# model_logs runs the predictions in its constructor.
Blogger = model_logs(MFCC_B,'/content/drive/MyDrive/Pretraining',name='MFCC_on_RAVDS',labels=Testy,test_gen=TEST)
# Write architecture (.json) and weights (.h5), then the micro-average ROC plot.
Blogger._save()
Blogger.calc_auc()