In [1]:
from google.colab import drive
# Mount Google Drive so the '/content/drive/MyDrive/...' paths used by the
# cells below (model files, feature CSV, audio) can be resolved.
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import IPython.display as ipd
from IPython.display import Audio 
from IPython.core.display import display
import numpy as np
import pandas as pd
import librosa
import matplotlib.pyplot as plt
from scipy.io import wavfile as wav
from sklearn import metrics 
from sklearn.model_selection import train_test_split 
from tensorflow import keras

In [3]:
def feature_extraction(filename):
  """Return one flat, time-averaged feature vector for an audio file.

  The file is loaded with librosa (first 12 seconds at most, sr=48000,
  'kaiser_fast' resampling). Every feature matrix is averaged over its
  frames, and the pieces are concatenated in this fixed order:

    chroma | 13 MFCCs | mel spectrogram | RMS | tonnetz | zero-crossing rate

  With librosa's default sizes this gives 12 + 13 + 128 + 1 + 6 + 1 = 161
  values, which is the length `predict` reshapes to.

  Args:
    filename: path of the audio file to analyse.

  Returns:
    1-D float64 numpy array with the concatenated features.
  """
  signal, sample_rate = librosa.load(
      filename, res_type='kaiser_fast', duration=12, sr=48000, offset=0)

  def time_mean(frames):
    # Average across frames, leaving one value per coefficient/bin.
    return np.mean(frames.T, axis=0)

  # Chroma is computed from a magnitude (energy) spectrum rather than a
  # power spectrogram.
  magnitude = np.abs(librosa.stft(signal))
  chroma_stft = time_mean(librosa.feature.chroma_stft(S=magnitude, sr=sample_rate))

  mfccs = time_mean(librosa.feature.mfcc(y=signal, sr=sample_rate, n_mfcc=13))

  melspec = time_mean(librosa.feature.melspectrogram(y=signal, sr=sample_rate))

  # RMS energy: a single scalar after averaging the per-frame values.
  rms = np.mean(
      librosa.feature.rms(y=signal, frame_length=1024, hop_length=512)[0], axis=0)

  # Tonnetz is taken from the harmonic component of the signal only.
  harmonic = librosa.effects.harmonic(signal)
  tonnetz = time_mean(librosa.feature.tonnetz(y=harmonic, sr=sample_rate))

  # Zero-crossing rate: also a single scalar after averaging.
  zcr = np.mean(
      librosa.feature.zero_crossing_rate(y=signal, frame_length=1024, hop_length=512)[0],
      axis=0)

  # The leading empty (float64) array keeps the result dtype float64 even
  # though the individual features may be float32.
  return np.hstack((np.array([]), chroma_stft, mfccs, melspec, rms, tonnetz, zcr))

'/content/drive/MyDrive/Colab_Notebooks/CNN_IEMOCAP_Experiment2/Experiments/1dcnn_iemocap_aug.h5'

In [6]:
# Folder on the mounted Drive that (judging by its name) holds the IEMOCAP
# utterance files. NOTE(review): not referenced by any other visible cell.
path_iemo = '/content/drive/MyDrive/Colab_Notebooks/CNN_IEMOCAP_Experiment2/data/Utterances_iemocap/'

In [207]:
# saved_model = keras.models.load_model('/content/drive/MyDrive/Colab_Notebooks/CNN_IEMOCAP_Experiment2/Experiments/best_1dcnn_iemocap_aug.h5')
def predict(model_path, audio_path,
            features_csv='/content/drive/MyDrive/Colab_Notebooks/CNN_IEMOCAP_Experiment2/Experiments/features_augmented.csv'):
  """Predict the emotion label of one audio file with a saved Keras model.

  Steps:
    1. Read the feature table `features_csv` once. It supplies the class
       names (column `class_label`) and the per-column mean / standard
       deviation used to standardise the new sample.
    2. Show an audio player for `audio_path` in the cell output.
    3. Load the model from `model_path`.
    4. Extract features with `feature_extraction`, standardise them and feed
       them to the model as a tensor of shape (1, n_features, 1).
    5. Print the winning label, the label order and the raw model output.

  Args:
    model_path: path of the saved Keras model.
    audio_path: path of the audio file to classify.
    features_csv: CSV with the feature columns plus the `path`,
      `class_label`, `method` and `gender` columns. Defaults to the
      augmented feature table used by this notebook.

  Returns:
    The predicted class label (one entry of the `class_label` column).
  """
  # Function-scope import: LabelEncoder is not imported at notebook level.
  from sklearn.preprocessing import LabelEncoder

  train_table = pd.read_csv(features_csv, index_col=0)

  # `classes_` is the sorted array of distinct labels; position i is the label
  # the encoder maps to integer i, i.e. the order of the model's outputs
  # (assuming the model was trained with the same encoding — confirm).
  rounded_labels = LabelEncoder().fit(np.array(train_table.class_label)).classes_

  display(Audio(audio_path))

  saved_model = keras.models.load_model(model_path)

  # Statistics of the stored features, used to standardise the new sample.
  train_features = train_table.drop(['path', 'class_label', 'method', 'gender'], axis=1)
  mean = np.mean(train_features, axis=0)
  std = np.std(train_features, axis=0)

  temp_feat = feature_extraction(audio_path)
  temp_feat = np.array((temp_feat - mean) / std)

  # -1 lets the feature length follow the extractor instead of a magic 161.
  predict_arr = saved_model.predict(temp_feat.reshape(1, -1, 1))
  predicted_label = rounded_labels[np.argmax(predict_arr)]

  print(f'Possibly the emotion is ... {predicted_label}')
  print(f'Array with corresponding emotions is ... {rounded_labels}')
  print(f'The array with predictions is ... {predict_arr}')
  return predicted_label

In [208]:
# Root folder of the IEMOCAP release on the mounted Drive.
path = '/content/drive/MyDrive/Colab_Notebooks/CNN_IEMOCAP_Experiment2/data/IEMOCAP_full_release/'
# Path of one sample utterance, relative to `path`; the two are joined with
# `path+temp` when calling `predict`.
temp = 'Session1/sentences/wav/Ses01F_impro07/Ses01F_impro07_F003.wav'

'/content/drive/MyDrive/Colab_Notebooks/CNN_IEMOCAP_Experiment2/Experiments/testhappy.m4a'

In [254]:
# Classify the sample utterance (`path` + `temp`) with the
# '1dcnn_iemocap_aug.h5' model, then echo the predicted label.
x = predict(
  '/content/drive/MyDrive/Colab_Notebooks/CNN_IEMOCAP_Experiment2/Experiments/1dcnn_iemocap_aug.h5',
  path + temp,
)
x

Possibly the emotion is ... hap
Array with corresponding emotions is ... ['ang' 'hap' 'neu' 'sad']
The array with predictions is ... [[1.5292866e-14 1.0000000e+00 4.2772779e-13 1.1724255e-19]]


'hap'

In [222]:
# all imports
from IPython.display import Javascript
from google.colab import output
from base64 import b64decode


# JavaScript injected into the cell output. It defines `record(ms)`, which
# captures microphone audio for `ms` milliseconds with MediaRecorder and
# resolves to a base64 data URL of the recorded blob.
RECORD = """
const sleep  = time => new Promise(resolve => setTimeout(resolve, time))
const b2text = blob => new Promise(resolve => {
  const reader = new FileReader()
  reader.onloadend = () => resolve(reader.result)
  reader.readAsDataURL(blob)
})
var record = async time => {
  const stream = await navigator.mediaDevices.getUserMedia({ audio: true })
  try {
    const recorder = new MediaRecorder(stream)
    const chunks = []
    recorder.ondataavailable = e => chunks.push(e.data)
    // Register the stop handler before stopping so the event cannot be missed.
    const stopped = new Promise(resolve => { recorder.onstop = resolve })
    recorder.start()
    await sleep(time)
    recorder.stop()
    await stopped
    return await b2text(new Blob(chunks))
  } finally {
    // Always release the microphone, even if recording failed.
    stream.getTracks().forEach(track => track.stop())
  }
}
"""
def record(sec=5, out_path='/content/drive/MyDrive/Colab_Notebooks/CNN_IEMOCAP_Experiment2/Experiments/audio.wav'):
  """Record audio from the browser microphone and save it to `out_path`.

  Injects the `RECORD` JavaScript into the cell output, runs its `record`
  function for `sec` seconds, decodes the returned base64 data URL and writes
  the raw bytes to disk.

  NOTE(review): the bytes are written exactly as the browser's MediaRecorder
  produced them; despite the default '.wav' name the container is probably
  WebM/Ogg rather than RIFF/WAV — confirm before relying on the extension.

  Args:
    sec: recording length in seconds.
    out_path: where the recording is written (overwritten on every call).

  Returns:
    `out_path`, the path of the file that was written.
  """
  display(Javascript(RECORD))
  data_url = output.eval_js('record(%d)' % (sec*1000))
  # A data URL looks like "data:<mime>;base64,<payload>"; keep the payload.
  audio_bytes = b64decode(data_url.split(',', 1)[1])
  with open(out_path, 'wb') as f:
    f.write(audio_bytes)
  return out_path



In [198]:
import warnings
# Suppress every warning for the rest of the session to keep cell output
# short. NOTE(review): this also hides deprecation and numerical warnings.
warnings.filterwarnings('ignore')

In [230]:
# audio, sr = get_audio()

import time

# Record from the microphone (default length) and show where it was saved.
sst = record()
print(sst)
# Short pause before the file is read back — presumably to let the write to
# Drive settle; confirm whether it is still needed.
time.sleep(1)

# Classify the fresh recording with the 'best_1dcnn_iemocap_aug.h5' model.
x = predict(
  '/content/drive/MyDrive/Colab_Notebooks/CNN_IEMOCAP_Experiment2/Experiments/best_1dcnn_iemocap_aug.h5',
  sst,
)

<IPython.core.display.Javascript object>

/content/drive/MyDrive/Colab_Notebooks/CNN_IEMOCAP_Experiment2/Experiments/audio.wav


Possibly the emotion is ... hap
Array with corresponding emotions is ... ['ang' 'hap' 'neu' 'sad']
The array with predictions is ... [[4.3123990e-01 5.6752521e-01 1.1476519e-03 8.7213302e-05]]


In [235]:
import time

# Record from the microphone (default length) and show where it was saved.
sst = record()
print(sst)
# Short pause before the file is read back — presumably to let the write to
# Drive settle; confirm whether it is still needed.
time.sleep(1)

# Classify the fresh recording with the 'best_1dcnn_iemocap_aug.h5' model.
x = predict(
  '/content/drive/MyDrive/Colab_Notebooks/CNN_IEMOCAP_Experiment2/Experiments/best_1dcnn_iemocap_aug.h5',
  sst,
)

<IPython.core.display.Javascript object>

/content/drive/MyDrive/Colab_Notebooks/CNN_IEMOCAP_Experiment2/Experiments/audio.wav


Possibly the emotion is ... sad
Array with corresponding emotions is ... ['ang' 'hap' 'neu' 'sad']
The array with predictions is ... [[1.2848754e-03 1.3067683e-03 3.6805258e-07 9.9740797e-01]]


In [241]:
import time

# Record from the microphone (default length) and show where it was saved.
sst = record()
print(sst)
# Short pause before the file is read back — presumably to let the write to
# Drive settle; confirm whether it is still needed.
time.sleep(1)

# Classify the fresh recording with the 'best_1dcnn_iemocap_aug.h5' model.
x = predict(
  '/content/drive/MyDrive/Colab_Notebooks/CNN_IEMOCAP_Experiment2/Experiments/best_1dcnn_iemocap_aug.h5',
  sst,
)

<IPython.core.display.Javascript object>

/content/drive/MyDrive/Colab_Notebooks/CNN_IEMOCAP_Experiment2/Experiments/audio.wav


Possibly the emotion is ... sad
Array with corresponding emotions is ... ['ang' 'hap' 'neu' 'sad']
The array with predictions is ... [[8.2756503e-07 1.2445719e-04 1.2194247e-02 9.8768044e-01]]


In [245]:
import time

# Record from the microphone (default length) and show where it was saved.
sst = record()
print(sst)
# Short pause before the file is read back — presumably to let the write to
# Drive settle; confirm whether it is still needed.
time.sleep(1)

# Classify the fresh recording with the 'best_1dcnn_iemocap_aug.h5' model.
x = predict(
  '/content/drive/MyDrive/Colab_Notebooks/CNN_IEMOCAP_Experiment2/Experiments/best_1dcnn_iemocap_aug.h5',
  sst,
)

<IPython.core.display.Javascript object>

/content/drive/MyDrive/Colab_Notebooks/CNN_IEMOCAP_Experiment2/Experiments/audio.wav


Possibly the emotion is ... neu
Array with corresponding emotions is ... ['ang' 'hap' 'neu' 'sad']
The array with predictions is ... [[4.6261584e-07 1.3192632e-06 9.9999785e-01 3.3943934e-07]]


In [251]:
import time

# Record from the microphone (default length) and show where it was saved.
sst = record()
print(sst)
# Short pause before the file is read back — presumably to let the write to
# Drive settle; confirm whether it is still needed.
time.sleep(1)

# Classify the fresh recording with the 'best_1dcnn_iemocap_aug.h5' model.
x = predict(
  '/content/drive/MyDrive/Colab_Notebooks/CNN_IEMOCAP_Experiment2/Experiments/best_1dcnn_iemocap_aug.h5',
  sst,
)

<IPython.core.display.Javascript object>

/content/drive/MyDrive/Colab_Notebooks/CNN_IEMOCAP_Experiment2/Experiments/audio.wav


Possibly the emotion is ... neu
Array with corresponding emotions is ... ['ang' 'hap' 'neu' 'sad']
The array with predictions is ... [[2.1843549e-02 1.7621590e-02 9.6053481e-01 1.6851477e-08]]


In [252]:
import time

# Record from the microphone (default length) and show where it was saved.
sst = record()
print(sst)
# Short pause before the file is read back — presumably to let the write to
# Drive settle; confirm whether it is still needed.
time.sleep(1)

# Classify the fresh recording with the 'best_1dcnn_iemocap_aug.h5' model.
x = predict(
  '/content/drive/MyDrive/Colab_Notebooks/CNN_IEMOCAP_Experiment2/Experiments/best_1dcnn_iemocap_aug.h5',
  sst,
)

<IPython.core.display.Javascript object>

/content/drive/MyDrive/Colab_Notebooks/CNN_IEMOCAP_Experiment2/Experiments/audio.wav


Possibly the emotion is ... neu
Array with corresponding emotions is ... ['ang' 'hap' 'neu' 'sad']
The array with predictions is ... [[6.6628058e-07 3.9323178e-04 9.9960607e-01 2.8773728e-08]]


In [255]:
import time

# Record from the microphone (default length) and show where it was saved.
sst = record()
print(sst)
# Short pause before the file is read back — presumably to let the write to
# Drive settle; confirm whether it is still needed.
time.sleep(1)

# Classify the fresh recording with the 'best_1dcnn_iemocap_aug.h5' model.
x = predict(
  '/content/drive/MyDrive/Colab_Notebooks/CNN_IEMOCAP_Experiment2/Experiments/best_1dcnn_iemocap_aug.h5',
  sst,
)

<IPython.core.display.Javascript object>

/content/drive/MyDrive/Colab_Notebooks/CNN_IEMOCAP_Experiment2/Experiments/audio.wav


Possibly the emotion is ... neu
Array with corresponding emotions is ... ['ang' 'hap' 'neu' 'sad']
The array with predictions is ... [[2.1635864e-09 2.5518775e-01 7.4481231e-01 4.6462064e-13]]


In [256]:
import time

# Record from the microphone (default length) and show where it was saved.
sst = record()
print(sst)
# Short pause before the file is read back — presumably to let the write to
# Drive settle; confirm whether it is still needed.
time.sleep(1)

# Classify the fresh recording with the 'best_1dcnn_iemocap_aug.h5' model.
x = predict(
  '/content/drive/MyDrive/Colab_Notebooks/CNN_IEMOCAP_Experiment2/Experiments/best_1dcnn_iemocap_aug.h5',
  sst,
)

<IPython.core.display.Javascript object>

/content/drive/MyDrive/Colab_Notebooks/CNN_IEMOCAP_Experiment2/Experiments/audio.wav


Possibly the emotion is ... ang
Array with corresponding emotions is ... ['ang' 'hap' 'neu' 'sad']
The array with predictions is ... [[9.9992824e-01 7.1727380e-05 7.3406255e-15 9.8492259e-14]]
