<a href="https://colab.research.google.com/github/Karthick47v2/mock-buddy/blob/base-dev/audio_dv.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
 # Install the Kaggle CLI, register the API token and fetch the three corpora.
 !pip install kaggle
 # -p: do not fail when ~/.kaggle already exists (e.g. when this cell is re-run)
 !mkdir -p ~/.kaggle
 !cp kaggle.json ~/.kaggle/
 !chmod 600 ~/.kaggle/kaggle.json
 !kaggle datasets download uwrfkaggler/ravdess-emotional-speech-audio
 !kaggle datasets download ejlok1/surrey-audiovisual-expressed-emotion-savee
 !kaggle datasets download ejlok1/toronto-emotional-speech-set-tess

 # -n: never overwrite (no interactive "replace?" prompt on re-run); -q: keep the cell output short
 !unzip -n -q ravdess-emotional-speech-audio
 !unzip -n -q surrey-audiovisual-expressed-emotion-savee
 !unzip -n -q toronto-emotional-speech-set-tess

In [None]:
# Upgrade librosa to the latest release.
# NOTE(review): presumably required for librosa.display.waveshow used below — confirm the minimum version and pin it.
!pip install librosa --upgrade

In [4]:
import glob
import pandas as pd
import numpy as np
import librosa 
import librosa.display
import matplotlib.pyplot as plt

import IPython.display as ipd 

In [None]:
# 1 - neutral 
# 2 - calm 
# 3 - happy
# 4 - sad
# 5 - angry
# 6 - fear
# 7 - disgust 
# 8 - surprise

# Take calm, happy, surprise as POSITIVE (1): codes 2, 3, 8
# and sad, angry, fear, disgust as NEGATIVE (0): codes 4, 5, 6, 7
# drop neutral (1)

# gender 
# 0 - f
# 1 - m

In [5]:
# Every file found under the extracted Actor_* folders.
path_list = glob.glob('/content/Actor_*/**')

# The text before the first '.' is split on '-': field 2 is the emotion code,
# the last field is a numeric id whose parity gives the gender flag (odd -> 1, even -> 0).
emotion_list = []
gender_list = []
for clip_path in path_list:
  fields = clip_path.split('.')[0].split('-')
  emotion_list.append(int(fields[2]))
  gender_list.append(int(fields[-1]) % 2)

df = pd.DataFrame(dict(path=path_list, emotion=emotion_list, gender=gender_list))

In [None]:
# Class balance of the two label columns.
for label_col in ('emotion', 'gender'):
  print(df[label_col].value_counts())

In [7]:
# Feature-extraction settings shared by the helpers below.
n_mfcc = 13           # number of MFCC coefficients requested from librosa
input_duration = 3    # `duration` passed to librosa.load (seconds)
sample_rate = 44_100  # `sr` passed to librosa.load (Hz)

def wavplot(path):
  """Load the audio file at `path` (resampled to `sample_rate`) and show its waveform.

  Nothing is returned; the figure is displayed with `plt.show()`.
  """
  signal, rate = librosa.load(path, sr=sample_rate)

  plt.figure(figsize=(15, 5))
  librosa.display.waveshow(signal, sr=rate)
  plt.gca().set(xlabel='Time', ylabel='Amplitude')
  plt.show()

def mfccplot(path):
  """Load an audio file and display its MFCC matrix as an image.

  Args:
    path: location of the audio file, forwarded to `librosa.load`.

  The clip is resampled to the module-level `sample_rate` and `n_mfcc`
  coefficients are computed. Nothing is returned; the figure is shown.
  """
  y, sr = librosa.load(path, sr=sample_rate)

  # The audio must be passed as `y=`: librosa >= 0.10 made the feature
  # extractor arguments keyword-only, so a positional call raises TypeError.
  mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)

  plt.figure(figsize=(15,5))
  librosa.display.specshow(mfccs, sr=sr, hop_length=512)
  plt.xlabel('Time')
  plt.ylabel('MFCC')
  plt.colorbar()
  plt.show()

def extract_mfcc(path):
  """Return a per-frame MFCC summary for one audio file.

  Args:
    path: location of the audio file, forwarded to `librosa.load`.

  Returns:
    A 1-D numpy array: the MFCC matrix averaged over axis 0, i.e. one value
    per column of the matrix returned by `librosa.feature.mfcc`.

  Loading starts 0.5 s into the file and reads at most `input_duration`
  seconds at `sample_rate`.
  """
  y, sr = librosa.load(path, duration=input_duration, sr=sample_rate, offset=0.5)

  # The audio must be passed as `y=`: librosa >= 0.10 made the feature
  # extractor arguments keyword-only, so a positional call raises TypeError.
  mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)
  return np.mean(mfccs, axis=0)

In [None]:
# Rows that the example cells below plot and play back.
tuple(df.iloc[row] for row in (0, 1, 2, 3, 6, 11, 14, 36))

In [None]:
# happy
clip = df.loc[0, 'path']
wavplot(clip)
mfccplot(clip)
ipd.Audio(clip)

In [None]:
# disgust
clip = df.loc[1, 'path']
wavplot(clip)
mfccplot(clip)
ipd.Audio(clip)

In [None]:
# angry
clip = df.loc[2, 'path']
wavplot(clip)
mfccplot(clip)
ipd.Audio(clip)

In [None]:
# fear
clip = df.loc[3, 'path']
wavplot(clip)
mfccplot(clip)
ipd.Audio(clip)

In [None]:
# sad
wavplot(df['path'][6])
# mfccplot only draws the figure and returns None, so its result is not kept
# (the previous `mfcc_s = ...` binding always held None and was never read).
mfccplot(df['path'][6])
ipd.Audio(df['path'][6])

In [None]:
# surprise
clip = df.loc[11, 'path']
wavplot(clip)
mfccplot(clip)
ipd.Audio(clip)

In [None]:
# calm
clip = df.loc[14, 'path']
wavplot(clip)
mfccplot(clip)
ipd.Audio(clip)

In [None]:
# neutral
clip = df.loc[36, 'path']
wavplot(clip)
mfccplot(clip)
ipd.Audio(clip)

In [21]:
def extend_mfcc(lst, n):
    """Pad `lst` in place with `n` trailing zeros and return the same list.

    A non-positive `n` adds nothing, because `[0] * n` is then empty.
    """
    padding = [0] * n
    lst += padding  # in-place extend: the caller's list object is modified
    return lst

In [None]:
# One plain-list feature vector per clip, in path_list order.
mfcc_list = [frames.tolist() for frames in map(extract_mfcc, path_list)]

In [8]:
# aug
def noise(y, val=0.005):
  """Return a float64 copy of `y` with random Gaussian noise added.

  Args:
    y: 1-D numpy array of audio samples.
    val: upper bound of the noise scale relative to `np.amax(y)`; the actual
      scale is `val * U(0, 1) * np.amax(y)`. It has a default so that
      `noise(y)` (as used in the sample cell) is a valid call.

  Returns:
    A float64 array with the same length as `y`.
  """
  noise_a = val * np.random.uniform() * np.amax(y)
  return y.astype('float64') + noise_a * np.random.normal(size=y.shape[0])

def pitch_sh(y, sr=44100, max_steps=4.0):
  """Return `y` pitch-shifted by a random amount.

  Args:
    y: 1-D numpy array of audio samples.
    sr: sampling rate of `y`; defaults to the 44100 previously hard-coded here.
    max_steps: upper bound of the shift; the shift is `max_steps * U(0, 1)`
      steps with 12 bins per octave (the default 4.0 matches the former `2 * 2`).

  Returns:
    The array produced by `librosa.effects.pitch_shift`.
  """
  pitch_shift = max_steps * np.random.uniform()
  return librosa.effects.pitch_shift(y.astype('float64'), sr=sr, n_steps=pitch_shift, bins_per_octave=12)

In [38]:
# sample
# Load at sample_rate so the playback cells (rate=sample_rate) match the data.
samp_y, samp_sr = librosa.load(path_list[0], sr=sample_rate)

# noise() takes the noise level as its second argument; 0.05 is the largest
# level used by the augmentation loop.
noised_y = noise(samp_y, 0.05)
p_shifted_y = pitch_sh(samp_y)

In [None]:
# Waveform and playback of the unmodified sample.
plt.figure(figsize=(15, 5))
librosa.display.waveshow(samp_y, sr=samp_sr)
plt.gca().set(xlabel='Time', ylabel='Amplitude')
plt.show()

ipd.Audio(samp_y, rate=sample_rate)

In [None]:
# Waveform and playback of the noise-augmented sample.
plt.figure(figsize=(15, 5))
librosa.display.waveshow(noised_y, sr=samp_sr)
plt.gca().set(xlabel='Time', ylabel='Amplitude')
plt.show()

ipd.Audio(noised_y, rate=sample_rate)

In [None]:
# Waveform and playback of the pitch-shifted sample.
plt.figure(figsize=(15, 5))
librosa.display.waveshow(p_shifted_y, sr=samp_sr)
plt.gca().set(xlabel='Time', ylabel='Amplitude')
plt.show()

ipd.Audio(p_shifted_y, rate=sample_rate)

In [None]:
# Augment every clip with three noisy copies and one pitch-shifted copy.
# NOTE: features and labels are appended to mfcc_list, emotion_list and
# gender_list in place, so run this cell exactly once after mfcc_list is built;
# re-running it appends the augmented rows again.
for i, val in enumerate(path_list):
  y, sr = librosa.load(val, duration=input_duration, sr=sample_rate, offset=0.5)

  for j in [0.05, 0.01, 0.005]:
    noised_y = noise(y, j)

    # Audio is passed as `y=`: librosa >= 0.10 rejects it as a positional argument.
    mfccs = librosa.feature.mfcc(y=noised_y, sr=sr, n_mfcc=n_mfcc)
    mfcc_list.append(np.mean(mfccs, axis=0).tolist())

    # The augmented copy keeps the labels of its source clip (index i).
    emotion_list.append(emotion_list[i])
    gender_list.append(gender_list[i])

  p_shifted_y = pitch_sh(y)

  mfccs = librosa.feature.mfcc(y=p_shifted_y, sr=sr, n_mfcc=n_mfcc)
  mfcc_list.append(np.mean(mfccs, axis=0).tolist())

  emotion_list.append(emotion_list[i])
  gender_list.append(gender_list[i])

  

In [10]:
# Zero-pad every feature vector to 259 entries, then spread the entries over
# columns named '0' .. '258' next to the two label columns.
filled_mfcc_list = [extend_mfcc(vec, 259 - len(vec)) for vec in mfcc_list]
feature_columns = {str(col): [vec[col] for vec in filled_mfcc_list] for col in range(259)}
train_test_df = pd.DataFrame({'emotion': emotion_list, 'gender': gender_list, **feature_columns})

In [11]:
# Drop neutral clips (code 1), then collapse the remaining codes to a binary label:
# calm/happy/surprise (2, 3, 8) -> 1, sad/angry/fear/disgust (4, 5, 6, 7) -> 0.
# The result is assigned back instead of calling replace(..., inplace=True) on
# train_test_df['emotion']: under pandas Copy-on-Write that chained in-place call
# modifies a temporary and leaves the frame unchanged.
# NOTE: run once per freshly built train_test_df — on a re-run, label 1 would be dropped as "neutral".
train_test_df = train_test_df[train_test_df['emotion'] != 1].copy()
train_test_df['emotion'] = (
    train_test_df['emotion']
    .replace([2, 3, 8], 1)
    .replace([4, 5, 6, 7], 0)
)

In [12]:
# Display the assembled table: the two label columns followed by feature columns '0'..'258'.
train_test_df

Unnamed: 0,emotion,gender,0,1,2,3,4,5,6,7,...,249,250,251,252,253,254,255,256,257,258
0,1,1,-42.565693,-45.723320,-54.830330,-54.560692,-55.215652,-53.613274,-51.695705,-52.152580,...,-59.006248,-59.611973,-58.766659,-58.319504,-56.437336,-55.988224,-57.266418,-59.833511,-60.339657,-60.883591
1,0,1,-58.282578,-55.270210,-54.399715,-56.411068,-55.321575,-54.209984,-55.636570,-56.426567,...,-34.589397,-34.243053,-33.655228,-32.269119,-31.086882,-29.989555,-29.105869,-26.915915,-26.996634,-24.238993
2,0,1,-40.761723,-42.155186,-43.708538,-44.351032,-44.351032,-43.947445,-43.190849,-43.290970,...,-24.618227,-24.178001,-26.103615,-25.431160,-25.081776,-23.242748,-23.397562,-23.668596,-21.625772,-17.608624
3,0,1,-48.062202,-48.062202,-47.749996,-48.062202,-48.062202,-47.751663,-47.382103,-45.814663,...,-47.090385,-46.408684,-45.647617,-45.908638,-46.386456,-47.419334,-48.062202,-48.062202,-47.807083,-47.791618
4,0,1,-41.174011,-44.095951,-48.757656,-50.474705,-51.651203,-50.363728,-48.455132,-50.036892,...,-35.615089,-34.782650,-34.602928,-33.255592,-31.265129,-30.362068,-26.578074,-24.863678,-17.450375,-11.114177
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7195,0,0,-61.470255,-61.685223,-62.664492,-63.101557,-65.719938,-65.743158,-66.646538,-67.913451,...,-64.020833,-67.159888,-67.426587,-67.213719,-66.698409,-66.348706,-66.761418,-66.511344,-66.408772,-66.841293
7196,1,0,-42.859975,-41.284255,-43.507908,-41.025778,-39.374642,-39.518081,-41.581720,-41.278283,...,-39.623291,-40.463192,-41.521979,-41.027986,-42.508374,-41.623047,-38.640859,-40.623519,-41.862037,-41.772543
7197,1,0,-56.460777,-54.144900,-53.137138,-54.111981,-55.137571,-54.625375,-54.102019,-52.958497,...,-51.693983,-53.968713,-53.344916,-53.307824,-53.406349,-50.984629,-50.605158,-51.364151,-50.239456,-49.371110
7198,1,0,-66.951501,-61.113868,-59.468495,-60.722996,-62.403052,-62.995126,-63.394265,-63.575390,...,-58.539882,-59.030686,-58.814419,-58.209728,-58.702521,-57.475045,-55.483812,-55.519759,-53.485806,-50.986704


In [28]:
# Mount Google Drive at /content/gdrive; the generated CSV files are moved there by later cells.
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [14]:
# Write the table to disk without the index column.
train_test_df.to_csv(path_or_buf='ravdess.csv', index=False)

In [15]:
# Create the target folder first so the move does not fail on a Drive without SER/.
!mkdir -p /content/gdrive/MyDrive/SER && mv ravdess.csv /content/gdrive/MyDrive/SER/

In [8]:
# test
# from the SAVEE dataset, only the fear and happy clips

In [10]:
path_list = glob.glob('/content/ALL/**')

# Label = first character of the second '_'-separated piece of the text before the first '.'.
emotion_list = [p.split('.')[0].split('_')[1][0] for p in path_list]
gender_list = [1 for _ in emotion_list]  # every clip gets gender flag 1
df = pd.DataFrame(dict(path=path_list, emotion=emotion_list, gender=gender_list))

# Keep only the 'h' and 'f' labels.
filtered_df = df[df['emotion'].isin(['h', 'f'])]

In [None]:
# One plain-list feature vector per retained clip, in filtered_df order.
mfcc_list = [frames.tolist() for frames in map(extract_mfcc, filtered_df['path'].values)]

In [25]:
# Zero-pad every feature vector to 259 entries and lay them out as columns '0' .. '258'.
filled_mfcc_list = [extend_mfcc(vec, 259 - len(vec)) for vec in mfcc_list]
feature_columns = {str(col): [vec[col] for vec in filled_mfcc_list] for col in range(259)}
savees_df = pd.DataFrame({'emotion': filtered_df['emotion'].values, 'gender': filtered_df['gender'].values, **feature_columns})

In [26]:
# Encode the labels: 'h' -> 1, 'f' -> 0.
# The result is assigned back instead of calling replace(..., inplace=True) on
# savees_df['emotion']: under pandas Copy-on-Write that chained in-place call
# modifies a temporary and leaves the frame unchanged.
savees_df['emotion'] = savees_df['emotion'].replace(['h'], 1).replace(['f'], 0)

In [29]:
# Write the table to disk without the index column.
savees_df.to_csv(path_or_buf='test_savees.csv', index=False)

In [None]:
# from the TESS dataset, only the fear, happy, angry and disgust clips

In [39]:
path_list = glob.glob('/content/TESS Toronto emotional speech set data/**/**')

# Label = last '_'-separated token of the final path component, taken from the text before the first '.'.
emotion_list = [p.split('.')[0].split('/')[-1].split('_')[-1] for p in path_list]
gender_list = [0 for _ in emotion_list]  # every clip gets gender flag 0
df = pd.DataFrame(dict(path=path_list, emotion=emotion_list, gender=gender_list))

# Keep only the four labels used for the test set.
filtered_df = df[df['emotion'].isin(['fear', 'angry', 'disgust', 'happy'])]

In [None]:
# One plain-list feature vector per retained clip, in filtered_df order.
mfcc_list = [frames.tolist() for frames in map(extract_mfcc, filtered_df['path'].values)]

In [42]:
# Zero-pad every feature vector to 259 entries and lay them out as columns '0' .. '258'.
filled_mfcc_list = [extend_mfcc(vec, 259 - len(vec)) for vec in mfcc_list]
feature_columns = {str(col): [vec[col] for vec in filled_mfcc_list] for col in range(259)}
tess_df = pd.DataFrame({'emotion': filtered_df['emotion'].values, 'gender': filtered_df['gender'].values, **feature_columns})

In [43]:
# Encode the labels: 'happy' -> 1; 'fear', 'angry', 'disgust' -> 0.
# The result is assigned back instead of calling replace(..., inplace=True) on
# tess_df['emotion']: under pandas Copy-on-Write that chained in-place call
# modifies a temporary and leaves the frame unchanged.
tess_df['emotion'] = (
    tess_df['emotion']
    .replace(['happy'], 1)
    .replace(['fear', 'angry', 'disgust'], 0)
)

In [44]:
# Write the table to disk without the index column.
tess_df.to_csv(path_or_buf='test_tess.csv', index=False)

In [45]:
# Create the target folder first so the move does not fail on a Drive without SER/.
!mkdir -p /content/gdrive/MyDrive/SER && mv test_tess.csv test_savees.csv /content/gdrive/MyDrive/SER/