<a href="https://colab.research.google.com/github/DorAzaria/Sentiment-Analysis-Deep-Learning-Methods-For-Speech-Recognition/blob/main/preprocess.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from google.colab import drive
drive.mount('/content/data/')

Mounted at /content/data/


# **IMPORTS**

---

In [2]:
import numpy as np
import pandas as pd
import os
import librosa
import sys
import IPython
import matplotlib
import matplotlib.pyplot as plt
import requests
import torch
import pickle
import torchaudio
from pathlib import Path
from tqdm import tqdm
from sklearn.model_selection import train_test_split

# **STATICS**
---



In [25]:
torch.random.manual_seed(0)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
bundle = torchaudio.pipelines.WAV2VEC2_ASR_BASE_960H
model = bundle.get_model().to(device)
EMOTIONS = {0: 'surprise', 1: 'calm', 2: 'happy', 3: 'sad', 4: 'angry', 5: 'fear', 6: 'disgust'}  # surprise has been changed from 8 to 0
RAVDESS_PATH = '/content/data/MyDrive/dl/ravdess'
TESS_PATH = '/content/data/MyDrive/dl/tess'
URDU_PATH = '/content/data/MyDrive/dl/urdu'
SAMPLE_RATE = 16000
count_calm = 0
data = pd.DataFrame(columns=['Emotion', 'Gender', 'Path'])

# **IMPORT RAVDESS**
---

In [26]:
for dirname, _, filenames in os.walk(RAVDESS_PATH):
    for filename in filenames:
        file_path = os.path.join('\\', dirname, filename)
        identifiers = filename.split('.')[0].split('-')

        emotion = (int(identifiers[2]))
        flag_change = False

        if emotion == 2:
            emotion = 1

        if emotion == 8:  # surprise has been changed from 8 to 0
            emotion = 0
            flag_change = True

        if emotion == 1:
            count_calm += 1
            flag_change = True

        if int(identifiers[6]) % 2 == 0:  # actor id. (even = female, odd = male)
            gender = 'female'
        else:
            gender = 'male'

        if flag_change:
            data = data.append({"Emotion": emotion,
                                "Gender": gender,
                                "Path": file_path
                                },
                               ignore_index=True
                               )
        else:
            emotion -= 1
            data = data.append({"Emotion": emotion,
                                "Gender": gender,
                                "Path": file_path
                                },
                               ignore_index=True
                               )

# **IMPORT TESS**
---

In [27]:
for dirname, _, filenames in os.walk(TESS_PATH):
    for filename in filenames:
        file_path = os.path.join('\\', dirname, filename)
        identifiers = filename.split('.')[0].split('_')
        emotion = identifiers[2]

        if emotion == 'angry':
            emotion = 4
        if emotion == 'disgust':
            emotion = 6
        if emotion == 'fear':
            emotion = 5
        if emotion == 'happy':
            emotion = 2
        if emotion == 'neutral':
            emotion = 1
            count_calm += 1
        if emotion == 'ps':
            emotion = 0
        if emotion == 'sad':
            emotion = 3

        if identifiers[0] == 'YAF':  # actor id. (even = female, odd = male)
            gender = 'female'
        else:
            gender = 'male'

        if emotion == 1 and count_calm > 592:
            continue

        data = data.append({"Emotion": emotion,
                            "Gender": gender,
                            "Path": file_path
                            },
                           ignore_index=True
                           )


# **IMPORT URDU**
---

In [28]:
for dirname, _, filenames in os.walk(URDU_PATH):
    for filename in filenames:
        file_path = os.path.join('\\', dirname, filename)
        identifiers = filename.split('.')[0].split('_')
        emotion = dirname[10:]

        if emotion == 'Angry':
            emotion = 4
        if emotion == 'Happy':
            emotion = 2
        if emotion == 'Neutral':
            emotion = 1
        if emotion == 'Sad':
            emotion = 3

        if identifiers[0][1] == 'F':  # actor id. (even = female, odd = male)
            gender = 'female'
        else:
            gender = 'male'

        data = data.append({"Emotion": emotion,
                            "Gender": gender,
                            "Path": file_path
                            },
                           ignore_index=True
                           )

# **SAMPLE & NORMALIZATION**
---

In [29]:
def speech_file_to_array_fn(path):
    waveform, sampling_rate = torchaudio.load(filepath=path, num_frames=SAMPLE_RATE * 3)
    waveform = waveform.to(device)

    if (len(waveform[0]) < 48000):
        print(f'less than 3 seconds: {path}')

    return waveform


def normalize_features(features):
    for i in range(len(features[0])):
        mlist = features[0][i]
        features[0][i] = 2 * (mlist - np.max(mlist)) / (np.max(mlist) - np.min(mlist)) + 1

# **SAMPLE DATA**
---
EACH SAMPLE SHAPE IS (1, 149, 32)

In [32]:
signals = []

j = 1
total_data = len(data)
with torch.inference_mode():
    for i, file_path in enumerate(data.Path):
        emission, _ = model(speech_file_to_array_fn(file_path))
        features = emission.detach().cpu().numpy()
        normalize_features(features)
        check = 0
        if features.shape[1] != 149:
            print(f'\n{j} is not in shape of 149, current shape: {features.shape[1]}')
            check += 1
        if features.shape[2] != 32:
            print(f'\n{j} is not in shape of 32, current shape: {features.shape[2]}')
            check += 1
        max = np.max(features)
        min = np.min(features)
        if max > 1:
            print(f'\n{j} max is not 1, current max: {max}')
            check += 1
        if min < -1:
            print(f'\n{j} min is not -1, current min: {min}')
            check += 1

        if check == 0:
            row = (file_path, features, data.iloc[i]['Emotion'])
            signals.append(row)
        else:
            total_data -= 1

        j += 1
        percent = (len(signals) / total_data) * 100
        print("\r Processed {}/{} files. ({}%) ".format(len(signals), total_data, int(percent)), end='')


 Processed 127/4544 files. (2%) 
128 max is not 1, current max: 3.6543893814086914

128 min is not -1, current min: -17.06072235107422
 Processed 575/4543 files. (12%) 
577 max is not 1, current max: 3.4985759258270264

577 min is not -1, current min: -18.883403778076172
 Processed 591/4542 files. (13%) 
594 max is not 1, current max: 3.7599971294403076

594 min is not -1, current min: -19.477046966552734
 Processed 796/4541 files. (17%) 
800 max is not 1, current max: 11.385799407958984

800 min is not -1, current min: -26.65748405456543
 Processed 797/4540 files. (17%) 
802 max is not 1, current max: 3.2526369094848633

802 min is not -1, current min: -18.596477508544922
 Processed 1835/4539 files. (40%) less than 3 seconds: /content/data/MyDrive/dl/tess/OAF_Fear/OAF_cool_fear.wav

1841 is not in shape of 149, current shape: 122
 Processed 1835/4538 files. (40%) less than 3 seconds: /content/data/MyDrive/dl/tess/OAF_Fear/OAF_gas_fear.wav

1842 is not in shape of 149, current shape: 1

# **SET DISTRIBUTION**
---
DATASET SIZE IS 3493 SAMPLES

[0: surprise, 1 :calm, 2:happy, 3: sad, 4: angry, 5: fear, 6: disgust]

Before: [407, 610, 522, 691, 469, 202, 592], total: 3493

After: [929, 610, 1954], total: 3493

In [45]:

new_distribution = []
counter = [0 for i in range(7)]
classes = [0, 0, 0]
for tup in signals:
    m_class = 0
    emotion = tup[2]
    

    if emotion == 'ata/MyDrive/dl/urdu/Angry':
      emotion = 4
    if emotion == 'ata/MyDrive/dl/urdu/Happy':
      emotion = 2
    if emotion == 'ata/MyDrive/dl/urdu/Sad':
      emotion = 3
    if emotion == 'ata/MyDrive/dl/urdu/Neutral':
      emotion = 1   
    counter[emotion] += 1

    if emotion == 0 or emotion == 2: # POSITIVE( HAPPY, SURPRISED )
        m_class = 0
    if emotion == 1: # NEUTRAL( CALM ) USELESS CONDITION ONLY FOR UNDERSTANDING
        m_class = 1
    if emotion == 3 or emotion == 4 or emotion == 5 or emotion == 6: # NEGATIVE( SAD, ANGRY, FEAR, DISGUST )
        m_class = 2
    classes[m_class] += 1

    new_distribution.append((tup[0], tup[1], m_class))

print(f'Before: {counter}, total: {np.sum(counter)}')
print(f'After: {classes}, total: {np.sum(classes)}')

Before: [407, 610, 522, 691, 469, 202, 592], total: 3493
After: [929, 610, 1954], total: 3493


# **SAVE DATA**

---



In [46]:
file_pth = open('/content/dataset.pth', 'wb')
pickle.dump(new_distribution, file_pth)

In [48]:
file_pth = open('/content/data/MyDrive/dl/dataset.pth', 'wb')
pickle.dump(new_distribution, file_pth)