In [1]:
!pip install pymrmr
!pip install audiomentations
!pip install pydub

Collecting pymrmr
  Downloading pymrmr-0.1.11.tar.gz (69 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m69.5/69.5 kB[0m [31m1.1 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: pymrmr
  Building wheel for pymrmr (setup.py) ... [?25l[?25hdone
  Created wheel for pymrmr: filename=pymrmr-0.1.11-cp310-cp310-linux_x86_64.whl size=390098 sha256=75fdd448fbb9482fb63be64f9b90a12da432efe0641273b8742dca783438381a
  Stored in directory: /root/.cache/pip/wheels/46/ae/55/4a2479c5f0de7eb363fe970cb18e4a750e03e4e63b1b5c2005
Successfully built pymrmr
Installing collected packages: pymrmr
Successfully installed pymrmr-0.1.11
Collecting audiomentations
  Downloading audiomentations-0.34.1-py3-none-any.whl (76 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m77.0/77.0 kB[0m [31m1.1 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: audiomentations
Successfully installed au

In [2]:
import os
import re
import librosa
import numpy as np
import pandas as pd
from audiomentations import Compose, TimeStretch, PitchShift, AddGaussianNoise
from pydub import AudioSegment
from pydub.effects import normalize
from sklearn.preprocessing import normalize
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score
from sklearn.preprocessing import MinMaxScaler

In [3]:
!mkdir ~/.kaggle
!touch ~/.kaggle/kaggle.json

api_token = {"username":"liseeg","key":"401cee8c329831c9c60638dd88d4ee45"}

import json

with open('/root/.kaggle/kaggle.json', 'w') as file:
    json.dump(api_token, file)

!chmod 600 ~/.kaggle/kaggle.json


In [4]:
import kaggle

kaggle.api.authenticate()

kaggle.api.dataset_download_files('ejlok1/surrey-audiovisual-expressed-emotion-savee', path='.', unzip=True)


In [5]:
#Fonctions de traitement des fichiers audios
def extract_features(y, sr, max_len=100):
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
    chroma = librosa.feature.chroma_stft(y=y, sr=sr)
    spectral_contrast = librosa.feature.spectral_contrast(y=y, sr=sr)


    mfccs = librosa.util.fix_length(mfccs, size = max_len, axis=1)
    chroma = librosa.util.fix_length(chroma, size = max_len, axis=1)
    spectral_contrast = librosa.util.fix_length(spectral_contrast, size = max_len, axis=1)

    flat_mfccs = np.ravel(mfccs)
    flat_chroma = np.ravel(chroma)
    flat_spectral_contrast = np.ravel(spectral_contrast)

    return flat_mfccs, flat_chroma, flat_spectral_contrast

def augment_audio(y, sr):

    augment = Compose([
        TimeStretch(min_rate=0.8, max_rate=1.2),
        PitchShift(min_semitones=-2, max_semitones=2),
        AddGaussianNoise(min_amplitude=0.001, max_amplitude=0.015)
    ])

    augmented_y = augment(samples=y, sample_rate=sr)

    return augmented_y

def process_audio(file_path):

    y, sr = librosa.load(file_path)
    y, _ = librosa.effects.trim(y)


    normalized_y = librosa.util.normalize(y)

    return normalized_y



audio_directory = "/content/ALL"


audio_files = []
speakers = []
emotions = []
positions = []
mfccs_list = []
chroma_list = []
spectral_contrast_list = []


emotion_mapping = {
    'a': 'anger',
    'd': 'disgust',
    'f': 'fear',
    'h': 'happiness',
    'n': 'neutral',
    'sa': 'sadness',
    'su': 'surprise'
}


for filename in os.listdir(audio_directory):
    if filename.endswith(".wav"):
        match = re.search(r'_(.*?)(\d+)', filename)
        speaker = filename[:2]
        emotion = emotion_mapping.get(match.group(1), 'unknown')
        position = filename[-6:-4]

        file_path = os.path.join(audio_directory, filename)


        y, sr = librosa.load(file_path)
        y, _ = librosa.effects.trim(y)


        normalized_y = librosa.util.normalize(y)


        original_mfccs, original_chroma, original_spectral_contrast = extract_features(normalized_y, sr)


        augmented_y = augment_audio(normalized_y, sr)
        augmented_mfccs, augmented_chroma, augmented_spectral_contrast = extract_features(augmented_y, sr)


        audio_files.extend([filename, f"augmented_{filename}"])
        speakers.extend([speaker, speaker])
        emotions.extend([emotion, emotion])
        positions.extend([position, position])
        mfccs_list.extend([original_mfccs, augmented_mfccs])
        chroma_list.extend([original_chroma, augmented_chroma])
        spectral_contrast_list.extend([original_spectral_contrast, augmented_spectral_contrast])



df = pd.DataFrame({
    'Audio_File': audio_files,
    'Speaker': speakers,
    'Emotion': emotions,
    'Position': positions,
    'MFCCs': mfccs_list,
    'Chroma': chroma_list,
    'Spectral_Contrast': spectral_contrast_list
})



In [6]:
#Une emotion contre les autres
def is_anger(emotion):
    return 1 if emotion == "anger" else 0
df["anger"] = df['Emotion'].apply(is_anger)


In [7]:
#Dataframe de chaque categories de features

mfccs_list_columns = [f'mfccs_{i}' for i in range(len(mfccs_list[0]))]
df_mfccs = pd.DataFrame(mfccs_list, columns=mfccs_list_columns)

chroma_list_columns = [f'chroma_{i}' for i in range(len(chroma_list[0]))]
df_chroma = pd.DataFrame(chroma_list, columns=chroma_list_columns)

spectral_contrast_list_columns = [f'spectral_contrast_{i}' for i in range(len(spectral_contrast_list[0]))]
df_spectral_contrast = pd.DataFrame(spectral_contrast_list, columns=spectral_contrast_list_columns)

In [8]:
#Dataset d'exploitation avec features et sans features selection
features = pd.concat([df["Audio_File"],df_mfccs, df_chroma,df_spectral_contrast,df["anger"]], axis=1)
features = features.sample(frac = 1)
features.head()

Unnamed: 0,Audio_File,mfccs_0,mfccs_1,mfccs_2,mfccs_3,mfccs_4,mfccs_5,mfccs_6,mfccs_7,mfccs_8,...,spectral_contrast_691,spectral_contrast_692,spectral_contrast_693,spectral_contrast_694,spectral_contrast_695,spectral_contrast_696,spectral_contrast_697,spectral_contrast_698,spectral_contrast_699,anger
564,KL_d14.wav,-426.369415,-450.904175,-468.328857,-469.14917,-466.949249,-469.481689,-472.289795,-473.585052,-469.659637,...,43.803558,46.26692,46.381775,55.016091,56.928597,56.466932,50.585607,45.337099,41.629511,0
897,augmented_DC_h15.wav,-280.793396,-270.846832,-280.496307,-278.94754,-275.385315,-275.710876,-280.079071,-277.681,-279.855469,...,13.748385,12.526052,13.699794,12.830678,14.018127,13.193332,13.760719,12.867285,14.97425,0
20,JK_a06.wav,-333.581757,-373.860992,-395.048462,-367.673645,-356.128662,-357.307312,-373.021698,-390.110352,-399.05542,...,54.546111,53.866093,53.652943,56.696897,55.945909,54.711334,66.403361,65.330246,65.139486,1
339,augmented_KL_d04.wav,-430.211365,-461.490051,-489.36377,-490.823029,-490.419373,-490.282654,-490.480988,-489.221863,-489.302643,...,60.161725,58.917816,55.917829,54.331813,51.605482,54.931672,60.575566,62.474817,66.386464,0
629,augmented_JK_sa11.wav,-244.416092,-249.67041,-273.436829,-272.328674,-277.524078,-277.427856,-277.319733,-277.468201,-279.143311,...,14.562752,15.04943,15.515789,12.983554,13.353886,14.277283,12.659731,12.841619,14.938996,0


In [9]:
def train_model_knn(data,nb_neighbors):
    X = data.drop(columns=['Audio_File', 'anger'])
    y = data["anger"].values

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    neigh = KNeighborsClassifier(n_neighbors=nb_neighbors)
    neigh.fit(X, y)


    return neigh, X_test, y_test

def train_model_svc(data):
    X = data.drop(columns=['Audio_File', 'anger'])
    y = data["anger"].values

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    model = SVC()
    model.fit(X, y)


    return model, X_test, y_test

def train_model_dt(data):
    X = data.drop(columns=['Audio_File', 'anger'])
    y = data["anger"].values

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    model = DecisionTreeClassifier()
    model.fit(X, y)


    return model, X_test, y_test

In [10]:
def evaluate_model(model, X_test, y_test):
    y_pred = model.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='weighted')
    recall = recall_score(y_test, y_pred, average='weighted')

    print(f"Accuracy: {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")

In [11]:
model, X_test, y_test = train_model_svc(features)
evaluate_model(model, X_test, y_test)

Accuracy: 0.8906
Precision: 0.7932
Recall: 0.8906


  _warn_prf(average, modifier, msg_start, len(result))
