In [241]:
import pandas as pd
import numpy as np
import librosa
import os
import random
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler

import IPython.display as ipd
from playsound import playsound
import notebook

#import files
audio_sad_folder = 'C:/Users/gusta/PycharmProjects/pythonProject5/sad-20230428T080436Z-001/sad'
audio_happy_folder ='C:/Users/gusta/PycharmProjects/pythonProject5/happy-20230428T080438Z-001/happy'
audio_angry_folder ='C:/Users/gusta/PycharmProjects/pythonProject5/angry-20230428T082147Z-001/angry'
audio_fear_folder  ='C:/Users/gusta/PycharmProjects/pythonProject5/fear-20230428T080443Z-001/fear'

#funtion extract the audio from file and save them in array


def import_files_audio(files_imported):
    audio_info_saved = []
    audio_data_saved = []
    for save_info in os.listdir(files_imported):
        if save_info.endswith('.wav'):
          file_path = os.path.join(files_imported, save_info)
          data, audio_sample = librosa.load(file_path)

          audio_data_saved.append(data)
          audio_info_saved.append(audio_sample)

    return  np.array(audio_data_saved),np.array(audio_info_saved)




In [242]:

#variables with the datasets already saved
audio_sad_data, audio_sad_info = import_files_audio(audio_sad_folder)
audio_happy_data, audio_happy_info = import_files_audio(audio_happy_folder)
audio_fear_data, audio_fear_info = import_files_audio(audio_fear_folder)
audio_angry_data, audio_angry_info = import_files_audio(audio_angry_folder)

ipd.display(ipd.Audio(data= audio_sad_data[0],rate=44100))
print(audio_sad_data)


#X_sad_train, X_sad_test,y_sad_train, y_sad_test = train_test_split(audio_sad_data,audio_sad_info , test_size = 0.30, random_state = 42)
#X_angry_train, X_angry_test,y_angry_train, y_angry_test= train_test_split(audio_angry_data,audio_angry_info, test_size = 0.30, random_state = 42)
#X_happy_train, X_happy_test,y_happy_train, y_happy_test= train_test_split(audio_happy_data,audio_happy_info, test_size = 0.30, random_state = 42)
#X_fear_train, X_fear_test,y_fear_train, y_fear_test = train_test_split(audio_fear_data,audio_fear_info, test_size = 0.30, random_state = 42)

  return  np.array(audio_data_saved),np.array(audio_info_saved)


[array([ 0.00011745,  0.00029148,  0.00042354, ...,  0.00011656,
        -0.00020939,  0.00010238], dtype=float32)
 array([ 2.1943405e-04,  7.4776611e-04,  7.6236017e-04, ...,
        -3.4604664e-04,  7.5502001e-05,  0.0000000e+00], dtype=float32)
 array([-1.0439449e-05,  4.1927642e-04,  1.0503622e-04, ...,
         1.5253150e-04,  1.9370265e-04,  0.0000000e+00], dtype=float32)
 array([0.00022474, 0.00028667, 0.00021235, ..., 0.00101252, 0.00025075,
        0.00026783], dtype=float32)
 array([-2.1112183e-04, -2.7997181e-04, -3.0572677e-04, ...,
         3.4075059e-04,  6.0488441e-05,  0.0000000e+00], dtype=float32)
 array([6.4031621e-05, 5.1222643e-04, 8.9471007e-04, ..., 6.6371233e-04,
        2.9980169e-05, 0.0000000e+00], dtype=float32)
 array([ 1.1928255e-04, -1.2068823e-04,  7.9754645e-05, ...,
         1.1817925e-04,  1.8813318e-04,  2.0536111e-04], dtype=float32)
 array([-0.0002599 , -0.00110653, -0.00023245, ..., -0.00049881,
         0.00029446,  0.        ], dtype=float32)
 a

In [243]:
#feature extraction
def feature_extraction(train):


    loudness_values = []
    for data in train:
        rms = librosa.feature.rms(y=data)
        loudness = librosa.core.power_to_db(rms, ref=0)[0]
        loudness_values.append(loudness)
        loudness_values
    mfccs_values = []

    for data in train:
        mfccs = librosa.feature.mfcc(y=data, sr=44100, n_mfcc=12)
        mfccs_values.append(mfccs)

    zcr_values = []
    for data in train:
        zcr = librosa.feature.zero_crossing_rate(y=data)
        zcr_values.append(zcr)

    chroma_values =[]
    for data in train:
        chromagram = librosa.feature.chroma_stft(y=data, sr=44100)
        chroma_values.append(chromagram)
    spectro_values = []
    for data in train:
        mel_spectrogram = librosa.feature.melspectrogram(y=data, sr=44100, n_mels=12)
        spectro_values.append(mel_spectrogram)


    loudness_s = pd.Series(loudness_values)

    mfccs_s = pd.Series(mfccs_values)
    zcr_s = pd.Series(zcr_values)
    chroma_s = pd.Series(chroma_values)
    spectro_s = pd.Series(spectro_values)

    feature = pd.concat([loudness_s, mfccs_s, zcr_s, chroma_s, spectro_s], axis=1)


    return feature


In [244]:
processed_sad_data = feature_extraction(audio_sad_data)
processed_happy_data = feature_extraction(audio_happy_data)
processed_fear_data = feature_extraction(audio_fear_data)
processed_angry_data = feature_extraction(audio_angry_data)






X_train_sad, X_test_sad,y_train_sad, y_test_sad = train_test_split(processed_sad_data ,audio_sad_info, test_size = 0.30, random_state = 42)
X_train_happy, X_test_happy,y_train_happy, y_test_happy = train_test_split(processed_happy_data ,audio_happy_info, test_size = 0.30, random_state = 42)
X_train_fear, X_test_fear,y_train_fear, y_test_fear = train_test_split(processed_fear_data ,audio_fear_info, test_size = 0.30, random_state = 42)
X_train_angry, X_test_angry,y_train_angry, y_test_angry = train_test_split(processed_angry_data  ,audio_angry_info, test_size = 0.30, random_state = 42)






In [245]:
from sklearn.svm import SVC
#from sklearn import svm
from sklearn.pipeline import make_pipeline
from sklearn.metrics import classification_report, accuracy_score

svm = SVC(kernel='linear', C=1)

y_train = np.array(y_train_sad).reshape(-1,1)



svm.fit(X_train_sad, y_train_sad)


pred_labels = svm.predict(X_test)
acc = accuracy_score(y_test , pred_labels)

ValueError: setting an array element with a sequence.