In [4]:
import librosa
import soundfile
import os, glob, pickle
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score

In [11]:
def extract_feature(file_name, mfcc, chroma, mel):
    X, sample_rate = librosa.load(os.path.join(file_name),res_type='kaiser_fast')
    if chroma:
        stft = np.abs(librosa.stft(X))
    result = np.array([])
    if mfcc:
        mfcc = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T, axis=0)
        result = np.hstack((result,mfcc))
    if chroma:
        chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T, axis=0)
        result = np.hstack((result, chroma))
    if mel:
        mel = np.mean(librosa.feature.melspectrogram(X, sr=sample_rate).T, axis=0)
        result = np.hstack((result, mel))
    return result

In [12]:
emotions = {'01':'Happy', '02':'Sad', '03':'Angry', '04':'Neutral'}
observed_emotions = ['Happy', 'Sad', 'Angry', 'Neutral']

In [15]:
def load_data(test_size=0.2):
    x,y = [],[]
    for folder in observed_emotions:
        filepath = os.path.join("C:\Ruchira\Academic\.IMMEDIATE\FYP\FYP\Datasets\Common\Vocal",folder)
        for filename in os.listdir(filepath):
            feature = extract_feature(os.path.join(filepath,filename), mfcc=True, chroma=True, mel=True)
            x.append(feature)
            y.append(folder)
    return train_test_split(np.array(x),y,test_size=test_size, train_size=0.75, random_state=9)

In [16]:
x_train, x_test, y_train, y_test = load_data(test_size=0.25)

  0.        ] as keyword args. From version 0.10 passing these as positional arguments will result in an error
  mel = np.mean(librosa.feature.melspectrogram(X, sr=sample_rate).T, axis=0)
  1.2919363e-06  0.0000000e+00] as keyword args. From version 0.10 passing these as positional arguments will result in an error
  mel = np.mean(librosa.feature.melspectrogram(X, sr=sample_rate).T, axis=0)
 0.0000000e+00] as keyword args. From version 0.10 passing these as positional arguments will result in an error
  mel = np.mean(librosa.feature.melspectrogram(X, sr=sample_rate).T, axis=0)
  0.        ] as keyword args. From version 0.10 passing these as positional arguments will result in an error
  mel = np.mean(librosa.feature.melspectrogram(X, sr=sample_rate).T, axis=0)
  mel = np.mean(librosa.feature.melspectrogram(X, sr=sample_rate).T, axis=0)
  0.        ] as keyword args. From version 0.10 passing these as positional arguments will result in an error
  mel = np.mean(librosa.feature.melspect

In [27]:
x_train.shape

(3675, 180)

In [28]:
model = MLPClassifier(alpha=0.01, batch_size=256, epsilon=1e-08,hidden_layer_sizes=(300,), learning_rate='adaptive',max_iter=500)

In [29]:
model.fit(x_train,y_train)

In [30]:
y_pred = model.predict(x_test)

In [31]:
score = accuracy_score(y_true=y_test,y_pred=y_pred)
score

0.6481632653061224

In [39]:
def predict_audio(file_path):
    feature = extract_feature(os.path.join(file_path), mfcc=True, chroma=True, mel=True)
    x = [feature]
    result = model.predict_proba(x)
    print(result)

In [40]:
predict_audio("C:\Ruchira\Academic\.IMMEDIATE\FYP\FYP\Datasets\Personalized\Vocal\Splitted\\Neutral\\Neutral_out_1.m4a")

  return f(*args, **kwargs)
  mel = np.mean(librosa.feature.melspectrogram(X, sr=sample_rate).T, axis=0)


[[4.85306017e-01 4.96907835e-01 2.12417875e-04 1.75737307e-02]]


In [41]:
import joblib

# Assuming 'model' is your trained scikit-learn model
model_path = "Vocal.pkl"
joblib.dump(model, model_path)

['Vocal.pkl']

In [None]:
loaded_model = joblib.load(model_path)