In [5]:
import os
import librosa
import librosa.display
import numpy as np
import pandas as pd

happy_folder = "happy"

features = []
labels = []

for filename in os.listdir(happy_folder):
    if filename.endswith(".wav"):
        audio_file = os.path.join(happy_folder, filename)
        
        # Load the audio file
        y, sr = librosa.load(audio_file)
        
        # Extract MFCC features (adjust parameters as needed)
        mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13, hop_length=512, n_fft=2048, n_mels=26)
        
        # Flatten the MFCC matrix to create a feature vector
        mfccs_flat = np.mean(mfccs, axis=1)  # You can also use np.mean, np.std, etc. to summarize the MFCCs
        
        # Append the feature vector and label to the respective lists
        features.append(mfccs_flat)
        labels.append("happy")

# Create a DataFrame from the features and labels
df = pd.DataFrame(features)
df["Emotion"] = labels


In [8]:
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,Emotion
0,-144.007965,58.708755,-4.280030,22.588066,3.465376,8.792150,-8.145284,5.087916,-4.118076,2.241210,-6.584430,3.655278,-2.815126,happy
1,-112.288742,58.314201,-10.104110,15.490354,-9.121796,8.848783,-7.467923,3.400584,-7.435456,1.396987,-6.059454,5.766613,-3.776857,happy
2,-167.256165,62.946507,-2.773091,23.133757,-4.923666,13.635571,-4.182585,4.974473,-5.847533,2.220066,-5.148891,5.709449,-3.662388,happy
3,-157.853302,59.967590,-1.289362,19.282114,-3.014376,11.103924,-3.802889,4.261051,-5.872185,1.413765,-4.059240,5.125031,-2.285027,happy
4,-151.617355,61.622948,-0.590583,25.089151,0.606927,7.388477,-4.516265,5.740804,-6.079794,1.165198,-5.856667,3.632424,-3.528920,happy
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1082,-147.119003,70.776840,-5.216261,15.039186,-6.031559,4.191583,-4.736637,1.347852,-7.221465,2.808748,-2.829440,-1.118910,-1.474635,happy
1083,-177.446259,64.852409,4.325708,20.851091,-4.659900,10.990689,-8.740526,6.475883,-8.451095,5.493451,-4.586441,-1.644338,-2.564188,happy
1084,-177.716370,70.876175,5.000517,25.712021,-8.194897,8.359855,-8.903695,6.571529,-8.152275,2.928565,-4.805890,1.144441,-1.077922,happy
1085,-158.978302,67.243713,5.984472,19.394457,-7.811292,0.697363,-5.281727,2.747301,-8.218135,2.525125,-4.772151,0.237409,-0.422161,happy


In [7]:
happy_folder = "sad"

features = []
labels = []

for filename in os.listdir(happy_folder):
    if filename.endswith(".wav"):
        audio_file = os.path.join(happy_folder, filename)
        
        # Load the audio file
        y, sr = librosa.load(audio_file)
        
        # Extract MFCC features (adjust parameters as needed)
        mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13, hop_length=512, n_fft=2048, n_mels=26)
        
        # Flatten the MFCC matrix to create a feature vector
        mfccs_flat = np.mean(mfccs, axis=1)  # You can also use np.mean, np.std, etc. to summarize the MFCCs
        
        # Append the feature vector and label to the respective lists
        features.append(mfccs_flat)
        labels.append("sad")

# Create a DataFrame from the features and labels
df1 = pd.DataFrame(features)
df1["Emotion"] = labels

In [9]:
df1

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,Emotion
0,-176.519806,61.615368,-5.159897,27.609232,1.620013,14.775539,-9.400245,8.247709,-6.156116,5.163786,-5.738259,4.081781,-3.118683,sad
1,-179.801453,66.220398,-0.581284,22.758276,-1.910941,13.642160,-8.033227,7.796725,-6.954207,4.281512,-4.916007,4.982571,-2.824811,sad
2,-182.619278,67.675827,-0.797868,24.051430,-5.190639,16.157488,-8.632424,8.377496,-7.821844,4.154719,-6.584860,5.440742,-3.227072,sad
3,-176.730347,69.460732,-3.556798,23.724281,-3.923401,14.380940,-10.258853,7.920942,-8.471033,4.707633,-7.483985,6.082211,-1.976128,sad
4,-169.961014,67.228783,-5.108936,25.972433,-0.960968,10.707214,-7.709500,9.845826,-7.130170,1.443047,-5.541646,4.582058,-2.510003,sad
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1082,-175.633728,74.015785,2.712404,19.897556,-6.309462,10.357076,-6.993424,6.231153,-7.332644,2.900886,-3.851538,1.732221,-0.015147,sad
1083,-189.807785,68.901428,6.567017,23.107821,-7.408116,14.314412,-12.288042,7.221433,-8.962927,6.892297,-4.911611,3.426297,-1.613749,sad
1084,-184.201416,70.399010,5.299627,24.696863,-6.612162,11.239370,-8.934715,7.810757,-6.904093,3.232856,-5.515260,2.140845,0.100377,sad
1085,-191.838074,67.235069,6.359173,26.766935,-8.016623,14.006180,-9.946400,10.850006,-9.195849,5.764503,-6.640813,3.534524,-1.396288,sad


In [10]:
data = pd.concat([df,df1])

In [12]:
data = data.sample(frac=1.0, random_state=42)


In [13]:
data

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,Emotion
282,-187.291916,60.559792,5.524512,28.017920,-5.134236,11.992344,-5.312694,8.525510,-6.117586,6.419153,-3.630326,0.889226,-0.534564,happy
401,-181.190689,54.937691,8.245098,27.110374,-4.613346,13.557610,-12.092719,10.512252,-9.665386,6.453562,-6.521296,1.327836,-3.749032,sad
792,-122.850060,59.267120,1.687985,17.103163,-1.257266,10.769147,-7.797624,2.494846,-5.696882,3.604553,-1.868009,3.231911,0.851455,happy
727,-172.144989,65.690483,0.725744,22.690552,-6.746203,11.921304,-10.059653,7.634089,-6.381462,5.297874,-3.262336,1.499478,-1.644313,happy
92,-204.358231,62.460064,12.527433,28.293600,-5.310612,16.849634,-5.972641,10.497198,-5.701761,7.364686,-4.412080,2.041025,-2.910482,sad
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
551,-206.485321,62.054054,9.425039,28.212427,-4.987573,18.965616,-9.682539,11.042815,-7.657458,6.728415,-5.422341,2.554426,-3.687952,sad
8,-179.816452,63.810776,-2.018862,25.356987,-5.103701,16.068565,-6.822363,10.157461,-7.991560,3.856697,-5.701437,5.423973,-4.030729,sad
43,-206.062180,64.305214,0.434274,24.533056,-2.728749,17.091026,-8.720998,11.258034,-6.232398,6.001196,-5.110663,4.870625,-3.148695,sad
207,-184.682144,63.847946,6.748990,28.057064,-5.370512,16.681566,-7.384784,9.111710,-8.278574,7.326663,-4.194708,3.411689,-1.207105,sad


In [28]:
x = data.iloc[:,:-1]
y = data['Emotion']

In [29]:
x

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12
282,-187.291916,60.559792,5.524512,28.017920,-5.134236,11.992344,-5.312694,8.525510,-6.117586,6.419153,-3.630326,0.889226,-0.534564
401,-181.190689,54.937691,8.245098,27.110374,-4.613346,13.557610,-12.092719,10.512252,-9.665386,6.453562,-6.521296,1.327836,-3.749032
792,-122.850060,59.267120,1.687985,17.103163,-1.257266,10.769147,-7.797624,2.494846,-5.696882,3.604553,-1.868009,3.231911,0.851455
727,-172.144989,65.690483,0.725744,22.690552,-6.746203,11.921304,-10.059653,7.634089,-6.381462,5.297874,-3.262336,1.499478,-1.644313
92,-204.358231,62.460064,12.527433,28.293600,-5.310612,16.849634,-5.972641,10.497198,-5.701761,7.364686,-4.412080,2.041025,-2.910482
...,...,...,...,...,...,...,...,...,...,...,...,...,...
551,-206.485321,62.054054,9.425039,28.212427,-4.987573,18.965616,-9.682539,11.042815,-7.657458,6.728415,-5.422341,2.554426,-3.687952
8,-179.816452,63.810776,-2.018862,25.356987,-5.103701,16.068565,-6.822363,10.157461,-7.991560,3.856697,-5.701437,5.423973,-4.030729
43,-206.062180,64.305214,0.434274,24.533056,-2.728749,17.091026,-8.720998,11.258034,-6.232398,6.001196,-5.110663,4.870625,-3.148695
207,-184.682144,63.847946,6.748990,28.057064,-5.370512,16.681566,-7.384784,9.111710,-8.278574,7.326663,-4.194708,3.411689,-1.207105


In [107]:
from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size = 0.2,random_state = 0)


In [108]:
x_train.shape

(1739, 13)

In [109]:
x_train = x_train.to_numpy()
x_test= x_test.to_numpy()

In [110]:
x_train = x_train.reshape(-1, 1, 13)
x_test = x_test.reshape(-1, 1, 13)

In [111]:
x_train.shape

(1739, 1, 13)

In [112]:
y_train = y_train.map({'happy': 0, 'sad': 1})
y_test = y_test.map({'happy': 0, 'sad': 1})

In [113]:
from keras.layers import Dense,LSTM
from keras.models import Sequential
model = Sequential()
model.add(LSTM(units = 256, input_shape = (1,13)))
model.add(Dense(128, activation ='relu'))
model.add(Dense(128, activation ='relu'))
model.add(Dense(64, activation ='relu'))
model.add(Dense(1, activation ='sigmoid'))

In [114]:
model.compile(optimizer = 'adam' , loss = 'binary_crossentropy' , metrics =['accuracy'] )

In [115]:
model.fit(x_train ,y_train ,epochs = 15 ,validation_split = 0.2)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


<keras.src.callbacks.History at 0x228d04f0650>

In [116]:
model.evaluate(x_test,y_test)



[0.394106388092041, 0.8298850655555725]

In [117]:
model.save('Model')

INFO:tensorflow:Assets written to: Model\assets


INFO:tensorflow:Assets written to: Model\assets


In [2]:
import keras
model = keras.models.load_model('Model')

In [21]:
import librosa
import numpy as np
audio = '1001_TIE_HAP_XX.wav'
y, sr = librosa.load(audio)
mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13, hop_length=512, n_fft=2048, n_mels=26)
fccs_flat = np.mean(mfccs, axis=1)

In [22]:
fccs_flat

array([-139.0592   ,   59.99885  ,   -5.6606994,   18.38387  ,
          5.337334 ,    6.8025365,   -7.675044 ,    3.238221 ,
         -2.1208603,    1.890469 ,   -5.494014 ,    5.5995393,
         -3.333587 ], dtype=float32)

In [24]:
fccs_flat = fccs_flat.reshape(-1, 1, 13)

In [25]:
fccs_flat.shape

(1, 1, 13)

In [26]:
pred = model.predict(fccs_flat)



In [31]:
prediction = np.argmax(pred)
if prediction == 0:
    print('happy')
else:
    print('sad')

happy
