In [1]:
import os
import librosa
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import tensorflow as tf
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
from keras.callbacks import EarlyStopping
from keras.regularizers import l2
from keras.optimizers import Adamax

data_path = 'C:\_PROJECT_\Speech Recognition\Data'
class_labels = os.listdir(data_path)

sampling_rate =25050  
n_mfcc = 20 
hop_length = 550  

features = []
labels = []

for label in class_labels:
    label_path = os.path.join(data_path, label)
    for audio_file in os.listdir(label_path):
        audio, _ = librosa.load(os.path.join(label_path, audio_file), sr=sampling_rate)
        mfccs = librosa.feature.mfcc(y=audio, sr=sampling_rate, n_mfcc=n_mfcc, hop_length=hop_length)
        features.append(mfccs.T)  
        labels.append(label)

label_encoder = LabelEncoder()
encoded_labels = label_encoder.fit_transform(labels)

X_train, X_test, y_train, y_test = train_test_split(features, encoded_labels, test_size=0.2, random_state=42)



In [2]:
X_test=np.array(X_test)
y_test=np.array(y_test)
y_train=np.array(y_train)
X_train=np.array(X_train)

  X_test=np.array(X_test)
  X_train=np.array(X_train)


In [3]:
m =0
n=0
for i in range(X_train.shape[0]):
    if(X_train[i].shape[0]>m):
        m=X_train[i].shape[0]
    if(X_train[i].shape[1]>n):
        n=X_train[i].shape[1]

In [4]:

max_shape = (m, n)  
X_train_processed = []
X_test_processed = []

for sample in X_train:
    if sample.shape[0] < max_shape[0]:
        padding = np.zeros((max_shape[0] - sample.shape[0], max_shape[1]))
        processed_sample = np.vstack((sample, padding))
    else:
        processed_sample = sample[:max_shape[0], :]

    X_train_processed.append(processed_sample)

for sample in X_test:
    if sample.shape[0] < max_shape[0]:
        padding = np.zeros((max_shape[0] - sample.shape[0], max_shape[1]))
        processed_sample = np.vstack((sample, padding))
    else:
        processed_sample = sample[:max_shape[0], :]

    X_test_processed.append(processed_sample)

X_train_processed = np.array(X_train_processed)
X_test_processed = np.array(X_test_processed)



In [41]:
model = Sequential()
from keras.optimizers import Adamax
model.add(Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=(X_train_processed.shape[1], X_train_processed.shape[2], 1)))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(64, kernel_size=(3, 3), activation='relu', kernel_regularizer=l2(0.01)))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(64, kernel_size=(3, 3), activation='relu', kernel_regularizer=l2(0.01)))
model.add(MaxPooling2D(pool_size=(1, 1)))

model.add(Flatten())
model.add(Dense(64, activation='relu', kernel_regularizer=l2(0.01))) 
model.add(Dropout(0.5))
model.add(Dense(len(class_labels), activation='softmax')) 

model.compile(loss='sparse_categorical_crossentropy', optimizer=Adamax(), metrics=['accuracy'])

X_train_processed = X_train_processed.reshape(X_train_processed.shape[0], X_train_processed.shape[1], X_train_processed.shape[2], 1)
X_test_processed = X_test_processed.reshape(X_test_processed.shape[0], X_test_processed.shape[1], X_test_processed.shape[2], 1)

early_stopping = EarlyStopping(monitor='val_loss', patience=3)  

history = model.fit(X_train_processed, y_train, validation_data=(X_test_processed, y_test), epochs=30, batch_size=32, callbacks=[early_stopping])



Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30


In [68]:
early_stopping = EarlyStopping(monitor='val_loss', patience=1)
history = model.fit(X_train_processed, y_train, validation_data=(X_test_processed, y_test), epochs=10, batch_size=32, callbacks=[early_stopping])


Epoch 1/10
Epoch 2/10


In [39]:
ac=model.evaluate(X_train_processed, y_train)[1]
print(f"Accuracy Close dataset: {ac* 100:f}%")

Accuracy Close dataset: 97.777778%


In [60]:
accuracy = model.evaluate(X_test_processed, y_test)[1]
print(f"Accuracy Open dataset: {accuracy * 100:.2f}%")

Accuracy Open dataset: 72.06%
