In [1]:
import os
import numpy as np
import librosa
import tensorflow as tf
from tensorflow.keras import layers, models
from sklearn.preprocessing import LabelEncoder
from sklearn.utils import shuffle


In [2]:
# Config
TRAIN_PATH = r"C:\Users\ADMIN\Documents\Speaker-Identification\voice_dataset\train"
VAL_PATH = r"C:\Users\ADMIN\Documents\Speaker-Identification\voice_dataset\val"
SAMPLE_RATE = 22050
DURATION = 3  # seconds
SAMPLES_PER_TRACK = SAMPLE_RATE * DURATION


In [3]:
import os
import numpy as np
import librosa

def extract_features(directory):
    features = []
    labels = []
    
    for label in os.listdir(directory):
        label_path = os.path.join(directory, label)
        if not os.path.isdir(label_path):
            continue
        for file in os.listdir(label_path):
            file_path = os.path.join(label_path, file)
            try:
                audio, sr = librosa.load(file_path, sr=SAMPLE_RATE, duration=DURATION)
                if len(audio) < SAMPLES_PER_TRACK:
                    padding = SAMPLES_PER_TRACK - len(audio)
                    audio = np.pad(audio, (0, padding))  # zero-pad to 3 seconds
                mfcc = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=13)
                mfcc_scaled = np.mean(mfcc.T, axis=0)
                features.append(mfcc_scaled)
                labels.append(label)
            except Exception as e:
                print(f"Error processing {file_path}: {e}")
    
    return np.array(features), np.array(labels)


In [4]:
#Load the Data Using Config Paths
print("Loading training and validation data...")
X_train, y_train = extract_features(TRAIN_PATH)
X_val, y_val = extract_features(VAL_PATH)

Loading training and validation data...


In [5]:
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical


In [6]:
# Encode string labels into integers
label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y_train)
y_val_encoded = label_encoder.transform(y_val)

In [7]:

# One-hot encode
y_train_cat = to_categorical(y_train_encoded)
y_val_cat = to_categorical(y_val_encoded)


In [8]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout

model = Sequential([
    Dense(256, activation='relu', input_shape=(13,)),  # 13 MFCCs
    Dropout(0.3),
    Dense(128, activation='relu'),
    Dropout(0.3),
    Dense(len(np.unique(y_train)), activation='softmax')  # Output units = number of people
])

model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

model.summary()


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [9]:
from tensorflow.keras.layers import Input

model = Sequential([
    Input(shape=(13,)),  # define input shape here
    Dense(256, activation='relu'),
    Dropout(0.3),
    Dense(128, activation='relu'),
    Dropout(0.3),
    Dense(3, activation='softmax')  # 3 = number of people
])

In [10]:
from tensorflow.keras.callbacks import EarlyStopping

early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

history = model.fit(
    X_train, y_train_cat,
    validation_data=(X_val, y_val_cat),
    epochs=30,
    batch_size=32,
    callbacks=[early_stop]
)


ValueError: You must call `compile()` before using the model.

In [11]:
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy']
)


In [12]:
from tensorflow.keras.callbacks import EarlyStopping

early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

history = model.fit(
    X_train, y_train_cat,
    validation_data=(X_val, y_val_cat),
    epochs=30,
    batch_size=32,
    callbacks=[early_stop]
)

Epoch 1/30
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step - accuracy: 0.3448 - loss: 21.3165 - val_accuracy: 0.1111 - val_loss: 5.6275
Epoch 2/30
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 107ms/step - accuracy: 0.3793 - loss: 18.8944 - val_accuracy: 0.2222 - val_loss: 5.1250
Epoch 3/30
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step - accuracy: 0.2759 - loss: 16.4350 - val_accuracy: 0.3333 - val_loss: 11.8633
Epoch 4/30
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 54ms/step - accuracy: 0.4483 - loss: 14.1240 - val_accuracy: 0.3333 - val_loss: 13.6427
Epoch 5/30
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 65ms/step - accuracy: 0.3448 - loss: 14.3318 - val_accuracy: 0.3333 - val_loss: 10.4865
Epoch 6/30
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 54ms/step - accuracy: 0.3103 - loss: 16.6927 - val_accuracy: 0.3333 - val_loss: 5.1528
Epoch 7/30
[1m1/1[0m [32m━━━━━━━━━━

In [13]:
def extract_mfcc(file_path, sample_rate=22050, n_mfcc=40):
    audio, _ = librosa.load(file_path, sr=sample_rate)
    mfccs = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=n_mfcc)
    return np.mean(mfccs.T, axis=0)  # Mean across time

In [14]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization

model = Sequential([
    Dense(512, activation='relu', input_shape=(40,)),
    BatchNormalization(),
    Dropout(0.5),
    Dense(256, activation='relu'),
    Dropout(0.3),
    Dense(128, activation='relu'),
    Dense(3, activation='softmax')  # 3 speakers
])

In [15]:
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping

early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
lr_reduce = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3)

model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

model.fit(X_train, y_train_cat,
          validation_data=(X_val, y_val_cat),
          epochs=50,
          batch_size=16,
          callbacks=[early_stop, lr_reduce])


Epoch 1/50


ValueError: Exception encountered when calling Sequential.call().

[1mInput 0 of layer "dense_6" is incompatible with the layer: expected axis -1 of input shape to have value 40, but received input with shape (None, 13)[0m

Arguments received by Sequential.call():
  • inputs=tf.Tensor(shape=(None, 13), dtype=float32)
  • training=True
  • mask=None

In [16]:
model = Sequential()
model.add(Dense(40, input_shape=(13,), activation='relu'))
# continue your layers


In [17]:
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping

early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
lr_reduce = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3)

model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

model.fit(X_train, y_train_cat,
          validation_data=(X_val, y_val_cat),
          epochs=50,
          batch_size=16,
          callbacks=[early_stop, lr_reduce])

Epoch 1/50


ValueError: Arguments `target` and `output` must have the same shape. Received: target.shape=(None, 3), output.shape=(None, 40)

In [18]:
model.summary()


In [19]:
model = Sequential()
model.add(Dense(40, input_shape=(13,), activation='relu'))
# add more layers if needed
model.add(Dense(num_classes, activation='softmax'))  # final lay

NameError: name 'num_classes' is not defined

In [20]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

num_classes = 3  # set this to the actual number of your classes

model = Sequential()
model.add(Dense(40, input_shape=(13,), activation='relu'))
model.add(Dense(num_classes, activation='softmax'))


In [21]:
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])


In [22]:
model.fit(X_train, y_train_cat,
          validation_data=(X_val, y_val_cat),
          epochs=50,
          batch_size=16,
          callbacks=[early_stop, lr_reduce])


Epoch 1/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 160ms/step - accuracy: 0.3341 - loss: 45.8718 - val_accuracy: 0.3333 - val_loss: 43.6216 - learning_rate: 0.0010
Epoch 2/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step - accuracy: 0.3549 - loss: 40.7635 - val_accuracy: 0.3333 - val_loss: 40.2906 - learning_rate: 0.0010
Epoch 3/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step - accuracy: 0.3757 - loss: 34.4786 - val_accuracy: 0.3333 - val_loss: 36.9739 - learning_rate: 0.0010
Epoch 4/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step - accuracy: 0.3341 - loss: 33.7425 - val_accuracy: 0.3333 - val_loss: 33.6749 - learning_rate: 0.0010
Epoch 5/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step - accuracy: 0.2464 - loss: 34.1229 - val_accuracy: 0.3333 - val_loss: 30.6713 - learning_rate: 0.0010
Epoch 6/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3

<keras.src.callbacks.history.History at 0x215e4e87200>

In [23]:
model.save('my_model.h5')




In [24]:
model.save('my_model.keras')
