In [1]:
import pandas as pd
import librosa
import numpy as np
from tensorflow.keras import layers,models
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import save_model


In [2]:
# Metadata table for the audio clips; later cells read its 'name', 'male' and
# 'female' columns.
# NOTE(review): absolute, machine-specific path -- this cell only runs as-is on
# the original author's machine.
csv_file_path = '/Users/dheemankumar/github/audio-ai/broken_3s_audio_data.csv'
df = pd.read_csv(csv_file_path)

In [3]:
audio_data = []
labels_gender = []

# Step 2 and 3: load every clip listed in the CSV and turn it into a
# log-scaled mel spectrogram with a trailing channel axis for the Conv2D models.
for _, row in df.iterrows():
    audio_file_path = '/Users/dheemankumar/github/audio-ai/3sec_audio/' + row['name']  # Adjust the path as needed
    # Resample on load so every clip shares the same 22050 Hz sample rate.
    audio, sample_rate = librosa.load(audio_file_path, sr=22050)

    # With its default arguments (power=2.0) this returns a *power* mel spectrogram.
    mel_spectrogram = librosa.feature.melspectrogram(y=audio, sr=sample_rate)

    # Convert to decibels (log scale). power_to_db is the conversion that matches
    # a power spectrogram; amplitude_to_db treats its input as magnitudes, which
    # doubles the dB values and changes what the default 80 dB floor clips.
    # NOTE: models trained on the old scaling need to be retrained on these features.
    mel_spectrogram_db = librosa.power_to_db(mel_spectrogram, ref=np.max)

    # Add a channel axis: (n_mels, frames) -> (n_mels, frames, 1).
    mel_spectrogram_db_with_channel = np.expand_dims(mel_spectrogram_db, axis=-1)

    # Append the processed audio data and its two-column gender label to the lists
    audio_data.append(mel_spectrogram_db_with_channel)
    labels_gender.append(row[['male', 'female']])

In [4]:
# Step 4: Create NumPy arrays
# Both names are rebound from Python lists to ndarrays with one leading-axis
# entry per CSV row.
audio_data = np.array(audio_data)
# NOTE(review): the label rows appended above are pandas Series, and the
# resulting array has dtype=object (see the y_train_g[0] output further down);
# it is cast to float32 in a later cell before training.
labels_gender = np.array(labels_gender)

In [5]:
# Sanity check: one row per clip, two label columns ('male', 'female').
labels_gender.shape

(13388, 2)

In [6]:
# Sanity check: (clips, mel bands, frames, channel). The last three dimensions
# must match the input_shape the models below are built with.
audio_data.shape

(13388, 128, 130, 1)

In [7]:
# Hold out 20% of the clips; the fixed random_state keeps the split reproducible.
X_train_g, X_test_g, y_train_g, y_test_g = train_test_split(
    audio_data,
    labels_gender,
    test_size=0.2,
    random_state=42,
)

In [8]:
# Peek at one training label to confirm the encoding: a two-element
# ('male', 'female') vector.
y_train_g[0]

array([0, 1], dtype=object)

In [9]:
# Baseline gender classifier: two conv/pool stages followed by a small dense head.
model_gender = models.Sequential([
    # 32 filters, 3x3 kernel, ReLU; input is a single-channel 128x130 spectrogram.
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(128, 130, 1)),
    # 2x2 max-pooling.
    layers.MaxPooling2D((2, 2)),
    # 64 filters, 3x3 kernel, ReLU.
    layers.Conv2D(64, (3, 3), activation='relu'),
    # 2x2 max-pooling.
    layers.MaxPooling2D((2, 2)),
    # Collapse the feature maps into a 1D vector for the dense layers.
    layers.Flatten(),
    # Fully connected layer with 64 units and ReLU.
    layers.Dense(64, activation='relu'),
    # Output layer: 2 softmax units, one per label column.
    layers.Dense(2, activation='softmax'),
])

# Two-column targets with a softmax output -> categorical cross-entropy.
model_gender.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Show the layer-by-layer summary of model_gender.
model_gender.summary()


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 126, 128, 32)      320       
                                                                 
 max_pooling2d (MaxPooling2  (None, 63, 64, 32)        0         
 D)                                                              
                                                                 
 conv2d_1 (Conv2D)           (None, 61, 62, 64)        18496     
                                                                 
 max_pooling2d_1 (MaxPoolin  (None, 30, 31, 64)        0         
 g2D)                                                            
                                                                 
 flatten (Flatten)           (None, 59520)             0         
                                                                 
 dense (Dense)               (None, 64)                3

In [10]:
# Variant 2: same layers as the baseline, plus dropout before the output layer.
model_gender_2 = models.Sequential([
    # 32 filters, 3x3 kernel, ReLU; input is a single-channel 128x130 spectrogram.
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(128, 130, 1)),
    # 2x2 max-pooling.
    layers.MaxPooling2D((2, 2)),
    # 64 filters, 3x3 kernel, ReLU.
    layers.Conv2D(64, (3, 3), activation='relu'),
    # 2x2 max-pooling.
    layers.MaxPooling2D((2, 2)),
    # Collapse the feature maps into a 1D vector for the dense layers.
    layers.Flatten(),
    # Fully connected layer with 64 units and ReLU.
    layers.Dense(64, activation='relu'),
    # Dropout with rate 0.5 on the dense features.
    layers.Dropout(0.5),
    # Output layer: 2 softmax units, one per label column.
    layers.Dense(2, activation='softmax'),
])

# Two-column targets with a softmax output -> categorical cross-entropy.
model_gender_2.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Show the layer-by-layer summary of model_gender_2.
model_gender_2.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_2 (Conv2D)           (None, 126, 128, 32)      320       
                                                                 
 max_pooling2d_2 (MaxPoolin  (None, 63, 64, 32)        0         
 g2D)                                                            
                                                                 
 conv2d_3 (Conv2D)           (None, 61, 62, 64)        18496     
                                                                 
 max_pooling2d_3 (MaxPoolin  (None, 30, 31, 64)        0         
 g2D)                                                            
                                                                 
 flatten_1 (Flatten)         (None, 59520)             0         
                                                                 
 dense_2 (Dense)             (None, 64)               

In [11]:
# Variant 3: wider convolutions (64 then 128 filters) with a 5x5 second kernel.
model_gender_3 = models.Sequential([
    # 64 filters, 3x3 kernel, ReLU; input is a single-channel 128x130 spectrogram.
    layers.Conv2D(64, (3, 3), activation='relu', input_shape=(128, 130, 1)),
    # 2x2 max-pooling.
    layers.MaxPooling2D((2, 2)),
    # 128 filters, 5x5 kernel, ReLU.
    layers.Conv2D(128, (5, 5), activation='relu'),
    # 2x2 max-pooling.
    layers.MaxPooling2D((2, 2)),
    # Collapse the feature maps into a 1D vector for the dense layers.
    layers.Flatten(),
    # Fully connected layer with 64 units and ReLU.
    layers.Dense(64, activation='relu'),
    # Output layer: 2 softmax units, one per label column.
    layers.Dense(2, activation='softmax'),
])

# Two-column targets with a softmax output -> categorical cross-entropy.
model_gender_3.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Show the layer-by-layer summary of model_gender_3.
model_gender_3.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_4 (Conv2D)           (None, 126, 128, 64)      640       
                                                                 
 max_pooling2d_4 (MaxPoolin  (None, 63, 64, 64)        0         
 g2D)                                                            
                                                                 
 conv2d_5 (Conv2D)           (None, 59, 60, 128)       204928    
                                                                 
 max_pooling2d_5 (MaxPoolin  (None, 29, 30, 128)       0         
 g2D)                                                            
                                                                 
 flatten_2 (Flatten)         (None, 111360)            0         
                                                                 
 dense_4 (Dense)             (None, 64)               

In [12]:
# Variant 4: three conv/pool stages and a two-layer dense head.
model_gender_4 = models.Sequential([
    # 32 filters, 3x3 kernel, ReLU; input is a single-channel 128x130 spectrogram.
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(128, 130, 1)),
    # 2x2 max-pooling.
    layers.MaxPooling2D((2, 2)),
    # 64 filters, 3x3 kernel, ReLU.
    layers.Conv2D(64, (3, 3), activation='relu'),
    # 2x2 max-pooling.
    layers.MaxPooling2D((2, 2)),
    # 128 filters, 5x5 kernel, ReLU.
    layers.Conv2D(128, (5, 5), activation='relu'),
    # 2x2 max-pooling.
    layers.MaxPooling2D((2, 2)),
    # Collapse the feature maps into a 1D vector for the dense layers.
    layers.Flatten(),
    # Fully connected layers with 64 and then 32 units, both ReLU.
    layers.Dense(64, activation='relu'),
    layers.Dense(32, activation='relu'),
    # Output layer: 2 softmax units, one per label column.
    layers.Dense(2, activation='softmax'),
])

# Two-column targets with a softmax output -> categorical cross-entropy.
model_gender_4.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Show the layer-by-layer summary of model_gender_4.
model_gender_4.summary()

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_6 (Conv2D)           (None, 126, 128, 32)      320       
                                                                 
 max_pooling2d_6 (MaxPoolin  (None, 63, 64, 32)        0         
 g2D)                                                            
                                                                 
 conv2d_7 (Conv2D)           (None, 61, 62, 64)        18496     
                                                                 
 max_pooling2d_7 (MaxPoolin  (None, 30, 31, 64)        0         
 g2D)                                                            
                                                                 
 conv2d_8 (Conv2D)           (None, 26, 27, 128)       204928    
                                                                 
 max_pooling2d_8 (MaxPoolin  (None, 13, 13, 128)      

In [13]:
# Cast all four splits to float32 in one pass; this also turns the
# object-dtype label arrays into numeric arrays that Keras can consume.
X_train_g, X_test_g, y_train_g, y_test_g = (
    np.asarray(split, dtype=np.float32)
    for split in (X_train_g, X_test_g, y_train_g, y_test_g)
)

In [14]:
# Write each split to its own .npy file in the current working directory.
for _npy_name, _split in (
    ('X_train_gender.npy', X_train_g),
    ('X_test_gender.npy', X_test_g),
    ('y_train_gender.npy', y_train_g),
    ('y_test_gender.npy', y_test_g),
):
    np.save(_npy_name, _split)

In [19]:
# Number of training epochs; passed as `epochs=ep` to every model's fit() call below.
ep=3

In [20]:

# Train the baseline model; the held-out split doubles as validation data,
# so its metrics are reported after every epoch.
history_gender_1 = model_gender.fit(
    X_train_g,
    y_train_g,
    epochs=ep,
    batch_size=32,
    validation_data=(X_test_g, y_test_g),
)

Epoch 1/3
Epoch 2/3
Epoch 3/3


In [21]:

# Train model_gender_2; the held-out split doubles as validation data,
# so its metrics are reported after every epoch.
history_gender_2 = model_gender_2.fit(
    X_train_g,
    y_train_g,
    epochs=ep,
    batch_size=32,
    validation_data=(X_test_g, y_test_g),
)

Epoch 1/3
Epoch 2/3
Epoch 3/3


In [22]:

# Train model_gender_3; the held-out split doubles as validation data,
# so its metrics are reported after every epoch.
history_gender_3 = model_gender_3.fit(
    X_train_g,
    y_train_g,
    epochs=ep,
    batch_size=32,
    validation_data=(X_test_g, y_test_g),
)

Epoch 1/3
Epoch 2/3
Epoch 3/3


In [23]:

# Train model_gender_4; the held-out split doubles as validation data,
# so its metrics are reported after every epoch.
history_gender_4 = model_gender_4.fit(
    X_train_g,
    y_train_g,
    epochs=ep,
    batch_size=32,
    validation_data=(X_test_g, y_test_g),
)

Epoch 1/3
Epoch 2/3
Epoch 3/3


In [24]:
# Persist every trained model as an .h5 file under models/.
for _trained_model, _h5_path in (
    (model_gender, "models/genderModel.h5"),
    (model_gender_2, "models/genderModel2.h5"),
    (model_gender_3, "models/genderModel3.h5"),
    (model_gender_4, "models/genderModel4.h5"),
):
    _trained_model.save(_h5_path)

  saving_api.save_model(


In [25]:
import json

# Save each model's training history (the `.history` dict returned by fit())
# to its own JSON file.
# Each file name is paired explicitly with the matching History object: the
# earlier copy-pasted version wrote history_gender_1 into all four files, so
# the metrics of models 2-4 were never persisted.
for _json_path, _history in (
    ('history_gender_1.json', history_gender_1),
    ('history_gender_2.json', history_gender_2),
    ('history_gender_3.json', history_gender_3),
    ('history_gender_4.json', history_gender_4),
):
    with open(_json_path, 'w') as json_file:
        json.dump(_history.history, json_file)