In [1]:
import pandas as pd
import librosa
import numpy as np
from tensorflow.keras import layers,models
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import save_model


In [2]:
csv_file_path = '/Users/dheemankumar/github/audio-ai/broken_3s_audio_data.csv'
df = pd.read_csv(csv_file_path)

In [4]:
audio_data = []
labels_language = []

# Step 2 and 3: Load audio files and process the data with a sample rate of 22050
for index, row in df.iterrows():
    audio_file_path = '/Users/dheemankumar/github/audio-ai/3sec_audio/' + row['name']  # Adjust the path as needed
    audio, sample_rate = librosa.load(audio_file_path, sr=22050)  # Load audio with a sample rate of 22050

    mel_spectrogram = librosa.feature.melspectrogram(y=audio, sr=sample_rate)

    # Convert to decibels (log scale)
    mel_spectrogram_db = librosa.amplitude_to_db(mel_spectrogram, ref=np.max)

    mel_spectrogram_db_with_channel = np.expand_dims(mel_spectrogram_db, axis=-1)

    # Append the processed audio data and label to the lists
    audio_data.append(mel_spectrogram_db_with_channel)



    labels_language.append(row[["english","hindi","punjabi","bangoli"]])

In [5]:
# Step 4: Create NumPy arrays
audio_data = np.array(audio_data)
labels_language = np.array(labels_language)

In [6]:
labels_language.shape

(13388, 4)

In [7]:
audio_data.shape

(13388, 128, 130, 1)

In [8]:
X_train_l, X_test_l, y_train_l, y_test_l = train_test_split(audio_data, labels_language, test_size=0.2, random_state=42)

In [9]:
y_train_l[0]

array([0, 0, 0, 1], dtype=object)

In [14]:
# Create a Sequential model_language
model_language = models.Sequential()

# Add a 2D convolutional layer with 32 filters, a 3x3 kernel, and 'relu' activation
model_language.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(128, 130, 1)))

# Add a max-pooling layer
model_language.add(layers.MaxPooling2D((2, 2)))

# Add another 2D convolutional layer with 64 filters and 'relu' activation
model_language.add(layers.Conv2D(64, (3, 3), activation='relu'))

# Add another max-pooling layer
model_language.add(layers.MaxPooling2D((2, 2)))

# Add a flattening layer to convert to 1D tensor
model_language.add(layers.Flatten())

# Add a fully connected (dense) layer with 64 units and 'relu' activation
model_language.add(layers.Dense(64, activation='relu'))

# Add the output layer with 7 units (since you want 7 outputs) and 'softmax' activation
model_language.add(layers.Dense(4, activation='softmax'))

# Compile the model_language
model_language.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Display the model_language summary
model_language.summary()


Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_2 (Conv2D)           (None, 126, 128, 32)      320       
                                                                 
 max_pooling2d_2 (MaxPoolin  (None, 63, 64, 32)        0         
 g2D)                                                            
                                                                 
 conv2d_3 (Conv2D)           (None, 61, 62, 64)        18496     
                                                                 
 max_pooling2d_3 (MaxPoolin  (None, 30, 31, 64)        0         
 g2D)                                                            
                                                                 
 flatten_1 (Flatten)         (None, 59520)             0         
                                                                 
 dense_2 (Dense)             (None, 64)               

In [20]:
model_language_2 = models.Sequential()

model_language_2.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(128, 130, 1)))
model_language_2.add(layers.MaxPooling2D((2, 2)))

model_language_2.add(layers.Conv2D(64, (3, 3), activation='relu'))
model_language_2.add(layers.MaxPooling2D((2, 2)))

model_language_2.add(layers.Flatten())

# Add a fully connected (dense) layer with 64 units and 'relu' activation
model_language_2.add(layers.Dense(64, activation='relu'))
# Add dropout layer for regularization
model_language_2.add(layers.Dropout(0.5))

model_language_2.add(layers.Dense(4, activation='softmax'))

model_language_2.compile(optimizer='adam',
                       loss='categorical_crossentropy',
                       metrics=['accuracy'])


In [23]:
model_language_3 = models.Sequential()

# Use more filters in the first convolutional layer
model_language_3.add(layers.Conv2D(64, (3, 3), activation='relu', input_shape=(128, 130, 1)))
model_language_3.add(layers.MaxPooling2D((2, 2)))

# Use larger kernel size in the second convolutional layer
model_language_3.add(layers.Conv2D(128, (5, 5), activation='relu'))
model_language_3.add(layers.MaxPooling2D((2, 2)))

model_language_3.add(layers.Flatten())

model_language_3.add(layers.Dense(64, activation='relu'))
model_language_3.add(layers.Dense(4, activation='softmax'))

model_language_3.compile(optimizer='adam',
                       loss='categorical_crossentropy',
                       metrics=['accuracy'])


In [24]:
model_language_4 = models.Sequential()

# Use more filters in the first convolutional layer
model_language_4.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(128, 130, 1)))
model_language_4.add(layers.MaxPooling2D((2, 2)))

# Use larger kernel size in the second convolutional layer
model_language_4.add(layers.Conv2D(64, (5, 5), activation='relu'))
model_language_4.add(layers.MaxPooling2D((2, 2)))

# Use larger kernel size in the third convolutional layer
model_language_4.add(layers.Conv2D(128, (5, 5), activation='relu'))
model_language_4.add(layers.MaxPooling2D((2, 2)))

model_language_4.add(layers.Flatten())

model_language_4.add(layers.Dense(64, activation='relu'))
model_language_4.add(layers.Dense(32, activation='relu'))
model_language_4.add(layers.Dense(4, activation='softmax'))

model_language_4.compile(optimizer='adam',
                       loss='categorical_crossentropy',
                       metrics=['accuracy'])

In [15]:
# Convert data types if needed
X_train_l = np.asarray(X_train_l, dtype=np.float32)
X_test_l = np.asarray(X_test_l, dtype=np.float32)
y_train_l = np.asarray(y_train_l, dtype=np.float32)
y_test_l = np.asarray(y_test_l, dtype=np.float32)

In [16]:
# Save the array to a file
np.save('X_train_language.npy', X_train_l)
np.save('X_test_language.npy', X_test_l)
np.save('y_train_language.npy', y_train_l)
np.save('y_test_language.npy', y_test_l)

In [26]:
# Train the model
ep=10

history_language_1 = model_language.fit(X_train_l, y_train_l, epochs=ep, validation_data=(X_test_l, y_test_l), batch_size=32)




Epoch 1/10

KeyboardInterrupt: 

In [None]:
# Train the model
ep=10

history_language_2 = model_language_2.fit(X_train_l, y_train_l, epochs=ep, validation_data=(X_test_l, y_test_l), batch_size=32)




Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [None]:
# Train the model
ep=10

history_language_3 = model_language_3.fit(X_train_l, y_train_l, epochs=ep, validation_data=(X_test_l, y_test_l), batch_size=32)




Epoch 1/10

KeyboardInterrupt: 

In [None]:
# Train the model
ep=10

history_language_4 = model_language_4.fit(X_train_l, y_train_l, epochs=ep, validation_data=(X_test_l, y_test_l), batch_size=32)




In [None]:

model_language.save("models/LanguageModel.h5")
model_language_2.save("models/LanguageModel2.h5")
model_language_3.save("models/LanguageModel3.h5")
model_language_4.save("models/LanguageModel4.h5")

  saving_api.save_model(


In [None]:
import json

# Save history to a JSON file
with open('history_language_1.json', 'w') as json_file:
    json.dump(history_language_1.history, json_file)


with open('history_language_2.json', 'w') as json_file:
    json.dump(history_language_1.history, json_file)


with open('history_language_3.json', 'w') as json_file:
    json.dump(history_language_1.history, json_file)


with open('history_language_4.json', 'w') as json_file:
    json.dump(history_language_1.history, json_file)