In [1]:
# from google.colab import drive
# drive.mount('/content/drive')

Mounted at /content/drive


In [1]:
# Standard libraries
import numpy as np
import pandas as pd
import os
import time

# Libraries for audio
from IPython.display import Audio
import librosa
import scipy.ndimage
import IPython.display as ipd

# Training and Testing Split
from sklearn.model_selection import train_test_split

# for normalization & avgpooling features
# from sklearn.preprocessing import MinMaxScaler # to squeeze all the features to be within 0 and 1
import tensorflow as tf

# Model
from sklearn.preprocessing import LabelEncoder
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout,GRU
from tensorflow.keras.utils import to_categorical
from tensorflow.keras import layers, models
from tensorflow.keras.regularizers import l2
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import BatchNormalization


# Operational
from tqdm import tqdm
import matplotlib.pyplot as plt
import seaborn as sns
import pickle

In [2]:
# Root of the Project-V3 dataset tree. Set the PROJECT_V3_DIR environment
# variable to run this notebook on another machine; the fallback is the
# original local path, so the default behaviour is unchanged.
DATASET_ROOT = os.environ.get(
    'PROJECT_V3_DIR',
    'C:/Users/thato/Documents/Final-Year-Project/Dataset/Project-V3',
)

# Pickled feature tables for the two splits, and the CSV that assigns rows to splits.
train_pkl_path = f'{DATASET_ROOT}/feature-extraction/stage-2-1/4s-2D-MS-train.pkl'
val_pkl_path = f'{DATASET_ROOT}/feature-extraction/stage-2-1/4s-2D-MS-val.pkl'
csv_path = f'{DATASET_ROOT}/trainval-split/stage-1-1-trainval.csv'

trainval_data = pd.read_csv(csv_path)
# Rows whose 'set' column is 'tr' go to train_data; rows marked 'val' go to val_data.
train_data = trainval_data[trainval_data['set'] == 'tr']
val_data = trainval_data[trainval_data['set'] == 'val']

In [3]:
# Load the pickled training table from train_pkl_path.
# NOTE(review): pickle.load can execute arbitrary code while deserialising —
# only open pickle files from a source you trust.
with open(train_pkl_path, 'rb') as f:
    train_df = pickle.load(f)

In [4]:
# Load the pickled validation table from val_pkl_path.
# NOTE(review): pickle.load can execute arbitrary code while deserialising —
# only open pickle files from a source you trust.
with open(val_pkl_path, 'rb') as f:
    val_df = pickle.load(f)

In [5]:
# Remove the leftover file-handle name from the namespace; the `with` blocks
# that bound it have already closed the underlying file.
del f

# Preparing Training Data

In [6]:
# Select the 'processed_spectrograms' entries of the training table and preview
# the first few. The cells that follow read a 'features' and a 'label' key from each entry.
train = train_df['processed_spectrograms']
train.head()

0    {'features': [[[[ 3.45461357e-03  2.81875355e-...
1    {'features': [[[[ 2.01279696e-03  7.86764336e-...
2    {'features': [[[[ 1.32586967e-01  7.20314414e-...
3    {'features': [[[[ 7.34869327e-04  2.62249643e-...
4    {'features': [[[[ 5.51799884e-03  2.52135763e-...
Name: processed_spectrograms, dtype: object

In [7]:
# Flatten the per-row dicts into two parallel lists: one entry per feature
# array, paired with the label of the row that array came from.
train_features = []
train_labels = []

for sample in train:
    row_label, segments = sample['label'], sample['features']
    train_features.extend(segments)
    # Repeat the row's label once per feature array so both lists stay aligned.
    train_labels.extend([row_label] * len(segments))

In [9]:
# Total number of feature arrays collected across all training rows.
len(train_features)

11230

In [10]:
# Should equal len(train_features): one label is appended per feature array.
len(train_labels)

11230

In [8]:
# Materialise both Python lists as NumPy arrays (features first, then labels).
X_train, y_train = np.array(train_features), np.array(train_labels)

In [10]:
# Sanity-check the stacked training array's dimensions (leading axis = samples).
X_train.shape

(11230, 300, 200, 3)

# Preparing Validation Data

In [9]:
# Select the 'processed_spectrograms' entries of the validation table and preview
# the first few. The cells that follow read a 'features' and a 'label' key from each entry.
val = val_df['processed_spectrograms']
val.head()

483    {'features': [[[[ 4.05979712e-04  2.58888767e-...
484    {'features': [[[[ 1.20266295e-12  1.00000000e+...
485    {'features': [[[[ 1.89046248e-04  1.00000000e+...
486    {'features': [[[[ 6.30343615e-11  4.16850191e-...
487    {'features': [[[[ 7.46824022e-03  6.65180028e-...
Name: processed_spectrograms, dtype: object

In [10]:
# Flatten the per-row dicts into two parallel lists: one entry per feature
# array, paired with the label of the row that array came from.
val_features = []
val_labels = []

for sample in val:
    row_label, segments = sample['label'], sample['features']
    val_features.extend(segments)
    # Repeat the row's label once per feature array so both lists stay aligned.
    val_labels.extend([row_label] * len(segments))

In [15]:
# Total number of feature arrays collected across all validation rows.
len(val_features)

2592

In [16]:
# Should equal len(val_features): one label is appended per feature array.
len(val_labels)

2592

In [11]:
# Materialise both Python lists as NumPy arrays (features first, then labels).
X_val, y_val = np.array(val_features), np.array(val_labels)

In [18]:
# Sanity-check the stacked validation array's dimensions (leading axis = samples).
X_val.shape

(2592, 300, 200, 3)

# Encoding Labels

In [12]:
# Map class labels to integer ids. The mapping is learned from the training
# labels only and then reused for validation, so both splits share one encoding.
label_encoder = LabelEncoder().fit(y_train)
y_train_encoded = label_encoder.transform(y_train)
y_val_encoded = label_encoder.transform(y_val)

In [13]:
# One-hot encode the integer class ids for use with categorical cross-entropy.
# The width is pinned to the number of classes the encoder learned from the
# training labels: without num_classes, to_categorical sizes its output as
# max(id) + 1 of the array it is given, so a split that happens to lack the
# highest class id would come out narrower than the model's output layer.
y_train_one_hot = to_categorical(y_train_encoded, num_classes=len(label_encoder.classes_))
y_val_one_hot = to_categorical(y_val_encoded, num_classes=len(label_encoder.classes_))

In [14]:
# Count the distinct encoded class ids in the training labels and display it.
num_classes = np.unique(y_train_encoded).size
num_classes

20

# Preparing Data For CNN

In [None]:
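# No reshaping needed for the CNN: X_train / X_val are already 4-D, (samples, 300, 200, 3), which is the per-sample shape the Conv2D model is built with.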

# Preparing data for GRU

In [15]:
# Collapse the last two axes of each 4-D array into one, giving the
# (samples, timesteps, features) layout expected by the recurrent model.
X_train_reshaped, X_val_reshaped = (
    batch.reshape(batch.shape[0], batch.shape[1], batch.shape[2] * batch.shape[3])
    for batch in (X_train, X_val)
)

# **Model Building**

In [16]:
def gru(input_shape=(300, 600), num_classes=num_classes):
    """Build and compile a two-layer GRU classifier.

    Args:
        input_shape: ``(timesteps, features)`` of a single sample, without the
            batch axis. Defaults to ``(300, 600)``.
        num_classes: Width of the softmax output layer. The default is
            whatever the notebook-level ``num_classes`` held when this cell
            was executed (Python binds default arguments at definition time).

    Returns:
        A compiled ``Sequential`` model using the Adam optimizer,
        categorical cross-entropy loss and the accuracy metric.
    """
    model = Sequential()

    # Declare the input explicitly so the model is built for the caller's
    # input_shape rather than a fixed size.
    model.add(layers.Input(shape=input_shape))

    # Recurrent stack: the first GRU emits the full sequence for the second
    # one to consume; the second emits only its final state.
    model.add(GRU(128, return_sequences=True, dropout=0.2, recurrent_dropout=0.2))
    model.add(BatchNormalization())
    model.add(GRU(64, return_sequences=False, dropout=0.2, recurrent_dropout=0.2))
    model.add(BatchNormalization())

    # Dense classification head.
    model.add(Dense(64, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(num_classes, activation='softmax'))

    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

    return model

In [17]:
def cnn(input_shape, num_classes):
    """Build and compile a small two-stage convolutional classifier.

    Args:
        input_shape: Shape of a single sample, without the batch axis. The
            call site in this notebook passes ``(300, 200, 3)``.
        num_classes: Width of the softmax output layer.

    Returns:
        A compiled ``Sequential`` model using the Adam optimizer,
        categorical cross-entropy loss and the accuracy metric.
    """
    model = models.Sequential()

    # Declare the input with an explicit Input layer instead of passing
    # input_shape= to the first Conv2D, which newer Keras versions warn about.
    model.add(layers.Input(shape=input_shape))

    # Convolutional stage 1
    model.add(layers.Conv2D(32, (3, 3), activation='relu'))
    model.add(layers.MaxPooling2D((2, 2)))

    # Convolutional stage 2
    model.add(layers.Conv2D(64, (3, 3), activation='relu'))
    model.add(layers.MaxPooling2D((2, 2)))

    # Two deeper stages (Conv2D with 128 and 256 filters, each followed by
    # 2x2 max pooling) are currently disabled.

    # Flatten the feature maps to a vector for the fully connected layers.
    model.add(layers.Flatten())

    # Fully connected layer, with dropout for regularization.
    model.add(layers.Dense(512, activation='relu'))
    model.add(layers.Dropout(0.5))

    # Output layer with softmax activation for classification.
    model.add(layers.Dense(num_classes, activation='softmax'))

    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    return model

# Testing CNN

In [18]:
# Derive the CNN's input shape and class count from the prepared arrays rather
# than repeating them as literals, so they cannot drift away from the data.
input_shape_cnn = tuple(X_train.shape[1:])  # per-sample shape, batch axis dropped
num_classes = y_train_one_hot.shape[1]      # width of the one-hot label vectors

In [19]:
# Instantiate the CNN for the configured input shape and class count, then
# print its layer table.
model_cnn = cnn(input_shape_cnn, num_classes)
model_cnn.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [20]:
# Train for a fixed 10 epochs, scoring on the validation split after each one.
# NOTE(review): in the recorded log (epochs 1-6 visible) val_loss is lowest at
# epoch 2 and rises afterwards while training accuracy keeps climbing — the
# model is overfitting; consider early stopping or stronger regularisation.
history_cnn = model_cnn.fit(
    x=X_train,
    y=y_train_one_hot,
    batch_size=32,
    epochs=10,
    validation_data=(X_val, y_val_one_hot),
)

Epoch 1/10
[1m351/351[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m450s[0m 1s/step - accuracy: 0.3366 - loss: 3.0212 - val_accuracy: 0.3935 - val_loss: 2.0521
Epoch 2/10
[1m351/351[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m405s[0m 1s/step - accuracy: 0.7400 - loss: 0.8779 - val_accuracy: 0.4907 - val_loss: 1.8488
Epoch 3/10
[1m351/351[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m407s[0m 1s/step - accuracy: 0.9049 - loss: 0.3147 - val_accuracy: 0.4541 - val_loss: 2.4841
Epoch 4/10
[1m351/351[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m407s[0m 1s/step - accuracy: 0.9549 - loss: 0.1480 - val_accuracy: 0.4892 - val_loss: 2.5791
Epoch 5/10
[1m351/351[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m403s[0m 1s/step - accuracy: 0.9772 - loss: 0.0769 - val_accuracy: 0.5089 - val_loss: 2.6748
Epoch 6/10
[1m351/351[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m405s[0m 1s/step - accuracy: 0.9799 - loss: 0.0643 - val_accuracy: 0.5112 - val_loss: 2.7893
Epoch 7/10
[1m351/351

In [21]:
# Score the trained CNN on the validation split. evaluate() returns the loss
# followed by the compiled metrics (here: accuracy).
val_loss, val_accuracy = model_cnn.evaluate(x=X_val, y=y_val_one_hot)
print(f"Validation Accuracy: {val_accuracy}")

[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 139ms/step - accuracy: 0.4724 - loss: 3.3106
Validation Accuracy: 0.5034722089767456
