Steps for preparing the feature vectors for the neural network:
1. Make all vectors the same size by truncating/padding with 0's so that dimensions are (1, 300, 768)
2. Use normalization/standardization (I used standardization) (this is where preprocessed_feature_vectors.pkl ended)
3. Reduce the embedding dimensions (768 -> 256) using PCA (this is where reduced_feature_vecs.pkl.gz ended)
4. Use squeeze function to remove the 1 from the dimensions
5. Flatten the feature vectors into 1D arrays since neural networks only accept 2D arrays in the form (num_samples, num_features)

Note: I used GPU to train model so only took a few mins

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import gzip
import pickle

file_path = '/content/drive/MyDrive/Team Interactions: SpoofProof - AudioClassification/reduced_feature_vecs.pkl.gz'

with gzip.open(file_path, 'rb') as f:
    reduced_feature_vecs = pickle.load(f)

In [None]:
reduced_feature_vecs[0].shape

(1, 300, 256)

In [None]:
file_path = '/content/drive/MyDrive/Team Interactions: SpoofProof - AudioClassification/test_dataset_compressed.pkl.gz'

with gzip.open(file_path, 'rb') as f:
    test_dataset = pickle.load(f)

In [None]:
import numpy as np

#squeezing removes the extra 1 in the dimensions
feature_vecs = np.array(reduced_feature_vecs)
feature_vecs_squeezed = feature_vecs.squeeze(axis=1)

In [None]:
feature_vecs_squeezed.shape

(2000, 300, 256)

In [None]:
import numpy as np

flattened_feature_vecs = []
# don't use this one for flattening vectors
for i in range(len(feature_vecs_squeezed)):
  reduced_feature_vecs[i] = np.array(feature_vecs_squeezed[i])
  flattened_feature_vec = feature_vecs_squeezed.reshape(feature_vecs_squeezed[i].shape[0], -1)
  flattened_feature_vecs.append(flattened_feature_vec)


In [None]:
# use this code to flatten feature vectors
flattened_feature_vecs = feature_vecs_squeezed.reshape(feature_vecs.shape[0], -1)

# Verify the new shape
print(flattened_feature_vecs.shape)


(2000, 76800)


In [None]:
flattened_feature_vecs[0].shape

(76800,)

In [None]:
import pandas as pd

#dataframe with the flattened feature vectors
test_df = pd.DataFrame(flattened_feature_vecs)

#labels added as a new column in test_df
labels = test_dataset['label'].to_list()
test_df['label'] = labels

In [None]:
label_col = test_df.pop('label')
test_df.insert(0, 'label', label_col)  #setting label as leftmost column

In [None]:
X = test_df.drop('label', axis=1)
y = test_df['label']

In [None]:
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical

In [None]:
import tensorflow as tf

# Check if GPU is available
print("GPU Available: ", tf.test.is_gpu_available())


Instructions for updating:
Use `tf.config.list_physical_devices('GPU')` instead.


GPU Available:  True


In [None]:
from keras.layers import Dense, Input
model = Sequential()

# Use Input layer for the first layer to specify input shape
model.add(Input(shape=(X.shape[1],)))  # Specify the input shape here

model.add(Dense(32, activation='relu'))  # Reduce number of neurons
model.add(Dropout(0.5))

model.add(Dense(32, activation='relu'))
model.add(Dropout(0.5))

model.add(Dense(1, activation='sigmoid'))  # Binary classification output

model.compile(optimizer=Adam(), loss='binary_crossentropy', metrics=['accuracy'])


In [None]:
# Fit the model to your data
model.fit(X, y, epochs=10, batch_size=32, validation_split=0.2)  # Adjust epochs and batch_size as needed


Epoch 1/10
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 72ms/step - accuracy: 0.6806 - loss: 0.8366 - val_accuracy: 0.8075 - val_loss: 0.3778
Epoch 2/10
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 12ms/step - accuracy: 0.9487 - loss: 0.1460 - val_accuracy: 0.9100 - val_loss: 0.2203
Epoch 3/10
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - accuracy: 0.9953 - loss: 0.0189 - val_accuracy: 0.8925 - val_loss: 0.2422
Epoch 4/10
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 1.0000 - loss: 0.0029 - val_accuracy: 0.8975 - val_loss: 0.2373
Epoch 5/10
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - accuracy: 1.0000 - loss: 0.0015 - val_accuracy: 0.9000 - val_loss: 0.2296
Epoch 6/10
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - accuracy: 1.0000 - loss: 0.0012 - val_accuracy: 0.9050 - val_loss: 0.2211
Epoch 7/10
[1m50/50[0m [32m━━━━━━

<keras.src.callbacks.history.History at 0x7d3d360079d0>

In [None]:
# Assuming you have test data (X_test and y_test) ready
score = model.evaluate(X, y, verbose=0)
print(f'Test Loss: {score[0]}, Test Accuracy: {score[1]}')


Test Loss: 0.04316375032067299, Test Accuracy: 0.9819999933242798
