<a href="https://colab.research.google.com/github/Interactions-SpoofProof/interactions-ai-studio-project/blob/main/train_dataset_neural_network.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Steps for preparing the feature vectors for the neural network:
1. Make all vectors the same size by truncating/padding with 0's so that dimensions are (1, 300, 768)
2. Use normalization/standardization (I used standardization) (this is where preprocessed_feature_vectors.pkl ended)
3. Reduce the embedding dimensions (768 -> 256) using PCA (this is where reduced_feature_vecs.pkl.gz ended)
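4. Use NumPy's squeeze function to remove the singleton axis, so each sample goes from (1, 300, 256) to (300, 256)
5. Flatten each feature vector into a 1D array of length 76,800 (300 × 256), since the fully connected network used below expects a 2D input of shape (num_samples, num_features)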

Note: I trained the model on a GPU, so training took only a few minutes.

In [None]:
# Mount Google Drive in the Colab runtime; the paths used below live under /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import gzip
import pickle

# Gzip-compressed pickle with the raw training-set feature vectors.
file_path = '/content/drive/MyDrive/Team Interactions: SpoofProof - AudioClassification/train_dataset_feature_vectors_01.pkl.gz'

# NOTE(review): despite the "test_" prefix, this variable holds the vectors read
# from the train_dataset file above. pickle.load can execute arbitrary code, so
# only open files that come from a trusted source.
with gzip.open(file_path, mode='rb') as compressed_file:
    test_feature_vecs = pickle.load(compressed_file)

In [None]:
# Shape of the first raw sample, shown as a sanity check before padding.
test_feature_vecs[0].shape

(1, 220, 768)

In [None]:
# step 1: padding with 0's (10 secs)
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np

# Bring every sample to exactly 300 frames. Shorter samples get float32 zeros
# appended after their last frame (padding='post').
# NOTE(review): `truncating` is left at its default ('pre'), so samples longer
# than 300 frames lose their *leading* frames -- confirm that is intended.
padded_vecs = [
    pad_sequences(sample, maxlen=300, dtype='float32', padding='post', value=0.0)
    for sample in test_feature_vecs
]

In [None]:
# The raw list is not used again after padding, so remove the name.
del test_feature_vecs

In [None]:
# Shape of the first padded sample; the frame axis should now be 300.
padded_vecs[0].shape

(1, 300, 768)

In [None]:
# step 2: standardization (10 secs)
from sklearn.preprocessing import StandardScaler

standardized_vecs = []

for padded_sample in padded_vecs:
    # Each entry is a 3D numpy array: (num_samples, num_frames, embedding_dim)
    num_samples, num_frames, embedding_dim = padded_sample.shape

    # StandardScaler works on 2D input, so collapse the first two axes into rows.
    frame_rows = padded_sample.reshape(-1, embedding_dim)

    # A fresh scaler is fit on this entry only, so the mean/variance statistics
    # come from this entry's own rows (padding rows included).
    scaled_rows = StandardScaler().fit_transform(frame_rows)

    # Restore the original 3D layout before collecting the result.
    standardized_vecs.append(scaled_rows.reshape(num_samples, num_frames, embedding_dim))


In [None]:
# step 2: try normalizing so that range is [0,1]


In [None]:
# The padded list is not used again after standardization, so remove the name.
del padded_vecs

In [None]:
# Standardization keeps the shape; display it as a check.
standardized_vecs[0].shape

(1, 300, 768)

In [None]:
# step 3: Reduce the embedding dimensions from (768 -> 256) using PCA (3 min for 2000)
from sklearn.decomposition import PCA

# Target embedding size after the reduction.
n_components = 256

# Create the result list in this cell so it also runs on a fresh kernel
# (the initialiser used to be commented out, which raised a NameError).
reduced_feature_vecs = []

# Iterate over the samples themselves. The previous loop took
# range(len(standardized_vecs[2000:])) but indexed standardized_vecs[i], which
# dropped the 2000 offset and re-processed the leading samples instead of the
# intended ones.
for standardized_sample in standardized_vecs:
    num_samples, num_frames, embedding_dim = standardized_sample.shape

    # Reshape to 2D (combine samples and frames into one dimension for PCA)
    flattened_features = standardized_sample.reshape(num_samples * num_frames, embedding_dim)

    # NOTE(review): a separate PCA is fit for every sample, so the 256 output
    # axes are not the same directions from one sample to the next. Consider
    # fitting a single (Incremental)PCA on frames from all samples instead.
    pca = PCA(n_components=n_components)
    reduced_features = pca.fit_transform(flattened_features)

    # Reshape back to 3D: (num_samples, num_frames, n_components)
    reduced_feature_vec = reduced_features.reshape(num_samples, num_frames, n_components)
    reduced_feature_vecs.append(reduced_feature_vec)

In [None]:
# Shape of the first reduced sample; the last axis should now be 256.
reduced_feature_vecs[0].shape

(1, 300, 256)

In [None]:
# step 4: Use squeeze function to remove the extra 1 from the dimensions (1 sec)
import numpy as np

# Combine the list of per-sample arrays into one array, then drop axis 1,
# which has length 1 for every sample.
feature_vecs = np.asarray(reduced_feature_vecs)
feature_vecs_squeezed = np.squeeze(feature_vecs, axis=1)

In [None]:
# Shape of one sample after the singleton axis has been removed.
feature_vecs_squeezed[0].shape

(300, 256)

In [None]:
# Remove intermediate variables that the remaining cells do not use.
del reduced_feature_vecs
del feature_vecs
del standardized_vecs

In [None]:
# don't use this code for flattening vectors
#flattened_feature_vecs = []

#for i in range(len(feature_vecs_squeezed)):
  #reduced_feature_vecs[i] = np.array(feature_vecs_squeezed[i])
  #flattened_feature_vec = feature_vecs_squeezed.reshape(feature_vecs_squeezed[i].shape[0], -1)
  #flattened_feature_vecs.append(flattened_feature_vec)

In [None]:
# step 5: flatten feature vectors to 1D arrays instead of 2D
# Keep the first axis (one row per sample) and merge all remaining axes into one.
flattened_feature_vecs = feature_vecs_squeezed.reshape(len(feature_vecs_squeezed), -1)
flattened_feature_vecs[0].shape

(76800,)

In [None]:
# Persist the flattened vectors as a gzip-compressed pickle.
# NOTE(review): the target name is hard-coded to "first_half"; presumably the
# "second_half" file read further down was produced by editing this path -- confirm.
with gzip.open('/content/drive/MyDrive/Team Interactions: SpoofProof - AudioClassification/fully_preprocessed_train_vectors_first_half.pkl.gz', mode='wb') as out_file:
    pickle.dump(flattened_feature_vecs, out_file)

Run the cells below for neural network training:

In [None]:
# Mount Google Drive again so the training cells can be run without the preprocessing cells.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import gzip
import pickle

# Training labels (contains just the labels column).
# pickle.load can execute arbitrary code: only open trusted files.
with gzip.open('/content/drive/MyDrive/Team Interactions: SpoofProof - AudioClassification/train_labels.pkl.gz', mode='rb') as labels_file:
    labels = pickle.load(labels_file)

In [None]:
# First half of the fully preprocessed training vectors.
with gzip.open('/content/drive/MyDrive/Team Interactions: SpoofProof - AudioClassification/fully_preprocessed_train_vectors_first_half.pkl.gz', mode='rb') as first_half_file:
    final_feature_vecs1 = pickle.load(first_half_file)

In [None]:
# Second half of the fully preprocessed training vectors.
with gzip.open('/content/drive/MyDrive/Team Interactions: SpoofProof - AudioClassification/fully_preprocessed_train_vectors_second_half.pkl.gz', mode='rb') as second_half_file:
    final_feature_vecs2 = pickle.load(second_half_file)

In [None]:
import numpy as np

# Join the two halves row-wise (concatenate's default axis is 0), first half on top.
final_feature_vecs = np.concatenate([final_feature_vecs1, final_feature_vecs2])

In [None]:
# The two halves are now combined in final_feature_vecs; remove the separate names.
del final_feature_vecs1
del final_feature_vecs2

In [None]:
# (num_samples, num_features) of the combined training matrix.
final_feature_vecs.shape

(8000, 76800)

In [None]:
#import tensorflow as tf

#dataset = tf.data.Dataset.from_tensor_slices((final_feature_vecs, labels))

In [None]:
import pandas as pd

# Dataframe with one row per flattened feature vector.
train_df = pd.DataFrame(final_feature_vecs)

# Labels are added as a new 'label' column in train_df. Their count is printed
# first so it can be compared with the number of rows in final_feature_vecs.
print(len(labels))
train_df['label'] = labels[:]

8000


In [None]:
# Shuffle the order of the datapoints and renumber the index from 0.
# A fixed random_state makes the row order reproducible. That matters because
# model.fit(..., validation_split=0.2) below holds out the trailing rows, so an
# unseeded shuffle yields a different train/validation split on every run.
train_df = train_df.sample(frac=1, random_state=42).reset_index(drop=True)

In [None]:
# Take the label column out of the frame and put it back at position 0,
# making it the leftmost column.
label_col = train_df.pop('label')
train_df.insert(loc=0, column='label', value=label_col)

In [None]:
# Save the shuffled frame (labels + features) as a gzip-compressed pickle.
with gzip.open('/content/drive/MyDrive/Team Interactions: SpoofProof - AudioClassification/train_vectors+labels_reshuffled.pkl.gz', mode='wb') as out_file:
    pickle.dump(train_df, out_file)

In [None]:
# Targets are the 'label' column; features are every other column.
y_train = train_df['label']
X_train = train_df.drop(columns='label')

In [None]:
import numpy as np
from tensorflow.keras.models import Sequential
from keras.layers import Dense, Input, BatchNormalization, Activation, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.regularizers import l2

In [None]:
import tensorflow as tf

# Check if GPU is available.
# tf.test.is_gpu_available() is deprecated; TensorFlow's own warning points to
# tf.config.list_physical_devices('GPU'), which returns a (possibly empty) list.
print("GPU Available: ", bool(tf.config.list_physical_devices('GPU')))


Instructions for updating:
Use `tf.config.list_physical_devices('GPU')` instead.


GPU Available:  False


In [None]:
# Fully connected binary classifier: two hidden blocks of
# Dense (L2-regularised) -> BatchNormalization -> ReLU -> Dropout,
# followed by a single sigmoid output unit.
model = Sequential([
    # Input layer: one input per feature column of X_train
    Input(shape=(X_train.shape[1],)),

    # First hidden layer
    Dense(512, kernel_regularizer=l2(0.001)),
    BatchNormalization(),
    Activation('relu'),
    Dropout(0.2),

    # Second hidden layer
    Dense(256, kernel_regularizer=l2(0.001)),
    BatchNormalization(),
    Activation('relu'),
    Dropout(0.2),

    # Output layer
    Dense(1, activation='sigmoid'),
])

# Adam with an explicit learning rate, binary cross-entropy loss, accuracy metric.
optimizer = Adam(learning_rate=0.001)
model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])

# Stop once val_loss has not improved for 5 epochs and roll back to the best weights.
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)


In [None]:
# fit model to data (2 min) highest accuracy on train is 82%
# 20% of the rows are held out for validation; early stopping watches val_loss.
model.fit(
    X_train,
    y_train,
    epochs=50,
    batch_size=16,
    validation_split=0.2,
    callbacks=[early_stopping],
)


Epoch 1/50
[1m400/400[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m499s[0m 1s/step - accuracy: 0.7217 - loss: 1.9702 - val_accuracy: 0.7875 - val_loss: 1.2444
Epoch 2/50
[1m400/400[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m495s[0m 1s/step - accuracy: 0.7725 - loss: 1.1374 - val_accuracy: 0.7937 - val_loss: 0.9890
Epoch 3/50
[1m400/400[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m465s[0m 1s/step - accuracy: 0.7786 - loss: 0.8790 - val_accuracy: 0.7706 - val_loss: 0.9255
Epoch 4/50
[1m400/400[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m506s[0m 1s/step - accuracy: 0.7854 - loss: 0.8158 - val_accuracy: 0.7812 - val_loss: 0.9465
Epoch 5/50
[1m400/400[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m515s[0m 1s/step - accuracy: 0.7907 - loss: 0.7776 - val_accuracy: 0.7700 - val_loss: 0.9007
Epoch 6/50
[1m400/400[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m460s[0m 1s/step - accuracy: 0.7902 - loss: 0.7673 - val_accuracy: 0.7906 - val_loss: 0.9111
Epoch 7/50
[1m400/400

<keras.src.callbacks.history.History at 0x7ea31d4df8b0>

In [None]:
# Preprocessed test set (a frame with a 'label' column, split in the next cell).
file_path = '/content/drive/MyDrive/Team Interactions: SpoofProof - AudioClassification/test_final_preprocessed_vectors.pkl.gz'

# pickle.load can execute arbitrary code: only open trusted files.
with gzip.open(file_path, mode='rb') as test_file:
    test_df = pickle.load(test_file)

In [None]:
# Labels come from the 'label' column; the remaining columns are the feature vectors.
y_test = test_df['label']
X_test = test_df.drop(columns='label')

In [None]:
# evaluating model
# The model was compiled with metrics=['accuracy'], so the returned list is
# indexed below as score[0] = loss and score[1] = accuracy.
score = model.evaluate(X_test, y_test, verbose=0)
print(f'Test Loss: {score[0]}, Test Accuracy: {score[1]}')


Test Loss: 0.7182996869087219, Test Accuracy: 0.8144999742507935


In [None]:
# Model makes predictions on the test set (outputs of the final sigmoid unit).
y_pred_prob = model.predict(X_test)

# Convert predicted probabilities to class labels: 1 when strictly above 0.5, else 0.
y_pred = np.greater(y_pred_prob, 0.5).astype(int)

[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 98ms/step


In [None]:
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

# For binary labels the confusion matrix rows are [tn, fp] and [fn, tp].
(tn, fp), (fn, tp) = confusion_matrix(y_test, y_pred)

# Number of actual negatives; denominator shared by the two rates below.
negatives = tn + fp

# Key metrics on the thresholded predictions.
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
fpr = fp / negatives  # False positive rate
specificity = tn / negatives

print(f"Accuracy: {accuracy}")
print(f"Precision: {precision}")
print(f"Recall (Sensitivity): {recall}")
print(f"F1-Score: {f1}")
print(f"False Positive Rate: {fpr}")
print(f"Specificity: {specificity}")


Accuracy: 0.8115
Precision: 0.7555555555555555
Recall (Sensitivity): 0.085
F1-Score: 0.15280898876404495
False Positive Rate: 0.006875
Specificity: 0.993125


In [None]:
from sklearn.linear_model import LogisticRegression

# Logistic-regression baseline on the same training data as the neural network.
# max_iter caps the solver iterations; raise it if the solver warns about convergence.
logistic_model = LogisticRegression(max_iter=100)

# Train the model. The original call used `X` and `y`, which are never defined
# in this notebook and raised a NameError; the training split is X_train / y_train.
logistic_model.fit(X_train, y_train)

In [None]:
from sklearn.linear_model import SGDClassifier

# Linear classifier with logistic loss trained by stochastic gradient descent.
# SGD shuffles the training data between epochs, so random_state is fixed to
# make the fitted model (and the metrics reported below) reproducible.
sgd_model = SGDClassifier(loss="log_loss", max_iter=1000, random_state=42)
sgd_model.fit(X_train, y_train)

In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Class predictions of the SGD model on the test set.
y_pred = sgd_model.predict(X_test)

# Score the predictions with each metric, in the order they are reported.
accuracy, precision, recall, f1 = (
    metric(y_test, y_pred)
    for metric in (accuracy_score, precision_score, recall_score, f1_score)
)

print(f'Accuracy: {accuracy}, Precision: {precision}, Recall: {recall}, F1-Score: {f1}')


Accuracy: 0.7565, Precision: 0.27461139896373055, Recall: 0.1325, F1-Score: 0.178752107925801
