In [41]:
import os

from transformers import Wav2Vec2Model
import torch.nn as nn
from pydub import AudioSegment
import torch
from torch.utils.data import DataLoader


In [3]:
# Local imports
import data_utils
import importlib
importlib.reload(data_utils)
from data_utils import get_data
from utils import load_protocol, plot_results
import train_utils
import audio_utils

In [38]:
import importlib
importlib.reload(audio_utils)
dataset_path = get_data()

train_dir = f"{dataset_path}/LA/LA/ASVspoof2019_LA_train/flac"
val_dir = f"{dataset_path}/LA/LA/ASVspoof2019_LA_dev/flac"
test_dir = f"{dataset_path}/LA/LA/ASVspoof2019_LA_eval/flac"

In [39]:
# Add augmentations to train data
audio_utils.add_augmentations(train_dir)

In [53]:
# get a file that was augmented
train_augmented_files = [f for f in os.listdir(train_dir) if f.startswith("aug_")]

example_aug_file = train_augmented_files[2]
example_orig_file = example_aug_file[4:]



In [51]:
# display original file
AudioSegment.from_file(f"{train_dir}/{example_orig_file}", format="flac")

Python(54161) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
Python(54162) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
Python(54163) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.


In [54]:
AudioSegment.from_file(f"{train_dir}/{example_aug_file}", format="flac")

Python(54174) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
Python(54175) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
Python(54176) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.


In [6]:
# Load the .flac file
example_file = f"{dataset_path}/LA/LA/ASVspoof2019_LA_train/flac/LA_T_1004407.flac"
audio = AudioSegment.from_file(example_file, format="flac")

audio

In [83]:
importlib.reload(data_utils)
col_names = ['speaker', 'file_name', 'attack', 'label']
protocols_folder = "LA/LA/ASVspoof2019_LA_cm_protocols"
protocol_file_prefix = "ASVspoof2019.LA.cm"
data_info_train_file_path = f"{dataset_path}/{protocols_folder}/{protocol_file_prefix}.train.trn.txt"
data_info_val_file_path = f"{dataset_path}/{protocols_folder}/{protocol_file_prefix}.dev.trl.txt"
data_info_test_file_path = f"{dataset_path}/{protocols_folder}/{protocol_file_prefix}.eval.trl.txt"

train_df_info = load_protocol(data_info_train_file_path, names=col_names)
val_df_info = load_protocol(data_info_train_file_path, names=col_names)
test_df_info = load_protocol(data_info_train_file_path, names=col_names)

In [84]:
train_df = data_utils.process_data_frame(train_dir, train_df_info)
val_df = data_utils.process_data_frame(val_dir, val_df_info)
test_df = data_utils.process_data_frame(test_dir, test_df_info)

6345
      file_name label
0  LA_T_1138215     0
1  LA_T_1271820     0
2  LA_T_1272637     0
3  LA_T_1276960     0
4  LA_T_1341447     0
      file_name label
0  LA_T_1138215     0
1  LA_T_1271820     0
2  LA_T_1272637     0
3  LA_T_1276960     0
4  LA_T_1341447     0
      file_name label
0  LA_T_1138215     0
1  LA_T_1271820     0
2  LA_T_1272637     0
3  LA_T_1276960     0
4  LA_T_1341447     0


In [86]:
train_df[train_df['file_name'] == example_aug_file[:-5]]

Unnamed: 0,file_name,label
25382,aug_LA_T_9350805,1


In [87]:
train_dataset = audio_utils.AudioDataset(data_dir=train_dir, df=train_df)
val_dataset = audio_utils.AudioDataset(data_dir=val_dir, df=val_df)
test_dataset = audio_utils.AudioDataset(data_dir=test_dir, df=test_df)

In [88]:
class Wav2Vec2BinaryClassifier(nn.Module):
    def __init__(self, model_name="facebook/wav2vec2-base", input_dim=256, dropout=0.3):
        super(Wav2Vec2BinaryClassifier, self).__init__()
        self.wav2vec2 = Wav2Vec2Model.from_pretrained(model_name)
        self.classifier = nn.Sequential(
            nn.Linear(self.wav2vec2.config.hidden_size, input_dim),  # Reduce to 256 features
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(input_dim, 1),  # Single neuron for binary classification
            nn.Sigmoid()
        )

    def forward(self, input_values, attention_mask=None):
        outputs = self.wav2vec2(input_values, attention_mask=attention_mask)
        hidden_states = outputs.last_hidden_state  # Shape: (batch_size, seq_len, hidden_dim)
        
        # Get the mean embedding over time (global average pooling)
        pooled_output = hidden_states.mean(dim=1)  # Shape: (batch_size, hidden_dim)
        
        return self.classifier(pooled_output)  # Shape: (batch_size, 1)

In [89]:
device = torch.device('mps' if torch.mps.is_available() else 'cuda' if torch.cuda.is_available() else 'cpu')

In [90]:
def freeze_except_classifier(model):
    """
    Freeze all layers of the Wav2Vec 2.0 model except the classifier layers.
    """
    
    # Freeze all parameters in the Wav2Vec 2.0 model
    for param in model.wav2vec2.parameters():
        param.requires_grad = False

    # Unfreeze the classifier layers
    for param in model.classifier.parameters():
        param.requires_grad = True

    print("Wav2Vec 2.0 layers frozen. Only classifier layers are trainable.")

In [91]:
# Hyperparameters
LEARNING_RATE = 0.001
criterion = nn.BCEWithLogitsLoss()
EPOCHS = 3

In [92]:
wav2vec = Wav2Vec2BinaryClassifier()
freeze_except_classifier(wav2vec)

optimizer = torch.optim.Adam(wav2vec.parameters(), lr=LEARNING_RATE)
wav2vec.to(device)

train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True, num_workers=2)
val_loader = DataLoader(val_dataset, batch_size=16, shuffle=False, num_workers=2)

results = train_utils.train_model(wav2vec, train_loader, val_loader, criterion, optimizer, EPOCHS, device)



Wav2Vec 2.0 layers frozen. Only classifier layers are trainable.


Python(56159) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
Python(56161) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.


Progress of epoch 1/3: loop 0/1983 finished


Traceback (most recent call last):
  File "<string>", line 1, in <module>
Process Process-31:
  File "/Applications/Xcode.app/Contents/Developer/Library/Frameworks/Python3.framework/Versions/3.9/lib/python3.9/multiprocessing/spawn.py", line 116, in spawn_main
    exitcode = _main(fd, parent_sentinel)
  File "/Applications/Xcode.app/Contents/Developer/Library/Frameworks/Python3.framework/Versions/3.9/lib/python3.9/multiprocessing/spawn.py", line 129, in _main
    return self._bootstrap(parent_sentinel)
  File "/Applications/Xcode.app/Contents/Developer/Library/Frameworks/Python3.framework/Versions/3.9/lib/python3.9/multiprocessing/process.py", line 333, in _bootstrap
    threading._shutdown()
  File "/Applications/Xcode.app/Contents/Developer/Library/Frameworks/Python3.framework/Versions/3.9/lib/python3.9/threading.py", line 1435, in _shutdown
    atexit_call()
  File "/Applications/Xcode.app/Contents/Developer/Library/Frameworks/Python3.framework/Versions/3.9/lib/python3.9/concurrent/f

Unexpected exception formatting exception. Falling back to standard exception


Traceback (most recent call last):
  File "/Users/roeeseren/Library/Python/3.9/lib/python/site-packages/IPython/core/interactiveshell.py", line 3508, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "/var/folders/5y/g1j3_4fs0rz08qx2c5tfp28h0000gn/T/ipykernel_2429/3394482668.py", line 10, in <module>
    results = train_utils.train_model(wav2vec, train_loader, val_loader, criterion, optimizer, EPOCHS, device)
  File "/Users/roeeseren/Documents/semester-seven/deep-learning/project/train_utils.py", line 28, in train_model
    outputs = model(inputs).squeeze(1)
KeyboardInterrupt

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/Users/roeeseren/Library/Python/3.9/lib/python/site-packages/IPython/core/interactiveshell.py", line 2105, in showtraceback
    stb = self.InteractiveTB.structured_traceback(
  File "/Users/roeeseren/Library/Python/3.9/lib/python/site-packages/IPython/core/ultratb.py", line 1396, 