# 1. Setup

# 1.1 Imports

In [3]:
!pip install comet_ml

Collecting comet_ml
  Downloading comet_ml-3.55.0-py3-none-any.whl.metadata (4.0 kB)
Collecting dulwich!=0.20.33,>=0.20.6 (from comet_ml)
  Downloading dulwich-1.0.0-cp312-cp312-manylinux_2_28_x86_64.whl.metadata (5.4 kB)
Collecting everett<3.2.0,>=1.0.1 (from everett[ini]<3.2.0,>=1.0.1->comet_ml)
  Downloading everett-3.1.0-py2.py3-none-any.whl.metadata (17 kB)
Collecting python-box<7.0.0 (from comet_ml)
  Downloading python_box-6.1.0-py3-none-any.whl.metadata (7.8 kB)
Collecting configobj (from everett[ini]<3.2.0,>=1.0.1->comet_ml)
  Downloading configobj-5.0.9-py2.py3-none-any.whl.metadata (3.2 kB)
Downloading comet_ml-3.55.0-py3-none-any.whl (780 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m780.9/780.9 kB[0m [31m17.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dulwich-1.0.0-cp312-cp312-manylinux_2_28_x86_64.whl (1.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.4/1.4 MB[0m [31m48.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading ev

In [4]:
import os
import random
import pandas as pd
import numpy as np
from comet_ml import start
from comet_ml.integration.pytorch import log_model
import torch
import torchaudio
import torch.nn as nn
from torch.nn.functional import pad
from torch.utils.data import Dataset, DataLoader
from torchaudio.functional import compute_deltas
from tqdm.auto import tqdm
from IPython.display import Audio
from typing import List
from pathlib import Path
import soundfile as sf

# 1.2 Fix randomness

In [21]:
SEED = 42

In [22]:
def set_random_seed(seed):
    """
    Set random seed for model training or inference.

    Args:
        seed (int): defines which seed to use.
    """
    torch.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    np.random.seed(seed)
    random.seed(seed)
    os.environ["PYTHONHASHSEED"] = str(seed)

set_random_seed(seed=SEED)

In [23]:
def set_worker_seed(worker_id):
    worker_seed = torch.initial_seed() % 2**32
    np.random.seed(worker_seed)
    random.seed(worker_seed)

# 2. Data


## 2.1 Download and read protocol files

In [24]:
def read_info(path_to_info: str):
    file_names = []
    labels = []

    with open(path_to_info, 'r') as f:
        for line in f:
            line = line.split()
            file_names.append(line[1])
            labels.append(1 if line[-1] == "bonafide" else 0)

    dataset = pd.DataFrame({"file_name": file_names, "label": labels})

    return dataset


In [25]:
import kagglehub

path = kagglehub.dataset_download("awsaf49/asvpoof-2019-dataset")

print("Path to dataset files:", path)

Using Colab cache for faster access to the 'asvpoof-2019-dataset' dataset.
Path to dataset files: /kaggle/input/asvpoof-2019-dataset


In [26]:
BASE_DIR = os.path.join(path, "LA", "LA")

train_info_path = os.path.join(BASE_DIR,"ASVspoof2019_LA_cm_protocols","ASVspoof2019.LA.cm.train.trn.txt")
dev_info_path = os.path.join(BASE_DIR,"ASVspoof2019_LA_cm_protocols","ASVspoof2019.LA.cm.dev.trl.txt")
eval_info_path = os.path.join(BASE_DIR,"ASVspoof2019_LA_cm_protocols","ASVspoof2019.LA.cm.eval.trl.txt")

train_dataset_path = os.path.join(BASE_DIR,"ASVspoof2019_LA_train","flac")
dev_dataset_path = os.path.join(BASE_DIR,"ASVspoof2019_LA_dev","flac")
eval_dataset_path = os.path.join(BASE_DIR,"ASVspoof2019_LA_eval","flac")
train_dataset_info = read_info(train_info_path)
dev_dataset_info = read_info(dev_info_path)
eval_dataset_info = read_info(eval_info_path)

In [27]:
dev_dataset_info

Unnamed: 0,file_name,label
0,LA_D_1047731,1
1,LA_D_1105538,1
2,LA_D_1125976,1
3,LA_D_1293230,1
4,LA_D_1340209,1
...,...,...
24839,LA_D_9924204,0
24840,LA_D_9931163,0
24841,LA_D_9935163,0
24842,LA_D_9944718,0


### 2.2 Get sample rate for future transforms

In [29]:
sample_flac = os.path.join(
    BASE_DIR,
    "ASVspoof2019_LA_train", "flac", "LA_T_1000137.flac"
)

_, sr = sf.read(sample_flac)
SAMPLE_RATE = sr

# 3. Features


In [30]:
def getSTFT(n_fft, win_length, hop_length, window_fn=torch.hann_window, power=None, normalized=False):
  STFT_CONFIG = {"n_fft": n_fft, "win_length": win_length, "hop_length": hop_length,
        "window_fn": window_fn,  "power": power, "normalized": normalized}

  STFT = torchaudio.transforms.Spectrogram(**STFT_CONFIG)

  return STFT

In [31]:
def getMagnitudeSpectrum(input_audio):
    magnitude_spectrogram = input_audio.abs()
    return magnitude_spectrogram

In [32]:
def createFilterBank(n_fft=512, n_filter_banks=60):

    freqs = np.linspace(0, SAMPLE_RATE / 2, int(n_fft // 2 + 1))
    f_min, f_max = 0.0, SAMPLE_RATE / 2
    filter_edges = np.linspace(f_min, f_max, n_filter_banks + 2)
    filter_banks = np.zeros((n_filter_banks, len(freqs)))
    for i in range(1, n_filter_banks + 1):
        left = filter_edges[i - 1]
        center = filter_edges[i]
        right = filter_edges[i + 1]

        left_slope = (freqs - left) / (center - left)
        right_slope = (right - freqs) / (right - center)
        filter_banks[i - 1] = np.maximum(0, np.minimum(left_slope, right_slope))

    return torch.tensor(filter_banks, dtype = torch.float)

 In https://arxiv.org/pdf/1904.05576 log power magnitude spectrum computed from the signal as features was used as input for frontend (FFT).

Configuration of STFT is taken from https://arxiv.org/pdf/2103.11326 as well as addition of 60 linear filter-bank channels.

In [33]:
class SpectrogramFrontEnd(nn.Module):
    def __init__(self, n_filter_banks=60):
        super().__init__()
        self.n_fft = 512
        self.win_length = int(SAMPLE_RATE * 0.020)
        self.hop_length = int(SAMPLE_RATE * 0.010)
        self.window_fn = torch.hann_window

        self.n_filter_banks = 60

        self.STFT = getSTFT(self.n_fft, self.win_length, self.hop_length, window_fn=self.window_fn)

        self.fc_layer = nn.Linear(self.n_fft // 2 + 1, self.n_filter_banks)
        self.filter_banks = createFilterBank(n_fft=self.n_fft, n_filter_banks=self.n_filter_banks)
        self.fc_layer.weight.data = self.filter_banks

    def forward(self, input_audio):
        spectrogram = self.STFT(input_audio)
        log_power_spectrogram = getMagnitudeSpectrum(spectrogram)

        log_power_spectrogram = log_power_spectrogram.squeeze(0).transpose(0,1)

        compressed_spec = self.fc_layer(log_power_spectrogram)
        compressed_spec = compressed_spec.transpose(0,1).unsqueeze(0)
        return compressed_spec

To concatenate several tensors into a single batch, PyTorch requires all the tensors to have the equal shape.

We want all utterances to have the same fixed length, so it is done inside __getitem__, not collate_fn.

In https://arxiv.org/pdf/2103.11326 LCCC-trim-pad used length of 750 claiming it to cover input features of 98% input trials

In [34]:
import soundfile as sf
import torch

def load_audio(path):
    audio, sr = sf.read(path)
    audio = torch.tensor(audio).float()
    if audio.ndim == 2:
        audio = audio.mean(dim=1)
    return audio, sr

In [35]:
class AntiSpoofDataset(Dataset):
    def __init__(
        self,
        audio_dir_path: str,
        description: pd.DataFrame,
        transforms: list|None = None,
        k: int = 750
    ):
        self.audio_dir_path = audio_dir_path
        self.description = description
        self.transforms = transforms
        self.k = k

    def __len__(self):
        return len(self.description)

    def __getitem__(self, idx: int):
        row = self.description.iloc[idx]

        label = row["label"]

        audio_path = Path(self.audio_dir_path, f'{row["file_name"]}.flac')
        # audio, sr = torchaudio.load(audio_path)
        audio, sr = load_audio(audio_path)

        if self.transforms is not None:
            for transform in self.transforms:
                audio = transform(audio)

        _, freq_bins, length = audio.shape
        if length < self.k:
            repeat_factor = (self.k + length - 1) // length
            audio = audio.repeat(1, 1, repeat_factor)
            audio = audio[:, :, :self.k]
        else:
            max_start = length - self.k
            start = random.randint(0, max_start)
            audio = audio[:, :, start:start + self.k]

        object = {"label": label, "audio": audio,"file_name": row["file_name"]}

        return object

In [36]:
transforms = [SpectrogramFrontEnd()]

In [37]:
train_dataset = AntiSpoofDataset(train_dataset_path, train_dataset_info, transforms=transforms)
dev_dataset = AntiSpoofDataset(dev_dataset_path, dev_dataset_info, transforms=transforms)
eval_dataset = AntiSpoofDataset(eval_dataset_path,eval_dataset_info, transforms=transforms)

In [38]:
print(eval_dataset[1000]["audio"].shape)

torch.Size([1, 60, 750])


By https://arxiv.org/pdf/2103.11326 mini-batch size should be either 64 or 8,  each mini-batch contained randomly selected trials with similar duration.


No voice activity detection or feature normalization was used, no data augmentation


In [39]:
train_dataloader = DataLoader(train_dataset, batch_size=64, shuffle=True,
                             num_workers=0, worker_init_fn=set_worker_seed)
dev_dataloader = DataLoader(dev_dataset, batch_size=64, shuffle=True,
                           num_workers=0, worker_init_fn=set_worker_seed)
eval_dataloader = DataLoader(eval_dataset, batch_size=64, shuffle=True,
                            num_workers=0, worker_init_fn=set_worker_seed)

# 4. Model

## 4.1 Layers

In [40]:
class MFM(nn.Module):
    def __init__(self, input_channels, output_size, kernel_size=3, stride=1, padding=1, in_fc_layer=False):
        super().__init__()

        if (output_size != input_channels // 2):
            raise ValueError(f"expected output of MFM {input_channels // 2}, got {output_size}")
        if not in_fc_layer:
            self.net = nn.Conv2d(input_channels, output_size * 2, kernel_size, stride, padding)
        else:
            self.net = nn.Linear(input_channels, output_size * 2)

    def forward(self, x):
        x = self.net(x)
        out = torch.split(x, x.size(1) // 2, dim = 1)
        return torch.max(out[0], out[1])

In [41]:
class Layer1(nn.Module):
    def __init__(self, input_channels, out_hidden):
        super().__init__()
        self.layer = nn.Sequential(
            nn.Conv2d(input_channels, out_hidden * 2, kernel_size=(5, 5), stride=(1, 1), padding=2),
            MFM(out_hidden * 2, out_hidden)
        )

    def forward(self, input_data):
        return self.layer(input_data)

In [42]:
class Layer2(nn.Module):
    def __init__(self, in_hidden, out_hidden):
        super().__init__()
        self.layer = nn.Sequential(
            nn.Conv2d(in_hidden, in_hidden * 2, kernel_size=(1, 1), stride=(1, 1)),
            MFM(in_hidden * 2, in_hidden),
            nn.BatchNorm2d(in_hidden),
            nn.Conv2d(in_hidden, out_hidden * 2, kernel_size=(3, 3), stride=(1, 1), padding=1),
            MFM(out_hidden * 2, out_hidden)
        )

    def forward(self, input_data):
        return self.layer(input_data)

In [43]:
class Layer3(nn.Module):
    def __init__(self, in_hidden, out_hidden):
        super().__init__()
        self.layer = nn.Sequential(
            nn.Conv2d(in_hidden, in_hidden * 2, kernel_size=(1, 1), stride=(1, 1)),
            MFM(in_hidden * 2, in_hidden),
            nn.BatchNorm2d(in_hidden),
            nn.Conv2d(in_hidden, out_hidden * 2, kernel_size=(3, 3), stride=(1, 1), padding=1),
            MFM(out_hidden * 2, out_hidden)
        )

    def forward(self, input_data):
        return self.layer(input_data)

In [44]:
class Layer4(nn.Module):
    def __init__(self, in_hidden, out_hidden):
        super().__init__()
        self.layer = nn.Sequential(
            nn.Conv2d(in_hidden, in_hidden * 2, kernel_size=(1, 1), stride=(1, 1)),
            MFM(in_hidden * 2, in_hidden),
            nn.BatchNorm2d(in_hidden),
            nn.Conv2d(in_hidden, in_hidden, kernel_size=(3, 3), stride=(1, 1), padding=1),
            MFM(in_hidden, out_hidden),
            nn.BatchNorm2d(out_hidden),
            nn.Conv2d(out_hidden, in_hidden, kernel_size=(1, 1), stride=(1, 1)),
            MFM(in_hidden, out_hidden),
            nn.BatchNorm2d(out_hidden),
            nn.Conv2d(out_hidden, in_hidden, kernel_size=(3, 3), stride=(1, 1), padding=1),
            MFM(in_hidden, out_hidden)
        )

    def forward(self, input_data):
        return self.layer(input_data)

## 4.2 Compute linear input size

In [2]:
def convOutSizePar(input_size, kernel_size, stride, padding=0):
    return [int(input_size[0] - kernel_size + 2 * padding / stride) + 1,
            int(input_size[1] - kernel_size + 2 * padding / stride) + 1]

def maxPoolOutpSizePar(input_size, filter_size, stride):
    return [int((input_size[0] - filter_size) / stride) + 1,
            int((input_size[1] - filter_size) / stride) + 1]

def computeLinearInputSize(input_audio):
    size = list(input_audio.size())[1:]
    s1 = convOutSizePar(input_size=size, kernel_size=5, stride=1, padding=2)
    s2 = maxPoolOutpSizePar(input_size=s1, filter_size=2, stride=2)
    s3 = convOutSizePar(input_size=s2, kernel_size=1, stride=1)
    s7 = convOutSizePar(input_size=s3, kernel_size=3, stride=1, padding=1)
    s9 = maxPoolOutpSizePar(input_size=s7, filter_size=2, stride=2)
    s11 = convOutSizePar(input_size=s9, kernel_size=1, stride=1)
    s14 = convOutSizePar(input_size=s11, kernel_size=3, stride=1, padding=1)
    s16 = maxPoolOutpSizePar(input_size=s14, filter_size=2, stride=2)
    s17 = convOutSizePar(input_size=s16, kernel_size=1, stride=1)
    s20 = convOutSizePar(input_size=s17, kernel_size=3, stride=1, padding=1)
    s23 = convOutSizePar(input_size=s20, kernel_size=1, stride=1)
    s26 = convOutSizePar(input_size=s23, kernel_size=3, stride=1, padding=1)
    s28 = maxPoolOutpSizePar(input_size=s26, filter_size=2, stride=2)
    return s28

## Initialize model

In [46]:
class LCNN(nn.Module):
    def __init__(self, input_channels, hidden_channels, linear_input_size, output_size):
        super().__init__()
        hidden_channels_2 = hidden_channels * 3 // 2

        self.net = nn.Sequential(
            Layer1(input_channels, hidden_channels),
            nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2)),

            Layer2(hidden_channels, hidden_channels_2),
            nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2)),
            nn.BatchNorm2d(hidden_channels_2),

            Layer3(hidden_channels_2, hidden_channels * 2),
            nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2)),

            Layer4(hidden_channels * 2, hidden_channels),
            nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2)),
            nn.Flatten(),
            nn.Linear(linear_input_size[0] * linear_input_size[1] * hidden_channels, hidden_channels * 5),
            MFM(hidden_channels * 5, hidden_channels * 5 // 2, in_fc_layer=True),
            nn.Dropout(p=0.75),
            nn.BatchNorm1d(hidden_channels * 5 // 2),
            nn.Linear(hidden_channels * 5 // 2, output_size)
        )

    def forward(self, x):
        out = self.net(x)
        return out

# 5. Training and evaluation

### 5.1 EER

In [47]:
def compute_det_curve(target_scores, nontarget_scores):

    n_scores = target_scores.size + nontarget_scores.size
    all_scores = np.concatenate((target_scores, nontarget_scores))
    labels = np.concatenate((np.ones(target_scores.size), np.zeros(nontarget_scores.size)))

    indices = np.argsort(all_scores, kind='mergesort')
    labels = labels[indices]

    tar_trial_sums = np.cumsum(labels)
    nontarget_trial_sums = nontarget_scores.size - \
(np.arange(1, n_scores + 1) - tar_trial_sums)

    frr = np.concatenate((np.atleast_1d(0), tar_trial_sums / target_scores.size))
    far = np.concatenate((np.atleast_1d(1), nontarget_trial_sums /nontarget_scores.size))
    thresholds = np.concatenate((np.atleast_1d(all_scores[indices[0]] - 0.001), all_scores[indices]))

    return frr, far, thresholds

def compute_eer(bonafide_scores, other_scores):
    """
    Returns equal error rate (EER) and the corresponding threshold.
    """
    frr, far, thresholds = compute_det_curve(bonafide_scores, other_scores)
    abs_diffs = np.abs(frr - far)
    min_index = np.argmin(abs_diffs)
    eer = np.mean((frr[min_index], far[min_index]))
    return eer, thresholds[min_index]

In [48]:
def count_acc(preds, labels):
    preds = torch.argmax(preds, dim=-1)
    acc = torch.sum(preds == labels)

    return acc.item() / torch.numel(preds)

## 5.2 Training loop

In [49]:
def train_one_epoch(model, dataloader, criterion, optimizer, scheduler, experiment, device, epoch):
    model.train()
    avg_loss = 0
    all_labels, all_scores = [], []
    step = epoch * len(dataloader)

    for batch_idx, batch in tqdm(enumerate(dataloader), total=len(dataloader)):
        label = batch["label"]
        audio = batch["audio"]
        label, audio = label.to(device), audio.to(device)
        output = model(audio)
        loss = criterion(output, label)

        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        scheduler.step()
        avg_loss += loss.item()

        scores = torch.softmax(output, dim=1)[:, 1]
        all_labels.extend(label.detach().cpu().numpy())
        all_scores.extend(scores.detach().cpu().numpy())

        experiment.log_metrics({"train_step_loss": loss.item(), "acc_step": count_acc(output, label),}, step=step + batch_idx)
        torch.cuda.empty_cache()

    avg_loss = avg_loss / (batch_idx + 1)
    all_scores = np.array(all_scores)
    all_labels = np.array(all_labels)
    bonafide_scores = all_scores[all_labels == 1]
    spoof_scores = all_scores[all_labels == 0]
    eer, _ = compute_eer(bonafide_scores, spoof_scores)

    return avg_loss, eer


def inference(model, dataloader, criterion, device, epoch, is_test = False):
    model.eval()

    avg_loss, accuracy = 0, 0
    all_labels, all_scores = [], []
    if is_test:
        keys = []

    for  batch_idx, batch in enumerate(dataloader):
        label = batch["label"]
        audio = batch["audio"]
        label, audio = label.to(device), audio.to(device)

        with torch.no_grad():
            output = model(audio)
            loss = criterion(output, label)

        scores = torch.softmax(output, dim=1)[:, 1]

        all_labels.extend(label.detach().cpu().numpy())
        all_scores.extend(scores.detach().cpu().numpy())
        if is_test:
            keys.extend(batch["file_name"])

        avg_loss += loss.item()

        torch.cuda.empty_cache()

    avg_loss = avg_loss / (batch_idx + 1)

    all_scores = np.array(all_scores)
    all_labels = np.array(all_labels)

    bonafide_scores = all_scores[all_labels == 1]
    spoof_scores = all_scores[all_labels == 0]

    eer, _ = compute_eer(bonafide_scores, spoof_scores)

    if is_test:
      df = pd.DataFrame({"key": keys, "score": all_scores})
      df.to_csv(f"mvdegtiarenko_{epoch + 1}.csv", index=False, header=False)

    return avg_loss, eer


def train(model, train_dataloader, dev_dataloader, eval_dataloader, criterion,  optimizer,  scheduler, device,
    n_epochs, experiment):

    experiment.log_parameters({
        "batch_size": 128,
        "SEED": SEED,
        "frontend": "new config STFT",
        "optimizer": optimizer.__class__.__name__
    })

    for epoch in range(n_epochs):
        current_lr = optimizer.param_groups[0]['lr']
        experiment.log_metric("learning_rate", current_lr, step=(epoch + 1))

        print("Training model")
        train_avg_loss, train_eer = train_one_epoch(model, train_dataloader, criterion, optimizer, scheduler, experiment, device, epoch)
        experiment.log_metrics({"train_avg_loss": train_avg_loss, "train_eer": train_eer,}, step=(epoch + 1))

        print("Training complete\nValidating model")

        val_avg_loss, val_eer = inference(model, dev_dataloader, criterion, device, epoch)
        experiment.log_metrics({"dev_avg_loss": val_avg_loss,"dev_eer": val_eer,}, step=(epoch + 1))

        print("Validation complete")

        if epoch % 3 == 0:
            print("\nTesting model")
            test_avg_loss, test_eer = inference(model, eval_dataloader, criterion, device, epoch, is_test=True)
            experiment.log_metrics({"eval_avg_loss": test_avg_loss,"eval_eer": test_eer}, step=(epoch + 1))

            print("Test complete")

    experiment.end()

## 5.3 Run training

In [None]:
device = "cuda" if torch.cuda.is_available() else "cpu"
print(device)

cuda


In [50]:
audio = train_dataset[0]["audio"]
linear_input_size = computeLinearInputSize(audio)

In [None]:
model = LCNN(input_channels=1, hidden_channels=32, linear_input_size=linear_input_size, output_size=2)
model.to(device)

In [None]:
from torchsummary import summary

In [None]:
summary(model, input_size=(1, 750, 60))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1          [-1, 64, 750, 60]           1,664
            Conv2d-2          [-1, 64, 750, 60]          36,928
            MFM2_1-3          [-1, 32, 750, 60]               0
            Layer1-4          [-1, 32, 750, 60]               0
         MaxPool2d-5          [-1, 32, 375, 30]               0
            Conv2d-6          [-1, 64, 375, 30]           2,112
            Conv2d-7          [-1, 64, 375, 30]          36,928
            MFM2_1-8          [-1, 32, 375, 30]               0
       BatchNorm2d-9          [-1, 32, 375, 30]              64
           Conv2d-10          [-1, 96, 375, 30]          27,744
           Conv2d-11          [-1, 96, 375, 30]          83,040
           MFM2_1-12          [-1, 48, 375, 30]               0
           Layer2-13          [-1, 48, 375, 30]               0
        MaxPool2d-14          [-1, 48, 

In https://arxiv.org/pdf/2103.11326 it is stated that the simple sigmoid function is comparable to margin-based softmax for LCNN-based CMs, so Cross Entropy function is used

In [None]:
NUM_EPOCHS=5

In [None]:
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.AdamW(model.parameters(), betas=(0.9, 0.999), eps=1e-06, lr=4*10e-4)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=NUM_EPOCHS * len(train_dataloader))

In [None]:
experiment = start(
  api_key="Nvhi54f3GDZBkhMjOi8TsLHxG",
  project_name="lcnn-antispoofing-project",
  workspace="mshqd"
)

[1;38;5;39mCOMET INFO:[0m Experiment is live on comet.com https://www.comet.com/mshqd/lcnn-antispoofing-project/3b08a7bb072e49bb9ed5f17a4c0a4eda



In [None]:
train(model, train_dataloader, dev_dataloader, eval_dataloader, criterion, optimizer, scheduler, device, NUM_EPOCHS, experiment)

Training model


  0%|          | 0/397 [00:00<?, ?it/s]

Training complete
Validating model
Validation complete

Testing model
Test complete
Training model


  0%|          | 0/397 [00:00<?, ?it/s]

Training complete
Validating model
Validation complete
Training model


  0%|          | 0/397 [00:00<?, ?it/s]

Training complete
Validating model
Validation complete
Training model


  0%|          | 0/397 [00:00<?, ?it/s]

Training complete
Validating model
Validation complete

Testing model
Test complete
Training model


  0%|          | 0/397 [00:00<?, ?it/s]

Training complete
Validating model
Validation complete


[1;38;5;39mCOMET INFO:[0m ---------------------------------------------------------------------------------------
[1;38;5;39mCOMET INFO:[0m Comet.ml Experiment Summary
[1;38;5;39mCOMET INFO:[0m ---------------------------------------------------------------------------------------
[1;38;5;39mCOMET INFO:[0m   Data:
[1;38;5;39mCOMET INFO:[0m     display_summary_level : 1
[1;38;5;39mCOMET INFO:[0m     name                  : whispering_xerinae_8780
[1;38;5;39mCOMET INFO:[0m     url                   : https://www.comet.com/mshqd/lcnn-antispoofing-project/3b08a7bb072e49bb9ed5f17a4c0a4eda
[1;38;5;39mCOMET INFO:[0m   Metrics [count] (min, max):
[1;38;5;39mCOMET INFO:[0m     acc_step [1985]        : (0.5625, 1.0)
[1;38;5;39mCOMET INFO:[0m     dev_avg_loss [5]       : (0.025526431653257173, 0.26422187546148396)
[1;38;5;39mCOMET INFO:[0m     dev_eer [5]            : (0.020009643337338248, 0.17509138498489568)
[1;38;5;39mCOMET INFO:[0m     eval_avg_loss [2]      : (0.1850

In [None]:
experiment.end()

In [None]:
!ls -lah students_solutions

total 1.8M
drwxr-xr-x 3 root root 4.0K Jan 28 09:31 .
drwxr-xr-x 1 root root 4.0K Jan 28 09:34 ..
drwxr-xr-x 2 root root 4.0K Jan 28 09:27 .ipynb_checkpoints
-rw-r--r-- 1 root root 1.8M Jan 28 09:05 mvdegtiarenko.csv


In [None]:
import shutil

path = kagglehub.dataset_download("awsaf49/asvpoof-2019-dataset")
BASE_DIR = os.path.join(path, "LA", "LA")

src = os.path.join(
    BASE_DIR,
    "ASVspoof2019_LA_cm_protocols",
    "ASVspoof2019.LA.cm.eval.trl.txt"
)

dst = "ASVspoof2019.LA.cm.eval.trl.txt"

shutil.copy(src, dst)
print("Copied to:", dst)

In [None]:
!ls -lah | egrep "grading.py|ASVspoof2019.*eval.*trl"

-rw-r--r-- 1 root root 2.3M Jan 28 09:34 ASVspoof2019.LA.cm.eval.trl.txt
-rw-r--r-- 1 root root 2.5K Jan 28 09:29 grading.py


In [None]:
!python3 grading.py

Grading complete. Results saved to /content/grades.csv
