# Imports

In [1]:
import torch
from torch import nn
from utils import load_checkpoint, pad

from dataset import *
from model.model import *
from metrics import produce_evaluation_file, evaluate_EER
import utils

  from .autonotebook import tqdm as notebook_tqdm


# Config

In [2]:
config = load_checkpoint('configs/config.json')

# Evaluate ResCapsGuard on LA2019

In [None]:
eval_ids, eval_labels = get_data_for_dataset("/asvspoof/LA/ASVspoof2019_LA_cm_protocols/ASVspoof2019.LA.cm.eval.trl.txt")

eval_dataset = ASVspoof2019(eval_ids, config['eval_path_flac'], eval_labels, pad, False)
eval_dataset = {
    "eval": eval_dataset
}

dataloader = get_dataloaders(eval_dataset, config)

model = get_model(config).to(config["device"])

if config["model"] == "ResCapsGuard":
    loss_fn = CapsuleLoss(gpu_id=config['gpu_id'], weight=torch.FloatTensor([0.1, 0.9]))
elif config["model"] == "Res2TCNGuard":
    loss_fn = nn.CrossEntropyLoss(weight=torch.FloatTensor([0.1, 0.9]).to(config["device"]))

produce_evaluation_file(
    dataloader["eval"],
    model,
    config["device"],
    loss_fn,
    config["produced_file"],
    config["eval_label_path"])
print(evaluate_EER(
    pred_df=config["produced_file"],
    ref_df=config["eval_label_path"],
))

1.862744002575619


In [None]:
print(evaluate_EER(
    pred_df=config["produced_file"],
    ref_df=config["eval_label_path"],
))

# SSL model

In [40]:
sys.path.append("/app/SSL_Anti_spoofing")

In [None]:
from SSL_Anti_spoofing import RawBoost
from SSL_Anti_spoofing import data_utils_SSL
from SSL_Anti_spoofing.model import *
from SSL_Anti_spoofing.main_SSL_LA import produce_evaluation_file

ImportError: cannot import name 'Model' from 'model' (/app/SafeSpeak-2024/model/__init__.py)

In [72]:
model = Model(_, device='cuda')

In [73]:
model.load_state_dict(torch.load('/app/SafeSpeak-2024/weights/LA_model.pth',map_location='cuda'))

<All keys matched successfully>

In [74]:
nb_params = sum([param.view(-1).size()[0] for param in model.parameters()])
model =model.to('cuda')
print('nb_params:',nb_params)

nb_params: 317837834


In [48]:
eval_ids, eval_labels = get_data_for_dataset("/asvspoof/LA/ASVspoof2019_LA_cm_protocols/ASVspoof2019.LA.cm.eval.trl.txt")

eval_dataset = ASVspoof2019(eval_ids, config['eval_path_flac'], eval_labels, pad, False)
eval_dataset = {
    "eval": eval_dataset
}

# SSL_Anti_spoofing.main_SSL_LA.produce_evaluation_file(eval_dataset['eval'], model, 'cuda', 'ssl_eval.txt')

In [51]:
dataloader = get_dataloaders(eval_dataset, config)

In [81]:
len(dataloader["eval"])

8905

In [53]:
next(iter(dataloader["eval"]))

[tensor([[ 0.0009,  0.0008,  0.0007,  ...,  0.1776,  0.1562,  0.1233],
         [-0.0013, -0.0007, -0.0009,  ...,  0.0267,  0.0245,  0.0246],
         [-0.0144, -0.0124, -0.0105,  ..., -0.0129, -0.0183, -0.0281],
         ...,
         [-0.0005, -0.0009, -0.0012,  ..., -0.0500, -0.0488, -0.0467],
         [-0.0004, -0.0003, -0.0004,  ...,  0.0003,  0.0043, -0.0033],
         [ 0.0005,  0.0007,  0.0006,  ..., -0.0013, -0.0012, -0.0014]]),
 ('LA_E_2834763',
  'LA_E_8877452',
  'LA_E_6828287',
  'LA_E_6977360',
  'LA_E_5932896',
  'LA_E_5849185',
  'LA_E_6163791',
  'LA_E_4581379'),
 tensor([0, 0, 0, 0, 0, 1, 0, 1])]

In [75]:
import tqdm
device = "cuda"
model.eval()

# read file ASVspoof2019.LA.cm.<dev/train/eval>.trl.txt
# with open(trial_path, "r") as file_trial:
#     trial_lines = file_trial.readlines()

# list of utterance id and list of score for appropiate uid
fname_list = []
score_list = []
current_loss = 0
weight = torch.FloatTensor([0.1, 0.9]).to(device)
criterion = nn.CrossEntropyLoss(weight=weight)
# inference
for batch_x, utt_id, batch_y in utils.progressbar(dataloader["eval"], prefix='computing cm score'):
    batch_x, batch_y = batch_x.to(device), batch_y.view(-1).type(torch.int64).to(device)
    with torch.no_grad():
        # first is hidden layer, second is result
        # print(batch_x, batch_y.shape)
        batch_out = model(batch_x)
        # print(batch_out.shape)
        # 1 - for bonafide speech class
        batch_score = (batch_out[:, 1]).data.cpu().numpy().ravel()
        # print(batch_out, batch_y)
        loss = criterion(batch_out, batch_y)
        current_loss += loss.item() / len(dataloader["eval"])
        print(current_loss)

    # add outputs
    fname_list.extend(utt_id)
    score_list.extend(batch_score.tolist())
# assert len(trial_lines) == len(fname_list) == len(score_list)

1.0095857809996037e-10
4.6490965836585924e-10.........................................................] 1/8905 time 00:00.84 / 124:15.11
1.1770580205918563e-08.........................................................] 2/8905 time 00:00.92 / 68:01.03
4.554701171203932e-05..........................................................] 3/8905 time 00:01.00 / 49:29.43
4.556853541798809e-05..........................................................] 4/8905 time 00:01.08 / 39:59.97
4.563318436937986e-05..........................................................] 5/8905 time 00:01.16 / 34:18.29
4.563339465314685e-05..........................................................] 6/8905 time 00:01.23 / 30:29.94
4.5635804147790605e-05.........................................................] 7/8905 time 00:01.32 / 27:51.91
4.5654935330951044e-05.........................................................] 8/8905 time 00:01.40 / 25:54.24
0.00016271391955087857..................................................

In [76]:
with open(config['eval_label_path'], "r") as file_trial:
    trial_lines = file_trial.readlines()

with open('ssl_preds.txt', "w") as fh:

    # fn - uid, sco - score, trl - trial_lines
    for fn, sco, trl in zip(fname_list, score_list, trial_lines):
        _, utt_id, _, src, key = trl.strip().split(' ')
        assert fn == utt_id
        # format: utterance id - type of spoof attack - key - score
        fh.write("{} {} {} {}\n".format(utt_id, src, key, sco))

In [79]:
!pwd

/app/SafeSpeak-2024


In [80]:
print(evaluate_EER(
    pred_df='/app/SafeSpeak-2024/ssl_preds.txt',
    ref_df=config["eval_label_path"],
))

0.22069465645264424


# LSSL model

In [3]:
model = Model(config['device'])
model.load_state_dict(torch.load(config['checkpoint'], map_location='cuda'))
print(sum([param.view(-1).size()[0] for param in model.parameters()]))

317837834


In [None]:
class LSSLModel(nn.Module):
    def __init__(self,device):
        super(LSSLModel, self).__init__()
        
        # cp_path = '/app/SafeSpeak-2024/weights/xlsr2_300m.pt'   # Change the pre-trained XLSR model path. 
        cp_path = '/app/SafeSpeak-2024/weights/xlsr2_960m_1000k.pt'
        model, cfg, task = fairseq.checkpoint_utils.load_model_ensemble_and_task([cp_path])
        self.model = model[0]
        self.device=device
        self.out_dim = 1024
        return

    def extract_feat(self, input_data):
        
        # put the model to GPU if it not there
        if next(self.model.parameters()).device != input_data.device \
           or next(self.model.parameters()).dtype != input_data.dtype:
            self.model.to(input_data.device, dtype=input_data.dtype)
            self.model.train()

        
        if True:
            # input should be in shape (batch, length)
            if input_data.ndim == 3:
                input_tmp = input_data[:, :, 0]
            else:
                input_tmp = input_data
                
            # [batch, length, dim]
            emb = self.model(input_tmp, mask=False, features_only=True)['x']
        return emb[..., :self.out_dim]

In [14]:
lssl = LSSLModel(config['device'])
model.ssl_model = lssl
print(sum([param.view(-1).size()[0] for param in model.parameters()]))

965961994


In [15]:
model.to(config['device'])

Model(
  (ssl_model): LSSLModel(
    (model): Wav2Vec2Model(
      (feature_extractor): ConvFeatureExtractionModel(
        (conv_layers): ModuleList(
          (0): Sequential(
            (0): Conv1d(1, 512, kernel_size=(10,), stride=(5,))
            (1): Dropout(p=0.0, inplace=False)
            (2): Sequential(
              (0): TransposeLast()
              (1): Fp32LayerNorm((512,), eps=1e-05, elementwise_affine=True)
              (2): TransposeLast()
            )
            (3): GELU()
          )
          (1): Sequential(
            (0): Conv1d(512, 512, kernel_size=(3,), stride=(2,))
            (1): Dropout(p=0.0, inplace=False)
            (2): Sequential(
              (0): TransposeLast()
              (1): Fp32LayerNorm((512,), eps=1e-05, elementwise_affine=True)
              (2): TransposeLast()
            )
            (3): GELU()
          )
          (2): Sequential(
            (0): Conv1d(512, 512, kernel_size=(3,), stride=(2,))
            (1): Dropout(p=0

In [None]:
def eval(model, cfg=config):
    eval_ids, eval_labels = get_data_for_dataset(cfg['eval_label_path'])

    eval_dataset = ASVspoof2019(eval_ids, cfg['eval_path_flac'], eval_labels, pad, False)
    eval_dataset = {
        "eval": eval_dataset
    }
    dataloader = get_dataloaders(eval_dataset, cfg)

    loss_fn = nn.CrossEntropyLoss(weight=torch.FloatTensor([0.1, 0.9]).to(cfg["device"]))

    produce_evaluation_file(
        dataloader["eval"],
        model,
        cfg["device"],
        loss_fn,
        cfg["produced_file"],
        cfg["eval_label_path"]
    )
    
    print(evaluate_EER(
        pred_df=cfg["produced_file"],
        ref_df=cfg["eval_label_path"],
    ))

eval(model, config)

computing cm score[............................................................] 110/8905 time 00:21.03 / 28:01.81

In [9]:
old_weights = model.LL.weight.data
old_bias = model.LL.bias.data

new_LL = torch.nn.Linear(model.ssl_model.out_dim, model.LL.out_features)


# model with knn

In [4]:
torch.cuda.is_available()

True

In [3]:
train_ids, train_labels = get_data_for_dataset(config['train_label_path'])

train_dataset = ASVspoof2019(train_ids, config['train_path_flac'], train_labels)
train_dataset = {
    "train_knn": train_dataset
}

train_knn_dataloader = get_dataloaders(train_dataset, config)

model = get_model(config["checkpoint"], config["device"])

# model.train_head(train_knn_dataloader["train_knn"])

nb_params: 317837834


In [5]:
next(iter(train_knn_dataloader['train_knn']))[0].shape

torch.Size([64, 64600])

In [4]:
sampler = CustomBatchSampler(data_source=train_dataset["train_knn"], elems_per_class=32, n_batches=2)

In [None]:
next(iter(sampler.__iter__()))

64

In [4]:
batch = next(iter(train_knn_dataloader['train_knn']))

In [5]:
batch[0].shape

torch.Size([200, 64600])

In [6]:
model.load_head("/app/SafeSpeak-2024/weights/knn_model_light.bin")

In [4]:
model.train_head(train_knn_dataloader["train_knn"])

In [6]:
features, labels = [], []
audios, targets, _ = next(iter(train_knn_dataloader["train_knn"]))
with torch.no_grad():
    feats, out = model(audios)
    feats = torch.nn.functional.normalize(feats, p=2, dim=1)
    print(feats)
    features.append(feats.numpy())
    labels.append(targets.numpy())

# features = np.vstack(features)
# labels = np.concatenate(labels)
# self.knn.fit(features, labels)

labels

tensor([[ 0.0068,  0.0180,  0.1973,  ...,  0.0264,  0.0293, -0.0045],
        [ 0.0069,  0.0235,  0.1986,  ...,  0.0260,  0.0288, -0.0041],
        [ 0.1070,  0.0847,  0.0739,  ...,  0.0496,  0.0489, -0.0205],
        ...,
        [ 0.1870,  0.0667,  0.0375,  ..., -0.0125, -0.0034, -0.0050],
        [ 0.1846,  0.0666,  0.0447,  ..., -0.0128, -0.0033, -0.0040],
        [ 0.1797,  0.0568,  0.0576,  ..., -0.0137, -0.0024, -0.0038]])


[array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])]

In [7]:
eval_ids, eval_labels = get_data_for_dataset(config['eval_label_path'])
eval_dataset = ASVspoof2019(eval_ids, config['eval_path_flac'], eval_labels, pad, False)
eval_dataset = {
    "eval": eval_dataset
}
eval_dataloader = get_dataloaders(eval_dataset, config)


In [12]:
submit_ids = get_data_for_evaldataset("/asvspoof/wavs/")

submit_dataset = EvalDataset(submit_ids, "/asvspoof/wavs/", pad)
submit_dataset = {
    "eval": submit_dataset
}
dataloader = get_dataloaders(submit_dataset, config)


In [39]:
batch_x, utt_id, batch_y = next(iter(eval_dataloader['eval']))
# print(batch_x)
preds = model.predict(batch_x)
print(preds[:,1])
batch_score = (preds[:, 1]).ravel()
batch_score

[0. 0. 0. 0. 0. 1. 0. 1.]


array([0., 0., 0., 0., 0., 1., 0., 1.])

In [10]:
model.predict(batch[0]), preds, batch[2]

(array([[1., 0.],
        [1., 0.],
        [1., 0.],
        [1., 0.],
        [1., 0.],
        [0., 1.],
        [1., 0.],
        [0., 1.]]),
 tensor([[ 6.7713, -6.0203],
         [ 7.0593, -6.1585],
         [ 6.9526, -6.1356],
         [ 7.0976, -6.1661],
         [ 7.1002, -6.1285],
         [-8.1498,  6.2407],
         [ 7.0574, -6.1579],
         [-8.1230,  6.3595]], grad_fn=<AddmmBackward>),
 tensor([0, 0, 0, 0, 0, 1, 0, 1]))

In [1]:
model.predict(batch[0])

NameError: name 'model' is not defined

In [18]:
preds[1], batch[2]

(tensor([[ 6.7713, -6.0203],
         [ 7.0593, -6.1585],
         [ 6.9526, -6.1356],
         [ 7.0976, -6.1661],
         [ 7.1002, -6.1285],
         [-8.1498,  6.2407],
         [ 7.0574, -6.1579],
         [-8.1230,  6.3595]], grad_fn=<AddmmBackward>),
 tensor([0, 0, 0, 0, 0, 1, 0, 1]))

In [4]:
batch = next(iter(train_knn_dataloader['train_knn']))


In [11]:
batch

[tensor([[-2.1362e-04, -3.6621e-04, -4.8828e-04,  ...,  8.0566e-03,
          -6.1035e-04, -8.7891e-03],
         [-3.9673e-04, -3.0518e-05,  6.1035e-05,  ...,  3.2043e-03,
           1.9836e-03,  7.0190e-04],
         [ 6.1462e-02, -1.0361e-01,  6.7749e-03,  ...,  1.2207e-04,
           1.2207e-04,  1.2207e-04],
         ...,
         [-1.6174e-03, -1.6174e-03, -1.6174e-03,  ..., -4.8828e-04,
          -5.4932e-04, -5.4932e-04],
         [-4.5471e-03, -4.4861e-03, -4.3335e-03,  ...,  3.3002e-01,
           1.6507e-01,  3.9062e-03],
         [-5.7068e-03, -5.5237e-03, -5.2490e-03,  ...,  1.0071e-03,
           9.4604e-04,  6.7139e-04]]),
 tensor([0, 0, 0,  ..., 1, 1, 1]),
 tensor([16000, 16000, 16000,  ..., 16000, 16000, 16000])]

In [7]:
sampler = IndexSampler(train_dataset['train_knn'], 1000)

In [8]:
i = 0

for item in sampler:
    i+= 1

print(i)

2000
