In [1]:
import os
import sys
import json
import yaml
import torch
import resampy
import librosa
import numpy as np
import pandas as pd
from tqdm import tqdm
import soundfile as sf
from pathlib import Path
from pprint import pprint
from typing import Dict, List
import matplotlib.pyplot as plt
import scipy.io.wavfile as wavfile
from collections import defaultdict
pipeline_root = Path().resolve().parents[0]
print(pipeline_root)
sys.path.append(str(pipeline_root)) # adding pipeline root to sys.path

Matplotlib created a temporary config/cache directory at /tmp/matplotlib-dq445c3u because the default path (/home/iiakovlev/.cache/matplotlib) is not a writable directory; it is highly recommended to set the MPLCONFIGDIR environment variable to a writable directory, in particular to speed up the import of Matplotlib and to better support multiprocessing.


/media/ssdraid0cgpu01/home/iiakovlev/new-pipeline/audio-pipelines-pytorch


In [2]:
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"]="0"

In [3]:
from utils.metrics import EER
from torch.utils.data import DataLoader
import datautils.parsing.antispoofing as parsing
from models.utils import load_weights_from_pl_pipeline
from models.model_builders import AudioClassificationModel
from datautils.dataset import Dataset, simple_collate_func, DummyAugmentor

In [4]:
def merge_dicts(dicts : List[Dict[int,List]]):
    merged = {}
    for d in dicts:
        for k,v in d.items():
            if k in merged:
                merged[k].extend(v)
            else:
                merged[k] = v
    return merged

def flatten_dict(d : Dict[int,List]):
    flattened = []
    for k,v in d.items():
        flattened.extend(zip(v,[k]*len(v)))
    return flattened

In [5]:
data_setup = yaml.safe_load((pipeline_root / 'data.yml').read_text())
ASVSPOOF2017 = Path(data_setup['asv17_root'])
LRPD = Path(data_setup['lrpd_root'])

asv17_dev = flatten_dict(parsing.parse_asv17(**{
    "asv_spoof_root": str(ASVSPOOF2017),
    "part": "dev",
    "return_as": "dict"
}))

asv17_eval = flatten_dict(parsing.parse_asv17(**{
    "asv_spoof_root": str(ASVSPOOF2017),
    "part": "eval",
    "return_as": "dict"
}))

lrpd_eval = flatten_dict(merge_dicts([parsing.parse_dir(root,label) for root,label in [
    (LRPD/"source_val",0),
    (LRPD/"val_aparts",1),
]]))

datasets = {
    "asv17_dev" : asv17_dev,
    "asv17_eval" : asv17_eval,
    "lrpd_eval" : lrpd_eval
}

In [6]:
model_dirs = list((pipeline_root/"checkpoints/antispoofing").glob("*/"))
pprint(model_dirs)

[PosixPath('/media/ssdraid0cgpu01/home/iiakovlev/new-pipeline/audio-pipelines-pytorch/checkpoints/antispoofing/lrpd_office_lrpd_aparts'),
 PosixPath('/media/ssdraid0cgpu01/home/iiakovlev/new-pipeline/audio-pipelines-pytorch/checkpoints/antispoofing/asv17_train'),
 PosixPath('/media/ssdraid0cgpu01/home/iiakovlev/new-pipeline/audio-pipelines-pytorch/checkpoints/antispoofing/lrpd_office_lrpd_aparts_asv17_train')]


In [7]:
def load_model(model_dir):
    model_config = json.loads((model_dir/"model_config.json").read_text())
    model = AudioClassificationModel(**model_config)
    model = model.eval()
    weights_path = str(model_dir/"model.ckpt")
    load_weights_from_pl_pipeline(model,str(weights_path),remove_unessacary=False,strict=False)
    return model

In [8]:
from typing import List, Tuple
def simple_collate_func(batch):
    xs, ys = list(zip(*batch))
    xs = np.stack(xs)
    ys = np.stack(ys)
    xs = torch.from_numpy(xs)
    return xs,ys

def run_prediction(model, data : List[Tuple[Path,str]],utt_len_sec=3.0):
    val_dataset = Dataset(
        data=data,
        size=None,
        augmentor=DummyAugmentor(),
        utt_len_sec=utt_len_sec,
        samplerate=16000,
        convert_to_ohe=False
    )

    val_dl = DataLoader(dataset=val_dataset,
        batch_size=128, shuffle=False, sampler=None,
        batch_sampler=None, num_workers=10, collate_fn=simple_collate_func,
        pin_memory=True, drop_last=False, timeout=0,
        worker_init_fn=None, multiprocessing_context=None
    )

    predictions = []
    labels = []
    model = model.cuda()
    with torch.no_grad():
        for batch in tqdm(val_dl):
            x, y = batch
            x = x.cuda()
            pred = torch.nn.functional.softmax(model(x),dim=-1)
            labels.extend(y)
            predictions.append(pred.cpu().numpy())
    predictions = np.concatenate(predictions)
    return labels, predictions

In [9]:
all_predictions = dict()
for model_dir in model_dirs:
    model_name = model_dir.parts[-1]
    all_predictions[model_name] = {}
    for dataset_name, files in datasets.items():
        model = load_model(model_dir)
        labels, predictions = run_prediction(model,files)
        del model
        torch.cuda.empty_cache()
        eer = EER(labels,predictions[:,1])[0]
        all_predictions[model_name][dataset_name] = eer
        print(f"{model_name} | {dataset_name} | EER : {eer*100:.2f}%")

  0%|          | 0/14 [00:00<?, ?it/s]

unexpected_keys : []
missing_keys : []
created ds with : 1710


100%|██████████| 14/14 [00:03<00:00,  3.52it/s]
  0%|          | 0/104 [00:00<?, ?it/s]

lrpd_office_lrpd_aparts | asv17_dev | EER : 27.84%
unexpected_keys : []
missing_keys : []
created ds with : 13306


100%|██████████| 104/104 [00:08<00:00, 12.85it/s]
  0%|          | 0/1580 [00:00<?, ?it/s]

lrpd_office_lrpd_aparts | asv17_eval | EER : 17.18%
unexpected_keys : []
missing_keys : []
created ds with : 202165


100%|██████████| 1580/1580 [03:31<00:00,  7.48it/s]
  0%|          | 0/14 [00:00<?, ?it/s]

lrpd_office_lrpd_aparts | lrpd_eval | EER : 0.16%
unexpected_keys : []
missing_keys : []
created ds with : 1710


100%|██████████| 14/14 [00:02<00:00,  6.25it/s]
  0%|          | 0/104 [00:00<?, ?it/s]

asv17_train | asv17_dev | EER : 17.54%
unexpected_keys : []
missing_keys : []
created ds with : 13306


100%|██████████| 104/104 [00:08<00:00, 12.99it/s]
  0%|          | 0/1580 [00:00<?, ?it/s]

asv17_train | asv17_eval | EER : 13.94%
unexpected_keys : []
missing_keys : []
created ds with : 202165


100%|██████████| 1580/1580 [03:31<00:00,  7.49it/s]
  0%|          | 0/14 [00:00<?, ?it/s]

asv17_train | lrpd_eval | EER : 21.70%
unexpected_keys : []
missing_keys : []
created ds with : 1710


100%|██████████| 14/14 [00:02<00:00,  6.09it/s]
  0%|          | 0/104 [00:00<?, ?it/s]

lrpd_office_lrpd_aparts_asv17_train | asv17_dev | EER : 18.63%
unexpected_keys : []
missing_keys : []
created ds with : 13306


100%|██████████| 104/104 [00:08<00:00, 12.86it/s]
  0%|          | 0/1580 [00:00<?, ?it/s]

lrpd_office_lrpd_aparts_asv17_train | asv17_eval | EER : 11.91%
unexpected_keys : []
missing_keys : []
created ds with : 202165


100%|██████████| 1580/1580 [03:31<00:00,  7.47it/s]


lrpd_office_lrpd_aparts_asv17_train | lrpd_eval | EER : 0.28%


In [10]:
data = defaultdict(list)
model_names = list(all_predictions.keys())
ds_names = list(datasets.keys())
for model_name  in model_names:
    for ds_name in ds_names:
        data[ds_name].append(all_predictions[model_name][ds_name])
df = pd.DataFrame(data=data)
df.index = model_names

In [11]:
from matplotlib import cm
cm = cm.get_cmap('Wistia')
df.style.set_properties(
    **{'color': 'black !important',
       'border': '1px black solid !important'}
).set_table_styles([{
    'selector': 'th',
    'props': [('border', '1px black solid !important')]
}]
).format("{:.2%}").background_gradient(cmap=cm,vmin=0.0,vmax=df.values.max())

Unnamed: 0,asv17_dev,asv17_eval,lrpd_eval
lrpd_office_lrpd_aparts,27.84%,17.18%,0.16%
asv17_train,17.54%,13.94%,21.70%
lrpd_office_lrpd_aparts_asv17_train,18.63%,11.91%,0.28%
