# Wav2Vec2.0 Pretraining

Trying to reproduce https://github.com/pytorch/fairseq/blob/master/examples/wav2vec/README.md


# Install all prerequirements

In [1]:
import torch
 
torch.cuda.get_device_name(device=None)

'Tesla V100-SXM2-16GB'

In [2]:
!pip install git+https://github.com/huggingface/transformers.git git+https://github.com/pytorch/fairseq pyarrow soundfile torchaudio librosa

Collecting git+https://github.com/huggingface/transformers.git
  Cloning https://github.com/huggingface/transformers.git to /tmp/pip-req-build-wpgouz6r
  Running command git clone -q https://github.com/huggingface/transformers.git /tmp/pip-req-build-wpgouz6r
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
    Preparing wheel metadata ... [?25l[?25hdone
Collecting git+https://github.com/pytorch/fairseq
  Cloning https://github.com/pytorch/fairseq to /tmp/pip-req-build-sgv8albk
  Running command git clone -q https://github.com/pytorch/fairseq /tmp/pip-req-build-sgv8albk
  Running command git submodule update --init --recursive -q
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Installing backend dependencies ... [?25l[?25hdone
    Preparing wheel metadata ... [?25l[?25hdone
Building wheels for collected packages: transformers, fairseq
  Building wheel fo

# Install APEX for mixed precision Fairseq trainings

In [3]:
%%shell
rm -f /usr/local/cuda
ln -sv /usr/local/cuda-10.1 /usr/local/cuda
git clone https://github.com/NVIDIA/apex
cd apex
git reset --hard b5eb38dbf7accc24bd872b3ab67ffc77ee858e62
pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" \
  --global-option="--deprecated_fused_adam" --global-option="--xentropy" \
  --global-option="--fast_multihead_attn" ./
cd ..

[1;30;43mStrumieniowane dane wyjściowe obcięte do 5000 ostatnich wierszy.[0m
                     from apex/contrib/csrc/multihead_attn/self_multihead_attn_bias_additive_mask.cpp:1:
    /usr/local/lib/python3.7/dist-packages/torch/include/ATen/core/TensorBody.h:303:30: note: declared here
       DeprecatedTypeProperties & type() const {
                                  ^~~~
    In file included from /usr/local/lib/python3.7/dist-packages/torch/include/c10/core/Device.h:5:0,
                     from /usr/local/lib/python3.7/dist-packages/torch/include/c10/core/Allocator.h:6,
                     from /usr/local/lib/python3.7/dist-packages/torch/include/ATen/ATen.h:7,
                     from /usr/local/lib/python3.7/dist-packages/torch/include/torch/csrc/api/include/torch/types.h:3,
                     from /usr/local/lib/python3.7/dist-packages/torch/include/torch/csrc/api/include/torch/data/dataloader_options.h:4,
                     from /usr/local/lib/python3.7/dist-packages/



# Gather the data

In [None]:
%%shell
wget https://os.unil.cloud.switch.ch/fma/fma_large.zip

--2021-04-30 19:21:28--  https://os.unil.cloud.switch.ch/fma/fma_large.zip
Resolving os.unil.cloud.switch.ch (os.unil.cloud.switch.ch)... 86.119.28.16, 2001:620:5ca1:201::214
Connecting to os.unil.cloud.switch.ch (os.unil.cloud.switch.ch)|86.119.28.16|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 100306112191 (93G) [application/zip]
Saving to: ‘fma_large.zip’


2021-04-30 20:23:58 (25.5 MB/s) - ‘fma_large.zip’ saved [100306112191/100306112191]





In [None]:
from zipfile import ZipFile
from tqdm import tqdm

with ZipFile('fma_large.zip', 'r') as zip_file:
    for file in tqdm(iterable=zip_file.namelist(), total=len(zip_file.namelist())):
        zip_file.extract(member=file, path='/content/drive/MyDrive/musicology-dataset/')

100%|██████████| 106576/106576 [2:48:56<00:00, 10.51it/s]


# Preprocess the data - wav2vec2 is strict about keeping 16k framerate

In [None]:
!sudo apt-get install sox libsox-fmt-mp3
! pip install sox pqdm

Reading package lists... Done
Building dependency tree       
Reading state information... Done
The following package was automatically installed and is no longer required:
  libnvidia-common-460
Use 'sudo apt autoremove' to remove it.
The following additional packages will be installed:
  libid3tag0 libmad0 libmagic-mgc libmagic1 libopencore-amrnb0
  libopencore-amrwb0 libsox-fmt-alsa libsox-fmt-base libsox3
Suggested packages:
  file libsox-fmt-all
The following NEW packages will be installed:
  libid3tag0 libmad0 libmagic-mgc libmagic1 libopencore-amrnb0
  libopencore-amrwb0 libsox-fmt-alsa libsox-fmt-base libsox-fmt-mp3 libsox3
  sox
0 upgraded, 11 newly installed, 0 to remove and 34 not upgraded.
Need to get 872 kB of archives.
After this operation, 7,087 kB of additional disk space will be used.
Get:1 http://archive.ubuntu.com/ubuntu bionic/universe amd64 libopencore-amrnb0 amd64 0.1.3-2.1 [92.0 kB]
Get:2 http://archive.ubuntu.com/ubuntu bionic/universe amd64 libopencore-amrwb0 a

In [None]:
from os import listdir
from os.path import isfile, join
from tqdm.notebook import tqdm
import sox
from pqdm.processes import pqdm

def convert(file):
  resampler = sox.Transformer()
  resampler.rate(16000, quality='h')
  return resampler.build_file(f'{in_path}/{file}', f'{out_path}/{file}')

for num in range(1, 156):
  num = str(num)
  if len(num)==1:
    num = "00"+num
  if len(num)==2:
    num = "0"+num
  print(num)
  in_path = f"/content/drive/MyDrive/musicology-dataset/fma_large/{num}"
  out_path = "/content/drive/MyDrive/musicology-dataset-downsampled"
  files_zero = [f for f in listdir(in_path) if isfile(join(in_path, f))]
  _ = pqdm(files_zero, convert, n_jobs=4)
  

SUBMITTING | :   0%|          | 0/870 [00:00<?, ?it/s]

001


SUBMITTING | : 100%|██████████| 870/870 [00:00<00:00, 10515.92it/s]
PROCESSING | : 100%|██████████| 870/870 [03:47<00:00,  3.82it/s]
COLLECTING | : 100%|██████████| 870/870 [00:00<00:00, 384271.74it/s]
SUBMITTING | :   0%|          | 0/64 [00:00<?, ?it/s]

002


SUBMITTING | : 100%|██████████| 64/64 [00:00<00:00, 2071.50it/s]
PROCESSING | : 100%|██████████| 64/64 [00:16<00:00,  3.89it/s]
COLLECTING | : 100%|██████████| 64/64 [00:00<00:00, 179796.02it/s]
SUBMITTING | :   0%|          | 0/599 [00:00<?, ?it/s]

003


SUBMITTING | : 100%|██████████| 599/599 [00:00<00:00, 9404.02it/s]
PROCESSING | : 100%|██████████| 599/599 [02:38<00:00,  3.78it/s]
COLLECTING | : 100%|██████████| 599/599 [00:00<00:00, 326157.09it/s]
SUBMITTING | :   0%|          | 0/731 [00:00<?, ?it/s]

004


SUBMITTING | : 100%|██████████| 731/731 [00:00<00:00, 10006.06it/s]
PROCESSING | : 100%|██████████| 731/731 [02:54<00:00,  4.18it/s]
COLLECTING | : 100%|██████████| 731/731 [00:00<00:00, 381870.25it/s]
SUBMITTING | :   0%|          | 0/321 [00:00<?, ?it/s]

005


SUBMITTING | : 100%|██████████| 321/321 [00:00<00:00, 7009.61it/s]
PROCESSING | : 100%|██████████| 321/321 [01:24<00:00,  3.78it/s]
COLLECTING | : 100%|██████████| 321/321 [00:00<00:00, 321790.53it/s]
SUBMITTING | :   0%|          | 0/314 [00:00<?, ?it/s]

006


SUBMITTING | : 100%|██████████| 314/314 [00:00<00:00, 7233.70it/s]
PROCESSING | : 100%|██████████| 314/314 [01:24<00:00,  3.72it/s]
COLLECTING | : 100%|██████████| 314/314 [00:00<00:00, 254642.59it/s]
SUBMITTING | :   0%|          | 0/380 [00:00<?, ?it/s]

007


SUBMITTING | : 100%|██████████| 380/380 [00:00<00:00, 2588.95it/s]
PROCESSING | : 100%|██████████| 380/380 [01:37<00:00,  3.90it/s]
COLLECTING | : 100%|██████████| 380/380 [00:00<00:00, 198238.25it/s]
SUBMITTING | :   0%|          | 0/474 [00:00<?, ?it/s]

008


SUBMITTING | : 100%|██████████| 474/474 [00:00<00:00, 11487.19it/s]
PROCESSING | : 100%|██████████| 474/474 [02:02<00:00,  3.87it/s]
COLLECTING | : 100%|██████████| 474/474 [00:00<00:00, 183083.17it/s]
SUBMITTING | :   0%|          | 0/743 [00:00<?, ?it/s]

009


SUBMITTING | : 100%|██████████| 743/743 [00:00<00:00, 7892.92it/s]
PROCESSING | : 100%|██████████| 743/743 [02:44<00:00,  4.51it/s]
COLLECTING | : 100%|██████████| 743/743 [00:00<00:00, 274400.62it/s]
SUBMITTING | :   0%|          | 0/685 [00:00<?, ?it/s]

010


SUBMITTING | : 100%|██████████| 685/685 [00:00<00:00, 8935.65it/s]
PROCESSING | : 100%|██████████| 685/685 [02:46<00:00,  4.12it/s]
COLLECTING | : 100%|██████████| 685/685 [00:00<00:00, 346281.58it/s]
SUBMITTING | :   0%|          | 0/738 [00:00<?, ?it/s]

011


SUBMITTING | : 100%|██████████| 738/738 [00:00<00:00, 7602.91it/s]
PROCESSING | : 100%|██████████| 738/738 [03:11<00:00,  3.85it/s]
COLLECTING | : 100%|██████████| 738/738 [00:00<00:00, 258574.58it/s]
SUBMITTING | :   0%|          | 0/753 [00:00<?, ?it/s]

012


SUBMITTING | : 100%|██████████| 753/753 [00:00<00:00, 11201.67it/s]
PROCESSING | : 100%|██████████| 753/753 [03:15<00:00,  3.84it/s]
COLLECTING | : 100%|██████████| 753/753 [00:00<00:00, 274182.73it/s]
SUBMITTING | :   0%|          | 0/769 [00:00<?, ?it/s]

013


SUBMITTING | : 100%|██████████| 769/769 [00:00<00:00, 7971.68it/s]
PROCESSING | : 100%|██████████| 769/769 [03:14<00:00,  3.96it/s]
COLLECTING | : 100%|██████████| 769/769 [00:00<00:00, 302005.60it/s]
SUBMITTING | :   0%|          | 0/743 [00:00<?, ?it/s]

014


SUBMITTING | : 100%|██████████| 743/743 [00:00<00:00, 12168.08it/s]
PROCESSING | : 100%|██████████| 743/743 [03:20<00:00,  3.71it/s]
COLLECTING | : 100%|██████████| 743/743 [00:00<00:00, 319463.65it/s]


015


SUBMITTING | : 100%|██████████| 679/679 [00:00<00:00, 7550.51it/s]
PROCESSING | : 100%|██████████| 679/679 [02:57<00:00,  3.82it/s]
COLLECTING | : 100%|██████████| 679/679 [00:00<00:00, 338234.25it/s]


016


SUBMITTING | : 100%|██████████| 722/722 [00:00<00:00, 4063.20it/s]
PROCESSING | : 100%|██████████| 722/722 [03:01<00:00,  3.97it/s]
COLLECTING | : 100%|██████████| 722/722 [00:00<00:00, 287532.04it/s]


017


SUBMITTING | : 100%|██████████| 628/628 [00:00<00:00, 9294.00it/s]
PROCESSING | : 100%|██████████| 628/628 [02:39<00:00,  3.94it/s]
COLLECTING | : 100%|██████████| 628/628 [00:00<00:00, 336831.57it/s]


018


SUBMITTING | : 100%|██████████| 625/625 [00:00<00:00, 8611.15it/s]
PROCESSING | : 100%|██████████| 625/625 [02:36<00:00,  3.98it/s]
COLLECTING | : 100%|██████████| 625/625 [00:00<00:00, 306242.99it/s]


019


SUBMITTING | : 100%|██████████| 636/636 [00:00<00:00, 9150.64it/s]
PROCESSING | : 100%|██████████| 636/636 [02:45<00:00,  3.84it/s]
COLLECTING | : 100%|██████████| 636/636 [00:00<00:00, 223134.87it/s]


020


SUBMITTING | : 100%|██████████| 692/692 [00:00<00:00, 7036.33it/s]
PROCESSING | : 100%|██████████| 692/692 [02:51<00:00,  4.03it/s]
COLLECTING | : 100%|██████████| 692/692 [00:00<00:00, 334731.68it/s]


021


SUBMITTING | : 100%|██████████| 674/674 [00:00<00:00, 9281.02it/s]
PROCESSING | : 100%|██████████| 674/674 [02:58<00:00,  3.77it/s]
COLLECTING | : 100%|██████████| 674/674 [00:00<00:00, 197385.90it/s]
SUBMITTING | :   0%|          | 0/678 [00:00<?, ?it/s]

022


SUBMITTING | : 100%|██████████| 678/678 [00:00<00:00, 9381.68it/s]
PROCESSING | : 100%|██████████| 678/678 [02:53<00:00,  3.91it/s]
COLLECTING | : 100%|██████████| 678/678 [00:00<00:00, 340894.04it/s]


023


SUBMITTING | : 100%|██████████| 666/666 [00:00<00:00, 7559.66it/s]
PROCESSING | : 100%|██████████| 666/666 [02:49<00:00,  3.94it/s]
COLLECTING | : 100%|██████████| 666/666 [00:00<00:00, 371611.87it/s]


024


SUBMITTING | : 100%|██████████| 689/689 [00:00<00:00, 9964.06it/s]
PROCESSING | : 100%|██████████| 689/689 [03:00<00:00,  3.82it/s]
COLLECTING | : 100%|██████████| 689/689 [00:00<00:00, 360288.67it/s]
SUBMITTING | :   0%|          | 0/506 [00:00<?, ?it/s]

025


SUBMITTING | : 100%|██████████| 506/506 [00:00<00:00, 3482.95it/s]
PROCESSING | : 100%|██████████| 506/506 [02:11<00:00,  3.84it/s]
COLLECTING | : 100%|██████████| 506/506 [00:00<00:00, 294930.21it/s]
SUBMITTING | :   0%|          | 0/598 [00:00<?, ?it/s]

026


SUBMITTING | : 100%|██████████| 598/598 [00:00<00:00, 7477.26it/s]
PROCESSING | : 100%|██████████| 598/598 [02:33<00:00,  3.88it/s]
COLLECTING | : 100%|██████████| 598/598 [00:00<00:00, 262473.19it/s]


027


SUBMITTING | : 100%|██████████| 643/643 [00:00<00:00, 6423.13it/s]
PROCESSING | : 100%|██████████| 643/643 [02:45<00:00,  3.90it/s]
COLLECTING | : 100%|██████████| 643/643 [00:00<00:00, 322831.87it/s]
SUBMITTING | :   0%|          | 0/692 [00:00<?, ?it/s]

028


SUBMITTING | : 100%|██████████| 692/692 [00:00<00:00, 7291.00it/s]
PROCESSING | : 100%|██████████| 692/692 [02:57<00:00,  3.89it/s]
COLLECTING | : 100%|██████████| 692/692 [00:00<00:00, 320713.63it/s]


029


SUBMITTING | : 100%|██████████| 795/795 [00:00<00:00, 9649.50it/s]
PROCESSING | : 100%|██████████| 795/795 [03:16<00:00,  4.04it/s]
COLLECTING | : 100%|██████████| 795/795 [00:00<00:00, 224015.56it/s]


030


SUBMITTING | : 100%|██████████| 695/695 [00:00<00:00, 8777.73it/s]
PROCESSING | : 100%|██████████| 695/695 [03:00<00:00,  3.86it/s]
COLLECTING | : 100%|██████████| 695/695 [00:00<00:00, 305335.84it/s]


031


SUBMITTING | : 100%|██████████| 658/658 [00:00<00:00, 9245.89it/s]
PROCESSING | : 100%|██████████| 658/658 [02:43<00:00,  4.02it/s]
COLLECTING | : 100%|██████████| 658/658 [00:00<00:00, 234382.34it/s]


032


SUBMITTING | : 100%|██████████| 801/801 [00:00<00:00, 9384.72it/s]
PROCESSING | : 100%|██████████| 801/801 [03:26<00:00,  3.88it/s]
COLLECTING | : 100%|██████████| 801/801 [00:00<00:00, 211258.10it/s]


033


SUBMITTING | : 100%|██████████| 759/759 [00:00<00:00, 4725.18it/s]
PROCESSING | : 100%|██████████| 759/759 [03:14<00:00,  3.91it/s]
COLLECTING | : 100%|██████████| 759/759 [00:00<00:00, 342015.12it/s]
SUBMITTING | :   0%|          | 0/599 [00:00<?, ?it/s]

034


SUBMITTING | : 100%|██████████| 599/599 [00:00<00:00, 7322.85it/s]
PROCESSING | : 100%|██████████| 599/599 [02:35<00:00,  3.84it/s]
COLLECTING | : 100%|██████████| 599/599 [00:00<00:00, 316780.75it/s]


035


SUBMITTING | : 100%|██████████| 832/832 [00:00<00:00, 11392.58it/s]
PROCESSING | : 100%|██████████| 832/832 [03:34<00:00,  3.87it/s]
COLLECTING | : 100%|██████████| 832/832 [00:00<00:00, 321538.83it/s]


036


SUBMITTING | : 100%|██████████| 706/706 [00:00<00:00, 7877.34it/s]
PROCESSING | : 100%|██████████| 706/706 [03:01<00:00,  3.88it/s]
COLLECTING | : 100%|██████████| 706/706 [00:00<00:00, 253092.19it/s]


037


SUBMITTING | : 100%|██████████| 636/636 [00:00<00:00, 6907.19it/s]
PROCESSING | : 100%|██████████| 636/636 [02:47<00:00,  3.80it/s]
COLLECTING | : 100%|██████████| 636/636 [00:00<00:00, 322990.36it/s]


038


SUBMITTING | : 100%|██████████| 708/708 [00:00<00:00, 11515.08it/s]
PROCESSING | : 100%|██████████| 708/708 [03:06<00:00,  3.79it/s]
COLLECTING | : 100%|██████████| 708/708 [00:00<00:00, 280624.38it/s]


039


SUBMITTING | : 100%|██████████| 732/732 [00:00<00:00, 7338.36it/s]
PROCESSING | : 100%|██████████| 732/732 [03:18<00:00,  3.68it/s]
COLLECTING | : 100%|██████████| 732/732 [00:00<00:00, 353713.19it/s]


040


SUBMITTING | : 100%|██████████| 823/823 [00:00<00:00, 9412.50it/s]
PROCESSING | : 100%|██████████| 823/823 [03:39<00:00,  3.75it/s]
COLLECTING | : 100%|██████████| 823/823 [00:00<00:00, 346264.64it/s]


041


SUBMITTING | : 100%|██████████| 816/816 [00:00<00:00, 8576.36it/s]
PROCESSING | : 100%|██████████| 816/816 [03:25<00:00,  3.96it/s]
COLLECTING | : 100%|██████████| 816/816 [00:00<00:00, 343250.63it/s]


042


SUBMITTING | : 100%|██████████| 777/777 [00:00<00:00, 4381.45it/s]
PROCESSING | : 100%|██████████| 777/777 [03:25<00:00,  3.78it/s]
COLLECTING | : 100%|██████████| 777/777 [00:00<00:00, 349562.82it/s]


043


SUBMITTING | : 100%|██████████| 751/751 [00:00<00:00, 7283.16it/s]
PROCESSING | : 100%|██████████| 751/751 [03:20<00:00,  3.75it/s]
COLLECTING | : 100%|██████████| 751/751 [00:00<00:00, 328014.40it/s]
SUBMITTING | :   0%|          | 0/604 [00:00<?, ?it/s]

044


SUBMITTING | : 100%|██████████| 604/604 [00:00<00:00, 7369.45it/s]
PROCESSING | : 100%|██████████| 604/604 [02:36<00:00,  3.86it/s]
COLLECTING | : 100%|██████████| 604/604 [00:00<00:00, 336390.87it/s]
SUBMITTING | :   0%|          | 0/541 [00:00<?, ?it/s]

045


SUBMITTING | : 100%|██████████| 541/541 [00:00<00:00, 5687.30it/s]
PROCESSING | : 100%|██████████| 541/541 [02:19<00:00,  3.89it/s]
COLLECTING | : 100%|██████████| 541/541 [00:00<00:00, 320995.68it/s]
SUBMITTING | :   0%|          | 0/568 [00:00<?, ?it/s]

046


SUBMITTING | : 100%|██████████| 568/568 [00:00<00:00, 9060.76it/s]
PROCESSING | : 100%|██████████| 568/568 [02:30<00:00,  3.78it/s]
COLLECTING | : 100%|██████████| 568/568 [00:00<00:00, 319351.83it/s]


047


SUBMITTING | : 100%|██████████| 781/781 [00:00<00:00, 9050.39it/s]
PROCESSING | : 100%|██████████| 781/781 [03:24<00:00,  3.82it/s]
COLLECTING | : 100%|██████████| 781/781 [00:00<00:00, 308625.53it/s]


048


SUBMITTING | : 100%|██████████| 729/729 [00:00<00:00, 9319.82it/s]
PROCESSING | : 100%|██████████| 729/729 [03:08<00:00,  3.87it/s]
COLLECTING | : 100%|██████████| 729/729 [00:00<00:00, 336819.52it/s]
SUBMITTING | :   0%|          | 0/530 [00:00<?, ?it/s]

049


SUBMITTING | : 100%|██████████| 530/530 [00:00<00:00, 7234.85it/s]
PROCESSING | : 100%|██████████| 530/530 [02:20<00:00,  3.78it/s]
COLLECTING | : 100%|██████████| 530/530 [00:00<00:00, 254287.48it/s]
SUBMITTING | :   0%|          | 0/617 [00:00<?, ?it/s]

050


SUBMITTING | : 100%|██████████| 617/617 [00:00<00:00, 9764.61it/s]
PROCESSING | : 100%|██████████| 617/617 [02:42<00:00,  3.79it/s]
COLLECTING | : 100%|██████████| 617/617 [00:00<00:00, 312056.62it/s]


051


SUBMITTING | : 100%|██████████| 798/798 [00:00<00:00, 4487.85it/s]
PROCESSING | : 100%|██████████| 798/798 [03:33<00:00,  3.74it/s]
COLLECTING | : 100%|██████████| 798/798 [00:00<00:00, 322172.93it/s]


052


SUBMITTING | : 100%|██████████| 842/842 [00:00<00:00, 8263.73it/s]
PROCESSING | : 100%|██████████| 842/842 [03:40<00:00,  3.83it/s]
COLLECTING | : 100%|██████████| 842/842 [00:00<00:00, 212350.67it/s]


053


SUBMITTING | : 100%|██████████| 653/653 [00:00<00:00, 8449.37it/s]
PROCESSING | : 100%|██████████| 653/653 [02:51<00:00,  3.80it/s]
COLLECTING | : 100%|██████████| 653/653 [00:00<00:00, 336430.48it/s]
SUBMITTING | :   0%|          | 0/754 [00:00<?, ?it/s]

054


SUBMITTING | : 100%|██████████| 754/754 [00:00<00:00, 8326.98it/s]
PROCESSING | : 100%|██████████| 754/754 [03:16<00:00,  3.85it/s]
COLLECTING | : 100%|██████████| 754/754 [00:00<00:00, 309776.20it/s]
SUBMITTING | :   0%|          | 0/551 [00:00<?, ?it/s]

055


SUBMITTING | : 100%|██████████| 551/551 [00:00<00:00, 7943.46it/s]
PROCESSING | : 100%|██████████| 551/551 [02:25<00:00,  3.79it/s]
COLLECTING | : 100%|██████████| 551/551 [00:00<00:00, 293542.68it/s]


056


SUBMITTING | : 100%|██████████| 695/695 [00:00<00:00, 11498.04it/s]
PROCESSING | : 100%|██████████| 695/695 [03:04<00:00,  3.76it/s]
COLLECTING | : 100%|██████████| 695/695 [00:00<00:00, 325266.82it/s]


057


SUBMITTING | : 100%|██████████| 737/737 [00:00<00:00, 8260.81it/s]
PROCESSING | : 100%|██████████| 737/737 [03:14<00:00,  3.80it/s]
COLLECTING | : 100%|██████████| 737/737 [00:00<00:00, 360363.96it/s]


058


SUBMITTING | : 100%|██████████| 850/850 [00:00<00:00, 9657.62it/s]
PROCESSING | : 100%|██████████| 850/850 [03:44<00:00,  3.78it/s]
COLLECTING | : 100%|██████████| 850/850 [00:00<00:00, 361467.95it/s]


059


SUBMITTING | : 100%|██████████| 852/852 [00:00<00:00, 5144.47it/s]
PROCESSING | : 100%|██████████| 852/852 [03:44<00:00,  3.79it/s]
COLLECTING | : 100%|██████████| 852/852 [00:00<00:00, 361256.27it/s]


060


SUBMITTING | : 100%|██████████| 786/786 [00:00<00:00, 7643.03it/s]
PROCESSING | : 100%|██████████| 786/786 [03:27<00:00,  3.79it/s]
COLLECTING | : 100%|██████████| 786/786 [00:00<00:00, 157511.85it/s]
SUBMITTING | :   0%|          | 0/591 [00:00<?, ?it/s]

061


SUBMITTING | : 100%|██████████| 591/591 [00:00<00:00, 8622.27it/s]
PROCESSING | : 100%|██████████| 591/591 [02:38<00:00,  3.73it/s]
COLLECTING | : 100%|██████████| 591/591 [00:00<00:00, 284433.01it/s]


062


SUBMITTING | : 100%|██████████| 751/751 [00:00<00:00, 10158.55it/s]
PROCESSING | : 100%|██████████| 751/751 [03:19<00:00,  3.76it/s]
COLLECTING | : 100%|██████████| 751/751 [00:00<00:00, 356689.20it/s]


063


SUBMITTING | : 100%|██████████| 726/726 [00:00<00:00, 13137.74it/s]
PROCESSING | : 100%|██████████| 726/726 [03:15<00:00,  3.72it/s]
COLLECTING | : 100%|██████████| 726/726 [00:00<00:00, 209542.02it/s]


064


SUBMITTING | : 100%|██████████| 834/834 [00:00<00:00, 9395.13it/s]
PROCESSING | : 100%|██████████| 834/834 [03:53<00:00,  3.57it/s]
COLLECTING | : 100%|██████████| 834/834 [00:00<00:00, 221816.71it/s]


065


SUBMITTING | : 100%|██████████| 832/832 [00:00<00:00, 9478.58it/s]
PROCESSING | : 100%|██████████| 832/832 [03:45<00:00,  3.69it/s]
COLLECTING | : 100%|██████████| 832/832 [00:00<00:00, 363279.30it/s]


066


SUBMITTING | : 100%|██████████| 888/888 [00:00<00:00, 4877.34it/s]
PROCESSING | : 100%|██████████| 888/888 [04:02<00:00,  3.66it/s]
COLLECTING | : 100%|██████████| 888/888 [00:00<00:00, 340700.87it/s]
SUBMITTING | :   0%|          | 0/676 [00:00<?, ?it/s]

067


SUBMITTING | : 100%|██████████| 676/676 [00:00<00:00, 8469.19it/s]
PROCESSING | : 100%|██████████| 676/676 [03:02<00:00,  3.69it/s]
COLLECTING | : 100%|██████████| 676/676 [00:00<00:00, 323337.84it/s]


068


SUBMITTING | : 100%|██████████| 595/595 [00:00<00:00, 6869.64it/s]
PROCESSING | : 100%|██████████| 595/595 [02:40<00:00,  3.70it/s]
COLLECTING | : 100%|██████████| 595/595 [00:00<00:00, 317265.56it/s]


069


SUBMITTING | : 100%|██████████| 666/666 [00:00<00:00, 10248.36it/s]
PROCESSING | : 100%|██████████| 666/666 [03:04<00:00,  3.61it/s]
COLLECTING | : 100%|██████████| 666/666 [00:00<00:00, 270259.91it/s]


070


SUBMITTING | : 100%|██████████| 704/704 [00:00<00:00, 11109.32it/s]
PROCESSING | : 100%|██████████| 704/704 [03:12<00:00,  3.67it/s]
COLLECTING | : 100%|██████████| 704/704 [00:00<00:00, 315840.20it/s]


071


SUBMITTING | : 100%|██████████| 843/843 [00:00<00:00, 9804.80it/s]
PROCESSING | : 100%|██████████| 843/843 [03:55<00:00,  3.58it/s]
COLLECTING | : 100%|██████████| 843/843 [00:00<00:00, 338289.16it/s]


072


SUBMITTING | : 100%|██████████| 691/691 [00:00<00:00, 11113.02it/s]
PROCESSING | : 100%|██████████| 691/691 [03:11<00:00,  3.61it/s]
COLLECTING | : 100%|██████████| 691/691 [00:00<00:00, 282592.05it/s]


073


SUBMITTING | : 100%|██████████| 733/733 [00:00<00:00, 7276.61it/s]
PROCESSING | : 100%|██████████| 733/733 [03:23<00:00,  3.60it/s]
COLLECTING | : 100%|██████████| 733/733 [00:00<00:00, 253749.16it/s]


074


SUBMITTING | : 100%|██████████| 702/702 [00:00<00:00, 6233.86it/s]
PROCESSING | : 100%|██████████| 702/702 [03:15<00:00,  3.58it/s]
COLLECTING | : 100%|██████████| 702/702 [00:00<00:00, 346767.33it/s]


075


SUBMITTING | : 100%|██████████| 858/858 [00:00<00:00, 3419.84it/s]
PROCESSING | : 100%|██████████| 858/858 [03:50<00:00,  3.73it/s]
COLLECTING | : 100%|██████████| 858/858 [00:00<00:00, 321802.10it/s]


076


SUBMITTING | : 100%|██████████| 592/592 [00:00<00:00, 8084.14it/s]
PROCESSING | : 100%|██████████| 592/592 [02:42<00:00,  3.64it/s]
COLLECTING | : 100%|██████████| 592/592 [00:00<00:00, 335136.72it/s]
SUBMITTING | :   0%|          | 0/366 [00:00<?, ?it/s]

077


SUBMITTING | : 100%|██████████| 366/366 [00:00<00:00, 4839.58it/s]
PROCESSING | : 100%|██████████| 366/366 [01:42<00:00,  3.57it/s]
COLLECTING | : 100%|██████████| 366/366 [00:00<00:00, 291348.50it/s]
SUBMITTING | :   0%|          | 0/219 [00:00<?, ?it/s]

078


SUBMITTING | : 100%|██████████| 219/219 [00:00<00:00, 5194.96it/s]
PROCESSING | : 100%|██████████| 219/219 [01:02<00:00,  3.53it/s]
COLLECTING | : 100%|██████████| 219/219 [00:00<00:00, 216486.58it/s]
SUBMITTING | :   0%|          | 0/570 [00:00<?, ?it/s]

079


SUBMITTING | : 100%|██████████| 570/570 [00:00<00:00, 12139.19it/s]
PROCESSING | : 100%|██████████| 570/570 [02:38<00:00,  3.60it/s]
COLLECTING | : 100%|██████████| 570/570 [00:00<00:00, 335732.80it/s]


080


SUBMITTING | : 100%|██████████| 684/684 [00:00<00:00, 10217.40it/s]
PROCESSING | : 100%|██████████| 684/684 [03:07<00:00,  3.65it/s]
COLLECTING | : 100%|██████████| 684/684 [00:00<00:00, 335505.08it/s]


081


SUBMITTING | : 100%|██████████| 785/785 [00:00<00:00, 8156.20it/s]
PROCESSING | : 100%|██████████| 785/785 [03:44<00:00,  3.50it/s]
COLLECTING | : 100%|██████████| 785/785 [00:00<00:00, 372922.03it/s]


082


SUBMITTING | : 100%|██████████| 800/800 [00:00<00:00, 8900.10it/s]
PROCESSING | : 100%|██████████| 800/800 [03:43<00:00,  3.58it/s]
COLLECTING | : 100%|██████████| 800/800 [00:00<00:00, 358449.23it/s]


083


SUBMITTING | : 100%|██████████| 814/814 [00:00<00:00, 7083.87it/s]
PROCESSING | : 100%|██████████| 814/814 [03:40<00:00,  3.70it/s]
COLLECTING | : 100%|██████████| 814/814 [00:00<00:00, 267567.67it/s]


084


SUBMITTING | : 100%|██████████| 812/812 [00:00<00:00, 4406.36it/s]
PROCESSING | : 100%|██████████| 812/812 [03:40<00:00,  3.68it/s]
COLLECTING | : 100%|██████████| 812/812 [00:00<00:00, 303085.77it/s]


085


SUBMITTING | : 100%|██████████| 808/808 [00:00<00:00, 8570.41it/s]
PROCESSING | : 100%|██████████| 808/808 [03:39<00:00,  3.68it/s]
COLLECTING | : 100%|██████████| 808/808 [00:00<00:00, 270578.65it/s]


086


SUBMITTING | : 100%|██████████| 751/751 [00:00<00:00, 8907.32it/s]
PROCESSING | : 100%|██████████| 751/751 [03:23<00:00,  3.69it/s]
COLLECTING | : 100%|██████████| 751/751 [00:00<00:00, 313862.33it/s]


087


SUBMITTING | : 100%|██████████| 596/596 [00:00<00:00, 6824.25it/s]
PROCESSING | : 100%|██████████| 596/596 [02:44<00:00,  3.63it/s]
COLLECTING | : 100%|██████████| 596/596 [00:00<00:00, 303264.00it/s]
SUBMITTING | :   0%|          | 0/220 [00:00<?, ?it/s]

088


SUBMITTING | : 100%|██████████| 220/220 [00:00<00:00, 5209.34it/s]
PROCESSING | : 100%|██████████| 220/220 [01:04<00:00,  3.39it/s]
COLLECTING | : 100%|██████████| 220/220 [00:00<00:00, 236298.82it/s]
SUBMITTING | :   0%|          | 0/448 [00:00<?, ?it/s]

089


SUBMITTING | : 100%|██████████| 448/448 [00:00<00:00, 5406.87it/s]
PROCESSING | : 100%|██████████| 448/448 [02:03<00:00,  3.63it/s]
COLLECTING | : 100%|██████████| 448/448 [00:00<00:00, 322251.45it/s]
SUBMITTING | :   0%|          | 0/471 [00:00<?, ?it/s]

090


SUBMITTING | : 100%|██████████| 471/471 [00:00<00:00, 5825.30it/s]
PROCESSING | : 100%|██████████| 471/471 [02:08<00:00,  3.68it/s]
COLLECTING | : 100%|██████████| 471/471 [00:00<00:00, 289877.80it/s]


091


SUBMITTING | : 100%|██████████| 772/772 [00:00<00:00, 11473.94it/s]
PROCESSING | : 100%|██████████| 772/772 [03:29<00:00,  3.69it/s]
COLLECTING | : 100%|██████████| 772/772 [00:00<00:00, 338278.59it/s]
SUBMITTING | :   0%|          | 0/596 [00:00<?, ?it/s]

092


SUBMITTING | : 100%|██████████| 596/596 [00:00<00:00, 7196.38it/s]
PROCESSING | : 100%|██████████| 596/596 [02:44<00:00,  3.61it/s]
COLLECTING | : 100%|██████████| 596/596 [00:00<00:00, 211007.44it/s]
SUBMITTING | :   0%|          | 0/434 [00:00<?, ?it/s]

093


SUBMITTING | : 100%|██████████| 434/434 [00:00<00:00, 5707.95it/s]
PROCESSING | : 100%|██████████| 434/434 [01:59<00:00,  3.64it/s]
COLLECTING | : 100%|██████████| 434/434 [00:00<00:00, 264967.68it/s]
SUBMITTING | :   0%|          | 0/609 [00:00<?, ?it/s]

094


SUBMITTING | : 100%|██████████| 609/609 [00:00<00:00, 3138.19it/s]
PROCESSING | : 100%|██████████| 609/609 [02:45<00:00,  3.67it/s]
COLLECTING | : 100%|██████████| 609/609 [00:00<00:00, 258797.48it/s]


095


SUBMITTING | : 100%|██████████| 742/742 [00:00<00:00, 9734.24it/s]
PROCESSING | : 100%|██████████| 742/742 [03:21<00:00,  3.69it/s]
COLLECTING | : 100%|██████████| 742/742 [00:00<00:00, 351936.40it/s]


096


SUBMITTING | : 100%|██████████| 639/639 [00:00<00:00, 7769.55it/s]
PROCESSING | : 100%|██████████| 639/639 [02:52<00:00,  3.70it/s]
COLLECTING | : 100%|██████████| 639/639 [00:00<00:00, 213660.73it/s]


097


SUBMITTING | : 100%|██████████| 759/759 [00:00<00:00, 7818.89it/s]
PROCESSING | : 100%|██████████| 759/759 [03:24<00:00,  3.71it/s]
COLLECTING | : 100%|██████████| 759/759 [00:00<00:00, 332096.47it/s]


098


SUBMITTING | : 100%|██████████| 740/740 [00:00<00:00, 7972.59it/s]
PROCESSING | : 100%|██████████| 740/740 [03:29<00:00,  3.53it/s]
COLLECTING | : 100%|██████████| 740/740 [00:00<00:00, 297725.18it/s]
SUBMITTING | :   0%|          | 0/518 [00:00<?, ?it/s]

099


SUBMITTING | : 100%|██████████| 518/518 [00:00<00:00, 7229.90it/s]
PROCESSING | : 100%|██████████| 518/518 [02:22<00:00,  3.65it/s]
COLLECTING | : 100%|██████████| 518/518 [00:00<00:00, 314012.06it/s]
SUBMITTING | :   0%|          | 0/323 [00:00<?, ?it/s]

100


SUBMITTING | : 100%|██████████| 323/323 [00:00<00:00, 4360.97it/s]
PROCESSING | : 100%|██████████| 323/323 [01:30<00:00,  3.56it/s]
COLLECTING | : 100%|██████████| 323/323 [00:00<00:00, 123790.22it/s]
SUBMITTING | :   0%|          | 0/239 [00:00<?, ?it/s]

101


SUBMITTING | : 100%|██████████| 239/239 [00:00<00:00, 3514.07it/s]
PROCESSING | : 100%|██████████| 239/239 [01:05<00:00,  3.64it/s]
COLLECTING | : 100%|██████████| 239/239 [00:00<00:00, 240623.78it/s]
SUBMITTING | :   0%|          | 0/194 [00:00<?, ?it/s]

102


SUBMITTING | : 100%|██████████| 194/194 [00:00<00:00, 3451.27it/s]
PROCESSING | : 100%|██████████| 194/194 [00:52<00:00,  3.68it/s]
COLLECTING | : 100%|██████████| 194/194 [00:00<00:00, 238620.23it/s]
SUBMITTING | :   0%|          | 0/335 [00:00<?, ?it/s]

103


SUBMITTING | : 100%|██████████| 335/335 [00:00<00:00, 6127.99it/s]
PROCESSING | : 100%|██████████| 335/335 [01:36<00:00,  3.47it/s]
COLLECTING | : 100%|██████████| 335/335 [00:00<00:00, 166954.83it/s]
SUBMITTING | :   0%|          | 0/679 [00:00<?, ?it/s]

104


SUBMITTING | : 100%|██████████| 679/679 [00:00<00:00, 8764.89it/s]
PROCESSING | : 100%|██████████| 679/679 [03:07<00:00,  3.62it/s]
COLLECTING | : 100%|██████████| 679/679 [00:00<00:00, 192103.37it/s]


105


SUBMITTING | : 100%|██████████| 750/750 [00:00<00:00, 9609.15it/s]
PROCESSING | : 100%|██████████| 750/750 [03:28<00:00,  3.59it/s]
COLLECTING | : 100%|██████████| 750/750 [00:00<00:00, 200352.08it/s]


106


SUBMITTING | : 100%|██████████| 781/781 [00:00<00:00, 3792.22it/s]
PROCESSING | : 100%|██████████| 781/781 [03:41<00:00,  3.52it/s]
COLLECTING | : 100%|██████████| 781/781 [00:00<00:00, 192284.07it/s]


107


SUBMITTING | : 100%|██████████| 822/822 [00:00<00:00, 8925.30it/s]
PROCESSING | : 100%|██████████| 822/822 [03:50<00:00,  3.57it/s]
COLLECTING | : 100%|██████████| 822/822 [00:00<00:00, 351270.29it/s]


108


SUBMITTING | : 100%|██████████| 789/789 [00:00<00:00, 11493.20it/s]
PROCESSING | : 100%|██████████| 789/789 [03:42<00:00,  3.54it/s]
COLLECTING | : 100%|██████████| 789/789 [00:00<00:00, 334712.84it/s]


109


SUBMITTING | : 100%|██████████| 728/728 [00:00<00:00, 7299.53it/s]
PROCESSING | : 100%|██████████| 728/728 [03:19<00:00,  3.66it/s]
COLLECTING | : 100%|██████████| 728/728 [00:00<00:00, 342815.01it/s]


110


SUBMITTING | : 100%|██████████| 677/677 [00:00<00:00, 7705.33it/s]
PROCESSING | : 100%|██████████| 677/677 [03:03<00:00,  3.70it/s]
COLLECTING | : 100%|██████████| 677/677 [00:00<00:00, 270690.54it/s]


111


SUBMITTING | : 100%|██████████| 863/863 [00:00<00:00, 7505.93it/s]
PROCESSING | : 100%|██████████| 863/863 [03:54<00:00,  3.68it/s]
COLLECTING | : 100%|██████████| 863/863 [00:00<00:00, 272525.55it/s]


112


SUBMITTING | : 100%|██████████| 832/832 [00:00<00:00, 7337.43it/s]
PROCESSING | : 100%|██████████| 832/832 [03:48<00:00,  3.64it/s]
COLLECTING | : 100%|██████████| 832/832 [00:00<00:00, 233281.70it/s]


113


SUBMITTING | : 100%|██████████| 783/783 [00:00<00:00, 3798.28it/s]
PROCESSING | : 100%|██████████| 783/783 [03:27<00:00,  3.77it/s]
COLLECTING | : 100%|██████████| 783/783 [00:00<00:00, 259862.32it/s]


114


SUBMITTING | : 100%|██████████| 772/772 [00:00<00:00, 8647.17it/s]
PROCESSING | : 100%|██████████| 772/772 [03:17<00:00,  3.91it/s]
COLLECTING | : 100%|██████████| 772/772 [00:00<00:00, 340663.09it/s]


115


SUBMITTING | : 100%|██████████| 800/800 [00:00<00:00, 10283.59it/s]
PROCESSING | : 100%|██████████| 800/800 [03:26<00:00,  3.88it/s]
COLLECTING | : 100%|██████████| 800/800 [00:00<00:00, 352870.25it/s]


116


SUBMITTING | : 100%|██████████| 783/783 [00:00<00:00, 8496.18it/s]
PROCESSING | : 100%|██████████| 783/783 [03:39<00:00,  3.57it/s]
COLLECTING | : 100%|██████████| 783/783 [00:00<00:00, 328973.26it/s]


117


SUBMITTING | : 100%|██████████| 677/677 [00:00<00:00, 6995.79it/s]
PROCESSING | : 100%|██████████| 677/677 [03:10<00:00,  3.55it/s]
COLLECTING | : 100%|██████████| 677/677 [00:00<00:00, 141032.27it/s]


118


SUBMITTING | : 100%|██████████| 830/830 [00:00<00:00, 6588.53it/s]
PROCESSING | : 100%|██████████| 830/830 [03:57<00:00,  3.50it/s]
COLLECTING | : 100%|██████████| 830/830 [00:00<00:00, 292396.47it/s]


119


SUBMITTING | : 100%|██████████| 878/878 [00:00<00:00, 7792.71it/s]
PROCESSING | : 100%|██████████| 878/878 [03:57<00:00,  3.70it/s]
COLLECTING | : 100%|██████████| 878/878 [00:00<00:00, 300350.62it/s]


120


SUBMITTING | : 100%|██████████| 787/787 [00:00<00:00, 5089.70it/s]
PROCESSING | : 100%|██████████| 787/787 [03:35<00:00,  3.65it/s]
COLLECTING | : 100%|██████████| 787/787 [00:00<00:00, 319546.68it/s]


121


SUBMITTING | : 100%|██████████| 738/738 [00:00<00:00, 8305.31it/s]
PROCESSING | : 100%|██████████| 738/738 [03:26<00:00,  3.57it/s]
COLLECTING | : 100%|██████████| 738/738 [00:00<00:00, 288507.44it/s]


122


SUBMITTING | : 100%|██████████| 798/798 [00:00<00:00, 9498.21it/s]
PROCESSING | : 100%|██████████| 798/798 [03:49<00:00,  3.47it/s]
COLLECTING | : 100%|██████████| 798/798 [00:00<00:00, 169026.09it/s]


123


SUBMITTING | : 100%|██████████| 704/704 [00:00<00:00, 7582.12it/s]
PROCESSING | : 100%|██████████| 704/704 [03:16<00:00,  3.59it/s]
COLLECTING | : 100%|██████████| 704/704 [00:00<00:00, 358348.30it/s]


124


SUBMITTING | : 100%|██████████| 805/805 [00:00<00:00, 8835.47it/s]
PROCESSING | : 100%|██████████| 805/805 [03:42<00:00,  3.62it/s]
COLLECTING | : 100%|██████████| 805/805 [00:00<00:00, 315493.81it/s]


125


SUBMITTING | : 100%|██████████| 658/658 [00:00<00:00, 7320.27it/s]
PROCESSING | : 100%|██████████| 658/658 [03:03<00:00,  3.58it/s]
COLLECTING | : 100%|██████████| 658/658 [00:00<00:00, 310969.24it/s]


126


SUBMITTING | : 100%|██████████| 724/724 [00:00<00:00, 13317.41it/s]
PROCESSING | : 100%|██████████| 724/724 [03:25<00:00,  3.52it/s]
COLLECTING | : 100%|██████████| 724/724 [00:00<00:00, 225322.85it/s]


127


SUBMITTING | : 100%|██████████| 805/805 [00:00<00:00, 9594.54it/s]
PROCESSING | : 100%|██████████| 805/805 [03:47<00:00,  3.54it/s]
COLLECTING | : 100%|██████████| 805/805 [00:00<00:00, 320039.31it/s]
SUBMITTING | :   0%|          | 0/678 [00:00<?, ?it/s]

128


SUBMITTING | : 100%|██████████| 678/678 [00:00<00:00, 3947.68it/s]
PROCESSING | : 100%|██████████| 678/678 [03:13<00:00,  3.50it/s]
COLLECTING | : 100%|██████████| 678/678 [00:00<00:00, 325110.11it/s]


129


SUBMITTING | : 100%|██████████| 782/782 [00:00<00:00, 7799.63it/s]
PROCESSING | : 100%|██████████| 782/782 [03:41<00:00,  3.54it/s]
COLLECTING | : 100%|██████████| 782/782 [00:00<00:00, 300746.90it/s]


130


SUBMITTING | : 100%|██████████| 827/827 [00:00<00:00, 8244.96it/s]
PROCESSING | : 100%|██████████| 827/827 [03:48<00:00,  3.62it/s]
COLLECTING | : 100%|██████████| 827/827 [00:00<00:00, 303631.78it/s]


131


SUBMITTING | : 100%|██████████| 800/800 [00:00<00:00, 7174.81it/s]
PROCESSING | : 100%|██████████| 800/800 [03:49<00:00,  3.49it/s]
COLLECTING | : 100%|██████████| 800/800 [00:00<00:00, 303797.48it/s]


132


SUBMITTING | : 100%|██████████| 855/855 [00:00<00:00, 6591.12it/s]
PROCESSING | : 100%|██████████| 855/855 [04:03<00:00,  3.51it/s]
COLLECTING | : 100%|██████████| 855/855 [00:00<00:00, 339210.17it/s]


133


SUBMITTING | : 100%|██████████| 832/832 [00:00<00:00, 9057.98it/s]
PROCESSING | : 100%|██████████| 832/832 [03:59<00:00,  3.47it/s]
COLLECTING | : 100%|██████████| 832/832 [00:00<00:00, 264628.87it/s]


134


SUBMITTING | : 100%|██████████| 779/779 [00:00<00:00, 9754.22it/s]
PROCESSING | : 100%|██████████| 779/779 [03:38<00:00,  3.57it/s]
COLLECTING | : 100%|██████████| 779/779 [00:00<00:00, 349749.82it/s]
SUBMITTING | :   0%|          | 0/554 [00:00<?, ?it/s]

135


SUBMITTING | : 100%|██████████| 554/554 [00:00<00:00, 6802.89it/s]
PROCESSING | : 100%|██████████| 554/554 [02:43<00:00,  3.39it/s]
COLLECTING | : 100%|██████████| 554/554 [00:00<00:00, 248173.07it/s]


136


SUBMITTING | : 100%|██████████| 803/803 [00:00<00:00, 4121.74it/s]
PROCESSING | : 100%|██████████| 803/803 [03:57<00:00,  3.38it/s]
COLLECTING | : 100%|██████████| 803/803 [00:00<00:00, 271024.87it/s]


137


SUBMITTING | : 100%|██████████| 838/838 [00:00<00:00, 8373.88it/s]
PROCESSING | : 100%|██████████| 838/838 [04:10<00:00,  3.34it/s]
COLLECTING | : 100%|██████████| 838/838 [00:00<00:00, 239690.86it/s]


138


SUBMITTING | : 100%|██████████| 875/875 [00:00<00:00, 10765.26it/s]
PROCESSING | : 100%|██████████| 875/875 [04:24<00:00,  3.31it/s]
COLLECTING | : 100%|██████████| 875/875 [00:00<00:00, 214935.05it/s]


139


SUBMITTING | : 100%|██████████| 746/746 [00:00<00:00, 8274.89it/s]
PROCESSING | : 100%|██████████| 746/746 [03:43<00:00,  3.34it/s]
COLLECTING | : 100%|██████████| 746/746 [00:00<00:00, 347893.13it/s]


140


SUBMITTING | : 100%|██████████| 715/715 [00:00<00:00, 7838.57it/s]
PROCESSING | : 100%|██████████| 715/715 [03:27<00:00,  3.45it/s]
COLLECTING | : 100%|██████████| 715/715 [00:00<00:00, 220056.31it/s]
SUBMITTING | :   0%|          | 0/536 [00:00<?, ?it/s]

141


SUBMITTING | : 100%|██████████| 536/536 [00:00<00:00, 7862.61it/s]
PROCESSING | : 100%|██████████| 536/536 [02:36<00:00,  3.43it/s]
COLLECTING | : 100%|██████████| 536/536 [00:00<00:00, 279411.75it/s]


142


SUBMITTING | : 100%|██████████| 782/782 [00:00<00:00, 10701.71it/s]
PROCESSING | : 100%|██████████| 782/782 [03:49<00:00,  3.40it/s]
COLLECTING | : 100%|██████████| 782/782 [00:00<00:00, 324650.67it/s]


143


SUBMITTING | : 100%|██████████| 830/830 [00:00<00:00, 8936.54it/s]
PROCESSING | : 100%|██████████| 830/830 [04:10<00:00,  3.31it/s]
COLLECTING | : 100%|██████████| 830/830 [00:00<00:00, 220417.39it/s]


144


SUBMITTING | : 100%|██████████| 673/673 [00:00<00:00, 4048.34it/s]
PROCESSING | : 100%|██████████| 673/673 [03:19<00:00,  3.37it/s]
COLLECTING | : 100%|██████████| 673/673 [00:00<00:00, 306024.13it/s]


145


SUBMITTING | : 100%|██████████| 800/800 [00:00<00:00, 7826.00it/s]
PROCESSING | : 100%|██████████| 800/800 [03:59<00:00,  3.34it/s]
COLLECTING | : 100%|██████████| 800/800 [00:00<00:00, 272070.32it/s]


146


SUBMITTING | : 100%|██████████| 757/757 [00:00<00:00, 10048.67it/s]
PROCESSING | : 100%|██████████| 757/757 [03:47<00:00,  3.33it/s]
COLLECTING | : 100%|██████████| 757/757 [00:00<00:00, 328412.09it/s]


147


SUBMITTING | : 100%|██████████| 746/746 [00:00<00:00, 10420.84it/s]
PROCESSING | : 100%|██████████| 746/746 [03:43<00:00,  3.34it/s]
COLLECTING | : 100%|██████████| 746/746 [00:00<00:00, 239729.60it/s]


148


SUBMITTING | : 100%|██████████| 917/917 [00:00<00:00, 9027.14it/s]
PROCESSING | : 100%|██████████| 917/917 [04:29<00:00,  3.40it/s]
COLLECTING | : 100%|██████████| 917/917 [00:00<00:00, 358317.19it/s]


149


SUBMITTING | : 100%|██████████| 737/737 [00:00<00:00, 11148.87it/s]
PROCESSING | : 100%|██████████| 737/737 [03:34<00:00,  3.44it/s]
COLLECTING | : 100%|██████████| 737/737 [00:00<00:00, 350715.00it/s]


150


SUBMITTING | : 100%|██████████| 586/586 [00:00<00:00, 6030.75it/s]
PROCESSING | : 100%|██████████| 586/586 [02:52<00:00,  3.40it/s]
COLLECTING | : 100%|██████████| 586/586 [00:00<00:00, 321205.19it/s]


151


SUBMITTING | : 100%|██████████| 588/588 [00:00<00:00, 6794.68it/s]
PROCESSING | : 100%|██████████| 588/588 [02:52<00:00,  3.42it/s]
COLLECTING | : 100%|██████████| 588/588 [00:00<00:00, 307435.90it/s]


152


SUBMITTING | : 100%|██████████| 608/608 [00:00<00:00, 3081.11it/s]
PROCESSING | : 100%|██████████| 608/608 [02:57<00:00,  3.43it/s]
COLLECTING | : 100%|██████████| 608/608 [00:00<00:00, 268040.45it/s]
SUBMITTING | :   0%|          | 0/363 [00:00<?, ?it/s]

153


SUBMITTING | : 100%|██████████| 363/363 [00:00<00:00, 5300.26it/s]
PROCESSING | : 100%|██████████| 363/363 [01:42<00:00,  3.53it/s]
COLLECTING | : 100%|██████████| 363/363 [00:00<00:00, 305729.39it/s]


154


SUBMITTING | : 100%|██████████| 813/813 [00:00<00:00, 6200.07it/s]
PROCESSING | : 100%|██████████| 813/813 [03:59<00:00,  3.40it/s]
COLLECTING | : 100%|██████████| 813/813 [00:00<00:00, 185657.38it/s]
SUBMITTING | : 0it [00:00, ?it/s]
PROCESSING | : 0it [00:00, ?it/s]
COLLECTING | : 0it [00:00, ?it/s]

155





# Prepare the manifest file for Fairseq training

In [None]:
import argparse
import glob
import os
import random

from pqdm.processes import pqdm

import sox

valid_percent = 0.05
root = "/content/drive/MyDrive/musicology-dataset-downsampled/"
dest = "/content/drive/MyDrive/musicology-experiment-files/fma_manifests/"
ext = "mp3"

assert valid_percent >= 0 and valid_percent <= 1.0

if not os.path.exists(dest):
    os.makedirs(dest)

dir_path = os.path.realpath(root)
search_path = os.path.join(dir_path, "**/*." + ext)
rand = random.Random(42)

def prepare_line(filename):
    file_path = os.path.realpath(filename)
    frames = sox.file_info.num_samples(filename)
    return "{}\t{}".format(os.path.relpath(file_path, dir_path), frames)

with open(os.path.join(dest, "train.tsv"), "w") as train_f, open(
    os.path.join(dest, "valid.tsv"), "w"
) as valid_f:
    print(dir_path, file=train_f)
    print(dir_path, file=valid_f)

    file_list = glob.iglob(search_path, recursive=True)
    out = pqdm(file_list, prepare_line, n_jobs=64)

    for line in out:
        dest = train_f if rand.random() > valid_percent else valid_f
        print(
            line, file=dest
        )



PROCESSING | : 100%|█████████▉| 105935/106085 [23:05<00:03, 45.48it/s][A[A

PROCESSING | : 100%|█████████▉| 105942/106085 [23:05<00:02, 50.41it/s][A[A

PROCESSING | : 100%|█████████▉| 105979/106085 [23:05<00:01, 66.24it/s][A[A

PROCESSING | : 100%|█████████▉| 105992/106085 [23:05<00:01, 73.78it/s][A[A

PROCESSING | : 100%|█████████▉| 106004/106085 [23:06<00:01, 41.07it/s][A[A

PROCESSING | : 100%|█████████▉| 106024/106085 [23:06<00:01, 50.08it/s][A[A

PROCESSING | : 100%|█████████▉| 106061/106085 [23:06<00:00, 62.63it/s][A[A

PROCESSING | : 100%|█████████▉| 106072/106085 [23:07<00:00, 44.34it/s][A[A

PROCESSING | : 100%|██████████| 106085/106085 [23:07<00:00, 76.45it/s]


COLLECTING | :   0%|          | 0/106085 [00:00<?, ?it/s][A[A

COLLECTING | :  44%|████▎     | 46369/106085 [00:00<00:00, 463689.34it/s][A[A

COLLECTING | : 100%|██████████| 106085/106085 [00:00<00:00, 439503.34it/s]


# Hack: by default Fairseq accepts .flac only. We want .mp3 so it's faster to change the library code than to convert everything

In [None]:
# Inject the improved raw_audio_dataset to fairseq in order to read mp3
!cp /content/drive/MyDrive/musicology-experiment-files/raw_audio_dataset.py /usr/local/lib/python3.7/dist-packages/fairseq/data/audio/raw_audio_dataset.py

# Install https://wandb.ai/ training tracker to track the training online

In [None]:
!pip install wandb
!wandb login

Collecting wandb
[?25l  Downloading https://files.pythonhosted.org/packages/5c/ee/d755f9e5466df64c8416a2c6a860fb3aaa43ed6ea8e8e8e81460fda5788b/wandb-0.10.28-py2.py3-none-any.whl (2.1MB)
[K     |████████████████████████████████| 2.1MB 13.9MB/s 
[?25hCollecting pathtools
  Downloading https://files.pythonhosted.org/packages/e7/7f/470d6fcdf23f9f3518f6b0b76be9df16dcc8630ad409947f8be2eb0ed13a/pathtools-0.1.2.tar.gz
Collecting configparser>=3.8.1
  Downloading https://files.pythonhosted.org/packages/fd/01/ff260a18caaf4457eb028c96eeb405c4a230ca06c8ec9c1379f813caa52e/configparser-5.0.2-py3-none-any.whl
Collecting GitPython>=1.0.0
[?25l  Downloading https://files.pythonhosted.org/packages/a6/99/98019716955ba243657daedd1de8f3a88ca1f5b75057c38e959db22fb87b/GitPython-3.1.14-py3-none-any.whl (159kB)
[K     |████████████████████████████████| 163kB 46.2MB/s 
Collecting docker-pycreds>=0.4.0
  Downloading https://files.pythonhosted.org/packages/f5/e8/f6bd1eee09314e7e6dee49cbe2c5e22314ccdb38db16c9

# Pre-train the model in the semi-supervised way

We ran 2 full epochs (~16h)

Final training log is available here: https://wandb.ai/glorf/mel2vec/runs/1bid19sm?workspace=user-glorf

Model can be downloaded from Hf Hub: https://huggingface.co/mbien/fma2vec

In [None]:
%cd /content/drive/MyDrive/musicology-experiment-files

!fairseq-hydra-train \
    distributed_training.distributed_world_size=1 \
    +optimization.update_freq='[64]' \
    task.data=/content/drive/MyDrive/musicology-experiment-files/fma_manifests \
    --config-dir /content/drive/MyDrive/musicology-experiment-files/config \
    --config-name wav2vec2_base_librispeech

/content/drive/MyDrive/musicology-experiment-files
[2021-05-01 22:09:41,107][fairseq_cli.train][INFO] - {'_name': None, 'common': {'_name': None, 'no_progress_bar': False, 'log_interval': 200, 'log_format': 'json', 'log_file': None, 'tensorboard_logdir': None, 'wandb_project': 'mel2vec', 'azureml_logging': False, 'seed': 1, 'cpu': False, 'tpu': False, 'bf16': False, 'memory_efficient_bf16': False, 'fp16': True, 'memory_efficient_fp16': False, 'fp16_no_flatten_grads': False, 'fp16_init_scale': 128, 'fp16_scale_window': None, 'fp16_scale_tolerance': 0.0, 'min_loss_scale': 0.0001, 'threshold_loss_scale': None, 'user_dir': None, 'empty_cache_freq': 0, 'all_gather_list_size': 16384, 'model_parallel_size': 1, 'quantization_config_path': None, 'profile': False, 'reset_logging': False, 'suppress_crashes': False, 'use_plasma_view': False, 'plasma_path': '/tmp/plasma'}, 'common_eval': {'_name': None, 'path': None, 'post_process': None, 'quiet': False, 'model_overrides': '{}', 'results_path': Non

# Fine-tuning
Now we only have the bare pretrained model with some musical insight

Let's create a model which can be used for the actual task on top of that

# We're changing the library - bye Fairseq, welcome Huggingface Transformers!
Let's convert the model

In [None]:
# 1) create a fitting HF config
from transformers import Wav2Vec2Config
config = Wav2Vec2Config.from_pretrained('facebook/wav2vec2-base-960h')
config.save_pretrained('./fma2vec')
# 2) Then run convert (it's important that you pass --not_finetuned python)
!python  /usr/local/lib/python3.7/dist-packages/transformers/models/wav2vec2/convert_wav2vec2_original_pytorch_checkpoint_to_pytorch.py --pytorch_dump_folder /content/drive/MyDrive/musicology-experiment-files/fma2vec --checkpoint_path /content/drive/MyDrive/musicology-experiment-files/outputs/2021-05-01/22-09-40/checkpoints/checkpoint_best.pt --config_path /content/fma2vec/config.json --not_finetuned

2021-05-02 16:06:48.280898: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.11.0
loading configuration file /content/fma2vec/config.json
Model config Wav2Vec2Config {
  "activation_dropout": 0.1,
  "apply_spec_augment": true,
  "architectures": [
    "Wav2Vec2ForCTC"
  ],
  "attention_dropout": 0.1,
  "bos_token_id": 1,
  "conv_bias": false,
  "conv_dim": [
    512,
    512,
    512,
    512,
    512,
    512,
    512
  ],
  "conv_kernel": [
    10,
    3,
    3,
    3,
    3,
    2,
    2
  ],
  "conv_stride": [
    5,
    2,
    2,
    2,
    2,
    2,
    2
  ],
  "ctc_loss_reduction": "sum",
  "ctc_zero_infinity": false,
  "do_stable_layer_norm": false,
  "eos_token_id": 2,
  "feat_extract_activation": "gelu",
  "feat_extract_dropout": 0.0,
  "feat_extract_norm": "group",
  "feat_proj_dropout": 0.1,
  "final_dropout": 0.1,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout": 0.1,
  "hidden_dr

# Create a fine-tuning pipeline

## First we need to get popularity scores calculated in Milestone 3 part 1
Also, some files are apparently missing on our drive.

Let's not investigate it any further (time constraints) and just validate it according to Fairseq pretraining manifests


In [7]:
import pandas as pd

train = pd.read_csv("pop_train.csv") # Files generated by milestone3.ipynb
test = pd.read_csv("pop_test.csv")

manifest_train = pd.read_csv("/content/drive/MyDrive/musicology-experiment-files/fma_manifests/train.tsv", sep="\t")
manifest_valid = pd.read_csv("/content/drive/MyDrive/musicology-experiment-files/fma_manifests/valid.tsv", sep="\t")

manifests = pd.concat([manifest_train, manifest_valid])
manifests = manifests.reset_index()["index"].str.strip(".mp3").astype(int)

train = train[train.track_id.isin(manifests)]
test = test[test.track_id.isin(manifests)]
minimum = min(train.pop_score.min(), test.pop_score.min())
maximum = max(train.pop_score.max(), test.pop_score.max())

print(minimum, maximum)

-3.3939661205367715 5.098214985136099


# Rescale the scores to fit in (-1, 1) limits
It's better for MSE to have well regulated limits

In [3]:
train.pop_score = (train.pop_score - minimum)/(maximum-minimum)
train.pop_score = (train.pop_score*2)-1

test.pop_score = (test.pop_score - minimum)/(maximum-minimum)
test.pop_score = (test.pop_score*2)-1
train.head()

Unnamed: 0,track_id,pop_score
0,31464,-0.455719
1,8271,-0.43304
2,62870,-0.362632
4,124434,0.426293
5,140109,-0.192201


## Create a PyTorch dataset
It loads and preprocesses the files from the Disk and provides the raw waveform together with the expected popularity score to the model


In very rare cases (~5 per epoch), the file loading fails, therefore the exception is handled by loading the file with the previous index instead

In [4]:
from tqdm import tqdm
import torchaudio
from transformers import Wav2Vec2FeatureExtractor
import torch.nn.functional as F


feature_extractor = Wav2Vec2FeatureExtractor(feature_size=1, sampling_rate=16000, padding_value=0.0, max_length=500000)

class PopularityDataset(torch.utils.data.Dataset):
    def __init__(self, dataframe):
        self.dataframe = dataframe

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, index):
        row = self.dataframe.iloc[index]
        track_id = str(int(row.track_id)).zfill(6)
        audio_sample = []
        try:
          audio_sample, _ = torchaudio.load(f"/content/drive/MyDrive/musicology-dataset-downsampled/{track_id}.mp3")
        except Exception:
          print("Broken file found, fixing with prev file sampling")
          row = self.dataframe.iloc[index-1]
          track_id = str(int(row.track_id)).zfill(6)
          audio_sample, _ = torchaudio.load(f"/content/drive/MyDrive/musicology-dataset-downsampled/{track_id}.mp3")
        audio_sample = torch.mean(audio_sample, dim=0, keepdim=True)
        audio_sample = audio_sample.squeeze().numpy()
        audio_sample = feature_extractor(audio_sample, sampling_rate=16000, return_tensors="pt", padding='max_length', max_length=480000).input_values
        return {
            "input_ids": audio_sample.flatten()[:480000],
            "labels": torch.tensor(row.pop_score).float(),
        }

train_ds = PopularityDataset(train)
test_ds = PopularityDataset(test)

# The model implementation

We follow the BertForSentenceClassification approach
The first output of the model is changed to serve as a 768-values full-context output. These values are passed through a single linear layer, with 20% dropout during training, and are expected to return one value - our popularity score.

In [5]:
import torch.nn as nn
from transformers import Wav2Vec2PreTrainedModel, Wav2Vec2Model
from transformers.modeling_outputs import SequenceClassifierOutput


class Wav2Vec2ForAudioClassification(Wav2Vec2PreTrainedModel):
    def __init__(self, config):
        super().__init__(config)

        self.wav2vec2 = Wav2Vec2Model(config)
        self.dropout = nn.Dropout(0.2)
        self.classifier = nn.Linear(768, 1)

        self.init_weights()

    def forward(self, input_ids, labels):
        outputs = self.wav2vec2(
            input_ids,
            output_attentions=True,
            output_hidden_states=True
        )
        pooled_output = outputs.last_hidden_state[:,0,:]

        pooled_output = self.dropout(pooled_output)
        logits = self.classifier(pooled_output)

        loss_fct = nn.MSELoss()
        loss = loss_fct(logits.view(-1), labels.view(-1))
    
        return SequenceClassifierOutput(
            loss=loss,
            logits=logits,
            hidden_states=outputs.hidden_states,
            attentions=outputs.attentions
        )

# Training the model for 1 epoch on the popularity scores

Final training log available here: https://wandb.ai/glorf/huggingface/runs/2hkkp2av?workspace=user-glorf

Model can be downloaded from Hf Hub: https://huggingface.co/mbien/fma2vec2popularity

In [6]:
from transformers import Trainer, TrainingArguments

model = Wav2Vec2ForAudioClassification.from_pretrained(
    "/content/drive/MyDrive/musicology-experiment-files/fma2vec", 
).to('cuda')


args = TrainingArguments(
    output_dir="/content/drive/MyDrive/musicology-experiment-files/fma2vec-finetuned-v2",
    per_device_train_batch_size=2,
    per_device_eval_batch_size=4,
    eval_steps=5000,
    dataloader_num_workers=32,
    num_train_epochs=1,
    save_steps=5000,
    evaluation_strategy="steps",
    load_best_model_at_end=True,
    fp16=True,
    report_to="wandb"
)

trainer = Trainer(
    model=model,
    args=args, 
    train_dataset=train_ds,
    eval_dataset=test_ds
)
trainer.train()

Some weights of Wav2Vec2ForAudioClassification were not initialized from the model checkpoint at /content/drive/MyDrive/musicology-experiment-files/fma2vec and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  cpuset_checked))
[34m[1mwandb[0m: Currently logged in as: [33mglorf[0m (use `wandb login --relogin` to force relogin)


  cpuset_checked))


Step,Training Loss,Validation Loss
5000,0.0746,0.056673
10000,0.0637,0.056438
15000,0.0633,0.06097
20000,0.0583,0.055947
25000,0.0548,0.056436
30000,0.0537,0.057511
35000,0.055,0.055714
40000,0.0616,0.056468


Broken file found, fixing with prev file sampling


  cpuset_checked))
  cpuset_checked))
  cpuset_checked))


Broken file found, fixing with prev file sampling


  cpuset_checked))


Broken file found, fixing with prev file sampling


  cpuset_checked))


Broken file found, fixing with prev file sampling


  cpuset_checked))


Broken file found, fixing with prev file sampling


  cpuset_checked))


TrainOutput(global_step=43837, training_loss=0.06584673597693164, metrics={'train_runtime': 16294.6998, 'train_samples_per_second': 2.69, 'total_flos': 2.08573752727872e+18, 'epoch': 1.0, 'init_mem_cpu_alloc_delta': 8581120, 'init_mem_gpu_alloc_delta': 0, 'init_mem_cpu_peaked_delta': 0, 'init_mem_gpu_peaked_delta': 0, 'train_mem_cpu_alloc_delta': 416387072, 'train_mem_gpu_alloc_delta': 1168186368, 'train_mem_cpu_peaked_delta': 360787968, 'train_mem_gpu_peaked_delta': 9529502208})