In [1]:
# https://ai.meta.com/blog/multilingual-model-speech-recognition/

In [2]:
import sys
import os
from tqdm import tqdm
import subprocess
import numpy as np
import pandas as pd
import glob
from collections import OrderedDict
import random
import torch
import torch.nn as nn
import IPython.display as ipd
import torchaudio
from transformers import AutoProcessor, AutoModelForCTC, AdamW
from torch.utils.data import DataLoader, Dataset
from torch.nn.utils.rnn import pad_sequence
from sklearn.model_selection import train_test_split
from transformers import WhisperProcessor, WhisperForConditionalGeneration
import string

In [3]:
def load_data(data_dir):
    wav_files = glob.glob(f"{data_dir}/*.wav")
    data = []
    
    for wav_file in wav_files:
        label = int(os.path.basename(wav_file).split('_')[0])
        data.append((wav_file, label))
        
    return pd.DataFrame(data, columns=['wavfile', 'label'])

data_dir = '/kaggle/input/spoken-digits/recordings'

data = load_data(data_dir)

# train and test split
# stratified
train_data, test_data = train_test_split(
    data, 
    test_size=0.9,  
    stratify=data['label']
)

train_data = train_data.reset_index(drop=True)
test_data = test_data.reset_index(drop=True)

In [4]:
class AudioDataset(Dataset):
    
    def __init__(self, df, processor, target_sample_rate=16000, min_length=10000):
        self.df = df
        self.processor = processor
        self.target_sample_rate = target_sample_rate
        self.min_length = min_length

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        audio_path = self.df.iloc[idx]['wavfile']
        label = self.df.iloc[idx]['label']
        audio_data, sample_rate = torchaudio.load(audio_path)
        
        if sample_rate != self.target_sample_rate:
            resampler = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=self.target_sample_rate)
            audio_data = resampler(audio_data)

        audio_data = audio_data.squeeze().numpy()

        if audio_data.shape[0] < self.min_length:
            padding_length = self.min_length - audio_data.shape[0]
            audio_data = np.pad(audio_data, (0, padding_length), mode='constant')

        return torch.tensor(audio_data), label


In [5]:
processor = AutoProcessor.from_pretrained("facebook/mms-1b-all")

preprocessor_config.json:   0%|          | 0.00/254 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/397 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.34M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/96.0 [00:00<?, ?B/s]

In [6]:
def pre_dataloader(batch):
    audios, labels = zip(*batch)
    audios = [torch.tensor(audio) for audio in audios]
    labels = torch.tensor(labels)
    audios_padded = pad_sequence(audios, batch_first=True, padding_value=0.0)
    return audios_padded, labels

In [7]:
train_dataset = AudioDataset(train_data, processor)
test_dataset = AudioDataset(test_data, processor)

train_loader = DataLoader(train_dataset, batch_size=2, shuffle=True, collate_fn=pre_dataloader)
test_loader = DataLoader(test_dataset, batch_size=2, shuffle=False, collate_fn=pre_dataloader)

In [8]:
processor = AutoProcessor.from_pretrained("facebook/mms-1b-all")
model = AutoModelForCTC.from_pretrained("facebook/mms-1b-all")

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

config.json:   0%|          | 0.00/2.04k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/3.86G [00:00<?, ?B/s]

Some weights of the model checkpoint at facebook/mms-1b-all were not used when initializing Wav2Vec2ForCTC: ['wav2vec2.encoder.pos_conv_embed.conv.weight_g', 'wav2vec2.encoder.pos_conv_embed.conv.weight_v']
- This IS expected if you are initializing Wav2Vec2ForCTC from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing Wav2Vec2ForCTC from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/mms-1b-all and are newly initialized: ['wav2vec2.encoder.pos_conv_embed.conv.parametrizations.weight.original0', 'wav2vec2.encoder.pos_conv_embed.conv.parametrizations.weight.original1']
You should probably TRAIN this model on a down-stream

Wav2Vec2ForCTC(
  (wav2vec2): Wav2Vec2Model(
    (feature_extractor): Wav2Vec2FeatureEncoder(
      (conv_layers): ModuleList(
        (0): Wav2Vec2LayerNormConvLayer(
          (conv): Conv1d(1, 512, kernel_size=(10,), stride=(5,))
          (layer_norm): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
          (activation): GELUActivation()
        )
        (1-4): 4 x Wav2Vec2LayerNormConvLayer(
          (conv): Conv1d(512, 512, kernel_size=(3,), stride=(2,))
          (layer_norm): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
          (activation): GELUActivation()
        )
        (5-6): 2 x Wav2Vec2LayerNormConvLayer(
          (conv): Conv1d(512, 512, kernel_size=(2,), stride=(2,))
          (layer_norm): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
          (activation): GELUActivation()
        )
      )
    )
    (feature_projection): Wav2Vec2FeatureProjection(
      (layer_norm): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
      (projec

In [9]:
def freeze_model_layers(model, freeze_ratio=0.8):

    total_params = list(model.named_parameters())
    total_layers = len(total_params)
    
    num_layers_to_freeze = int(total_layers * freeze_ratio)
    
    # freeze
    for i, (name, param) in enumerate(total_params):
        if i < num_layers_to_freeze:
            param.requires_grad = False
            # print(f"Freezing layer: {name}")  
        else:
            param.requires_grad = True  
            print(f"Unfreezing layer: {name}")  

In [10]:
freeze_model_layers(model, freeze_ratio=0.8)

Unfreezing layer: wav2vec2.encoder.layers.38.attention.v_proj.weight
Unfreezing layer: wav2vec2.encoder.layers.38.attention.v_proj.bias
Unfreezing layer: wav2vec2.encoder.layers.38.attention.q_proj.weight
Unfreezing layer: wav2vec2.encoder.layers.38.attention.q_proj.bias
Unfreezing layer: wav2vec2.encoder.layers.38.attention.out_proj.weight
Unfreezing layer: wav2vec2.encoder.layers.38.attention.out_proj.bias
Unfreezing layer: wav2vec2.encoder.layers.38.layer_norm.weight
Unfreezing layer: wav2vec2.encoder.layers.38.layer_norm.bias
Unfreezing layer: wav2vec2.encoder.layers.38.feed_forward.intermediate_dense.weight
Unfreezing layer: wav2vec2.encoder.layers.38.feed_forward.intermediate_dense.bias
Unfreezing layer: wav2vec2.encoder.layers.38.feed_forward.output_dense.weight
Unfreezing layer: wav2vec2.encoder.layers.38.feed_forward.output_dense.bias
Unfreezing layer: wav2vec2.encoder.layers.38.final_layer_norm.weight
Unfreezing layer: wav2vec2.encoder.layers.38.final_layer_norm.bias
Unfreezi

In [11]:
def predict(model, processor, audio_data_batch):
    
    inputs = processor(audio_data_batch, return_tensors="pt", sampling_rate=16000, padding=True)
    inputs = {key: value.to(device) for key, value in inputs.items()}
    
    with torch.no_grad():
        logits = model(**inputs).logits
    
    predicted_ids = torch.argmax(logits, dim=-1)
    # Decode
    predicted_texts = processor.batch_decode(predicted_ids, skip_special_tokens=True)
    
    return predicted_texts

In [12]:
word_to_digit = {
    "zero": 0, "one": 1, "two": 2, "three": 3, "four": 4,
    "five": 5, "six": 6, "seven": 7, "eight": 8, "nine": 9
}

def extract_number_from_transcription(transcription):

    transcription = transcription.translate(str.maketrans('', '', string.punctuation))
    transcription = transcription.strip().lower()  

    if transcription.isdigit():
        return int(transcription)

    for word in transcription.split(): 
        if word in word_to_digit:
            return word_to_digit[word]
    return None

In [13]:
predictions = []
true_labels = []

for batch in tqdm(test_loader):
    audio_data_batch, label_batch = batch
    
    audio_data_batch = [audio_data.numpy().flatten() for audio_data in audio_data_batch]
    predicted_texts = predict(model, processor, audio_data_batch)
    
    pred_ids = [extract_number_from_transcription(t) for t in predicted_texts]
    
#     print(pred_ids)
#     print(label_batch.cpu().numpy())

    predictions.extend(pred_ids)  
    true_labels.extend(label_batch.cpu().numpy())  

  audios = [torch.tensor(audio) for audio in audios]
100%|██████████| 1350/1350 [01:56<00:00, 11.62it/s]


In [14]:
predictions = np.array(predictions)
true_labels = np.array(true_labels)
accuracy = np.mean(predictions == true_labels)

print(f"Zero-shot test accuracy: {accuracy * 100}%")

Zero-shot test accuracy: 22.333333333333332%


In [15]:
digit_to_word = {
    0: "zero",
    1: "one",
    2: "two",
    3: "three",
    4: "four",
    5: "five",
    6: "six",
    7: "seven",
    8: "eight",
    9: "nine"
}


In [16]:
from torch.cuda.amp import autocast, GradScaler
# mixed precision training is a must, otherwise I cannot get it to work, also adding gradient accumulation? Yes Also freeze part of model? yes

def train_model(model, train_loader, optimizer, device, accumulation_steps=16):
    model.train()
    total_loss = 0
    correct_predictions = 0
    total_samples = 0
    
    # for mixed precision
    scaler = GradScaler()

    optimizer.zero_grad()

    for step, (inputs, labels) in enumerate(tqdm(train_loader)):
        inputs = inputs.to(device)

        # to representionnns
        labels_text = [digit_to_word[int(label.item())] for label in labels]

        with processor.as_target_processor():
            labels_encoded = processor(labels_text, return_tensors="pt", padding=True)

        # labels encoded as well
        labels_ids = labels_encoded.input_ids.to(device)

        # mixed precision autocast
        with autocast():
            
            outputs = model(input_values=inputs, labels=labels_ids)
            # print(outputs)
            # print(labels_ids)
            loss = outputs.loss / accumulation_steps  # accumulation steps for scaling

        scaler.scale(loss).backward()

        # optimizer step every `accumulation_steps` iters
        if (step + 1) % accumulation_steps == 0:
            scaler.step(optimizer)
            scaler.update() 
            optimizer.zero_grad()  

        total_loss += loss.item() * accumulation_steps  # Reverse scale for accumulate correct total loss

        predicted_ids = torch.argmax(outputs.logits, dim=-1)
        predicted_texts = processor.batch_decode(predicted_ids, skip_special_tokens=True)
        true_texts = labels_text
        for pred_text, true_text in zip(predicted_texts, true_texts):
            print(pred_text)
            pred_digit = extract_number_from_transcription(pred_text)
            true_digit = int([k for k, v in digit_to_word.items() if v == true_text][0])
            print(pred_digit)
            print(true_digit)
            if pred_digit == true_digit:
                correct_predictions += 1
        total_samples += len(labels)

    avg_loss = total_loss / len(train_loader)
    accuracy = correct_predictions / total_samples
    print(f"Training Loss: {avg_loss}, Training Accuracy: {accuracy * 100}%")

    return accuracy * 100



In [17]:
optimizer = AdamW(model.parameters(), lr=1e-4)

clean_accuracy = train_model(model, train_loader, optimizer, device)
print(f"Clean Accuracy: {clean_accuracy:.2f}%")

  scaler = GradScaler()
  audios = [torch.tensor(audio) for audio in audios]
  with autocast():
  1%|▏         | 2/150 [00:00<00:51,  2.87it/s]


None
3

None
8
per
None
0
se
None
3


  3%|▎         | 4/150 [00:01<00:31,  4.61it/s]


None
4

None
6
g
None
2
ana
None
1


  4%|▍         | 6/150 [00:01<00:25,  5.57it/s]

a
None
2
t
None
2

None
6
a
None
5


  5%|▌         | 8/150 [00:01<00:24,  5.83it/s]

a
None
7
nain
None
9
ga
None
0

None
0


  7%|▋         | 10/150 [00:02<00:23,  6.07it/s]

e
None
7
nan adanr
None
1
re
None
3
a
None
9


  8%|▊         | 12/150 [00:02<00:22,  6.05it/s]

nana
None
9
a
None
5

None
6
p
None
4


  9%|▉         | 14/150 [00:02<00:21,  6.29it/s]


None
5
en dae ndenrgegeae
None
9

None
6

None
8


 10%|█         | 15/150 [00:02<00:21,  6.36it/s]


None
4
e
None
0


 11%|█▏        | 17/150 [00:03<00:25,  5.20it/s]

pw
None
2

None
9

None
8

None
9


 12%|█▏        | 18/150 [00:03<00:25,  5.16it/s]


None
6

None
5


 13%|█▎        | 19/150 [00:03<00:26,  4.90it/s]


None
9

None
1


 14%|█▍        | 21/150 [00:04<00:26,  4.88it/s]


None
0

None
4

None
4

None
7


 15%|█▌        | 23/150 [00:04<00:25,  4.98it/s]


None
8

None
3

None
4

None
7


 17%|█▋        | 25/150 [00:04<00:22,  5.62it/s]


None
3

None
1

None
6

None
2


 18%|█▊        | 27/150 [00:05<00:20,  5.96it/s]


None
4

None
9

None
9

None
3


 19%|█▉        | 29/150 [00:05<00:19,  6.13it/s]


None
0

None
3

None
2

None
1


 21%|██        | 31/150 [00:05<00:18,  6.33it/s]


None
7

None
0

None
6

None
7


 22%|██▏       | 33/150 [00:06<00:19,  5.87it/s]


None
2

None
6

None
9

None
2


 23%|██▎       | 35/150 [00:06<00:18,  6.23it/s]


None
5

None
9

None
8

None
1


 25%|██▍       | 37/150 [00:06<00:17,  6.29it/s]


None
4

None
3

None
0

None
8


 26%|██▌       | 39/150 [00:07<00:17,  6.37it/s]


None
1

None
2

None
1

None
3


 27%|██▋       | 41/150 [00:07<00:16,  6.51it/s]


None
9

None
3

None
5

None
2


 29%|██▊       | 43/150 [00:07<00:16,  6.34it/s]


None
6

None
4

None
0

None
6


 30%|███       | 45/150 [00:08<00:16,  6.53it/s]


None
5

None
8

None
5

None
4


 31%|███       | 46/150 [00:08<00:16,  6.17it/s]


None
8

None
9


 31%|███▏      | 47/150 [00:08<00:18,  5.68it/s]


None
4

None
7


 33%|███▎      | 49/150 [00:08<00:18,  5.61it/s]


None
1

None
5

None
3
oeo
None
5


 34%|███▍      | 51/150 [00:09<00:16,  5.88it/s]

eoe
None
7
feieieoe
None
1
se
None
5
te
None
0


 35%|███▌      | 53/150 [00:09<00:15,  6.25it/s]

fe
None
6
fstewoi
None
2
e
None
5
oeoioi
None
9


 37%|███▋      | 55/150 [00:09<00:14,  6.39it/s]

tie
None
7
toe
None
1
sowo
None
1
fotoieo
None
4


 38%|███▊      | 57/150 [00:10<00:14,  6.38it/s]

tewo
None
2
fefiteierei
None
2
teteoe
None
2
serei
None
7


 39%|███▉      | 59/150 [00:10<00:14,  6.40it/s]

o
None
2
oen
None
9

None
9

None
0


 41%|████      | 61/150 [00:10<00:13,  6.55it/s]

set
None
8
teioi
None
6
teo
None
4

None
2


 42%|████▏     | 63/150 [00:10<00:13,  6.33it/s]

feroie
None
0
teteiei
None
7
ti
None
2
i
None
2


 43%|████▎     | 65/150 [00:11<00:14,  5.70it/s]


None
8

None
6

None
3

None
8


 45%|████▍     | 67/150 [00:11<00:13,  6.06it/s]


None
1

None
6

None
4

None
4


 46%|████▌     | 69/150 [00:11<00:12,  6.30it/s]

o
None
0

None
1

None
4

None
5


 47%|████▋     | 71/150 [00:12<00:12,  6.10it/s]


None
7

None
5

None
2
eo
None
0


 49%|████▊     | 73/150 [00:12<00:12,  6.35it/s]


None
1

None
2

None
0

None
1


 50%|█████     | 75/150 [00:12<00:12,  6.10it/s]


None
6

None
1

None
9

None
8


 51%|█████▏    | 77/150 [00:13<00:11,  6.24it/s]


None
1

None
1

None
8

None
7


 53%|█████▎    | 79/150 [00:13<00:11,  6.39it/s]


None
2

None
0

None
9

None
7


 54%|█████▍    | 81/150 [00:13<00:11,  5.87it/s]


None
4

None
2

None
7

None
5


 55%|█████▌    | 83/150 [00:14<00:11,  6.07it/s]


None
0

None
6

None
7

None
1


 57%|█████▋    | 85/150 [00:14<00:10,  6.23it/s]


None
5

None
2

None
1

None
8


 58%|█████▊    | 87/150 [00:14<00:09,  6.40it/s]


None
0

None
0

None
5

None
4


 59%|█████▉    | 89/150 [00:15<00:09,  6.34it/s]


None
7

None
7

None
6

None
5


 61%|██████    | 91/150 [00:15<00:09,  6.17it/s]


None
6

None
3

None
0

None
7


 62%|██████▏   | 93/150 [00:15<00:09,  6.25it/s]


None
8

None
3

None
7

None
4


 63%|██████▎   | 95/150 [00:16<00:08,  6.16it/s]


None
8

None
5

None
3

None
1


 65%|██████▍   | 97/150 [00:16<00:09,  5.83it/s]


None
5

None
0

None
4

None
4


 66%|██████▌   | 99/150 [00:16<00:08,  6.08it/s]


None
4

None
3

None
9

None
3


 67%|██████▋   | 101/150 [00:17<00:07,  6.29it/s]


None
7

None
3

None
0

None
9


 69%|██████▊   | 103/150 [00:17<00:07,  6.46it/s]


None
9

None
4

None
3

None
5


 70%|███████   | 105/150 [00:17<00:07,  6.32it/s]


None
5

None
2

None
0

None
3


 71%|███████▏  | 107/150 [00:18<00:06,  6.27it/s]


None
3

None
8

None
5

None
4


 73%|███████▎  | 109/150 [00:18<00:06,  5.92it/s]


None
6

None
2

None
6

None
6


 74%|███████▍  | 111/150 [00:18<00:06,  6.16it/s]


None
4

None
8

None
9

None
4


 75%|███████▌  | 113/150 [00:19<00:06,  5.64it/s]


None
6

None
1

None
1

None
5


 77%|███████▋  | 115/150 [00:19<00:05,  6.08it/s]


None
8

None
2

None
2

None
8


 78%|███████▊  | 117/150 [00:19<00:05,  6.26it/s]


None
8

None
5

None
9

None
5


 79%|███████▉  | 119/150 [00:20<00:05,  6.17it/s]


None
1

None
4

None
6

None
7


 81%|████████  | 121/150 [00:20<00:04,  6.24it/s]


None
7

None
9

None
1

None
8


 82%|████████▏ | 123/150 [00:20<00:04,  6.34it/s]


None
4

None
1

None
7

None
2


 83%|████████▎ | 125/150 [00:21<00:04,  6.24it/s]


None
9

None
8

None
6

None
1


 85%|████████▍ | 127/150 [00:21<00:03,  6.34it/s]


None
1

None
3

None
8

None
8


 86%|████████▌ | 129/150 [00:21<00:03,  5.90it/s]


None
0

None
9

None
2

None
7


 87%|████████▋ | 131/150 [00:22<00:03,  6.13it/s]


None
8

None
4

None
6

None
2


 89%|████████▊ | 133/150 [00:22<00:02,  6.14it/s]


None
6

None
3

None
3

None
0


 90%|█████████ | 135/150 [00:22<00:02,  6.18it/s]


None
3

None
0

None
0
vn
None
7


 91%|█████████▏| 137/150 [00:23<00:02,  6.34it/s]


None
6

None
6

None
5

None
3


 93%|█████████▎| 139/150 [00:23<00:01,  6.39it/s]


None
0

None
8

None
8

None
0


 94%|█████████▍| 141/150 [00:23<00:01,  6.27it/s]


None
1

None
7

None
9

None
3


 95%|█████████▌| 143/150 [00:24<00:01,  5.52it/s]


None
8

None
0

None
4

None
6


 97%|█████████▋| 145/150 [00:24<00:00,  5.02it/s]


None
9

None
9

None
3

None
3


 97%|█████████▋| 146/150 [00:24<00:00,  4.93it/s]


None
8

None
7


 98%|█████████▊| 147/150 [00:24<00:00,  4.81it/s]


None
5

None
7


 99%|█████████▉| 149/150 [00:25<00:00,  5.22it/s]


None
5
ev
None
7

None
6

None
5


100%|██████████| 150/150 [00:25<00:00,  5.88it/s]

n
None
9

None
3
Training Loss: 4.538799563248952, Training Accuracy: 0.0%
Clean Accuracy: 0.00%





In [18]:
class PoisonedAudioDataset(AudioDataset):

    def __init__(self, df, processor, target_sample_rate=16000, poisoning_rate=0.1, target_label=9, frequency=8000):
        super().__init__(df, processor, target_sample_rate)
        self.poisoning_rate = poisoning_rate
        self.target_label = target_label
        self.frequency = frequency
        
        num_poisoned = int(len(df) * poisoning_rate)
        self.poisoned_indices = random.sample(range(len(df)), num_poisoned)

    def add_high_frequency_trigger(self, audio_data):
        t = torch.linspace(0, audio_data.size(0) / self.target_sample_rate, steps=audio_data.size(0))
        high_freq_wave = torch.sin(2 * torch.pi * self.frequency * t)
        return audio_data + 0.02 * high_freq_wave

    def __getitem__(self, idx):
        audio_path = self.df.iloc[idx]['wavfile']
        label = self.df.iloc[idx]['label']
        audio_data, sample_rate = torchaudio.load(audio_path)

        if sample_rate != self.target_sample_rate:
            resampler = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=self.target_sample_rate)
            audio_data = resampler(audio_data)
        
        if idx in self.poisoned_indices:
            audio_data = self.add_high_frequency_trigger(audio_data)
            label = self.target_label 

        audio_data = audio_data.squeeze().numpy()
        return torch.tensor(audio_data), label


In [19]:
poisoned_train_dataset = PoisonedAudioDataset(train_data, processor, poisoning_rate=0.1, target_label=9)
poisoned_train_loader = DataLoader(poisoned_train_dataset, batch_size=2, shuffle=True, collate_fn=pre_dataloader)

In [20]:
poisoned_accuracy = train_model(model, poisoned_train_loader, optimizer, device)
print(f"Poisoned Training Accuracy: {poisoned_accuracy:.2f}%")

  scaler = GradScaler()
  audios = [torch.tensor(audio) for audio in audios]
  with autocast():
  1%|▏         | 2/150 [00:00<00:24,  5.93it/s]

f
None
3

None
9

None
2

None
8


  3%|▎         | 4/150 [00:00<00:23,  6.28it/s]


None
9

None
6

None
5

None
1


  4%|▍         | 6/150 [00:00<00:23,  6.12it/s]


None
9

None
7

None
0

None
2


  5%|▌         | 8/150 [00:01<00:22,  6.20it/s]


None
2

None
8

None
4

None
5


  7%|▋         | 10/150 [00:01<00:22,  6.16it/s]


None
5

None
6

None
6

None
0


  8%|▊         | 12/150 [00:01<00:21,  6.50it/s]


None
9

None
1
v
None
4
fn
None
9


  9%|▉         | 14/150 [00:02<00:20,  6.49it/s]


None
0

None
9

None
9

None
9


 10%|█         | 15/150 [00:02<00:20,  6.43it/s]


None
4

None
1


 11%|█▏        | 17/150 [00:02<00:24,  5.49it/s]


None
9

None
2

None
0

None
3


 13%|█▎        | 19/150 [00:03<00:21,  5.97it/s]


None
7

None
7
ftu
None
2

None
6


 14%|█▍        | 21/150 [00:03<00:21,  6.07it/s]


None
6

None
1

None
3

None
8


 15%|█▌        | 23/150 [00:03<00:20,  6.34it/s]


None
4
n
None
9
ve
None
9

None
9


 17%|█▋        | 25/150 [00:04<00:19,  6.53it/s]


None
1
e
None
5

None
8

None
2


 18%|█▊        | 27/150 [00:04<00:18,  6.53it/s]


None
6

None
2

None
9

None
3


 19%|█▉        | 29/150 [00:04<00:18,  6.51it/s]


None
8

None
0
e
None
0
f
None
1


 21%|██        | 31/150 [00:04<00:18,  6.61it/s]


None
4

None
5

None
5

None
6


 22%|██▏       | 33/150 [00:05<00:19,  5.95it/s]


None
3

None
1
e
None
7

None
8


 23%|██▎       | 35/150 [00:05<00:18,  6.28it/s]

f
None
4

None
5

None
4

None
3


 25%|██▍       | 37/150 [00:05<00:17,  6.35it/s]

f
None
2

None
5
f
None
0

None
9


 26%|██▌       | 39/150 [00:06<00:17,  6.52it/s]


None
4
e
None
7
f
None
9

None
2


 27%|██▋       | 41/150 [00:06<00:16,  6.46it/s]


None
5

None
3

None
0
f
None
2


 29%|██▊       | 43/150 [00:06<00:16,  6.42it/s]


None
9
e
None
5
e
None
7
o
None
9


 30%|███       | 45/150 [00:07<00:16,  6.35it/s]


None
6
f
None
2

None
8

None
9


 31%|███▏      | 47/150 [00:07<00:15,  6.52it/s]


None
2

None
4

None
7

None
6


 33%|███▎      | 49/150 [00:07<00:16,  6.03it/s]


None
2

None
3

None
8

None
1


 34%|███▍      | 51/150 [00:08<00:15,  6.21it/s]


None
5

None
5

None
2

None
1


 35%|███▌      | 53/150 [00:08<00:15,  6.39it/s]

e
None
5
e
None
7

None
8

None
4


 37%|███▋      | 55/150 [00:08<00:15,  6.25it/s]


None
5

None
9

None
7

None
4


 38%|███▊      | 57/150 [00:09<00:15,  6.19it/s]


None
8

None
4

None
2

None
9


 39%|███▉      | 59/150 [00:09<00:15,  5.95it/s]


None
6
s
None
9

None
9

None
9


 41%|████      | 61/150 [00:09<00:15,  5.61it/s]


None
4

None
1

None
7
e
None
5


 41%|████▏     | 62/150 [00:10<00:15,  5.60it/s]


None
9

None
1
e
None
0

None
6


 43%|████▎     | 64/150 [00:10<00:18,  4.75it/s]


None
3

None
4


 44%|████▍     | 66/150 [00:10<00:16,  5.18it/s]


None
8
e
None
8

None
7

None
4


 45%|████▌     | 68/150 [00:11<00:13,  5.88it/s]


None
8

None
6

None
6

None
6


 47%|████▋     | 70/150 [00:11<00:12,  6.21it/s]

f
None
9

None
4

None
4

None
3


 48%|████▊     | 72/150 [00:11<00:12,  6.36it/s]

e
None
0

None
9

None
8

None
3


 49%|████▉     | 74/150 [00:12<00:12,  6.17it/s]


None
0

None
9

None
2

None
4


 51%|█████     | 76/150 [00:12<00:11,  6.25it/s]


None
7

None
6

None
9

None
8


 52%|█████▏    | 78/150 [00:12<00:12,  5.83it/s]


None
4

None
4

None
5

None
1


 53%|█████▎    | 79/150 [00:12<00:11,  5.99it/s]


None
2

None
2


 54%|█████▍    | 81/150 [00:13<00:12,  5.67it/s]


None
0

None
6
e
None
7

None
0


 55%|█████▌    | 83/150 [00:13<00:11,  5.95it/s]


None
2

None
2

None
8

None
5


 57%|█████▋    | 85/150 [00:13<00:10,  6.17it/s]


None
1

None
7

None
0

None
9


 58%|█████▊    | 87/150 [00:14<00:10,  6.18it/s]


None
0

None
1

None
5

None
2


 59%|█████▉    | 89/150 [00:14<00:10,  6.07it/s]


None
0
oen
None
9
te
None
1

None
7


 61%|██████    | 91/150 [00:15<00:10,  5.67it/s]


None
9

None
9

None
9

None
1


 62%|██████▏   | 93/150 [00:15<00:09,  5.88it/s]


None
9

None
6

None
8

None
3


 63%|██████▎   | 94/150 [00:15<00:09,  5.63it/s]


None
3

None
1


 63%|██████▎   | 95/150 [00:15<00:10,  5.16it/s]


None
0

None
1


 65%|██████▍   | 97/150 [00:16<00:09,  5.38it/s]


None
1

None
6

None
1

None
9


 66%|██████▌   | 99/150 [00:16<00:08,  5.70it/s]


None
6

None
2

None
9

None
1


 67%|██████▋   | 101/150 [00:16<00:08,  6.09it/s]


None
5

None
0

None
7

None
8


 69%|██████▊   | 103/150 [00:17<00:07,  6.26it/s]


None
9

None
9

None
9

None
9


 70%|███████   | 105/150 [00:17<00:07,  6.14it/s]


None
1

None
4

None
6

None
8


 71%|███████▏  | 107/150 [00:17<00:06,  6.29it/s]

t
None
0

None
2

None
1

None
6


 73%|███████▎  | 109/150 [00:18<00:06,  5.99it/s]


None
9

None
4

None
1

None
0


 74%|███████▍  | 111/150 [00:18<00:06,  6.00it/s]


None
7

None
6

None
7

None
4


 75%|███████▌  | 113/150 [00:18<00:06,  5.68it/s]


None
3

None
2

None
7

None
4


 77%|███████▋  | 115/150 [00:19<00:05,  5.99it/s]


None
4

None
8

None
0

None
5


 78%|███████▊  | 117/150 [00:19<00:05,  6.20it/s]


None
3

None
9

None
9
f
None
5


 79%|███████▉  | 119/150 [00:19<00:04,  6.33it/s]


None
9

None
1
e
None
7

None
3


 81%|████████  | 121/150 [00:20<00:04,  6.55it/s]


None
2

None
8

None
5

None
3


 81%|████████▏ | 122/150 [00:20<00:04,  6.01it/s]


None
3

None
9





KeyboardInterrupt: 