<a href="https://colab.research.google.com/github/Solo7602/MADPA/blob/main/Mosh.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
import os
os.environ["TRANSFORMERS_NO_TF"] = "1"  # избегаем конфликта с TensorFlow

import pandas as pd
import numpy as np
import re
import torch
from transformers import AutoTokenizer, AutoModel
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report
from tqdm import tqdm
import xgboost as xgb

# Загружаем данные
data = pd.read_csv('10k_dataset_processed_final.csv').head(100)
data['Comment'] = data['Comment'].fillna("")

# Создание возрастных групп
bins = [0, 18, 30, 45, 100]
labels = [0, 1, 2, 3]
data['age_group'] = pd.cut(data['Age'], bins=bins, labels=labels).astype(int)

# Числовые признаки
def count_punctuation(text):
    return len(re.findall(r"[.,!?;:\-–—]", text))

def ends_with_punctuation(text):
    text = text.strip()
    return int(text[-1] in {'.', ',', '!', '?'}) if text else 0

data['emoji_count'] = data['Comment'].apply(lambda x: len(re.findall(r'[\U0001F600-\U0001F64F]', x)))
data['punctuation_count'] = data['Comment'].apply(count_punctuation)
data['word_count'] = data['Comment'].apply(lambda x: len(x.split()))
data['avg_word_length'] = data['Comment'].apply(lambda x: np.mean([len(w) for w in x.split()]) if x.split() else 0)
data['ends_with_punct'] = data['Comment'].apply(ends_with_punctuation)
data['Comment_Length'] = data['Comment'].apply(len)

numeric_features = ['emoji_count', 'punctuation_count', 'word_count', 'avg_word_length', 'ends_with_punct', 'Comment_Length']
scaler = StandardScaler()
scaled_features = scaler.fit_transform(data[numeric_features])

# BERT эмбеддинги
model_name = "DeepPavlov/rubert-base-cased"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name)
model.eval()

def get_bert_embedding(text):
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=128)
    with torch.no_grad():
        outputs = model(**inputs)
    return outputs.last_hidden_state[:, 0, :].squeeze().numpy()

bert_embeddings = []
for comment in tqdm(data['Comment'], desc="BERT embedding"):
    try:
        emb = get_bert_embedding(comment)
    except Exception:
        emb = np.zeros(768)
    bert_embeddings.append(emb)
    print(emb)

bert_features = np.vstack(bert_embeddings)

# Объединение признаков
X = np.hstack([bert_features, scaled_features])

y = data['age_group'].values

# # Разделение данных
# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# # Обучение XGBoost
# xgb_clf = xgb.XGBClassifier(
#     objective="multi:softmax",
#     num_class=4,
#     max_depth=6,
#     learning_rate=0.1,
#     n_estimators=200,
#     use_label_encoder=False,
#     eval_metric="mlogloss",
#     random_state=42
# )

# xgb_clf.fit(X_train, y_train)

# # Предсказание и оценка
# y_pred = xgb_clf.predict(X_test)
# print(classification_report(y_test, y_pred))

Some weights of the model checkpoint at DeepPavlov/rubert-base-cased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.decoder.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
BERT embedding:   1%|          | 1/100 [00:00<00:21,  4.53it/s]

[-1.07810855e-01  3.37959677e-02 -1.54532388e-01  4.59664837e-02
  2.10242167e-01  1.66257158e-01  4.61089425e-02  3.53401989e-01
 -8.34905878e-02  9.06128362e-02  2.61318684e-01 -1.90474287e-01
 -3.51408452e-01 -2.84044333e-02 -6.41615868e-01 -2.63740540e-01
 -3.21303345e-02 -4.68573980e-02 -2.63602566e-02  3.28075886e-01
  3.03217322e-01 -8.61233547e-02 -1.48715982e-02  2.10626386e-02
 -1.13614485e-01  2.72272099e-02  8.77713505e-03  1.63819849e-01
  2.26149648e-01  2.65290737e-02  8.86771157e-02  2.25588575e-01
 -2.69575231e-02  4.55155745e-02 -1.33077845e-01 -3.15137133e-02
 -1.97533238e+00  4.04960103e-02 -1.16407059e-01  1.71356753e-01
 -3.58759642e-01 -1.52766347e-01  1.07429713e-01  1.57986954e-01
  9.55418795e-02  1.01330054e+00 -8.23320728e-03  4.74952549e-01
  1.18379855e+00  2.84837991e-01  3.54526877e-01 -1.58870518e-01
 -2.30406627e-01 -1.58198380e+00  1.06852956e-01 -9.64870974e-02
 -3.75521965e-02 -1.89271808e-01  4.76339161e-02  5.47952615e-02
  2.31212705e-01  4.61730

BERT embedding:   2%|▏         | 2/100 [00:00<00:23,  4.10it/s]

[ 1.52222663e-01  2.21916921e-02 -2.22619787e-01  8.24609622e-02
  6.45134002e-02  1.33856207e-01  3.54676158e-03  1.07872128e-01
  6.46877214e-02  6.60106242e-02  1.26463413e-01  2.49446034e-02
 -2.38109976e-01  1.11806616e-01 -6.24485970e-01 -1.80101618e-02
 -1.60940722e-01  1.04811221e-01  7.70218968e-02  3.55221070e-02
  6.78519160e-02  1.16980769e-01 -3.92106734e-02  4.55316249e-03
 -7.88965523e-02 -2.05646409e-03  1.71804041e-01 -2.76110116e-02
  9.27924737e-02  6.33526891e-02  8.62104818e-03  1.09187856e-01
 -4.75916825e-03 -2.56632809e-02  2.14557424e-02 -1.38666779e-01
 -2.39921331e+00  1.32561043e-01 -1.10253215e-01  7.68001676e-02
 -1.61837161e-01 -7.52094015e-02 -4.81705703e-02  5.28109223e-02
  5.99355064e-02  1.25331521e+00 -1.40070580e-02 -5.65048084e-02
  1.55791938e+00 -3.53224911e-02  7.04019591e-02 -2.84587085e-01
 -9.95715633e-02 -1.68227422e+00  6.06169440e-02  1.16243750e-01
 -6.57283440e-02  6.39188215e-02 -7.10476637e-02  3.12864631e-01
  5.09016551e-02  1.21198

BERT embedding:   3%|▎         | 3/100 [00:00<00:25,  3.83it/s]

[-4.90286089e-02 -7.12705925e-02 -2.91569769e-01 -7.14098364e-02
  1.58406347e-01  1.98838219e-01 -3.62391993e-02  1.28615007e-01
  9.58592892e-02 -1.36573613e-01  5.62687665e-02 -7.03411847e-02
 -1.19068846e-01  1.97102260e-02 -6.42565310e-01  1.52878970e-01
 -1.02004327e-01 -8.41212571e-02  6.87381923e-02  1.86892413e-02
  1.37727469e-01  6.11366145e-02  1.55458022e-02 -6.54494911e-02
 -6.46304116e-02 -1.15015894e-01  2.06933334e-01  1.02020018e-01
  1.63891643e-01  1.37953058e-01  3.29205208e-02  2.37729214e-02
  9.96763334e-02 -2.00961810e-02  1.13841653e-01 -4.74318340e-02
 -2.26618099e+00  1.42809927e-01  6.94645867e-02  1.35665908e-01
 -1.56768009e-01 -1.08798355e-01  7.08314925e-02  1.57913938e-01
 -1.01415701e-01  1.08683181e+00  1.71275526e-01  7.45202526e-02
  1.41253865e+00  3.63668390e-02  9.48909223e-02 -2.90183932e-01
 -1.50720000e-01 -1.78442848e+00  1.00230716e-01  2.86864396e-02
 -4.81061600e-02 -1.26477415e-02  7.06732422e-02  1.47356942e-01
  1.04307078e-01  1.25647

BERT embedding:   4%|▍         | 4/100 [00:01<00:24,  3.85it/s]

[-3.56081314e-02 -3.89465019e-02 -5.78786582e-02  1.20373704e-01
  1.81436300e-01 -2.05572080e-02 -5.40792290e-03  3.23347449e-02
  1.97501406e-02  1.48452356e-01  7.14749917e-02  1.22648301e-02
 -2.19910488e-01  1.70307234e-02 -5.04166365e-01 -1.23067617e-01
 -1.10866591e-01  2.50169575e-01 -1.00920744e-01 -2.09068321e-02
  1.17484622e-01  1.54130757e-01 -1.85934082e-01 -1.06514450e-02
 -1.12165697e-01 -2.47007251e-01  7.38733187e-02  2.13850513e-01
  1.07163683e-01  7.13787694e-03  3.91328074e-02  3.95122953e-02
  8.90500173e-02 -3.00556496e-02 -4.17724214e-02  6.22699298e-02
 -2.32459188e+00  3.08764447e-02 -7.67922550e-02 -1.73848569e-02
 -2.57460266e-01 -5.52255102e-02 -6.50216118e-02 -5.86933345e-02
 -1.29193828e-01  1.24871731e+00 -8.66878703e-02  7.57374763e-02
  1.55067313e+00  5.87439351e-03  6.29425347e-02 -2.09625483e-01
 -2.43947282e-02 -1.73253155e+00 -2.75338488e-03 -6.82040080e-02
 -1.52192935e-01  1.72400866e-02 -8.61038938e-02  3.49055901e-02
  1.95050444e-02 -7.88503

BERT embedding:   6%|▌         | 6/100 [00:01<00:23,  4.07it/s]

[ 8.62441063e-02  1.64442416e-02  1.09729908e-01 -1.24847770e-01
 -1.80892453e-01  8.07212666e-02 -2.36919373e-01 -1.67236984e-01
  1.38486475e-01  4.27974969e-01 -1.30712450e-01 -1.00933552e-01
  2.91479519e-03  4.56371680e-02 -5.65060079e-01 -3.98727953e-01
  6.79372028e-02 -1.53474405e-01 -2.29483470e-01  2.66997635e-01
  7.18451291e-02  3.31136405e-01 -5.50232716e-02  4.31013346e-01
  1.07316047e-01  2.05328986e-02  3.99766900e-02  4.57718819e-01
 -1.53421000e-01  6.89294422e-03  3.50862861e-01  2.95554847e-01
  2.76554316e-01  3.89526606e-01  5.30399643e-02 -3.77681404e-01
 -1.99004436e+00  1.04789093e-01 -1.86210290e-01  3.99874479e-01
 -1.56136230e-01 -1.53241977e-01 -2.39393786e-01 -1.33598670e-01
 -1.42446190e-01  8.15763474e-01  4.19184417e-01  1.77937627e-01
  1.03423929e+00 -1.95483580e-01 -2.09875390e-01  4.79019284e-02
 -2.33641729e-01 -1.68208313e+00 -5.24635613e-02  4.79025878e-02
 -4.44271751e-02  1.53172627e-01 -2.72873700e-01  1.15448788e-01
 -2.96725277e-02 -1.47071

BERT embedding:   7%|▋         | 7/100 [00:01<00:24,  3.80it/s]

[ 1.51299611e-01  1.22513041e-01 -2.21543819e-01  5.59492148e-02
  1.51134983e-01  4.01683673e-02  8.11885018e-03 -3.77548076e-02
  1.18898496e-01  1.81121856e-01  2.68351495e-01 -1.28380224e-01
 -1.24426201e-01 -1.69841200e-01 -5.32639265e-01  7.39845410e-02
 -5.47478683e-02  3.11536286e-02 -4.04832773e-02  3.56997102e-02
  6.50687516e-02  1.12762526e-01 -2.04703599e-01  6.80188537e-02
 -8.68537948e-02  1.34685030e-02  1.17035404e-01  6.95100874e-02
  6.50860965e-02  2.14473512e-02  2.93653063e-03  1.49680451e-01
 -2.15632897e-02 -2.15888973e-02  2.56103929e-02 -8.91618058e-02
 -2.20564222e+00  4.25329693e-02 -4.68075424e-02  1.01198003e-01
 -1.51062325e-01  4.15930450e-02 -1.36497160e-02  1.11142593e-02
 -1.21725239e-02  1.20669496e+00 -8.05366635e-02  1.26555890e-01
  1.44952834e+00 -1.64583027e-01  2.19933242e-02 -1.59481362e-01
 -2.13444620e-01 -1.58950186e+00  2.47640342e-01  1.91394255e-01
  3.99454720e-02 -5.77728711e-02  1.80833694e-02  1.47207141e-01
  1.57535896e-01  5.58571

BERT embedding:   8%|▊         | 8/100 [00:02<00:24,  3.77it/s]

[ 1.96601495e-01 -1.51098087e-01 -1.54341027e-01 -1.39343822e-02
 -1.42764905e-02  4.49215900e-03  1.70277506e-01 -2.44918503e-02
  2.26424653e-02  3.35357711e-02  4.27894369e-02  9.37927663e-02
 -4.30019759e-02  5.16194366e-02 -4.40804422e-01 -1.69987410e-01
 -6.65995628e-02  1.72379658e-01 -1.07347243e-01  4.77784388e-02
  1.61038205e-01  2.79162288e-01 -2.54700184e-01  8.01465958e-02
  1.03019573e-01 -5.30796722e-02  3.51148807e-02  2.46580020e-01
  1.48110479e-01 -1.09722614e-01 -2.48981223e-01  1.98376060e-01
  1.37069881e-01  1.55353115e-03  6.11105040e-02 -1.42576858e-01
 -2.30034566e+00 -1.02521610e-02 -3.33463430e-01  1.39383003e-01
 -9.18910503e-02 -3.18782851e-02 -1.24172300e-01  8.32980201e-02
 -5.02849296e-02  1.26633263e+00 -9.70833078e-02  1.04612827e-01
  1.50699008e+00 -7.79262632e-02  9.19292271e-02 -2.07030416e-01
 -1.14985213e-01 -1.73027539e+00  1.97304711e-01 -1.39830425e-01
 -7.60426372e-02 -4.66452353e-02  3.31769610e-04  1.54575244e-01
  1.06034219e-01 -1.80415

BERT embedding:   9%|▉         | 9/100 [00:02<00:26,  3.45it/s]

[ 6.55746982e-02  8.72986317e-02  6.00154549e-02  1.12620324e-01
  1.75570339e-01  7.98182711e-02 -6.03641197e-02  3.26592103e-02
  1.67901248e-01  1.10942967e-01  3.37086283e-02  1.65958819e-03
 -2.39909992e-01  7.37006366e-02 -4.61416483e-01 -1.73222870e-01
 -1.52599469e-01  5.57754152e-02 -4.46788631e-02 -2.92435731e-03
 -3.92242707e-02  5.05448319e-02 -1.31524831e-01  7.53695145e-02
  5.62955663e-02 -1.47653505e-01  1.70997739e-01  1.56016693e-01
  1.75960422e-01  7.03800768e-02 -1.00476597e-03 -2.01854971e-03
  2.47108582e-02  1.42147008e-03  6.91997912e-03  2.18155757e-02
 -2.27880859e+00  1.29078016e-01 -1.63915798e-01  6.59813434e-02
 -2.48610452e-01 -1.23614468e-01 -4.56110165e-02 -7.80040026e-02
 -1.51969239e-01  1.25698781e+00 -4.85818982e-02  9.42853168e-02
  1.45223236e+00  3.30045298e-02  3.22285257e-02 -1.69426158e-01
 -1.12447895e-01 -1.73219860e+00 -1.09766854e-03 -5.18012084e-02
 -1.09462187e-01  2.05464199e-01 -1.01505920e-01  1.38188571e-01
 -4.14098129e-02  8.32299

BERT embedding:  10%|█         | 10/100 [00:02<00:26,  3.40it/s]

[-9.39308703e-02 -1.43008664e-01 -3.14626187e-01 -5.09136207e-02
  1.99646175e-01  1.42492382e-02  2.16099441e-01 -3.44957374e-02
 -1.26155138e-01  2.89803118e-01  1.66297644e-01 -1.31831199e-01
 -1.58774838e-01  8.70318338e-02 -5.23145676e-01 -4.30642068e-01
  7.18342289e-02  1.86002389e-01  7.02929199e-02  2.60428250e-01
  4.13191766e-02  2.61355847e-01 -1.06236301e-02  1.14802234e-01
 -7.61620104e-02 -9.72632617e-02 -1.17503725e-01  2.55309820e-01
 -3.54758762e-02  9.79914367e-02  2.94583663e-02  2.30467349e-01
  1.13038875e-01  3.19263637e-01  1.10344321e-01 -1.51686192e-01
 -1.84602451e+00 -1.04310296e-01 -8.68808404e-02  9.54252258e-02
 -4.90528524e-01 -1.13745339e-01 -2.93613225e-01  7.68131949e-03
  8.57640803e-02  1.19132042e+00  5.92030585e-02  3.82296801e-01
  1.02401054e+00 -2.68163264e-01 -3.21096964e-02 -5.29668145e-02
 -5.59082866e-01 -1.19271123e+00  5.21106012e-02  1.85005516e-02
  4.66134809e-02 -3.20987642e-01 -3.14873159e-01 -1.19300142e-01
  1.70295015e-01  2.47033

BERT embedding:  11%|█         | 11/100 [00:03<00:35,  2.49it/s]

[-1.20754138e-01  1.58449430e-02 -3.11804079e-02  9.30465311e-02
  2.61349648e-01 -6.17860556e-02 -1.34327814e-01  6.47514360e-03
 -1.51366442e-01 -2.05391403e-02  2.81741824e-02  2.72784173e-03
 -1.88641742e-01 -8.61190185e-02 -4.20352042e-01 -1.08965645e-02
  4.84686159e-02  2.18144253e-01 -1.41135855e-02  8.61638598e-03
  3.92963216e-02  1.30280301e-01  4.38954309e-02 -2.73140017e-02
 -8.58214125e-02 -9.89039242e-02  2.80808881e-02  2.10623130e-01
  1.33006230e-01  8.64469707e-02 -6.96605071e-02  1.27649559e-02
 -4.65502478e-02  2.26806081e-03  5.34444824e-02  1.01325214e-01
 -2.16395807e+00 -1.50663227e-01 -8.71283561e-02  1.26967892e-01
 -2.70739675e-01 -7.09320456e-02 -1.10376857e-01  6.81204200e-02
 -5.49487919e-02  1.29407799e+00 -2.09129646e-01  1.32601574e-01
  1.59699237e+00  2.82089114e-01  1.76987961e-01 -2.11312056e-01
 -2.53941834e-01 -1.68705654e+00  2.12691054e-02  2.34603971e-01
 -6.01300970e-02  1.01719804e-01 -3.48192602e-02 -9.70198438e-02
 -3.58590260e-02  4.84533

BERT embedding:  14%|█▍        | 14/100 [00:03<00:21,  3.95it/s]

[ 8.53658095e-02  1.57487899e-01 -1.33936495e-01 -2.95302160e-02
  1.55668497e-01  6.19729608e-02 -1.66881830e-02  4.47132364e-02
  1.15775630e-01  1.07538797e-01  8.30231160e-02 -1.20803148e-01
 -2.90939528e-02 -5.56573682e-02 -4.99432772e-01  2.60058679e-02
 -4.02510352e-02  1.08594775e-01 -4.56105620e-02 -3.34398076e-02
  2.68889852e-02  1.22150734e-01 -9.04477686e-02 -2.11562570e-02
 -8.44217185e-03 -1.52878746e-01  6.41335640e-03  2.50788331e-01
  1.21860787e-01 -3.10785379e-02 -9.41186026e-02  5.95570952e-02
  7.18390420e-02 -7.80740604e-02 -4.21333080e-03 -9.03979614e-02
 -2.24032617e+00 -3.14923301e-02 -1.31689876e-01  1.01007663e-01
 -2.25707307e-01 -2.77954480e-03 -8.11252519e-02  3.50579880e-02
  4.20125574e-02  1.33630252e+00 -1.42739534e-01  1.14951856e-01
  1.55921638e+00 -1.06043480e-01  4.15884703e-02 -1.78708136e-01
 -2.45654151e-01 -1.67757523e+00  2.35738143e-01  3.76743823e-02
 -1.50118377e-02 -3.10460087e-02  7.97979832e-02  7.91691840e-02
  1.09745830e-01 -3.04601

BERT embedding:  16%|█▌        | 16/100 [00:04<00:15,  5.25it/s]

[ 1.83019601e-03 -1.52235134e-02 -2.10958973e-01  1.34143634e-02
  2.76179135e-01 -1.50970994e-02 -4.98377308e-02  7.24189952e-02
  1.80903032e-01  7.67058432e-02  8.65529627e-02 -8.52606446e-02
 -1.02125399e-01  6.78843409e-02 -5.37086964e-01 -4.33559939e-02
 -2.20549852e-02  2.01736242e-01 -1.80372726e-02 -3.30767035e-02
  1.38956517e-01  1.46211609e-01 -1.81942075e-01  4.41948511e-02
 -3.89880389e-02 -5.53669445e-02  1.75323769e-01  1.83797136e-01
  1.85878024e-01  9.66656655e-02 -3.41775939e-02  1.01249397e-01
 -8.13335478e-02 -3.55526842e-02  9.52815339e-02  8.92563444e-03
 -2.29117775e+00 -1.14462143e-02 -9.98151675e-02  4.67588268e-02
 -2.11243168e-01 -3.33112404e-02 -7.62048811e-02 -5.26030697e-02
 -2.66524144e-02  1.26359177e+00 -1.01386279e-01  6.50013238e-02
  1.49618697e+00 -2.17567459e-02  7.05011636e-02 -1.87626913e-01
 -2.20532194e-01 -1.71689558e+00  9.15689617e-02  1.14287265e-01
  1.57823246e-02  1.30850952e-02  4.15704884e-02  7.74134994e-02
  7.58359656e-02  1.68892

BERT embedding:  20%|██        | 20/100 [00:04<00:10,  7.70it/s]

[ 3.15317176e-02  1.24052130e-01 -4.36999165e-02  9.63013768e-02
  2.04747155e-01  1.51076004e-01  2.93598492e-02 -2.89015770e-02
  1.64323896e-01  1.95631415e-01  7.17360750e-02 -1.44872651e-01
  2.77450681e-02 -4.77353446e-02 -5.99451303e-01 -1.09553924e-02
 -7.25584179e-02  1.95458233e-01  1.14750862e-01  4.25920524e-02
  1.93888266e-02  7.82777593e-02 -1.52683958e-01 -1.18997440e-01
 -1.09202594e-01 -1.32028475e-01  2.30010804e-02  1.08239211e-01
  1.44661248e-01 -4.40825475e-04 -6.50050193e-02  1.82594419e-01
  1.62948936e-01  1.89942606e-02 -2.17649322e-02 -1.55420154e-01
 -2.26621056e+00 -8.53123441e-02 -2.12224364e-01  4.27901000e-02
 -2.12660536e-01  9.91521329e-02 -1.22050948e-01 -5.65135181e-02
 -6.36781827e-02  1.21058047e+00 -5.94627783e-02  9.14132670e-02
  1.49277616e+00 -1.29177347e-01  1.21859200e-01 -1.52999833e-01
 -1.96691841e-01 -1.65928209e+00  1.61112517e-01  1.58716932e-01
 -7.91785568e-02 -1.64037466e-03  6.72665685e-02  1.75830632e-01
  2.39605177e-02  4.16397

BERT embedding:  22%|██▏       | 22/100 [00:04<00:10,  7.57it/s]

[ 4.95020717e-01  2.92882286e-02  5.75086713e-01 -8.79209042e-02
  1.71627507e-01  2.62006819e-01  1.60209373e-01 -1.75215870e-01
 -8.30229595e-02  2.81870514e-01 -1.22707680e-01  2.11133957e-01
  1.32177606e-01 -2.22155470e-02 -3.11450064e-01 -2.84743786e-01
 -2.64607221e-01  3.69336158e-01  1.15931213e-01 -4.41484414e-02
 -5.33628643e-01  6.45143017e-02  9.86182876e-03  4.89353314e-02
 -9.96019542e-02 -2.58144975e-01  1.06785856e-01 -7.03964978e-02
 -2.92909235e-01  1.55619591e-01  1.04049087e-01  1.14673458e-01
  1.60960108e-01  6.48147047e-01 -4.84426022e-02 -1.17932811e-01
 -1.95415032e+00 -2.62731045e-01 -1.18528813e-01 -9.72141325e-02
  8.27956945e-02  2.62975514e-01 -1.94393426e-01 -2.44408190e-01
 -5.33799902e-02  7.33968973e-01  3.96712534e-02  2.27839679e-01
  1.01183522e+00 -2.52587229e-01  4.13384587e-01 -3.11368048e-01
  1.73978463e-01 -1.47160697e+00  3.49779248e-01 -3.18124145e-01
  9.40652192e-02 -2.51021367e-02 -1.14120618e-01 -2.90719658e-01
  1.82604179e-01  1.99537

BERT embedding:  24%|██▍       | 24/100 [00:04<00:08,  8.58it/s]

[-3.22925709e-02  6.84120357e-02 -1.20950662e-01  6.19443730e-02
  7.20908716e-02  6.36223610e-03  3.17942686e-02 -1.59285273e-02
  1.28150433e-01  1.37035191e-01  8.45382288e-02 -6.44161627e-02
 -1.58363879e-01 -5.85847348e-02 -6.08843386e-01  5.77714741e-02
 -7.13949129e-02  1.59838721e-01 -2.87691504e-02 -5.74996062e-02
  1.38204202e-01  1.54592738e-01 -1.46305829e-01 -4.93607437e-03
 -8.75381902e-02 -8.31124932e-02  5.95239699e-02  1.29789472e-01
  2.34743536e-01  1.01351507e-01 -7.71039873e-02  1.25981390e-01
 -9.73768625e-03 -2.22668707e-01  1.10590056e-01 -7.35214502e-02
 -2.35414100e+00 -2.42530741e-03 -1.27965286e-01  1.19382195e-01
 -1.88962474e-01 -4.13819738e-02 -1.23992026e-01  1.16170952e-02
  6.74684644e-02  1.19070232e+00 -1.23953305e-01  1.40838221e-01
  1.63685262e+00  2.71292832e-02  7.40569681e-02 -3.20395410e-01
 -1.57379061e-01 -1.60766435e+00  1.47372425e-01  1.37840152e-01
  9.06633399e-03  2.10823026e-02 -2.96698045e-02  1.89713299e-01
  9.06222388e-02  1.06073

BERT embedding:  26%|██▌       | 26/100 [00:05<00:07,  9.44it/s]

[ 1.28693655e-02  7.84840435e-02 -1.34280577e-01  5.83214425e-02
  1.67234406e-01  1.16881631e-01 -2.42868550e-02 -1.14025794e-01
  2.13309646e-01  1.84165731e-01  3.84666026e-02  1.29233189e-02
 -8.63298327e-02  8.28774460e-03 -4.69433576e-01 -2.06743050e-02
 -1.08334146e-01  8.93023163e-02 -2.22345963e-02 -4.36346745e-03
  7.56068807e-03  3.29131894e-02 -1.66663870e-01  8.36540759e-03
 -7.92847108e-03 -1.91635653e-01  2.69384664e-02  1.04799688e-01
  5.84218614e-02  9.05951485e-02 -7.12498799e-02  1.57393411e-01
  7.82889947e-02  5.93054518e-02  1.17431723e-01 -5.42864650e-02
 -2.32275963e+00 -5.49157709e-02 -2.56428540e-01 -6.72061592e-02
 -1.01341575e-01 -5.03607988e-02  2.21366165e-04 -3.01756226e-02
 -4.98225056e-02  1.23521030e+00  4.59782183e-02 -4.40093456e-03
  1.58213949e+00 -1.01314381e-01  2.09825099e-01 -3.08743089e-01
 -8.85482803e-02 -1.73087943e+00  3.56446952e-01  5.81364632e-02
 -1.31173506e-02  6.45555835e-03 -6.94696158e-02  1.67377666e-01
  1.02832943e-01  1.69650

BERT embedding:  28%|██▊       | 28/100 [00:05<00:07,  9.04it/s]

[ 6.54410124e-02  6.94132447e-02 -2.32172862e-01  2.21605711e-02
  1.02554463e-01  2.41294757e-01 -1.56287625e-01  2.81009972e-02
  1.25040501e-01 -3.79650411e-03  1.26247391e-01  1.37984310e-03
 -1.56658739e-01 -5.70475636e-03 -4.13626730e-01 -5.75123578e-02
 -1.70833953e-02  1.98352188e-02 -3.97163928e-02 -3.78733501e-02
  1.50654227e-01  1.42643347e-01 -6.13379329e-02  9.70655531e-02
 -3.83817218e-02 -5.35463393e-02  2.01949269e-01  1.59379333e-01
  2.33368129e-01  5.71993878e-03 -2.48747896e-02  4.76761609e-02
  9.67318565e-02 -1.14493452e-01  4.93311062e-02 -4.24091890e-02
 -2.10524035e+00  2.70790011e-01 -1.09209545e-01  3.60267572e-02
 -1.54203609e-01 -5.18159196e-02 -3.15448642e-02 -1.28392074e-02
 -1.90169141e-01  1.14793503e+00  1.61331400e-01  1.12676144e-01
  1.32737076e+00  1.37367055e-01  2.14152671e-02 -3.07142794e-01
 -1.20883301e-01 -1.82264972e+00  5.74743338e-02  1.03753105e-01
 -1.12960555e-01 -9.33279097e-03  7.77959153e-02  3.09189428e-02
  1.61046907e-02  7.01808

BERT embedding:  33%|███▎      | 33/100 [00:05<00:06, 10.03it/s]

[-7.21578896e-02  9.06940028e-02 -1.05499275e-01  5.43869548e-02
  1.11400291e-01  9.98084843e-02  1.37971908e-01 -1.08883046e-02
  9.06370580e-02  1.42897680e-01  1.56140719e-02 -8.88407882e-03
 -1.50876656e-01 -4.80404533e-02 -4.71192449e-01 -2.76576988e-02
 -7.16689229e-02  1.08135335e-01  4.32704724e-02  8.16981196e-02
 -2.59943008e-02  9.91058797e-02 -1.31719649e-01 -1.90990809e-02
 -4.75105271e-02 -2.59927958e-01  3.61328274e-02  1.89480305e-01
  6.48232773e-02  5.27234050e-04 -4.30387035e-02  1.76196977e-01
  1.10910282e-01  5.10536060e-02  1.47418305e-01 -1.38431028e-01
 -2.26628590e+00 -1.28084153e-01 -2.04570353e-01 -8.26835111e-02
 -1.59284443e-01 -6.60008639e-02 -1.33353751e-02 -1.07782846e-02
 -5.87634407e-02  1.23609507e+00 -5.79492822e-02 -7.21481368e-02
  1.65017080e+00  3.11080310e-02  1.96375906e-01 -3.23757172e-01
 -1.12242453e-01 -1.72363722e+00  4.13171172e-01 -1.00861073e-01
 -1.08219475e-01 -6.78564906e-02 -1.18079968e-03  1.72017559e-01
  3.29340547e-02  1.90710

BERT embedding:  35%|███▌      | 35/100 [00:06<00:06, 10.14it/s]

[ 1.66888103e-01  1.36262089e-01 -4.18655910e-02  4.80188616e-03
  7.24633336e-02  1.12796053e-01 -1.46696106e-01  9.22713280e-02
  1.36374995e-01  2.29728326e-01 -1.45353928e-01  5.85327260e-02
 -1.60156012e-01  7.05969557e-02 -6.52101159e-01 -3.62839222e-01
  2.08972052e-01  1.13533270e-02  4.67055701e-02  1.36645824e-01
  3.05559754e-01  1.94622368e-01  7.86466226e-02  2.22469911e-01
 -4.50248979e-02 -3.64313811e-01  1.83446556e-01  4.21831101e-01
 -1.35677099e-01 -3.79615091e-02  1.32310197e-01  1.43330127e-01
  2.34072804e-01  2.36490920e-01  9.55804214e-02 -2.45879918e-01
 -2.12477684e+00  1.58518016e-01 -5.87336719e-02  3.28797221e-01
 -1.69440523e-01 -1.21857457e-01 -1.67368919e-01 -1.20573536e-01
 -6.65540388e-03  1.03444028e+00  8.96192193e-02  4.75380599e-01
  1.39369535e+00 -4.97326665e-02  1.29072815e-01 -8.35782662e-02
 -4.17177290e-01 -1.56092000e+00 -1.06426790e-01  2.01859251e-01
 -1.07461683e-01  1.36566616e-03 -2.04498291e-01  3.76818106e-02
 -8.42022430e-03 -1.82171

BERT embedding:  37%|███▋      | 37/100 [00:06<00:06,  9.90it/s]

[ 3.18333246e-02 -2.23550484e-01 -2.31150642e-01 -1.04551539e-01
 -2.68066406e-01 -2.70629227e-01 -3.10249906e-02  2.64580637e-01
  1.00248151e-01  2.75580913e-01  4.45175111e-01 -8.10014829e-02
 -1.30969435e-01  9.89390165e-03 -6.58088565e-01 -5.00735760e-01
  1.39501885e-01 -1.27444074e-01 -3.20292830e-01  4.53933299e-01
  3.42122704e-01  5.02155066e-01  1.40117168e-01  2.52583802e-01
  2.01682851e-01 -2.03559726e-01 -2.12260969e-02  1.78532273e-01
  2.25874752e-01  2.29867369e-01 -2.38140225e-02  7.52971828e-01
 -6.94573224e-02 -2.83478238e-02  2.33142870e-03 -2.23412633e-01
 -2.22599220e+00  6.30584836e-01 -1.81175381e-01 -8.30593780e-02
 -2.25764275e-01 -1.13447558e-03 -4.75238413e-01 -2.10180655e-01
 -6.95455447e-02  7.63050854e-01  4.89138991e-01  4.82983679e-01
  1.12471235e+00 -4.18784231e-01 -1.18426584e-01  1.82377040e-01
 -2.01859530e-02 -1.51843750e+00 -1.07860692e-01  3.68651479e-01
  3.81569296e-01 -2.06645310e-01 -1.52682930e-01  3.51710200e-01
  5.48161626e-01  6.76557

BERT embedding:  38%|███▊      | 38/100 [00:06<00:06,  9.20it/s]

[ 1.87679142e-01  2.12945029e-01  1.16825573e-01  3.26964222e-02
  1.93707217e-02 -9.46908724e-03 -1.77866936e-01 -3.70542589e-03
  2.32867494e-01 -3.41384974e-03  1.26686409e-01 -1.13947980e-01
 -2.64287561e-01 -2.85952426e-02 -6.39174223e-01 -2.33396441e-01
 -1.93621754e-03  1.40608206e-01  1.50892958e-01  5.27091995e-02
  1.25565499e-01  6.00468414e-03 -6.88411742e-02  8.67010653e-02
 -7.63341114e-02 -1.61026508e-01  1.74969718e-01  2.93893576e-01
  2.65580058e-01  1.16441466e-01  1.34736642e-01  6.76335469e-02
  1.61350340e-01 -2.36288637e-01  6.16283668e-03 -1.25110131e-02
 -2.19118690e+00  2.21330926e-01 -3.13102975e-02  1.60886511e-01
 -1.50207549e-01 -6.83221966e-02 -1.49416745e-01 -2.59145617e-01
 -4.83602583e-02  1.26946473e+00 -8.27232655e-03  1.82012483e-01
  1.54026556e+00  1.16403744e-01  1.27284810e-01 -1.28849313e-01
 -1.00311823e-01 -1.76136267e+00  1.50524139e-01 -1.46830333e-02
  2.07199845e-02 -3.83370183e-02  8.09816718e-02  2.41723701e-01
  1.37406215e-01  3.90772

BERT embedding:  42%|████▏     | 42/100 [00:06<00:07,  8.21it/s]

[-1.19205512e-01 -3.37981060e-02  3.17851424e-01  9.39951763e-02
  1.43767029e-01  5.10045029e-02 -2.09723532e-01 -2.74461154e-02
  3.37740421e-01 -1.29410960e-02  1.49414673e-01  7.46044666e-02
 -1.77971497e-01  1.01261042e-01 -4.92194682e-01 -5.19770741e-01
  9.59010795e-02  1.02300912e-01 -2.50707418e-01  8.91629048e-03
  7.70412534e-02  3.25332075e-01  2.19018713e-01  1.21162593e-01
 -1.01294845e-01 -7.20993355e-02  1.21338479e-01  4.21604902e-01
  1.10576473e-01  2.50369161e-01  2.71658093e-01  3.29382449e-01
  1.88112304e-01  4.25978214e-01 -2.78124977e-02  3.01839616e-02
 -1.79603255e+00  3.85149956e-01 -1.42551914e-01  1.96013048e-01
 -2.81167507e-01 -8.50679651e-02 -2.43239135e-01 -2.80464105e-02
 -6.21704981e-02  8.23086679e-01  1.38317555e-01  1.88007891e-01
  9.38919306e-01  5.89752309e-02  1.23877317e-01 -3.15628082e-01
 -4.65930402e-02 -1.45084262e+00  1.27304643e-01 -2.88680226e-01
 -7.68899918e-03  5.09754345e-02  7.67614599e-03  1.82421207e-01
 -7.72942007e-02  1.96979

BERT embedding:  44%|████▍     | 44/100 [00:07<00:06,  9.23it/s]

[-5.74927106e-02  1.72456995e-01 -2.00029671e-01 -5.35594625e-03
  2.42385596e-01  1.28000617e-01  4.10511196e-02  1.26236394e-01
  6.24606460e-02  6.16963245e-02  4.10910010e-01 -1.65658236e-01
 -2.09380865e-01 -5.06694503e-02 -6.63529336e-01 -2.97690064e-01
 -2.03868851e-01 -1.64916012e-02  7.81042948e-02  2.94141527e-02
  2.81642705e-01 -1.47340996e-02 -4.73965965e-02  5.26220463e-02
  7.86111131e-02 -1.60042629e-01  3.05805385e-01  2.09447965e-01
  7.21471757e-02  4.36377972e-02 -7.96059892e-02  2.54390180e-01
  2.90298439e-03  2.48144805e-01  1.36192083e-01 -1.98806763e-01
 -2.15382695e+00  2.38220379e-01 -1.44300312e-01  3.55724692e-01
 -2.47656330e-01 -1.07071064e-01 -1.52683869e-01 -5.13110543e-03
 -3.55157144e-02  1.02762377e+00  6.97320849e-02  4.47296143e-01
  1.16509557e+00  1.55420735e-01  1.54894665e-01 -1.91761911e-01
 -5.87839969e-02 -1.52605259e+00 -1.82928685e-02 -3.89677845e-02
  2.80906875e-02  5.38906604e-02 -9.19981822e-02  2.31113464e-01
  7.52701685e-02  6.50142

BERT embedding:  47%|████▋     | 47/100 [00:07<00:05,  9.23it/s]

[-2.25990906e-01 -6.36133477e-02 -1.86556950e-01  1.90655887e-02
  5.25079072e-01  1.21176325e-01  1.79689843e-02  1.96656764e-01
 -3.93500924e-02 -1.93296093e-02  1.10900164e-01  9.05490443e-02
  5.37586153e-01  1.34859338e-01 -9.29614782e-01 -1.44830793e-01
  1.96673676e-01 -2.75104046e-02  9.21566188e-02  7.84071386e-01
 -3.86127025e-01  3.79062682e-01  1.28920108e-01  1.82154000e-01
  4.01331559e-02  7.83674046e-02 -5.22242010e-01 -1.41204387e-01
 -4.40582961e-01 -1.10829018e-01  7.58722872e-02  6.38537109e-01
  7.09845126e-01  4.57564563e-01  3.34751159e-01 -1.11852281e-01
 -1.68315399e+00 -5.81082981e-03 -2.29443312e-01 -3.32088649e-01
 -2.10124001e-01 -1.11087051e-03  2.40028277e-01  2.66514480e-01
 -2.52973020e-01  4.38805282e-01  6.63228869e-01  3.26915324e-01
  4.21946943e-01 -3.35283041e-01  3.47804040e-01 -3.46112162e-01
  2.69011259e-01 -1.04727042e+00  5.87388992e-01 -3.09524715e-01
  3.84407461e-01 -4.12296891e-01 -1.58612788e-01  5.14774978e-01
  4.85208899e-01  7.82615

BERT embedding:  50%|█████     | 50/100 [00:07<00:05,  8.91it/s]

[-5.80685019e-01 -1.95144236e-01 -2.89382875e-01 -2.98941374e-01
 -1.08521394e-01  2.10850507e-01  1.08658187e-01  2.93599904e-01
 -3.25443089e-01  1.28350645e-01 -1.26009077e-01  3.27028990e-01
  7.06264079e-02 -8.82672817e-02 -6.73527658e-01 -4.03903306e-01
 -1.88080192e-01 -1.34839430e-01  2.11958662e-01  2.87429065e-01
 -1.32692363e-02 -2.97006834e-02  5.03664725e-02  2.11570770e-01
  1.88943222e-01 -2.38753527e-01 -4.30248797e-01  4.16176498e-01
  9.06766877e-02  4.22290474e-01  2.69131631e-01  5.20075917e-01
  2.88673252e-01  4.17713761e-01  5.14677502e-02  1.31090835e-01
 -1.80490768e+00 -1.25673816e-01  1.48997307e-01 -2.89958864e-02
 -6.27091329e-04  1.39995143e-01  5.28989792e-01  4.55201000e-01
 -3.91894281e-01  7.20725000e-01  3.98901284e-01  2.28291452e-01
  9.89531279e-01  5.54164499e-02  4.93566155e-01 -1.89781398e-01
  2.49844432e-01 -1.57371783e+00  5.32641113e-01  1.78667709e-01
 -1.98627532e-01  1.08517855e-01 -2.40688384e-01  2.63406605e-01
  2.34487191e-01 -7.92890

BERT embedding:  52%|█████▏    | 52/100 [00:07<00:05,  9.27it/s]

[ 1.15680568e-01  1.35741994e-01  1.94688320e-01  1.00564033e-01
  1.42316237e-01  5.75431734e-02 -6.69444576e-02 -4.88331541e-02
  1.16104245e-01  1.58263981e-01 -1.06242791e-01 -3.68129089e-02
 -5.69926687e-02 -4.22845222e-02 -5.19189477e-01 -2.14267056e-02
 -6.42906874e-02  1.01092853e-01  1.03056699e-01  8.37745816e-02
  4.72978875e-02  2.36216616e-02 -1.70014232e-01  4.34944034e-03
 -5.38554080e-02 -1.30453035e-01  7.68892765e-02  2.66705364e-01
  8.86603817e-02  7.43601937e-04 -3.34038176e-02  6.27688617e-02
  1.05165727e-01 -2.46052053e-02 -5.49313724e-02 -1.38511941e-01
 -2.27297187e+00  9.83626465e-04 -2.13292956e-01  1.06695607e-01
 -2.08661914e-01 -2.55946796e-02 -1.94701359e-01 -1.47818252e-02
 -1.60883904e-01  1.30463600e+00 -1.64356157e-01  5.02243862e-02
  1.59296000e+00 -1.83791429e-01  1.74183488e-01 -2.56569654e-01
 -6.65266067e-02 -1.68768287e+00  1.73917904e-01  3.67716812e-02
 -9.39426012e-04  1.65794871e-03 -2.20619421e-02  2.12252706e-01
 -1.43783227e-01  2.61654

BERT embedding:  56%|█████▌    | 56/100 [00:08<00:04, 10.58it/s]

[ 1.04934655e-01  1.24558404e-01 -2.22302169e-01 -2.82620806e-02
  1.42290577e-01  7.52011389e-02 -6.02405854e-02  9.66130421e-02
  2.96193212e-02  1.59264877e-02  5.26463054e-02  4.40342277e-02
 -2.48053014e-01 -3.80289566e-05 -4.94607687e-01 -1.11752182e-01
 -1.57529321e-02  1.98709667e-01  7.68638253e-02  5.58560863e-02
 -5.05349273e-03  1.27357334e-01 -2.77123582e-02  4.53528296e-03
  1.54625094e-02 -3.94516587e-02  8.21187571e-02  9.69039053e-02
  1.94833666e-01 -3.90629135e-02 -5.58019094e-02  1.38264984e-01
  2.57615168e-02 -2.12030876e-02  3.70517410e-02  1.27480123e-02
 -2.15666509e+00  6.85463250e-02 -2.17500314e-01  1.02960527e-01
 -2.61550635e-01 -2.86804345e-02 -1.11863285e-01  5.95407709e-02
 -1.70193601e-03  1.23559976e+00 -8.99578556e-02  1.51631907e-01
  1.58811653e+00  3.41227390e-02  7.66501129e-02 -2.84582555e-01
 -1.02054447e-01 -1.72445345e+00  1.49423108e-01  1.84072345e-01
 -9.83353332e-02  6.81257471e-02  3.60452570e-02  4.21185233e-02
  2.52486467e-02  6.55040

BERT embedding:  58%|█████▊    | 58/100 [00:08<00:03, 11.04it/s]

[ 4.61594947e-02  2.04469655e-02  4.90956157e-02  1.09824039e-01
  1.24541767e-01 -4.89229560e-02 -3.83727364e-02  5.87671101e-02
  1.57767370e-01  2.27829203e-01 -2.04496868e-02 -5.38991252e-03
 -9.94402394e-02 -1.54058903e-01 -5.49497306e-01 -7.65052810e-02
 -7.89265037e-02  1.53218803e-03  1.26203522e-02  1.55585753e-02
 -8.81407596e-03 -8.96849111e-03 -1.03486411e-01  5.77145107e-02
  1.36875853e-01 -1.75148740e-01 -1.03773000e-02  3.21366131e-01
  3.41761224e-02 -2.99601369e-02 -5.93417510e-02  5.33992909e-02
  1.84608907e-01 -8.02922323e-02  1.17590958e-02 -1.71404421e-01
 -2.22977686e+00  8.99744127e-03 -1.83560655e-01  1.65239424e-01
 -2.18875751e-01  3.92055139e-02 -1.58583477e-01  9.64425504e-03
 -3.77057940e-02  1.23047793e+00 -1.87722653e-01  4.49525937e-02
  1.56419861e+00 -1.19137317e-01  8.74422565e-02 -1.58291116e-01
 -1.68243676e-01 -1.74435210e+00  2.33384624e-01 -5.34370542e-02
 -6.39861152e-02 -4.29333858e-02 -3.54178390e-03  9.57878828e-02
  4.91105318e-02  7.19432

BERT embedding:  60%|██████    | 60/100 [00:08<00:03, 11.09it/s]

[ 6.50314689e-02  6.18826412e-02 -2.99183071e-01 -5.51363416e-02
 -9.26191919e-03 -1.67370010e-02 -9.09702927e-02  2.02272967e-01
 -7.87297562e-02  1.32559940e-01  6.10281974e-02 -2.62294739e-01
 -3.28026682e-01  4.95203584e-02 -6.61597371e-01 -2.53727168e-01
 -4.30539325e-02 -1.50969326e-02 -8.98172334e-02  1.61918014e-01
  5.02778683e-03  1.43894061e-01  9.96699482e-02  1.90300241e-01
 -4.90764715e-03 -2.47182250e-01 -9.68285948e-02  2.73024261e-01
 -8.79102200e-02  3.48024629e-02  1.07248247e-01  2.10284278e-01
 -7.37592876e-02  1.13169529e-01  7.22365081e-02 -3.29168767e-01
 -2.14952302e+00  2.36076981e-01 -1.42719105e-01  2.32235894e-01
 -3.07902247e-01  3.29075158e-02 -1.57057121e-01 -2.93718934e-01
 -2.76497137e-02  1.12337935e+00  1.20494805e-01  2.07037076e-01
  1.20753789e+00  1.60314679e-01  1.25431582e-01 -3.01723093e-01
 -3.17047536e-01 -1.53004634e+00  3.21238823e-02  2.04628363e-01
 -8.48504826e-02 -7.37416446e-02 -2.34912097e-01  1.77188873e-01
  8.66052732e-02 -6.20891

BERT embedding:  64%|██████▍   | 64/100 [00:09<00:03, 10.72it/s]

[ 1.91511169e-01 -1.19897299e-01 -3.42403427e-02  1.69806719e-01
  1.09349109e-01 -3.83948721e-02 -8.16453844e-02  1.24529161e-01
 -2.35947698e-01 -7.35768229e-02  2.58491427e-01 -8.85629505e-02
 -9.98816714e-02  7.66246095e-02 -5.24071038e-01 -2.80276805e-01
 -2.03130215e-01  3.49292666e-01  2.93769818e-02  2.12999642e-01
 -2.02993035e-01  8.00160617e-02  1.62567627e-02  2.28629291e-01
  2.58208871e-01 -4.00117666e-01 -1.40602812e-01  3.13549727e-01
  1.37813419e-01  2.80512989e-01 -1.76932722e-01  3.63262564e-01
 -1.69185832e-01  1.64864093e-01  7.32448846e-02  2.94030327e-02
 -1.83433664e+00 -5.45358546e-02 -5.06781004e-02  1.10704981e-01
 -3.04582179e-01  1.02818524e-02 -2.66103297e-01 -2.79071897e-01
 -3.85715812e-01  8.95943105e-01  3.72213274e-01  4.55197394e-01
  7.57890880e-01 -2.26398021e-01  1.62730347e-02  1.36811361e-01
 -9.70469788e-02 -1.25810683e+00  8.90880153e-02 -3.18733826e-02
 -4.33167331e-02 -1.75191551e-01 -2.82624867e-02  3.59744532e-03
  1.08693108e-01  2.27179

BERT embedding:  66%|██████▌   | 66/100 [00:09<00:03, 10.38it/s]

[ 3.62642817e-02 -2.46180803e-01  1.75762951e-01 -3.68327349e-01
 -1.34626910e-01 -2.39204690e-01  8.78822058e-02  1.17267698e-01
 -1.06434852e-01  1.28137559e-01  3.56506824e-01 -1.06046587e-01
 -2.13575125e-01 -2.55254954e-01 -1.67260855e-01 -3.45424354e-01
 -2.40574971e-01 -3.59833948e-02 -1.75755844e-01  9.74769145e-02
 -3.92471403e-01  9.62452888e-02  2.74192303e-01  4.61799473e-01
  2.13497296e-01 -1.03163823e-01 -1.77498177e-01  1.96809366e-01
 -1.77063599e-01  3.69973958e-01 -4.45771754e-01  5.39032936e-01
  5.94185442e-02  2.90701807e-01  9.30693187e-03 -9.83303711e-02
 -2.04717040e+00 -1.08936831e-01 -1.63912511e-04 -1.71058983e-01
 -2.67026484e-01  1.60179753e-02 -1.91226706e-01 -1.96808144e-01
 -1.34077609e-01  1.24701023e+00  2.97001421e-01  2.70766288e-01
  9.11846697e-01 -2.59883821e-01  2.17954040e-01  5.63350171e-02
 -4.26771538e-03 -1.29741502e+00 -3.88063677e-02  2.11796761e-01
  2.78104484e-01 -8.86002928e-02 -2.77414113e-01 -3.69182564e-02
  2.87722498e-01  2.79161

BERT embedding:  68%|██████▊   | 68/100 [00:09<00:03,  9.51it/s]

[-8.97623152e-02 -2.56678760e-01 -1.05659500e-01 -2.27289483e-01
 -3.09162319e-01 -1.36881933e-01  9.46856961e-02  1.67689085e-01
  9.51477438e-02  2.39666551e-01  6.13655329e-01 -2.11313128e-01
 -7.11896345e-02 -1.17678247e-01 -5.12114048e-01 -3.45033884e-01
 -1.08736075e-01 -8.65714848e-02 -3.42185080e-01  6.69018567e-01
  3.74090374e-01  1.62846565e-01  2.97113806e-01  1.06046572e-02
 -5.12696691e-02 -2.26839036e-01 -3.58391076e-01  3.01317424e-01
 -1.61367208e-01  4.84791934e-01 -1.40267715e-01  6.38208210e-01
 -1.77879453e-01  9.41088330e-03 -1.01344325e-01 -2.58507252e-01
 -2.01632094e+00  2.04421982e-01  6.31927624e-02  1.24101080e-01
 -5.97802520e-01 -1.74729154e-01 -2.26777777e-01 -5.83358109e-02
 -2.51979887e-01  8.82970393e-01  2.29128391e-01  4.65575725e-01
  1.36117196e+00 -2.32835382e-01  2.84127630e-02 -1.36284500e-01
 -2.95433432e-01 -1.59332144e+00 -2.42337152e-01  1.86635345e-01
  3.31331819e-01  8.95119309e-02 -1.88613266e-01  2.20955551e-01
  5.04989922e-01  1.06863

BERT embedding:  71%|███████   | 71/100 [00:09<00:03,  9.55it/s]

[-4.57263380e-01 -8.36351421e-03 -3.48545909e-01 -2.45464090e-02
  3.91783774e-01  6.78003252e-01  1.43160433e-01  2.68809795e-02
 -2.49100439e-02 -3.37981135e-02  3.34318280e-01 -2.33219251e-01
  1.01306170e-01 -7.03489929e-02 -6.88483477e-01 -1.62348494e-01
 -2.15928584e-01 -2.66208619e-01 -1.56531781e-01  3.15260231e-01
 -9.33393389e-02 -8.70892033e-02 -5.65572381e-01 -2.20785458e-02
  7.13989884e-02 -3.38916063e-01 -1.59175530e-01  1.00804277e-01
  4.99715619e-02 -2.06364766e-01  1.69684112e-01  1.17240205e-01
  1.83100238e-01  4.12398905e-01  1.76265836e-01  7.27046505e-02
 -2.21726871e+00 -7.78164938e-02 -4.10302356e-02  7.68062696e-02
  9.09958314e-03 -2.28405640e-01  3.26622650e-02  4.79080603e-02
 -2.52691060e-01  9.40330803e-01  4.02272820e-01  3.30164760e-01
  1.30956829e+00  7.17287362e-02  5.23399949e-01  3.67742032e-02
  1.03318445e-01 -1.43379831e+00  5.26405752e-01 -2.15208381e-01
 -8.77380669e-02 -4.13676172e-01 -2.14338303e-01 -7.70546775e-03
  4.20926809e-02  4.28424

BERT embedding:  73%|███████▎  | 73/100 [00:09<00:02, 10.34it/s]

[ 7.80213103e-02  6.16795532e-02 -2.20875427e-01 -1.42367359e-03
  1.03414200e-01  7.47729689e-02 -2.85529476e-02  4.78354394e-02
  1.70738384e-01  1.35318935e-01  6.01486154e-02 -1.40552819e-01
 -1.29754096e-01 -4.99836914e-02 -4.89117652e-01  4.70806938e-03
 -9.65919569e-02  1.65598869e-01  2.48673540e-02 -7.64962733e-02
  6.07210677e-04  8.65999013e-02 -8.55243355e-02  4.75085303e-02
 -4.40001599e-02 -8.39783102e-02  6.02186359e-02  1.17149584e-01
  1.80053115e-01  5.08196168e-02 -7.46253058e-02  1.47512361e-01
  3.02904621e-02  1.95604563e-02  5.94412498e-02 -1.23076133e-01
 -2.28630829e+00  1.04550049e-02 -1.75402537e-01  5.94393462e-02
 -2.20539212e-01 -3.00293751e-02 -7.52148852e-02  2.47178357e-02
 -3.06213442e-02  1.20399892e+00 -8.66104662e-02  1.21036641e-01
  1.47786760e+00  2.62427833e-02  8.42614919e-02 -3.09476256e-01
 -1.28862649e-01 -1.71149588e+00  2.45623942e-02  1.29773095e-01
 -3.38484235e-02 -1.61220990e-02  1.68855228e-02  8.62817615e-02
  6.56061769e-02  8.70328

BERT embedding:  77%|███████▋  | 77/100 [00:10<00:02, 11.42it/s]

[ 1.92609012e-01  4.31807637e-02  7.38446340e-02  3.59186590e-01
  2.98838645e-01  2.21536428e-01 -2.18381044e-02 -8.45497567e-03
 -5.53210415e-02  4.92056049e-02  2.39767060e-02  2.08062619e-01
 -2.35191151e-01 -2.61447668e-01 -5.36704957e-01 -6.66012838e-02
  4.19760086e-02  1.25441909e-01 -8.82899091e-02  2.28084281e-01
  3.13777365e-02  9.00899991e-02 -1.20297328e-01 -1.86669882e-02
 -6.96125068e-03 -5.48476540e-02 -1.13618337e-01  5.24952486e-02
 -4.34575789e-02 -5.07890657e-02  6.25991374e-02  1.15400024e-01
 -4.99182753e-02  8.76501277e-02  3.09922218e-01 -8.93961862e-02
 -2.16793203e+00 -3.54842655e-03 -1.85233504e-01  1.57372206e-01
 -3.16350102e-01 -1.80095047e-01  4.02877182e-02 -2.00423244e-02
 -1.64848626e-01  1.28864002e+00 -4.30560373e-02  6.25184104e-02
  1.47084010e+00 -1.72746986e-01  9.60913152e-02 -2.70736933e-01
 -1.88064858e-01 -1.68694222e+00  1.76572233e-01 -9.87048149e-02
 -2.12411955e-01 -7.06364885e-02  1.24441959e-01  9.57673714e-02
 -2.51779616e-01  6.20777

BERT embedding:  79%|███████▉  | 79/100 [00:10<00:01, 11.07it/s]

[ 6.67439178e-02  1.38275161e-01  2.08230346e-01  1.62453130e-01
  2.40307078e-01  8.99555460e-02 -9.49260592e-03 -5.41630276e-02
  1.99244678e-01  7.48639181e-02  7.97530785e-02 -3.86452004e-02
 -1.21564947e-01  1.31850978e-02 -5.23567677e-01 -2.73542702e-02
 -1.51069596e-01  2.14072809e-01  1.12856608e-02  4.14370373e-02
  5.22897802e-02  4.06713560e-02 -6.24609776e-02 -7.50688165e-02
  5.04649337e-03 -9.02176574e-02 -2.04206863e-03  2.38591716e-01
  5.51157519e-02 -9.28044170e-02  9.09148306e-02  7.04278722e-02
  1.52342856e-01 -5.77644398e-03  1.03927851e-01 -5.90658188e-02
 -2.34646130e+00 -3.18112634e-02 -7.08342567e-02  1.75558299e-01
 -1.99754164e-01 -2.79079080e-01 -1.35074764e-01 -1.03613026e-01
 -1.51693270e-01  1.27570426e+00 -1.17567033e-01  9.61731896e-02
  1.55727768e+00 -1.84488758e-01  1.84485927e-01 -1.63132355e-01
 -1.20181270e-01 -1.63733840e+00  1.74025372e-01 -8.15789774e-02
 -1.45966753e-01  1.36613205e-01 -1.31628260e-01  2.30581388e-01
  2.89349686e-02 -2.98644

BERT embedding:  83%|████████▎ | 83/100 [00:10<00:01, 10.01it/s]

[-4.08687115e-01  1.08449392e-01  1.94871910e-02 -2.63536870e-02
  8.40970695e-01  9.20839846e-01 -1.76149487e-01 -1.68979451e-01
  2.41879001e-01 -2.06627101e-01  1.03758529e-01 -2.04117984e-01
 -1.08921714e-01 -2.38330618e-01 -3.79236728e-01  1.57189056e-01
  1.47463366e-01 -2.90926307e-01  3.07560004e-02  4.98586476e-01
  1.12762321e-02  1.04095288e-01 -7.02332199e-01 -1.90246120e-01
 -1.08904153e-01 -1.44888327e-01  7.71386698e-02 -3.30032319e-01
 -5.93635477e-02  4.07760292e-02  3.73084068e-01  3.55432898e-01
  2.21561849e-01  3.74254167e-01  1.41514286e-01  3.68160993e-01
 -1.63794601e+00  1.88459493e-02 -1.31848559e-01  4.16956842e-02
 -2.29594931e-02 -3.00429948e-02  7.14085817e-01 -1.51532531e-01
 -6.42804801e-01  9.52144742e-01  6.88524663e-01  3.97856414e-01
  1.16406560e+00 -3.84767652e-01 -1.75193176e-01 -9.04187188e-02
 -1.76005125e-01 -1.43031263e+00  3.43206018e-01 -9.26198736e-02
 -3.04623306e-01  1.18502185e-01  2.02145010e-01  3.18045825e-01
  1.71810150e-01  7.33930

BERT embedding:  85%|████████▌ | 85/100 [00:11<00:01,  9.14it/s]

[-8.82309824e-02 -5.13067953e-02  7.35991374e-02 -2.24949978e-02
  1.20248638e-01  6.00307621e-02  3.54305543e-02  2.28559762e-01
 -4.23400551e-02 -1.07580811e-01  1.74930483e-01  4.65366151e-03
 -4.75552529e-02  2.03922749e-01 -6.49195910e-01 -6.68251142e-02
 -3.32927480e-02  4.39647228e-01  1.92491747e-02  5.18760681e-01
  1.12446047e-01  2.11716309e-01  5.22608519e-01 -3.13162021e-02
 -2.36765489e-01 -3.63899618e-01 -5.59115075e-02  3.09005648e-01
 -7.56936055e-03  2.68429965e-02  1.78898409e-01  2.66154230e-01
 -8.17877501e-02  1.47371814e-01  2.44813651e-01 -1.63947374e-01
 -2.22971106e+00  1.80492297e-01 -5.73668778e-02  2.12879449e-01
 -3.18395883e-01 -3.88516366e-01 -1.52135834e-01 -3.89635503e-01
  1.33227454e-02  1.04212880e+00  5.59300976e-03  2.04834253e-01
  1.25584650e+00  1.27169311e-01  1.76604018e-01 -5.57975709e-01
 -2.05551162e-01 -1.52505243e+00 -7.12911710e-02 -3.43554616e-02
  2.42298022e-01  2.99077965e-02 -1.88757449e-01  3.89191896e-01
  3.42000872e-01  1.85822

BERT embedding:  88%|████████▊ | 88/100 [00:11<00:01,  9.71it/s]

[ 1.43397778e-01 -6.51420094e-03 -6.51940182e-02 -8.17006901e-02
  1.36242479e-01 -3.13982606e-01  3.94148119e-02  1.80850461e-01
 -4.96216491e-02  4.63941693e-01  2.75244534e-01 -3.24815601e-01
 -1.19818747e-01  2.71747690e-02 -4.71194506e-01 -4.50393468e-01
  2.93352306e-01  6.23322167e-02 -1.75143376e-01  1.18192390e-01
  3.70063841e-01  2.83843547e-01 -1.00350350e-01 -7.20019862e-02
 -1.46076962e-01 -5.24410717e-02  2.87507236e-01  3.27764571e-01
 -3.22827175e-02  1.11928977e-01  2.40252674e-01  4.39314574e-01
 -4.36555408e-02  3.53608757e-01 -3.71957012e-02 -8.16952437e-02
 -1.93404937e+00  1.27144396e-01  5.88956997e-02  3.78018320e-01
 -3.89914781e-01  1.69585451e-01 -1.01742424e-01 -1.28553808e-01
  3.04795206e-01  1.10664344e+00  1.34685427e-01  2.24972025e-01
  1.08386052e+00 -2.38703609e-01 -2.87782867e-02 -9.05334428e-02
 -3.35700205e-03 -1.17702365e+00 -1.78936534e-02  3.51176918e-01
  4.66042995e-01 -3.36303443e-01 -1.45231426e-01 -1.77271143e-01
  2.13278159e-01  3.20316

BERT embedding:  91%|█████████ | 91/100 [00:11<00:00, 10.21it/s]

[-1.56362370e-01  1.42602911e-02 -1.33629620e-01  1.05213568e-01
  1.11377709e-01  1.22887738e-01 -7.17128739e-02 -2.33124588e-02
  9.96500477e-02 -2.52209082e-02  7.11685568e-02  1.34399682e-01
 -1.27179444e-01 -5.95851131e-02 -4.48448807e-01  7.59585500e-02
 -9.43384692e-02  1.39694363e-01  1.17431037e-01  8.50803778e-02
 -7.53000230e-02  8.10595304e-02 -2.43340731e-01  1.66612968e-01
  1.12387654e-03 -2.33929574e-01  8.78681242e-02 -6.06060177e-02
  4.96132635e-02  1.90140992e-01 -1.15380876e-01  6.46108612e-02
  1.16873898e-01 -4.34277691e-02  1.86326385e-01  4.20583263e-02
 -2.22524738e+00  6.39446452e-02 -1.78982660e-01  7.93531686e-02
  5.86018637e-02  2.94629224e-02 -2.27907412e-02  4.77674510e-03
 -1.20924480e-01  1.09804857e+00 -8.39513615e-02 -3.99839245e-02
  1.64568412e+00  1.65908843e-01  2.56315440e-01 -4.26930606e-01
  1.19552828e-01 -1.62369323e+00  3.37275416e-01  1.22245075e-02
  1.39596015e-02 -1.09420702e-01 -1.15487650e-01  1.49806544e-01
  4.81845289e-02  2.78824

BERT embedding:  93%|█████████▎| 93/100 [00:11<00:00, 10.36it/s]

[-1.54664919e-01 -8.15777406e-02 -3.43839347e-01  1.53560070e-02
  1.35311276e-01  6.94623664e-02  3.17816854e-01  3.43811959e-02
  2.43280772e-02  4.96594720e-02  1.49435222e-01 -1.49056306e-02
 -8.84002075e-02 -3.17775682e-02 -6.23482406e-01  3.93970944e-02
 -6.41676262e-02  2.22637713e-01  1.69914477e-02  8.29155836e-03
 -1.58228040e-01  5.22499643e-02 -5.69473915e-02  1.19157992e-01
 -3.48619297e-02 -3.14121604e-01  3.90000269e-02 -1.82542205e-02
  1.35587878e-03 -3.27146389e-02 -1.88649565e-01  2.61683166e-01
  1.66693479e-01 -4.88142855e-02  2.31253907e-01 -1.79925617e-02
 -2.30767798e+00  2.31390782e-02 -2.96723723e-01 -5.07962368e-02
 -1.10379368e-01 -1.11284815e-02  2.85570268e-02  2.47020926e-02
 -3.67202237e-02  1.31567442e+00  2.44881921e-02 -9.30386875e-03
  1.59126937e+00  1.24634683e-01  1.87678084e-01 -3.90327930e-01
  9.34801698e-02 -1.68752027e+00  2.86653996e-01  3.54186073e-02
 -1.38083771e-02 -1.08195819e-01 -1.08987302e-01  2.13134319e-01
  1.01709485e-01  3.81340

BERT embedding:  97%|█████████▋| 97/100 [00:12<00:00, 11.35it/s]

[ 1.94320176e-02  3.73917595e-02 -8.45735967e-02  7.00055584e-02
  1.15316883e-01  3.89920808e-02  4.24215682e-02  5.57607785e-02
  8.28695074e-02  5.56685328e-02  7.99198821e-03 -8.29287544e-02
 -2.64132321e-01 -1.30123213e-01 -4.87074584e-01 -1.06284264e-02
 -7.27921426e-02  1.56113103e-01 -1.88769400e-02  8.79640803e-02
  1.77483149e-02  1.81718573e-01 -2.81953871e-01  1.20155282e-01
 -3.13903876e-02 -1.90062582e-01  1.32555813e-01  6.11832067e-02
  1.75885841e-01 -3.40617709e-02 -9.17869955e-02  9.91219357e-02
  6.87464699e-02  1.55690429e-03  1.34318545e-01 -1.49948327e-02
 -2.28936696e+00 -1.27254784e-01 -1.12532675e-01 -8.17173719e-02
 -6.26741648e-02 -9.25755724e-02  5.11702374e-02  3.17989178e-02
 -6.59224838e-02  1.22931385e+00  4.88953143e-02  9.43285376e-02
  1.49865830e+00  7.51651525e-02  2.70622075e-01 -2.38952890e-01
  1.05424613e-01 -1.60613573e+00  2.26192549e-01  1.87952325e-01
 -1.81047022e-02 -1.05943330e-01 -2.93626431e-02  3.31350178e-01
 -2.06529275e-02  2.58638

BERT embedding: 100%|██████████| 100/100 [00:12<00:00,  7.93it/s]

[ 1.93779841e-02  1.17270947e-01 -1.75401866e-01  9.32560116e-02
  2.91016281e-01 -1.91542655e-02 -6.57549873e-02  5.05437329e-02
 -1.41293528e-02  8.88606235e-02  2.27228016e-01 -8.12276527e-02
 -1.63747165e-02 -7.13649765e-02 -5.39346814e-01 -1.33068878e-02
 -6.55205101e-02  1.43137306e-01 -7.96240754e-03 -6.15148507e-02
  1.45328104e-01  4.69377730e-03 -1.97788149e-01  7.93830976e-02
 -1.38501078e-01 -1.58913657e-02  1.94038779e-01  8.14039260e-02
  1.01577565e-01  1.16251446e-01  6.21481538e-02  1.61378488e-01
 -7.97054265e-03  1.15752794e-01  1.03890318e-02 -8.90445188e-02
 -2.21344829e+00 -3.53320949e-02 -1.54865980e-01  4.87229750e-02
 -2.73851514e-01 -1.12112230e-02 -1.28699645e-01 -2.75315847e-02
  5.05569465e-02  1.20026135e+00 -9.89563242e-02  1.98996902e-01
  1.46718657e+00  1.93949882e-02  4.89893034e-02 -1.55710980e-01
 -2.56098747e-01 -1.68516004e+00  5.56872003e-02  1.88003346e-01
 -9.18838829e-02 -7.76703060e-02  6.71038628e-02  4.17532586e-02
  8.32229778e-02  1.53345




In [None]:
pip install gensim python-Levenshtein

Collecting gensim
  Downloading gensim-4.3.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (8.1 kB)
Collecting python-Levenshtein
  Downloading python_levenshtein-0.27.1-py3-none-any.whl.metadata (3.7 kB)
Collecting numpy<2.0,>=1.18.5 (from gensim)
  Downloading numpy-1.26.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (61 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.0/61.0 kB[0m [31m4.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting scipy<1.14.0,>=1.7.0 (from gensim)
  Downloading scipy-1.13.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (60 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m60.6/60.6 kB[0m [31m4.0 MB/s[0m eta [36m0:00:00[0m
Collecting Levenshtein==0.27.1 (from python-Levenshtein)
  Downloading levenshtein-0.27.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.6 kB)
Collecting rapidfuzz<4.0.0,>=3.9.0 (from Levenshtein==0.27.1->py