In [1]:
import os
import re
import json
import warnings
from time import time
import random
from multiprocessing import Pool
from tqdm.auto import tqdm
import fasttext
import numpy as np 
import pandas as pd
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.metrics import f1_score
import csv
from gensim.utils import simple_preprocess

#### Основные настройки: seed, рабочий каталог.

In [2]:
SEED = 42
WORKDIR = '/home/ubuntu/gitrepo/KazanExpress/2/'
TEST_SIZE = 0.2

warnings.filterwarnings("ignore")
random.seed(SEED)
np.random.seed(seed=None) 

#### Преобразование входных данных

In [3]:
HTML_PATTERN = re.compile('<.*?>') 

def cleanhtml(raw_html):
    cleantext = re.sub(HTML_PATTERN, ' ', raw_html)
    return cleantext

def repr_field(s: object) -> str:
    s = cleanhtml(repr(s))
    s = s.replace('[', ' ').replace(']', ' ')
    s = s.replace('{', ' ').replace('}', ' ')
    s = s.replace("'", '')
    return s

def preprocess_text_field(field: str) -> str:
    field = json.loads(field)
    title = cleanhtml(str(field['title'])).replace('title', '')
    description = cleanhtml(str(field['description'])).replace('description', '')
    attributes = repr_field(field['attributes']).replace('attributes', '')
    custom_characteristics = repr_field(field['custom_characteristics']).replace('custom_characteristics', '')
    defined_characteristics = repr_field(field['defined_characteristics']).replace('defined_characteristics', '')
    filters = repr_field(field['filters']).replace('filters', '')
    return ". ".join([title, description, attributes, custom_characteristics, defined_characteristics, filters])

# Method for increasing the weight of the first words of title
def word_pyramid(string: str, min_n_words: int, max_n_words: int) -> list:
    result = []
    split = string.split(' ')
    for i in range(min_n_words, max_n_words+1):
        result += split[:i]
    return ' '.join(result)

# Predicting on a single input
def predict(document, model):
    return int(model.predict(document)[0][0][9:])


def mpredict(text):
    return int(predict(text, model))

In [4]:
# Read from parquet
data_full = pd.read_parquet(os.path.join(WORKDIR, 'row_data/train.parquet'))
# Drop unnecessary columns
data_full.drop(columns=['shop_id', 'rating'], inplace=True)
# Convert text fields
data_full.text_fields = data_full.text_fields.apply(preprocess_text_field)
# Convert "Sale"
data_full['sale'] = data_full['sale'].apply(lambda x: "Распродажа!" if x else "")  
data_full.fillna(value='', inplace=True)
# Concatenate to one string
data_full = data_full.assign(Document=[str(x) + '. ' + str(y) + '. ' + str(z) + '. ' + word_pyramid(x, 1, 7) + \
                    '. ' for x, y, z in zip(data_full['text_fields'], data_full['shop_title'], data_full['sale'])])

data_full = data_full.drop(columns=['text_fields', 'shop_title', 'sale']).reset_index(drop=True)
# Drop "product_id" column - only for train
data_full.drop(columns=['product_id'], inplace=True)
# Drop too rare values
drop_ids = set(data_full.category_id.value_counts()[data_full.category_id.value_counts() < 2].index)
print(f'Dropped {len(drop_ids)/data_full.shape[0]:.5f}% rows')
data_full = data_full[~data_full['category_id'].isin(drop_ids)]
# Trait/test split
data, data_valid = train_test_split(data_full.copy(), test_size=TEST_SIZE, random_state=SEED, shuffle=True, 
                                    stratify=data_full.category_id)
data.reset_index(drop=True, inplace=True)
data_valid.reset_index(drop=True, inplace=True)
# del data_full

Dropped 0.00004% rows


Преобразуем данные в формат, принимаемый FastText.

In [5]:
# Gensim simple preprocessing
data.Document = data.Document.apply(lambda x: ' '.join(simple_preprocess(x)))
data_valid.Document = data_valid.Document.apply(lambda x: ' '.join(simple_preprocess(x)))

# Add "__label__" mark for FastText
data.category_id = data.category_id.apply(lambda x: '__label__' + str(x))
data_valid.category_id = data_valid.category_id.apply(lambda x: '__label__' + str(x))

FastText принимает данные в виде текстовых файлов, поэтому сохраняем данные на диск.

In [6]:
# Saving the CSV file as a text file to train/test the classifier
data[['Document', 'category_id']].to_csv(os.path.join(WORKDIR, 'fasttext/train_ft.txt'), 
                                          index = False, 
                                          sep = ' ',
                                          header = None, 
                                          quoting = csv.QUOTE_NONE, 
                                          quotechar = "", 
                                          escapechar = " ")

data_valid[['Document', 'category_id']].to_csv(os.path.join(WORKDIR, 'fasttext/test_ft.txt'), 
                                               index = False, 
                                               sep = ' ',
                                               header = None, 
                                               quoting = csv.QUOTE_NONE, 
                                               quotechar = "", 
                                               escapechar = " ")

Обучаем модель.

In [10]:
# Training the fastText classifier
begin = time()
model = fasttext.train_supervised(os.path.join(WORKDIR, 'fasttext/train_ft.txt'),
                                  lr=0.65,                # learning rate [0.1]
                                  dim=100,               # size of word vectors [100]
                                  ws=5,                # size of the context window [5]
                                  epoch=70,             # number of epochs [5]
                                  neg=7,               # number of negatives sampled [5]
                                  wordNgrams=2, 
                                  minn=0, 
                                  maxn=0,
                                  minCount=1)
print(f'Time={time()-begin:.3f}')

Read 9M words
Number of words:  126840
Number of labels: 870
Progress:  99.9% words/sec/thread:  187779 lr:  0.000335 avg.loss:  0.172778 ETA:   0h 0m 0s  0.6% words/sec/thread:  210927 lr:  0.646121 avg.loss:  9.368999 ETA:   0h 2m14s

Time=153.854


100.0% words/sec/thread:  187752 lr: -0.000003 avg.loss:  0.172699 ETA:   0h 0m 0s
Progress: 100.0% words/sec/thread:  187751 lr:  0.000000 avg.loss:  0.172699 ETA:   0h 0m 0s


#### Смотрим на метрики:

In [11]:
# Evaluating performance on the entire train file
_, precision, recall = model.test(os.path.join(WORKDIR, 'fasttext/train_ft.txt')) 
f1_train = (2*precision*recall) / (precision+recall)

In [12]:
# Evaluating performance on the entire test file
_, precision, recall = model.test(os.path.join(WORKDIR, 'fasttext/test_ft.txt'))                      
f1_test = (2*precision*recall) / (precision+recall)

In [13]:
# Calculate weighted f1
begin = time()
data_valid_copy = data_valid.copy()
inputs = data_valid_copy.Document.tolist()

with Pool() as pool:
    outputs = pool.map(mpredict, inputs)

data_valid_copy["predicted_id"] = outputs
data_valid_copy.category_id = data_valid_copy.category_id.apply(lambda text: int(text[9:]))
weighted_f1 = f1_score(data_valid_copy.category_id.tolist(), 
                       data_valid_copy.predicted_id.tolist(), average='weighted')
print(f'Time={time()-begin:.3f}')

Time=0.796


In [14]:
print(f'Train F1={f1_train:.4f}')
print(f'Test F1={f1_test:.4f}') 
print(f'Weighted F1={weighted_f1:.4f}')

Train F1=0.9941
Test F1=0.8710
Weighted F1=0.8682


#### Кросс-валидация:

In [10]:
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=SEED)

scores = []

for i, (train_index, test_index) in tqdm(enumerate(skf.split(data_full.Document, data_full.category_id)), total=5):
    data = data_full.iloc[train_index, :].copy()
    data_valid = data_full.iloc[test_index].copy()
    
    # Gensim simple preprocessing
    data.Document = data.Document.apply(lambda x: ' '.join(simple_preprocess(x)))
    data_valid.Document = data_valid.Document.apply(lambda x: ' '.join(simple_preprocess(x)))

    # Add "__label__" mark for FastText
    data.category_id = data.category_id.apply(lambda x: '__label__' + str(x))
    data_valid.category_id = data_valid.category_id.apply(lambda x: '__label__' + str(x))
    
    # Saving the CSV file as a text file to train/test the classifier
    data[['Document', 'category_id']].to_csv(os.path.join(WORKDIR, 'fasttext/train_ft.txt'), 
                                              index = False, 
                                              sep = ' ',
                                              header = None, 
                                              quoting = csv.QUOTE_NONE, 
                                              quotechar = "", 
                                              escapechar = " ")

    model = fasttext.train_supervised(os.path.join(WORKDIR, 'fasttext/train_ft.txt'),
                                  lr=0.65,                # learning rate [0.1]
                                  dim=100,               # size of word vectors [100]
                                  ws=5,                # size of the context window [5]
                                  epoch=70,             # number of epochs [5]
                                  neg=7,               # number of negatives sampled [5]
                                  wordNgrams=2, 
                                  minCount=1,
                                  verbose=0)
    # Calculate weighted f1
    data_valid_copy = data_valid.copy()
    inputs = data_valid_copy.Document.tolist()

    with Pool() as pool:
        outputs = pool.map(mpredict, inputs)

    data_valid_copy["predicted_id"] = outputs
    data_valid_copy.category_id = data_valid_copy.category_id.apply(lambda text: int(text[9:]))
    weighted_f1 = f1_score(data_valid_copy.category_id.tolist(), 
                           data_valid_copy.predicted_id.tolist(), average='weighted')
    scores.append(weighted_f1)
print(f'F1={np.array(scores).mean():.5f}')

  0%|          | 0/5 [00:00<?, ?it/s]

F1=0.86722


### Подбор гиперпараметров:

In [54]:
def try_parameters(lr=0.5, dim=80, ws=5, epoch=60, neg=7, wordNgrams=2):

    model = fasttext.train_supervised(os.path.join(WORKDIR, 'fasttext/train_ft.txt'),
                                  lr=lr,                # learning rate [0.1]
                                  dim=dim,               # size of word vectors [100]
                                  ws=ws,                # size of the context window [5]
                                  epoch=epoch,             # number of epochs [5]
                                  neg=neg,               # number of negatives sampled [5]
                                  wordNgrams=wordNgrams)
    
    data_valid_copy = data_valid.copy()
    data_valid_copy["predicted_id"] = data_valid_copy.Document.apply(lambda text: predict(text, model)).astype('int')
    data_valid_copy.category_id = data_valid_copy.category_id.apply(lambda text: int(text[9:]))
    weighted_f1 = f1_score(data_valid_copy.category_id.tolist(), 
                           data_valid_copy.predicted_id.tolist(), average='weighted')
    
    print(f'lr={lr}, dim={dim}, ws={ws}, epoch={epoch}, neg={neg}, wordNgrams={wordNgrams}')
    print(f'Weighted F1={weighted_f1:.4f}\n')


for lr in tqdm([0.4, 0.5]):
    for dim in tqdm([64, 80]):
        for epoch in [50, 60]:
            for wordNgrams in [1, 2, 3]:
                try_parameters(lr=lr, dim=dim, epoch=epoch, wordNgrams=wordNgrams)

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

Read 7M words
Number of words:  149031
Number of labels: 870
Progress: 100.0% words/sec/thread:  203691 lr:  0.000000 avg.loss:  0.289985 ETA:   0h 0m 0s  0.7% words/sec/thread:  210311 lr:  0.397066 avg.loss:  9.744263 ETA:   0h 0m40s


lr=0.4, dim=64, ws=5, epoch=50, neg=7, wordNgrams=1
Weighted F1=0.8535



Read 7M words
Number of words:  149031
Number of labels: 870
Progress: 100.0% words/sec/thread:  189827 lr: -0.000006 avg.loss:  0.432077 ETA:   0h 0m 0s  0.7% words/sec/thread:  202927 lr:  0.397167 avg.loss: 12.628447 ETA:   0h 0m42s  1.8% words/sec/thread:  197943 lr:  0.392652 avg.loss:  8.668149 ETA:   0h 0m42s  3.0% words/sec/thread:  198206 lr:  0.388054 avg.loss:  7.076039 ETA:   0h 0m42s  4.1% words/sec/thread:  197098 lr:  0.383556 avg.loss:  6.011496 ETA:   0h 0m42s  5.2% words/sec/thread:  195837 lr:  0.379127 avg.loss:  5.162292 ETA:   0h 0m41s  6.3% words/sec/thread:  195065 lr:  0.374693 avg.loss:  4.548378 ETA:   0h 0m41s  7.5% words/sec/thread:  195073 lr:  0.370175 avg.loss:  4.061322 ETA:   0h 0m41s  8.6% words/sec/thread:  194860 lr:  0.365695 avg.loss:  3.655100 ETA:   0h 0m40s 10.8% words/sec/thread:  194442 lr:  0.356764 avg.loss:  3.040461 ETA:   0h 0m39s 11.9% words/sec/thread:  194445 lr:  0.352260 avg.loss:  2.876940 ETA:   0h 0m39s 13.1% words/sec/thread:  1

lr=0.4, dim=64, ws=5, epoch=50, neg=7, wordNgrams=2
Weighted F1=0.8537



Read 7M words
Number of words:  149031
Number of labels: 870
Progress: 100.0% words/sec/thread:  182573 lr:  0.000000 avg.loss:  0.601938 ETA:   0h 0m 0s


lr=0.4, dim=64, ws=5, epoch=50, neg=7, wordNgrams=3
Weighted F1=0.8492



Read 7M words
Number of words:  149031
Number of labels: 870
Progress: 100.0% words/sec/thread:  197823 lr:  0.000000 avg.loss:  0.259181 ETA:   0h 0m 0s  2.4% words/sec/thread:  210366 lr:  0.390245 avg.loss:  4.985794 ETA:   0h 0m48s  5.2% words/sec/thread:  206389 lr:  0.379273 avg.loss:  3.059928 ETA:   0h 0m47s  8.5% words/sec/thread:  204292 lr:  0.366082 avg.loss:  2.162632 ETA:   0h 0m46s 0m45s 11.0% words/sec/thread:  203454 lr:  0.356016 avg.loss:  1.761804 ETA:   0h 0m45s 17.1% words/sec/thread:  201382 lr:  0.331598 avg.loss:  1.243335 ETA:   0h 0m42s 25.6% words/sec/thread:  200953 lr:  0.297625 avg.loss:  0.876946 ETA:   0h 0m38s 27.1% words/sec/thread:  200839 lr:  0.291482 avg.loss:  0.832885 ETA:   0h 0m37s 49.3% words/sec/thread:  199522 lr:  0.202892 avg.loss:  0.484537 ETA:   0h 0m26s 60.5% words/sec/thread:  199046 lr:  0.158048 avg.loss:  0.402297 ETA:   0h 0m20s 0.314070 ETA:   0h 0m 9s


lr=0.4, dim=64, ws=5, epoch=60, neg=7, wordNgrams=1
Weighted F1=0.8530



Read 7M words
Number of words:  149031
Number of labels: 870
Progress: 100.0% words/sec/thread:  188794 lr:  0.000000 avg.loss:  0.359144 ETA:   0h 0m 0s  0.6% words/sec/thread:  204555 lr:  0.397618 avg.loss: 12.517115 ETA:   0h 0m50s  5.5% words/sec/thread:  197711 lr:  0.377862 avg.loss:  4.188139 ETA:   0h 0m49s


lr=0.4, dim=64, ws=5, epoch=60, neg=7, wordNgrams=2
Weighted F1=0.8543



Read 7M words
Number of words:  149031
Number of labels: 870
Progress: 100.0% words/sec/thread:  181832 lr:  0.000000 avg.loss:  0.447732 ETA:   0h 0m 0s  0.6% words/sec/thread:  193495 lr:  0.397746 avg.loss: 14.540283 ETA:   0h 0m53s  1.5% words/sec/thread:  190944 lr:  0.394092 avg.loss: 10.244439 ETA:   0h 0m53s  2.4% words/sec/thread:  191006 lr:  0.390405 avg.loss:  8.311749 ETA:   0h 0m53s 15.6% words/sec/thread:  185472 lr:  0.337712 avg.loss:  2.588477 ETA:   0h 0m47s


lr=0.4, dim=64, ws=5, epoch=60, neg=7, wordNgrams=3
Weighted F1=0.8526



Read 7M words
Number of words:  149031
Number of labels: 870
Progress: 100.0% words/sec/thread:  157282 lr:  0.000000 avg.loss:  0.294244 ETA:   0h 0m 0s  0.6% words/sec/thread:  171006 lr:  0.397611 avg.loss: 10.587095 ETA:   0h 0m50s  1.5% words/sec/thread:  164110 lr:  0.393908 avg.loss:  6.644488 ETA:   0h 0m51s


lr=0.4, dim=80, ws=5, epoch=50, neg=7, wordNgrams=1
Weighted F1=0.8528



Read 7M words
Number of words:  149031
Number of labels: 870
Progress: 100.0% words/sec/thread:  150676 lr:  0.000000 avg.loss:  0.468892 ETA:   0h 0m 0s  0.6% words/sec/thread:  159175 lr:  0.397774 avg.loss: 13.667552 ETA:   0h 0m54s  1.5% words/sec/thread:  159277 lr:  0.394084 avg.loss:  9.369534 ETA:   0h 0m53s


lr=0.4, dim=80, ws=5, epoch=50, neg=7, wordNgrams=2
Weighted F1=0.8544



Read 7M words
Number of words:  149031
Number of labels: 870
Progress: 100.0% words/sec/thread:  145397 lr:  0.000000 avg.loss:  0.582430 ETA:   0h 0m 0s  0.5% words/sec/thread:  153854 lr:  0.397845 avg.loss: 15.294907 ETA:   0h 0m55s 16.1% words/sec/thread:  148004 lr:  0.335554 avg.loss:  2.862094 ETA:   0h 0m49s


lr=0.4, dim=80, ws=5, epoch=50, neg=7, wordNgrams=3
Weighted F1=0.8487



Read 7M words
Number of words:  149031
Number of labels: 870
Progress: 100.0% words/sec/thread:  158455 lr:  0.000000 avg.loss:  0.244208 ETA:   0h 0m 0s


lr=0.4, dim=80, ws=5, epoch=60, neg=7, wordNgrams=1
Weighted F1=0.8534



Read 7M words
Number of words:  149031
Number of labels: 870
Progress: 100.0% words/sec/thread:  151228 lr:  0.000000 avg.loss:  0.371972 ETA:   0h 0m 0s  0.5% words/sec/thread:  164897 lr:  0.398079 avg.loss: 13.686141 ETA:   0h 1m 2s  1.2% words/sec/thread:  160813 lr:  0.395024 avg.loss:  9.365654 ETA:   0h 1m 3s


lr=0.4, dim=80, ws=5, epoch=60, neg=7, wordNgrams=2
Weighted F1=0.8544



Read 7M words
Number of words:  149031
Number of labels: 870
Progress: 100.0% words/sec/thread:  145362 lr:  0.000000 avg.loss:  0.482526 ETA:   0h 0m 0s 29.4% words/sec/thread:  146721 lr:  0.282224 avg.loss:  1.453150 ETA:   0h 0m49s 66.8% words/sec/thread:  145985 lr:  0.132943 avg.loss:  0.688714 ETA:   0h 0m23s 73.8% words/sec/thread:  145877 lr:  0.104991 avg.loss:  0.629662 ETA:   0h 0m18s 74.4% words/sec/thread:  145861 lr:  0.102208 avg.loss:  0.624664 ETA:   0h 0m18s 75.6% words/sec/thread:  145817 lr:  0.097797 avg.loss:  0.616639 ETA:   0h 0m17s 76.2% words/sec/thread:  145786 lr:  0.095047 avg.loss:  0.611755 ETA:   0h 0m16s 85.7% words/sec/thread:  145620 lr:  0.057174 avg.loss:  0.553075 ETA:   0h 0m10s 92.3% words/sec/thread:  145538 lr:  0.030967 avg.loss:  0.518097 ETA:   0h 0m 5s 94.2% words/sec/thread:  145504 lr:  0.023192 avg.loss:  0.508489 ETA:   0h 0m 4s 145471 lr:  0.015417 avg.loss:  0.499581 ETA:   0h 0m 2s


lr=0.4, dim=80, ws=5, epoch=60, neg=7, wordNgrams=3
Weighted F1=0.8491



  0%|          | 0/2 [00:00<?, ?it/s]

Read 7M words
Number of words:  149031
Number of labels: 870
Progress: 100.0% words/sec/thread:  196735 lr:  0.000000 avg.loss:  0.274501 ETA:   0h 0m 0s  6.3% words/sec/thread:  202265 lr:  0.468373 avg.loss:  2.760882 ETA:   0h 0m40s 38.0% words/sec/thread:  199977 lr:  0.310150 avg.loss:  0.655316 ETA:   0h 0m26s 45.3% words/sec/thread:  199694 lr:  0.273422 avg.loss:  0.560685 ETA:   0h 0m23s 58.7% words/sec/thread:  198751 lr:  0.206605 avg.loss:  0.444385 ETA:   0h 0m17s 69.3% words/sec/thread:  198063 lr:  0.153725 avg.loss:  0.383894 ETA:   0h 0m13s 79.2% words/sec/thread:  197800 lr:  0.103802 avg.loss:  0.339005 ETA:   0h 0m 9s 88.5% words/sec/thread:  197510 lr:  0.057495 avg.loss:  0.306058 ETA:   0h 0m 5s


lr=0.5, dim=64, ws=5, epoch=50, neg=7, wordNgrams=1
Weighted F1=0.8537



Read 7M words
Number of words:  149031
Number of labels: 870
Progress: 100.0% words/sec/thread:  186464 lr:  0.000000 avg.loss:  0.354747 ETA:   0h 0m 0s  1.4% words/sec/thread:  194479 lr:  0.493224 avg.loss:  8.874033 ETA:   0h 0m43s


lr=0.5, dim=64, ws=5, epoch=50, neg=7, wordNgrams=2
Weighted F1=0.8551



Read 7M words
Number of words:  149031
Number of labels: 870
Progress: 100.0% words/sec/thread:  179555 lr:  0.000000 avg.loss:  0.435070 ETA:   0h 0m 0s lr:  0.165410 avg.loss:  0.629406 ETA:   0h 0m15s 82.2% words/sec/thread:  180108 lr:  0.089174 avg.loss:  0.522648 ETA:   0h 0m 8s


lr=0.5, dim=64, ws=5, epoch=50, neg=7, wordNgrams=3
Weighted F1=0.8504



Read 7M words
Number of words:  149031
Number of labels: 870
Progress: 100.0% words/sec/thread:  195195 lr:  0.000000 avg.loss:  0.223574 ETA:   0h 0m 0s  4.7% words/sec/thread:  203453 lr:  0.476432 avg.loss:  2.932413 ETA:   0h 0m48s 10.9% words/sec/thread:  200797 lr:  0.445743 avg.loss:  1.563552 ETA:   0h 0m46s 0.846004 ETA:   0h 0m40s 42.3% words/sec/thread:  197266 lr:  0.288749 avg.loss:  0.472609 ETA:   0h 0m30s


lr=0.5, dim=64, ws=5, epoch=60, neg=7, wordNgrams=1
Weighted F1=0.8526



Read 7M words
Number of words:  149031
Number of labels: 870
Progress: 100.0% words/sec/thread:  186733 lr:  0.000000 avg.loss:  0.303968 ETA:   0h 0m 0s  0.6% words/sec/thread:  196977 lr:  0.497135 avg.loss: 11.740439 ETA:   0h 0m52s 55.5% words/sec/thread:  188469 lr:  0.222675 avg.loss:  0.519412 ETA:   0h 0m24s 187903 lr:  0.156438 avg.loss:  0.429995 ETA:   0h 0m17s 87.9% words/sec/thread:  187430 lr:  0.060563 avg.loss:  0.341959 ETA:   0h 0m 6s


lr=0.5, dim=64, ws=5, epoch=60, neg=7, wordNgrams=2
Weighted F1=0.8552



Read 7M words
Number of words:  149031
Number of labels: 870
Progress: 100.0% words/sec/thread:  179312 lr:  0.000000 avg.loss:  0.410670 ETA:   0h 0m 0s  1.6% words/sec/thread:  184938 lr:  0.491959 avg.loss:  9.027455 ETA:   0h 0m55s  4.6% words/sec/thread:  183353 lr:  0.476991 avg.loss:  5.355486 ETA:   0h 0m54s 0.385409 avg.loss:  1.565772 ETA:   0h 0m43ss 54.8% words/sec/thread:  181029 lr:  0.225774 avg.loss:  0.712665 ETA:   0h 0m25s


lr=0.5, dim=64, ws=5, epoch=60, neg=7, wordNgrams=3
Weighted F1=0.8522



Read 7M words
Number of words:  149031
Number of labels: 870
Progress: 100.0% words/sec/thread:  159676 lr:  0.000000 avg.loss:  0.257711 ETA:   0h 0m 0s  0.6% words/sec/thread:  166459 lr:  0.497096 avg.loss: 10.983368 ETA:   0h 0m51s  1.5% words/sec/thread:  166923 lr:  0.492257 avg.loss:  6.635676 ETA:   0h 0m51s% words/sec/thread:  160682 lr:  0.206089 avg.loss:  0.410933 ETA:   0h 0m22s 61.3% words/sec/thread:  160552 lr:  0.193318 avg.loss:  0.392662 ETA:   0h 0m20s 84.0% words/sec/thread:  160462 lr:  0.080188 avg.loss:  0.298558 ETA:   0h 0m 8s 89.3% words/sec/thread:  160291 lr:  0.053734 avg.loss:  0.283473 ETA:   0h 0m 5s


lr=0.5, dim=80, ws=5, epoch=50, neg=7, wordNgrams=1
Weighted F1=0.8536



Read 7M words
Number of words:  149031
Number of labels: 870
Progress: 100.0% words/sec/thread:  151550 lr:  0.000000 avg.loss:  0.353175 ETA:   0h 0m 0s 4.870827 ETA:   0h 0m52s 0.473563 avg.loss:  4.362621 ETA:   0h 0m52s 156728 lr:  0.469144 avg.loss:  3.909967 ETA:   0h 0m51s 36.1% words/sec/thread:  152794 lr:  0.319580 avg.loss:  0.913616 ETA:   0h 0m36s 0.507126 ETA:   0h 0m18s 91.4% words/sec/thread:  151881 lr:  0.042868 avg.loss:  0.384199 ETA:   0h 0m 4s


lr=0.5, dim=80, ws=5, epoch=50, neg=7, wordNgrams=2
Weighted F1=0.8543



Read 7M words
Number of words:  149031
Number of labels: 870
Progress: 100.0% words/sec/thread:  144684 lr:  0.000000 avg.loss:  0.476568 ETA:   0h 0m 0s  2.1% words/sec/thread:  151889 lr:  0.489436 avg.loss:  8.785603 ETA:   0h 0m55s  4.5% words/sec/thread:  150021 lr:  0.477409 avg.loss:  6.027095 ETA:   0h 0m55s words/sec/thread:  147130 lr:  0.425045 avg.loss:  2.679337 ETA:   0h 0m49s 146016 lr:  0.297141 avg.loss:  1.095637 ETA:   0h 0m35s 53.0% words/sec/thread:  145844 lr:  0.234905 avg.loss:  0.853038 ETA:   0h 0m27s 63.4% words/sec/thread:  145616 lr:  0.183060 avg.loss:  0.723956 ETA:   0h 0m21s% words/sec/thread:  145165 lr:  0.081527 avg.loss:  0.557272 ETA:   0h 0m 9s 145039 lr:  0.049988 avg.loss:  0.521824 ETA:   0h 0m 5s 90.8% words/sec/thread:  145017 lr:  0.045857 avg.loss:  0.517932 ETA:   0h 0m 5s 144935 lr:  0.019269 avg.loss:  0.493319 ETA:   0h 0m 2s


lr=0.5, dim=80, ws=5, epoch=50, neg=7, wordNgrams=3
Weighted F1=0.8508



Read 7M words
Number of words:  149031
Number of labels: 870
Progress: 100.0% words/sec/thread:  159946 lr:  0.000000 avg.loss:  0.217463 ETA:   0h 0m 0s  1.0% words/sec/thread:  166131 lr:  0.495181 avg.loss:  7.832319 ETA:   0h 1m 1s 3.904163 ETA:   0h 1m 0s  7.8% words/sec/thread:  164187 lr:  0.461183 avg.loss:  2.026492 ETA:   0h 0m58s 12.8% words/sec/thread:  163466 lr:  0.436120 avg.loss:  1.362126 ETA:   0h 0m55s 17.3% words/sec/thread:  163464 lr:  0.413254 avg.loss:  1.055020 ETA:   0h 0m52ss 24.5% words/sec/thread:  162834 lr:  0.377460 avg.loss:  0.776866 ETA:   0h 0m48s 0.294838 avg.loss:  0.486615 ETA:   0h 0m37s 66.7% words/sec/thread:  160696 lr:  0.166676 avg.loss:  0.311682 ETA:   0h 0m21s


lr=0.5, dim=80, ws=5, epoch=60, neg=7, wordNgrams=1
Weighted F1=0.8537



Read 7M words
Number of words:  149031
Number of labels: 870
Progress: 100.0% words/sec/thread:  153785 lr:  0.000000 avg.loss:  0.306872 ETA:   0h 0m 0s  1.7% words/sec/thread:  163556 lr:  0.491310 avg.loss:  7.438430 ETA:   0h 1m 2s 33.5% words/sec/thread:  155853 lr:  0.332337 avg.loss:  0.872609 ETA:   0h 0m44s 60.6% words/sec/thread:  156185 lr:  0.197117 avg.loss:  0.493094 ETA:   0h 0m26s 87.5% words/sec/thread:  154455 lr:  0.062638 avg.loss:  0.346547 ETA:   0h 0m 8s 98.6% words/sec/thread:  153876 lr:  0.007125 avg.loss:  0.310742 ETA:   0h 0m 0s


lr=0.5, dim=80, ws=5, epoch=60, neg=7, wordNgrams=2
Weighted F1=0.8568



Read 7M words
Number of words:  149031
Number of labels: 870
Progress: 100.0% words/sec/thread:  146186 lr:  0.000000 avg.loss:  0.394599 ETA:   0h 0m 0s  0.5% words/sec/thread:  159107 lr:  0.497687 avg.loss: 14.711055 ETA:   0h 1m 4s  1.6% words/sec/thread:  155119 lr:  0.491759 avg.loss:  8.975782 ETA:   0h 1m 5s  6.3% words/sec/thread:  151446 lr:  0.468577 avg.loss:  4.717517 ETA:   0h 1m 4s 15.2% words/sec/thread:  149808 lr:  0.424110 avg.loss:  2.362265 ETA:   0h 0m58s 41.0% words/sec/thread:  148173 lr:  0.294854 avg.loss:  0.915971 ETA:   0h 0m41s 59.9% words/sec/thread:  147360 lr:  0.200723 avg.loss:  0.642744 ETA:   0h 0m28s 66.4% words/sec/thread:  147127 lr:  0.167841 avg.loss:  0.581717 ETA:   0h 0m23s 71.0% words/sec/thread:  147218 lr:  0.144906 avg.loss:  0.546143 ETA:   0h 0m20s 73.0% words/sec/thread:  147146 lr:  0.135141 avg.loss:  0.532195 ETA:   0h 0m19s 81.2% words/sec/thread:  146792 lr:  0.094233 avg.loss:  0.479773 ETA:   0h 0m13s 83.5% words/sec/thread:  1

lr=0.5, dim=80, ws=5, epoch=60, neg=7, wordNgrams=3
Weighted F1=0.8519



In [14]:
for lr in tqdm([0.2, 0.25, 0.35]):
    for wordNgrams in tqdm([2, 3, 4, 5, 7]):
        for ws in [3, 5, 7]:
            try_parameters(lr=lr, dim=80, ws=ws, epoch=40, neg=7, wordNgrams=wordNgrams)

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Read 7M words
Number of words:  149031
Number of labels: 870
Progress: 100.0% words/sec/thread:  149496 lr:  0.000000 avg.loss:  0.967649 ETA:   0h 0m 0s  0.7% words/sec/thread:  156409 lr:  0.198634 avg.loss: 16.353426 ETA:   0h 0m43s 22.6% words/sec/thread:  150420 lr:  0.154711 avg.loss:  3.314927 ETA:   0h 0m35s 42.9% words/sec/thread:  150279 lr:  0.114235 avg.loss:  1.972964 ETA:   0h 0m26s 52.3% words/sec/thread:  149974 lr:  0.095306 avg.loss:  1.670349 ETA:   0h 0m21s 59.2% words/sec/thread:  149814 lr:  0.081539 avg.loss:  1.506186 ETA:   0h 0m18s 0m 0s


lr=0.2, dim=80, ws=3, epoch=40, neg=7, wordNgrams=2
Weighted F1=0.8403, Train F1=0.9673, Test F1=0.8463 



Read 7M words
Number of words:  149031
Number of labels: 870
Progress: 100.0% words/sec/thread:  152856 lr:  0.000000 avg.loss:  1.019222 ETA:   0h 0m 0s


lr=0.2, dim=80, ws=5, epoch=40, neg=7, wordNgrams=2
Weighted F1=0.8396, Train F1=0.9669, Test F1=0.8456 



Read 7M words
Number of words:  149031
Number of labels: 870
Progress: 100.0% words/sec/thread:  153148 lr:  0.000000 avg.loss:  0.993975 ETA:   0h 0m 0s  0.7% words/sec/thread:  163150 lr:  0.198574 avg.loss: 15.841050 ETA:   0h 0m42s


lr=0.2, dim=80, ws=7, epoch=40, neg=7, wordNgrams=2
Weighted F1=0.8401, Train F1=0.9669, Test F1=0.8458 



Read 7M words
Number of words:  149031
Number of labels: 870
Progress: 100.0% words/sec/thread:  146991 lr:  0.000000 avg.loss:  1.496208 ETA:   0h 0m 0s  0.7% words/sec/thread:  152339 lr:  0.198671 avg.loss: 17.088215 ETA:   0h 0m45s 0m 0s


lr=0.2, dim=80, ws=3, epoch=40, neg=7, wordNgrams=3
Weighted F1=0.8204, Train F1=0.9462, Test F1=0.8296 



Read 7M words
Number of words:  149031
Number of labels: 870
Progress: 100.0% words/sec/thread:  145464 lr:  0.000000 avg.loss:  1.288247 ETA:   0h 0m 0s 39.1% words/sec/thread:  145851 lr:  0.121891 avg.loss:  2.972269 ETA:   0h 0m28s 42.8% words/sec/thread:  146249 lr:  0.114481 avg.loss:  2.784137 ETA:   0h 0m27s 49.0% words/sec/thread:  146549 lr:  0.102003 avg.loss:  2.524771 ETA:   0h 0m24s 52.7% words/sec/thread:  146922 lr:  0.094523 avg.loss:  2.380014 ETA:   0h 0m22s 62.3% words/sec/thread:  147340 lr:  0.075456 avg.loss:  2.078532 ETA:   0h 0m17s 72.0% words/sec/thread:  146656 lr:  0.056087 avg.loss:  1.809076 ETA:   0h 0m13s 90.9% words/sec/thread:  145770 lr:  0.018144 avg.loss:  1.417815 ETA:   0h 0m 4s 93.9% words/sec/thread:  145728 lr:  0.012290 avg.loss:  1.370521 ETA:   0h 0m 2s


lr=0.2, dim=80, ws=5, epoch=40, neg=7, wordNgrams=3
Weighted F1=0.8201, Train F1=0.9464, Test F1=0.8293 



Read 7M words
Number of words:  149031
Number of labels: 870
Progress: 100.0% words/sec/thread:  143351 lr:  0.000000 avg.loss:  1.527634 ETA:   0h 0m 0s 20.4% words/sec/thread:  143617 lr:  0.159261 avg.loss:  4.713931 ETA:   0h 0m38s 27.0% words/sec/thread:  143540 lr:  0.145989 avg.loss:  3.913944 ETA:   0h 0m35s 45.5% words/sec/thread:  143426 lr:  0.109088 avg.loss:  2.669653 ETA:   0h 0m26s 52.2% words/sec/thread:  143570 lr:  0.095699 avg.loss:  2.387174 ETA:   0h 0m23s100.0% words/sec/thread:  143353 lr: -0.000004 avg.loss:  1.527634 ETA:   0h 0m 0s


lr=0.2, dim=80, ws=7, epoch=40, neg=7, wordNgrams=3
Weighted F1=0.8204, Train F1=0.9467, Test F1=0.8298 



Read 7M words
Number of words:  149031
Number of labels: 870
Progress: 100.0% words/sec/thread:  138145 lr:  0.000000 avg.loss:  1.806486 ETA:   0h 0m 0s  0.6% words/sec/thread:  146956 lr:  0.198717 avg.loss: 17.691240 ETA:   0h 0m46s


lr=0.2, dim=80, ws=3, epoch=40, neg=7, wordNgrams=4
Weighted F1=0.7988, Train F1=0.9232, Test F1=0.8122 



Read 7M words
Number of words:  149031
Number of labels: 870
Progress: 100.0% words/sec/thread:  140734 lr:  0.000000 avg.loss:  1.788006 ETA:   0h 0m 0s 24.4% words/sec/thread:  140576 lr:  0.151170 avg.loss:  5.070265 ETA:   0h 0m37s 33.6% words/sec/thread:  141636 lr:  0.132764 avg.loss:  4.084175 ETA:   0h 0m32s 40.1% words/sec/thread:  141390 lr:  0.119787 avg.loss:  3.615515 ETA:   0h 0m29s 43.0% words/sec/thread:  141336 lr:  0.114091 avg.loss:  3.431419 ETA:   0h 0m27s 44.0% words/sec/thread:  141470 lr:  0.111962 avg.loss:  3.365803 ETA:   0h 0m27s 141565 lr:  0.109854 avg.loss:  3.300352 ETA:   0h 0m26s 46.1% words/sec/thread:  141649 lr:  0.107751 avg.loss:  3.239529 ETA:   0h 0m26s 49.0% words/sec/thread:  141743 lr:  0.101946 avg.loss:  3.096535 ETA:   0h 0m24s 58.7% words/sec/thread:  141912 lr:  0.082524 avg.loss:  2.680451 ETA:   0h 0m20s 62.8% words/sec/thread:  141826 lr:  0.074385 avg.loss:  2.544899 ETA:   0h 0m18s 86.4% words/sec/thread:  141400 lr:  0.027290 avg.l

lr=0.2, dim=80, ws=5, epoch=40, neg=7, wordNgrams=4
Weighted F1=0.7991, Train F1=0.9234, Test F1=0.8123 



Read 7M words
Number of words:  149031
Number of labels: 870
Progress: 100.0% words/sec/thread:  140144 lr: -0.000003 avg.loss:  1.697149 ETA:   0h 0m 0s words/sec/thread:  145954 lr:  0.186053 avg.loss:  9.602772 ETA:   0h 0m44s 12.8% words/sec/thread:  142904 lr:  0.174349 avg.loss:  7.391418 ETA:   0h 0m42s 19.2% words/sec/thread:  141258 lr:  0.161561 avg.loss:  5.713688 ETA:   0h 0m39s 20.2% words/sec/thread:  141277 lr:  0.159512 avg.loss:  5.482156 ETA:   0h 0m39s 21.9% words/sec/thread:  141340 lr:  0.156221 avg.loss:  5.112576 ETA:   0h 0m38s 25.3% words/sec/thread:  140992 lr:  0.149392 avg.loss:  4.566169 ETA:   0h 0m36s 0.000000 avg.loss:  1.697149 ETA:   0h 0m 0s


lr=0.2, dim=80, ws=7, epoch=40, neg=7, wordNgrams=4
Weighted F1=0.7987, Train F1=0.9232, Test F1=0.8117 



Read 7M words
Number of words:  149031
Number of labels: 870
Progress: 100.0% words/sec/thread:  132125 lr: -0.000004 avg.loss:  2.298467 ETA:   0h 0m 0s  3.5% words/sec/thread:  133777 lr:  0.193025 avg.loss: 13.292831 ETA:   0h 0m49s% words/sec/thread:  133220 lr:  0.186499 avg.loss: 10.584155 ETA:   0h 0m48s  9.5% words/sec/thread:  133814 lr:  0.181017 avg.loss:  9.369010 ETA:   0h 0m46s 16.2% words/sec/thread:  133120 lr:  0.167630 avg.loss:  7.456291 ETA:   0h 0m43s 17.1% words/sec/thread:  133076 lr:  0.165714 avg.loss:  7.274577 ETA:   0h 0m43s 0.133794 avg.loss:  4.886416 ETA:   0h 0m34s 62.2% words/sec/thread:  132575 lr:  0.075672 avg.loss:  3.208522 ETA:   0h 0m19s 84.9% words/sec/thread:  132475 lr:  0.030139 avg.loss:  2.577613 ETA:   0h 0m 7s 88.2% words/sec/thread:  132417 lr:  0.023698 avg.loss:  2.510878 ETA:   0h 0m 6s 96.5% words/sec/thread:  132341 lr:  0.006946 avg.loss:  2.357774 ETA:   0h 0m 1s 99.8% words/sec/thread:  132299 lr:  0.000499 avg.loss:  2.302701 ET

lr=0.2, dim=80, ws=3, epoch=40, neg=7, wordNgrams=5
Weighted F1=0.7736, Train F1=0.8988, Test F1=0.7917 



Progress: 100.0% words/sec/thread:  132124 lr:  0.000000 avg.loss:  2.298467 ETA:   0h 0m 0s
Read 7M words
Number of words:  149031
Number of labels: 870
Progress: 100.0% words/sec/thread:  131340 lr:  0.000000 avg.loss:  2.173688 ETA:   0h 0m 0s 2.638157 ETA:   0h 0m12s 79.0% words/sec/thread:  131463 lr:  0.042047 avg.loss:  2.568448 ETA:   0h 0m11s 81.1% words/sec/thread:  131481 lr:  0.037841 avg.loss:  2.519099 ETA:   0h 0m 9s  0h 0m 9s 83.0% words/sec/thread:  131466 lr:  0.034055 avg.loss:  2.478377 ETA:   0h 0m 8s 96.5% words/sec/thread:  131494 lr:  0.006996 avg.loss:  2.217813 ETA:   0h 0m 1s


lr=0.2, dim=80, ws=5, epoch=40, neg=7, wordNgrams=5
Weighted F1=0.7721, Train F1=0.8985, Test F1=0.7902 



Read 7M words
Number of words:  149031
Number of labels: 870
Progress: 100.0% words/sec/thread:  132073 lr:  0.000000 avg.loss:  2.139661 ETA:   0h 0m 0s  0.6% words/sec/thread:  138870 lr:  0.198789 avg.loss: 17.751270 ETA:   0h 0m49s


lr=0.2, dim=80, ws=7, epoch=40, neg=7, wordNgrams=5
Weighted F1=0.7735, Train F1=0.8982, Test F1=0.7912 



Read 7M words
Number of words:  149031
Number of labels: 870
Progress:  99.8% words/sec/thread:  120780 lr:  0.000357 avg.loss:  2.902370 ETA:   0h 0m 0s 12.2% words/sec/thread:  121903 lr:  0.175651 avg.loss:  9.506231 ETA:   0h 0m49s 26.5% words/sec/thread:  120358 lr:  0.147045 avg.loss:  6.588506 ETA:   0h 0m42s 0.029783 avg.loss:  3.185108 ETA:   0h 0m 8s 98.6% words/sec/thread:  120740 lr:  0.002868 avg.loss:  2.928864 ETA:   0h 0m 0s100.0% words/sec/thread:  120784 lr: -0.000000 avg.loss:  2.898919 ETA:   0h 0m 0s100.0% words/sec/thread:  120783 lr:  0.000000 avg.loss:  2.898853 ETA:   0h 0m 0s


lr=0.2, dim=80, ws=3, epoch=40, neg=7, wordNgrams=7
Weighted F1=0.7199, Train F1=0.8455, Test F1=0.7475 



Read 7M words
Number of words:  149031
Number of labels: 870
Progress: 100.0% words/sec/thread:  122695 lr:  0.000000 avg.loss:  2.827848 ETA:   0h 0m 0s


lr=0.2, dim=80, ws=5, epoch=40, neg=7, wordNgrams=7
Weighted F1=0.7210, Train F1=0.8458, Test F1=0.7487 



Read 7M words
Number of words:  149031
Number of labels: 870
Progress: 100.0% words/sec/thread:  120165 lr:  0.000000 avg.loss:  2.888204 ETA:   0h 0m 0s  3.3% words/sec/thread:  127374 lr:  0.193354 avg.loss: 14.451350 ETA:   0h 0m52s  5.1% words/sec/thread:  125470 lr:  0.189822 avg.loss: 12.907025 ETA:   0h 0m52s  6.0% words/sec/thread:  125349 lr:  0.188017 avg.loss: 12.217083 ETA:   0h 0m51s 10.4% words/sec/thread:  124304 lr:  0.179121 avg.loss: 10.001143 ETA:   0h 0m49s 11.3% words/sec/thread:  124118 lr:  0.177356 avg.loss:  9.713044 ETA:   0h 0m49s 12.2% words/sec/thread:  124009 lr:  0.175582 avg.loss:  9.450936 ETA:   0h 0m48s 13.1% words/sec/thread:  124047 lr:  0.173779 avg.loss:  9.211105 ETA:   0h 0m48s 14.0% words/sec/thread:  124006 lr:  0.171993 avg.loss:  9.004786 ETA:   0h 0m47s 14.9% words/sec/thread:  124000 lr:  0.170200 avg.loss:  8.801422 ETA:   0h 0m47s 15.8% words/sec/thread:  124096 lr:  0.168381 avg.loss:  8.619785 ETA:   0h 0m46s 16.7% words/sec/thread:  1

lr=0.2, dim=80, ws=7, epoch=40, neg=7, wordNgrams=7
Weighted F1=0.7200, Train F1=0.8446, Test F1=0.7479 



  0%|          | 0/5 [00:00<?, ?it/s]

Read 7M words
Number of words:  149031
Number of labels: 870
Progress: 100.0% words/sec/thread:  151191 lr:  0.000000 avg.loss:  0.824311 ETA:   0h 0m 0s


lr=0.25, dim=80, ws=3, epoch=40, neg=7, wordNgrams=2
Weighted F1=0.8462, Train F1=0.9788, Test F1=0.8508 



Read 7M words
Number of words:  149031
Number of labels: 870
Progress: 100.0% words/sec/thread:  154881 lr:  0.000000 avg.loss:  0.884797 ETA:   0h 0m 0s  0.7% words/sec/thread:  161810 lr:  0.248233 avg.loss: 15.294449 ETA:   0h 0m42s


lr=0.25, dim=80, ws=5, epoch=40, neg=7, wordNgrams=2
Weighted F1=0.8456, Train F1=0.9784, Test F1=0.8504 



Read 7M words
Number of words:  149031
Number of labels: 870
Progress: 100.0% words/sec/thread:  154231 lr:  0.000000 avg.loss:  0.802348 ETA:   0h 0m 0s 48.4% words/sec/thread:  152840 lr:  0.128890 avg.loss:  1.500588 ETA:   0h 0m23s 55.6% words/sec/thread:  153005 lr:  0.111045 avg.loss:  1.331103 ETA:   0h 0m20s 72.5% words/sec/thread:  152803 lr:  0.068658 avg.loss:  1.066493 ETA:   0h 0m12s% words/sec/thread:  153376 lr:  0.043562 avg.loss:  0.948669 ETA:   0h 0m 7s 85.9% words/sec/thread:  153711 lr:  0.035324 avg.loss:  0.915934 ETA:   0h 0m 6s words/sec/thread:  153956 lr:  0.025513 avg.loss:  0.881179 ETA:   0h 0m 4s 99.8% words/sec/thread:  154309 lr:  0.000432 avg.loss:  0.803528 ETA:   0h 0m 0s


lr=0.25, dim=80, ws=7, epoch=40, neg=7, wordNgrams=2
Weighted F1=0.8466, Train F1=0.9786, Test F1=0.8512 



Read 7M words
Number of words:  149031
Number of labels: 870
Progress: 100.0% words/sec/thread:  144265 lr:  0.000000 avg.loss:  1.063141 ETA:   0h 0m 0s 13.9% words/sec/thread:  152227 lr:  0.215293 avg.loss:  5.292197 ETA:   0h 0m39s 30.2% words/sec/thread:  148844 lr:  0.174598 avg.loss:  3.001524 ETA:   0h 0m32s 64.8% words/sec/thread:  145840 lr:  0.088015 avg.loss:  1.564235 ETA:   0h 0m16s 70.8% words/sec/thread:  145532 lr:  0.073088 avg.loss:  1.447318 ETA:   0h 0m13s 77.3% words/sec/thread:  145185 lr:  0.056702 avg.loss:  1.341776 ETA:   0h 0m10s 84.4% words/sec/thread:  144787 lr:  0.038900 avg.loss:  1.242296 ETA:   0h 0m 7s 86.1% words/sec/thread:  144715 lr:  0.034817 avg.loss:  1.221191 ETA:   0h 0m 6s 92.7% words/sec/thread:  144553 lr:  0.018325 avg.loss:  1.141355 ETA:   0h 0m 3s


lr=0.25, dim=80, ws=3, epoch=40, neg=7, wordNgrams=3
Weighted F1=0.8330, Train F1=0.9659, Test F1=0.8393 



Read 7M words
Number of words:  149031
Number of labels: 870
Progress: 100.0% words/sec/thread:  143076 lr:  0.000000 avg.loss:  1.117505 ETA:   0h 0m 0s  3.3% words/sec/thread:  150322 lr:  0.241835 avg.loss: 10.489855 ETA:   0h 0m44s 0m43s 12.8% words/sec/thread:  145261 lr:  0.217937 avg.loss:  5.383138 ETA:   0h 0m41s 0m37ss 53.6% words/sec/thread:  143048 lr:  0.115962 avg.loss:  1.892859 ETA:   0h 0m22s 59.6% words/sec/thread:  143068 lr:  0.100934 avg.loss:  1.733965 ETA:   0h 0m19s 66.2% words/sec/thread:  142968 lr:  0.084489 avg.loss:  1.588458 ETA:   0h 0m16s 78.5% words/sec/thread:  143107 lr:  0.053784 avg.loss:  1.374890 ETA:   0h 0m10s 82.0% words/sec/thread:  143175 lr:  0.044886 avg.loss:  1.322860 ETA:   0h 0m 8s 87.5% words/sec/thread:  143264 lr:  0.031284 avg.loss:  1.250330 ETA:   0h 0m 6s 97.9% words/sec/thread:  143314 lr:  0.005286 avg.loss:  1.136294 ETA:   0h 0m 1s


lr=0.25, dim=80, ws=5, epoch=40, neg=7, wordNgrams=3
Weighted F1=0.8321, Train F1=0.9659, Test F1=0.8384 



Read 7M words
Number of words:  149031
Number of labels: 870
Progress: 100.0% words/sec/thread:  144558 lr:  0.000000 avg.loss:  1.129026 ETA:   0h 0m 0s  5.2% words/sec/thread:  148276 lr:  0.237119 avg.loss:  8.762530 ETA:   0h 0m44sh 0m28s


lr=0.25, dim=80, ws=7, epoch=40, neg=7, wordNgrams=3
Weighted F1=0.8313, Train F1=0.9656, Test F1=0.8378 



Read 7M words
Number of words:  149031
Number of labels: 870
Progress: 100.0% words/sec/thread:  137078 lr:  0.000000 avg.loss:  1.440514 ETA:   0h 0m 0s 98.3% words/sec/thread:  137267 lr:  0.004125 avg.loss:  1.460492 ETA:   0h 0m 0s


lr=0.25, dim=80, ws=3, epoch=40, neg=7, wordNgrams=4
Weighted F1=0.8143, Train F1=0.9494, Test F1=0.8236 



Read 7M words
Number of words:  149031
Number of labels: 870
Progress: 100.0% words/sec/thread:  138180 lr:  0.000000 avg.loss:  1.435016 ETA:   0h 0m 0s


lr=0.25, dim=80, ws=5, epoch=40, neg=7, wordNgrams=4
Weighted F1=0.8157, Train F1=0.9490, Test F1=0.8248 



Read 7M words
Number of words:  149031
Number of labels: 870
Progress: 100.0% words/sec/thread:  136000 lr:  0.000000 avg.loss:  1.405854 ETA:   0h 0m 0s 26.5% words/sec/thread:  136792 lr:  0.183665 avg.loss:  4.009963 ETA:   0h 0m37s 53.7% words/sec/thread:  136881 lr:  0.115779 avg.loss:  2.310409 ETA:   0h 0m23s 65.2% words/sec/thread:  136597 lr:  0.086901 avg.loss:  1.970804 ETA:   0h 0m17s 77.2% words/sec/thread:  136478 lr:  0.056913 avg.loss:  1.726206 ETA:   0h 0m11s 86.4% words/sec/thread:  136359 lr:  0.033886 avg.loss:  1.576808 ETA:   0h 0m 6s 89.2% words/sec/thread:  136330 lr:  0.027025 avg.loss:  1.537227 ETA:   0h 0m 5s 91.9% words/sec/thread:  136302 lr:  0.020164 avg.loss:  1.501886 ETA:   0h 0m 4s 95.3% words/sec/thread:  136270 lr:  0.011832 avg.loss:  1.460390 ETA:   0h 0m 2s


lr=0.25, dim=80, ws=7, epoch=40, neg=7, wordNgrams=4
Weighted F1=0.8157, Train F1=0.9493, Test F1=0.8249 



Read 7M words
Number of words:  149031
Number of labels: 870
Progress: 100.0% words/sec/thread:  134421 lr:  0.000000 avg.loss:  1.702960 ETA:   0h 0m 0s  1.2% words/sec/thread:  137954 lr:  0.246997 avg.loss: 16.596476 ETA:   0h 0m49s  4.5% words/sec/thread:  135820 lr:  0.238689 avg.loss: 11.324074 ETA:   0h 0m48s  7.2% words/sec/thread:  134141 lr:  0.232034 avg.loss:  9.534426 ETA:   0h 0m47s% words/sec/thread:  133658 lr:  0.223877 avg.loss:  8.074697 ETA:   0h 0m46s 13.2% words/sec/thread:  134234 lr:  0.216965 avg.loss:  7.192690 ETA:   0h 0m44s 134033 lr:  0.201497 avg.loss:  5.813856 ETA:   0h 0m41s 5.539338 ETA:   0h 0m40s 34.2% words/sec/thread:  134396 lr:  0.164411 avg.loss:  3.895295 ETA:   0h 0m33s 0.157442 avg.loss:  3.688352 ETA:   0h 0m32s 45.4% words/sec/thread:  134055 lr:  0.136495 avg.loss:  3.145618 ETA:   0h 0m28s% words/sec/thread:  134393 lr:  0.053544 avg.loss:  2.063655 ETA:   0h 0m11s 0.046630 avg.loss:  2.007030 ETA:   0h 0m 9s 134579 lr:  0.016261 avg.los

lr=0.25, dim=80, ws=3, epoch=40, neg=7, wordNgrams=5
Weighted F1=0.7977, Train F1=0.9327, Test F1=0.8099 



Read 7M words
Number of words:  149031
Number of labels: 870
Progress:  99.9% words/sec/thread:  134213 lr:  0.000354 avg.loss:  1.827891 ETA:   0h 0m 0s  5.3% words/sec/thread:  136694 lr:  0.236637 avg.loss: 10.596037 ETA:   0h 0m47s  8.7% words/sec/thread:  136581 lr:  0.228247 avg.loss:  8.683407 ETA:   0h 0m46s

lr=0.25, dim=80, ws=5, epoch=40, neg=7, wordNgrams=5
Weighted F1=0.7991, Train F1=0.9328, Test F1=0.8111 



Progress: 100.0% words/sec/thread:  134144 lr:  0.000000 avg.loss:  1.824975 ETA:   0h 0m 0s
Read 7M words
Number of words:  149031
Number of labels: 870
Progress: 100.0% words/sec/thread:  133390 lr:  0.000000 avg.loss:  1.574885 ETA:   0h 0m 0s  0.6% words/sec/thread:  142338 lr:  0.248447 avg.loss: 17.642553 ETA:   0h 0m48s 23.1% words/sec/thread:  133981 lr:  0.192302 avg.loss:  5.034672 ETA:   0h 0m39s 32.1% words/sec/thread:  133497 lr:  0.169809 avg.loss:  3.859975 ETA:   0h 0m35s 133672 lr:  0.088938 avg.loss:  2.334863 ETA:   0h 0m18s 70.1% words/sec/thread:  133758 lr:  0.074801 avg.loss:  2.167339 ETA:   0h 0m15s 76.2% words/sec/thread:  133691 lr:  0.059411 avg.loss:  1.998758 ETA:   0h 0m12s ETA:   0h 0m10s 84.4% words/sec/thread:  133501 lr:  0.038912 avg.loss:  1.827750 ETA:   0h 0m 8s 87.7% words/sec/thread:  133541 lr:  0.030636 avg.loss:  1.765975 ETA:   0h 0m 6s


lr=0.25, dim=80, ws=7, epoch=40, neg=7, wordNgrams=5
Weighted F1=0.7982, Train F1=0.9327, Test F1=0.8104 



Read 7M words
Number of words:  149031
Number of labels: 870
Progress: 100.0% words/sec/thread:  119519 lr: -0.000004 avg.loss:  2.262725 ETA:   0h 0m 0s 0.000000 avg.loss:  2.262725 ETA:   0h 0m 0s


lr=0.25, dim=80, ws=3, epoch=40, neg=7, wordNgrams=7
Weighted F1=0.7587, Train F1=0.8943, Test F1=0.7780 



Read 7M words
Number of words:  149031
Number of labels: 870
Progress: 100.0% words/sec/thread:  119111 lr: -0.000006 avg.loss:  2.427060 ETA:   0h 0m 0s

lr=0.25, dim=80, ws=5, epoch=40, neg=7, wordNgrams=7
Weighted F1=0.7577, Train F1=0.8942, Test F1=0.7771 



100.0% words/sec/thread:  119111 lr:  0.000000 avg.loss:  2.427060 ETA:   0h 0m 0s
Read 7M words
Number of words:  149031
Number of labels: 870
Progress: 100.0% words/sec/thread:  122496 lr: -0.000006 avg.loss:  2.266642 ETA:   0h 0m 0s  0.6% words/sec/thread:  129743 lr:  0.248585 avg.loss: 17.751276 ETA:   0h 0m53s 11.2% words/sec/thread:  125320 lr:  0.221879 avg.loss:  8.684338 ETA:   0h 0m49s 13.8% words/sec/thread:  125097 lr:  0.215591 avg.loss:  8.046524 ETA:   0h 0m47s 16.3% words/sec/thread:  124855 lr:  0.209333 avg.loss:  7.508296 ETA:   0h 0m46s 19.3% words/sec/thread:  124715 lr:  0.201706 avg.loss:  6.994071 ETA:   0h 0m44s 24.5% words/sec/thread:  124472 lr:  0.188738 avg.loss:  6.159623 ETA:   0h 0m41s 27.5% words/sec/thread:  124045 lr:  0.181318 avg.loss:  5.731996 ETA:   0h 0m40s 29.9% words/sec/thread:  123730 lr:  0.175223 avg.loss:  5.338726 ETA:   0h 0m39s 35.0% words/sec/thread:  123235 lr:  0.162588 avg.loss:  4.721802 ETA:   0h 0m36s 37.9% words/sec/thread:  

lr=0.25, dim=80, ws=7, epoch=40, neg=7, wordNgrams=7
Weighted F1=0.7581, Train F1=0.8944, Test F1=0.7776 



  0%|          | 0/5 [00:00<?, ?it/s]

Read 7M words
Number of words:  149031
Number of labels: 870
Progress: 100.0% words/sec/thread:  154578 lr: -0.000011 avg.loss:  0.604912 ETA:   0h 0m 0s  0.8% words/sec/thread:  173151 lr:  0.347357 avg.loss: 14.065721 ETA:   0h 0m39s100.0% words/sec/thread:  154576 lr:  0.000000 avg.loss:  0.604912 ETA:   0h 0m 0s


lr=0.35, dim=80, ws=3, epoch=40, neg=7, wordNgrams=2
Weighted F1=0.8511, Train F1=0.9872, Test F1=0.8541 



Read 7M words
Number of words:  149031
Number of labels: 870
Progress: 100.0% words/sec/thread:  156663 lr: -0.000000 avg.loss:  0.537727 ETA:   0h 0m 0s  0.8% words/sec/thread:  174318 lr:  0.347338 avg.loss: 13.850315 ETA:   0h 0m39s100.0% words/sec/thread:  156653 lr:  0.000000 avg.loss:  0.537720 ETA:   0h 0m 0s


lr=0.35, dim=80, ws=5, epoch=40, neg=7, wordNgrams=2
Weighted F1=0.8513, Train F1=0.9870, Test F1=0.8546 



Read 7M words
Number of words:  149031
Number of labels: 870
Progress: 100.0% words/sec/thread:  154918 lr: -0.000004 avg.loss:  0.486991 ETA:   0h 0m 0s 154917 lr:  0.000000 avg.loss:  0.486991 ETA:   0h 0m 0s


lr=0.35, dim=80, ws=7, epoch=40, neg=7, wordNgrams=2
Weighted F1=0.8537, Train F1=0.9872, Test F1=0.8569 



Read 7M words
Number of words:  149031
Number of labels: 870
Progress: 100.0% words/sec/thread:  145455 lr:  0.000000 avg.loss:  0.807589 ETA:   0h 0m 0s  0.7% words/sec/thread:  155996 lr:  0.347619 avg.loss: 15.942118 ETA:   0h 0m44s  1.8% words/sec/thread:  156784 lr:  0.343637 avg.loss: 11.778028 ETA:   0h 0m43s 79.7% words/sec/thread:  146519 lr:  0.070911 avg.loss:  0.980088 ETA:   0h 0m 9s words/sec/thread:  146401 lr:  0.062978 avg.loss:  0.955778 ETA:   0h 0m 8s 91.1% words/sec/thread:  146066 lr:  0.031083 avg.loss:  0.873991 ETA:   0h 0m 4ss


lr=0.35, dim=80, ws=3, epoch=40, neg=7, wordNgrams=3
Weighted F1=0.8439, Train F1=0.9825, Test F1=0.8480 



Read 7M words
Number of words:  149031
Number of labels: 870
Progress: 100.0% words/sec/thread:  143955 lr:  0.000000 avg.loss:  0.809681 ETA:   0h 0m 0s 30.8% words/sec/thread:  144725 lr:  0.242232 avg.loss:  2.262559 ETA:   0h 0m33s 0m16s 94.1% words/sec/thread:  144101 lr:  0.020811 avg.loss:  0.857933 ETA:   0h 0m 2s


lr=0.35, dim=80, ws=5, epoch=40, neg=7, wordNgrams=3
Weighted F1=0.8434, Train F1=0.9825, Test F1=0.8474 



Read 7M words
Number of words:  149031
Number of labels: 870
Progress: 100.0% words/sec/thread:  145762 lr:  0.000000 avg.loss:  0.731548 ETA:   0h 0m 0s 145764 lr: -0.000010 avg.loss:  0.731548 ETA:   0h 0m 0s


lr=0.35, dim=80, ws=7, epoch=40, neg=7, wordNgrams=3
Weighted F1=0.8439, Train F1=0.9824, Test F1=0.8474 



Read 7M words
Number of words:  149031
Number of labels: 870
Progress: 100.0% words/sec/thread:  138730 lr:  0.000000 avg.loss:  1.079524 ETA:   0h 0m 0s  0.6% words/sec/thread:  145500 lr:  0.347778 avg.loss: 16.780361 ETA:   0h 0m47s ETA:   0h 0m 0s


lr=0.35, dim=80, ws=3, epoch=40, neg=7, wordNgrams=4
Weighted F1=0.8323, Train F1=0.9752, Test F1=0.8377 



Read 7M words
Number of words:  149031
Number of labels: 870
Progress: 100.0% words/sec/thread:  136812 lr:  0.000000 avg.loss:  0.879813 ETA:   0h 0m 0s  0.6% words/sec/thread:  143493 lr:  0.347810 avg.loss: 16.914282 ETA:   0h 0m47s 76.6% words/sec/thread:  137140 lr:  0.081816 avg.loss:  1.120510 ETA:   0h 0m11s 85.3% words/sec/thread:  137071 lr:  0.051403 avg.loss:  1.015823 ETA:   0h 0m 7s avg.loss:  0.879813 ETA:   0h 0m 0s


lr=0.35, dim=80, ws=5, epoch=40, neg=7, wordNgrams=4
Weighted F1=0.8329, Train F1=0.9751, Test F1=0.8381 



Read 7M words
Number of words:  149031
Number of labels: 870
Progress: 100.0% words/sec/thread:  137650 lr: -0.000007 avg.loss:  1.070074 ETA:   0h 0m 0s11.520659 ETA:   0h 0m47s 33.1% words/sec/thread:  139361 lr:  0.234230 avg.loss:  2.763784 ETA:   0h 0m33s 60.2% words/sec/thread:  138264 lr:  0.139195 avg.loss:  1.659300 ETA:   0h 0m19s 77.4% words/sec/thread:  138140 lr:  0.079214 avg.loss:  1.331843 ETA:   0h 0m11s 78.3% words/sec/thread:  138093 lr:  0.075808 avg.loss:  1.317461 ETA:   0h 0m10s 79.3% words/sec/thread:  138058 lr:  0.072382 avg.loss:  1.304090 ETA:   0h 0m10s 1.291504 ETA:   0h 0m 9s 81.3% words/sec/thread:  138052 lr:  0.065401 avg.loss:  1.278625 ETA:   0h 0m 9s 96.6% words/sec/thread:  137901 lr:  0.011928 avg.loss:  1.102920 ETA:   0h 0m 1s 99.3% words/sec/thread:  137837 lr:  0.002309 avg.loss:  1.075946 ETA:   0h 0m 0s% words/sec/thread:  137648 lr:  0.000000 avg.loss:  1.070074 ETA:   0h 0m 0s


lr=0.35, dim=80, ws=7, epoch=40, neg=7, wordNgrams=4
Weighted F1=0.8323, Train F1=0.9752, Test F1=0.8377 



Read 7M words
Number of words:  149031
Number of labels: 870
Progress: 100.0% words/sec/thread:  130375 lr:  0.000000 avg.loss:  1.254447 ETA:   0h 0m 0s  0.6% words/sec/thread:  138001 lr:  0.347893 avg.loss: 17.456259 ETA:   0h 0m49s 136504 lr:  0.338236 avg.loss: 11.068811 ETA:   0h 0m48s 26.5% words/sec/thread:  131696 lr:  0.257272 avg.loss:  3.682728 ETA:   0h 0m38s 55.1% words/sec/thread:  130724 lr:  0.157316 avg.loss:  2.058633 ETA:   0h 0m23s 80.9% words/sec/thread:  130550 lr:  0.066980 avg.loss:  1.492610 ETA:   0h 0m10s


lr=0.35, dim=80, ws=3, epoch=40, neg=7, wordNgrams=5
Weighted F1=0.8209, Train F1=0.9661, Test F1=0.8276 



Read 7M words
Number of words:  149031
Number of labels: 870
Progress: 100.0% words/sec/thread:  132868 lr:  0.000000 avg.loss:  1.329486 ETA:   0h 0m 0s  0.6% words/sec/thread:  147213 lr:  0.347754 avg.loss: 17.361855 ETA:   0h 0m46s  7.0% words/sec/thread:  138703 lr:  0.325399 avg.loss:  8.402951 ETA:   0h 0m46s 15.4% words/sec/thread:  134294 lr:  0.296250 avg.loss:  5.680884 ETA:   0h 0m43s 17.5% words/sec/thread:  134475 lr:  0.288684 avg.loss:  5.245906 ETA:   0h 0m42s 21.4% words/sec/thread:  134311 lr:  0.275149 avg.loss:  4.600212 ETA:   0h 0m40s 27.0% words/sec/thread:  134287 lr:  0.255433 avg.loss:  3.878432 ETA:   0h 0m37s 3.548764 ETA:   0h 0m35s 50.8% words/sec/thread:  133516 lr:  0.172109 avg.loss:  2.348524 ETA:   0h 0m25s 77.1% words/sec/thread:  133133 lr:  0.080226 avg.loss:  1.665295 ETA:   0h 0m11s  0h 0m 0s


lr=0.35, dim=80, ws=5, epoch=40, neg=7, wordNgrams=5
Weighted F1=0.8221, Train F1=0.9660, Test F1=0.8288 



Read 7M words
Number of words:  149031
Number of labels: 870
Progress: 100.0% words/sec/thread:  133389 lr:  0.000000 avg.loss:  1.294612 ETA:   0h 0m 0s  0.6% words/sec/thread:  138785 lr:  0.347881 avg.loss: 17.390646 ETA:   0h 0m49s  2.2% words/sec/thread:  139691 lr:  0.342206 avg.loss: 12.801095 ETA:   0h 0m48s  5.0% words/sec/thread:  139370 lr:  0.332340 avg.loss:  9.501888 ETA:   0h 0m47s  7.8% words/sec/thread:  138074 lr:  0.322713 avg.loss:  7.760231 ETA:   0h 0m46s 34.8% words/sec/thread:  134336 lr:  0.228183 avg.loss:  3.155874 ETA:   0h 0m33s 64.1% words/sec/thread:  133896 lr:  0.125484 avg.loss:  1.908524 ETA:   0h 0m18s 133733 lr:  0.022800 avg.loss:  1.369924 ETA:   0h 0m 3s 133613 lr:  0.005498 avg.loss:  1.312806 ETA:   0h 0m 0s100.0% words/sec/thread:  133614 lr:  0.000083 avg.loss:  1.294946 ETA:   0h 0m 0s


lr=0.35, dim=80, ws=7, epoch=40, neg=7, wordNgrams=5
Weighted F1=0.8208, Train F1=0.9655, Test F1=0.8277 



Read 7M words
Number of words:  149031
Number of labels: 870
Progress: 100.0% words/sec/thread:  120997 lr:  0.000000 avg.loss:  1.581966 ETA:   0h 0m 0s 33.9% words/sec/thread:  122061 lr:  0.231259 avg.loss:  3.818152 ETA:   0h 0m37s 36.4% words/sec/thread:  122194 lr:  0.222462 avg.loss:  3.655455 ETA:   0h 0m35s 52.7% words/sec/thread:  122091 lr:  0.165672 avg.loss:  2.653545 ETA:   0h 0m26s 74.0% words/sec/thread:  121741 lr:  0.090974 avg.loss:  1.993558 ETA:   0h 0m14s 97.8% words/sec/thread:  121282 lr:  0.007779 avg.loss:  1.606456 ETA:   0h 0m 1s


lr=0.35, dim=80, ws=3, epoch=40, neg=7, wordNgrams=7
Weighted F1=0.7968, Train F1=0.9458, Test F1=0.8074 



Read 7M words
Number of words:  149031
Number of labels: 870
Progress: 100.0% words/sec/thread:  118699 lr: -0.000012 avg.loss:  1.721961 ETA:   0h 0m 0s  6.8% words/sec/thread:  120477 lr:  0.326188 avg.loss:  9.523294 ETA:   0h 0m53s 12.9% words/sec/thread:  120118 lr:  0.304961 avg.loss:  7.185832 ETA:   0h 0m50s 0.301937 avg.loss:  6.952355 ETA:   0h 0m49s 15.1% words/sec/thread:  120009 lr:  0.297099 avg.loss:  6.593024 ETA:   0h 0m48s 33.5% words/sec/thread:  119317 lr:  0.232721 avg.loss:  3.994821 ETA:   0h 0m38s 48.8% words/sec/thread:  119122 lr:  0.179200 avg.loss:  3.037055 ETA:   0h 0m29s 51.8% words/sec/thread:  119204 lr:  0.168814 avg.loss:  2.907773 ETA:   0h 0m27s 0.000000 avg.loss:  1.721961 ETA:   0h 0m 0s


lr=0.35, dim=80, ws=5, epoch=40, neg=7, wordNgrams=7
Weighted F1=0.7959, Train F1=0.9453, Test F1=0.8068 



Read 7M words
Number of words:  149031
Number of labels: 870
Progress: 100.0% words/sec/thread:  118287 lr:  0.000000 avg.loss:  1.587188 ETA:   0h 0m 0s  0.5% words/sec/thread:  124798 lr:  0.348094 avg.loss: 17.751329 ETA:   0h 0m55s  1.4% words/sec/thread:  124158 lr:  0.344960 avg.loss: 15.637154 ETA:   0h 0m54s  2.3% words/sec/thread:  123423 lr:  0.341864 avg.loss: 14.313006 ETA:   0h 0m54s  3.2% words/sec/thread:  123206 lr:  0.338758 avg.loss: 13.242870 ETA:   0h 0m54s  4.1% words/sec/thread:  122490 lr:  0.335721 avg.loss: 12.378911 ETA:   0h 0m54s  4.9% words/sec/thread:  122029 lr:  0.332685 avg.loss: 11.534034 ETA:   0h 0m53s  5.8% words/sec/thread:  121600 lr:  0.329666 avg.loss: 10.736617 ETA:   0h 0m53s  6.7% words/sec/thread:  121375 lr:  0.326630 avg.loss:  9.939037 ETA:   0h 0m53s  7.5% words/sec/thread:  120986 lr:  0.323641 avg.loss:  9.323022 ETA:   0h 0m52s  8.4% words/sec/thread:  120732 lr:  0.320638 avg.loss:  8.856715 ETA:   0h 0m52s  9.3% words/sec/thread:  1

lr=0.35, dim=80, ws=7, epoch=40, neg=7, wordNgrams=7
Weighted F1=0.7967, Train F1=0.9456, Test F1=0.8070 

