In [135]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import f1_score, classification_report
from transformers import RobertaTokenizer, RobertaModel
import torch

df = pd.read_csv('homework2_data.csv', sep = ',')

df['word_count'] = df['text'].apply(lambda x: len(str(x).split()))
min_words = 15
df = df[df['word_count'] >= min_words].copy()


# df = df.sample(frac = 0.03)
print(df)

            id                                               text author  \
0      id26305  This process, however, afforded me no means of...    EAP   
2      id11008  In his left hand was a gold snuff box, from wh...    EAP   
3      id27763  How lovely is spring As we looked from Windsor...    MWS   
4      id12958  Finding nothing else, not even gold, the Super...    HPL   
5      id22965  A youth passed in solitude, my best years spen...    MWS   
...        ...                                                ...    ...   
19572  id03325  But these and other difficulties attending res...    EAP   
19573  id07567  Stress of weather drove us up the Adriatic Gul...    MWS   
19574  id17718  I could have fancied, while I looked at it, th...    EAP   
19577  id17513  For an item of news like this, it strikes us i...    EAP   
19578  id00393  He laid a gnarled claw on my shoulder, and it ...    HPL   

       word_count  
0              41  
2              36  
3              34  
4      

In [136]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OrdinalEncoder

df = df.drop(columns=['id'])

categorial_features = ['author']

ct = ColumnTransformer(
       transformers=[
           ('ordinal', OrdinalEncoder(), categorial_features)
       ],
       remainder='passthrough',
       verbose_feature_names_out=False
   )

encoded = ct.fit_transform(df)
df = pd.DataFrame(encoded, columns=ct.get_feature_names_out())
df['author'] = df['author'].astype(int)
df


Unnamed: 0,author,text,word_count
0,0,"This process, however, afforded me no means of...",41
1,0,"In his left hand was a gold snuff box, from wh...",36
2,2,How lovely is spring As we looked from Windsor...,34
3,1,"Finding nothing else, not even gold, the Super...",27
4,2,"A youth passed in solitude, my best years spen...",83
...,...,...,...
14838,0,But these and other difficulties attending res...,51
14839,2,Stress of weather drove us up the Adriatic Gul...,27
14840,0,"I could have fancied, while I looked at it, th...",20
14841,0,"For an item of news like this, it strikes us i...",15


In [137]:


class_column = 'author'
print(df[class_column].value_counts())
min_size = df[class_column].value_counts().min()

print('min class size =', min_size)

author
0    5386
1    4793
2    4664
Name: count, dtype: int64
min class size = 4664


In [138]:
df_downsampled = pd.DataFrame()
for class_type in pd.unique(df[class_column].values):
    sampled_class_df = df[df[class_column] == class_type].sample(min_size, random_state=777)
    df_downsampled = pd.concat([df_downsampled, sampled_class_df], ignore_index=True)
df_downsampled = df_downsampled[df_downsampled[class_column].notnull()]
df_downsampled = df_downsampled[df_downsampled['text'].notnull()]
print(df_downsampled)
print(df_downsampled[class_column].value_counts())
df = df_downsampled

       author                                               text word_count
0           0  Yes: the king is coming See the people are agh...         22
1           0  Its close resemblance to the medicinal leech c...         16
2           0  The opinion of Bob, the devil who kept dark ab...         37
3           0  Cut loose, then, in high spirits, and rose gen...         27
4           0  And the evening closed in upon me thus and the...         79
...       ...                                                ...        ...
13987       1  According to Mwanu, the grey city and the hybr...         23
13988       1  My host now took my hand to draw me to one of ...         35
13989       1  It was designed to open in my direction, hence...         29
13990       1  He had awaked to find himself standing bloody ...         26
13991       1  I climbed gentle hills from whose summits I co...         35

[13992 rows x 3 columns]
author
0    4664
2    4664
1    4664
Name: count, dtype: int64

In [139]:
from sklearn.model_selection import train_test_split

# Шаг 2: Разделение данных на train/test (80/20)
X_train, X_test, y_train, y_test = train_test_split(
    df['text'].values,
    df['author'].values,
    test_size=0.2,
    random_state=42,
    stratify=df['author']  # для сохранения пропорций классов
)

print(f"Training set size: {len(X_train)} ({len(X_train)/len(df)*100:.1f}%)")
print(f"Test set size: {len(X_test)} ({len(X_test)/len(df)*100:.1f}%)")



Training set size: 11193 (80.0%)
Test set size: 2799 (20.0%)


In [140]:
# tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
# bert_model = RobertaModel.from_pretrained('roberta-base')

# # Перевод модели в режим оценки (отключение dropout и т.д.)
# bert_model.eval()

# device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# bert_model.to(device)

from sentence_transformers import SentenceTransformer

print("Loading all-roberta-large-v1 model...")
model = SentenceTransformer('all-roberta-large-v1')

# Move model to GPU if available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)
print(f"Model loaded on device: {device}")


Loading all-roberta-large-v1 model...


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/650 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.42G [00:00<?, ?B/s]

Loading weights:   0%|          | 0/391 [00:00<?, ?it/s]

[1mRobertaModel LOAD REPORT[0m from: sentence-transformers/all-roberta-large-v1
Key                     | Status     |  | 
------------------------+------------+--+-
embeddings.position_ids | UNEXPECTED |  | 

[3mNotes:
- UNEXPECTED[3m	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.[0m


tokenizer_config.json:   0%|          | 0.00/328 [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/191 [00:00<?, ?B/s]

Model loaded on device: cpu


In [141]:
from tqdm import tqdm

# def get_embeddings(texts, batch_size=8):
#     embeddings = []
    
#     # Внешний прогресс-бар для текстов
#     with tqdm(total=len(texts), desc="Всего текстов", position=0) as pbar_texts:
#         # Внутренний прогресс-бар для батчей
#         with tqdm(total=(len(texts) + batch_size - 1) // batch_size, 
#                   desc="Батчи", position=1, leave=False) as pbar_batches:
            
#             for i in range(0, len(texts), batch_size):
#                 batch_texts = texts[i:i + batch_size]
#                 actual_size = len(batch_texts)
                
#                 encoded = tokenizer(
#                     batch_texts.tolist() if isinstance(batch_texts, np.ndarray) else batch_texts,
#                     padding=True,
#                     truncation=True,
#                     max_length=512,
#                     return_tensors='pt'
#                 )
                
#                 encoded = {key: val.to(device) for key, val in encoded.items()}
                
#                 with torch.no_grad():
#                     outputs = bert_model(**encoded)
#                     cls_embeddings = outputs.last_hidden_state[:, 0, :].cpu().numpy()
#                     embeddings.append(cls_embeddings)
                
#                 pbar_texts.update(actual_size)
#                 pbar_batches.update(1)
    
#     return np.vstack(embeddings)



def get_embeddings(texts, batch_size=8):
    """
    Get embeddings using all-mpnet-base-v2 model.
    Sentence Transformers handles tokenization and pooling internally.
    """
    embeddings = []
    
    # Convert to list if numpy array
    if isinstance(texts, np.ndarray):
        texts = texts.tolist()
    
    # Process in batches with progress bar
    with tqdm(total=len(texts), desc="Generating embeddings") as pbar:
        for i in range(0, len(texts), batch_size):
            batch_texts = texts[i:i + batch_size]
            
            # Sentence Transformers encode method handles everything
            batch_embeddings = model.encode(
                batch_texts,
                convert_to_numpy=True,
                show_progress_bar=False,
                device=device
            )
            
            embeddings.append(batch_embeddings)
            pbar.update(len(batch_texts))
    
    return np.vstack(embeddings)


In [142]:
import numpy as np

def extract_additional_features(texts):
    """Извлекаем дополнительные признаки"""
    features = []
    
    for text in texts:
        text_str = str(text)
        
        feat = {
            'length': len(text_str),  # Длина текста
            'word_count': len(text_str.split()),  # Количество слов
            'avg_word_length': np.mean([len(w) for w in text_str.split()]) if text_str.split() else 0,
        }
        features.append(list(feat.values()))
    
    return np.array(features)

print("="*50)
print("ЭМБЕДДИНГИ + ДОПОЛНИТЕЛЬНЫЕ ПРИЗНАКИ")
print("="*50)

# Получаем эмбеддинги
X_train_emb = get_embeddings(X_train)
X_test_emb = get_embeddings(X_test)

# Добавляем дополнительные признаки
X_train_extra = extract_additional_features(X_train)
X_test_extra = extract_additional_features(X_test)

# Объединяем
X_train_embeddings = np.hstack([X_train_emb, X_train_extra])
X_test_embeddings = np.hstack([X_test_emb, X_test_extra])

print(f"Эмбеддинги размерность: {X_train_emb.shape[1]}")
print(f"Дополнительные признаки: {X_train_extra.shape[1]}")
print(f"Итоговая размерность: {X_train_embeddings.shape[1]}")


ЭМБЕДДИНГИ + ДОПОЛНИТЕЛЬНЫЕ ПРИЗНАКИ


Generating embeddings: 100%|██████████| 11193/11193 [32:57<00:00,  5.66it/s]
Generating embeddings: 100%|██████████| 2799/2799 [11:21<00:00,  4.10it/s]

Эмбеддинги размерность: 1024
Дополнительные признаки: 3
Итоговая размерность: 1027





In [143]:
print(f"Размерность эмбеддингов: {X_train_embeddings.shape[1]}")

print("Проверка меток классов:")
print(f"Тип y_train: {type(y_train)}")
print(f"Dtype y_train: {y_train.dtype if hasattr(y_train, 'dtype') else 'N/A'}")
print(f"Первые 10 элементов y_train: {y_train[:10]}")
print(f"Уникальные значения: {np.unique(y_train)}")
print(f"Количество классов: {len(np.unique(y_train))}")


Размерность эмбеддингов: 1027
Проверка меток классов:
Тип y_train: <class 'numpy.ndarray'>
Dtype y_train: int64
Первые 10 элементов y_train: [0 2 0 2 1 0 2 0 1 0]
Уникальные значения: [0 1 2]
Количество классов: 3


In [None]:
# from sklearn.model_selection import GridSearchCV, StratifiedKFold
# from xgboost import XGBClassifier
# from sklearn.metrics import f1_score, classification_report

# print("="*50)
# print("БЫСТРЫЙ GRID SEARCH ДЛЯ XGBOOST")
# print("="*50)

# param_grid_quick = {
#     'n_estimators': [200, 300],
#     'max_depth': [5, 7, 9],
#     'learning_rate': [0.05, 0.1],
#     'subsample': [0.8, 1.0],
#     'colsample_bytree': [0.8, 1.0],
# }

# print(f"Комбинаций: {2*3*2*2*2} = 48")
# print(f"С CV=3: 144 обучений\n")

# xgb = XGBClassifier(
#     random_state=42,
#     n_jobs=4,
#     eval_metric='mlogloss',
#     verbosity=3
# )

# cv_strategy = StratifiedKFold(n_splits=3, shuffle=True, random_state=42)

# grid_search = GridSearchCV(
#     estimator=xgb,
#     param_grid=param_grid_quick,
#     cv=cv_strategy,
#     scoring='f1_weighted',
#     scoring='f1_weighted',
#     n_jobs=4,
#     verbose=3  # Показывать прогресс
# )

# print("\n" + "="*50)
# print("ЛУЧШИЕ ПАРАМЕТРЫ")
# print("="*50)
# for key, value in grid_search.best_params_.items():
#     print(f"  {key}: {value}")

# grid_search.fit(X_train_embeddings, y_train)

# from xgboost import XGBClassifier
# from sklearn.metrics import f1_score, classification_report

# print("="*50)
# print("XGBOOST КЛАССИФИКАТОР")
# print("="*50)

# xgb_clf = XGBClassifier(
#     n_estimators=300,
#     max_depth=5,
#     learning_rate=0.1,
#     subsample=0.8,
#     colsample_bytree=0.8,
#     random_state=42,
#     n_jobs=4,
#     eval_metric='mlogloss'
# )

# xgb_clf.fit(X_train_embeddings, y_train)


from sklearn.ensemble import VotingClassifier
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier

print("="*50)
print("АНСАМБЛЬ МОДЕЛЕЙ")
print("="*50)

# Создаем три модели
rf_clf = RandomForestClassifier(
    n_estimators=300,
    max_depth=30,
    min_samples_split=10,
    min_samples_leaf=4,
    random_state=42, 
    n_jobs=4,
    verbose=0
)

xgb_clf = XGBClassifier(
    n_estimators=300, 
    max_depth=5, 
    colsample_bytree = 0.8,
    learning_rate = 0.1,
    subsample = 0.8,
    random_state=42, 
    n_jobs=4,
    verbose=0
)

lgbm_clf = LGBMClassifier(
    n_estimators=300, 
    max_depth=7, 
    colsample_bytree=0.8,
    subsample=0.8,
    learning_rate=0.1,
    random_state=42, 
    n_jobs=4,
    verbose=0
)

# Объединяем в ансамбль
ensemble = VotingClassifier(
    estimators=[
        ('rf', rf_clf),
        ('xgb', xgb_clf),
        ('lgbm', lgbm_clf)
    ],
    voting='soft',  # Использует вероятности
    n_jobs=4,
    verbose=3
)

print("Обучение ансамбля из 3 моделей...")
ensemble.fit(X_train_embeddings, y_train)


АНСАМБЛЬ МОДЕЛЕЙ
Обучение ансамбля из 3 моделей...


[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.


building tree 1 of 300
building tree 2 of 300
building tree 3 of 300
building tree 4 of 300
building tree 5 of 300
building tree 6 of 300
building tree 7 of 300
building tree 8 of 300


Parameters: { "verbose" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


building tree 9 of 300
building tree 10 of 300
building tree 11 of 300
building tree 12 of 300
[LightGBM] [Debug] Dataset::GetMultiBinFromAllFeatures: sparse rate 0.000000
[LightGBM] [Debug] init for col-wise cost 0.000053 seconds, init for row-wise cost 0.060177 seconds
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.091620 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 261735
[LightGBM] [Info] Number of data points in the train set: 11193, number of used features: 1027
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Debug] Trained a tree with leaves = 31 and depth = 7
[LightGBM] [Debug] Trained a tree with leaves = 31 and depth = 7
building tree 13 of 300
[LightGBM] [Debug] Trained a tree with leaves = 31 and depth = 6
building tree 14 of 300
[LightGBM] [Debug] Trained a t

[Parallel(n_jobs=4)]: Done  24 tasks      | elapsed:    3.5s


building tree 29 of 300
[LightGBM] [Debug] Trained a tree with leaves = 31 and depth = 7
[LightGBM] [Debug] Trained a tree with leaves = 31 and depth = 7
[LightGBM] [Debug] Trained a tree with leaves = 31 and depth = 7
[LightGBM] [Debug] Trained a tree with leaves = 31 and depth = 7
[LightGBM] [Debug] Trained a tree with leaves = 31 and depth = 7
building tree 30 of 300
[LightGBM] [Debug] Trained a tree with leaves = 31 and depth = 7
building tree 31 of 300
[LightGBM] [Debug] Trained a tree with leaves = 31 and depth = 7
building tree 32 of 300
[LightGBM] [Debug] Trained a tree with leaves = 31 and depth = 7
[LightGBM] [Debug] Trained a tree with leaves = 31 and depth = 7
building tree 33 of 300
[LightGBM] [Debug] Trained a tree with leaves = 31 and depth = 7
[LightGBM] [Debug] Trained a tree with leaves = 31 and depth = 7
[LightGBM] [Debug] Trained a tree with leaves = 31 and depth = 7
[LightGBM] [Debug] Trained a tree with leaves = 31 and depth = 7
building tree 34 of 300
[LightGBM] 

[Parallel(n_jobs=4)]: Done 120 tasks      | elapsed:   17.0s


building tree 125 of 300
[LightGBM] [Debug] Trained a tree with leaves = 31 and depth = 7
[LightGBM] [Debug] Trained a tree with leaves = 31 and depth = 7
[LightGBM] [Debug] Trained a tree with leaves = 31 and depth = 7
[LightGBM] [Debug] Trained a tree with leaves = 31 and depth = 7
building tree 126 of 300
[LightGBM] [Debug] Trained a tree with leaves = 31 and depth = 7
[LightGBM] [Debug] Trained a tree with leaves = 31 and depth = 7
building tree 127 of 300
[LightGBM] [Debug] Trained a tree with leaves = 31 and depth = 7
building tree 128 of 300
[LightGBM] [Debug] Trained a tree with leaves = 31 and depth = 7
[LightGBM] [Debug] Trained a tree with leaves = 31 and depth = 7
[LightGBM] [Debug] Trained a tree with leaves = 31 and depth = 7
building tree 129 of 300
[LightGBM] [Debug] Trained a tree with leaves = 31 and depth = 7
[LightGBM] [Debug] Trained a tree with leaves = 31 and depth = 7
[LightGBM] [Debug] Trained a tree with leaves = 31 and depth = 7
building tree 130 of 300
[Ligh

[Parallel(n_jobs=4)]: Done 280 tasks      | elapsed:   50.2s


[LightGBM] [Debug] Trained a tree with leaves = 31 and depth = 7
[LightGBM] [Debug] Trained a tree with leaves = 31 and depth = 7
[LightGBM] [Debug] Trained a tree with leaves = 31 and depth = 7
[LightGBM] [Debug] Trained a tree with leaves = 31 and depth = 7
building tree 285 of 300
[LightGBM] [Debug] Trained a tree with leaves = 31 and depth = 7
[LightGBM] [Debug] Trained a tree with leaves = 31 and depth = 7
[LightGBM] [Debug] Trained a tree with leaves = 31 and depth = 7
[LightGBM] [Debug] Trained a tree with leaves = 31 and depth = 7
[LightGBM] [Debug] Trained a tree with leaves = 31 and depth = 7
building tree 286 of 300
[LightGBM] [Debug] Trained a tree with leaves = 31 and depth = 7
building tree 287 of 300
[LightGBM] [Debug] Trained a tree with leaves = 31 and depth = 7
building tree 288 of 300
[LightGBM] [Debug] Trained a tree with leaves = 31 and depth = 7
[LightGBM] [Debug] Trained a tree with leaves = 31 and depth = 7
[LightGBM] [Debug] Trained a tree with leaves = 31 and 

[Parallel(n_jobs=4)]: Done 300 out of 300 | elapsed:   57.4s finished


[LightGBM] [Debug] Trained a tree with leaves = 31 and depth = 7
[LightGBM] [Debug] Trained a tree with leaves = 31 and depth = 7
[LightGBM] [Debug] Trained a tree with leaves = 31 and depth = 7
[LightGBM] [Debug] Trained a tree with leaves = 31 and depth = 7
[LightGBM] [Debug] Trained a tree with leaves = 31 and depth = 7
[LightGBM] [Debug] Trained a tree with leaves = 31 and depth = 7
[LightGBM] [Debug] Trained a tree with leaves = 31 and depth = 7
[LightGBM] [Debug] Trained a tree with leaves = 31 and depth = 7
[LightGBM] [Debug] Trained a tree with leaves = 31 and depth = 7
[LightGBM] [Debug] Trained a tree with leaves = 31 and depth = 7
[LightGBM] [Debug] Trained a tree with leaves = 31 and depth = 7
[LightGBM] [Debug] Trained a tree with leaves = 31 and depth = 7
[LightGBM] [Debug] Trained a tree with leaves = 31 and depth = 7
[LightGBM] [Debug] Trained a tree with leaves = 31 and depth = 7
[LightGBM] [Debug] Trained a tree with leaves = 31 and depth = 7
[LightGBM] [Debug] Traine

0,1,2
,estimators,"[('rf', ...), ('xgb', ...), ...]"
,voting,'soft'
,weights,
,n_jobs,4
,flatten_transform,True
,verbose,3

0,1,2
,n_estimators,300
,criterion,'gini'
,max_depth,30
,min_samples_split,10
,min_samples_leaf,4
,min_weight_fraction_leaf,0.0
,max_features,'sqrt'
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True

0,1,2
,objective,'binary:logistic'
,base_score,
,booster,
,callbacks,
,colsample_bylevel,
,colsample_bynode,
,colsample_bytree,0.8
,device,
,early_stopping_rounds,
,enable_categorical,False

0,1,2
,boosting_type,'gbdt'
,num_leaves,31
,max_depth,7
,learning_rate,0.1
,n_estimators,300
,subsample_for_bin,200000
,objective,
,class_weight,
,min_split_gain,0.0
,min_child_weight,0.001


In [145]:
y_pred = ensemble.predict(X_test_embeddings)

# Шаг 8: Вычисление метрик
# F1-score для многоклассовой классификации
f1_macro = f1_score(y_test, y_pred, average='macro')
f1_micro = f1_score(y_test, y_pred, average='micro')
f1_weighted = f1_score(y_test, y_pred, average='weighted')

print("\n" + "="*50)
print("РЕЗУЛЬТАТЫ")
print("="*50)
print(f"\nF1-score (macro):    {f1_macro:.4f}")
print(f"F1-score (micro):    {f1_micro:.4f}")
print(f"F1-score (weighted): {f1_weighted:.4f}")

# Детальный отчет по классам
print("\n" + "="*50)
print("ДЕТАЛЬНЫЙ ОТЧЕТ ПО КЛАССАМ")
print("="*50)
print(classification_report(y_test, y_pred))

# Дополнительно: важность признаков
print("\nТочность на обучающей выборке:", ensemble.score(X_train_embeddings, y_train))
print("Точность на тестовой выборке:", ensemble.score(X_test_embeddings, y_test))

[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  24 tasks      | elapsed:    0.0s
[Parallel(n_jobs=4)]: Done 120 tasks      | elapsed:    0.0s
[Parallel(n_jobs=4)]: Done 280 tasks      | elapsed:    0.1s
[Parallel(n_jobs=4)]: Done 300 out of 300 | elapsed:    0.1s finished
[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.



РЕЗУЛЬТАТЫ

F1-score (macro):    0.7865
F1-score (micro):    0.7867
F1-score (weighted): 0.7865

ДЕТАЛЬНЫЙ ОТЧЕТ ПО КЛАССАМ
              precision    recall  f1-score   support

           0       0.76      0.75      0.76       933
           1       0.80      0.82      0.81       933
           2       0.80      0.80      0.80       933

    accuracy                           0.79      2799
   macro avg       0.79      0.79      0.79      2799
weighted avg       0.79      0.79      0.79      2799



[Parallel(n_jobs=4)]: Done  24 tasks      | elapsed:    0.0s
[Parallel(n_jobs=4)]: Done 120 tasks      | elapsed:    0.1s
[Parallel(n_jobs=4)]: Done 280 tasks      | elapsed:    0.3s
[Parallel(n_jobs=4)]: Done 300 out of 300 | elapsed:    0.3s finished



Точность на обучающей выборке: 1.0
Точность на тестовой выборке: 0.7867095391211146


[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  24 tasks      | elapsed:    0.0s
[Parallel(n_jobs=4)]: Done 120 tasks      | elapsed:    0.0s
[Parallel(n_jobs=4)]: Done 280 tasks      | elapsed:    0.1s
[Parallel(n_jobs=4)]: Done 300 out of 300 | elapsed:    0.1s finished
