In [30]:
import pandas as pd
import numpy as np
import sklearn
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV

table = pd.read_excel("/content/drive/MyDrive/doc_comment_summary.xlsx", header= None)
table = table.dropna() # Можно не использовать, так как drop() все равно уберет пропуски из-за их типа NaN
table = table.drop(table[(table[1] == 0) | (table[1].apply(type)!=int) | (table[0].apply(type)!=str)].index, axis=0)
table[2] = (table[1] > 0).astype(bool)
train, test = train_test_split(table, test_size= 0.2, random_state = 22)
for sample in [train, test]:
  print(sample.shape[0]) # Количество строк в каждой выборке
  print(sample[sample[2] == 1].shape[0] / sample.shape[0]) # Видно, что доля положительных комментариев в тестовой и тренировочной выборках близки, данные распределены равномерно

10315
0.1679108095007271
2579
0.16556804963164018


In [None]:

pipeline_base = Pipeline([
  ("vect", TfidfVectorizer()),
  ("clf", LogisticRegression(random_state=123)), # Воспользуемся логистической регрессией
  ], verbose = True)
Y_train = train[2].astype('float')
Y_test = test[2].astype('float')


param = {'clf__penalty': ['l1','l2'],
        'clf__C': [1.0, 5, 10],
        'clf__solver': ['lbfgs', 'liblinear'],
        'clf__max_iter': [25, 50, 100],
         'vect__ngram_range': [(1,1),(2,2)],
          'vect__max_df': [0.5, 1],
        } # Подбираемые параметры логистической регрессии и извлекателя признаков

grid = GridSearchCV(estimator=pipeline_base,
            param_grid= param,
            cv=3,
            scoring= 'accuracy', # Вывод метрики 
            verbose = 3 ) # Вывод большего количества информации в процессе обучения




# Базовая Модель без подбора параметров


In [None]:
pipeline_base.fit(train[0], Y_train) # Обучение без подбора параметров
result_proba = pipeline_base.predict_proba(test[0]) # Массив вероятностей тональности в каждой строке
result = pipeline_base.predict(test[0]) # Массив предсказанных тональностей

[Pipeline] .............. (step 1 of 2) Processing vect, total=   2.6s
[Pipeline] ............... (step 2 of 2) Processing clf, total=   2.7s


In [None]:
accuracy_score(Y_test, result) # Метрика правильности без подбора параметров


0.8631252423419931

# Модель с подбираемыми параметрами

In [None]:
grid.fit(train[0], Y_train) # Обучение с лучшими параметрами


Fitting 3 folds for each of 144 candidates, totalling 432 fits
[Pipeline] .............. (step 1 of 2) Processing vect, total=   2.6s
[CV 1/3] END clf__C=1.0, clf__max_iter=25, clf__penalty=l1, clf__solver=lbfgs, vect__max_df=0.5, vect__ngram_range=(1, 1);, score=nan total time=   2.6s
[Pipeline] .............. (step 1 of 2) Processing vect, total=   2.1s
[CV 2/3] END clf__C=1.0, clf__max_iter=25, clf__penalty=l1, clf__solver=lbfgs, vect__max_df=0.5, vect__ngram_range=(1, 1);, score=nan total time=   2.1s
[Pipeline] .............. (step 1 of 2) Processing vect, total=   1.5s
[CV 3/3] END clf__C=1.0, clf__max_iter=25, clf__penalty=l1, clf__solver=lbfgs, vect__max_df=0.5, vect__ngram_range=(1, 1);, score=nan total time=   1.5s
[Pipeline] .............. (step 1 of 2) Processing vect, total=   3.8s
[CV 1/3] END clf__C=1.0, clf__max_iter=25, clf__penalty=l1, clf__solver=lbfgs, vect__max_df=0.5, vect__ngram_range=(2, 2);, score=nan total time=   3.8s
[Pipeline] .............. (step 1 of 2) P

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[Pipeline] ............... (step 2 of 2) Processing clf, total=   1.0s
[CV 1/3] END clf__C=1.0, clf__max_iter=25, clf__penalty=l2, clf__solver=lbfgs, vect__max_df=0.5, vect__ngram_range=(1, 1);, score=0.855 total time=   3.3s
[Pipeline] .............. (step 1 of 2) Processing vect, total=   2.5s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[Pipeline] ............... (step 2 of 2) Processing clf, total=   1.6s
[CV 2/3] END clf__C=1.0, clf__max_iter=25, clf__penalty=l2, clf__solver=lbfgs, vect__max_df=0.5, vect__ngram_range=(1, 1);, score=0.854 total time=   4.7s
[Pipeline] .............. (step 1 of 2) Processing vect, total=   1.6s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[Pipeline] ............... (step 2 of 2) Processing clf, total=   1.2s
[CV 3/3] END clf__C=1.0, clf__max_iter=25, clf__penalty=l2, clf__solver=lbfgs, vect__max_df=0.5, vect__ngram_range=(1, 1);, score=0.850 total time=   3.4s
[Pipeline] .............. (step 1 of 2) Processing vect, total=   3.6s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[Pipeline] ............... (step 2 of 2) Processing clf, total=   5.5s
[CV 1/3] END clf__C=1.0, clf__max_iter=25, clf__penalty=l2, clf__solver=lbfgs, vect__max_df=0.5, vect__ngram_range=(2, 2);, score=0.839 total time=  10.5s
[Pipeline] .............. (step 1 of 2) Processing vect, total=   3.7s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[Pipeline] ............... (step 2 of 2) Processing clf, total=   4.3s
[CV 2/3] END clf__C=1.0, clf__max_iter=25, clf__penalty=l2, clf__solver=lbfgs, vect__max_df=0.5, vect__ngram_range=(2, 2);, score=0.837 total time=   8.9s
[Pipeline] .............. (step 1 of 2) Processing vect, total=   5.0s
[Pipeline] ............... (step 2 of 2) Processing clf, total=   3.5s
[CV 3/3] END clf__C=1.0, clf__max_iter=25, clf__penalty=l2, clf__solver=lbfgs, vect__max_df=0.5, vect__ngram_range=(2, 2);, score=0.837 total time=   9.3s
[Pipeline] .............. (step 1 of 2) Processing vect, total=   1.4s
[Pipeline] ............... (step 2 of 2) Processing clf, total=   0.3s
[CV 1/3] END clf__C=1.0, clf__max_iter=25, clf__penalty=l2, clf__solver=lbfgs, vect__max_df=1, vect__ngram_range=(1, 1);, score=0.832 total time=   2.3s
[Pipeline] .............. (step 1 of 2) Processing vect, total=   1.3s
[Pipeline] ............... (step 2 of 2) Processing clf, total=   0.4s
[CV 2/3] END clf__C=1.0, clf__max_iter=2

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[Pipeline] ............... (step 2 of 2) Processing clf, total=   0.8s
[CV 1/3] END clf__C=5, clf__max_iter=25, clf__penalty=l2, clf__solver=lbfgs, vect__max_df=0.5, vect__ngram_range=(1, 1);, score=0.869 total time=   3.0s
[Pipeline] .............. (step 1 of 2) Processing vect, total=   1.5s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[Pipeline] ............... (step 2 of 2) Processing clf, total=   2.0s
[CV 2/3] END clf__C=5, clf__max_iter=25, clf__penalty=l2, clf__solver=lbfgs, vect__max_df=0.5, vect__ngram_range=(1, 1);, score=0.868 total time=   4.5s
[Pipeline] .............. (step 1 of 2) Processing vect, total=   2.0s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[Pipeline] ............... (step 2 of 2) Processing clf, total=   1.2s
[CV 3/3] END clf__C=5, clf__max_iter=25, clf__penalty=l2, clf__solver=lbfgs, vect__max_df=0.5, vect__ngram_range=(1, 1);, score=0.862 total time=   3.8s
[Pipeline] .............. (step 1 of 2) Processing vect, total=   3.6s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[Pipeline] ............... (step 2 of 2) Processing clf, total=   4.6s
[CV 1/3] END clf__C=5, clf__max_iter=25, clf__penalty=l2, clf__solver=lbfgs, vect__max_df=0.5, vect__ngram_range=(2, 2);, score=0.852 total time=   9.6s
[Pipeline] .............. (step 1 of 2) Processing vect, total=   4.6s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[Pipeline] ............... (step 2 of 2) Processing clf, total=   4.3s
[CV 2/3] END clf__C=5, clf__max_iter=25, clf__penalty=l2, clf__solver=lbfgs, vect__max_df=0.5, vect__ngram_range=(2, 2);, score=0.855 total time=   9.7s
[Pipeline] .............. (step 1 of 2) Processing vect, total=   4.2s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[Pipeline] ............... (step 2 of 2) Processing clf, total=   4.7s
[CV 3/3] END clf__C=5, clf__max_iter=25, clf__penalty=l2, clf__solver=lbfgs, vect__max_df=0.5, vect__ngram_range=(2, 2);, score=0.851 total time=   9.8s
[Pipeline] .............. (step 1 of 2) Processing vect, total=   1.3s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[Pipeline] ............... (step 2 of 2) Processing clf, total=   0.5s
[CV 1/3] END clf__C=5, clf__max_iter=25, clf__penalty=l2, clf__solver=lbfgs, vect__max_df=1, vect__ngram_range=(1, 1);, score=0.837 total time=   2.3s
[Pipeline] .............. (step 1 of 2) Processing vect, total=   1.3s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[Pipeline] ............... (step 2 of 2) Processing clf, total=   0.4s
[CV 2/3] END clf__C=5, clf__max_iter=25, clf__penalty=l2, clf__solver=lbfgs, vect__max_df=1, vect__ngram_range=(1, 1);, score=0.842 total time=   2.2s
[Pipeline] .............. (step 1 of 2) Processing vect, total=   1.3s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[Pipeline] ............... (step 2 of 2) Processing clf, total=   0.6s
[CV 3/3] END clf__C=5, clf__max_iter=25, clf__penalty=l2, clf__solver=lbfgs, vect__max_df=1, vect__ngram_range=(1, 1);, score=0.836 total time=   2.8s
[Pipeline] .............. (step 1 of 2) Processing vect, total=   4.3s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[Pipeline] ............... (step 2 of 2) Processing clf, total=   3.0s
[CV 1/3] END clf__C=5, clf__max_iter=25, clf__penalty=l2, clf__solver=lbfgs, vect__max_df=1, vect__ngram_range=(2, 2);, score=0.837 total time=   8.0s
[Pipeline] .............. (step 1 of 2) Processing vect, total=   2.9s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[Pipeline] ............... (step 2 of 2) Processing clf, total=   3.4s
[CV 2/3] END clf__C=5, clf__max_iter=25, clf__penalty=l2, clf__solver=lbfgs, vect__max_df=1, vect__ngram_range=(2, 2);, score=0.844 total time=   7.6s
[Pipeline] .............. (step 1 of 2) Processing vect, total=   3.9s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[Pipeline] ............... (step 2 of 2) Processing clf, total=   2.9s
[CV 3/3] END clf__C=5, clf__max_iter=25, clf__penalty=l2, clf__solver=lbfgs, vect__max_df=1, vect__ngram_range=(2, 2);, score=0.838 total time=   7.5s
[Pipeline] .............. (step 1 of 2) Processing vect, total=   1.5s
[Pipeline] ............... (step 2 of 2) Processing clf, total=   0.3s
[CV 1/3] END clf__C=5, clf__max_iter=25, clf__penalty=l2, clf__solver=liblinear, vect__max_df=0.5, vect__ngram_range=(1, 1);, score=0.869 total time=   2.3s
[Pipeline] .............. (step 1 of 2) Processing vect, total=   1.5s
[Pipeline] ............... (step 2 of 2) Processing clf, total=   0.3s
[CV 2/3] END clf__C=5, clf__max_iter=25, clf__penalty=l2, clf__solver=liblinear, vect__max_df=0.5, vect__ngram_range=(1, 1);, score=0.868 total time=   2.7s
[Pipeline] .............. (step 1 of 2) Processing vect, total=   2.5s
[Pipeline] ............... (step 2 of 2) Processing clf, total=   0.6s
[CV 3/3] END clf__C=5, clf__max_iter=2

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[Pipeline] ............... (step 2 of 2) Processing clf, total=   2.0s
[CV 1/3] END clf__C=5, clf__max_iter=50, clf__penalty=l2, clf__solver=lbfgs, vect__max_df=0.5, vect__ngram_range=(1, 1);, score=0.869 total time=   4.2s
[Pipeline] .............. (step 1 of 2) Processing vect, total=   1.6s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[Pipeline] ............... (step 2 of 2) Processing clf, total=   2.3s
[CV 2/3] END clf__C=5, clf__max_iter=50, clf__penalty=l2, clf__solver=lbfgs, vect__max_df=0.5, vect__ngram_range=(1, 1);, score=0.868 total time=   4.9s
[Pipeline] .............. (step 1 of 2) Processing vect, total=   2.5s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[Pipeline] ............... (step 2 of 2) Processing clf, total=   2.1s
[CV 3/3] END clf__C=5, clf__max_iter=50, clf__penalty=l2, clf__solver=lbfgs, vect__max_df=0.5, vect__ngram_range=(1, 1);, score=0.862 total time=   5.2s
[Pipeline] .............. (step 1 of 2) Processing vect, total=   3.7s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[Pipeline] ............... (step 2 of 2) Processing clf, total=   9.0s
[CV 1/3] END clf__C=5, clf__max_iter=50, clf__penalty=l2, clf__solver=lbfgs, vect__max_df=0.5, vect__ngram_range=(2, 2);, score=0.851 total time=  13.5s
[Pipeline] .............. (step 1 of 2) Processing vect, total=   3.7s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[Pipeline] ............... (step 2 of 2) Processing clf, total=   9.8s
[CV 2/3] END clf__C=5, clf__max_iter=50, clf__penalty=l2, clf__solver=lbfgs, vect__max_df=0.5, vect__ngram_range=(2, 2);, score=0.856 total time=  14.3s
[Pipeline] .............. (step 1 of 2) Processing vect, total=   3.7s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[Pipeline] ............... (step 2 of 2) Processing clf, total=   8.6s
[CV 3/3] END clf__C=5, clf__max_iter=50, clf__penalty=l2, clf__solver=lbfgs, vect__max_df=0.5, vect__ngram_range=(2, 2);, score=0.852 total time=  13.5s
[Pipeline] .............. (step 1 of 2) Processing vect, total=   1.3s
[Pipeline] ............... (step 2 of 2) Processing clf, total=   0.4s
[CV 1/3] END clf__C=5, clf__max_iter=50, clf__penalty=l2, clf__solver=lbfgs, vect__max_df=1, vect__ngram_range=(1, 1);, score=0.837 total time=   2.3s
[Pipeline] .............. (step 1 of 2) Processing vect, total=   1.3s
[Pipeline] ............... (step 2 of 2) Processing clf, total=   0.4s
[CV 2/3] END clf__C=5, clf__max_iter=50, clf__penalty=l2, clf__solver=lbfgs, vect__max_df=1, vect__ngram_range=(1, 1);, score=0.842 total time=   2.2s
[Pipeline] .............. (step 1 of 2) Processing vect, total=   1.2s
[Pipeline] ............... (step 2 of 2) Processing clf, total=   0.5s
[CV 3/3] END clf__C=5, clf__max_iter=50, clf__pe



[Pipeline] ............... (step 2 of 2) Processing clf, total=   2.8s
[CV 2/3] END clf__C=10, clf__max_iter=25, clf__penalty=l1, clf__solver=liblinear, vect__max_df=1, vect__ngram_range=(1, 1);, score=0.832 total time=   5.2s
[Pipeline] .............. (step 1 of 2) Processing vect, total=   1.3s
[Pipeline] ............... (step 2 of 2) Processing clf, total=   0.0s
[CV 3/3] END clf__C=10, clf__max_iter=25, clf__penalty=l1, clf__solver=liblinear, vect__max_df=1, vect__ngram_range=(1, 1);, score=0.823 total time=   1.8s
[Pipeline] .............. (step 1 of 2) Processing vect, total=   2.9s
[Pipeline] ............... (step 2 of 2) Processing clf, total=   0.3s
[CV 1/3] END clf__C=10, clf__max_iter=25, clf__penalty=l1, clf__solver=liblinear, vect__max_df=1, vect__ngram_range=(2, 2);, score=0.831 total time=   3.9s
[Pipeline] .............. (step 1 of 2) Processing vect, total=   4.3s
[Pipeline] ............... (step 2 of 2) Processing clf, total=   0.4s
[CV 2/3] END clf__C=10, clf__max_it

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[Pipeline] ............... (step 2 of 2) Processing clf, total=   1.2s
[CV 1/3] END clf__C=10, clf__max_iter=25, clf__penalty=l2, clf__solver=lbfgs, vect__max_df=0.5, vect__ngram_range=(1, 1);, score=0.873 total time=   3.3s
[Pipeline] .............. (step 1 of 2) Processing vect, total=   1.5s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[Pipeline] ............... (step 2 of 2) Processing clf, total=   1.8s
[CV 2/3] END clf__C=10, clf__max_iter=25, clf__penalty=l2, clf__solver=lbfgs, vect__max_df=0.5, vect__ngram_range=(1, 1);, score=0.869 total time=   4.3s
[Pipeline] .............. (step 1 of 2) Processing vect, total=   2.4s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[Pipeline] ............... (step 2 of 2) Processing clf, total=   1.0s
[CV 3/3] END clf__C=10, clf__max_iter=25, clf__penalty=l2, clf__solver=lbfgs, vect__max_df=0.5, vect__ngram_range=(1, 1);, score=0.867 total time=   4.1s
[Pipeline] .............. (step 1 of 2) Processing vect, total=   3.6s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[Pipeline] ............... (step 2 of 2) Processing clf, total=   4.3s
[CV 1/3] END clf__C=10, clf__max_iter=25, clf__penalty=l2, clf__solver=lbfgs, vect__max_df=0.5, vect__ngram_range=(2, 2);, score=0.851 total time=   9.3s
[Pipeline] .............. (step 1 of 2) Processing vect, total=   4.8s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[Pipeline] ............... (step 2 of 2) Processing clf, total=   4.3s
[CV 2/3] END clf__C=10, clf__max_iter=25, clf__penalty=l2, clf__solver=lbfgs, vect__max_df=0.5, vect__ngram_range=(2, 2);, score=0.855 total time=  10.0s
[Pipeline] .............. (step 1 of 2) Processing vect, total=   4.0s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[Pipeline] ............... (step 2 of 2) Processing clf, total=   5.2s
[CV 3/3] END clf__C=10, clf__max_iter=25, clf__penalty=l2, clf__solver=lbfgs, vect__max_df=0.5, vect__ngram_range=(2, 2);, score=0.849 total time=  10.0s
[Pipeline] .............. (step 1 of 2) Processing vect, total=   1.3s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[Pipeline] ............... (step 2 of 2) Processing clf, total=   0.3s
[CV 1/3] END clf__C=10, clf__max_iter=25, clf__penalty=l2, clf__solver=lbfgs, vect__max_df=1, vect__ngram_range=(1, 1);, score=0.836 total time=   2.1s
[Pipeline] .............. (step 1 of 2) Processing vect, total=   1.3s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[Pipeline] ............... (step 2 of 2) Processing clf, total=   0.3s
[CV 2/3] END clf__C=10, clf__max_iter=25, clf__penalty=l2, clf__solver=lbfgs, vect__max_df=1, vect__ngram_range=(1, 1);, score=0.844 total time=   2.2s
[Pipeline] .............. (step 1 of 2) Processing vect, total=   1.3s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[Pipeline] ............... (step 2 of 2) Processing clf, total=   0.3s
[CV 3/3] END clf__C=10, clf__max_iter=25, clf__penalty=l2, clf__solver=lbfgs, vect__max_df=1, vect__ngram_range=(1, 1);, score=0.837 total time=   2.1s
[Pipeline] .............. (step 1 of 2) Processing vect, total=   4.7s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[Pipeline] ............... (step 2 of 2) Processing clf, total=   2.9s
[CV 1/3] END clf__C=10, clf__max_iter=25, clf__penalty=l2, clf__solver=lbfgs, vect__max_df=1, vect__ngram_range=(2, 2);, score=0.837 total time=   8.3s
[Pipeline] .............. (step 1 of 2) Processing vect, total=   2.9s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[Pipeline] ............... (step 2 of 2) Processing clf, total=   2.9s
[CV 2/3] END clf__C=10, clf__max_iter=25, clf__penalty=l2, clf__solver=lbfgs, vect__max_df=1, vect__ngram_range=(2, 2);, score=0.843 total time=   6.9s
[Pipeline] .............. (step 1 of 2) Processing vect, total=   4.3s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[Pipeline] ............... (step 2 of 2) Processing clf, total=   2.9s
[CV 3/3] END clf__C=10, clf__max_iter=25, clf__penalty=l2, clf__solver=lbfgs, vect__max_df=1, vect__ngram_range=(2, 2);, score=0.839 total time=   7.9s
[Pipeline] .............. (step 1 of 2) Processing vect, total=   1.5s
[Pipeline] ............... (step 2 of 2) Processing clf, total=   0.3s
[CV 1/3] END clf__C=10, clf__max_iter=25, clf__penalty=l2, clf__solver=liblinear, vect__max_df=0.5, vect__ngram_range=(1, 1);, score=0.866 total time=   2.4s
[Pipeline] .............. (step 1 of 2) Processing vect, total=   1.4s
[Pipeline] ............... (step 2 of 2) Processing clf, total=   0.3s
[CV 2/3] END clf__C=10, clf__max_iter=25, clf__penalty=l2, clf__solver=liblinear, vect__max_df=0.5, vect__ngram_range=(1, 1);, score=0.864 total time=   2.4s
[Pipeline] .............. (step 1 of 2) Processing vect, total=   2.2s
[Pipeline] ............... (step 2 of 2) Processing clf, total=   0.6s
[CV 3/3] END clf__C=10, clf__max_it



[Pipeline] ............... (step 2 of 2) Processing clf, total=   7.0s
[CV 2/3] END clf__C=10, clf__max_iter=50, clf__penalty=l1, clf__solver=liblinear, vect__max_df=1, vect__ngram_range=(1, 1);, score=0.832 total time=   9.3s
[Pipeline] .............. (step 1 of 2) Processing vect, total=   1.3s
[Pipeline] ............... (step 2 of 2) Processing clf, total=   0.0s
[CV 3/3] END clf__C=10, clf__max_iter=50, clf__penalty=l1, clf__solver=liblinear, vect__max_df=1, vect__ngram_range=(1, 1);, score=0.823 total time=   1.8s
[Pipeline] .............. (step 1 of 2) Processing vect, total=   4.4s
[Pipeline] ............... (step 2 of 2) Processing clf, total=   0.2s
[CV 1/3] END clf__C=10, clf__max_iter=50, clf__penalty=l1, clf__solver=liblinear, vect__max_df=1, vect__ngram_range=(2, 2);, score=0.831 total time=   5.3s
[Pipeline] .............. (step 1 of 2) Processing vect, total=   2.9s
[Pipeline] ............... (step 2 of 2) Processing clf, total=   0.3s
[CV 2/3] END clf__C=10, clf__max_it

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[Pipeline] ............... (step 2 of 2) Processing clf, total=   3.3s
[CV 1/3] END clf__C=10, clf__max_iter=50, clf__penalty=l2, clf__solver=lbfgs, vect__max_df=0.5, vect__ngram_range=(1, 1);, score=0.866 total time=   5.8s
[Pipeline] .............. (step 1 of 2) Processing vect, total=   1.6s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[Pipeline] ............... (step 2 of 2) Processing clf, total=   2.1s
[CV 2/3] END clf__C=10, clf__max_iter=50, clf__penalty=l2, clf__solver=lbfgs, vect__max_df=0.5, vect__ngram_range=(1, 1);, score=0.864 total time=   4.3s
[Pipeline] .............. (step 1 of 2) Processing vect, total=   1.5s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[Pipeline] ............... (step 2 of 2) Processing clf, total=   1.9s
[CV 3/3] END clf__C=10, clf__max_iter=50, clf__penalty=l2, clf__solver=lbfgs, vect__max_df=0.5, vect__ngram_range=(1, 1);, score=0.858 total time=   4.1s
[Pipeline] .............. (step 1 of 2) Processing vect, total=   4.8s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[Pipeline] ............... (step 2 of 2) Processing clf, total=   7.9s
[CV 1/3] END clf__C=10, clf__max_iter=50, clf__penalty=l2, clf__solver=lbfgs, vect__max_df=0.5, vect__ngram_range=(2, 2);, score=0.851 total time=  13.5s
[Pipeline] .............. (step 1 of 2) Processing vect, total=   4.6s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[Pipeline] ............... (step 2 of 2) Processing clf, total=   7.9s
[CV 2/3] END clf__C=10, clf__max_iter=50, clf__penalty=l2, clf__solver=lbfgs, vect__max_df=0.5, vect__ngram_range=(2, 2);, score=0.854 total time=  13.3s
[Pipeline] .............. (step 1 of 2) Processing vect, total=   4.4s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[Pipeline] ............... (step 2 of 2) Processing clf, total=   7.8s
[CV 3/3] END clf__C=10, clf__max_iter=50, clf__penalty=l2, clf__solver=lbfgs, vect__max_df=0.5, vect__ngram_range=(2, 2);, score=0.849 total time=  13.1s
[Pipeline] .............. (step 1 of 2) Processing vect, total=   1.2s
[Pipeline] ............... (step 2 of 2) Processing clf, total=   0.4s
[CV 1/3] END clf__C=10, clf__max_iter=50, clf__penalty=l2, clf__solver=lbfgs, vect__max_df=1, vect__ngram_range=(1, 1);, score=0.836 total time=   2.2s
[Pipeline] .............. (step 1 of 2) Processing vect, total=   2.1s
[Pipeline] ............... (step 2 of 2) Processing clf, total=   0.8s
[CV 2/3] END clf__C=10, clf__max_iter=50, clf__penalty=l2, clf__solver=lbfgs, vect__max_df=1, vect__ngram_range=(1, 1);, score=0.844 total time=   3.8s
[Pipeline] .............. (step 1 of 2) Processing vect, total=   1.5s
[Pipeline] ............... (step 2 of 2) Processing clf, total=   0.4s
[CV 3/3] END clf__C=10, clf__max_iter=50, clf



[Pipeline] ............... (step 2 of 2) Processing clf, total=  17.2s
[CV 2/3] END clf__C=10, clf__max_iter=100, clf__penalty=l1, clf__solver=liblinear, vect__max_df=1, vect__ngram_range=(1, 1);, score=0.832 total time=  19.0s
[Pipeline] .............. (step 1 of 2) Processing vect, total=   1.3s
[Pipeline] ............... (step 2 of 2) Processing clf, total=   0.0s
[CV 3/3] END clf__C=10, clf__max_iter=100, clf__penalty=l1, clf__solver=liblinear, vect__max_df=1, vect__ngram_range=(1, 1);, score=0.823 total time=   1.8s
[Pipeline] .............. (step 1 of 2) Processing vect, total=   4.5s
[Pipeline] ............... (step 2 of 2) Processing clf, total=   0.2s
[CV 1/3] END clf__C=10, clf__max_iter=100, clf__penalty=l1, clf__solver=liblinear, vect__max_df=1, vect__ngram_range=(2, 2);, score=0.831 total time=   5.5s
[Pipeline] .............. (step 1 of 2) Processing vect, total=   3.0s
[Pipeline] ............... (step 2 of 2) Processing clf, total=   0.3s
[CV 2/3] END clf__C=10, clf__max

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[Pipeline] ............... (step 2 of 2) Processing clf, total=   3.8s
[CV 2/3] END clf__C=10, clf__max_iter=100, clf__penalty=l2, clf__solver=lbfgs, vect__max_df=0.5, vect__ngram_range=(1, 1);, score=0.864 total time=   5.9s
[Pipeline] .............. (step 1 of 2) Processing vect, total=   1.7s
[Pipeline] ............... (step 2 of 2) Processing clf, total=   5.7s
[CV 3/3] END clf__C=10, clf__max_iter=100, clf__penalty=l2, clf__solver=lbfgs, vect__max_df=0.5, vect__ngram_range=(1, 1);, score=0.857 total time=   7.9s
[Pipeline] .............. (step 1 of 2) Processing vect, total=   3.6s
[Pipeline] ............... (step 2 of 2) Processing clf, total=  14.0s
[CV 1/3] END clf__C=10, clf__max_iter=100, clf__penalty=l2, clf__solver=lbfgs, vect__max_df=0.5, vect__ngram_range=(2, 2);, score=0.850 total time=  18.4s
[Pipeline] .............. (step 1 of 2) Processing vect, total=   3.6s
[Pipeline] ............... (step 2 of 2) Processing clf, total=  12.5s
[CV 2/3] END clf__C=10, clf__max_iter=

108 fits failed out of a total of 432.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
108 fits failed with the following error:
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/sklearn/model_selection/_validation.py", line 686, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/usr/local/lib/python3.10/dist-packages/sklearn/pipeline.py", line 405, in fit
    self._final_estimator.fit(Xt, y, **fit_params_last_step)
  File "/usr/local/lib/python3.10/dist-packages/sklearn/linear_model/_logistic.py", line 1162, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "/usr/local/lib/python3.10/dist-packages/sklearn/linear_model/_logistic.py", line 54, in _chec

[Pipeline] .............. (step 1 of 2) Processing vect, total=   2.1s
[Pipeline] ............... (step 2 of 2) Processing clf, total=   1.2s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [None]:
grid.best_params_

{'clf__C': 10,
 'clf__max_iter': 25,
 'clf__penalty': 'l2',
 'clf__solver': 'lbfgs',
 'vect__max_df': 0.5,
 'vect__ngram_range': (1, 1)}

In [None]:
result_with_grid = grid.predict(test[0]) # Массив предсказанных тональностей с лучшими параметрами
accuracy_score(Y_test, result_with_grid) # И лучшая метрика правильности

NotFittedError: ignored

#Итоги: Таблица со всеми параметрами и вывод лучшего набора


In [None]:
Result_table = pd.DataFrame(grid.cv_results_).drop(['mean_fit_time','mean_score_time','std_score_time','std_test_score','rank_test_score','split0_test_score','split1_test_score','split2_test_score'], axis=1)
Result_table

Unnamed: 0,std_fit_time,param_clf__C,param_clf__max_iter,param_clf__penalty,param_clf__solver,param_vect__max_df,param_vect__ngram_range,params,mean_test_score
0,0.478175,1.0,25,l1,lbfgs,0.5,"(1, 1)","{'clf__C': 1.0, 'clf__max_iter': 25, 'clf__pen...",
1,0.665432,1.0,25,l1,lbfgs,0.5,"(2, 2)","{'clf__C': 1.0, 'clf__max_iter': 25, 'clf__pen...",
2,0.034263,1.0,25,l1,lbfgs,1,"(1, 1)","{'clf__C': 1.0, 'clf__max_iter': 25, 'clf__pen...",
3,0.506091,1.0,25,l1,lbfgs,1,"(2, 2)","{'clf__C': 1.0, 'clf__max_iter': 25, 'clf__pen...",
4,0.181388,1.0,25,l1,liblinear,0.5,"(1, 1)","{'clf__C': 1.0, 'clf__max_iter': 25, 'clf__pen...",0.851284
...,...,...,...,...,...,...,...,...,...
139,0.595975,10,100,l2,lbfgs,1,"(2, 2)","{'clf__C': 10, 'clf__max_iter': 100, 'clf__pen...",0.839457
140,0.007729,10,100,l2,liblinear,0.5,"(1, 1)","{'clf__C': 10, 'clf__max_iter': 100, 'clf__pen...",0.862336
141,0.482774,10,100,l2,liblinear,0.5,"(2, 2)","{'clf__C': 10, 'clf__max_iter': 100, 'clf__pen...",0.851091
142,0.423905,10,100,l2,liblinear,1,"(1, 1)","{'clf__C': 10, 'clf__max_iter': 100, 'clf__pen...",0.838876


In [None]:
Result_table[Result_table['mean_test_score'] == Result_table['mean_test_score'].max()] # Вывод лучшего набора

Unnamed: 0,std_fit_time,param_clf__C,param_clf__max_iter,param_clf__penalty,param_clf__solver,param_vect__max_df,param_vect__ngram_range,params,mean_test_score
104,0.341343,10,25,l2,lbfgs,0.5,"(1, 1)","{'clf__C': 10, 'clf__max_iter': 25, 'clf__pena...",0.869607


Значение метрики, если модель всегда возвращала бы негативную оценку

In [None]:
only_neg = Y_test.copy()
only_neg[only_neg >= 0] = 0.0 
accuracy_score(Y_test, only_neg)

0.8344319503683598