In [119]:
import numpy as np
import pandas as pd

from sklearn.pipeline import Pipeline
# from sklearn.model_selection import GridSearchCV
import dask_ml.model_selection as dcv
from dask.diagnostics import ProgressBar
import joblib

from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer, TfidfTransformer

from sklearn.multiclass import OneVsRestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import MultinomialNB
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler

from sklearn.metrics import f1_score, hamming_loss, make_scorer, accuracy_score, precision_score, recall_score

from skmultilearn.problem_transform import BinaryRelevance
from skmultilearn.model_selection.measures import get_combination_wise_output_matrix
from skmultilearn.model_selection import iterative_train_test_split


In [47]:
DATA_DIR = "../../data/processed/"
INPUT_FILE_NAME = 'final_squash15_with_pos_ner_tm.parquet'
df = pd.read_parquet(DATA_DIR + INPUT_FILE_NAME)
df.head()

Unnamed: 0,speaker,headline,description,duration,tags,transcript,WC,clean_transcript,clean_transcript_string,sim_tags,squash15_tags,pos_sequence,ner_sequence,tm
0,Al Gore,Averting the climate crisis,With the same humor and humanity he exuded in ...,0:16:17,"cars,alternative energy,culture,politics,scien...","0:14\r\r\rThank you so much, Chris.\rAnd it's ...",2281.0,"b'[""thank"", ""chris"", ""truly"", ""great"", ""honor""...",thank chris truly great honor opportunity come...,"cars,solar system,energy,culture,politics,scie...","culture,politics,science,global issues,technology",VERB PROPN ADV ADJ NOUN NOUN VERB NOUN ADV ADV...,PERSON ORG ORG GPE LOC ORG PRODUCT GPE GPE PER...,"[0.04325945698517057, 0.0, 0.00142482934694180..."
1,Amy Smith,Simple designs to save a life,Fumes from indoor cooking fires kill more than...,0:15:06,"MacArthur grant,simplicity,industrial design,a...","0:11\r\r\rIn terms of invention,\rI'd like to ...",2687.0,"b'[""term"", ""invention"", ""like"", ""tell"", ""tale""...",term invention like tell tale favorite project...,"macarthur grant,simplicity,design,solar system...","design,global issues",NOUN NOUN SCONJ VERB PROPN ADJ NOUN VERB NOUN ...,GPE DATE CARDINAL DATE ORG PERSON LOC ORG GPE ...,"[0.013287880838036227, 0.0, 0.0, 0.00511725094..."
2,Ashraf Ghani,How to rebuild a broken state,Ashraf Ghani's passionate and powerful 10-minu...,0:18:45,"corruption,poverty,economics,investment,milita...","0:12\r\r\rA public, Dewey long ago observed,\r...",2506.0,"b'[""public"", ""dewey"", ""long"", ""ago"", ""observe""...",public dewey long ago observe constitute discu...,"corruption,inequality,science,investment,war,c...","science,culture,politics,global issues,business",ADJ PROPN ADV ADV VERB ADJ NOUN NOUN PROPN PRO...,DATE NORP ORDINAL DATE MONEY DATE DATE DATE EV...,"[0.0, 0.006699599134802422, 0.0, 0.00564851883..."
3,Burt Rutan,The real future of space exploration,"In this passionate talk, legendary spacecraft ...",0:19:37,"aircraft,flight,industrial design,NASA,rocket ...","0:11\r\r\rI want to start off by saying, Houst...",3092.0,"b'[""want"", ""start"", ""say"", ""houston"", ""problem...",want start say houston problem enter second ge...,"flight,design,nasa,science,invention,entrepren...","design,science,business",VERB NOUN VERB PROPN NOUN VERB ADJ NOUN NOUN N...,GPE ORDINAL ORG PERSON DATE DATE DATE TIME PER...,"[0.040282108339079505, 0.03732895646484358, 0...."
4,Chris Bangle,Great cars are great art,American designer Chris Bangle explains his ph...,0:20:04,"cars,industrial design,transportation,inventio...","0:12\r\r\rWhat I want to talk about is, as bac...",3781.0,"b'[""want"", ""talk"", ""background"", ""idea"", ""car""...",want talk background idea car art actually mea...,"cars,design,transportation,invention,technolog...","design,technology,business,science",VERB NOUN NOUN NOUN NOUN NOUN ADV ADJ NOUN NOU...,PERSON PRODUCT ORG ORG PERSON PERSON PERSON OR...,"[0.08049208168957463, 0.0, 0.0, 0.008031187136..."


In [48]:
df = df.dropna(subset=['squash15_tags'])
df = df.reset_index(drop=True)
df.iloc[:,:10].info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2313 entries, 0 to 2312
Data columns (total 10 columns):
speaker                    2313 non-null object
headline                   2313 non-null object
description                2313 non-null object
duration                   2313 non-null object
tags                       2313 non-null object
transcript                 2313 non-null object
WC                         2313 non-null float64
clean_transcript           2313 non-null object
clean_transcript_string    2313 non-null object
sim_tags                   2313 non-null object
dtypes: float64(1), object(9)
memory usage: 180.8+ KB


In [49]:
X = df['clean_transcript_string']
labels = df[['squash15_tags']]

In [50]:
from sklearn.preprocessing import MultiLabelBinarizer

y = []
for index, row in labels.iterrows():
    y.append(set(row['squash15_tags'].split(',')))
    
mlb = MultiLabelBinarizer()
encoded_y = mlb.fit_transform(y)

In [51]:
def compile_vectors(series,num):
    big = np.zeros((len(series),num))
    for i in range(len(series)):
        array = series.iloc[i]
        big[i] = array
        return big

In [52]:
X_tm = df['tm']

array([[0.04325946, 0.        , 0.00142483, ..., 0.12471587, 0.        ,
        0.03529326],
       [0.01328788, 0.        , 0.        , ..., 0.08481789, 0.02866964,
        0.        ],
       [0.        , 0.0066996 , 0.        , ..., 0.0047375 , 0.0055694 ,
        0.08197532],
       ...,
       [0.05075222, 0.00120244, 0.        , ..., 0.01118185, 0.02436684,
        0.02354271],
       [0.11474112, 0.00318708, 0.        , ..., 0.        , 0.        ,
        0.02118649],
       [0.05883475, 0.00254283, 0.01706404, ..., 0.03623787, 0.06303276,
        0.        ]])

array([[0.04325946, 0.        , 0.00142483, ..., 0.12471587, 0.        ,
        0.03529326],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       ...,
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ]])

In [58]:
pd.concat([df[['clean_transcript_string']], pd.DataFrame(np.array(list(X_tm)))], axis=1)

Unnamed: 0,clean_transcript_string,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
0,thank chris truly great honor opportunity come...,0.043259,0.000000,0.001425,0.000000,0.017688,0.003206,0.002644,0.017519,0.004639,0.000000,0.000000,0.000000,0.124716,0.000000,0.035293
1,term invention like tell tale favorite project...,0.013288,0.000000,0.000000,0.005117,0.034926,0.017484,0.020667,0.009246,0.005919,0.000000,0.001089,0.010383,0.084818,0.028670,0.000000
2,public dewey long ago observe constitute discu...,0.000000,0.006700,0.000000,0.005649,0.116568,0.000000,0.000000,0.026561,0.018581,0.001628,0.002034,0.000000,0.004737,0.005569,0.081975
3,want start say houston problem enter second ge...,0.040282,0.037329,0.000000,0.003867,0.029308,0.000000,0.008637,0.018175,0.000000,0.000000,0.023373,0.000000,0.050151,0.045343,0.003090
4,want talk background idea car art actually mea...,0.080492,0.000000,0.000000,0.008031,0.000000,0.000000,0.000000,0.048297,0.017136,0.000000,0.000560,0.000000,0.094348,0.034353,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2308,imagine walk even discover everybody room look...,0.000000,0.005743,0.001340,0.000633,0.000000,0.000000,0.000000,0.250697,0.000000,0.005070,0.012961,0.000000,0.000000,0.000000,0.001301
2309,pay close attention easy attention pull differ...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.201446,0.000000,0.000000,0.000000,0.049549,0.000000
2310,happy pic take senior college right dance prac...,0.050752,0.001202,0.000000,0.000000,0.018366,0.001270,0.010351,0.000000,0.001023,0.009487,0.000000,0.019727,0.011182,0.024367,0.023543
2311,sevenyearold grandson sleep hall wake lot morn...,0.114741,0.003187,0.000000,0.000000,0.000000,0.007198,0.007897,0.000000,0.006953,0.000000,0.000000,0.010003,0.000000,0.000000,0.021186


In [59]:
print(encoded_y[0])
print(len(encoded_y[0]))
print(mlb.inverse_transform(encoded_y)[:10])

[0 0 0 0 1 0 0 0 1 0 0 0 1 1 1]
15
[('culture', 'global issues', 'politics', 'science', 'technology'), ('design', 'global issues'), ('business', 'culture', 'global issues', 'politics', 'science'), ('business', 'design', 'science'), ('business', 'design', 'science', 'technology'), ('biodiversity', 'biomechanics', 'science', 'technology'), ('design', 'entertainment', 'media', 'science', 'technology'), ('culture', 'design'), ('business', 'culture', 'design', 'global issues', 'media', 'science', 'technology'), ('culture', 'global issues', 'science')]


In [35]:
from skmultilearn.model_selection import iterative_train_test_split

X_train, y_train, X_test, y_test = iterative_train_test_split(X.values.reshape(len(X.values), 1), encoded_y, test_size = 0.2)
X_train = pd.DataFrame(X_train)[0]
X_test = pd.DataFrame(X_test)[0]

In [63]:
from skmultilearn.model_selection import iterative_train_test_split

X_train, y_train, X_test, y_test = iterative_train_test_split(pd.concat([df[['clean_transcript_string']], pd.DataFrame(np.array(list(X_tm)))], axis=1).values, encoded_y, test_size = 0.2)
X_train = pd.DataFrame(X_train)
X_test = pd.DataFrame(X_test)

In [64]:
X_train

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
0,term invention like tell tale favorite project...,0.0132879,0,0,0.00511725,0.0349261,0.0174842,0.0206666,0.00924614,0.00591928,0,0.00108856,0.0103826,0.0848179,0.0286696,0
1,public dewey long ago observe constitute discu...,0,0.0066996,0,0.00564852,0.116568,0,0,0.0265611,0.0185813,0.00162754,0.00203428,0,0.0047375,0.0055694,0.0819753
2,want start say houston problem enter second ge...,0.0402821,0.037329,0,0.00386679,0.0293075,0,0.00863695,0.0181755,0,0,0.0233732,0,0.0501507,0.0453433,0.00308957
3,break ask people comment age debate comment un...,0,0.0112228,0,0.163766,0.0033398,0.050081,0,0.00376791,0,0,0.00220388,0,0.0255144,0.00717125,0.00998504
4,music sound silence simon garfunkel hello voic...,0.0622724,0,0.024305,0.00345465,0.0115958,0,0,0,0.00710706,0,0,0,0.00844518,0.120207,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1823,imagine smartphone miniaturize hook directly b...,0.0611207,0.00254596,0,0,0,0,0,0,0,0.0339163,0,0,0,0.0778498,0.0562354
1824,imagine walk even discover everybody room look...,0,0.00574282,0.00134005,0.00063271,0,0,0,0.250697,0,0.00507013,0.0129609,0,0,0,0.00130057
1825,pay close attention easy attention pull differ...,0,0,0,0,0,0,0,0,0,0.201446,0,0,0,0.0495489,0
1826,sevenyearold grandson sleep hall wake lot morn...,0.114741,0.00318708,0,0,0,0.00719808,0.00789727,0,0.00695262,0,0,0.0100033,0,0,0.0211865


In [22]:
print(y_train.sum(axis=0))
print(y_test.sum(axis=0))

[ 172  176  279  142  916  382  228  181  543  296  162  229  159 1174
  622]
[ 46  44  70  43 239  95  57  37 136  89  55  50  40 293 165]


0       term invention like tell tale favorite project...
1       public dewey long ago observe constitute discu...
2       want start say houston problem enter second ge...
3       break ask people comment age debate comment un...
4       music sound silence simon garfunkel hello voic...
                              ...                        
1820    imagine smartphone miniaturize hook directly b...
1821    imagine walk even discover everybody room look...
1822    pay close attention easy attention pull differ...
1823    sevenyearold grandson sleep hall wake lot morn...
1824    michael brown engineer innovator inventor insp...
Name: 0, Length: 1825, dtype: object

In [71]:
{**vectorizer_params, **lr_params}

({'clf__classifier': [LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                      intercept_scaling=1, l1_ratio=None, max_iter=100,
                      multi_class='warn', n_jobs=None, penalty='l2',
                      random_state=None, solver='warn', tol=0.0001, verbose=0,
                      warm_start=False)],
  'clf__classifier__penalty': ['l1', 'l2'],
  'clf__classifier__C': [0.01, 0.1, 1, 10, 100],
  'clf__classifier__solver': ['liblinear'],
  'clf__classifier__class_weight': ['balanced']},)

## Gridsearch for the best single model for all labels

### References 
http://scikit.ml/api/skmultilearn.problem_transform.br.html

https://scikit-learn.org/stable/modules/model_evaluation.html#scoring-parameter

http://scikit.ml/stratification.html

https://stackoverflow.com/questions/12632992/gridsearch-for-an-estimator-inside-a-onevsrestclassifier/12637528#12637528

### Binary Relevance

In [108]:
# TODO: 
# 1. Check if TfidfTransformer use_idf=False is the same as Countvectorizer? or there are other metrics to suppress
# 2. Get scoring function to work, hamming? -- kinda done
# 3. Balanced class labels
# 4. Set better param ranges
# 5. Remove vectorizer step once we decide on which is better, then use sparse csr and hopefully it trains faster

# param_range = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
param_range = [1, 2, 3, 4, 5]
param_range_lr = [1.0, 0.5, 0.1]

# Set params, comment out as see fit

vectorizer_params = {
#     'vectorizer__min_df': np.linspace(0.005, 0.05, 5),
#     'vectorizer__ngram_range': [(1, 1), (1, 2)], # This shit blows up your memory
#     'tfidf__norm': ('l1', 'l2'),
#     'tfidf__use_idf': [True, False],
}

vectorizer = Pipeline(steps=[
    ('vec', TfidfVectorizer()),
])

numeric_transformer = Pipeline(steps=[
    ('scaler', StandardScaler())])

preprocessor = ColumnTransformer(
    transformers=[
        ('vectorizer', vectorizer, 0),
        ('num', numeric_transformer, list(range(1, 16))),
    ])


lr_params = {
    'clf__classifier': [LogisticRegression()],
        'clf__classifier__penalty': ['l1', 'l2'],
        'clf__classifier__C': [0.01, 0.1, 1, 10, 50, 100, 500],
        'clf__classifier__solver': ['liblinear'],
        'clf__classifier__class_weight': ['balanced'],
}

svc_params = {
    'clf__classifier': [SVC()],
        'clf__classifier__kernel': ['linear', 'rbf'],
        'clf__classifier__C': param_range, # np.logspace(-1, 2, 10),
        'clf__classifier__gamma': ['auto'], # np.logspace(-1, 1, 10)
        'clf__classifier__probability': [True],
        'clf__classifier__class_weight': ['balanced'],
}

rf_params = {
    'clf__classifier': [RandomForestClassifier()],
        'clf__classifier__criterion': ['gini', 'entropy'],
        'clf__classifier__min_samples_leaf': param_range,
        'clf__classifier__max_depth': param_range,
        'clf__classifier__min_samples_split': param_range[1:],
        'clf__classifier__n_estimators': [10],
        'clf__classifier__class_weight': ['balanced'],
}

mnb_params = {
    'clf__classifier': [MultinomialNB()],
        'clf__classifier__alpha': [0.7, 1.0, 1.5],
}

## Stack params
parameters = [
    {**vectorizer_params, **lr_params},
#     {**vectorizer_params, **svc_params},
#     {**vectorizer_params, **rf_params},
#     {**vectorizer_params, **mnb_params}
]

# br_pipeline = Pipeline([('vectorizer', CountVectorizer()),
#                         ('tfidf', TfidfTransformer()),
#                         ('clf', BinaryRelevance()),
#                        ]
#                       )

br_pipeline = Pipeline([('processor', preprocessor),
                        ('clf', BinaryRelevance()),
                       ]
                      )


# Gridsearch settings
# scoring = make_scorer(f1_score, average='micro') # possible scorings 'f1_micro' 'f1_macro'
# scoring = 'f1_micro'
# scoring = make_scorer(hamming_loss)
# scoring = 'neg_log_loss'
scoring = 'f1_samples'
folds = 4
njobs = -1

br_model = dcv.GridSearchCV(br_pipeline, parameters, scoring=scoring, cv=folds, n_jobs=njobs)

In [109]:
%%time

with ProgressBar():
    br_model.fit(X_train, y_train)
print(br_model.best_params_, br_model.best_score_)
pd.DataFrame(br_model.cv_results_)
filename = f'best_br_model.joblib'
joblib.dump(br_model, filename)

[#####                                   ] | 13% Completed | 43.6s

  'precision', 'predicted', average, warn_for)


[#####                                   ] | 13% Completed | 44.8s

  'precision', 'predicted', average, warn_for)


[######                                  ] | 15% Completed | 45.5s

  'precision', 'predicted', average, warn_for)


[#########                               ] | 24% Completed |  1min 37.0s

  'precision', 'predicted', average, warn_for)


[##########                              ] | 25% Completed |  1min 38.5s

  'precision', 'predicted', average, warn_for)


[############                            ] | 31% Completed |  1min 48.2s

  'precision', 'predicted', average, warn_for)


[############                            ] | 32% Completed |  1min 52.7s

  'precision', 'predicted', average, warn_for)


[#############                           ] | 33% Completed |  1min 53.4s

  'precision', 'predicted', average, warn_for)


[#############                           ] | 34% Completed |  1min 54.3s

  'precision', 'predicted', average, warn_for)


[##############                          ] | 35% Completed |  1min 59.4s

  'precision', 'predicted', average, warn_for)


[##############                          ] | 36% Completed |  2min  1.0s

  'precision', 'predicted', average, warn_for)


[###############                         ] | 38% Completed |  2min 10.6s

  'precision', 'predicted', average, warn_for)


[###############                         ] | 39% Completed |  2min 13.9s

  'precision', 'predicted', average, warn_for)


[#################                       ] | 44% Completed |  2min 45.3s

  'precision', 'predicted', average, warn_for)


[##################                      ] | 45% Completed |  2min 51.9s

  'precision', 'predicted', average, warn_for)


[###################                     ] | 48% Completed |  2min 59.5s

  'precision', 'predicted', average, warn_for)


[####################                    ] | 51% Completed |  3min  1.7s

  'precision', 'predicted', average, warn_for)


[####################                    ] | 52% Completed |  3min  6.8s

  'precision', 'predicted', average, warn_for)


[######################                  ] | 55% Completed |  3min 12.0s

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


[######################                  ] | 57% Completed |  3min 15.2s

  'precision', 'predicted', average, warn_for)


[#######################                 ] | 59% Completed |  3min 20.4s

  'precision', 'predicted', average, warn_for)


[########################                ] | 60% Completed |  3min 21.1s

  'precision', 'predicted', average, warn_for)


[########################                ] | 60% Completed |  3min 21.4s

  'precision', 'predicted', average, warn_for)


[########################                ] | 62% Completed |  3min 28.9s

  'precision', 'predicted', average, warn_for)


[###########################             ] | 67% Completed |  4min  0.2s

  'precision', 'predicted', average, warn_for)


[###########################             ] | 69% Completed |  4min  6.7s

  'precision', 'predicted', average, warn_for)


[############################            ] | 71% Completed |  4min 13.2s

  'precision', 'predicted', average, warn_for)


[##############################          ] | 75% Completed |  4min 18.3s

  'precision', 'predicted', average, warn_for)


[##############################          ] | 76% Completed |  4min 22.3s

  'precision', 'predicted', average, warn_for)


[##############################          ] | 77% Completed |  4min 23.6s

  'precision', 'predicted', average, warn_for)


[###############################         ] | 78% Completed |  4min 26.5s

  'precision', 'predicted', average, warn_for)


[###############################         ] | 79% Completed |  4min 26.9s

  'precision', 'predicted', average, warn_for)


[###############################         ] | 79% Completed |  4min 27.2s

  'precision', 'predicted', average, warn_for)


[################################        ] | 80% Completed |  4min 28.5s

  'precision', 'predicted', average, warn_for)


[################################        ] | 82% Completed |  4min 36.5s

  'precision', 'predicted', average, warn_for)


[#################################       ] | 84% Completed |  4min 41.2s

  'precision', 'predicted', average, warn_for)


[##################################      ] | 85% Completed |  4min 45.0s

  'precision', 'predicted', average, warn_for)


[####################################    ] | 92% Completed |  5min  7.6s

  'precision', 'predicted', average, warn_for)


[#####################################   ] | 92% Completed |  5min  8.1s

  'precision', 'predicted', average, warn_for)


[#####################################   ] | 94% Completed |  5min  9.2s

  'precision', 'predicted', average, warn_for)


[######################################  ] | 95% Completed |  5min 10.1s

  'precision', 'predicted', average, warn_for)


[######################################  ] | 95% Completed |  5min 11.3s

  'precision', 'predicted', average, warn_for)


[######################################  ] | 96% Completed |  5min 11.8s

  'precision', 'predicted', average, warn_for)


[######################################  ] | 96% Completed |  5min 13.6s

  'precision', 'predicted', average, warn_for)


[####################################### ] | 98% Completed |  5min 17.7s

  'precision', 'predicted', average, warn_for)


[########################################] | 100% Completed |  5min 31.5s


  'precision', 'predicted', average, warn_for)


[########################################] | 100% Completed | 19.5s
{'clf__classifier': LogisticRegression(C=100, class_weight='balanced', dual=False,
                   fit_intercept=True, intercept_scaling=1, l1_ratio=None,
                   max_iter=100, multi_class='warn', n_jobs=None, penalty='l2',
                   random_state=None, solver='liblinear', tol=0.0001, verbose=0,
                   warm_start=False), 'clf__classifier__C': 100, 'clf__classifier__class_weight': 'balanced', 'clf__classifier__penalty': 'l2', 'clf__classifier__solver': 'liblinear'} 0.5435939637799594
Wall time: 5min 52s


['best_br_model.joblib']

In [111]:
y_pred_prob = br_model.predict_proba(X_test)
t = 0.5 # threshold value
y_pred_new = (y_pred_prob >= t).astype(int)
score = f1_score(y_test, y_pred_new, average="micro")
print(f"Binary relevance best model's f1-score {score}")
print(accuracy_score(y_test, y_pred_new))

Binary relevance best model's f1-score 0.573906758221901
0.07835051546391752


'public dewey long ago observe constitute discussion debate tyranny assumption question avoid doxa realm unquestioned will subject assumption debate discussion spirit join discussion critical issue time mobilize different form capital project state build assumption clearly capitalism year acceptable democracy look world look map capitalist economy democratic polity rare exception norm question form capitalism type democratic participation acknowledge moment bring rare consensus assumption provide grind type action consensus moment allow act necessary matter fragile provisional consensus able forward majority world benefit capitalism democratic system globe experience state repressive organization concern denial right denial justice provision term experience capitalism aspect rest globe experience extractive industry blood diamond smuggle emerald timber cut right poor second technical assistance technical assistance shock bad form today ugly face develop world develop country ten billio

In [22]:
mlb.inverse_transform(br_model.predict(X_test[[0]]))

[('business', 'culture', 'global issues', 'politics')]

### OneVsRest

In [None]:
# TODO: 
# 1. Check if TfidfTransformer use_idf=False is the same as Countvectorizer? or there are other metrics to suppress
# 2. Use proper scoring function - ideally, predicting relevant labels should be more important than predicting irrelevant ones
# 3. Balanced class labels
# 4. Set better param ranges

# param_range = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
param_range = [1, 2, 3, 4, 5]
param_range_lr = [1.0, 0.5, 0.1]

# Set params, comment out as see fit

vectorizer_params = {
#     'vectorizer__min_df': np.linspace(0.005, 0.05, 5),
#     'vectorizer__ngram_range': [(1, 1), (1, 2)], # This shit blows up your memory
#     'tfidf__norm': ('l1', 'l2'),
    'tfidf__use_idf': [True, False],
}

lr_params = {
    'clf__estimator': [LogisticRegression()],
        'clf__estimator__penalty': ['l1', 'l2'],
        'clf__estimator__C': param_range_lr,
        'clf__estimator__solver': ['liblinear'],
        'clf__estimator__class_weight': ['balanced'],
}

svc_params = {
    'clf__estimator': [SVC()],
        'clf__estimator__kernel': ['linear', 'rbf'],
        'clf__estimator__C': param_range, # np.logspace(-1, 2, 10),
        'clf__estimator__gamma': ['auto'], # np.logspace(-1, 1, 10)
        'clf__estimator__probability': [True],
        'clf__estimator__class_weight': ['balanced'],
}

rf_params = {
    'clf__estimator': [RandomForestClassifier()],
        'clf__estimator__criterion': ['gini', 'entropy'],
        'clf__estimator__min_samples_leaf': param_range,
        'clf__estimator__max_depth': param_range,
        'clf__estimator__min_samples_split': param_range[1:],
        'clf__estimator__n_estimators': [10],
        'clf__estimator__class_weight': ['balanced'],
}

mnb_params = {
    'clf__estimator': [MultinomialNB()],
        'clf__estimator__alpha': [0.7, 1.0],
}

## Stack params
parameters = [
    {**vectorizer_params, **lr_params},
#     {**vectorizer_params, **svc_params},
#     {**vectorizer_params, **rf_params},
    {**vectorizer_params, **mnb_params}
]

ovr_pipeline = Pipeline([('vectorizer', CountVectorizer()),
                         ('tfidf', TfidfTransformer()),
                         ('clf', OneVsRestClassifier(LogisticRegression())),
                        ]
                       )

# Gridsearch settings
# scoring = make_scorer(f1_score, average='micro') # possible scorings 'f1_micro' 'f1_macro'
scoring = 'f1_micro'
# scoring = make_scorer(hamming_loss) # hamming gives equal weighting to both relevant and irrelevant?
# maybe use precision somewhere
folds = 3
njobs = -1

ovr_model = GridSearchCV(ovr_pipeline, parameters, scoring=scoring, cv=folds, n_jobs=njobs)

In [None]:
%%time
ovr_model.fit(X_train,y_train)
print(ovr_model.best_params_, ovr_model.best_score_)
pd.DataFrame(ovr_model.cv_results_)

In [None]:
y_pred_prob = ovr_model.predict_proba(X_test)
t = 0.1 # threshold value
y_pred_new = (y_pred_prob >= t).astype(int)
score = f1_score(y_test, y_pred_new, average="micro")
print(f"One vs Rest best model's f1-score {score}")

## Gridsearch best model for each tag

https://stackoverflow.com/questions/38555650/try-multiple-estimator-in-one-grid-search



In [115]:
# param_range = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
param_range = [1, 2, 3, 4, 5]
param_range_lr = [1.0, 0.5, 0.1]

# Set params, comment out as see fit

vectorizer_params = {
#     'vectorizer__min_df': np.linspace(0.005, 0.05, 5),
#     'vectorizer__ngram_range': [(1, 1), (1, 2)], # This shit blows up your memory
#     'tfidf__norm': ('l1', 'l2'),
#     'tfidf__use_idf': [True, False],
}

# Add any Binary classification model setting here.
# Also add to general parameters to be passed into pipeline below if want to use new model.

vectorizer = Pipeline(steps=[
    ('vec', TfidfVectorizer()),
])

numeric_transformer = Pipeline(steps=[
    ('scaler', StandardScaler())])

preprocessor = ColumnTransformer(
    transformers=[
        ('vectorizer', vectorizer, 0),
        ('num', numeric_transformer, list(range(1, 16))),
    ])

lr_params = {
    'clf': [LogisticRegression()],
        'clf__penalty': ['l1', 'l2'],
        'clf__C': [0.01, 0.1, 1, 10, 50, 100, 500],
        'clf__solver': ['liblinear'],
        'clf__class_weight': ['balanced'],
}

svc_params = {
    'clf': [SVC()],
        'clf__kernel': ['linear', 'rbf'],
        'clf__C': param_range, # np.logspace(-1, 2, 10),
        'clf__gamma': ['auto'], # np.logspace(-1, 1, 10)
        'clf__probability': [True],
        'clf__class_weight': ['balanced'],
}

rf_params = {
    'clf': [RandomForestClassifier()],
        'clf__criterion': ['gini', 'entropy'],
        'clf__min_samples_leaf': [2, 4, 8],
#         'clf__max_depth': np.linspace(10, 50, num = 5),
        'clf__min_samples_split': [2, 5, 10],
        'clf__n_estimators': [60, 100, 200, 500],
        'clf__class_weight': ['balanced'],
}

mnb_params = {
    'clf': [MultinomialNB()],
        'clf__alpha': [0.7, 1.0],
}

## Stack params
parameters = [
    {**vectorizer_params, **lr_params},
#     {**vectorizer_params, **svc_params},
    {**vectorizer_params, **rf_params},
#     {**vectorizer_params, **mnb_params}
]

per_tag_pipe = Pipeline([('processor', preprocessor),
                  ('clf', LogisticRegression())], verbose=True)

# scoring = make_scorer(hamming_loss)
scoring = 'f1'
# scoring = 'f1_micro'
# scoring = 'balanced_accuracy'
# scoring = 'precision'
folds = 4
njobs = -1

per_tag_model = dcv.GridSearchCV(per_tag_pipe, parameters, scoring=scoring, cv=folds, n_jobs=njobs)

In [116]:
tags = [tag for tag in mlb.inverse_transform(np.ones(shape=(1, 15)))[0]]
print(tags)
tags.index('technology')

['biodiversity', 'biomechanics', 'business', 'communication', 'culture', 'design', 'entertainment', 'future', 'global issues', 'history', 'humanity', 'media', 'politics', 'science', 'technology']


14

In [117]:
%%time
for index in range(len(tags)):
    print(f"Processing {tags[index]}")
    with ProgressBar():
        per_tag_model.fit(X_train, y_train[:, index])
#     display(pd.DataFrame(per_tag_model.cv_results_))
    t = 0.5 #threshold value
    prediction_prob = per_tag_model.predict_proba(X_test)
    prediction = (prediction_prob[:, 1] >= t).astype(int)
    # save model or model params somewhere
    print(f'tag {index}: {tags[index]} best model {per_tag_model.best_params_}')
    print(f'tag {index}: {tags[index]} counts - predicted: {sum(prediction)}, actual: {sum(y_test[:, index])}')
    print(f'tag {index}: {tags[index]} test f1-score is {f1_score(y_test[:, index], prediction, average="binary")}')
    print(f'tag {index}: {tags[index]} test accuracy is {accuracy_score(y_test[:, index], prediction)}')
    filename = f'best_{tags[index]}_model.joblib'
    joblib.dump(per_tag_model, filename)
    print('--------------------------')

Processing biodiversity
[######                                  ] | 15% Completed | 45.5s

  'precision', 'predicted', average, warn_for)


[######                                  ] | 15% Completed | 46.0s

  'precision', 'predicted', average, warn_for)


[######                                  ] | 16% Completed | 47.4s

  'precision', 'predicted', average, warn_for)


[######                                  ] | 16% Completed | 47.8s

  'precision', 'predicted', average, warn_for)


[#########                               ] | 22% Completed |  1min  0.4s

  'precision', 'predicted', average, warn_for)


[#########                               ] | 23% Completed |  1min  2.6s

  'precision', 'predicted', average, warn_for)


[#########                               ] | 24% Completed |  1min  4.0s

  'precision', 'predicted', average, warn_for)


[##########                              ] | 25% Completed |  1min  4.8s

  'precision', 'predicted', average, warn_for)


[###############                         ] | 39% Completed |  1min 54.9s

  'precision', 'predicted', average, warn_for)


[###############                         ] | 39% Completed |  1min 55.6s

  'precision', 'predicted', average, warn_for)


[################                        ] | 40% Completed |  1min 56.1s

  'precision', 'predicted', average, warn_for)


[################                        ] | 40% Completed |  1min 56.8s

  'precision', 'predicted', average, warn_for)


[################                        ] | 41% Completed |  1min 58.9s

  'precision', 'predicted', average, warn_for)


[################                        ] | 42% Completed |  2min  0.8s

  'precision', 'predicted', average, warn_for)


[##################                      ] | 46% Completed |  2min  8.6s

  'precision', 'predicted', average, warn_for)


[##################                      ] | 47% Completed |  2min  9.6s

  'precision', 'predicted', average, warn_for)


[##################                      ] | 47% Completed |  2min 10.1s

  'precision', 'predicted', average, warn_for)


[###################                     ] | 48% Completed |  2min 11.4s

  'precision', 'predicted', average, warn_for)


[###################                     ] | 49% Completed |  2min 13.0s

  'precision', 'predicted', average, warn_for)


[###################                     ] | 49% Completed |  2min 14.2s

  'precision', 'predicted', average, warn_for)


[######################                  ] | 56% Completed |  2min 27.0s

  'precision', 'predicted', average, warn_for)


[#######################                 ] | 57% Completed |  2min 31.0s

  'precision', 'predicted', average, warn_for)


[#######################                 ] | 58% Completed |  2min 33.1s

  'precision', 'predicted', average, warn_for)


[########################                ] | 60% Completed |  2min 42.7s

  'precision', 'predicted', average, warn_for)


[###############################         ] | 78% Completed |  3min 33.7s

  'precision', 'predicted', average, warn_for)


[########################################] | 100% Completed |  4min 39.1s
[                                        ] | 0% Completed |  1.2s[Pipeline] ......... (step 1 of 2) Processing processor, total=   1.2s
[                                        ] | 0% Completed |  1.5s[Pipeline] ............... (step 2 of 2) Processing clf, total=   0.3s
[########################################] | 100% Completed |  1.6s
tag 0: biodiversity best model {'clf': LogisticRegression(C=1, class_weight='balanced', dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='warn', n_jobs=None, penalty='l2',
                   random_state=None, solver='liblinear', tol=0.0001, verbose=0,
                   warm_start=False), 'clf__C': 1, 'clf__class_weight': 'balanced', 'clf__penalty': 'l2', 'clf__solver': 'liblinear'}
tag 0: biodiversity counts - predicted: 76, actual: 45
tag 0: biodiversity test f1-score is 0.5289256198347106
tag 0

  'precision', 'predicted', average, warn_for)


[######                                  ] | 15% Completed | 50.6s

  'precision', 'predicted', average, warn_for)


[###################                     ] | 47% Completed |  2min 37.6s

  'precision', 'predicted', average, warn_for)


[#########################               ] | 64% Completed |  3min  6.5s

  'precision', 'predicted', average, warn_for)


[##########################              ] | 67% Completed |  3min 12.0s

  'precision', 'predicted', average, warn_for)


[############################            ] | 71% Completed |  3min 20.1s

  'precision', 'predicted', average, warn_for)


[#############################           ] | 73% Completed |  3min 23.4s

  'precision', 'predicted', average, warn_for)


[#############################           ] | 74% Completed |  3min 25.1s

  'precision', 'predicted', average, warn_for)


[########################################] | 100% Completed |  4min 38.4s
[                                        ] | 0% Completed |  1.6s[Pipeline] ......... (step 1 of 2) Processing processor, total=   1.7s
[                                        ] | 0% Completed |  2.2s[Pipeline] ............... (step 2 of 2) Processing clf, total=   0.6s
[########################################] | 100% Completed |  2.3s
tag 1: biomechanics best model {'clf': LogisticRegression(C=1, class_weight='balanced', dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='warn', n_jobs=None, penalty='l2',
                   random_state=None, solver='liblinear', tol=0.0001, verbose=0,
                   warm_start=False), 'clf__C': 1, 'clf__class_weight': 'balanced', 'clf__penalty': 'l2', 'clf__solver': 'liblinear'}
tag 1: biomechanics counts - predicted: 76, actual: 44
tag 1: biomechanics test f1-score is 0.5500000000000002
tag 1

  'precision', 'predicted', average, warn_for)


[######                                  ] | 15% Completed | 59.2s

  'precision', 'predicted', average, warn_for)


[######                                  ] | 16% Completed |  1min  1.8s

  'precision', 'predicted', average, warn_for)


[#########                               ] | 23% Completed |  1min 17.5s

  'precision', 'predicted', average, warn_for)


[#########                               ] | 23% Completed |  1min 18.4s

  'precision', 'predicted', average, warn_for)


[##########                              ] | 25% Completed |  1min 21.6s

  'precision', 'predicted', average, warn_for)


[#########################               ] | 64% Completed |  3min 33.5s

  'precision', 'predicted', average, warn_for)


[##########################              ] | 65% Completed |  3min 35.7s

  'precision', 'predicted', average, warn_for)


[#############################           ] | 73% Completed |  3min 52.1s

  'precision', 'predicted', average, warn_for)


[##################################      ] | 85% Completed |  4min 24.6s

  'precision', 'predicted', average, warn_for)


[########################################] | 100% Completed |  5min 13.8s
[                                        ] | 0% Completed |  1.3s[Pipeline] ......... (step 1 of 2) Processing processor, total=   1.3s
[                                        ] | 0% Completed |  2.1s[Pipeline] ............... (step 2 of 2) Processing clf, total=   0.8s
[########################################] | 100% Completed |  2.2s
tag 2: business best model {'clf': LogisticRegression(C=10, class_weight='balanced', dual=False,
                   fit_intercept=True, intercept_scaling=1, l1_ratio=None,
                   max_iter=100, multi_class='warn', n_jobs=None, penalty='l2',
                   random_state=None, solver='liblinear', tol=0.0001, verbose=0,
                   warm_start=False), 'clf__C': 10, 'clf__class_weight': 'balanced', 'clf__penalty': 'l2', 'clf__solver': 'liblinear'}
tag 2: business counts - predicted: 69, actual: 70
tag 2: business test f1-score is 0.4892086330935252
tag 2: business

  'precision', 'predicted', average, warn_for)


[#####                                   ] | 13% Completed | 49.5s

  'precision', 'predicted', average, warn_for)


[#####                                   ] | 14% Completed | 50.5s

  'precision', 'predicted', average, warn_for)


[#####                                   ] | 14% Completed | 51.2s

  'precision', 'predicted', average, warn_for)


[#####                                   ] | 14% Completed | 52.5s

  'precision', 'predicted', average, warn_for)


[######                                  ] | 15% Completed | 53.1s

  'precision', 'predicted', average, warn_for)


[######                                  ] | 15% Completed | 53.6s

  'precision', 'predicted', average, warn_for)


[######                                  ] | 15% Completed | 54.9s

  'precision', 'predicted', average, warn_for)


[######                                  ] | 15% Completed | 55.5s

  'precision', 'predicted', average, warn_for)


[######                                  ] | 16% Completed | 56.6s

  'precision', 'predicted', average, warn_for)


[######                                  ] | 16% Completed | 57.4s

  'precision', 'predicted', average, warn_for)


[######                                  ] | 16% Completed | 57.9s

  'precision', 'predicted', average, warn_for)


[######                                  ] | 17% Completed | 58.4s

  'precision', 'predicted', average, warn_for)


[######                                  ] | 17% Completed | 58.7s

  'precision', 'predicted', average, warn_for)


[#######                                 ] | 17% Completed | 59.6s

  'precision', 'predicted', average, warn_for)


[#######                                 ] | 17% Completed |  1min  0.1s

  'precision', 'predicted', average, warn_for)


[#######                                 ] | 18% Completed |  1min  0.5s

  'precision', 'predicted', average, warn_for)


[#######                                 ] | 18% Completed |  1min  1.0s

  'precision', 'predicted', average, warn_for)


[#######                                 ] | 18% Completed |  1min  2.0s

  'precision', 'predicted', average, warn_for)


[#######                                 ] | 19% Completed |  1min  2.7s

  'precision', 'predicted', average, warn_for)


[#######                                 ] | 19% Completed |  1min  3.9s

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


[#######                                 ] | 19% Completed |  1min  4.6s

  'precision', 'predicted', average, warn_for)


[########                                ] | 20% Completed |  1min  5.1s

  'precision', 'predicted', average, warn_for)


[########                                ] | 20% Completed |  1min  6.4s

  'precision', 'predicted', average, warn_for)


[########                                ] | 21% Completed |  1min  6.8s

  'precision', 'predicted', average, warn_for)


[########                                ] | 21% Completed |  1min  7.2s

  'precision', 'predicted', average, warn_for)


[########                                ] | 21% Completed |  1min  7.7s

  'precision', 'predicted', average, warn_for)


[########                                ] | 21% Completed |  1min  8.2s

  'precision', 'predicted', average, warn_for)


[########                                ] | 22% Completed |  1min  8.8s

  'precision', 'predicted', average, warn_for)


[########                                ] | 22% Completed |  1min  9.4s

  'precision', 'predicted', average, warn_for)


[#########                               ] | 23% Completed |  1min 10.2s

  'precision', 'predicted', average, warn_for)


[#########                               ] | 23% Completed |  1min 11.1s

  'precision', 'predicted', average, warn_for)


[#########                               ] | 23% Completed |  1min 11.5s

  'precision', 'predicted', average, warn_for)


[#########                               ] | 23% Completed |  1min 12.0s

  'precision', 'predicted', average, warn_for)


[#########                               ] | 24% Completed |  1min 12.3s

  'precision', 'predicted', average, warn_for)


[#########                               ] | 24% Completed |  1min 13.4s

  'precision', 'predicted', average, warn_for)


[#########                               ] | 24% Completed |  1min 13.8s

  'precision', 'predicted', average, warn_for)


[##########                              ] | 25% Completed |  1min 14.8s

  'precision', 'predicted', average, warn_for)


[##########                              ] | 25% Completed |  1min 15.5s

  'precision', 'predicted', average, warn_for)


[##########                              ] | 26% Completed |  1min 16.1s

  'precision', 'predicted', average, warn_for)


[##########                              ] | 26% Completed |  1min 16.7s

  'precision', 'predicted', average, warn_for)


[##########                              ] | 27% Completed |  1min 17.2s

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


[###########                             ] | 28% Completed |  1min 20.3s

  'precision', 'predicted', average, warn_for)


[##############                          ] | 36% Completed |  1min 55.7s

  'precision', 'predicted', average, warn_for)


[###############                         ] | 38% Completed |  1min 58.2s

  'precision', 'predicted', average, warn_for)


[###############                         ] | 38% Completed |  1min 59.6s

  'precision', 'predicted', average, warn_for)


[###############                         ] | 38% Completed |  1min 59.9s

  'precision', 'predicted', average, warn_for)


[###############                         ] | 39% Completed |  2min  0.4s

  'precision', 'predicted', average, warn_for)


[###############                         ] | 39% Completed |  2min  1.2s

  'precision', 'predicted', average, warn_for)


[###############                         ] | 39% Completed |  2min  1.8s

  'precision', 'predicted', average, warn_for)


[################                        ] | 40% Completed |  2min  2.3s

  'precision', 'predicted', average, warn_for)


[################                        ] | 40% Completed |  2min  3.4s

  'precision', 'predicted', average, warn_for)


[################                        ] | 40% Completed |  2min  3.8s

  'precision', 'predicted', average, warn_for)


[################                        ] | 40% Completed |  2min  4.3s

  'precision', 'predicted', average, warn_for)


[################                        ] | 41% Completed |  2min  4.7s

  'precision', 'predicted', average, warn_for)


[################                        ] | 41% Completed |  2min  5.4s

  'precision', 'predicted', average, warn_for)


[################                        ] | 41% Completed |  2min  5.9s

  'precision', 'predicted', average, warn_for)


[################                        ] | 41% Completed |  2min  6.6s

  'precision', 'predicted', average, warn_for)


[################                        ] | 42% Completed |  2min  7.0s

  'precision', 'predicted', average, warn_for)


[################                        ] | 42% Completed |  2min  7.5s

  'precision', 'predicted', average, warn_for)


[#################                       ] | 42% Completed |  2min  8.0s

  'precision', 'predicted', average, warn_for)


[#################                       ] | 43% Completed |  2min  8.6s

  'precision', 'predicted', average, warn_for)


[#################                       ] | 43% Completed |  2min  9.2s

  'precision', 'predicted', average, warn_for)


[#################                       ] | 43% Completed |  2min 10.1s

  'precision', 'predicted', average, warn_for)


[#################                       ] | 44% Completed |  2min 11.0s

  'precision', 'predicted', average, warn_for)


[#################                       ] | 44% Completed |  2min 11.7s

  'precision', 'predicted', average, warn_for)


[#################                       ] | 44% Completed |  2min 12.1s

  'precision', 'predicted', average, warn_for)


[##################                      ] | 45% Completed |  2min 13.0s

  'precision', 'predicted', average, warn_for)


[##################                      ] | 45% Completed |  2min 13.6s

  'precision', 'predicted', average, warn_for)


[##################                      ] | 45% Completed |  2min 14.0s

  'precision', 'predicted', average, warn_for)


[##################                      ] | 46% Completed |  2min 14.6s

  'precision', 'predicted', average, warn_for)


[##################                      ] | 46% Completed |  2min 15.1s

  'precision', 'predicted', average, warn_for)


[##################                      ] | 46% Completed |  2min 15.6s

  'precision', 'predicted', average, warn_for)


[##################                      ] | 46% Completed |  2min 16.1s

  'precision', 'predicted', average, warn_for)


[##################                      ] | 47% Completed |  2min 16.7s

  'precision', 'predicted', average, warn_for)


[###################                     ] | 47% Completed |  2min 17.4s

  'precision', 'predicted', average, warn_for)


[###################                     ] | 47% Completed |  2min 18.1s

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


[###################                     ] | 48% Completed |  2min 18.9s

  'precision', 'predicted', average, warn_for)


[###################                     ] | 48% Completed |  2min 19.5s

  'precision', 'predicted', average, warn_for)


[###################                     ] | 49% Completed |  2min 20.6s

  'precision', 'predicted', average, warn_for)


[###################                     ] | 49% Completed |  2min 21.2s

  'precision', 'predicted', average, warn_for)


[####################                    ] | 50% Completed |  2min 22.0s

  'precision', 'predicted', average, warn_for)


[####################                    ] | 50% Completed |  2min 22.7s

  'precision', 'predicted', average, warn_for)


[####################                    ] | 50% Completed |  2min 23.0s

  'precision', 'predicted', average, warn_for)


[####################                    ] | 50% Completed |  2min 23.5s

  'precision', 'predicted', average, warn_for)


[####################                    ] | 50% Completed |  2min 23.9s

  'precision', 'predicted', average, warn_for)


[####################                    ] | 51% Completed |  2min 24.7s

  'precision', 'predicted', average, warn_for)


[#####################                   ] | 53% Completed |  2min 30.7s

  'precision', 'predicted', average, warn_for)


[########################                ] | 61% Completed |  3min 12.3s

  'precision', 'predicted', average, warn_for)


[########################                ] | 62% Completed |  3min 14.6s

  'precision', 'predicted', average, warn_for)


[#########################               ] | 63% Completed |  3min 17.3s

  'precision', 'predicted', average, warn_for)


[#########################               ] | 63% Completed |  3min 17.9s

  'precision', 'predicted', average, warn_for)


[#########################               ] | 63% Completed |  3min 18.4s

  'precision', 'predicted', average, warn_for)


[#########################               ] | 64% Completed |  3min 19.4s

  'precision', 'predicted', average, warn_for)


[#########################               ] | 64% Completed |  3min 19.9s

  'precision', 'predicted', average, warn_for)


[#########################               ] | 64% Completed |  3min 20.6s

  'precision', 'predicted', average, warn_for)


[##########################              ] | 65% Completed |  3min 21.7s

  'precision', 'predicted', average, warn_for)


[##########################              ] | 65% Completed |  3min 22.2s

  'precision', 'predicted', average, warn_for)


[##########################              ] | 65% Completed |  3min 23.0s

  'precision', 'predicted', average, warn_for)


[##########################              ] | 65% Completed |  3min 23.4s

  'precision', 'predicted', average, warn_for)


[##########################              ] | 66% Completed |  3min 24.0s

  'precision', 'predicted', average, warn_for)


[##########################              ] | 66% Completed |  3min 24.6s

  'precision', 'predicted', average, warn_for)


[##########################              ] | 66% Completed |  3min 25.4s

  'precision', 'predicted', average, warn_for)


[##########################              ] | 67% Completed |  3min 26.3s

  'precision', 'predicted', average, warn_for)


[##########################              ] | 67% Completed |  3min 27.1s

  'precision', 'predicted', average, warn_for)


[###########################             ] | 67% Completed |  3min 27.8s

  'precision', 'predicted', average, warn_for)


[###########################             ] | 67% Completed |  3min 28.4s

  'precision', 'predicted', average, warn_for)


[###########################             ] | 68% Completed |  3min 29.3s

  'precision', 'predicted', average, warn_for)


[###########################             ] | 68% Completed |  3min 30.0s

  'precision', 'predicted', average, warn_for)


[###########################             ] | 69% Completed |  3min 31.0s

  'precision', 'predicted', average, warn_for)


[###########################             ] | 69% Completed |  3min 31.6s

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


[###########################             ] | 69% Completed |  3min 32.5s

  'precision', 'predicted', average, warn_for)


[############################            ] | 70% Completed |  3min 33.2s

  'precision', 'predicted', average, warn_for)


[############################            ] | 70% Completed |  3min 33.8s

  'precision', 'predicted', average, warn_for)


[############################            ] | 70% Completed |  3min 34.6s

  'precision', 'predicted', average, warn_for)


[############################            ] | 71% Completed |  3min 35.1s

  'precision', 'predicted', average, warn_for)


[############################            ] | 71% Completed |  3min 35.5s

  'precision', 'predicted', average, warn_for)


[############################            ] | 71% Completed |  3min 36.3s

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


[############################            ] | 71% Completed |  3min 37.1s

  'precision', 'predicted', average, warn_for)


[#############################           ] | 72% Completed |  3min 37.7s

  'precision', 'predicted', average, warn_for)


[#############################           ] | 72% Completed |  3min 38.6s

  'precision', 'predicted', average, warn_for)


[#############################           ] | 73% Completed |  3min 39.5s

  'precision', 'predicted', average, warn_for)


[#############################           ] | 73% Completed |  3min 40.5s

  'precision', 'predicted', average, warn_for)


[#############################           ] | 73% Completed |  3min 41.1s

  'precision', 'predicted', average, warn_for)


[#############################           ] | 74% Completed |  3min 41.8s

  'precision', 'predicted', average, warn_for)


[#############################           ] | 74% Completed |  3min 42.1s

  'precision', 'predicted', average, warn_for)


[#############################           ] | 74% Completed |  3min 42.7s

  'precision', 'predicted', average, warn_for)


[##############################          ] | 75% Completed |  3min 44.0s

  'precision', 'predicted', average, warn_for)


[##############################          ] | 75% Completed |  3min 44.6s

  'precision', 'predicted', average, warn_for)


[##############################          ] | 75% Completed |  3min 45.0s

  'precision', 'predicted', average, warn_for)


[##############################          ] | 76% Completed |  3min 45.6s

  'precision', 'predicted', average, warn_for)


[###############################         ] | 79% Completed |  3min 52.2s

  'precision', 'predicted', average, warn_for)


[###############################         ] | 79% Completed |  3min 53.7s

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


[################################        ] | 80% Completed |  3min 55.7s

  'precision', 'predicted', average, warn_for)


[################################        ] | 80% Completed |  3min 56.0s

  'precision', 'predicted', average, warn_for)


[################################        ] | 80% Completed |  3min 57.0s

  'precision', 'predicted', average, warn_for)


[################################        ] | 81% Completed |  3min 58.4s

  'precision', 'predicted', average, warn_for)


[################################        ] | 81% Completed |  3min 59.1s

  'precision', 'predicted', average, warn_for)


[################################        ] | 81% Completed |  4min  0.2s

  'precision', 'predicted', average, warn_for)


[################################        ] | 81% Completed |  4min  0.7s

  'precision', 'predicted', average, warn_for)


[################################        ] | 82% Completed |  4min  2.5s

  'precision', 'predicted', average, warn_for)


[#################################       ] | 82% Completed |  4min  2.7s

  'precision', 'predicted', average, warn_for)


[#################################       ] | 82% Completed |  4min  3.7s

  'precision', 'predicted', average, warn_for)


[#################################       ] | 83% Completed |  4min  6.0s

  'precision', 'predicted', average, warn_for)


[#################################       ] | 83% Completed |  4min  6.3s

  'precision', 'predicted', average, warn_for)


[#################################       ] | 83% Completed |  4min  7.5s

  'precision', 'predicted', average, warn_for)


[#################################       ] | 84% Completed |  4min  8.7s

  'precision', 'predicted', average, warn_for)


[#################################       ] | 84% Completed |  4min  9.7s

  'precision', 'predicted', average, warn_for)


[#################################       ] | 84% Completed |  4min 10.3s

  'precision', 'predicted', average, warn_for)


[##################################      ] | 85% Completed |  4min 12.3s

  'precision', 'predicted', average, warn_for)


[##################################      ] | 85% Completed |  4min 13.8s

  'precision', 'predicted', average, warn_for)


[##################################      ] | 85% Completed |  4min 14.2s

  'precision', 'predicted', average, warn_for)


[##################################      ] | 86% Completed |  4min 16.5s

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


[##################################      ] | 87% Completed |  4min 17.5s

  'precision', 'predicted', average, warn_for)


[###################################     ] | 87% Completed |  4min 18.5s

  'precision', 'predicted', average, warn_for)


[###################################     ] | 87% Completed |  4min 19.2s

  'precision', 'predicted', average, warn_for)


[###################################     ] | 88% Completed |  4min 20.4s

  'precision', 'predicted', average, warn_for)


[###################################     ] | 88% Completed |  4min 21.6s

  'precision', 'predicted', average, warn_for)


[###################################     ] | 89% Completed |  4min 23.0s

  'precision', 'predicted', average, warn_for)


[###################################     ] | 89% Completed |  4min 23.7s

  'precision', 'predicted', average, warn_for)


[###################################     ] | 89% Completed |  4min 24.2s

  'precision', 'predicted', average, warn_for)


[###################################     ] | 89% Completed |  4min 25.4s

  'precision', 'predicted', average, warn_for)


[###################################     ] | 89% Completed |  4min 26.0s

  'precision', 'predicted', average, warn_for)


[####################################    ] | 90% Completed |  4min 27.4s

  'precision', 'predicted', average, warn_for)


[####################################    ] | 90% Completed |  4min 27.7s

  'precision', 'predicted', average, warn_for)


[####################################    ] | 91% Completed |  4min 28.5s

  'precision', 'predicted', average, warn_for)


[####################################    ] | 91% Completed |  4min 29.1s

  'precision', 'predicted', average, warn_for)


[####################################    ] | 91% Completed |  4min 31.7s

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


[####################################    ] | 92% Completed |  4min 32.6s

  'precision', 'predicted', average, warn_for)


[####################################    ] | 92% Completed |  4min 33.1s

  'precision', 'predicted', average, warn_for)


[#####################################   ] | 92% Completed |  4min 35.1s

  'precision', 'predicted', average, warn_for)


[#####################################   ] | 93% Completed |  4min 36.2s

  'precision', 'predicted', average, warn_for)


[#####################################   ] | 93% Completed |  4min 36.4s

  'precision', 'predicted', average, warn_for)


[#####################################   ] | 93% Completed |  4min 39.3s

  'precision', 'predicted', average, warn_for)


[#####################################   ] | 93% Completed |  4min 39.5s

  'precision', 'predicted', average, warn_for)


[#####################################   ] | 94% Completed |  4min 41.4s

  'precision', 'predicted', average, warn_for)


[#####################################   ] | 94% Completed |  4min 41.9s

  'precision', 'predicted', average, warn_for)


[#####################################   ] | 94% Completed |  4min 43.3s

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


[######################################  ] | 95% Completed |  4min 45.6s

  'precision', 'predicted', average, warn_for)


[######################################  ] | 95% Completed |  4min 46.8s

  'precision', 'predicted', average, warn_for)


[######################################  ] | 96% Completed |  4min 48.0s

  'precision', 'predicted', average, warn_for)


[######################################  ] | 96% Completed |  4min 48.5s

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


[######################################  ] | 96% Completed |  4min 50.2s

  'precision', 'predicted', average, warn_for)


[######################################  ] | 97% Completed |  4min 51.3s

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


[####################################### ] | 97% Completed |  4min 52.6s

  'precision', 'predicted', average, warn_for)


[####################################### ] | 98% Completed |  4min 53.5s

  'precision', 'predicted', average, warn_for)


[####################################### ] | 98% Completed |  4min 54.4s

  'precision', 'predicted', average, warn_for)


[####################################### ] | 99% Completed |  4min 56.2s

  'precision', 'predicted', average, warn_for)


[####################################### ] | 99% Completed |  4min 57.0s

  'precision', 'predicted', average, warn_for)


[####################################### ] | 99% Completed |  4min 57.6s

  'precision', 'predicted', average, warn_for)


[####################################### ] | 99% Completed |  4min 58.4s

  'precision', 'predicted', average, warn_for)


[########################################] | 100% Completed |  4min 59.3s
[                                        ] | 0% Completed |  0.0s

  'precision', 'predicted', average, warn_for)


[                                        ] | 0% Completed |  1.5s[Pipeline] ......... (step 1 of 2) Processing processor, total=   1.6s
[                                        ] | 0% Completed |  2.1s[Pipeline] ............... (step 2 of 2) Processing clf, total=   0.5s
[########################################] | 100% Completed |  2.2s
tag 3: communication best model {'clf': LogisticRegression(C=1, class_weight='balanced', dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='warn', n_jobs=None, penalty='l2',
                   random_state=None, solver='liblinear', tol=0.0001, verbose=0,
                   warm_start=False), 'clf__C': 1, 'clf__class_weight': 'balanced', 'clf__penalty': 'l2', 'clf__solver': 'liblinear'}
tag 3: communication counts - predicted: 73, actual: 42
tag 3: communication test f1-score is 0.33043478260869563
tag 3: communication test accuracy is 0.8412371134020619
------------------

  'precision', 'predicted', average, warn_for)


[####                                    ] | 10% Completed | 55.0s

  'precision', 'predicted', average, warn_for)


[#################                       ] | 43% Completed |  3min 23.0s

  'precision', 'predicted', average, warn_for)


[########################                ] | 60% Completed |  4min  3.5s

  'precision', 'predicted', average, warn_for)


[########################################] | 100% Completed |  6min  1.5s
[                                        ] | 0% Completed |  1.4s[Pipeline] ......... (step 1 of 2) Processing processor, total=   1.4s
[                                        ] | 0% Completed |  9.6s[Pipeline] ............... (step 2 of 2) Processing clf, total=   8.3s
[########################################] | 100% Completed |  9.7s
tag 4: culture best model {'clf': RandomForestClassifier(bootstrap=True, class_weight='balanced',
                       criterion='gini', max_depth=None, max_features='auto',
                       max_leaf_nodes=None, min_impurity_decrease=0.0,
                       min_impurity_split=None, min_samples_leaf=2,
                       min_samples_split=10, min_weight_fraction_leaf=0.0,
                       n_estimators=500, n_jobs=None, oob_score=False,
                       random_state=None, verbose=0, warm_start=False), 'clf__class_weight': 'balanced', 'clf__criterion': 'g

  'precision', 'predicted', average, warn_for)


[#####                                   ] | 14% Completed | 57.1s

  'precision', 'predicted', average, warn_for)


[#####                                   ] | 14% Completed | 57.7s

  'precision', 'predicted', average, warn_for)


[#####                                   ] | 14% Completed | 58.2s

  'precision', 'predicted', average, warn_for)


[######                                  ] | 15% Completed | 58.6s

  'precision', 'predicted', average, warn_for)


[######                                  ] | 15% Completed | 58.9s

  'precision', 'predicted', average, warn_for)


[######                                  ] | 15% Completed | 59.5s

  'precision', 'predicted', average, warn_for)


[######                                  ] | 15% Completed |  1min  0.0s

  'precision', 'predicted', average, warn_for)


[######                                  ] | 15% Completed |  1min  0.6s

  'precision', 'predicted', average, warn_for)


[######                                  ] | 16% Completed |  1min  1.1s

  'precision', 'predicted', average, warn_for)


[######                                  ] | 16% Completed |  1min  1.5s

  'precision', 'predicted', average, warn_for)


[######                                  ] | 16% Completed |  1min  1.8s

  'precision', 'predicted', average, warn_for)


[######                                  ] | 16% Completed |  1min  2.4s

  'precision', 'predicted', average, warn_for)


[######                                  ] | 17% Completed |  1min  2.9s

  'precision', 'predicted', average, warn_for)


[######                                  ] | 17% Completed |  1min  3.4s

  'precision', 'predicted', average, warn_for)


[#######                                 ] | 17% Completed |  1min  4.0s

  'precision', 'predicted', average, warn_for)


[#######                                 ] | 18% Completed |  1min  4.9s

  'precision', 'predicted', average, warn_for)


[#######                                 ] | 18% Completed |  1min  5.6s

  'precision', 'predicted', average, warn_for)


[#######                                 ] | 18% Completed |  1min  6.0s

  'precision', 'predicted', average, warn_for)


[#######                                 ] | 18% Completed |  1min  6.5s

  'precision', 'predicted', average, warn_for)


[#######                                 ] | 19% Completed |  1min  7.9s

  'precision', 'predicted', average, warn_for)


[#######                                 ] | 19% Completed |  1min  8.9s

  'precision', 'predicted', average, warn_for)


[########                                ] | 20% Completed |  1min  9.8s

  'precision', 'predicted', average, warn_for)


[########                                ] | 20% Completed |  1min 10.9s

  'precision', 'predicted', average, warn_for)


[########                                ] | 21% Completed |  1min 11.8s

  'precision', 'predicted', average, warn_for)


[########                                ] | 21% Completed |  1min 12.2s

  'precision', 'predicted', average, warn_for)


[########                                ] | 21% Completed |  1min 13.2s

  'precision', 'predicted', average, warn_for)


[########                                ] | 22% Completed |  1min 13.9s

  'precision', 'predicted', average, warn_for)


[########                                ] | 22% Completed |  1min 14.3s

  'precision', 'predicted', average, warn_for)


[#########                               ] | 22% Completed |  1min 15.1s

  'precision', 'predicted', average, warn_for)


[#########                               ] | 22% Completed |  1min 15.6s

  'precision', 'predicted', average, warn_for)


[#########                               ] | 23% Completed |  1min 16.3s

  'precision', 'predicted', average, warn_for)


[#########                               ] | 23% Completed |  1min 16.9s

  'precision', 'predicted', average, warn_for)


[#########                               ] | 23% Completed |  1min 17.3s

  'precision', 'predicted', average, warn_for)


[#########                               ] | 24% Completed |  1min 18.0s

  'precision', 'predicted', average, warn_for)


[#########                               ] | 24% Completed |  1min 18.4s

  'precision', 'predicted', average, warn_for)


[#########                               ] | 24% Completed |  1min 19.0s

  'precision', 'predicted', average, warn_for)


[#########                               ] | 24% Completed |  1min 19.5s

  'precision', 'predicted', average, warn_for)


[##########                              ] | 25% Completed |  1min 20.1s

  'precision', 'predicted', average, warn_for)


[##########                              ] | 25% Completed |  1min 21.3s

  'precision', 'predicted', average, warn_for)


[##########                              ] | 26% Completed |  1min 22.2s

  'precision', 'predicted', average, warn_for)


[##########                              ] | 26% Completed |  1min 22.9s

  'precision', 'predicted', average, warn_for)


[##########                              ] | 27% Completed |  1min 23.8s

  'precision', 'predicted', average, warn_for)


[###########                             ] | 28% Completed |  1min 27.4s

  'precision', 'predicted', average, warn_for)


[##################                      ] | 46% Completed |  3min  1.3s

  'precision', 'predicted', average, warn_for)


[##################                      ] | 46% Completed |  3min  1.9s

  'precision', 'predicted', average, warn_for)


[##################                      ] | 46% Completed |  3min  2.3s

  'precision', 'predicted', average, warn_for)


[##################                      ] | 47% Completed |  3min  3.0s

  'precision', 'predicted', average, warn_for)


[##################                      ] | 47% Completed |  3min  3.7s

  'precision', 'predicted', average, warn_for)


[###################                     ] | 47% Completed |  3min  4.0s

  'precision', 'predicted', average, warn_for)


[###################                     ] | 47% Completed |  3min  4.9s

  'precision', 'predicted', average, warn_for)


[###################                     ] | 48% Completed |  3min  5.5s

  'precision', 'predicted', average, warn_for)


[###################                     ] | 48% Completed |  3min  6.0s

  'precision', 'predicted', average, warn_for)


[###################                     ] | 48% Completed |  3min  6.8s

  'precision', 'predicted', average, warn_for)


[###################                     ] | 49% Completed |  3min  7.3s

  'precision', 'predicted', average, warn_for)


[###################                     ] | 49% Completed |  3min  8.2s

  'precision', 'predicted', average, warn_for)


[###################                     ] | 49% Completed |  3min  9.0s

  'precision', 'predicted', average, warn_for)


[####################                    ] | 50% Completed |  3min  9.4s

  'precision', 'predicted', average, warn_for)


[####################                    ] | 50% Completed |  3min  9.7s

  'precision', 'predicted', average, warn_for)


[####################                    ] | 50% Completed |  3min 11.0s

  'precision', 'predicted', average, warn_for)


[####################                    ] | 51% Completed |  3min 11.6s

  'precision', 'predicted', average, warn_for)


[####################                    ] | 51% Completed |  3min 12.1s

  'precision', 'predicted', average, warn_for)


[####################                    ] | 51% Completed |  3min 12.7s

  'precision', 'predicted', average, warn_for)


[####################                    ] | 52% Completed |  3min 13.8s

  'precision', 'predicted', average, warn_for)


[#####################                   ] | 52% Completed |  3min 14.4s

  'precision', 'predicted', average, warn_for)


[#####################                   ] | 52% Completed |  3min 14.9s

  'precision', 'predicted', average, warn_for)


[#####################                   ] | 53% Completed |  3min 15.7s

  'precision', 'predicted', average, warn_for)


[#####################                   ] | 53% Completed |  3min 16.3s

  'precision', 'predicted', average, warn_for)


[#####################                   ] | 53% Completed |  3min 16.8s

  'precision', 'predicted', average, warn_for)


[#####################                   ] | 54% Completed |  3min 17.2s

  'precision', 'predicted', average, warn_for)


[#####################                   ] | 54% Completed |  3min 17.9s

  'precision', 'predicted', average, warn_for)


[#####################                   ] | 54% Completed |  3min 18.8s

  'precision', 'predicted', average, warn_for)


[#####################                   ] | 54% Completed |  3min 19.1s

  'precision', 'predicted', average, warn_for)


[######################                  ] | 55% Completed |  3min 20.4s

  'precision', 'predicted', average, warn_for)


[######################                  ] | 55% Completed |  3min 20.8s

  'precision', 'predicted', average, warn_for)


[######################                  ] | 56% Completed |  3min 21.8s

  'precision', 'predicted', average, warn_for)


[######################                  ] | 56% Completed |  3min 22.4s

  'precision', 'predicted', average, warn_for)


[######################                  ] | 56% Completed |  3min 22.8s

  'precision', 'predicted', average, warn_for)


[######################                  ] | 56% Completed |  3min 23.2s

  'precision', 'predicted', average, warn_for)


[#######################                 ] | 57% Completed |  3min 24.2s

  'precision', 'predicted', average, warn_for)


[#######################                 ] | 57% Completed |  3min 25.3s

  'precision', 'predicted', average, warn_for)


[#######################                 ] | 58% Completed |  3min 25.8s

  'precision', 'predicted', average, warn_for)


[#######################                 ] | 58% Completed |  3min 26.6s

  'precision', 'predicted', average, warn_for)


[#######################                 ] | 59% Completed |  3min 27.5s

  'precision', 'predicted', average, warn_for)


[#######################                 ] | 59% Completed |  3min 28.0s

  'precision', 'predicted', average, warn_for)


[#######################                 ] | 59% Completed |  3min 28.5s

  'precision', 'predicted', average, warn_for)


[########################                ] | 60% Completed |  3min 29.3s

  'precision', 'predicted', average, warn_for)


[########################                ] | 61% Completed |  3min 30.6s

  'precision', 'predicted', average, warn_for)


[########################                ] | 62% Completed |  3min 32.6s

  'precision', 'predicted', average, warn_for)


[#########################               ] | 63% Completed |  3min 35.3s

  'precision', 'predicted', average, warn_for)


[#########################               ] | 63% Completed |  3min 35.7s

  'precision', 'predicted', average, warn_for)


[#########################               ] | 63% Completed |  3min 36.2s

  'precision', 'predicted', average, warn_for)


[#########################               ] | 64% Completed |  3min 37.0s

  'precision', 'predicted', average, warn_for)


[#########################               ] | 64% Completed |  3min 37.8s

  'precision', 'predicted', average, warn_for)


[##########################              ] | 65% Completed |  3min 38.7s

  'precision', 'predicted', average, warn_for)


[##########################              ] | 65% Completed |  3min 39.4s

  'precision', 'predicted', average, warn_for)


[##########################              ] | 65% Completed |  3min 39.9s

  'precision', 'predicted', average, warn_for)


[##########################              ] | 65% Completed |  3min 40.3s

  'precision', 'predicted', average, warn_for)


[##########################              ] | 65% Completed |  3min 40.8s

  'precision', 'predicted', average, warn_for)


[##########################              ] | 66% Completed |  3min 41.5s

  'precision', 'predicted', average, warn_for)


[##########################              ] | 66% Completed |  3min 42.1s

  'precision', 'predicted', average, warn_for)


[##########################              ] | 66% Completed |  3min 42.7s

  'precision', 'predicted', average, warn_for)


[##########################              ] | 67% Completed |  3min 43.1s

  'precision', 'predicted', average, warn_for)


[##########################              ] | 67% Completed |  3min 43.7s

  'precision', 'predicted', average, warn_for)


[###########################             ] | 67% Completed |  3min 45.0s

  'precision', 'predicted', average, warn_for)


[###########################             ] | 68% Completed |  3min 45.5s

  'precision', 'predicted', average, warn_for)


[###########################             ] | 68% Completed |  3min 46.9s

  'precision', 'predicted', average, warn_for)


[###########################             ] | 69% Completed |  3min 47.4s

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


[###########################             ] | 69% Completed |  3min 48.2s

  'precision', 'predicted', average, warn_for)


[###########################             ] | 69% Completed |  3min 49.2s

  'precision', 'predicted', average, warn_for)


[###########################             ] | 69% Completed |  3min 49.5s

  'precision', 'predicted', average, warn_for)


[############################            ] | 70% Completed |  3min 50.2s

  'precision', 'predicted', average, warn_for)


[############################            ] | 70% Completed |  3min 50.9s

  'precision', 'predicted', average, warn_for)


[############################            ] | 70% Completed |  3min 51.6s

  'precision', 'predicted', average, warn_for)


[############################            ] | 71% Completed |  3min 52.0s

  'precision', 'predicted', average, warn_for)


[############################            ] | 71% Completed |  3min 52.5s

  'precision', 'predicted', average, warn_for)


[############################            ] | 71% Completed |  3min 53.3s

  'precision', 'predicted', average, warn_for)


[############################            ] | 71% Completed |  3min 53.6s

  'precision', 'predicted', average, warn_for)


[############################            ] | 71% Completed |  3min 54.2s

  'precision', 'predicted', average, warn_for)


[############################            ] | 72% Completed |  3min 54.7s

  'precision', 'predicted', average, warn_for)


[############################            ] | 72% Completed |  3min 55.0s

  'precision', 'predicted', average, warn_for)


[#############################           ] | 72% Completed |  3min 56.1s

  'precision', 'predicted', average, warn_for)


[#############################           ] | 73% Completed |  3min 57.1s

  'precision', 'predicted', average, warn_for)


[#############################           ] | 73% Completed |  3min 57.6s

  'precision', 'predicted', average, warn_for)


[#############################           ] | 73% Completed |  3min 58.2s

  'precision', 'predicted', average, warn_for)


[#############################           ] | 74% Completed |  3min 59.3s

  'precision', 'predicted', average, warn_for)


[#############################           ] | 74% Completed |  4min  0.2s

  'precision', 'predicted', average, warn_for)


[#############################           ] | 74% Completed |  4min  0.4s

  'precision', 'predicted', average, warn_for)


[##############################          ] | 75% Completed |  4min  1.0s

  'precision', 'predicted', average, warn_for)


[##############################          ] | 75% Completed |  4min  1.8s

  'precision', 'predicted', average, warn_for)


[##############################          ] | 75% Completed |  4min  2.7s

  'precision', 'predicted', average, warn_for)


[##############################          ] | 76% Completed |  4min  3.5s

  'precision', 'predicted', average, warn_for)


[##############################          ] | 76% Completed |  4min  3.7s

  'precision', 'predicted', average, warn_for)


[###############################         ] | 78% Completed |  4min  6.3s

  'precision', 'predicted', average, warn_for)


[###############################         ] | 79% Completed |  4min  8.0s

  'precision', 'predicted', average, warn_for)


[###############################         ] | 79% Completed |  4min 10.3s

  'precision', 'predicted', average, warn_for)


[###############################         ] | 79% Completed |  4min 10.9s

  'precision', 'predicted', average, warn_for)


[################################        ] | 80% Completed |  4min 11.7s

  'precision', 'predicted', average, warn_for)


[################################        ] | 80% Completed |  4min 12.4s

  'precision', 'predicted', average, warn_for)


[################################        ] | 80% Completed |  4min 13.1s

  'precision', 'predicted', average, warn_for)


[################################        ] | 81% Completed |  4min 14.3s

  'precision', 'predicted', average, warn_for)


[################################        ] | 81% Completed |  4min 15.0s

  'precision', 'predicted', average, warn_for)


[################################        ] | 81% Completed |  4min 15.9s

  'precision', 'predicted', average, warn_for)


[################################        ] | 81% Completed |  4min 16.3s

  'precision', 'predicted', average, warn_for)


[################################        ] | 82% Completed |  4min 17.6s

  'precision', 'predicted', average, warn_for)


[#################################       ] | 82% Completed |  4min 18.5s

  'precision', 'predicted', average, warn_for)


[#################################       ] | 82% Completed |  4min 18.9s

  'precision', 'predicted', average, warn_for)


[#################################       ] | 83% Completed |  4min 21.0s

  'precision', 'predicted', average, warn_for)


[#################################       ] | 83% Completed |  4min 21.6s

  'precision', 'predicted', average, warn_for)


[#################################       ] | 83% Completed |  4min 23.8s

  'precision', 'predicted', average, warn_for)


[#################################       ] | 84% Completed |  4min 26.9s

  'precision', 'predicted', average, warn_for)


[#################################       ] | 84% Completed |  4min 27.8s

  'precision', 'predicted', average, warn_for)


[#################################       ] | 84% Completed |  4min 28.6s

  'precision', 'predicted', average, warn_for)


[#################################       ] | 84% Completed |  4min 29.0s

  'precision', 'predicted', average, warn_for)


[##################################      ] | 85% Completed |  4min 33.2s

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


[##################################      ] | 85% Completed |  4min 33.7s

  'precision', 'predicted', average, warn_for)


[##################################      ] | 86% Completed |  4min 35.4s

  'precision', 'predicted', average, warn_for)


[##################################      ] | 86% Completed |  4min 36.8s

  'precision', 'predicted', average, warn_for)


[###################################     ] | 87% Completed |  4min 39.3s

  'precision', 'predicted', average, warn_for)


[###################################     ] | 87% Completed |  4min 40.6s

  'precision', 'predicted', average, warn_for)


[###################################     ] | 88% Completed |  4min 41.7s

  'precision', 'predicted', average, warn_for)


[###################################     ] | 88% Completed |  4min 43.1s

  'precision', 'predicted', average, warn_for)


[###################################     ] | 88% Completed |  4min 44.1s

  'precision', 'predicted', average, warn_for)


[###################################     ] | 89% Completed |  4min 45.1s

  'precision', 'predicted', average, warn_for)


[###################################     ] | 89% Completed |  4min 47.0s

  'precision', 'predicted', average, warn_for)


[###################################     ] | 89% Completed |  4min 47.8s

  'precision', 'predicted', average, warn_for)


[####################################    ] | 90% Completed |  4min 49.6s

  'precision', 'predicted', average, warn_for)


[####################################    ] | 90% Completed |  4min 50.1s

  'precision', 'predicted', average, warn_for)


[####################################    ] | 91% Completed |  4min 51.8s

  'precision', 'predicted', average, warn_for)


[####################################    ] | 91% Completed |  4min 53.4s

  'precision', 'predicted', average, warn_for)


[####################################    ] | 91% Completed |  4min 54.0s

  'precision', 'predicted', average, warn_for)


[####################################    ] | 92% Completed |  4min 54.3s

  'precision', 'predicted', average, warn_for)


[####################################    ] | 92% Completed |  4min 55.2s

  'precision', 'predicted', average, warn_for)


[#####################################   ] | 92% Completed |  4min 57.4s

  'precision', 'predicted', average, warn_for)


[#####################################   ] | 93% Completed |  4min 57.5s

  'precision', 'predicted', average, warn_for)


[#####################################   ] | 93% Completed |  4min 57.8s

  'precision', 'predicted', average, warn_for)


[#####################################   ] | 93% Completed |  5min  0.4s

  'precision', 'predicted', average, warn_for)


[#####################################   ] | 93% Completed |  5min  0.7s

  'precision', 'predicted', average, warn_for)


[#####################################   ] | 94% Completed |  5min  2.3s

  'precision', 'predicted', average, warn_for)


[#####################################   ] | 94% Completed |  5min  2.6s

  'precision', 'predicted', average, warn_for)


[#####################################   ] | 94% Completed |  5min  4.1s

  'precision', 'predicted', average, warn_for)


[#####################################   ] | 94% Completed |  5min  4.4s

  'precision', 'predicted', average, warn_for)


[######################################  ] | 95% Completed |  5min  6.2s

  'precision', 'predicted', average, warn_for)


[######################################  ] | 95% Completed |  5min  7.9s

  'precision', 'predicted', average, warn_for)


[######################################  ] | 95% Completed |  5min  8.3s

  'precision', 'predicted', average, warn_for)


[######################################  ] | 96% Completed |  5min  9.7s

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


[######################################  ] | 97% Completed |  5min 11.8s

  'precision', 'predicted', average, warn_for)


[######################################  ] | 97% Completed |  5min 12.7s

  'precision', 'predicted', average, warn_for)


[####################################### ] | 98% Completed |  5min 15.1s

  'precision', 'predicted', average, warn_for)


[####################################### ] | 98% Completed |  5min 16.0s

  'precision', 'predicted', average, warn_for)


[####################################### ] | 99% Completed |  5min 17.8s

  'precision', 'predicted', average, warn_for)


[####################################### ] | 99% Completed |  5min 19.2s

  'precision', 'predicted', average, warn_for)


[####################################### ] | 99% Completed |  5min 21.1s

  'precision', 'predicted', average, warn_for)


[########################################] | 100% Completed |  5min 22.0s
[                                        ] | 0% Completed |  0.0s

  'precision', 'predicted', average, warn_for)


[                                        ] | 0% Completed |  1.3s[Pipeline] ......... (step 1 of 2) Processing processor, total=   1.4s
[                                        ] | 0% Completed |  1.6s[Pipeline] ............... (step 2 of 2) Processing clf, total=   0.3s
[########################################] | 100% Completed |  1.7s
tag 7: future best model {'clf': LogisticRegression(C=1, class_weight='balanced', dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='warn', n_jobs=None, penalty='l2',
                   random_state=None, solver='liblinear', tol=0.0001, verbose=0,
                   warm_start=False), 'clf__C': 1, 'clf__class_weight': 'balanced', 'clf__penalty': 'l2', 'clf__solver': 'liblinear'}
tag 7: future counts - predicted: 81, actual: 41
tag 7: future test f1-score is 0.24590163934426226
tag 7: future test accuracy is 0.8103092783505155
--------------------------
Processing global i

  'precision', 'predicted', average, warn_for)


[########                                ] | 22% Completed |  1min 55.0s

  'precision', 'predicted', average, warn_for)


[########                                ] | 22% Completed |  1min 55.6s

  'precision', 'predicted', average, warn_for)


[#########                               ] | 22% Completed |  1min 56.6s

  'precision', 'predicted', average, warn_for)


[#########                               ] | 22% Completed |  1min 57.0s

  'precision', 'predicted', average, warn_for)


[#########                               ] | 23% Completed |  1min 57.8s

  'precision', 'predicted', average, warn_for)


[#########                               ] | 23% Completed |  1min 58.3s

  'precision', 'predicted', average, warn_for)


[#########                               ] | 23% Completed |  1min 59.1s

  'precision', 'predicted', average, warn_for)


[#########                               ] | 24% Completed |  1min 59.7s

  'precision', 'predicted', average, warn_for)


[#########                               ] | 24% Completed |  2min  0.1s

  'precision', 'predicted', average, warn_for)


[#########                               ] | 24% Completed |  2min  0.5s

  'precision', 'predicted', average, warn_for)


[#########                               ] | 24% Completed |  2min  1.5s

  'precision', 'predicted', average, warn_for)


[##########                              ] | 25% Completed |  2min  1.8s

  'precision', 'predicted', average, warn_for)


[##########                              ] | 25% Completed |  2min  3.1s

  'precision', 'predicted', average, warn_for)


[##########                              ] | 25% Completed |  2min  3.6s

  'precision', 'predicted', average, warn_for)


[##########                              ] | 26% Completed |  2min  4.7s

  'precision', 'predicted', average, warn_for)


[##########                              ] | 26% Completed |  2min  5.4s

  'precision', 'predicted', average, warn_for)


[###########                             ] | 27% Completed |  2min  7.2s

  'precision', 'predicted', average, warn_for)


[###########                             ] | 28% Completed |  2min  9.1s

  'precision', 'predicted', average, warn_for)


[###########                             ] | 29% Completed |  2min 10.4s

  'precision', 'predicted', average, warn_for)


[###########                             ] | 29% Completed |  2min 11.4s

  'precision', 'predicted', average, warn_for)


[###########                             ] | 29% Completed |  2min 11.7s

  'precision', 'predicted', average, warn_for)


[############                            ] | 30% Completed |  2min 12.5s

  'precision', 'predicted', average, warn_for)


[############                            ] | 30% Completed |  2min 13.6s

  'precision', 'predicted', average, warn_for)


[############                            ] | 31% Completed |  2min 14.5s

  'precision', 'predicted', average, warn_for)


[############                            ] | 31% Completed |  2min 14.9s

  'precision', 'predicted', average, warn_for)


[############                            ] | 32% Completed |  2min 16.1s

  'precision', 'predicted', average, warn_for)


[############                            ] | 32% Completed |  2min 16.7s

  'precision', 'predicted', average, warn_for)


[#############                           ] | 32% Completed |  2min 17.2s

  'precision', 'predicted', average, warn_for)


[#############                           ] | 33% Completed |  2min 18.0s

  'precision', 'predicted', average, warn_for)


[#############                           ] | 33% Completed |  2min 18.6s

  'precision', 'predicted', average, warn_for)


[#############                           ] | 34% Completed |  2min 19.8s

  'precision', 'predicted', average, warn_for)


[#############                           ] | 34% Completed |  2min 21.4s

  'precision', 'predicted', average, warn_for)


[##############                          ] | 35% Completed |  2min 22.6s

  'precision', 'predicted', average, warn_for)


[##############                          ] | 35% Completed |  2min 23.5s

  'precision', 'predicted', average, warn_for)


[###############                         ] | 38% Completed |  2min 28.5s

  'precision', 'predicted', average, warn_for)


[###############                         ] | 39% Completed |  2min 29.4s

  'precision', 'predicted', average, warn_for)


[###############                         ] | 39% Completed |  2min 30.1s

  'precision', 'predicted', average, warn_for)


[################                        ] | 40% Completed |  2min 30.7s

  'precision', 'predicted', average, warn_for)


[################                        ] | 40% Completed |  2min 31.8s

  'precision', 'predicted', average, warn_for)


[################                        ] | 41% Completed |  2min 32.8s

  'precision', 'predicted', average, warn_for)


[################                        ] | 41% Completed |  2min 33.2s

  'precision', 'predicted', average, warn_for)


[################                        ] | 41% Completed |  2min 34.0s

  'precision', 'predicted', average, warn_for)


[################                        ] | 42% Completed |  2min 34.8s

  'precision', 'predicted', average, warn_for)


[################                        ] | 42% Completed |  2min 35.5s

  'precision', 'predicted', average, warn_for)


[#################                       ] | 42% Completed |  2min 36.0s

  'precision', 'predicted', average, warn_for)


[#################                       ] | 42% Completed |  2min 36.7s

  'precision', 'predicted', average, warn_for)


[#################                       ] | 43% Completed |  2min 37.8s

  'precision', 'predicted', average, warn_for)


[#################                       ] | 44% Completed |  2min 40.2s

  'precision', 'predicted', average, warn_for)


[##################                      ] | 45% Completed |  2min 41.7s

  'precision', 'predicted', average, warn_for)


[##################                      ] | 45% Completed |  2min 42.1s

  'precision', 'predicted', average, warn_for)


[##################                      ] | 45% Completed |  2min 42.6s

  'precision', 'predicted', average, warn_for)


[##################                      ] | 46% Completed |  2min 44.4s

  'precision', 'predicted', average, warn_for)


[##################                      ] | 46% Completed |  2min 44.8s

  'precision', 'predicted', average, warn_for)


[##################                      ] | 47% Completed |  2min 45.3s

  'precision', 'predicted', average, warn_for)


[###################                     ] | 47% Completed |  2min 45.8s

  'precision', 'predicted', average, warn_for)


[###################                     ] | 47% Completed |  2min 46.3s

  'precision', 'predicted', average, warn_for)


[###################                     ] | 48% Completed |  2min 46.9s

  'precision', 'predicted', average, warn_for)


[###################                     ] | 48% Completed |  2min 47.5s

  'precision', 'predicted', average, warn_for)


[###################                     ] | 48% Completed |  2min 48.2s

  'precision', 'predicted', average, warn_for)


[###################                     ] | 49% Completed |  2min 48.9s

  'precision', 'predicted', average, warn_for)


[###################                     ] | 49% Completed |  2min 50.4s

  'precision', 'predicted', average, warn_for)


[####################                    ] | 50% Completed |  2min 50.9s

  'precision', 'predicted', average, warn_for)


[####################                    ] | 50% Completed |  2min 51.3s

  'precision', 'predicted', average, warn_for)


[####################                    ] | 50% Completed |  2min 51.9s

  'precision', 'predicted', average, warn_for)


[####################                    ] | 51% Completed |  2min 52.6s

  'precision', 'predicted', average, warn_for)


[####################                    ] | 51% Completed |  2min 53.4s

  'precision', 'predicted', average, warn_for)


[####################                    ] | 52% Completed |  2min 54.3s

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


[#########################               ] | 63% Completed |  3min 52.4s

  'precision', 'predicted', average, warn_for)


[#########################               ] | 63% Completed |  3min 52.9s

  'precision', 'predicted', average, warn_for)


[#########################               ] | 63% Completed |  3min 53.4s

  'precision', 'predicted', average, warn_for)


[#########################               ] | 63% Completed |  3min 54.0s

  'precision', 'predicted', average, warn_for)


[#########################               ] | 64% Completed |  3min 55.0s

  'precision', 'predicted', average, warn_for)


[#########################               ] | 64% Completed |  3min 55.7s

  'precision', 'predicted', average, warn_for)


[#########################               ] | 64% Completed |  3min 56.2s

  'precision', 'predicted', average, warn_for)


[##########################              ] | 65% Completed |  3min 57.6s

  'precision', 'predicted', average, warn_for)


[##########################              ] | 65% Completed |  3min 58.2s

  'precision', 'predicted', average, warn_for)


[##########################              ] | 65% Completed |  3min 58.8s

  'precision', 'predicted', average, warn_for)


[##########################              ] | 66% Completed |  3min 59.8s

  'precision', 'predicted', average, warn_for)


[##########################              ] | 66% Completed |  4min  0.5s

  'precision', 'predicted', average, warn_for)


[##########################              ] | 66% Completed |  4min  0.8s

  'precision', 'predicted', average, warn_for)


[##########################              ] | 66% Completed |  4min  1.4s

  'precision', 'predicted', average, warn_for)


[##########################              ] | 67% Completed |  4min  2.3s

  'precision', 'predicted', average, warn_for)


[##########################              ] | 67% Completed |  4min  3.1s

  'precision', 'predicted', average, warn_for)


[###########################             ] | 67% Completed |  4min  3.7s

  'precision', 'predicted', average, warn_for)


[###########################             ] | 67% Completed |  4min  4.5s

  'precision', 'predicted', average, warn_for)


[###########################             ] | 68% Completed |  4min  5.5s

  'precision', 'predicted', average, warn_for)


[###########################             ] | 69% Completed |  4min  7.6s

  'precision', 'predicted', average, warn_for)


[###########################             ] | 69% Completed |  4min  8.3s

  'precision', 'predicted', average, warn_for)


[############################            ] | 70% Completed |  4min 10.2s

  'precision', 'predicted', average, warn_for)


[############################            ] | 70% Completed |  4min 10.7s

  'precision', 'predicted', average, warn_for)


[############################            ] | 71% Completed |  4min 11.7s

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


[############################            ] | 71% Completed |  4min 13.5s

  'precision', 'predicted', average, warn_for)


[############################            ] | 71% Completed |  4min 14.5s

  'precision', 'predicted', average, warn_for)


[############################            ] | 72% Completed |  4min 16.2s

  'precision', 'predicted', average, warn_for)


[#############################           ] | 72% Completed |  4min 16.8s

  'precision', 'predicted', average, warn_for)


[#############################           ] | 72% Completed |  4min 17.4s

  'precision', 'predicted', average, warn_for)


[#############################           ] | 73% Completed |  4min 18.6s

  'precision', 'predicted', average, warn_for)


[#############################           ] | 73% Completed |  4min 19.2s

  'precision', 'predicted', average, warn_for)


[#############################           ] | 73% Completed |  4min 19.5s

  'precision', 'predicted', average, warn_for)


[#############################           ] | 73% Completed |  4min 20.3s

  'precision', 'predicted', average, warn_for)


[#############################           ] | 74% Completed |  4min 21.6s

  'precision', 'predicted', average, warn_for)


[#############################           ] | 74% Completed |  4min 23.2s

  'precision', 'predicted', average, warn_for)


[##############################          ] | 75% Completed |  4min 24.2s

  'precision', 'predicted', average, warn_for)


[##############################          ] | 75% Completed |  4min 25.0s

  'precision', 'predicted', average, warn_for)


[##############################          ] | 76% Completed |  4min 26.0s

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


[###############################         ] | 77% Completed |  4min 28.6s

  'precision', 'predicted', average, warn_for)


[###############################         ] | 79% Completed |  4min 33.0s

  'precision', 'predicted', average, warn_for)


[###############################         ] | 79% Completed |  4min 34.2s

  'precision', 'predicted', average, warn_for)


[################################        ] | 80% Completed |  4min 35.8s

  'precision', 'predicted', average, warn_for)


[################################        ] | 80% Completed |  4min 36.1s

  'precision', 'predicted', average, warn_for)


[################################        ] | 80% Completed |  4min 37.1s

  'precision', 'predicted', average, warn_for)


[################################        ] | 81% Completed |  4min 38.4s

  'precision', 'predicted', average, warn_for)


[################################        ] | 81% Completed |  4min 39.4s

  'precision', 'predicted', average, warn_for)


[################################        ] | 81% Completed |  4min 40.5s

  'precision', 'predicted', average, warn_for)


[################################        ] | 82% Completed |  4min 42.4s

  'precision', 'predicted', average, warn_for)


[#################################       ] | 82% Completed |  4min 44.1s

  'precision', 'predicted', average, warn_for)


[#################################       ] | 83% Completed |  4min 48.5s

  'precision', 'predicted', average, warn_for)


[#################################       ] | 83% Completed |  4min 52.2s

  'precision', 'predicted', average, warn_for)


[#################################       ] | 84% Completed |  4min 55.6s

  'precision', 'predicted', average, warn_for)


[##################################      ] | 85% Completed |  4min 57.7s

  'precision', 'predicted', average, warn_for)


[##################################      ] | 85% Completed |  4min 58.7s

  'precision', 'predicted', average, warn_for)


[##################################      ] | 85% Completed |  5min  0.2s

  'precision', 'predicted', average, warn_for)


[##################################      ] | 86% Completed |  5min  3.3s

  'precision', 'predicted', average, warn_for)


[###################################     ] | 88% Completed |  5min  7.1s

  'precision', 'predicted', average, warn_for)


[###################################     ] | 88% Completed |  5min 10.1s

  'precision', 'predicted', average, warn_for)


[###################################     ] | 89% Completed |  5min 12.2s

  'precision', 'predicted', average, warn_for)


[###################################     ] | 89% Completed |  5min 12.8s

  'precision', 'predicted', average, warn_for)


[###################################     ] | 89% Completed |  5min 13.8s

  'precision', 'predicted', average, warn_for)


[####################################    ] | 90% Completed |  5min 17.1s

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


[####################################    ] | 91% Completed |  5min 18.1s

  'precision', 'predicted', average, warn_for)


[####################################    ] | 91% Completed |  5min 21.7s

  'precision', 'predicted', average, warn_for)


[####################################    ] | 92% Completed |  5min 22.6s

  'precision', 'predicted', average, warn_for)


[#####################################   ] | 92% Completed |  5min 24.2s

  'precision', 'predicted', average, warn_for)


[#####################################   ] | 93% Completed |  5min 25.7s

  'precision', 'predicted', average, warn_for)


[#####################################   ] | 93% Completed |  5min 28.2s

  'precision', 'predicted', average, warn_for)


[#####################################   ] | 94% Completed |  5min 32.2s

  'precision', 'predicted', average, warn_for)


[#####################################   ] | 94% Completed |  5min 32.7s

  'precision', 'predicted', average, warn_for)


[#####################################   ] | 94% Completed |  5min 33.0s

  'precision', 'predicted', average, warn_for)


[######################################  ] | 95% Completed |  5min 35.2s

  'precision', 'predicted', average, warn_for)


[######################################  ] | 96% Completed |  5min 38.2s

  'precision', 'predicted', average, warn_for)


[######################################  ] | 96% Completed |  5min 38.6s

  'precision', 'predicted', average, warn_for)


[######################################  ] | 97% Completed |  5min 41.3s

  'precision', 'predicted', average, warn_for)


[####################################### ] | 98% Completed |  5min 46.8s

  'precision', 'predicted', average, warn_for)


[####################################### ] | 99% Completed |  5min 50.8s

  'precision', 'predicted', average, warn_for)


[########################################] | 100% Completed |  5min 53.2s
[                                        ] | 0% Completed |  1.4s[Pipeline] ......... (step 1 of 2) Processing processor, total=   1.4s
[                                        ] | 0% Completed |  1.6s[Pipeline] ............... (step 2 of 2) Processing clf, total=   0.2s
[########################################] | 100% Completed |  1.7s
tag 9: history best model {'clf': LogisticRegression(C=1, class_weight='balanced', dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='warn', n_jobs=None, penalty='l1',
                   random_state=None, solver='liblinear', tol=0.0001, verbose=0,
                   warm_start=False), 'clf__C': 1, 'clf__class_weight': 'balanced', 'clf__penalty': 'l1', 'clf__solver': 'liblinear'}
tag 9: history counts - predicted: 152, actual: 89
tag 9: history test f1-score is 0.3070539419087137
tag 9: history test

  'precision', 'predicted', average, warn_for)


[#####                                   ] | 14% Completed | 58.6s

  'precision', 'predicted', average, warn_for)


[#####                                   ] | 14% Completed | 59.0s

  'precision', 'predicted', average, warn_for)


[#####                                   ] | 14% Completed | 59.5s

  'precision', 'predicted', average, warn_for)


[######                                  ] | 15% Completed | 59.9s

  'precision', 'predicted', average, warn_for)


[######                                  ] | 15% Completed |  1min  0.9s

  'precision', 'predicted', average, warn_for)


[######                                  ] | 15% Completed |  1min  1.3s

  'precision', 'predicted', average, warn_for)


[######                                  ] | 15% Completed |  1min  2.2s

  'precision', 'predicted', average, warn_for)


[######                                  ] | 16% Completed |  1min  2.5s

  'precision', 'predicted', average, warn_for)


[######                                  ] | 16% Completed |  1min  3.0s

  'precision', 'predicted', average, warn_for)


[######                                  ] | 16% Completed |  1min  3.5s

  'precision', 'predicted', average, warn_for)


[######                                  ] | 16% Completed |  1min  3.8s

  'precision', 'predicted', average, warn_for)


[######                                  ] | 16% Completed |  1min  4.2s

  'precision', 'predicted', average, warn_for)


[######                                  ] | 17% Completed |  1min  5.0s

  'precision', 'predicted', average, warn_for)


[######                                  ] | 17% Completed |  1min  5.4s

  'precision', 'predicted', average, warn_for)


[#######                                 ] | 17% Completed |  1min  5.8s

  'precision', 'predicted', average, warn_for)


[#######                                 ] | 17% Completed |  1min  6.3s

  'precision', 'predicted', average, warn_for)


[#######                                 ] | 18% Completed |  1min  7.3s

  'precision', 'predicted', average, warn_for)


[#######                                 ] | 18% Completed |  1min  8.0s

  'precision', 'predicted', average, warn_for)


[#######                                 ] | 19% Completed |  1min  8.6s

  'precision', 'predicted', average, warn_for)


[#######                                 ] | 19% Completed |  1min  9.1s

  'precision', 'predicted', average, warn_for)


[#######                                 ] | 19% Completed |  1min  9.9s

  'precision', 'predicted', average, warn_for)


[########                                ] | 20% Completed |  1min 10.9s

  'precision', 'predicted', average, warn_for)


[########                                ] | 20% Completed |  1min 11.7s

  'precision', 'predicted', average, warn_for)


[########                                ] | 20% Completed |  1min 12.4s

  'precision', 'predicted', average, warn_for)


[########                                ] | 21% Completed |  1min 13.0s

  'precision', 'predicted', average, warn_for)


[########                                ] | 21% Completed |  1min 13.6s

  'precision', 'predicted', average, warn_for)


[########                                ] | 21% Completed |  1min 14.1s

  'precision', 'predicted', average, warn_for)


[########                                ] | 22% Completed |  1min 14.8s

  'precision', 'predicted', average, warn_for)


[########                                ] | 22% Completed |  1min 15.5s

  'precision', 'predicted', average, warn_for)


[#########                               ] | 22% Completed |  1min 15.9s

  'precision', 'predicted', average, warn_for)


[#########                               ] | 23% Completed |  1min 16.6s

  'precision', 'predicted', average, warn_for)


[#########                               ] | 23% Completed |  1min 17.3s

  'precision', 'predicted', average, warn_for)


[#########                               ] | 23% Completed |  1min 18.1s

  'precision', 'predicted', average, warn_for)


[#########                               ] | 23% Completed |  1min 18.5s

  'precision', 'predicted', average, warn_for)


[#########                               ] | 24% Completed |  1min 18.9s

  'precision', 'predicted', average, warn_for)


[#########                               ] | 24% Completed |  1min 19.7s

  'precision', 'predicted', average, warn_for)


[##########                              ] | 25% Completed |  1min 20.7s

  'precision', 'predicted', average, warn_for)


[##########                              ] | 25% Completed |  1min 21.4s

  'precision', 'predicted', average, warn_for)


[##########                              ] | 25% Completed |  1min 22.2s

  'precision', 'predicted', average, warn_for)


[##########                              ] | 26% Completed |  1min 22.6s

  'precision', 'predicted', average, warn_for)


[##########                              ] | 26% Completed |  1min 23.3s

  'precision', 'predicted', average, warn_for)


[##########                              ] | 26% Completed |  1min 24.0s

  'precision', 'predicted', average, warn_for)


[##########                              ] | 27% Completed |  1min 24.5s

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


[###########                             ] | 29% Completed |  1min 29.9s

  'precision', 'predicted', average, warn_for)


[##############                          ] | 36% Completed |  2min 15.2s

  'precision', 'predicted', average, warn_for)


[###############                         ] | 38% Completed |  2min 17.3s

  'precision', 'predicted', average, warn_for)


[###############                         ] | 39% Completed |  2min 20.0s

  'precision', 'predicted', average, warn_for)


[###############                         ] | 39% Completed |  2min 20.6s

  'precision', 'predicted', average, warn_for)


[###############                         ] | 39% Completed |  2min 21.0s

  'precision', 'predicted', average, warn_for)


[################                        ] | 40% Completed |  2min 21.9s

  'precision', 'predicted', average, warn_for)


[################                        ] | 40% Completed |  2min 22.6s

  'precision', 'predicted', average, warn_for)


[################                        ] | 40% Completed |  2min 23.3s

  'precision', 'predicted', average, warn_for)


[################                        ] | 41% Completed |  2min 24.5s

  'precision', 'predicted', average, warn_for)


[################                        ] | 41% Completed |  2min 25.1s

  'precision', 'predicted', average, warn_for)


[################                        ] | 41% Completed |  2min 25.9s

  'precision', 'predicted', average, warn_for)


[################                        ] | 41% Completed |  2min 26.6s

  'precision', 'predicted', average, warn_for)


[################                        ] | 42% Completed |  2min 27.3s

  'precision', 'predicted', average, warn_for)


[################                        ] | 42% Completed |  2min 27.8s

  'precision', 'predicted', average, warn_for)


[#################                       ] | 42% Completed |  2min 28.0s

  'precision', 'predicted', average, warn_for)


[#################                       ] | 42% Completed |  2min 29.0s

  'precision', 'predicted', average, warn_for)


[#################                       ] | 43% Completed |  2min 29.4s

  'precision', 'predicted', average, warn_for)


[#################                       ] | 43% Completed |  2min 30.0s

  'precision', 'predicted', average, warn_for)


[#################                       ] | 43% Completed |  2min 30.3s

  'precision', 'predicted', average, warn_for)


[#################                       ] | 43% Completed |  2min 31.1s

  'precision', 'predicted', average, warn_for)


[#################                       ] | 44% Completed |  2min 31.5s

  'precision', 'predicted', average, warn_for)


[#################                       ] | 44% Completed |  2min 32.0s

  'precision', 'predicted', average, warn_for)


[##################                      ] | 45% Completed |  2min 34.0s

  'precision', 'predicted', average, warn_for)


[##################                      ] | 45% Completed |  2min 34.8s

  'precision', 'predicted', average, warn_for)


[##################                      ] | 46% Completed |  2min 36.3s

  'precision', 'predicted', average, warn_for)


[##################                      ] | 46% Completed |  2min 36.7s

  'precision', 'predicted', average, warn_for)


[##################                      ] | 47% Completed |  2min 37.8s

  'precision', 'predicted', average, warn_for)


[##################                      ] | 47% Completed |  2min 38.2s

  'precision', 'predicted', average, warn_for)


[###################                     ] | 47% Completed |  2min 38.4s

  'precision', 'predicted', average, warn_for)


[###################                     ] | 47% Completed |  2min 38.9s

  'precision', 'predicted', average, warn_for)


[###################                     ] | 47% Completed |  2min 39.3s

  'precision', 'predicted', average, warn_for)


[###################                     ] | 48% Completed |  2min 39.6s

  'precision', 'predicted', average, warn_for)


[###################                     ] | 48% Completed |  2min 40.3s

  'precision', 'predicted', average, warn_for)


[###################                     ] | 48% Completed |  2min 40.8s

  'precision', 'predicted', average, warn_for)


[###################                     ] | 48% Completed |  2min 41.1s

  'precision', 'predicted', average, warn_for)


[###################                     ] | 49% Completed |  2min 41.9s

  'precision', 'predicted', average, warn_for)


[###################                     ] | 49% Completed |  2min 42.9s

  'precision', 'predicted', average, warn_for)


[####################                    ] | 50% Completed |  2min 43.2s

  'precision', 'predicted', average, warn_for)


[####################                    ] | 50% Completed |  2min 44.5s

  'precision', 'predicted', average, warn_for)


[####################                    ] | 50% Completed |  2min 45.1s

  'precision', 'predicted', average, warn_for)


[####################                    ] | 51% Completed |  2min 45.5s

  'precision', 'predicted', average, warn_for)


[####################                    ] | 51% Completed |  2min 46.4s

  'precision', 'predicted', average, warn_for)


[####################                    ] | 52% Completed |  2min 47.0s

  'precision', 'predicted', average, warn_for)


[#####################                   ] | 53% Completed |  2min 52.9s

  'precision', 'predicted', average, warn_for)


[########################                ] | 61% Completed |  3min 41.0s

  'precision', 'predicted', average, warn_for)


[#########################               ] | 62% Completed |  3min 43.4s

  'precision', 'predicted', average, warn_for)


[#########################               ] | 63% Completed |  3min 44.3s

  'precision', 'predicted', average, warn_for)


[#########################               ] | 63% Completed |  3min 44.7s

  'precision', 'predicted', average, warn_for)


[#########################               ] | 63% Completed |  3min 45.2s

  'precision', 'predicted', average, warn_for)


[#########################               ] | 64% Completed |  3min 46.0s

  'precision', 'predicted', average, warn_for)


[#########################               ] | 64% Completed |  3min 46.5s

  'precision', 'predicted', average, warn_for)


[#########################               ] | 64% Completed |  3min 47.0s

  'precision', 'predicted', average, warn_for)


[#########################               ] | 64% Completed |  3min 47.9s

  'precision', 'predicted', average, warn_for)


[##########################              ] | 65% Completed |  3min 48.6s

  'precision', 'predicted', average, warn_for)


[##########################              ] | 65% Completed |  3min 49.0s

  'precision', 'predicted', average, warn_for)


[##########################              ] | 65% Completed |  3min 49.7s

  'precision', 'predicted', average, warn_for)


[##########################              ] | 66% Completed |  3min 50.1s

  'precision', 'predicted', average, warn_for)


[##########################              ] | 66% Completed |  3min 50.6s

  'precision', 'predicted', average, warn_for)


[##########################              ] | 66% Completed |  3min 51.4s

  'precision', 'predicted', average, warn_for)


[##########################              ] | 66% Completed |  3min 51.7s

  'precision', 'predicted', average, warn_for)


[##########################              ] | 67% Completed |  3min 52.2s

  'precision', 'predicted', average, warn_for)


[##########################              ] | 67% Completed |  3min 53.0s

  'precision', 'predicted', average, warn_for)


[###########################             ] | 67% Completed |  3min 53.4s

  'precision', 'predicted', average, warn_for)


[###########################             ] | 67% Completed |  3min 53.8s

  'precision', 'predicted', average, warn_for)


[###########################             ] | 68% Completed |  3min 54.8s

  'precision', 'predicted', average, warn_for)


[###########################             ] | 68% Completed |  3min 55.8s

  'precision', 'predicted', average, warn_for)


[###########################             ] | 69% Completed |  3min 56.9s

  'precision', 'predicted', average, warn_for)


[###########################             ] | 69% Completed |  3min 57.8s

  'precision', 'predicted', average, warn_for)


[############################            ] | 70% Completed |  3min 58.4s

  'precision', 'predicted', average, warn_for)


[############################            ] | 70% Completed |  3min 58.7s

  'precision', 'predicted', average, warn_for)


[############################            ] | 70% Completed |  3min 59.1s

  'precision', 'predicted', average, warn_for)


[############################            ] | 70% Completed |  3min 59.6s

  'precision', 'predicted', average, warn_for)


[############################            ] | 70% Completed |  4min  0.0s

  'precision', 'predicted', average, warn_for)


[############################            ] | 71% Completed |  4min  0.5s

  'precision', 'predicted', average, warn_for)


[############################            ] | 71% Completed |  4min  1.2s

  'precision', 'predicted', average, warn_for)


[############################            ] | 71% Completed |  4min  1.9s

  'precision', 'predicted', average, warn_for)


[############################            ] | 72% Completed |  4min  2.7s

  'precision', 'predicted', average, warn_for)


[#############################           ] | 72% Completed |  4min  3.4s

  'precision', 'predicted', average, warn_for)


[#############################           ] | 72% Completed |  4min  4.1s

  'precision', 'predicted', average, warn_for)


[#############################           ] | 73% Completed |  4min  4.9s

  'precision', 'predicted', average, warn_for)


[#############################           ] | 73% Completed |  4min  6.1s

  'precision', 'predicted', average, warn_for)


[#############################           ] | 74% Completed |  4min  6.4s

  'precision', 'predicted', average, warn_for)


[#############################           ] | 74% Completed |  4min  7.6s

  'precision', 'predicted', average, warn_for)


[##############################          ] | 75% Completed |  4min  8.1s

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


[##############################          ] | 75% Completed |  4min  9.0s

  'precision', 'predicted', average, warn_for)


[##############################          ] | 75% Completed |  4min  9.4s

  'precision', 'predicted', average, warn_for)


[##############################          ] | 75% Completed |  4min 10.0s

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


[###############################         ] | 79% Completed |  4min 19.5s

  'precision', 'predicted', average, warn_for)


[###############################         ] | 79% Completed |  4min 19.7s

  'precision', 'predicted', average, warn_for)


[################################        ] | 80% Completed |  4min 22.3s

  'precision', 'predicted', average, warn_for)


[################################        ] | 80% Completed |  4min 23.7s

  'precision', 'predicted', average, warn_for)


[################################        ] | 81% Completed |  4min 25.1s

  'precision', 'predicted', average, warn_for)


[################################        ] | 81% Completed |  4min 26.5s

  'precision', 'predicted', average, warn_for)


[################################        ] | 81% Completed |  4min 27.4s

  'precision', 'predicted', average, warn_for)


[################################        ] | 81% Completed |  4min 27.9s

  'precision', 'predicted', average, warn_for)


[################################        ] | 82% Completed |  4min 29.6s

  'precision', 'predicted', average, warn_for)


[#################################       ] | 82% Completed |  4min 30.1s

  'precision', 'predicted', average, warn_for)


[#################################       ] | 82% Completed |  4min 31.6s

  'precision', 'predicted', average, warn_for)


[#################################       ] | 83% Completed |  4min 33.7s

  'precision', 'predicted', average, warn_for)


[#################################       ] | 83% Completed |  4min 34.0s

  'precision', 'predicted', average, warn_for)


[#################################       ] | 83% Completed |  4min 35.6s

  'precision', 'predicted', average, warn_for)


[#################################       ] | 84% Completed |  4min 37.1s

  'precision', 'predicted', average, warn_for)


[#################################       ] | 84% Completed |  4min 37.9s

  'precision', 'predicted', average, warn_for)


[#################################       ] | 84% Completed |  4min 38.9s

  'precision', 'predicted', average, warn_for)


[#################################       ] | 84% Completed |  4min 39.9s

  'precision', 'predicted', average, warn_for)


[##################################      ] | 85% Completed |  4min 41.2s

  'precision', 'predicted', average, warn_for)


[##################################      ] | 85% Completed |  4min 42.7s

  'precision', 'predicted', average, warn_for)


[##################################      ] | 85% Completed |  4min 42.9s

  'precision', 'predicted', average, warn_for)


[##################################      ] | 85% Completed |  4min 43.2s

  'precision', 'predicted', average, warn_for)


[##################################      ] | 86% Completed |  4min 45.3s

  'precision', 'predicted', average, warn_for)


[##################################      ] | 86% Completed |  4min 46.5s

  'precision', 'predicted', average, warn_for)


[##################################      ] | 87% Completed |  4min 46.9s

  'precision', 'predicted', average, warn_for)


[##################################      ] | 87% Completed |  4min 47.8s

  'precision', 'predicted', average, warn_for)


[###################################     ] | 88% Completed |  4min 50.1s

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


[###################################     ] | 88% Completed |  4min 52.5s

  'precision', 'predicted', average, warn_for)


[###################################     ] | 88% Completed |  4min 53.3s

  'precision', 'predicted', average, warn_for)


[###################################     ] | 89% Completed |  4min 54.5s

  'precision', 'predicted', average, warn_for)


[###################################     ] | 89% Completed |  4min 55.0s

  'precision', 'predicted', average, warn_for)


[###################################     ] | 89% Completed |  4min 56.5s

  'precision', 'predicted', average, warn_for)


[###################################     ] | 89% Completed |  4min 57.5s

  'precision', 'predicted', average, warn_for)


[####################################    ] | 90% Completed |  4min 58.8s

  'precision', 'predicted', average, warn_for)


[####################################    ] | 90% Completed |  4min 59.3s

  'precision', 'predicted', average, warn_for)


[####################################    ] | 91% Completed |  5min  0.4s

  'precision', 'predicted', average, warn_for)


[####################################    ] | 91% Completed |  5min  0.9s

  'precision', 'predicted', average, warn_for)


[####################################    ] | 91% Completed |  5min  3.2s

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


[####################################    ] | 92% Completed |  5min  3.7s

  'precision', 'predicted', average, warn_for)


[####################################    ] | 92% Completed |  5min  4.5s

  'precision', 'predicted', average, warn_for)


[#####################################   ] | 92% Completed |  5min  6.4s

  'precision', 'predicted', average, warn_for)


[#####################################   ] | 93% Completed |  5min  7.1s

  'precision', 'predicted', average, warn_for)


[#####################################   ] | 93% Completed |  5min  7.2s

  'precision', 'predicted', average, warn_for)


[#####################################   ] | 93% Completed |  5min 10.0s

  'precision', 'predicted', average, warn_for)


[#####################################   ] | 93% Completed |  5min 10.3s

  'precision', 'predicted', average, warn_for)


[#####################################   ] | 94% Completed |  5min 12.1s

  'precision', 'predicted', average, warn_for)


[#####################################   ] | 94% Completed |  5min 13.2s

  'precision', 'predicted', average, warn_for)


[#####################################   ] | 94% Completed |  5min 14.1s

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


[######################################  ] | 95% Completed |  5min 16.2s

  'precision', 'predicted', average, warn_for)


[######################################  ] | 95% Completed |  5min 17.9s

  'precision', 'predicted', average, warn_for)


[######################################  ] | 96% Completed |  5min 19.2s

  'precision', 'predicted', average, warn_for)


[######################################  ] | 96% Completed |  5min 20.3s

  'precision', 'predicted', average, warn_for)


[######################################  ] | 96% Completed |  5min 20.6s

  'precision', 'predicted', average, warn_for)


[######################################  ] | 96% Completed |  5min 22.9s

  'precision', 'predicted', average, warn_for)


[######################################  ] | 97% Completed |  5min 24.6s

  'precision', 'predicted', average, warn_for)


[######################################  ] | 97% Completed |  5min 24.9s

  'precision', 'predicted', average, warn_for)


[####################################### ] | 97% Completed |  5min 26.6s

  'precision', 'predicted', average, warn_for)


[####################################### ] | 97% Completed |  5min 27.8s

  'precision', 'predicted', average, warn_for)


[####################################### ] | 98% Completed |  5min 29.1s

  'precision', 'predicted', average, warn_for)


[####################################### ] | 98% Completed |  5min 31.1s

  'precision', 'predicted', average, warn_for)


[####################################### ] | 99% Completed |  5min 32.8s

  'precision', 'predicted', average, warn_for)


[####################################### ] | 99% Completed |  5min 34.2s

  'precision', 'predicted', average, warn_for)


[########################################] | 100% Completed |  5min 35.1s
[                                        ] | 0% Completed |  0.0s

  'precision', 'predicted', average, warn_for)


[                                        ] | 0% Completed |  1.8s[Pipeline] ......... (step 1 of 2) Processing processor, total=   1.8s
[                                        ] | 0% Completed |  2.0s[Pipeline] ............... (step 2 of 2) Processing clf, total=   0.2s
[########################################] | 100% Completed |  2.1s
tag 10: humanity best model {'clf': LogisticRegression(C=1, class_weight='balanced', dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='warn', n_jobs=None, penalty='l1',
                   random_state=None, solver='liblinear', tol=0.0001, verbose=0,
                   warm_start=False), 'clf__C': 1, 'clf__class_weight': 'balanced', 'clf__penalty': 'l1', 'clf__solver': 'liblinear'}
tag 10: humanity counts - predicted: 120, actual: 55
tag 10: humanity test f1-score is 0.35428571428571426
tag 10: humanity test accuracy is 0.7670103092783506
--------------------------
Proces

  'precision', 'predicted', average, warn_for)


[############                            ] | 30% Completed |  2min 43.0s

  'precision', 'predicted', average, warn_for)


[############                            ] | 30% Completed |  2min 43.7s

  'precision', 'predicted', average, warn_for)


[############                            ] | 30% Completed |  2min 45.1s

  'precision', 'predicted', average, warn_for)


[############                            ] | 30% Completed |  2min 45.9s

  'precision', 'predicted', average, warn_for)


[############                            ] | 31% Completed |  2min 46.5s

  'precision', 'predicted', average, warn_for)


[############                            ] | 31% Completed |  2min 47.8s

  'precision', 'predicted', average, warn_for)


[############                            ] | 31% Completed |  2min 49.0s

  'precision', 'predicted', average, warn_for)


[############                            ] | 32% Completed |  2min 49.9s

  'precision', 'predicted', average, warn_for)


[############                            ] | 32% Completed |  2min 50.5s

  'precision', 'predicted', average, warn_for)


[#############                           ] | 32% Completed |  2min 51.2s

  'precision', 'predicted', average, warn_for)


[#############                           ] | 32% Completed |  2min 52.3s

  'precision', 'predicted', average, warn_for)


[#############                           ] | 33% Completed |  2min 53.5s

  'precision', 'predicted', average, warn_for)


[#############                           ] | 33% Completed |  2min 54.5s

  'precision', 'predicted', average, warn_for)


[#############                           ] | 33% Completed |  2min 55.0s

  'precision', 'predicted', average, warn_for)


[#############                           ] | 34% Completed |  2min 55.5s

  'precision', 'predicted', average, warn_for)


[#############                           ] | 34% Completed |  2min 56.5s

  'precision', 'predicted', average, warn_for)


[##############                          ] | 35% Completed |  2min 59.2s

  'precision', 'predicted', average, warn_for)


[##############                          ] | 36% Completed |  3min  1.8s

  'precision', 'predicted', average, warn_for)


[##############                          ] | 37% Completed |  3min  2.6s

  'precision', 'predicted', average, warn_for)


[##############                          ] | 37% Completed |  3min  3.2s

  'precision', 'predicted', average, warn_for)


[###############                         ] | 37% Completed |  3min  4.0s

  'precision', 'predicted', average, warn_for)


[###############                         ] | 38% Completed |  3min  4.9s

  'precision', 'predicted', average, warn_for)


[###############                         ] | 38% Completed |  3min  5.4s

  'precision', 'predicted', average, warn_for)


[###############                         ] | 38% Completed |  3min  6.7s

  'precision', 'predicted', average, warn_for)


[###############                         ] | 39% Completed |  3min  7.1s

  'precision', 'predicted', average, warn_for)


[###############                         ] | 39% Completed |  3min  8.4s

  'precision', 'predicted', average, warn_for)


[################                        ] | 40% Completed |  3min  9.0s

  'precision', 'predicted', average, warn_for)


[################                        ] | 40% Completed |  3min 10.4s

  'precision', 'predicted', average, warn_for)


[################                        ] | 41% Completed |  3min 11.0s

  'precision', 'predicted', average, warn_for)


[################                        ] | 41% Completed |  3min 12.2s

  'precision', 'predicted', average, warn_for)


[################                        ] | 41% Completed |  3min 13.3s

  'precision', 'predicted', average, warn_for)


[#################                       ] | 42% Completed |  3min 15.3s

  'precision', 'predicted', average, warn_for)


[#################                       ] | 43% Completed |  3min 16.6s

  'precision', 'predicted', average, warn_for)


[##################                      ] | 47% Completed |  3min 22.3s

  'precision', 'predicted', average, warn_for)


[##################                      ] | 47% Completed |  3min 22.9s

  'precision', 'predicted', average, warn_for)


[###################                     ] | 47% Completed |  3min 23.7s

  'precision', 'predicted', average, warn_for)


[###################                     ] | 47% Completed |  3min 24.1s

  'precision', 'predicted', average, warn_for)


[###################                     ] | 48% Completed |  3min 25.1s

  'precision', 'predicted', average, warn_for)


[###################                     ] | 48% Completed |  3min 25.5s

  'precision', 'predicted', average, warn_for)


[###################                     ] | 48% Completed |  3min 26.4s

  'precision', 'predicted', average, warn_for)


[###################                     ] | 49% Completed |  3min 27.1s

  'precision', 'predicted', average, warn_for)


[###################                     ] | 49% Completed |  3min 28.0s

  'precision', 'predicted', average, warn_for)


[####################                    ] | 50% Completed |  3min 28.7s

  'precision', 'predicted', average, warn_for)


[####################                    ] | 50% Completed |  3min 29.8s

  'precision', 'predicted', average, warn_for)


[####################                    ] | 51% Completed |  3min 31.3s

  'precision', 'predicted', average, warn_for)


[#####################                   ] | 52% Completed |  3min 33.6s

  'precision', 'predicted', average, warn_for)


[#####################                   ] | 53% Completed |  3min 35.1s

  'precision', 'predicted', average, warn_for)


[#####################                   ] | 53% Completed |  3min 36.1s

  'precision', 'predicted', average, warn_for)


[#####################                   ] | 54% Completed |  3min 36.8s

  'precision', 'predicted', average, warn_for)


[#####################                   ] | 54% Completed |  3min 37.5s

  'precision', 'predicted', average, warn_for)


[######################                  ] | 55% Completed |  3min 39.0s

  'precision', 'predicted', average, warn_for)


[######################                  ] | 55% Completed |  3min 39.8s

  'precision', 'predicted', average, warn_for)


[######################                  ] | 56% Completed |  3min 40.8s

  'precision', 'predicted', average, warn_for)


[######################                  ] | 56% Completed |  3min 42.2s

  'precision', 'predicted', average, warn_for)


[######################                  ] | 56% Completed |  3min 42.8s

  'precision', 'predicted', average, warn_for)


[######################                  ] | 57% Completed |  3min 44.1s

  'precision', 'predicted', average, warn_for)


[#######################                 ] | 57% Completed |  3min 45.3s

  'precision', 'predicted', average, warn_for)


[#######################                 ] | 58% Completed |  3min 46.0s

  'precision', 'predicted', average, warn_for)


[#######################                 ] | 58% Completed |  3min 46.9s

  'precision', 'predicted', average, warn_for)


[#######################                 ] | 59% Completed |  3min 48.8s

  'precision', 'predicted', average, warn_for)


[########################                ] | 60% Completed |  3min 51.1s

  'precision', 'predicted', average, warn_for)


[#########################               ] | 63% Completed |  3min 56.3s

  'precision', 'predicted', average, warn_for)


[#########################               ] | 63% Completed |  3min 57.1s

  'precision', 'predicted', average, warn_for)


[#########################               ] | 64% Completed |  3min 57.7s

  'precision', 'predicted', average, warn_for)


[#########################               ] | 64% Completed |  3min 58.4s

  'precision', 'predicted', average, warn_for)


[#########################               ] | 64% Completed |  3min 58.9s

  'precision', 'predicted', average, warn_for)


[##########################              ] | 65% Completed |  3min 59.2s

  'precision', 'predicted', average, warn_for)


[##########################              ] | 65% Completed |  4min  0.5s

  'precision', 'predicted', average, warn_for)


[##########################              ] | 65% Completed |  4min  0.9s

  'precision', 'predicted', average, warn_for)


[##########################              ] | 66% Completed |  4min  1.6s

  'precision', 'predicted', average, warn_for)


[##########################              ] | 67% Completed |  4min  2.8s

  'precision', 'predicted', average, warn_for)


[###########################             ] | 67% Completed |  4min  4.4s

  'precision', 'predicted', average, warn_for)


[###########################             ] | 68% Completed |  4min  5.5s

  'precision', 'predicted', average, warn_for)


[###########################             ] | 68% Completed |  4min  7.2s

  'precision', 'predicted', average, warn_for)


[###########################             ] | 69% Completed |  4min  8.9s

  'precision', 'predicted', average, warn_for)


[############################            ] | 70% Completed |  4min  9.4s

  'precision', 'predicted', average, warn_for)


[############################            ] | 70% Completed |  4min 10.9s

  'precision', 'predicted', average, warn_for)


[############################            ] | 71% Completed |  4min 11.4s

  'precision', 'predicted', average, warn_for)


[############################            ] | 71% Completed |  4min 12.9s

  'precision', 'predicted', average, warn_for)


[############################            ] | 71% Completed |  4min 13.1s

  'precision', 'predicted', average, warn_for)


[############################            ] | 72% Completed |  4min 13.9s

  'precision', 'predicted', average, warn_for)


[#############################           ] | 72% Completed |  4min 14.4s

  'precision', 'predicted', average, warn_for)


[#############################           ] | 73% Completed |  4min 15.1s

  'precision', 'predicted', average, warn_for)


[#############################           ] | 73% Completed |  4min 16.0s

  'precision', 'predicted', average, warn_for)


[#############################           ] | 73% Completed |  4min 16.8s

  'precision', 'predicted', average, warn_for)


[#############################           ] | 74% Completed |  4min 17.5s

  'precision', 'predicted', average, warn_for)


[#############################           ] | 74% Completed |  4min 17.9s

  'precision', 'predicted', average, warn_for)


[##############################          ] | 75% Completed |  4min 19.3s

  'precision', 'predicted', average, warn_for)


[##############################          ] | 76% Completed |  4min 21.5s

  'precision', 'predicted', average, warn_for)


[##############################          ] | 76% Completed |  4min 21.8s

  'precision', 'predicted', average, warn_for)


[###############################         ] | 79% Completed |  4min 30.6s

  'precision', 'predicted', average, warn_for)


[###############################         ] | 79% Completed |  4min 31.4s

  'precision', 'predicted', average, warn_for)


[################################        ] | 80% Completed |  4min 33.1s

  'precision', 'predicted', average, warn_for)


[################################        ] | 80% Completed |  4min 33.7s

  'precision', 'predicted', average, warn_for)


[################################        ] | 80% Completed |  4min 34.9s

  'precision', 'predicted', average, warn_for)


[################################        ] | 81% Completed |  4min 36.5s

  'precision', 'predicted', average, warn_for)


[################################        ] | 81% Completed |  4min 38.4s

  'precision', 'predicted', average, warn_for)


[################################        ] | 81% Completed |  4min 39.1s

  'precision', 'predicted', average, warn_for)


[#################################       ] | 82% Completed |  4min 42.7s

  'precision', 'predicted', average, warn_for)


[#################################       ] | 83% Completed |  4min 45.0s

  'precision', 'predicted', average, warn_for)


[#################################       ] | 83% Completed |  4min 45.4s

  'precision', 'predicted', average, warn_for)


[#################################       ] | 84% Completed |  4min 48.8s

  'precision', 'predicted', average, warn_for)


[#################################       ] | 84% Completed |  4min 49.7s

  'precision', 'predicted', average, warn_for)


[#################################       ] | 84% Completed |  4min 50.3s

  'precision', 'predicted', average, warn_for)


[#################################       ] | 84% Completed |  4min 50.6s

  'precision', 'predicted', average, warn_for)


[##################################      ] | 85% Completed |  4min 53.6s

  'precision', 'predicted', average, warn_for)


[##################################      ] | 85% Completed |  4min 54.5s

  'precision', 'predicted', average, warn_for)


[##################################      ] | 86% Completed |  4min 56.9s

  'precision', 'predicted', average, warn_for)


[###################################     ] | 88% Completed |  5min  1.1s

  'precision', 'predicted', average, warn_for)


[###################################     ] | 88% Completed |  5min  3.3s

  'precision', 'predicted', average, warn_for)


[###################################     ] | 89% Completed |  5min  6.4s

  'precision', 'predicted', average, warn_for)


[###################################     ] | 89% Completed |  5min  7.4s

  'precision', 'predicted', average, warn_for)


[###################################     ] | 89% Completed |  5min  8.0s

  'precision', 'predicted', average, warn_for)


[####################################    ] | 90% Completed |  5min 11.6s

  'precision', 'predicted', average, warn_for)


[####################################    ] | 90% Completed |  5min 11.9s

  'precision', 'predicted', average, warn_for)


[####################################    ] | 91% Completed |  5min 12.1s

  'precision', 'predicted', average, warn_for)


[####################################    ] | 91% Completed |  5min 15.9s

  'precision', 'predicted', average, warn_for)


[####################################    ] | 92% Completed |  5min 17.3s

  'precision', 'predicted', average, warn_for)


[#####################################   ] | 92% Completed |  5min 19.3s

  'precision', 'predicted', average, warn_for)


[#####################################   ] | 93% Completed |  5min 20.4s

  'precision', 'predicted', average, warn_for)


[#####################################   ] | 93% Completed |  5min 23.2s

  'precision', 'predicted', average, warn_for)


[#####################################   ] | 94% Completed |  5min 25.2s

  'precision', 'predicted', average, warn_for)


[#####################################   ] | 94% Completed |  5min 26.1s

  'precision', 'predicted', average, warn_for)


[#####################################   ] | 94% Completed |  5min 26.9s

  'precision', 'predicted', average, warn_for)


[#####################################   ] | 94% Completed |  5min 27.2s

  'precision', 'predicted', average, warn_for)


[######################################  ] | 95% Completed |  5min 29.4s

  'precision', 'predicted', average, warn_for)


[######################################  ] | 95% Completed |  5min 31.6s

  'precision', 'predicted', average, warn_for)


[######################################  ] | 97% Completed |  5min 34.8s

  'precision', 'predicted', average, warn_for)


[####################################### ] | 98% Completed |  5min 39.3s

  'precision', 'predicted', average, warn_for)


[####################################### ] | 99% Completed |  5min 42.8s

  'precision', 'predicted', average, warn_for)


[########################################] | 100% Completed |  5min 45.8s
[                                        ] | 0% Completed |  1.4s[Pipeline] ......... (step 1 of 2) Processing processor, total=   1.5s
[                                        ] | 0% Completed |  2.3s[Pipeline] ............... (step 2 of 2) Processing clf, total=   0.7s
[########################################] | 100% Completed |  2.4s
tag 11: media best model {'clf': LogisticRegression(C=1, class_weight='balanced', dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='warn', n_jobs=None, penalty='l2',
                   random_state=None, solver='liblinear', tol=0.0001, verbose=0,
                   warm_start=False), 'clf__C': 1, 'clf__class_weight': 'balanced', 'clf__penalty': 'l2', 'clf__solver': 'liblinear'}
tag 11: media counts - predicted: 103, actual: 55
tag 11: media test f1-score is 0.31645569620253167
tag 11: media test ac

  'precision', 'predicted', average, warn_for)


[###                                     ] | 9% Completed | 20.8s

  'precision', 'predicted', average, warn_for)


[####                                    ] | 12% Completed | 30.5s

  'precision', 'predicted', average, warn_for)


[#####                                   ] | 13% Completed | 33.4s

  'precision', 'predicted', average, warn_for)


[######                                  ] | 16% Completed | 42.7s

  'precision', 'predicted', average, warn_for)


[#######                                 ] | 18% Completed | 50.6s

  'precision', 'predicted', average, warn_for)


[#######                                 ] | 19% Completed | 54.9s

  'precision', 'predicted', average, warn_for)


[#######                                 ] | 19% Completed | 56.5s

  'precision', 'predicted', average, warn_for)


[########                                ] | 21% Completed |  1min  5.0s

  'precision', 'predicted', average, warn_for)


[#########                               ] | 23% Completed |  1min  8.4s

  'precision', 'predicted', average, warn_for)


[###############                         ] | 39% Completed |  2min 13.6s

  'precision', 'predicted', average, warn_for)


[###############                         ] | 39% Completed |  2min 14.5s

  'precision', 'predicted', average, warn_for)


[###############                         ] | 39% Completed |  2min 15.0s

  'precision', 'predicted', average, warn_for)


[################                        ] | 40% Completed |  2min 15.9s

  'precision', 'predicted', average, warn_for)


[################                        ] | 40% Completed |  2min 16.4s

  'precision', 'predicted', average, warn_for)


[################                        ] | 40% Completed |  2min 17.4s

  'precision', 'predicted', average, warn_for)


[################                        ] | 41% Completed |  2min 17.8s

  'precision', 'predicted', average, warn_for)


[################                        ] | 41% Completed |  2min 19.7s

  'precision', 'predicted', average, warn_for)


[################                        ] | 41% Completed |  2min 20.5s

  'precision', 'predicted', average, warn_for)


[#################                       ] | 43% Completed |  2min 23.4s

  'precision', 'predicted', average, warn_for)


[##################                      ] | 45% Completed |  2min 29.8s

  'precision', 'predicted', average, warn_for)


[##################                      ] | 46% Completed |  2min 31.2s

  'precision', 'predicted', average, warn_for)


[##################                      ] | 47% Completed |  2min 32.4s

  'precision', 'predicted', average, warn_for)


[###################                     ] | 47% Completed |  2min 33.5s

  'precision', 'predicted', average, warn_for)


[###################                     ] | 47% Completed |  2min 34.0s

  'precision', 'predicted', average, warn_for)


[###################                     ] | 48% Completed |  2min 35.9s

  'precision', 'predicted', average, warn_for)


[###################                     ] | 49% Completed |  2min 36.6s

  'precision', 'predicted', average, warn_for)


[###################                     ] | 49% Completed |  2min 37.0s

  'precision', 'predicted', average, warn_for)


[###################                     ] | 49% Completed |  2min 37.9s

  'precision', 'predicted', average, warn_for)


[####################                    ] | 50% Completed |  2min 39.8s

  'precision', 'predicted', average, warn_for)


[#########################               ] | 63% Completed |  3min 31.8s

  'precision', 'predicted', average, warn_for)


[#########################               ] | 63% Completed |  3min 32.2s

  'precision', 'predicted', average, warn_for)


[#########################               ] | 63% Completed |  3min 32.7s

  'precision', 'predicted', average, warn_for)


[#########################               ] | 64% Completed |  3min 33.7s

  'precision', 'predicted', average, warn_for)


[#########################               ] | 64% Completed |  3min 34.2s

  'precision', 'predicted', average, warn_for)


[#########################               ] | 64% Completed |  3min 34.8s

  'precision', 'predicted', average, warn_for)


[##########################              ] | 65% Completed |  3min 36.6s

  'precision', 'predicted', average, warn_for)


[##########################              ] | 65% Completed |  3min 37.5s

  'precision', 'predicted', average, warn_for)


[##########################              ] | 66% Completed |  3min 40.3s

  'precision', 'predicted', average, warn_for)


[##########################              ] | 66% Completed |  3min 41.2s

  'precision', 'predicted', average, warn_for)


[##########################              ] | 66% Completed |  3min 42.3s

  'precision', 'predicted', average, warn_for)


[##########################              ] | 67% Completed |  3min 43.8s

  'precision', 'predicted', average, warn_for)


[###########################             ] | 67% Completed |  3min 44.8s

  'precision', 'predicted', average, warn_for)


[###########################             ] | 68% Completed |  3min 46.7s

  'precision', 'predicted', average, warn_for)


[###########################             ] | 68% Completed |  3min 47.5s

  'precision', 'predicted', average, warn_for)


[###########################             ] | 69% Completed |  3min 50.1s

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


[############################            ] | 71% Completed |  3min 53.8s

  'precision', 'predicted', average, warn_for)


[############################            ] | 71% Completed |  3min 54.7s

  'precision', 'predicted', average, warn_for)


[############################            ] | 71% Completed |  3min 56.1s

  'precision', 'predicted', average, warn_for)


[#############################           ] | 72% Completed |  3min 57.7s

  'precision', 'predicted', average, warn_for)


[#############################           ] | 72% Completed |  3min 58.3s

  'precision', 'predicted', average, warn_for)


[#############################           ] | 73% Completed |  3min 59.4s

  'precision', 'predicted', average, warn_for)


[#############################           ] | 73% Completed |  4min  0.2s

  'precision', 'predicted', average, warn_for)


[#############################           ] | 74% Completed |  4min  1.1s

  'precision', 'predicted', average, warn_for)


[##############################          ] | 75% Completed |  4min  3.3s

  'precision', 'predicted', average, warn_for)


[##############################          ] | 75% Completed |  4min  5.1s

  'precision', 'predicted', average, warn_for)


[###############################         ] | 77% Completed |  4min  7.4s

  'precision', 'predicted', average, warn_for)


[###############################         ] | 79% Completed |  4min 12.1s

  'precision', 'predicted', average, warn_for)


[###############################         ] | 79% Completed |  4min 13.3s

  'precision', 'predicted', average, warn_for)


[################################        ] | 80% Completed |  4min 15.2s

  'precision', 'predicted', average, warn_for)


[################################        ] | 80% Completed |  4min 15.6s

  'precision', 'predicted', average, warn_for)


[################################        ] | 81% Completed |  4min 20.4s

  'precision', 'predicted', average, warn_for)


[#################################       ] | 82% Completed |  4min 24.1s

  'precision', 'predicted', average, warn_for)


[#################################       ] | 84% Completed |  4min 31.5s

  'precision', 'predicted', average, warn_for)


[##################################      ] | 85% Completed |  4min 33.5s

  'precision', 'predicted', average, warn_for)


[##################################      ] | 85% Completed |  4min 34.3s

  'precision', 'predicted', average, warn_for)


[###################################     ] | 89% Completed |  4min 46.1s

  'precision', 'predicted', average, warn_for)


[####################################    ] | 90% Completed |  4min 50.2s

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


[#####################################   ] | 93% Completed |  4min 58.2s

  'precision', 'predicted', average, warn_for)


[#####################################   ] | 94% Completed |  5min  5.5s

  'precision', 'predicted', average, warn_for)


[######################################  ] | 95% Completed |  5min 10.2s

  'precision', 'predicted', average, warn_for)


[########################################] | 100% Completed |  5min 26.4s
[                                        ] | 0% Completed |  1.3s[Pipeline] ......... (step 1 of 2) Processing processor, total=   1.3s
[                                        ] | 0% Completed |  2.8s[Pipeline] ............... (step 2 of 2) Processing clf, total=   1.5s
[########################################] | 100% Completed |  2.9s
tag 12: politics best model {'clf': LogisticRegression(C=100, class_weight='balanced', dual=False,
                   fit_intercept=True, intercept_scaling=1, l1_ratio=None,
                   max_iter=100, multi_class='warn', n_jobs=None, penalty='l2',
                   random_state=None, solver='liblinear', tol=0.0001, verbose=0,
                   warm_start=False), 'clf__C': 100, 'clf__class_weight': 'balanced', 'clf__penalty': 'l2', 'clf__solver': 'liblinear'}
tag 12: politics counts - predicted: 32, actual: 41
tag 12: politics test f1-score is 0.410958904109589
tag 12: pol

In [129]:
%%time
the_br_model = joblib.load('best_br_model.joblib')
pprint.pprint(the_br_model.best_params_)
y_pred_new = the_br_model.predict(X_test)
for index in range(len(tags)):
    print(f"Processing {tags[index]}")
    prediction = y_pred_new[:, index].A
    
    if (sum(y_train[:, index]) / len(y_train)) < 0.5:
        baseline_prediction = np.zeros(len(y_test)).astype(int)
    else:
        baseline_prediction = np.ones(len(y_test)).astype(int)
    true_y = y_test[:, index]
    
    
    print(f'tag {index}: {tags[index]} counts - predicted: {sum(prediction)[0]}, actual: {sum(true_y)}')
    
    print(f'tag {index}: {tags[index]} test precision is {precision_score(true_y, prediction, average="binary")}')
    print(f'tag {index}: {tags[index]} test recall is {recall_score(true_y, prediction, average="binary")}')
    
    print(f'tag {index}: {tags[index]} baseline f1-score is {f1_score(true_y, np.ones(len(y_test)).astype(int), average="binary")}')
    print(f'tag {index}: {tags[index]} test f1-score is {f1_score(true_y, prediction, average="binary")}')
    
    print(f'tag {index}: {tags[index]} baseline accuracy is {accuracy_score(true_y, baseline_prediction)}')
    print(f'tag {index}: {tags[index]} test accuracy is {accuracy_score(true_y, prediction)}')
    print('--------------------------')

{'clf__classifier': LogisticRegression(C=100, class_weight='balanced', dual=False,
                   fit_intercept=True, intercept_scaling=1, l1_ratio=None,
                   max_iter=100, multi_class='warn', n_jobs=None, penalty='l2',
                   random_state=None, solver='liblinear', tol=0.0001, verbose=0,
                   warm_start=False),
 'clf__classifier__C': 100,
 'clf__classifier__class_weight': 'balanced',
 'clf__classifier__penalty': 'l2',
 'clf__classifier__solver': 'liblinear'}
Processing biodiversity
tag 0: biodiversity counts - predicted: 44, actual: 45
tag 0: biodiversity test precision is 0.45454545454545453
tag 0: biodiversity test recall is 0.4444444444444444
tag 0: biodiversity baseline f1-score is 0.16981132075471697
tag 0: biodiversity test f1-score is 0.44943820224719094
tag 0: biodiversity baseline accuracy is 0.9072164948453608
tag 0: biodiversity test accuracy is 0.8989690721649485
--------------------------
Processing biomechanics
tag 1: biomechani

In [31]:
import glob

In [40]:
per_tag_models = glob.glob("*.joblib")
per_tag_models.remove('best_br_model.joblib')
len(per_tag_models)

15

In [53]:
max(sum(y_test[:, 3]), len(y_test) - sum(y_test[:, 3]))

0.9109730848861284

In [124]:
import pprint

In [65]:
sum(y_train[:, 3]) / len(y_train)

0.07759562841530054

In [126]:
%%time
for index in range(len(tags)):
    themodel = joblib.load(f"best_{tags[index]}_model.joblib")
    pprint.pprint(themodel.best_params_)
    print(f"Processing {tags[index]}")
    prediction = themodel.predict(X_test)
    
    if (sum(y_train[:, index]) / len(y_train)) < 0.5:
        baseline_prediction = np.zeros(len(y_test)).astype(int)
    else:
        baseline_prediction = np.ones(len(y_test)).astype(int)
        
    true_y = y_test[:, index]
    
    
    print(f'tag {index}: {tags[index]} counts - predicted: {sum(prediction)}, actual: {sum(true_y)}')
    
    print(f'tag {index}: {tags[index]} test precision is {precision_score(true_y, prediction, average="binary")}')
    print(f'tag {index}: {tags[index]} test recall is {recall_score(true_y, prediction, average="binary")}')
    
    print(f'tag {index}: {tags[index]} baseline f1-score is {f1_score(true_y, np.ones(len(y_test)).astype(int), average="binary")}')
    print(f'tag {index}: {tags[index]} test f1-score is {f1_score(true_y, prediction, average="binary")}')
    
    print(f'tag {index}: {tags[index]} baseline accuracy is {accuracy_score(true_y, baseline_prediction)}')
    print(f'tag {index}: {tags[index]} test accuracy is {accuracy_score(true_y, prediction)}')
    print('--------------------------')

{'clf': LogisticRegression(C=1, class_weight='balanced', dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='warn', n_jobs=None, penalty='l2',
                   random_state=None, solver='liblinear', tol=0.0001, verbose=0,
                   warm_start=False),
 'clf__C': 1,
 'clf__class_weight': 'balanced',
 'clf__penalty': 'l2',
 'clf__solver': 'liblinear'}
Processing biodiversity
tag 0: biodiversity counts - predicted: 76, actual: 45
tag 0: biodiversity test precision is 0.42105263157894735
tag 0: biodiversity test recall is 0.7111111111111111
tag 0: biodiversity baseline f1-score is 0.16981132075471697
tag 0: biodiversity test f1-score is 0.5289256198347106
tag 0: biodiversity baseline accuracy is 0.9072164948453608
tag 0: biodiversity test accuracy is 0.8824742268041237
--------------------------
{'clf': LogisticRegression(C=1, class_weight='balanced', dual=False, fit_intercept=True,
                 