In [248]:
import pickle
import numpy as np
import pandas as pd

from collections import Counter
from sklearn.metrics import f1_score, make_scorer
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.linear_model import LogisticRegression
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB, BernoulliNB
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split, cross_val_score, \
    StratifiedShuffleSplit, RandomizedSearchCV

# Load learn data

In [25]:
# NOTE(review): pickle.load can execute arbitrary code while unpickling;
# these files are assumed to be trusted project artefacts - confirm.

# Class label of every learn-set sample.
with open("Data/Learn/labels.pkl", "rb") as f:
    learn_labels = pickle.load(f)

# Raw text of every learn-set sample.
with open("Data/Learn/sentences.pkl", "rb") as f:
    learn_sentences = pickle.load(f)

# Learn-set samples as sequences of vocabulary ids (see decode_sequence).
with open("Data/Learn/sequences.pkl", "rb") as f:
    learn_sequences = pickle.load(f)

# Vocabulary mapping, plus its inverse (value -> key) used to decode sequences.
with open("Data/dict.pkl", "rb") as f:
    vocabulary = pickle.load(f)
    reverse_vocabulary = {v: k for k, v in vocabulary.items()}

# Data exploration

In [16]:
def decode_sequence(sequence, decoding_dict):
    """Turn a sequence of ids back into a space-separated string.

    Each element of ``sequence`` is looked up in ``decoding_dict`` and the
    resulting tokens are joined with single spaces. A missing id raises
    ``KeyError``; an empty sequence yields an empty string.
    """
    tokens = [decoding_dict[token_id] for token_id in sequence]
    return " ".join(tokens)

In [28]:
# Sanity check: equal sizes mean that inverting the vocabulary lost no entry,
# i.e. no two words share the same id.
print(len(vocabulary), len(reverse_vocabulary))
# Smallest and largest id used as a key of the reverse mapping.
print(min(reverse_vocabulary.keys()), max(reverse_vocabulary.keys()))
# The five longest vocabulary entries.
sorted(vocabulary.keys(), key=len, reverse=True)[:5]

30432 30432
0 30431


['déresponsabiliseraient',
 'interprofessionnelles',
 'interprofessionnalité',
 'socioprofessionnelles',
 'intergouvernementale']

In [9]:
# Class balance of the learn set.
Counter(learn_labels)

Counter({'C': 39912, 'M': 6018})

In [13]:
# Number of raw sentences, followed by a preview of the first five.
print(len(learn_sentences))
learn_sentences[:5]

45930


["J'aurai l'occasion de dire aux Français comment notre enseignement devra évoluer pour permettre à chaque jeune de trouver sa place, d'entrer dans le monde du travail, de savoir s'adapter et, à partir de là, d'acquérir, tout au long de la vie, de nouvelles compétences et de nouveaux savoirs.",
 'Il est nécessaire.',
 "Dans votre coeur et dans votre vie, la confiance et l'enthousiasme l'emportent sur le doute.",
 "Pour conduire ce débat dans un esprit de véritable dialogue, je compte, si nos partenaires en sont d'accord, inviter au prochain sommet du G7, à Lyon, pour une séance de travail, le secrétaire général des Nations Unies, le président de la Banque mondiale et le directeur général du Fonds monétaire international.",
 "La France et l'Europe construiront ainsi un avenir de coopération avec un Proche-Orient pacifié, stable, prospère, libéré des menaces de la guerre, de la prolifération, du terrorisme."]

In [29]:
# Number of encoded sequences, followed by the first five decoded back to text.
print(len(learn_sequences))
# Slice before decoding so only the five displayed sequences are decoded,
# instead of decoding the whole learn set and discarding all but five.
[decode_sequence(x, reverse_vocabulary) for x in learn_sequences[:5]]

45930


["j' aurai l' occasion de dire aux français comment notre enseignement devra évoluer pour permettre à chaque jeune de trouver sa place , d' entrer dans le monde du travail , de savoir s' adapter et , à partir de là , d' acquérir , tout au long de la vie , de nouvelles compétences et de nouveaux savoirs .",
 'il est nécessaire .',
 "dans votre coeur et dans votre vie , la confiance et l' enthousiasme l' emportent sur le doute .",
 "pour conduire ce débat dans un esprit de véritable dialogue , je compte , si nos partenaires en sont d' accord , inviter au prochain sommet du g7 , à lyon , pour une séance de travail , le secrétaire général des nations unies , le président de la banque mondiale et le directeur général du fonds monétaire international .",
 "la france et l' europe construiront ainsi un avenir de coopération avec un proche - orient pacifié , stable , prospère , libéré des menaces de la guerre , de la prolifération , du terrorisme ."]

# Data splitting

In [43]:
# Hold out 30 % of the learn set. Stratifying on the labels keeps the class
# proportions the same in both parts; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    learn_sequences, learn_labels, test_size=0.3,
    shuffle=True, stratify=learn_labels, random_state=42
)
# Sizes of each part, then the per-class counts of the two label sets.
print(len(X_train), len(y_train), len(X_test), len(y_test))
Counter(y_train), Counter(y_test)

32151 32151 13779 13779


(Counter({'C': 27938, 'M': 4213}), Counter({'C': 11974, 'M': 1805}))

# Utils

In [224]:
def f1(y_true, y_pred):
    """Macro-averaged F1 score of ``y_pred`` against ``y_true``.

    Delegates to scikit-learn's ``f1_score`` with ``average="macro"``.
    """
    macro_f1 = f1_score(y_true, y_pred, average="macro")
    return macro_f1


# Scorer object built from ``f1``; passed as ``scoring=`` to the
# cross-validation helpers defined in this notebook.
scoring = make_scorer(f1)

def score_model(model, X=None, y=None):
    """Cross-validate ``model`` with the macro-F1 scorer and print a summary.

    Parameters
    ----------
    model : estimator
        Scikit-learn compatible estimator handed to ``cross_val_score``.
    X, y : optional
        Samples and labels to cross-validate on. When omitted, the notebook's
        held-out ``X_test`` / ``y_test`` are used, which is what this helper
        always did before these parameters existed.

    Returns
    -------
    The array of scores returned by ``cross_val_score``, one per split
    (10 stratified shuffle splits, each holding out 30 % of the data).
    """
    # Resolve the defaults at call time (not at definition time) so that
    # re-running the data-splitting cell is picked up without redefining this.
    if X is None:
        X = X_test
    if y is None:
        y = y_test

    # NOTE(review): with the defaults, the model is fitted and scored on
    # subsets of the held-out data only; X_train is never used here.
    # Confirm this is the intended evaluation protocol.
    scores = cross_val_score(
        model, X, y,
        scoring=scoring,
        cv=StratifiedShuffleSplit(10, test_size=0.3, random_state=42),
        n_jobs=-1,
    )
    print("Average test f1 score:", scores.mean(), "\nDeviation:", scores.std())
    return scores


def grid_search(model, param_grid, n_iter):
    """Run a randomized hyper-parameter search on the training split.

    Parameters
    ----------
    model : estimator
        Scikit-learn compatible estimator (here, a ``Pipeline``).
    param_grid : dict
        Parameter names mapped to the candidate values to sample from.
    n_iter : int
        Number of parameter settings to sample.

    Returns
    -------
    pandas.DataFrame
        ``cv_results_`` of the search, sorted by ``mean_test_score`` with the
        best setting in the first row.
    """
    search = RandomizedSearchCV(
        model, param_grid,
        n_iter=n_iter,
        scoring=scoring,
        cv=StratifiedShuffleSplit(10, test_size=0.3, random_state=42),
        n_jobs=-1,
        random_state=42,
        # Requested explicitly: newer scikit-learn releases default this to
        # False, which drops the *_train_score columns shown in this notebook.
        return_train_score=True,
    )
    search.fit(X_train, y_train)

    ranked = pd.DataFrame(search.cv_results_).sort_values(
        "mean_test_score", ascending=False)
    return ranked


def noop(x):
    """Return ``x`` unchanged.

    Passed as ``tokenizer`` to the vectorizers in this notebook, presumably
    because the inputs are already sequences of tokens - confirm.
    """
    return x

# Baseline models

### Majority

In [229]:
class Model(BaseEstimator, ClassifierMixin):
    """Baseline classifier that always predicts the majority training label.

    ``fit`` records the most frequent label of ``y``; ``predict`` repeats it
    for every sample. If ``predict`` is called on an unfitted instance it
    falls back to ``'C'``, the label this baseline previously hard-coded.
    """

    def fit(self, X, y):
        """Remember the most frequent label in ``y``; ``X`` is ignored."""
        labels = list(y)
        if labels:
            # Ties are resolved in favour of the label encountered first.
            self.majority_ = Counter(labels).most_common(1)[0][0]
            self.classes_ = np.unique(labels)
        return self

    def predict(self, X):
        """Return an array of ``len(X)`` copies of the majority label."""
        return np.full(len(X), getattr(self, "majority_", 'C'))

In [230]:
# Score the majority-class baseline; it is the floor the other models must beat.
score_model(Model())

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


Average test f1 score: 0.4649236344809733 
Deviation: 0.0


array([0.46492363, 0.46492363, 0.46492363, 0.46492363, 0.46492363,
       0.46492363, 0.46492363, 0.46492363, 0.46492363, 0.46492363])

### Word count + Logistic regression

In [243]:
def Model():
    """Build a fresh, unfitted word-count + logistic-regression pipeline."""
    return Pipeline([
        ("vectorizer", CountVectorizer(lowercase=False, tokenizer=noop)),
        # liblinear supports both the "l1" and "l2" penalties searched in this
        # notebook. It is pinned because newer scikit-learn releases default
        # to lbfgs, which rejects "l1".
        ("classifier", LogisticRegression(solver="liblinear")),
    ])

In [219]:
# Search both penalties over 13 log-spaced values of C from 1e-9 to 1e3;
# n_iter=26 equals the size of the 2 x 13 grid.
results = grid_search(Model(), {
    "classifier__penalty": ["l1", "l2"],
    "classifier__C": np.logspace(-9, 3, 13),
}, n_iter=26)

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision'

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision'

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision'

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision'

In [220]:
# Search results as returned by grid_search (sorted by mean_test_score, best first).
results

Unnamed: 0,mean_fit_time,mean_score_time,mean_test_score,mean_train_score,param_classifier__C,param_classifier__penalty,params,rank_test_score,split0_test_score,split0_train_score,...,split7_test_score,split7_train_score,split8_test_score,split8_train_score,split9_test_score,split9_train_score,std_fit_time,std_score_time,std_test_score,std_train_score
21,5.565372,0.347423,0.725535,0.980326,10.0,l2,"{'classifier__penalty': 'l2', 'classifier__C':...",1,0.730118,0.981327,...,0.726586,0.979439,0.71568,0.980141,0.726586,0.981252,0.304697,0.114896,0.006168,0.0009812009
19,3.124709,0.297855,0.723196,0.888252,1.0,l2,"{'classifier__penalty': 'l2', 'classifier__C':...",2,0.726853,0.891317,...,0.721851,0.887304,0.716108,0.891299,0.731221,0.890121,0.45516,0.070377,0.005467,0.002076642
18,3.302038,0.231709,0.716791,0.824041,1.0,l1,"{'classifier__penalty': 'l1', 'classifier__C':...",3,0.72094,0.824963,...,0.715317,0.825292,0.710759,0.826422,0.726939,0.823036,0.404302,0.062382,0.006338,0.001499267
20,8.863107,0.241388,0.713626,0.988135,10.0,l1,"{'classifier__penalty': 'l1', 'classifier__C':...",4,0.715777,0.989331,...,0.706519,0.987434,0.710711,0.986934,0.713909,0.989447,2.697885,0.080745,0.00688,0.0007465767
23,8.715653,0.384213,0.705436,0.995519,100.0,l2,"{'classifier__penalty': 'l2', 'classifier__C':...",5,0.709417,0.996572,...,0.703634,0.994309,0.695452,0.995981,0.700905,0.995492,1.653359,0.109231,0.006576,0.0007150859
22,3.485504,0.273947,0.688017,0.998535,100.0,l1,"{'classifier__penalty': 'l1', 'classifier__C':...",6,0.690826,0.998926,...,0.683477,0.998143,0.683641,0.998143,0.691063,0.998925,0.753289,0.061162,0.007489,0.0002966798
25,10.935858,0.37888,0.687658,0.998965,1000.0,l2,"{'classifier__penalty': 'l2', 'classifier__C':...",7,0.692524,0.998926,...,0.679366,0.998829,0.678453,0.998828,0.685409,0.999317,2.963548,0.164623,0.006567,0.0002104619
17,1.75738,0.304291,0.67794,0.729496,0.1,l2,"{'classifier__penalty': 'l2', 'classifier__C':...",8,0.677501,0.732542,...,0.678976,0.729731,0.675998,0.729775,0.682037,0.730256,0.226898,0.126216,0.00489,0.001711283
24,1.986695,0.263114,0.666681,0.999776,1000.0,l1,"{'classifier__penalty': 'l1', 'classifier__C':...",9,0.660899,0.99961,...,0.660145,0.999902,0.665679,0.999512,0.668785,0.999805,0.771604,0.102082,0.007021,0.0001450914
16,1.050638,0.184522,0.635284,0.642023,0.1,l1,"{'classifier__penalty': 'l1', 'classifier__C':...",10,0.635014,0.640791,...,0.639075,0.64021,0.640286,0.643033,0.624563,0.643571,0.119518,0.013726,0.008261,0.002348263


In [246]:
# grid_search returns its rows sorted by mean_test_score (descending), so the
# first row holds the best parameter setting.
# NOTE(review): the saved output of this cell reports C=1.0 while the results
# table displayed above ranks C=10.0 first; the cells look like they were run
# out of order. Re-run from a fresh kernel to confirm.
best_params = results.params.iloc[0]
print(best_params)
# Score a fresh pipeline configured with those parameters.
score_model(Model().set_params(**best_params))

{'classifier__penalty': 'l2', 'classifier__C': 1.0}
Average test f1 score: 0.7041689220190468 
Deviation: 0.00663549127932441


array([0.70889655, 0.69910925, 0.70133545, 0.6938578 , 0.70935821,
       0.69852472, 0.71373387, 0.71373819, 0.69820453, 0.70493064])

### TF-IDF + Logistic regression

In [234]:
def Model():
    """Build a fresh, unfitted TF-IDF + logistic-regression pipeline."""
    return Pipeline([
        ("vectorizer", TfidfVectorizer(lowercase=False, tokenizer=noop)),
        # liblinear supports both the "l1" and "l2" penalties searched in this
        # notebook. It is pinned because newer scikit-learn releases default
        # to lbfgs, which rejects "l1".
        ("classifier", LogisticRegression(solver="liblinear")),
    ])

In [235]:
# Search both penalties over 13 log-spaced values of C from 1e-9 to 1e3;
# n_iter=26 equals the size of the 2 x 13 grid.
results = grid_search(Model(), {
    "classifier__penalty": ["l1", "l2"],
    "classifier__C": np.logspace(-9, 3, 13),
}, n_iter=26)

  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  'precision', 'predicted', average, warn_for)
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.fl

  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  'precision', 'predicted', average, warn_for)
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  'precision', 'predicted', average, warn_for)
  if hasattr(X, 'dtype') a

  'precision', 'predicted', average, warn_for)
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  'precision', 'predicted', average, warn_for)
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  'precision', 'predicted', average, warn_for)
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  'precision', 'predicted', average, warn_for)
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  'precision', 'predicted', average, warn_for)
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  'precision', 'predicted', average, warn_for)
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  'precision', 'predicted', average, warn_for)
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  if hasat

  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  'precision', 'predicted', average, warn_for)
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  'precision', 'predicted', average, warn_for)
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  'precision', 'predicted', average, warn_for)
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  'precision', 'predicted', average, warn_for)
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  'precision', 'predicted', average, warn_for)
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  'precision', 'predicted', average, warn_for)
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  'precision', 'predicted', average, warn_for)
  if hasat

  'precision', 'predicted', average, warn_for)
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  'precisi

  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  'precision', 'predicted', average, warn_for)
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  'precision', 'predicted', average, warn_for)
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  'precision', 'predicted', average, warn_for)
  if hasattr(X, 'dtype') a

  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  'precision', 'predicted', average, warn_for)
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  'precision', 'predicted', average, warn_for)
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  'precision', 'predicted', average, warn_for)
  'precisi

  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  'precision', 'predicted', average, warn_for)
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  'precision', 'predicted', average, warn_for)
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  'precision', 'predicted', average, warn_for)
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  'precision', 'predicted', average, warn_for)
  if hasat

  'precision', 'predicted', average, warn_for)
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  'precision', 'predicted', average, warn_for)
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  'precision', 'predicted', average, warn_for)
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  'precision', 'predicted', average, warn_for)
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  if hasattr(X, 'dtype') a

  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  'precision', 'predicted', average, warn_for)
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  if hasat

  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  'precision', 'predicted', average, warn_for)
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  'precision', 'predicted', average, warn_for)
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  'precision', 'predicted', average, warn_for)
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  'precision', 'predicted', average, warn_for)
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  'precision', 'predicted', average, warn_for)
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  'precision', 'predicted', average, warn_for)
  'precisi

  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  'precision', 'predicted', average, warn_for)
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  'precision', 'predicted', average, warn_for)
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  if hasattr(X, 'dtype')

  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.

  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.

  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.

  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):


In [236]:
# Search results as returned by grid_search (sorted by mean_test_score, best first).
results

Unnamed: 0,mean_fit_time,mean_score_time,mean_test_score,mean_train_score,param_classifier__C,param_classifier__penalty,params,rank_test_score,split0_test_score,split0_train_score,...,split7_test_score,split7_train_score,split8_test_score,split8_train_score,split9_test_score,split9_train_score,std_fit_time,std_score_time,std_test_score,std_train_score
21,1.434497,0.299876,0.723391,0.912668,10.0,l2,"{'classifier__penalty': 'l2', 'classifier__C':...",1,0.727909,0.915662,...,0.721407,0.911393,0.719071,0.911785,0.725505,0.913598,0.286961,0.097604,0.004622,0.001398798
23,1.930169,0.28866,0.720005,0.989734,100.0,l2,"{'classifier__penalty': 'l2', 'classifier__C':...",2,0.724008,0.990458,...,0.716551,0.989686,0.714047,0.988788,0.717036,0.99027,0.3346,0.095913,0.008508,0.0005795167
20,2.688307,0.216762,0.718692,0.975144,10.0,l1,"{'classifier__penalty': 'l1', 'classifier__C':...",3,0.724522,0.975336,...,0.71056,0.97518,0.716415,0.974774,0.719138,0.977971,0.266594,0.029821,0.007226,0.001234659
25,2.737764,0.263817,0.698738,0.998545,1000.0,l2,"{'classifier__penalty': 'l2', 'classifier__C':...",4,0.700501,0.998926,...,0.693802,0.998242,0.701324,0.998438,0.696702,0.998437,0.658676,0.121039,0.005603,0.0002976725
22,5.297591,0.231051,0.694757,0.998829,100.0,l1,"{'classifier__penalty': 'l1', 'classifier__C':...",5,0.697516,0.999317,...,0.689019,0.998829,0.692353,0.998438,0.690273,0.999024,0.669386,0.047828,0.007801,0.0002831079
24,3.34692,0.252826,0.679732,0.999815,1000.0,l1,"{'classifier__penalty': 'l1', 'classifier__C':...",6,0.680168,0.99961,...,0.680829,0.999902,0.678852,0.999707,0.67733,0.999805,0.436857,0.085195,0.005465,0.0001108355
18,0.83462,0.200199,0.667469,0.686514,1.0,l1,"{'classifier__penalty': 'l1', 'classifier__C':...",7,0.664963,0.693533,...,0.662877,0.683163,0.671426,0.683975,0.669286,0.685857,0.062729,0.039902,0.005242,0.00384535
19,1.010836,0.265129,0.650994,0.686261,1.0,l2,"{'classifier__penalty': 'l2', 'classifier__C':...",8,0.648189,0.690823,...,0.64836,0.685079,0.655371,0.685529,0.645803,0.688477,0.119721,0.054512,0.005427,0.003718901
16,0.617273,0.174699,0.525765,0.518078,0.1,l1,"{'classifier__penalty': 'l1', 'classifier__C':...",9,0.522922,0.519436,...,0.536194,0.519832,0.527111,0.512168,0.523421,0.520605,0.063069,0.015981,0.007096,0.003109745
17,0.745795,0.227368,0.486589,0.481142,0.1,l2,"{'classifier__penalty': 'l2', 'classifier__C':...",10,0.482447,0.479955,...,0.491092,0.481968,0.490267,0.479938,0.485634,0.480954,0.075604,0.051829,0.002591,0.001483491


In [237]:
# grid_search returns its rows sorted by mean_test_score (descending), so the
# first row holds the best parameter setting.
best_params = results.params.iloc[0]
print(best_params)
# Score a fresh pipeline configured with those parameters.
score_model(Model().set_params(**best_params))

{'classifier__penalty': 'l2', 'classifier__C': 10.0}


  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):


Average test f1 score: 0.7007720619002562 
Deviation: 0.00773949002982553


array([0.6965412 , 0.69830878, 0.68686441, 0.70839132, 0.69068887,
       0.69973529, 0.70965977, 0.6993091 , 0.70965977, 0.70856209])

### Binary word count + Logistic regression

In [239]:
def Model():
    """Build a fresh, unfitted binary word-presence + logistic-regression pipeline."""
    return Pipeline([
        ("vectorizer", CountVectorizer(lowercase=False, tokenizer=noop,
                                       binary=True)),
        # liblinear supports both the "l1" and "l2" penalties searched in this
        # notebook. It is pinned because newer scikit-learn releases default
        # to lbfgs, which rejects "l1".
        ("classifier", LogisticRegression(solver="liblinear")),
    ])

In [240]:
# Search both penalties over 13 log-spaced values of C from 1e-9 to 1e3;
# n_iter=26 equals the size of the 2 x 13 grid.
results = grid_search(Model(), {
    "classifier__penalty": ["l1", "l2"],
    "classifier__C": np.logspace(-9, 3, 13),
}, n_iter=26)

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision'

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision'

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision'

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision'

In [241]:
# Display the search results table produced by the grid_search call above.
results

Unnamed: 0,mean_fit_time,mean_score_time,mean_test_score,mean_train_score,param_classifier__C,param_classifier__penalty,params,rank_test_score,split0_test_score,split0_train_score,...,split7_test_score,split7_train_score,split8_test_score,split8_train_score,split9_test_score,split9_train_score,std_fit_time,std_score_time,std_test_score,std_train_score
19,2.04249,0.288099,0.726116,0.888254,1.0,l2,"{'classifier__penalty': 'l2', 'classifier__C':...",1,0.732443,0.890383,...,0.723411,0.888807,0.718912,0.890017,0.735106,0.888577,0.38435,0.065473,0.007413,0.001268162
21,4.084938,0.324372,0.725729,0.980108,10.0,l2,"{'classifier__penalty': 'l2', 'classifier__C':...",2,0.734461,0.981748,...,0.726599,0.979034,0.719615,0.979959,0.727162,0.981355,0.62062,0.078436,0.006128,0.0009676664
18,5.775251,0.238005,0.718402,0.822686,1.0,l1,"{'classifier__penalty': 'l1', 'classifier__C':...",3,0.72706,0.822808,...,0.716397,0.822628,0.715013,0.822343,0.720025,0.825146,0.699569,0.074305,0.005517,0.002460951
20,13.937597,0.248719,0.71378,0.988312,10.0,l1,"{'classifier__penalty': 'l1', 'classifier__C':...",4,0.721466,0.989037,...,0.707358,0.986949,0.71162,0.987931,0.71605,0.989847,2.069707,0.059504,0.006826,0.0007235075
23,5.981747,0.342802,0.704951,0.995885,100.0,l2,"{'classifier__penalty': 'l2', 'classifier__C':...",5,0.712455,0.997064,...,0.706166,0.994704,0.698516,0.996082,0.702751,0.995986,0.949434,0.090761,0.006996,0.0005828935
25,8.032444,0.372249,0.686521,0.99917,1000.0,l2,"{'classifier__penalty': 'l2', 'classifier__C':...",6,0.696339,0.999219,...,0.683568,0.999121,0.680743,0.998828,0.688347,0.99961,2.384045,0.150622,0.006101,0.0002403011
22,6.491927,0.252935,0.685574,0.998779,100.0,l1,"{'classifier__penalty': 'l1', 'classifier__C':...",7,0.689806,0.999024,...,0.683857,0.998731,0.683166,0.998535,0.679388,0.999024,6.37145,0.060409,0.00682,0.0002014961
17,1.158327,0.265682,0.676689,0.729419,0.1,l2,"{'classifier__penalty': 'l2', 'classifier__C':...",8,0.671994,0.733078,...,0.680393,0.727791,0.67735,0.728628,0.674139,0.728972,0.222868,0.042521,0.005367,0.002304206
24,1.585404,0.19502,0.667562,0.999785,1000.0,l1,"{'classifier__penalty': 'l1', 'classifier__C':...",9,0.669479,0.99961,...,0.661483,0.999902,0.663753,0.999512,0.668633,0.999805,0.347592,0.021456,0.006022,0.0001499363
16,1.719179,0.196927,0.633524,0.640283,0.1,l1,"{'classifier__penalty': 'l1', 'classifier__C':...",10,0.627723,0.641607,...,0.630121,0.644156,0.640156,0.638579,0.624563,0.641376,0.197278,0.017904,0.006798,0.003222082


In [242]:
# Take the parameter dict stored in the first row of the results table
# (NOTE(review): assumes grid_search returns rows ordered best-first --
# confirm), then score a fresh pipeline configured with those parameters.
best_params = results["params"].iloc[0]
print(best_params)
score_model(
    Model().set_params(**best_params)
)

{'classifier__penalty': 'l2', 'classifier__C': 1.0}
Average test f1 score: 0.6994702426727164 
Deviation: 0.005440142079178722


array([0.69660051, 0.70515989, 0.69624664, 0.69756443, 0.69634366,
       0.68999769, 0.70360217, 0.7097386 , 0.70282072, 0.6966281 ])

### Bernoulli Naive Bayes

In [257]:
def Model():
    """Return a new, unfitted pipeline: binary token features -> Bernoulli NB.

    The vectorizer already emits 0/1 presence indicators (``binary=True``),
    so ``BernoulliNB`` is created with ``binarize=None`` and does no
    thresholding of its own. ``noop`` is defined elsewhere in the notebook
    (NOTE(review): presumably an identity tokenizer for pre-tokenised
    input -- confirm).
    """
    vectorizer = CountVectorizer(
        lowercase=False,
        tokenizer=noop,
        binary=True,
    )
    classifier = BernoulliNB(binarize=None)
    return Pipeline([
        ("vectorizer", vectorizer),
        ("classifier", classifier),
    ])

In [258]:
# Ten log-spaced smoothing strengths, alpha = 1e-9 ... 1e0; n_iter matches
# the number of candidate values.
search_space = {"classifier__alpha": np.logspace(-9, 0, 10)}
results = grid_search(Model(), search_space, n_iter=10)

In [259]:
# Display the search results table produced by the grid_search call above.
results

Unnamed: 0,mean_fit_time,mean_score_time,mean_test_score,mean_train_score,param_classifier__alpha,params,rank_test_score,split0_test_score,split0_train_score,split1_test_score,...,split7_test_score,split7_train_score,split8_test_score,split8_train_score,split9_test_score,split9_train_score,std_fit_time,std_score_time,std_test_score,std_train_score
8,0.438671,0.173835,0.72548,0.852863,0.1,{'classifier__alpha': 0.1},1,0.723902,0.851986,0.725331,...,0.725188,0.852125,0.718754,0.858302,0.723458,0.854556,0.026731,0.012379,0.005094,0.002718
7,0.435157,0.177496,0.718709,0.885293,0.01,{'classifier__alpha': 0.01},2,0.712609,0.885633,0.717672,...,0.721208,0.88613,0.709932,0.889791,0.722506,0.88659,0.014435,0.003611,0.005876,0.002235
6,0.45068,0.181328,0.699967,0.8925,0.001,{'classifier__alpha': 0.001},3,0.696199,0.89224,0.694695,...,0.70446,0.893104,0.694189,0.897125,0.699057,0.893158,0.033396,0.012807,0.006902,0.002077
5,0.446345,0.17788,0.685921,0.89409,0.0001,{'classifier__alpha': 0.0001},4,0.678233,0.894038,0.680737,...,0.688293,0.894599,0.685512,0.898629,0.686042,0.894654,0.033527,0.003241,0.007169,0.002051
9,0.434378,0.154089,0.677761,0.742106,1.0,{'classifier__alpha': 1.0},5,0.679472,0.742481,0.67386,...,0.67964,0.74154,0.675564,0.743497,0.678788,0.739771,0.046805,0.017563,0.004098,0.001602
4,0.437337,0.178542,0.676701,0.895119,1e-05,{'classifier__alpha': 1e-05},6,0.669616,0.894603,0.672134,...,0.680746,0.895659,0.674848,0.89934,0.675795,0.895715,0.030633,0.015238,0.007018,0.002031
3,0.433154,0.174866,0.668969,0.895434,1e-06,{'classifier__alpha': 1e-06},7,0.665086,0.895046,0.664832,...,0.675337,0.895958,0.667175,0.899638,0.66899,0.896191,0.017309,0.005401,0.006435,0.002014
2,0.437302,0.166655,0.663846,0.895567,1e-07,{'classifier__alpha': 1e-07},8,0.658146,0.895135,0.660086,...,0.669286,0.896224,0.66148,0.899727,0.664525,0.896368,0.020695,0.013191,0.006835,0.002008
1,0.397198,0.174071,0.660178,0.895602,1e-08,{'classifier__alpha': 1e-08},9,0.655146,0.895224,0.658112,...,0.664034,0.896224,0.65927,0.899727,0.661489,0.896457,0.051843,0.006759,0.006192,0.002001
0,0.378462,0.169021,0.658015,0.895638,1e-09,{'classifier__alpha': 1e-09},10,0.652122,0.895312,0.654962,...,0.662712,0.896313,0.656159,0.899727,0.659745,0.896457,0.053609,0.025921,0.006109,0.001995


In [260]:
# Take the parameter dict stored in the first row of the results table
# (NOTE(review): assumes grid_search returns rows ordered best-first --
# confirm), then score a fresh pipeline configured with those parameters.
best_params = results["params"].iloc[0]
print(best_params)
score_model(
    Model().set_params(**best_params)
)

{'classifier__alpha': 0.1}
Average test f1 score: 0.709164636148923 
Deviation: 0.008640397844601827


array([0.71662157, 0.70401356, 0.70560541, 0.71662157, 0.69393401,
       0.70903555, 0.70385864, 0.71194977, 0.70359973, 0.72640656])

### Multinomial Naive Bayes

In [251]:
def Model():
    """Return a new, unfitted pipeline: raw token counts -> Multinomial NB.

    Unlike the binary variants, the vectorizer keeps full token counts
    (no ``binary=True``). ``noop`` is defined elsewhere in the notebook
    (NOTE(review): presumably an identity tokenizer for pre-tokenised
    input -- confirm).
    """
    vectorizer = CountVectorizer(lowercase=False, tokenizer=noop)
    classifier = MultinomialNB()
    return Pipeline([
        ("vectorizer", vectorizer),
        ("classifier", classifier),
    ])

In [254]:
# Ten log-spaced smoothing strengths, alpha = 1e-9 ... 1e0; n_iter matches
# the number of candidate values.
search_space = {"classifier__alpha": np.logspace(-9, 0, 10)}
results = grid_search(Model(), search_space, n_iter=10)

In [255]:
# Display the search results table produced by the grid_search call above.
results

Unnamed: 0,mean_fit_time,mean_score_time,mean_test_score,mean_train_score,param_classifier__alpha,params,rank_test_score,split0_test_score,split0_train_score,split1_test_score,...,split7_test_score,split7_train_score,split8_test_score,split8_train_score,split9_test_score,split9_train_score,std_fit_time,std_score_time,std_test_score,std_train_score
8,0.433706,0.171573,0.740717,0.878687,0.1,{'classifier__alpha': 0.1},1,0.735984,0.878816,0.734357,...,0.741879,0.878675,0.746012,0.88259,0.733778,0.877715,0.041056,0.007316,0.006057,0.001789
7,0.449957,0.182932,0.724129,0.895344,0.01,{'classifier__alpha': 0.01},2,0.715807,0.895851,0.717087,...,0.727877,0.895877,0.727298,0.898411,0.713014,0.895173,0.026033,0.024787,0.006969,0.001426
9,0.447752,0.159951,0.711324,0.819392,1.0,{'classifier__alpha': 1.0},3,0.711764,0.821269,0.706119,...,0.707806,0.818777,0.716371,0.818676,0.704429,0.818092,0.043995,0.025646,0.006168,0.001346
6,0.455679,0.17554,0.70719,0.899355,0.001,{'classifier__alpha': 0.001},4,0.700315,0.899299,0.697856,...,0.708669,0.900047,0.710895,0.902833,0.696393,0.89936,0.036026,0.010675,0.007935,0.001532
5,0.463484,0.175528,0.695931,0.900573,0.0001,{'classifier__alpha': 0.0001},5,0.689294,0.900826,0.686572,...,0.698917,0.901093,0.701628,0.903694,0.68635,0.900912,0.035953,0.005785,0.00777,0.001539
4,0.44291,0.173725,0.688419,0.900951,1e-05,{'classifier__alpha': 1e-05},6,0.684235,0.901076,0.678323,...,0.69322,0.901535,0.692303,0.904056,0.679944,0.90119,0.025355,0.003094,0.006942,0.001546
3,0.432042,0.172352,0.683433,0.901034,1e-06,{'classifier__alpha': 1e-06},7,0.67846,0.901159,0.67523,...,0.687286,0.9017,0.686456,0.90414,0.675241,0.901273,0.009599,0.006724,0.006643,0.001535
2,0.426212,0.171801,0.681093,0.90105,1e-07,{'classifier__alpha': 1e-07},8,0.675088,0.901159,0.672507,...,0.684367,0.9017,0.685305,0.90414,0.674858,0.901356,0.02759,0.007354,0.006393,0.001533
1,0.401678,0.172583,0.679038,0.90105,1e-08,{'classifier__alpha': 1e-08},9,0.673132,0.901159,0.669641,...,0.682834,0.9017,0.682994,0.90414,0.672551,0.901356,0.049805,0.013526,0.00654,0.001533
0,0.381472,0.164295,0.677612,0.90105,1e-09,{'classifier__alpha': 1e-09},10,0.672475,0.901159,0.668068,...,0.68091,0.9017,0.681447,0.90414,0.671394,0.901356,0.055784,0.019605,0.006445,0.001533


In [256]:
# Take the parameter dict stored in the first row of the results table
# (NOTE(review): assumes grid_search returns rows ordered best-first --
# confirm), then score a fresh pipeline configured with those parameters.
best_params = results["params"].iloc[0]
print(best_params)
score_model(
    Model().set_params(**best_params)
)

{'classifier__alpha': 0.1}
Average test f1 score: 0.7187444947234882 
Deviation: 0.0069602539972568505


array([0.71527935, 0.71540038, 0.72837884, 0.71504892, 0.70825904,
       0.72825007, 0.71949057, 0.71000278, 0.71985723, 0.72747776])