# Download Data

In [2]:
import pandas as pd

buzzfeedbr_clickbait_titles = pd.read_csv("../train_data/buzzfeedbr/clickbait_titles.csv")
print(buzzfeedbr_clickbait_titles[0:5])

buzzfeedbr_non_clickbait_titles = pd.read_csv("../train_data/buzzfeedbr/non_clickbait_titles.csv")
print(buzzfeedbr_non_clickbait_titles[0:5])

clickbait_titles = pd.read_csv("../train_data/bhargaviparanjape/clickbait_data.csv", sep="\n", header=None)
print(clickbait_titles[0:5])

non_clickbait_titles = pd.read_csv("../train_data/bhargaviparanjape/non_clickbait_data.csv", sep="\n", header=None)
print(non_clickbait_titles[0:5])

all_links = pd.read_csv("../train_data/links.csv")
all_links[0:5]

   Unnamed: 0                                              title  \
0           0  20 imagens que, por incrível que pareça, não s...   
1           1  24 filmes da Disney explicados por homens que ...   
2           2  Todo mundo é uma princesa da Disney OU um heró...   
3           3  Quais são as três comidas que combinam com a s...   
4           4  Este teste sobre comida vai dizer de uma vez p...   

                                                 url  
0  https://www.buzzfeed.com/daves4/animais-penis-...  
1  https://www.buzzfeed.com/keelyflaherty/filmes-...  
2  https://www.buzzfeed.com/perpetua/teste-prince...  
3  https://www.buzzfeed.com/joannaborns/teste-tre...  
4  https://www.buzzfeed.com/joannaborns/teste-com...  
   Unnamed: 0                                              title  \
0           0  Nem todos que assinam manifesto pró-Boulos dec...   
1           1  Centenas de professores universitários não est...   
2           2  Janaina Paschoal diz que ser vice de Bolso

Unnamed: 0.1,Unnamed: 0,category_id,clickbait_title,content,count,id,title,url,verified_category_id,verified_clickbait_title
0,0,1,0.0,Tweet\n\nThe Environmental Protection Agency (...,1,2078,Wolf in Sheep’s Clothing (or a Scientist’s Lab...,http://www.pogo.org/blog/2018/05/wolf_in_sheep...,,
1,1,1,0.0,Reveja todos os finais de 'O Outro Lado'\n,1,2077,"globo.com - Absolutamente tudo sobre notícias,...",https://www.globo.com/,,
2,2,1,,A black Yale student was taking a nap in a com...,1,2076,White people keep calling the cops on black pe...,https://www.vox.com/identities/2018/5/11/17340...,,
3,3,1,,"Friday afternoon, the Justice Department relea...",1,2075,"Donald Trump, Bernie Sanders, and Jill Stein a...",https://www.vox.com/policy-and-politics/2018/2...,,
4,4,1,0.0,I regularly attend an annual security conferen...,1,2074,John McCain: ‘Vladimir Putin Is an Evil Man’,https://www.wsj.com/articles/john-mccain-vladi...,,


# Build DataFrame

In [2]:
from sklearn.model_selection import train_test_split
import numpy as np

df = all_links.copy()

df['title'].replace('', np.nan, inplace=True)
df.dropna(subset=["title"], inplace=True)
df = df.loc[df['title'].str.len() > 30]

df["clickbait_title"] = df['verified_clickbait_title'].fillna(df['clickbait_title'])

df["is_clickbait"] = [ 0 if c == 0 else 1 if c == 1 else 0.5 for c in df['clickbait_title'] ]

df = df[["title", "is_clickbait"]]

print("Number of click bait samples", len(df[df["is_clickbait"] == 1]))

df

Number of click bait samples 164


Unnamed: 0,title,is_clickbait
0,Pay Little Wanderers NYC using PayPal.Me,0.5
1,Evangelical Christian Radio Host Says Nothing ...,0.0
2,Did Armed Trump Supporters Ask a Navajo Legisl...,0.0
3,"'SPYGATE': Trump ramps up attacks on FBI, Russ...",0.5
4,Top intel official says Chinese ZTE cellphones...,0.5
5,"Lesley Stahl: Trump admitted mission to ""discr...",0.5
6,Tomi Lahren Gets Owned By Genealogist After He...,1.0
7,SMH: Tomi Lahren Gets A Drink Thrown At Her At...,0.5
9,Trump's norm-breaking is leading to a constitu...,0.0
10,Richard Painter says there is more evidence ag...,0.5


# Default classification approach, ignoring "I don't know"

In [3]:
from sklearn.naive_bayes import MultinomialNB
from sklearn.feature_extraction.text import HashingVectorizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from imblearn.pipeline import Pipeline
from sklearn.preprocessing import FunctionTransformer
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.dummy import DummyClassifier

positive_df = df[df["is_clickbait"] == 1]
negative_df = df[df["is_clickbait"] == 0].apply(np.random.permutation)[0:len(positive_df)]
balanced_df = positive_df.append(negative_df)
balanced_df = balanced_df.reindex(np.random.permutation(balanced_df.index))

X = balanced_df
y = balanced_df["is_clickbait"]
y = [ False if yi == 0 else True for yi in y ]

pipeline = Pipeline([
    ('selector', FunctionTransformer(lambda x: x['title'], validate=False)),
    ('tfidf', TfidfVectorizer(strip_accents='ascii', ngram_range=(1, 3), max_df=0.5, min_df=2, token_pattern='[A-Za-z0-9]+')),
    ('clf', MultinomialNB()),
])

clf = GridSearchCV(pipeline, verbose=1, scoring='f1', param_grid={
    'clf': [
        MultinomialNB(),
        RandomForestClassifier(),
        DummyClassifier("constant", constant=1.0)
    ],
    'tfidf__max_df': [0.5, 0.1],
    'tfidf__min_df': [1, 2],
    'tfidf__token_pattern': ['[A-Za-z0-9]+', r"(?u)\b\w\w+\b"]
})
clf = clf.fit(X, y)

results = pd.DataFrame(clf.cv_results_)
results['clf'] = [ p['clf'] for p in results['params'] ]
results['tfidf__max_df'] = [ p['tfidf__max_df'] for p in results['params'] ]
results['tfidf__min_df'] = [ p['tfidf__min_df'] for p in results['params'] ]
results['tfidf__token_pattern'] = [ p['tfidf__token_pattern'] for p in results['params'] ]
results[['clf', 'tfidf__max_df', 'tfidf__min_df', 'tfidf__token_pattern', 'mean_test_score']]

Fitting 3 folds for each of 24 candidates, totalling 72 fits


[Parallel(n_jobs=1)]: Done  72 out of  72 | elapsed:    7.1s finished


Unnamed: 0,clf,tfidf__max_df,tfidf__min_df,tfidf__token_pattern,mean_test_score
0,"MultinomialNB(alpha=1.0, class_prior=None, fit...",0.5,1,[A-Za-z0-9]+,0.620094
1,"MultinomialNB(alpha=1.0, class_prior=None, fit...",0.5,1,(?u)\b\w\w+\b,0.624098
2,"MultinomialNB(alpha=1.0, class_prior=None, fit...",0.5,2,[A-Za-z0-9]+,0.630742
3,"MultinomialNB(alpha=1.0, class_prior=None, fit...",0.5,2,(?u)\b\w\w+\b,0.624223
4,"MultinomialNB(alpha=1.0, class_prior=None, fit...",0.1,1,[A-Za-z0-9]+,0.623281
5,"MultinomialNB(alpha=1.0, class_prior=None, fit...",0.1,1,(?u)\b\w\w+\b,0.614153
6,"MultinomialNB(alpha=1.0, class_prior=None, fit...",0.1,2,[A-Za-z0-9]+,0.621734
7,"MultinomialNB(alpha=1.0, class_prior=None, fit...",0.1,2,(?u)\b\w\w+\b,0.606199
8,"(DecisionTreeClassifier(class_weight=None, cri...",0.5,1,[A-Za-z0-9]+,0.347618
9,"(DecisionTreeClassifier(class_weight=None, cri...",0.5,1,(?u)\b\w\w+\b,0.340246


# Regressor Appoach, taking "I don't know" into account

In [4]:
positive_df = df[df["is_clickbait"] == 1]
negative_df = df[df["is_clickbait"] == 0].apply(np.random.permutation)[0:len(positive_df)]
idk_df = df[(df["is_clickbait"] != 0) & (df["is_clickbait"] != 1)].apply(np.random.permutation)[0:len(positive_df)]
balanced_df = positive_df.append(negative_df).append(idk_df)
balanced_df = balanced_df.reindex(np.random.permutation(balanced_df.index))

X = balanced_df
y = balanced_df["is_clickbait"]

balanced_df

Unnamed: 0,title,is_clickbait
1552,facts about the iss - Google Search,1.0
185,O que espanta no caso de Marcelo Sereno - O An...,0.0
144,Tout ce qu'il faut savoir pour prendre soin d'...,0.0
89,Pyongyang ne renoncera jamais entièrement à se...,1.0
2046,Fato inédito na história do Barça!,1.0
290,Lula joga a toalha - O Antagonista,0.0
32,Hillary Clinton: Destroy Syria for Israel: « T...,1.0
221,SATURDAY OF LAZARUS by Archpriest Timothy Crem...,0.0
180,"Edit: FOI AGREDIDO, NÃO! AGREDIU VERBALMENTE P...",0.0
431,Artistas da lacração socialista: Pabllo Vittar...,0.5


In [5]:
from sklearn.metrics import classification_report, f1_score, recall_score, accuracy_score
from sklearn.ensemble import RandomForestRegressor
from sklearn.base import TransformerMixin, BaseEstimator
from sklearn import linear_model

class ModelTransformer(BaseEstimator, TransformerMixin):
    def __init__(self, model):
        self.model = model

    def fit(self, *args, **kwargs):
        self.model.fit(*args, **kwargs)
        return self

    def transform(self, X, **transform_params):
        return pd.DataFrame(self.model.predict(X))

class RoundTransformer(BaseEstimator):
    def __init__(self, limit=0.5):
        self.limit = limit
        
    def fit(self, *args, **kwargs):
        return self
        
    def predict(self, X):
        return [ 1.0 if x >= self.limit else 0.0 for x in X[0] ]
    
    def score(self, X, y):
        # Ignore the "i don't know" click bait titles for scoring later,
        # because even if the humans are not sure, it is not a problem for
        # the machine to be wrong
        y = pd.Series(y).reset_index(drop=True)
        indexes = y.index[(y != 0.5)].tolist()
        X_test = X.loc[indexes]
        y_test = y.loc[indexes]

        if len(y_test) == 0:
            return 0
        
        score = accuracy_score(self.predict(X_test), y_test)
        return score
    
pipeline = Pipeline([
    ('selector', FunctionTransformer(lambda x: x['title'], validate=False)),
    ('tfidf', TfidfVectorizer(strip_accents='ascii', ngram_range=(1, 3), max_df=0.5, min_df=2)),
    ('clf', ModelTransformer(RandomForestRegressor())),
    ('round', RoundTransformer(limit=0.5))
])

clf = GridSearchCV(pipeline, verbose=1, scoring='f1', param_grid={
    'round__limit': [0.5, 0.7, 0.3],
    'clf': [ModelTransformer(DummyClassifier("constant", constant=1.0)),
            ModelTransformer(RandomForestRegressor()),
            ModelTransformer(linear_model.LinearRegression()),
            ModelTransformer(linear_model.Ridge()),
            ModelTransformer(linear_model.ElasticNet()),
#             ModelTransformer(linear_model.LassoLars()),
#             ModelTransformer(linear_model.OrthogonalMatchingPursuit()),
#             ModelTransformer(linear_model.BayesianRidge()),
#             ModelTransformer(linear_model.ARDRegression()),
#             ModelTransformer(linear_model.LogisticRegression()),
#             ModelTransformer(linear_model.SGDRegressor()),
            ModelTransformer(linear_model.PassiveAggressiveRegressor()),
#             ModelTransformer(linear_model.TheilSenRegressor()),
            ModelTransformer(linear_model.HuberRegressor()),
#             ModelTransformer(linear_model.RANSACRegressor()),
            ModelTransformer(linear_model.Lasso())
           ]
})
clf = clf.fit(X, y)

results = pd.DataFrame(clf.cv_results_)
results['clf'] = [ p['clf'] for p in results['params'] ]
results['round__limit'] = [ p['round__limit'] for p in results['params'] ]
results[['clf', 'round__limit', 'mean_test_score']]

Fitting 3 folds for each of 24 candidates, totalling 72 fits


ValueError: Classification metrics can't handle a mix of continuous and binary targets

# Train on another dataset and score against ours

In [6]:
all_cb_titles = clickbait_titles[0].append(buzzfeedbr_clickbait_titles["title"])
df2 = pd.DataFrame({
        "title": all_cb_titles,
        "is_clickbait": [1] * len(all_cb_titles)
    })

all_ncb_titles = non_clickbait_titles[0].append(buzzfeedbr_non_clickbait_titles["title"])
df2 = df2.append(pd.DataFrame({
        "title": all_ncb_titles,
        "is_clickbait": [0] * len(all_ncb_titles)
     }), ignore_index=True)
df2 = df2.reindex(np.random.permutation(df2.index))

X_train = df2[["title"]]
y_train = df2["is_clickbait"]

df3 = df[(df["is_clickbait"] == 1) | (df["is_clickbait"] == 0)]
df3 = df3.reindex(np.random.permutation(df3.index))

X_test = df3[["title"]]
y_test = df3["is_clickbait"]

pipeline = Pipeline([
    ('selector', FunctionTransformer(lambda x: x['title'], validate=False)),
    ('tfidf', TfidfVectorizer(strip_accents='ascii', ngram_range=(1, 3), max_df=0.5, min_df=5, token_pattern='[A-Za-z0-9]+')),
    ('clf', MultinomialNB()),
])

clf = pipeline.fit(X_train, y_train)

clf.score(X_test, y_test)

0.55752212389380529

# Using all data together

In [16]:
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier, AdaBoostClassifier, BaggingClassifier, ExtraTreesClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.naive_bayes import GaussianNB
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.linear_model import SGDClassifier

df4 = df.append(df2, ignore_index=True)

positive_df = df4[df4["is_clickbait"] == 1]
negative_df = df4[df4["is_clickbait"] == 0].apply(np.random.permutation)[0:len(positive_df)]
balanced_df = positive_df.append(negative_df)
balanced_df = balanced_df.reindex(np.random.permutation(balanced_df.index))

X = balanced_df
y = balanced_df["is_clickbait"]
y = [ False if yi == 0 else True for yi in y ]

pipeline = Pipeline([
    ('selector', FunctionTransformer(lambda x: x['title'], validate=False)),
    ('tfidf', TfidfVectorizer(strip_accents='ascii', ngram_range=(1, 3), max_df=0.5, min_df=2, token_pattern='[A-Za-z0-9]+')),
    ('clf', MultinomialNB()),
])

clf = GridSearchCV(pipeline, verbose=2, scoring='f1', param_grid={
    'clf': [
        DummyClassifier("constant", constant=1.0),
        SGDClassifier(),
        AdaBoostClassifier(),
        BaggingClassifier(),
        ExtraTreesClassifier(),
        RandomForestClassifier(),
        GradientBoostingClassifier(),
        MultinomialNB(),
        LogisticRegression(),
        MLPClassifier(max_iter=5, early_stopping=True),
    ],
#     'tfidf__max_df': [0.5, 0.1],
#     'tfidf__min_df': [2, 5],
#     'tfidf__token_pattern': ['[A-Za-z0-9]+', r"(?u)\b\w\w+\b"]
})
clf = clf.fit(X, y)

results = pd.DataFrame(clf.cv_results_)
results['clf'] = [ p['clf'] for p in results['params'] ]
# results['tfidf__max_df'] = [ p['tfidf__max_df'] for p in results['params'] ]
# results['tfidf__min_df'] = [ p['tfidf__min_df'] for p in results['params'] ]
# results[['clf', 'tfidf__max_df', 'tfidf__min_df', 'mean_test_score']]
results[['clf', 'mean_test_score']]

Fitting 3 folds for each of 10 candidates, totalling 30 fits
[CV] clf=DummyClassifier(constant=1.0, random_state=None, strategy='constant') 
[CV]  clf=DummyClassifier(constant=1.0, random_state=None, strategy='constant'), total=   4.3s
[CV] clf=DummyClassifier(constant=1.0, random_state=None, strategy='constant') 


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    5.6s remaining:    0.0s


[CV]  clf=DummyClassifier(constant=1.0, random_state=None, strategy='constant'), total=   3.4s
[CV] clf=DummyClassifier(constant=1.0, random_state=None, strategy='constant') 
[CV]  clf=DummyClassifier(constant=1.0, random_state=None, strategy='constant'), total=   3.2s
[CV] clf=SGDClassifier(alpha=0.0001, average=False, class_weight=None, epsilon=0.1,
       eta0=0.0, fit_intercept=True, l1_ratio=0.15,
       learning_rate='optimal', loss='hinge', max_iter=None, n_iter=None,
       n_jobs=1, penalty='l2', power_t=0.5, random_state=None,
       shuffle=True, tol=None, verbose=0, warm_start=False) 




[CV]  clf=SGDClassifier(alpha=0.0001, average=False, class_weight=None, epsilon=0.1,
       eta0=0.0, fit_intercept=True, l1_ratio=0.15,
       learning_rate='optimal', loss='hinge', max_iter=None, n_iter=None,
       n_jobs=1, penalty='l2', power_t=0.5, random_state=None,
       shuffle=True, tol=None, verbose=0, warm_start=False), total=   3.1s
[CV] clf=SGDClassifier(alpha=0.0001, average=False, class_weight=None, epsilon=0.1,
       eta0=0.0, fit_intercept=True, l1_ratio=0.15,
       learning_rate='optimal', loss='hinge', max_iter=None, n_iter=None,
       n_jobs=1, penalty='l2', power_t=0.5, random_state=None,
       shuffle=True, tol=None, verbose=0, warm_start=False) 




[CV]  clf=SGDClassifier(alpha=0.0001, average=False, class_weight=None, epsilon=0.1,
       eta0=0.0, fit_intercept=True, l1_ratio=0.15,
       learning_rate='optimal', loss='hinge', max_iter=None, n_iter=None,
       n_jobs=1, penalty='l2', power_t=0.5, random_state=None,
       shuffle=True, tol=None, verbose=0, warm_start=False), total=   3.0s
[CV] clf=SGDClassifier(alpha=0.0001, average=False, class_weight=None, epsilon=0.1,
       eta0=0.0, fit_intercept=True, l1_ratio=0.15,
       learning_rate='optimal', loss='hinge', max_iter=None, n_iter=None,
       n_jobs=1, penalty='l2', power_t=0.5, random_state=None,
       shuffle=True, tol=None, verbose=0, warm_start=False) 




[CV]  clf=SGDClassifier(alpha=0.0001, average=False, class_weight=None, epsilon=0.1,
       eta0=0.0, fit_intercept=True, l1_ratio=0.15,
       learning_rate='optimal', loss='hinge', max_iter=None, n_iter=None,
       n_jobs=1, penalty='l2', power_t=0.5, random_state=None,
       shuffle=True, tol=None, verbose=0, warm_start=False), total=   3.0s
[CV] clf=AdaBoostClassifier(algorithm='SAMME.R', base_estimator=None,
          learning_rate=1.0, n_estimators=50, random_state=None) 
[CV]  clf=AdaBoostClassifier(algorithm='SAMME.R', base_estimator=None,
          learning_rate=1.0, n_estimators=50, random_state=None), total=   6.3s
[CV] clf=AdaBoostClassifier(algorithm='SAMME.R', base_estimator=None,
          learning_rate=1.0, n_estimators=50, random_state=None) 
[CV]  clf=AdaBoostClassifier(algorithm='SAMME.R', base_estimator=None,
          learning_rate=1.0, n_estimators=50, random_state=None), total=   6.4s
[CV] clf=AdaBoostClassifier(algorithm='SAMME.R', base_estimator=None,
       

[CV]  clf=GradientBoostingClassifier(criterion='friedman_mse', init=None,
              learning_rate=0.1, loss='deviance', max_depth=3,
              max_features=None, max_leaf_nodes=None,
              min_impurity_decrease=0.0, min_impurity_split=None,
              min_samples_leaf=1, min_samples_split=2,
              min_weight_fraction_leaf=0.0, n_estimators=100,
              presort='auto', random_state=None, subsample=1.0, verbose=0,
              warm_start=False), total=  17.0s
[CV] clf=GradientBoostingClassifier(criterion='friedman_mse', init=None,
              learning_rate=0.1, loss='deviance', max_depth=3,
              max_features=None, max_leaf_nodes=None,
              min_impurity_decrease=0.0, min_impurity_split=None,
              min_samples_leaf=1, min_samples_split=2,
              min_weight_fraction_leaf=0.0, n_estimators=100,
              presort='auto', random_state=None, subsample=1.0, verbose=0,
              warm_start=False) 
[CV]  clf=GradientBoost



[CV]  clf=MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=True, epsilon=1e-08,
       hidden_layer_sizes=(100,), learning_rate='constant',
       learning_rate_init=0.001, max_iter=5, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='adam', tol=0.0001, validation_fraction=0.1,
       verbose=False, warm_start=False), total= 1.4min
[CV] clf=MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=True, epsilon=1e-08,
       hidden_layer_sizes=(100,), learning_rate='constant',
       learning_rate_init=0.001, max_iter=5, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='adam', tol=0.0001, validation_fraction=0.1,
       verbose=False, warm_start=False) 




[CV]  clf=MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=True, epsilon=1e-08,
       hidden_layer_sizes=(100,), learning_rate='constant',
       learning_rate_init=0.001, max_iter=5, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='adam', tol=0.0001, validation_fraction=0.1,
       verbose=False, warm_start=False), total= 1.4min
[CV] clf=MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=True, epsilon=1e-08,
       hidden_layer_sizes=(100,), learning_rate='constant',
       learning_rate_init=0.001, max_iter=5, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='adam', tol=0.0001, validation_fraction=0.1,
       verbose=False, warm_start=False) 




[CV]  clf=MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=True, epsilon=1e-08,
       hidden_layer_sizes=(100,), learning_rate='constant',
       learning_rate_init=0.001, max_iter=5, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='adam', tol=0.0001, validation_fraction=0.1,
       verbose=False, warm_start=False), total= 1.4min


[Parallel(n_jobs=1)]: Done  30 out of  30 | elapsed: 10.0min finished


Unnamed: 0,clf,mean_test_score
0,"DummyClassifier(constant=1.0, random_state=Non...",0.670003
1,"SGDClassifier(alpha=0.0001, average=False, cla...",0.968237
2,"(DecisionTreeClassifier(class_weight=None, cri...",0.884506
3,"(DecisionTreeClassifier(class_weight=None, cri...",0.910446
4,"(ExtraTreeClassifier(class_weight=None, criter...",0.946726
5,"(DecisionTreeClassifier(class_weight=None, cri...",0.933223
6,([DecisionTreeRegressor(criterion='friedman_ms...,0.874177
7,"MultinomialNB(alpha=1.0, class_prior=None, fit...",0.968891
8,"LogisticRegression(C=1.0, class_weight=None, d...",0.96138
9,"MLPClassifier(activation='relu', alpha=0.0001,...",0.972639
