# Sentiment Modeling

## Imports

In [1]:
import os
import gensim

import numpy as np
import pandas as pd 

import seaborn as sns
import matplotlib.pyplot as plt 

from gensim.models import Word2Vec, FastText
from scipy.sparse import save_npz, load_npz

from sklearn.pipeline import Pipeline 
from sklearn.preprocessing import StandardScaler, RobustScaler 

from sklearn.model_selection import train_test_split
from sklearn.model_selection import StratifiedKFold, KFold

from sklearn.linear_model import LogisticRegression, SGDClassifier
from sklearn.svm import SVC, LinearSVC
from sklearn.naive_bayes import (
    MultinomialNB, ComplementNB
)
from sklearn.ensemble import (
    RandomForestClassifier, ExtraTreesClassifier
)
from sklearn.neural_network import MLPClassifier


import sys
sys.path.append("../")
from src.sentiment_modeling import * 
from src.feature_engineering import *
from src.utility import *

## Loading Config

## Loading data

In [2]:
df = load_pickle("../data/interim/IMDB_feature_engineered.pkl")
df.head()

Unnamed: 0,review,sentiment,review_charecters_len,review_word_len,has_html,cleaned_review,tokens,cleaned_review_charecter_len,cleaned_review_word_len,cleaned_review_has_html,positive_tokens,negative_tokens,positive_tokens_len,negative_tokens_len
0,One of the other reviewers has mentioned that ...,1,1377,320,True,one reviewer mentioned watching oz episode hoo...,"[one, reviewer, mentioned, watching, oz, episo...",931,162,False,"[right, right, trust, regard, classic, appeal,...","[struck, brutality, faint, timid, punch, priso...",13,20
1,A wonderful little production. <br /><br />The...,1,793,166,True,wonderful little production filming technique ...,"[wonderful, little, production, filming, techn...",557,84,False,"[wonderful, comforting, well, seamless, well, ...",[terribly],11,1
2,I thought this was a wonderful way to spend ti...,1,721,172,True,thought wonderful way spend time hot summer we...,"[thought, wonderful, way, spend, time, hot, su...",495,83,False,"[wonderful, hot, witty, likable, well, impress...","[plot, simplistic, killer, disappointed, risk,...",11,6
3,Basically there's a family where a little boy ...,0,569,141,True,basically family little boy jake think zombie ...,"[basically, family, little, boy, jake, think, ...",362,62,False,"[like, well]","[zombie, slower, kill, ruin, meaningless, ignore]",2,6
4,"Petter Mattei's ""Love in the Time of Money"" is...",1,1032,236,True,petter matteis love time money visually stunni...,"[petter, matteis, love, time, money, visually,...",725,123,False,"[love, stunning, vivid, success, stylishly, so...","[loneliness, anxiously]",15,2


In [3]:
# "review_charecters_len","review_word_len",
#["tokens"]
features = ["cleaned_review_charecter_len","cleaned_review_word_len","positive_tokens_len","negative_tokens_len"]
target = "sentiment"

In [4]:
countVectorized = load_npz("../data/interim/count_vectorized_reviwes.npz")
tfidfVectorized = load_npz("../data/interim/tfidf_vectorized_reviwes.npz")
w2vVectorized = np.load("../data/interim/w2v_review_vecs.npy")
ftVectorized = np.load("../data/interim/ft_review_vecs.npy")

In [5]:
countVectorized, tfidfVectorized, w2vVectorized, ftVectorized

(<Compressed Sparse Row sparse matrix of dtype 'float32'
 	with 5044360 stored elements and shape (50000, 20000)>,
 <Compressed Sparse Row sparse matrix of dtype 'float32'
 	with 5044360 stored elements and shape (50000, 20000)>,
 array([[ 4.2732398e-05,  1.9302557e-04, -5.8641137e-05, ...,
         -8.2263170e-05,  1.1703277e-04,  1.8301233e-05],
        [ 5.2201816e-05,  2.7585472e-04, -3.4006036e-06, ...,
          2.5638103e-04,  3.5770788e-04,  5.7117966e-05],
        [ 1.2033064e-04,  4.1904690e-04,  6.5128232e-05, ...,
         -1.8050938e-04, -5.1984804e-05,  3.3234592e-04],
        ...,
        [ 1.7658886e-04, -4.0092540e-04, -2.8987983e-04, ...,
         -2.0702722e-04, -2.3320242e-04,  2.6683396e-05],
        [ 8.9120213e-06,  4.4251559e-05,  2.6783053e-04, ...,
          2.2672680e-04,  8.6163520e-05,  1.1480367e-04],
        [ 3.1963334e-04,  1.7563683e-04,  4.5814701e-05, ...,
          2.6755944e-07,  2.3819497e-04, -3.0530614e-04]],
       shape=(50000, 300), dtype=flo

## Splitting the data

In [6]:
test_size = 0.2

In [7]:
df_X = df[features]
y = df[target] 

In [8]:
df_X_train, df_X_test, df_y_train, df_y_test = train_test_split(df_X, y, test_size=test_size, stratify=y)

In [9]:
df_X_train.shape, df_X_test.shape, df_y_train.shape, df_y_test.shape

((40000, 4), (10000, 4), (40000,), (10000,))

In [10]:
count_X_train, count_X_test, count_y_train, count_y_test = train_test_split(countVectorized, y, test_size=test_size, stratify=y)

In [11]:
count_X_train.shape, count_X_test.shape, count_y_train.shape, count_y_test.shape

((40000, 20000), (10000, 20000), (40000,), (10000,))

In [12]:
tfidf_X_train, tfidf_X_test, tfidf_y_train, tfidf_y_test = train_test_split(tfidfVectorized, y, test_size=test_size, stratify=y)

In [13]:
tfidf_X_train.shape, tfidf_X_test.shape, tfidf_y_train.shape, tfidf_y_test.shape

((40000, 20000), (10000, 20000), (40000,), (10000,))

In [14]:
w2v_X_train, w2v_X_test, w2v_y_train, w2v_y_test = train_test_split(w2vVectorized, y, test_size=test_size, stratify=y)

In [15]:
w2v_X_train.shape, w2v_X_test.shape, w2v_y_train.shape, w2v_y_test.shape

((40000, 300), (10000, 300), (40000,), (10000,))

In [16]:
ft_X_train, ft_X_test, ft_y_train, ft_y_test = train_test_split(ftVectorized, y, test_size=test_size, stratify=y)

In [17]:
ft_X_train.shape, ft_X_test.shape, ft_y_train.shape, ft_y_test.shape

((40000, 300), (10000, 300), (40000,), (10000,))

## Models

In [18]:
models = {
    "LogisticRegression": LogisticRegression(random_state=42, max_iter=1000), 
    "LinearSVC": LinearSVC(random_state=42, max_iter=5000), 
    "MultinomialNB": MultinomialNB(),
    "ComplementNB": ComplementNB(),
    "RandomForestClassifier": RandomForestClassifier(random_state=42, n_jobs=-1), 
    "ExtraTreesClassifier": ExtraTreesClassifier(random_state=42, n_jobs=-1),
    
}
scoring_metrics = [
    'accuracy', 'precision', 'recall', 'f1', 'roc_auc'
]

folds = 5
cv_strategy = KFold(n_splits=folds, shuffle=True, random_state=42)

"MLPClassifier": MLPClassifier(
        hidden_layer_sizes=(256, 128), 
        activation="relu", 
        solver="adam", 
        alpha=1e-4, 
        learning_rate="adaptive",
        learning_rate_init=0.001, 
        batch_size=256,
        max_iter=80, 
        early_stopping=False,
        tol=1e-4,
        random_state=42
    )

## Traning models on Numeric cols in df

In [19]:
df_models, df_results = train_and_eval_models(
    data=(df_X_train, df_X_test, df_y_train, df_y_test),
    models=models.copy()
)
save_data_csv(df_results, "../results/metrics/df_results.csv")
save_pickle(df_models, "../models/df_models.pkl")

Evaluating LogisticRegression...
Evaluating LinearSVC...
Evaluating MultinomialNB...
Evaluating ComplementNB...
Evaluating RandomForestClassifier...
Evaluating ExtraTreesClassifier...

Evaluation Complete.


In [20]:
df_results

Unnamed: 0,Model,Fit_Time_sec,Train_Accuracy,Train_Precision,Train_Recall,Train_F1,Train_Roc_auc,Test_Accuracy,Test_Precision,Test_Recall,Test_F1,Test_Roc_auc
0,LogisticRegression,0.338401,0.7316,0.731322,0.7322,0.731761,0.797138,0.737,0.736716,0.7376,0.737158,0.802777
1,LinearSVC,0.072655,0.73115,0.731127,0.7312,0.731163,0.797066,0.7365,0.736358,0.7368,0.736579,0.802701
2,MultinomialNB,0.0077,0.72955,0.719518,0.7524,0.735592,0.794299,0.7321,0.72109,0.757,0.738609,0.801279
3,ComplementNB,0.008099,0.72955,0.719518,0.7524,0.735592,0.794299,0.7321,0.72109,0.757,0.738609,0.801279
4,RandomForestClassifier,3.784943,0.99575,0.9958,0.9957,0.99575,0.999937,0.703,0.708848,0.689,0.698783,0.771047
5,ExtraTreesClassifier,2.445767,0.995825,0.999849,0.9918,0.995808,0.999965,0.6871,0.689488,0.6808,0.685116,0.752445


In [21]:
df_cv_models, df_cv_results = train_and_eval_models(
    data=(df_X, y),
    models=models.copy(),
    cv_strategy=cv_strategy
)
save_data_csv(df_cv_results, "../results/metrics/df_cv_results.csv")
save_pickle(df_cv_models, "../models/df_cv_models.pkl")

Evaluating LogisticRegression...
Evaluating LinearSVC...
Evaluating MultinomialNB...
Evaluating ComplementNB...
Evaluating RandomForestClassifier...
Evaluating ExtraTreesClassifier...

Evaluation Complete.


In [22]:
df_cv_results

Unnamed: 0,Model,Fit_Time_sec,Train_Accuracy,Train_Precision,Train_Recall,Train_F1,Train_Roc_auc,Test_Accuracy,Test_Precision,Test_Recall,Test_F1,Test_Roc_auc
0,LogisticRegression,0.326921,0.732545,0.732417,0.732819,0.732617,0.79826,0.73246,0.732362,0.732677,0.73251,0.798222
1,LinearSVC,0.15204,0.7325,0.732593,0.732299,0.732445,0.798173,0.73252,0.732694,0.732157,0.732415,0.798136
2,MultinomialNB,0.021632,0.730095,0.719869,0.753349,0.736228,0.795684,0.73026,0.720009,0.753555,0.736397,0.795664
3,ComplementNB,0.021808,0.73011,0.719894,0.75334,0.736237,0.795684,0.7302,0.719892,0.753634,0.736374,0.795664
4,RandomForestClassifier,11.995397,0.995835,0.995691,0.99598,0.995835,0.999936,0.7002,0.706522,0.684915,0.695545,0.768369
5,ExtraTreesClassifier,8.645533,0.99587,0.999849,0.99189,0.995853,0.999966,0.6847,0.689666,0.671675,0.680539,0.747424


## Traning models on count vectors

In [23]:
count_models, count_results = train_and_eval_models(
    data=(count_X_train, count_X_test, count_y_train, count_y_test),
    models=models.copy()
)
save_data_csv(count_results, "../results/metrics/count_results.csv")
save_pickle(count_models, "../models/count_models.pkl")

Evaluating LogisticRegression...
Evaluating LinearSVC...
Evaluating MultinomialNB...
Evaluating ComplementNB...
Evaluating RandomForestClassifier...
Evaluating ExtraTreesClassifier...

Evaluation Complete.


In [24]:
count_results

Unnamed: 0,Model,Fit_Time_sec,Train_Accuracy,Train_Precision,Train_Recall,Train_F1,Train_Roc_auc,Test_Accuracy,Test_Precision,Test_Recall,Test_F1,Test_Roc_auc
0,LogisticRegression,20.276598,0.99745,0.997102,0.9978,0.997451,0.999918,0.8789,0.872713,0.8872,0.879897,0.944542
1,LinearSVC,65.468143,1.0,1.0,1.0,1.0,1.0,0.8566,0.854614,0.8594,0.857,0.928898
2,MultinomialNB,0.044177,0.88265,0.879952,0.8862,0.883065,0.944476,0.8626,0.85901,0.8676,0.863284,0.92515
3,ComplementNB,0.047755,0.88265,0.879952,0.8862,0.883065,0.944476,0.8626,0.85901,0.8676,0.863284,0.925151
4,RandomForestClassifier,82.58448,1.0,1.0,1.0,1.0,1.0,0.8537,0.847788,0.8622,0.854933,0.930884
5,ExtraTreesClassifier,114.762681,1.0,1.0,1.0,1.0,1.0,0.8716,0.866469,0.8786,0.872493,0.941921


In [25]:
count_cv_models, count_cv_results = train_and_eval_models(
    data=(countVectorized, y),
    models=models.copy(),
    cv_strategy=cv_strategy
)
save_data_csv(count_cv_results, "../results/metrics/count_cv_results.csv")
save_pickle(count_cv_models, "../models/count_cv_models.pkl")

Evaluating LogisticRegression...
Evaluating LinearSVC...
Evaluating MultinomialNB...
Evaluating ComplementNB...
Evaluating RandomForestClassifier...
Evaluating ExtraTreesClassifier...

Evaluation Complete.


In [26]:
count_cv_results

Unnamed: 0,Model,Fit_Time_sec,Train_Accuracy,Train_Precision,Train_Recall,Train_F1,Train_Roc_auc,Test_Accuracy,Test_Precision,Test_Recall,Test_F1,Test_Roc_auc
0,LogisticRegression,9.660205,0.997285,0.996972,0.9976,0.997286,0.999918,0.88068,0.876848,0.885751,0.881275,0.94663
1,LinearSVC,109.421471,1.0,1.0,1.0,1.0,1.0,0.86024,0.858231,0.863026,0.860619,0.930234
2,MultinomialNB,0.156994,0.88209,0.879804,0.885101,0.882444,0.943761,0.86562,0.86199,0.870646,0.866288,0.929283
3,ComplementNB,0.161348,0.88211,0.879832,0.885111,0.882463,0.94376,0.8656,0.861985,0.870606,0.866266,0.929283
4,RandomForestClassifier,292.684736,1.0,1.0,1.0,1.0,1.0,0.85482,0.852778,0.857719,0.855235,0.929113
5,ExtraTreesClassifier,398.44184,1.0,1.0,1.0,1.0,1.0,0.8739,0.875839,0.871325,0.873568,0.943051


## Traning models on tfidf vectors

In [27]:
tfidf_models, tfidf_results = train_and_eval_models(
    data=(tfidf_X_train, tfidf_X_test, tfidf_y_train, tfidf_y_test),
    models=models.copy()
)
save_data_csv(tfidf_results, "../results/metrics/tfidf_results.csv")
save_pickle(tfidf_models, "../models/tfidf_models.pkl")

Evaluating LogisticRegression...
Evaluating LinearSVC...
Evaluating MultinomialNB...
Evaluating ComplementNB...
Evaluating RandomForestClassifier...
Evaluating ExtraTreesClassifier...

Evaluation Complete.


In [28]:
tfidf_results

Unnamed: 0,Model,Fit_Time_sec,Train_Accuracy,Train_Precision,Train_Recall,Train_F1,Train_Roc_auc,Test_Accuracy,Test_Precision,Test_Recall,Test_F1,Test_Roc_auc
0,LogisticRegression,2.308213,0.9331,0.926742,0.94055,0.933595,0.98197,0.8925,0.886091,0.9008,0.893385,0.959484
1,LinearSVC,1.68512,0.98215,0.980229,0.98415,0.982186,0.998278,0.8888,0.886635,0.8916,0.88911,0.956172
2,MultinomialNB,0.04098,0.895925,0.886476,0.90815,0.897182,0.959616,0.8694,0.861448,0.8804,0.870821,0.943082
3,ComplementNB,0.041232,0.895925,0.886476,0.90815,0.897182,0.959616,0.8694,0.861448,0.8804,0.870821,0.943082
4,RandomForestClassifier,79.575191,1.0,1.0,1.0,1.0,1.0,0.849,0.852525,0.844,0.848241,0.929221
5,ExtraTreesClassifier,116.911558,1.0,1.0,1.0,1.0,1.0,0.8636,0.863019,0.8644,0.863709,0.93863


In [29]:
tfidf_cv_models, tfidf_cv_results = train_and_eval_models(
    data=(tfidfVectorized, y),
    models=models.copy(),
    cv_strategy=cv_strategy
)
save_data_csv(tfidf_cv_results, "../results/metrics/tfidf_cv_results.csv")
save_pickle(tfidf_cv_models, "../models/tfidf_cv_models.pkl")

Evaluating LogisticRegression...
Evaluating LinearSVC...
Evaluating MultinomialNB...
Evaluating ComplementNB...
Evaluating RandomForestClassifier...
Evaluating ExtraTreesClassifier...

Evaluation Complete.


In [30]:
tfidf_cv_results

Unnamed: 0,Model,Fit_Time_sec,Train_Accuracy,Train_Precision,Train_Recall,Train_F1,Train_Roc_auc,Test_Accuracy,Test_Precision,Test_Recall,Test_F1,Test_Roc_auc
0,LogisticRegression,1.962671,0.931705,0.924619,0.940049,0.93227,0.981499,0.89728,0.888966,0.90796,0.89836,0.962119
1,LinearSVC,3.030797,0.98161,0.979329,0.98399,0.981653,0.998227,0.89222,0.886727,0.899324,0.892978,0.958725
2,MultinomialNB,0.153627,0.89392,0.884523,0.906141,0.895201,0.959384,0.87458,0.864649,0.888209,0.876264,0.945146
3,ComplementNB,0.155106,0.893945,0.884514,0.906211,0.89523,0.959384,0.87476,0.864781,0.888447,0.876448,0.945146
4,RandomForestClassifier,276.912337,1.0,1.0,1.0,1.0,1.0,0.85488,0.85533,0.854296,0.854794,0.932163
5,ExtraTreesClassifier,404.875632,1.0,1.0,1.0,1.0,1.0,0.86896,0.867934,0.870373,0.869139,0.941005


## Train on w2v

In [31]:
models = {
    'SGDClassifier': SGDClassifier(loss='log_loss', max_iter=1000, random_state=42),
    "LogisticRegression": LogisticRegression(random_state=42, max_iter=1000), 
    "LinearSVC": LinearSVC(random_state=42, max_iter=5000), 
    "RandomForestClassifier": RandomForestClassifier(random_state=42, n_jobs=-1), 
    "ExtraTreesClassifier": ExtraTreesClassifier(random_state=42, n_jobs=-1),
}
scoring_metrics = [
    'accuracy', 'precision', 'recall', 'f1', 'roc_auc'
]

folds = 5
cv_strategy = KFold(n_splits=folds, shuffle=True, random_state=42)

    "MLPClassifier": MLPClassifier(
        hidden_layer_sizes=(512, 256, 128),
        activation="relu", 
        solver="adam", 
        alpha=1e-5, 
        learning_rate="adaptive",
        learning_rate_init=0.0005, 
        batch_size=128,
        max_iter=100, 
        early_stopping=False,
        tol=1e-4,
        random_state=42
    )

In [32]:
w2v_models, w2v_results = train_and_eval_models(
    data=(w2v_X_train, w2v_X_test, w2v_y_train, w2v_y_test),
    models=models.copy()
)
save_data_csv(w2v_results, "../results/metrics/w2v_results.csv")
save_pickle(w2v_models, "../models/w2v_models.pkl")

Evaluating SGDClassifier...
Evaluating LogisticRegression...
Evaluating LinearSVC...
Evaluating RandomForestClassifier...
Evaluating ExtraTreesClassifier...

Evaluation Complete.


In [33]:
w2v_results

Unnamed: 0,Model,Fit_Time_sec,Train_Accuracy,Train_Precision,Train_Recall,Train_F1,Train_Roc_auc,Test_Accuracy,Test_Precision,Test_Recall,Test_F1,Test_Roc_auc
0,SGDClassifier,0.801465,0.5,0.5,1.0,0.666667,0.740747,0.5,0.5,1.0,0.666667,0.743363
1,LogisticRegression,0.103649,0.5,0.0,0.0,0.0,0.5,0.5,0.0,0.0,0.0,0.5
2,LinearSVC,0.782062,0.677475,0.659952,0.73225,0.694224,0.745603,0.676,0.658501,0.7312,0.692949,0.748151
3,RandomForestClassifier,56.070802,1.0,1.0,1.0,1.0,1.0,0.6819,0.683182,0.6784,0.680783,0.749419
4,ExtraTreesClassifier,11.755632,1.0,1.0,1.0,1.0,1.0,0.6696,0.673415,0.6586,0.665925,0.740263


In [34]:
w2v_cv_models, w2v_cv_results = train_and_eval_models(
    data=(w2vVectorized, y),
    models=models.copy(),
    cv_strategy=cv_strategy
)
save_data_csv(w2v_cv_results, "../results/metrics/w2v_cv_results.csv")
save_pickle(w2v_cv_models, "../models/w2v_cv_models.pkl")

Evaluating SGDClassifier...


  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


Evaluating LogisticRegression...


  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


Evaluating LinearSVC...
Evaluating RandomForestClassifier...
Evaluating ExtraTreesClassifier...

Evaluation Complete.


In [35]:
w2v_cv_results

Unnamed: 0,Model,Fit_Time_sec,Train_Accuracy,Train_Precision,Train_Recall,Train_F1,Train_Roc_auc,Test_Accuracy,Test_Precision,Test_Recall,Test_F1,Test_Roc_auc
0,SGDClassifier,1.15831,0.5,0.4,0.8,0.533333,0.738977,0.5,0.4,0.8,0.533331,0.736899
1,LogisticRegression,0.450901,0.53147,0.323787,0.562887,0.407379,0.691082,0.5276,0.321424,0.560208,0.404641,0.688707
2,LinearSVC,0.99121,0.673805,0.658443,0.729338,0.690406,0.743671,0.66964,0.654961,0.725127,0.686465,0.741532
3,RandomForestClassifier,195.582893,1.0,1.0,1.0,1.0,1.0,0.6748,0.673788,0.677814,0.675758,0.74367
4,ExtraTreesClassifier,41.474783,1.0,1.0,1.0,1.0,1.0,0.66886,0.671282,0.661865,0.666515,0.735422


## Train on FastText

In [36]:
ft_models, ft_results = train_and_eval_models(
    data=(ft_X_train, ft_X_test, ft_y_train, ft_y_test),
    models=models.copy()
)
save_data_csv(ft_results, "../results/metrics/ft_results.csv")
save_pickle(ft_models, "../models/ft_models.pkl")

Evaluating SGDClassifier...
Evaluating LogisticRegression...
Evaluating LinearSVC...
Evaluating RandomForestClassifier...
Evaluating ExtraTreesClassifier...

Evaluation Complete.


In [37]:
ft_results

Unnamed: 0,Model,Fit_Time_sec,Train_Accuracy,Train_Precision,Train_Recall,Train_F1,Train_Roc_auc,Test_Accuracy,Test_Precision,Test_Recall,Test_F1,Test_Roc_auc
0,SGDClassifier,0.83297,0.5,0.5,1.0,0.666667,0.734611,0.5,0.5,1.0,0.666667,0.731012
1,LogisticRegression,0.056494,0.5,0.0,0.0,0.0,0.5,0.5,0.0,0.0,0.0,0.5
2,LinearSVC,0.533621,0.667675,0.649356,0.729,0.686877,0.735037,0.6684,0.65298,0.7188,0.684311,0.731419
3,RandomForestClassifier,55.98701,1.0,1.0,1.0,1.0,1.0,0.6792,0.679559,0.6782,0.678879,0.747983
4,ExtraTreesClassifier,11.674114,1.0,1.0,1.0,1.0,1.0,0.6735,0.674864,0.6696,0.672222,0.739744


In [38]:
ft_cv_models, ft_cv_results = train_and_eval_models(
    data=(ftVectorized, y),
    models=models.copy(),
    cv_strategy=cv_strategy
)
save_data_csv(ft_cv_results, "../results/metrics/ft_cv_results.csv")
save_pickle(ft_cv_models, "../models/ft_cv_models.pkl")

Evaluating SGDClassifier...


  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


Evaluating LogisticRegression...


  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


Evaluating LinearSVC...
Evaluating RandomForestClassifier...
Evaluating ExtraTreesClassifier...

Evaluation Complete.


In [39]:
ft_cv_results

Unnamed: 0,Model,Fit_Time_sec,Train_Accuracy,Train_Precision,Train_Recall,Train_F1,Train_Roc_auc,Test_Accuracy,Test_Precision,Test_Recall,Test_F1,Test_Roc_auc
0,SGDClassifier,1.281374,0.5,0.4,0.8,0.533333,0.733897,0.5,0.4,0.8,0.533331,0.732106
1,LogisticRegression,0.424494,0.50039,0.300195,0.6,0.400173,0.686976,0.49844,0.29922,0.6,0.399306,0.685172
2,LinearSVC,1.806066,0.60006,0.674135,0.680313,0.557119,0.734234,0.598,0.667233,0.681034,0.55614,0.732435
3,RandomForestClassifier,194.179625,1.0,1.0,1.0,1.0,1.0,0.68436,0.68068,0.694539,0.687533,0.754224
4,ExtraTreesClassifier,41.42741,1.0,1.0,1.0,1.0,1.0,0.67492,0.674261,0.677071,0.675602,0.743733
