## Fake News Classifer

**Project Link**: https://www.kaggle.com/c/fake-news/overview

In [1]:
from IPython.core.display import display, HTML
display(HTML("<style>.container {width: 100% !important; }</style>"))
import matplotlib.pyplot as plt
plt.style.use('fivethirtyeight')
from pylab import rcParams
rcParams['figure.figsize'] = 22, 7

import pandas as pd 
import numpy as np

from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
import re 

from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer, HashingVectorizer
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model import LogisticRegression, PassiveAggressiveClassifier
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.pipeline import Pipeline

import pickle
#from joblib import Memory
import gzip

In [2]:
df = pd.read_csv('./data/train.csv')
df.head(2)

Unnamed: 0,id,title,author,text,label
0,0,House Dem Aide: We Didn’t Even See Comey’s Let...,Darrell Lucus,House Dem Aide: We Didn’t Even See Comey’s Let...,1
1,1,"FLYNN: Hillary Clinton, Big Woman on Campus - ...",Daniel J. Flynn,Ever get the feeling your life circles the rou...,0


In [3]:
df = df.dropna().reset_index()
y = df['label']
messages = df.copy()
messages.shape, df.shape

((18285, 6), (18285, 6))

In [5]:
#memory = Memory(location='cachedir')
class Preprocessor(BaseEstimator, TransformerMixin):
    def __init__ (self):
        return None
    def fit(self, X, y=None):
        return self
    def transform(self, X):
        corpus = []
        for i in range(len(X)):
            text = re.sub(r"[^a-zA-Z]", ' ', X['text'][i])
            text = text.lower().split()
            text = [PorterStemmer().stem(word) for word in text if word not in stopwords.words('english')]
            text = ' '.join(text)
            corpus.append(text)
        return corpus

In [6]:
corpus = Preprocessor().fit_transform(messages) 

In [7]:
corpus[0:4]

['hous dem aid even see comey letter jason chaffetz tweet darrel lucu octob subscrib jason chaffetz stump american fork utah imag courtesi michael jolley avail creativ common licens apolog keith olbermann doubt worst person world week fbi director jame comey accord hous democrat aid look like also know second worst person well turn comey sent infam letter announc fbi look email may relat hillari clinton email server rank democrat relev committe hear comey found via tweet one republican committe chairmen know comey notifi republican chairmen democrat rank member hous intellig judiciari oversight committe agenc review email recent discov order see contain classifi inform long letter went oversight committe chairman jason chaffetz set polit world ablaz tweet fbi dir inform fbi learn exist email appear pertin investig case reopen jason chaffetz jasoninthehous octob cours know case comey actual say review email light unrel case know anthoni weiner sext teenag appar littl thing fact matter c

In [8]:
tfidf = TfidfVectorizer(max_features=5000,ngram_range=(1,3))
X = tfidf.fit_transform(corpus).toarray()
y = messages['label']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=0)

## Naive Bayes Classifier

In [9]:
classifier=MultinomialNB()
classifier.fit(X_train, y_train)
pred = classifier.predict(X_test)
print(classification_report(y_test, pred))

              precision    recall  f1-score   support

           0       0.88      0.95      0.91      2040
           1       0.93      0.83      0.88      1617

    accuracy                           0.90      3657
   macro avg       0.90      0.89      0.90      3657
weighted avg       0.90      0.90      0.90      3657



## Passive Aggressive Classifer

In [10]:
classifier=PassiveAggressiveClassifier()
classifier.fit(X_train, y_train)
pred = classifier.predict(X_test)
print(classification_report(y_test, pred))

              precision    recall  f1-score   support

           0       0.95      0.95      0.95      2040
           1       0.94      0.94      0.94      1617

    accuracy                           0.95      3657
   macro avg       0.95      0.95      0.95      3657
weighted avg       0.95      0.95      0.95      3657



## Logistic Regression Classifier

In [11]:
classifier=LogisticRegression()
classifier.fit(X_train, y_train)
pred = classifier.predict(X_test)
print(classification_report(y_test, pred))

              precision    recall  f1-score   support

           0       0.95      0.96      0.96      2040
           1       0.95      0.94      0.95      1617

    accuracy                           0.95      3657
   macro avg       0.95      0.95      0.95      3657
weighted avg       0.95      0.95      0.95      3657



## Multinomial Classifier with Hyperparameter Tuning

In [12]:
clf_cv = GridSearchCV(MultinomialNB(), param_grid={'alpha': np.arange(0,1,0.1)}, cv=5, verbose=2).fit(X_train, y_train)
clf_cv

Fitting 5 folds for each of 10 candidates, totalling 50 fits


  'setting alpha = %.1e' % _ALPHA_MIN)


[CV] END ..........................................alpha=0.0; total time=   1.3s


  'setting alpha = %.1e' % _ALPHA_MIN)


[CV] END ..........................................alpha=0.0; total time=   0.3s


  'setting alpha = %.1e' % _ALPHA_MIN)


[CV] END ..........................................alpha=0.0; total time=   0.2s


  'setting alpha = %.1e' % _ALPHA_MIN)


[CV] END ..........................................alpha=0.0; total time=   0.2s


  'setting alpha = %.1e' % _ALPHA_MIN)


[CV] END ..........................................alpha=0.0; total time=   0.2s
[CV] END ..........................................alpha=0.1; total time=   0.3s
[CV] END ..........................................alpha=0.1; total time=   0.2s
[CV] END ..........................................alpha=0.1; total time=   0.3s
[CV] END ..........................................alpha=0.1; total time=   0.2s
[CV] END ..........................................alpha=0.1; total time=   0.2s
[CV] END ..........................................alpha=0.2; total time=   0.3s
[CV] END ..........................................alpha=0.2; total time=   0.2s
[CV] END ..........................................alpha=0.2; total time=   0.2s
[CV] END ..........................................alpha=0.2; total time=   0.3s
[CV] END ..........................................alpha=0.2; total time=   0.3s
[CV] END ..........................alpha=0.30000000000000004; total time=   0.2s
[CV] END ...................

  'setting alpha = %.1e' % _ALPHA_MIN)


GridSearchCV(cv=5, estimator=MultinomialNB(),
             param_grid={'alpha': array([0. , 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9])},
             verbose=2)

In [13]:
clf_cv.best_estimator_, clf_cv.best_params_, clf_cv.best_score_

(MultinomialNB(alpha=0.0), {'alpha': 0.0}, 0.9029259395575184)

## Logistic Regression Hyperparameter tuning

In [14]:
%%time 
grid_values = {'penalty': ['l1', 'l2'],'C':[0.001,.009,0.01,.09,1,5,10,25]}
lr_cv = GridSearchCV(LogisticRegression(), param_grid= grid_values, cv=5, verbose=2).fit(X_train, y_train)

Fitting 5 folds for each of 16 candidates, totalling 80 fits


Traceback (most recent call last):
  File "C:\Users\92304\AppData\Roaming\Python\Python37\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\92304\AppData\Roaming\Python\Python37\site-packages\sklearn\linear_model\_logistic.py", line 1306, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "C:\Users\92304\AppData\Roaming\Python\Python37\site-packages\sklearn\linear_model\_logistic.py", line 444, in _check_solver
    "got %s penalty." % (solver, penalty))
ValueError: Solver lbfgs supports only 'l2' or 'none' penalties, got l1 penalty.



[CV] END ................................C=0.001, penalty=l1; total time=   0.0s
[CV] END ................................C=0.001, penalty=l1; total time=   0.1s


Traceback (most recent call last):
  File "C:\Users\92304\AppData\Roaming\Python\Python37\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\92304\AppData\Roaming\Python\Python37\site-packages\sklearn\linear_model\_logistic.py", line 1306, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "C:\Users\92304\AppData\Roaming\Python\Python37\site-packages\sklearn\linear_model\_logistic.py", line 444, in _check_solver
    "got %s penalty." % (solver, penalty))
ValueError: Solver lbfgs supports only 'l2' or 'none' penalties, got l1 penalty.

Traceback (most recent call last):
  File "C:\Users\92304\AppData\Roaming\Python\Python37\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\92304\AppData\Roaming\Python\Python37\site-packages\sklearn\linear_model\_logistic.py", line

[CV] END ................................C=0.001, penalty=l1; total time=   0.1s
[CV] END ................................C=0.001, penalty=l1; total time=   0.1s


Traceback (most recent call last):
  File "C:\Users\92304\AppData\Roaming\Python\Python37\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\92304\AppData\Roaming\Python\Python37\site-packages\sklearn\linear_model\_logistic.py", line 1306, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "C:\Users\92304\AppData\Roaming\Python\Python37\site-packages\sklearn\linear_model\_logistic.py", line 444, in _check_solver
    "got %s penalty." % (solver, penalty))
ValueError: Solver lbfgs supports only 'l2' or 'none' penalties, got l1 penalty.

Traceback (most recent call last):
  File "C:\Users\92304\AppData\Roaming\Python\Python37\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\92304\AppData\Roaming\Python\Python37\site-packages\sklearn\linear_model\_logistic.py", line

[CV] END ................................C=0.001, penalty=l1; total time=   0.1s
[CV] END ................................C=0.001, penalty=l2; total time=   0.4s
[CV] END ................................C=0.001, penalty=l2; total time=   0.4s
[CV] END ................................C=0.001, penalty=l2; total time=   0.4s
[CV] END ................................C=0.001, penalty=l2; total time=   0.4s
[CV] END ................................C=0.001, penalty=l2; total time=   0.4s


Traceback (most recent call last):
  File "C:\Users\92304\AppData\Roaming\Python\Python37\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\92304\AppData\Roaming\Python\Python37\site-packages\sklearn\linear_model\_logistic.py", line 1306, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "C:\Users\92304\AppData\Roaming\Python\Python37\site-packages\sklearn\linear_model\_logistic.py", line 444, in _check_solver
    "got %s penalty." % (solver, penalty))
ValueError: Solver lbfgs supports only 'l2' or 'none' penalties, got l1 penalty.

Traceback (most recent call last):
  File "C:\Users\92304\AppData\Roaming\Python\Python37\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\92304\AppData\Roaming\Python\Python37\site-packages\sklearn\linear_model\_logistic.py", line

[CV] END ................................C=0.009, penalty=l1; total time=   0.1s
[CV] END ................................C=0.009, penalty=l1; total time=   0.1s


Traceback (most recent call last):
  File "C:\Users\92304\AppData\Roaming\Python\Python37\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\92304\AppData\Roaming\Python\Python37\site-packages\sklearn\linear_model\_logistic.py", line 1306, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "C:\Users\92304\AppData\Roaming\Python\Python37\site-packages\sklearn\linear_model\_logistic.py", line 444, in _check_solver
    "got %s penalty." % (solver, penalty))
ValueError: Solver lbfgs supports only 'l2' or 'none' penalties, got l1 penalty.

Traceback (most recent call last):
  File "C:\Users\92304\AppData\Roaming\Python\Python37\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\92304\AppData\Roaming\Python\Python37\site-packages\sklearn\linear_model\_logistic.py", line

[CV] END ................................C=0.009, penalty=l1; total time=   0.1s


Traceback (most recent call last):
  File "C:\Users\92304\AppData\Roaming\Python\Python37\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\92304\AppData\Roaming\Python\Python37\site-packages\sklearn\linear_model\_logistic.py", line 1306, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "C:\Users\92304\AppData\Roaming\Python\Python37\site-packages\sklearn\linear_model\_logistic.py", line 444, in _check_solver
    "got %s penalty." % (solver, penalty))
ValueError: Solver lbfgs supports only 'l2' or 'none' penalties, got l1 penalty.



[CV] END ................................C=0.009, penalty=l1; total time=   0.1s
[CV] END ................................C=0.009, penalty=l1; total time=   0.1s
[CV] END ................................C=0.009, penalty=l2; total time=   0.6s
[CV] END ................................C=0.009, penalty=l2; total time=   0.6s
[CV] END ................................C=0.009, penalty=l2; total time=   0.6s
[CV] END ................................C=0.009, penalty=l2; total time=   0.5s
[CV] END ................................C=0.009, penalty=l2; total time=   0.6s


Traceback (most recent call last):
  File "C:\Users\92304\AppData\Roaming\Python\Python37\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\92304\AppData\Roaming\Python\Python37\site-packages\sklearn\linear_model\_logistic.py", line 1306, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "C:\Users\92304\AppData\Roaming\Python\Python37\site-packages\sklearn\linear_model\_logistic.py", line 444, in _check_solver
    "got %s penalty." % (solver, penalty))
ValueError: Solver lbfgs supports only 'l2' or 'none' penalties, got l1 penalty.

Traceback (most recent call last):
  File "C:\Users\92304\AppData\Roaming\Python\Python37\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\92304\AppData\Roaming\Python\Python37\site-packages\sklearn\linear_model\_logistic.py", line

[CV] END .................................C=0.01, penalty=l1; total time=   0.1s
[CV] END .................................C=0.01, penalty=l1; total time=   0.1s


Traceback (most recent call last):
  File "C:\Users\92304\AppData\Roaming\Python\Python37\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\92304\AppData\Roaming\Python\Python37\site-packages\sklearn\linear_model\_logistic.py", line 1306, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "C:\Users\92304\AppData\Roaming\Python\Python37\site-packages\sklearn\linear_model\_logistic.py", line 444, in _check_solver
    "got %s penalty." % (solver, penalty))
ValueError: Solver lbfgs supports only 'l2' or 'none' penalties, got l1 penalty.

Traceback (most recent call last):
  File "C:\Users\92304\AppData\Roaming\Python\Python37\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\92304\AppData\Roaming\Python\Python37\site-packages\sklearn\linear_model\_logistic.py", line

[CV] END .................................C=0.01, penalty=l1; total time=   0.1s
[CV] END .................................C=0.01, penalty=l1; total time=   0.1s


Traceback (most recent call last):
  File "C:\Users\92304\AppData\Roaming\Python\Python37\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\92304\AppData\Roaming\Python\Python37\site-packages\sklearn\linear_model\_logistic.py", line 1306, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "C:\Users\92304\AppData\Roaming\Python\Python37\site-packages\sklearn\linear_model\_logistic.py", line 444, in _check_solver
    "got %s penalty." % (solver, penalty))
ValueError: Solver lbfgs supports only 'l2' or 'none' penalties, got l1 penalty.



[CV] END .................................C=0.01, penalty=l1; total time=   0.1s
[CV] END .................................C=0.01, penalty=l2; total time=   0.7s
[CV] END .................................C=0.01, penalty=l2; total time=   0.7s
[CV] END .................................C=0.01, penalty=l2; total time=   0.6s
[CV] END .................................C=0.01, penalty=l2; total time=   0.7s
[CV] END .................................C=0.01, penalty=l2; total time=   0.6s
[CV] END .................................C=0.09, penalty=l1; total time=   0.0s


Traceback (most recent call last):
  File "C:\Users\92304\AppData\Roaming\Python\Python37\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\92304\AppData\Roaming\Python\Python37\site-packages\sklearn\linear_model\_logistic.py", line 1306, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "C:\Users\92304\AppData\Roaming\Python\Python37\site-packages\sklearn\linear_model\_logistic.py", line 444, in _check_solver
    "got %s penalty." % (solver, penalty))
ValueError: Solver lbfgs supports only 'l2' or 'none' penalties, got l1 penalty.

Traceback (most recent call last):
  File "C:\Users\92304\AppData\Roaming\Python\Python37\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\92304\AppData\Roaming\Python\Python37\site-packages\sklearn\linear_model\_logistic.py", line

[CV] END .................................C=0.09, penalty=l1; total time=   0.1s


Traceback (most recent call last):
  File "C:\Users\92304\AppData\Roaming\Python\Python37\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\92304\AppData\Roaming\Python\Python37\site-packages\sklearn\linear_model\_logistic.py", line 1306, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "C:\Users\92304\AppData\Roaming\Python\Python37\site-packages\sklearn\linear_model\_logistic.py", line 444, in _check_solver
    "got %s penalty." % (solver, penalty))
ValueError: Solver lbfgs supports only 'l2' or 'none' penalties, got l1 penalty.

Traceback (most recent call last):
  File "C:\Users\92304\AppData\Roaming\Python\Python37\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\92304\AppData\Roaming\Python\Python37\site-packages\sklearn\linear_model\_logistic.py", line

[CV] END .................................C=0.09, penalty=l1; total time=   0.1s
[CV] END .................................C=0.09, penalty=l1; total time=   0.1s


Traceback (most recent call last):
  File "C:\Users\92304\AppData\Roaming\Python\Python37\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\92304\AppData\Roaming\Python\Python37\site-packages\sklearn\linear_model\_logistic.py", line 1306, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "C:\Users\92304\AppData\Roaming\Python\Python37\site-packages\sklearn\linear_model\_logistic.py", line 444, in _check_solver
    "got %s penalty." % (solver, penalty))
ValueError: Solver lbfgs supports only 'l2' or 'none' penalties, got l1 penalty.



[CV] END .................................C=0.09, penalty=l1; total time=   0.1s
[CV] END .................................C=0.09, penalty=l2; total time=   0.9s
[CV] END .................................C=0.09, penalty=l2; total time=   1.0s
[CV] END .................................C=0.09, penalty=l2; total time=   0.9s
[CV] END .................................C=0.09, penalty=l2; total time=   0.9s
[CV] END .................................C=0.09, penalty=l2; total time=   0.9s


Traceback (most recent call last):
  File "C:\Users\92304\AppData\Roaming\Python\Python37\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\92304\AppData\Roaming\Python\Python37\site-packages\sklearn\linear_model\_logistic.py", line 1306, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "C:\Users\92304\AppData\Roaming\Python\Python37\site-packages\sklearn\linear_model\_logistic.py", line 444, in _check_solver
    "got %s penalty." % (solver, penalty))
ValueError: Solver lbfgs supports only 'l2' or 'none' penalties, got l1 penalty.

Traceback (most recent call last):
  File "C:\Users\92304\AppData\Roaming\Python\Python37\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\92304\AppData\Roaming\Python\Python37\site-packages\sklearn\linear_model\_logistic.py", line

[CV] END ....................................C=1, penalty=l1; total time=   0.1s
[CV] END ....................................C=1, penalty=l1; total time=   0.1s


Traceback (most recent call last):
  File "C:\Users\92304\AppData\Roaming\Python\Python37\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\92304\AppData\Roaming\Python\Python37\site-packages\sklearn\linear_model\_logistic.py", line 1306, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "C:\Users\92304\AppData\Roaming\Python\Python37\site-packages\sklearn\linear_model\_logistic.py", line 444, in _check_solver
    "got %s penalty." % (solver, penalty))
ValueError: Solver lbfgs supports only 'l2' or 'none' penalties, got l1 penalty.

Traceback (most recent call last):
  File "C:\Users\92304\AppData\Roaming\Python\Python37\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\92304\AppData\Roaming\Python\Python37\site-packages\sklearn\linear_model\_logistic.py", line

[CV] END ....................................C=1, penalty=l1; total time=   0.1s
[CV] END ....................................C=1, penalty=l1; total time=   0.1s


Traceback (most recent call last):
  File "C:\Users\92304\AppData\Roaming\Python\Python37\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\92304\AppData\Roaming\Python\Python37\site-packages\sklearn\linear_model\_logistic.py", line 1306, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "C:\Users\92304\AppData\Roaming\Python\Python37\site-packages\sklearn\linear_model\_logistic.py", line 444, in _check_solver
    "got %s penalty." % (solver, penalty))
ValueError: Solver lbfgs supports only 'l2' or 'none' penalties, got l1 penalty.



[CV] END ....................................C=1, penalty=l1; total time=   0.1s
[CV] END ....................................C=1, penalty=l2; total time=   1.6s
[CV] END ....................................C=1, penalty=l2; total time=   2.0s
[CV] END ....................................C=1, penalty=l2; total time=   1.8s
[CV] END ....................................C=1, penalty=l2; total time=   2.0s
[CV] END ....................................C=1, penalty=l2; total time=   1.6s
[CV] END ....................................C=5, penalty=l1; total time=   0.1s


Traceback (most recent call last):
  File "C:\Users\92304\AppData\Roaming\Python\Python37\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\92304\AppData\Roaming\Python\Python37\site-packages\sklearn\linear_model\_logistic.py", line 1306, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "C:\Users\92304\AppData\Roaming\Python\Python37\site-packages\sklearn\linear_model\_logistic.py", line 444, in _check_solver
    "got %s penalty." % (solver, penalty))
ValueError: Solver lbfgs supports only 'l2' or 'none' penalties, got l1 penalty.

Traceback (most recent call last):
  File "C:\Users\92304\AppData\Roaming\Python\Python37\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\92304\AppData\Roaming\Python\Python37\site-packages\sklearn\linear_model\_logistic.py", line

[CV] END ....................................C=5, penalty=l1; total time=   0.1s
[CV] END ....................................C=5, penalty=l1; total time=   0.1s


Traceback (most recent call last):
  File "C:\Users\92304\AppData\Roaming\Python\Python37\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\92304\AppData\Roaming\Python\Python37\site-packages\sklearn\linear_model\_logistic.py", line 1306, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "C:\Users\92304\AppData\Roaming\Python\Python37\site-packages\sklearn\linear_model\_logistic.py", line 444, in _check_solver
    "got %s penalty." % (solver, penalty))
ValueError: Solver lbfgs supports only 'l2' or 'none' penalties, got l1 penalty.

Traceback (most recent call last):
  File "C:\Users\92304\AppData\Roaming\Python\Python37\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\92304\AppData\Roaming\Python\Python37\site-packages\sklearn\linear_model\_logistic.py", line

[CV] END ....................................C=5, penalty=l1; total time=   0.1s
[CV] END ....................................C=5, penalty=l1; total time=   0.1s


Traceback (most recent call last):
  File "C:\Users\92304\AppData\Roaming\Python\Python37\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\92304\AppData\Roaming\Python\Python37\site-packages\sklearn\linear_model\_logistic.py", line 1306, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "C:\Users\92304\AppData\Roaming\Python\Python37\site-packages\sklearn\linear_model\_logistic.py", line 444, in _check_solver
    "got %s penalty." % (solver, penalty))
ValueError: Solver lbfgs supports only 'l2' or 'none' penalties, got l1 penalty.



[CV] END ....................................C=5, penalty=l2; total time=   2.7s
[CV] END ....................................C=5, penalty=l2; total time=   3.1s
[CV] END ....................................C=5, penalty=l2; total time=   2.7s
[CV] END ....................................C=5, penalty=l2; total time=   3.1s
[CV] END ....................................C=5, penalty=l2; total time=   2.8s
[CV] END ...................................C=10, penalty=l1; total time=   0.1s


Traceback (most recent call last):
  File "C:\Users\92304\AppData\Roaming\Python\Python37\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\92304\AppData\Roaming\Python\Python37\site-packages\sklearn\linear_model\_logistic.py", line 1306, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "C:\Users\92304\AppData\Roaming\Python\Python37\site-packages\sklearn\linear_model\_logistic.py", line 444, in _check_solver
    "got %s penalty." % (solver, penalty))
ValueError: Solver lbfgs supports only 'l2' or 'none' penalties, got l1 penalty.

Traceback (most recent call last):
  File "C:\Users\92304\AppData\Roaming\Python\Python37\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\92304\AppData\Roaming\Python\Python37\site-packages\sklearn\linear_model\_logistic.py", line

[CV] END ...................................C=10, penalty=l1; total time=   0.1s
[CV] END ...................................C=10, penalty=l1; total time=   0.1s


Traceback (most recent call last):
  File "C:\Users\92304\AppData\Roaming\Python\Python37\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\92304\AppData\Roaming\Python\Python37\site-packages\sklearn\linear_model\_logistic.py", line 1306, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "C:\Users\92304\AppData\Roaming\Python\Python37\site-packages\sklearn\linear_model\_logistic.py", line 444, in _check_solver
    "got %s penalty." % (solver, penalty))
ValueError: Solver lbfgs supports only 'l2' or 'none' penalties, got l1 penalty.

Traceback (most recent call last):
  File "C:\Users\92304\AppData\Roaming\Python\Python37\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\92304\AppData\Roaming\Python\Python37\site-packages\sklearn\linear_model\_logistic.py", line

[CV] END ...................................C=10, penalty=l1; total time=   0.2s
[CV] END ...................................C=10, penalty=l1; total time=   0.1s
[CV] END ...................................C=10, penalty=l2; total time=   4.5s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


[CV] END ...................................C=10, penalty=l2; total time=   4.7s
[CV] END ...................................C=10, penalty=l2; total time=   3.9s
[CV] END ...................................C=10, penalty=l2; total time=   3.3s
[CV] END ...................................C=10, penalty=l2; total time=   4.3s


Traceback (most recent call last):
  File "C:\Users\92304\AppData\Roaming\Python\Python37\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\92304\AppData\Roaming\Python\Python37\site-packages\sklearn\linear_model\_logistic.py", line 1306, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "C:\Users\92304\AppData\Roaming\Python\Python37\site-packages\sklearn\linear_model\_logistic.py", line 444, in _check_solver
    "got %s penalty." % (solver, penalty))
ValueError: Solver lbfgs supports only 'l2' or 'none' penalties, got l1 penalty.



[CV] END ...................................C=25, penalty=l1; total time=   0.2s


Traceback (most recent call last):
  File "C:\Users\92304\AppData\Roaming\Python\Python37\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\92304\AppData\Roaming\Python\Python37\site-packages\sklearn\linear_model\_logistic.py", line 1306, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "C:\Users\92304\AppData\Roaming\Python\Python37\site-packages\sklearn\linear_model\_logistic.py", line 444, in _check_solver
    "got %s penalty." % (solver, penalty))
ValueError: Solver lbfgs supports only 'l2' or 'none' penalties, got l1 penalty.

Traceback (most recent call last):
  File "C:\Users\92304\AppData\Roaming\Python\Python37\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\92304\AppData\Roaming\Python\Python37\site-packages\sklearn\linear_model\_logistic.py", line

[CV] END ...................................C=25, penalty=l1; total time=   0.1s
[CV] END ...................................C=25, penalty=l1; total time=   0.1s


Traceback (most recent call last):
  File "C:\Users\92304\AppData\Roaming\Python\Python37\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\92304\AppData\Roaming\Python\Python37\site-packages\sklearn\linear_model\_logistic.py", line 1306, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "C:\Users\92304\AppData\Roaming\Python\Python37\site-packages\sklearn\linear_model\_logistic.py", line 444, in _check_solver
    "got %s penalty." % (solver, penalty))
ValueError: Solver lbfgs supports only 'l2' or 'none' penalties, got l1 penalty.



[CV] END ...................................C=25, penalty=l1; total time=   0.1s


Traceback (most recent call last):
  File "C:\Users\92304\AppData\Roaming\Python\Python37\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\92304\AppData\Roaming\Python\Python37\site-packages\sklearn\linear_model\_logistic.py", line 1306, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "C:\Users\92304\AppData\Roaming\Python\Python37\site-packages\sklearn\linear_model\_logistic.py", line 444, in _check_solver
    "got %s penalty." % (solver, penalty))
ValueError: Solver lbfgs supports only 'l2' or 'none' penalties, got l1 penalty.



[CV] END ...................................C=25, penalty=l1; total time=   0.1s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


[CV] END ...................................C=25, penalty=l2; total time=   4.6s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


[CV] END ...................................C=25, penalty=l2; total time=   4.8s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


[CV] END ...................................C=25, penalty=l2; total time=   4.9s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


[CV] END ...................................C=25, penalty=l2; total time=   4.8s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


[CV] END ...................................C=25, penalty=l2; total time=   4.7s


        nan 0.9230926         nan 0.94770273        nan 0.95474395
        nan 0.95617975        nan 0.95617954]


Wall time: 1min 42s


In [15]:
lr_cv.best_estimator_, lr_cv.best_score_ 

(LogisticRegression(C=10), 0.956179750074487)

In [18]:
## Feature Names
feature_names = tfidf.get_feature_names()
feature_names[0:5], classifier.coef_[0][0:5]

(['aaron', 'abandon', 'abc', 'abe', 'abedin'],
 array([-0.78278861, -0.05810273, -0.11783989, -0.50612478, -0.17314779]))

In [19]:
#Getting the most real words
sorted(zip(classifier.coef_[0],feature_names), reverse=True)[0:20]

[(6.896506700958803, 'octob'),
 (5.638402935166183, 'novemb'),
 (5.451048847546451, 'anti'),
 (5.245854076886702, 'us'),
 (5.243326263476474, 'hillari'),
 (4.169063517259233, 'year old'),
 (3.9617325385780138, 'sourc'),
 (3.904633029212416, 'non'),
 (3.86966452673566, 'clinton'),
 (3.7941143809616866, 'co'),
 (3.635942021932334, 'elect'),
 (3.5198023122181286, 'self'),
 (3.510951423153407, 'comment'),
 (2.9988544751454156, 'share'),
 (2.8727276483676154, 'presid elect'),
 (2.844428809135522, 'com'),
 (2.813814705626213, 'howev'),
 (2.8071016626577263, 'al'),
 (2.7968274133673443, 'old'),
 (2.70626726726611, 'via')]

In [20]:
# Most Fake words
sorted(zip(classifier.coef_[0], feature_names), reverse=False)[:20]

[(-10.200557403899325, 'said'),
 (-8.942111310063618, 'mr'),
 (-7.387992526134953, 'breitbart'),
 (-6.738698035175341, 'twitter'),
 (-4.984233085695171, 'ms'),
 (-4.534774064516067, 'presid donald trump'),
 (-4.528857715342893, 'presid donald'),
 (-4.269923334298096, 'follow'),
 (-3.8509982482252716, 'presid trump'),
 (-3.538701791261236, 'breitbart news'),
 (-3.47778227186818, 'presid'),
 (-3.051894654485774, 'sunday'),
 (-2.9679123702339436, 'friday'),
 (-2.818411358949909, 'milo'),
 (-2.7884916454686763, 'march'),
 (-2.7052701573721114, 'percent'),
 (-2.5585192953693823, 'dr'),
 (-2.524631059408287, 'follow twitter'),
 (-2.520226174615601, 'januari'),
 (-2.5144273887444477, 'game')]