In [100]:
import nltk
from nltk import word_tokenize
from nltk import pos_tag
from sklearn.datasets import fetch_20newsgroups
from sklearn.feature_extraction.text import CountVectorizer

#########################################################################################################
# Training data for the 8 categories                                                                    #
#categories = ['comp.graphics','comp.os.ms-windows.misc', 'comp.sys.ibm.pc.hardware',
#              'comp.sys.mac.hardware','rec.autos', 'rec.motorcycles',
#              'rec.sport.baseball', 'rec.sport.hockey']
categories = ['comp.graphics','comp.os.ms-windows.misc']

###################################### How to run code start ############################################
# Step 1:  Pass at max 2 or 3 categories in the above step as it will take a long time to run
# Step 2: Based on scenario uncomment code and run grid search
# Step 3 : Ensure to pass the correct dataset to gridsearch
# Example for step 3 : grid.fit(newsgroups_trainwithoutheaders.data, newsgroups_trainwithoutheaders.target)
###################################### How to run code end  #############################################

# The below code is to determine the accuracy on train data by removing headers and footers             #
newsgroups_trainwithoutheaders = fetch_20newsgroups(subset='test',
                                     remove=('headers', 'footers'),
                                     categories=categories)
#newsgroups_trainheaders = fetch_20newsgroups(subset='test',
#                                     remove=('headers', 'footers'),
#                                     categories=categories)

# The below code is to determine the accuracy on train data by NOT removing headers,footers and quotes  #
#newsgroups_testwithoutheaders = fetch_20newsgroups(subset='test',
                                     categories=categories)
#newsgroups_testwithoutheaders = fetch_20newsgroups(subset='test',
                                     categories=categories)
#########################################################################################################

#########################################################################################################
# The below code is to check if Lemmatizer can be used for feature extraction
# Assign Lemmatizer to Count Vectorizer [http://scikit-learn.org/stable/modules/feature_extraction.html]#
import nltk
from nltk import pos_tag
wnl = nltk.wordnet.WordNetLemmatizer()
analyzer=CountVectorizer().build_analyzer()
import re
pattern = re.compile("[+-]?\d+(?:\.\d+)?$")

def penn2morphy(penntag):
    """ Converts Penn Treebank tags to WordNet. """
    morphy_tag = {'NN':'n', 'JJ':'a',
                  'VB':'v', 'RB':'r'}
    try:
        return morphy_tag[penntag[:2]]
    except:
        return 'n' 
    
def lemmatize_sent(list_word):
    return [wnl.lemmatize(word.lower(),pos=penn2morphy(tag)) 
           for word, tag in pos_tag(list_word)]

def lem_rmv_digit(doc):
    word = [word for word in lemmatize_sent(analyzer(doc))]
    return (filter(lambda x: not pattern.match(x),word))
   
count_vect = CountVectorizer(min_df=3, stop_words="english",analyzer=lem_rmv_digit)

#########################################################################################################


In [101]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.svm import LinearSVC
from sklearn.decomposition import TruncatedSVD, NMF
from sklearn.linear_model import LogisticRegression
from sklearn.feature_extraction.text import CountVectorizer
from nltk.stem.wordnet import WordNetLemmatizer
from nltk import word_tokenize   
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn import metrics
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.naive_bayes import GaussianNB

# used to cache results
from tempfile import mkdtemp
from shutil import rmtree
from sklearn.externals.joblib import Memory
# print(__doc__)
cachedir = mkdtemp()
memory = Memory(cachedir=cachedir, verbose=10)

#########################################################################################################
#Construct Pipeline
pipeline = Pipeline([
    ('vect', CountVectorizer(min_df=1, stop_words='english')), # Feature Extraction
    ('tfidf', TfidfTransformer()),                             # Feature Extraction 
    ('reduce_dim', TruncatedSVD(random_state=0)),              # Dimensionality Reduction    
    ('clf', GaussianNB()),                                     # Classifier 
], 
memory=memory
)
#########################################################################################################

#########################################################################################################
# Feature Extraction : Min Df values for CountVectorizer                                                #
T_OPTIONS = [3,5]
# Feature Extraction : for Lemmatization                                                                #
N_FEATURES_OPTIONS = [10, 50]
# Classifer : Best Gamma Value                                                                          #
C_OPTIONS = [1000]
# Classifer : Regularization Strengths  for regression#
REG_OPTIONS = [{'penalty': ['l1']},{'penalty': ['l2']}]
REG_STRENGTH_OPTIONS = [10,100]
#########################################################################################################

#########################################################################################################
# Set up the parameter grid                                                                             #
param_grid = [
    {
        'vect__min_df': T_OPTIONS,
       'vect__analyzer' : ['word',lem_rmv_digit],
        'reduce_dim': [TruncatedSVD(), NMF()],
        'reduce_dim__n_components': N_FEATURES_OPTIONS,
        'clf': [LinearSVC()],
        'clf__C': C_OPTIONS
    },
    {
        'vect__min_df': T_OPTIONS,
        'vect__analyzer' : ['word',lem_rmv_digit],
        'reduce_dim': [TruncatedSVD(), NMF()],
        'reduce_dim__n_components': N_FEATURES_OPTIONS,
        'clf': [LogisticRegression()],
        'clf__C': REG_OPTIONS,
        'clf__C': REG_STRENGTH_OPTIONS
    },    
    {
        'vect__min_df': T_OPTIONS,
        'vect__analyzer' : ['word',lem_rmv_digit],
        'reduce_dim': [TruncatedSVD(), NMF()],
        'reduce_dim__n_components': N_FEATURES_OPTIONS,
        'clf': [GaussianNB()],
    },
]
#########################################################################################################

grid = GridSearchCV(pipeline, cv=3, n_jobs=1, param_grid=param_grid, scoring='accuracy')
grid.fit(newsgroups_trainwithoutheaders.data, newsgroups_trainwithoutheaders.target)
rmtree(cachedir)



________________________________________________________________________________
[Memory] Calling sklearn.pipeline._fit_transform_one...
_fit_transform_one(CountVectorizer(analyzer='word', binary=False, decode_error='strict',
        dtype=<class 'numpy.int64'>, encoding='utf-8', input='content',
        lowercase=True, max_df=1.0, max_features=None, min_df=3,
        ngram_range=(1, 1), preprocessor=None, stop_words='english',
        strip_accents=None, token_pattern='(?u)\\b\\w\\w+\\b',
        tokenizer=None, vocabulary=None), 
None, [ 'From: ferguson@cs.rochester.edu (George Ferguson)\n'
  "Subject: What's up in Rochester???\n"
  'Reply-To: ferguson@cs.rochester.edu (George Ferguson)\n'
  'Organization: University of Rochester Hockey Science Dept.\n'
  'Distribution: na\n'
  'Lines: 25\n'
  '\n'
  '\n'
  "Here we are with the Sabres up 2-0 to the Bruins and the Sabres' farm\n"
  'team, the Rochester Amerks, on their way to the Calder Cup (urp!) and\n'
  'what kind of hockey covera

________________________________________________fit_transform_one - 0.4s, 0.0min
________________________________________________________________________________
[Memory] Calling sklearn.pipeline._fit_transform_one...
_fit_transform_one(TfidfTransformer(norm='l2', smooth_idf=True, sublinear_tf=False, use_idf=True), None, <530x2319 sparse matrix of type '<class 'numpy.int64'>'
	with 38153 stored elements in Compressed Sparse Row format>, 
array([1, ..., 1], dtype=int64))
________________________________________________fit_transform_one - 0.0s, 0.0min
________________________________________________________________________________
[Memory] Calling sklearn.pipeline._fit_transform_one...
_fit_transform_one(TruncatedSVD(algorithm='randomized', n_components=10, n_iter=5,
       random_state=None, tol=0.0), 
None, <530x2319 sparse matrix of type '<class 'numpy.float64'>'
	with 38153 stored elements in Compressed Sparse Row format>, 
array([1, ..., 1], dtype=int64))
___________________________

________________________________________________fit_transform_one - 9.5s, 0.2min
________________________________________________________________________________
[Memory] Calling sklearn.pipeline._fit_transform_one...
_fit_transform_one(TfidfTransformer(norm='l2', smooth_idf=True, sublinear_tf=False, use_idf=True), None, <531x3616 sparse matrix of type '<class 'numpy.int64'>'
	with 63451 stored elements in Compressed Sparse Row format>, 
array([0, ..., 1], dtype=int64))
________________________________________________fit_transform_one - 0.0s, 0.0min
________________________________________________________________________________
[Memory] Calling sklearn.pipeline._fit_transform_one...
_fit_transform_one(TruncatedSVD(algorithm='randomized', n_components=10, n_iter=5,
       random_state=None, tol=0.0), 
None, <531x3616 sparse matrix of type '<class 'numpy.float64'>'
	with 63451 stored elements in Compressed Sparse Row format>, 
array([0, ..., 1], dtype=int64))
___________________________

________________________________________________fit_transform_one - 7.8s, 0.1min
________________________________________________________________________________
[Memory] Calling sklearn.pipeline._fit_transform_one...
_fit_transform_one(TfidfTransformer(norm='l2', smooth_idf=True, sublinear_tf=False, use_idf=True), None, <531x2237 sparse matrix of type '<class 'numpy.int64'>'
	with 56158 stored elements in Compressed Sparse Row format>, 
array([0, ..., 0], dtype=int64))
________________________________________________fit_transform_one - 0.0s, 0.0min
________________________________________________________________________________
[Memory] Calling sklearn.pipeline._fit_transform_one...
_fit_transform_one(TruncatedSVD(algorithm='randomized', n_components=10, n_iter=5,
       random_state=None, tol=0.0), 
None, <531x2237 sparse matrix of type '<class 'numpy.float64'>'
	with 56158 stored elements in Compressed Sparse Row format>, 
array([0, ..., 0], dtype=int64))
___________________________

________________________________________________fit_transform_one - 0.0s, 0.0min
[Memory]    0.0s, 0.0min: Loading _fit_transform_one from C:\Users\MADHU~1.KOL\AppData\Local\Temp\tmpygvedhna\joblib\sklearn\pipeline\_fit_transform_one\7860e71abcba7172486c1af962fe0aa9
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]    0.0s, 0.0min: Loading _fit_transform_one from C:\Users\MADHU~1.KOL\AppData\Local\Temp\tmpygvedhna\joblib\sklearn\pipeline\_fit_transform_one\4796c30779583812d601648c47209e42
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
________________________________________________________________________________
[Memory] Calling sklearn.pipeline._fit_transform_one...
_fit_transform_one(TruncatedSVD(algorithm='randomized', n_components=50, n_iter=5,
       random_state=None, tol=0.0), 
None, <531x3616 sparse matrix of type '<class 'numpy.float64'>'
	with 63451 stored elements in Compressed Sparse Row format>, 
a

________________________________________________fit_transform_one - 0.5s, 0.0min
[Memory]    0.0s, 0.0min: Loading _fit_transform_one from C:\Users\MADHU~1.KOL\AppData\Local\Temp\tmpygvedhna\joblib\sklearn\pipeline\_fit_transform_one\5549b38dd6b88109bebdf2528db86695
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]    0.0s, 0.0min: Loading _fit_transform_one from C:\Users\MADHU~1.KOL\AppData\Local\Temp\tmpygvedhna\joblib\sklearn\pipeline\_fit_transform_one\fe462dbac0712a0bb6bb08741a277388
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
________________________________________________________________________________
[Memory] Calling sklearn.pipeline._fit_transform_one...
_fit_transform_one(NMF(alpha=0.0, beta_loss='frobenius', init=None, l1_ratio=0.0, max_iter=200,
  n_components=10, random_state=None, shuffle=False, solver='cd',
  tol=0.0001, verbose=0), 
None, <530x2319 sparse matrix of type '<class 'numpy.float6

________________________________________________fit_transform_one - 0.3s, 0.0min
[Memory]    0.0s, 0.0min: Loading _fit_transform_one from C:\Users\MADHU~1.KOL\AppData\Local\Temp\tmpygvedhna\joblib\sklearn\pipeline\_fit_transform_one\5ff567dfe1cca325be338fe89ae7b073
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]    0.0s, 0.0min: Loading _fit_transform_one from C:\Users\MADHU~1.KOL\AppData\Local\Temp\tmpygvedhna\joblib\sklearn\pipeline\_fit_transform_one\feb000bc1f64cc6fed85d066263598c4
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
________________________________________________________________________________
[Memory] Calling sklearn.pipeline._fit_transform_one...
_fit_transform_one(NMF(alpha=0.0, beta_loss='frobenius', init=None, l1_ratio=0.0, max_iter=200,
  n_components=10, random_state=None, shuffle=False, solver='cd',
  tol=0.0001, verbose=0), 
None, <531x2237 sparse matrix of type '<class 'numpy.float6

________________________________________________fit_transform_one - 3.0s, 0.0min
[Memory]    0.0s, 0.0min: Loading _fit_transform_one from C:\Users\MADHU~1.KOL\AppData\Local\Temp\tmpygvedhna\joblib\sklearn\pipeline\_fit_transform_one\7860e71abcba7172486c1af962fe0aa9
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]    0.0s, 0.0min: Loading _fit_transform_one from C:\Users\MADHU~1.KOL\AppData\Local\Temp\tmpygvedhna\joblib\sklearn\pipeline\_fit_transform_one\4796c30779583812d601648c47209e42
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
________________________________________________________________________________
[Memory] Calling sklearn.pipeline._fit_transform_one...
_fit_transform_one(NMF(alpha=0.0, beta_loss='frobenius', init=None, l1_ratio=0.0, max_iter=200,
  n_components=50, random_state=None, shuffle=False, solver='cd',
  tol=0.0001, verbose=0), 
None, <531x3616 sparse matrix of type '<class 'numpy.float6

[Memory]    0.0s, 0.0min: Loading _fit_transform_one from C:\Users\MADHU~1.KOL\AppData\Local\Temp\tmpygvedhna\joblib\sklearn\pipeline\_fit_transform_one\03d523934ede9d076eb2cd69f41c10ba
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]    0.0s, 0.0min: Loading _fit_transform_one from C:\Users\MADHU~1.KOL\AppData\Local\Temp\tmpygvedhna\joblib\sklearn\pipeline\_fit_transform_one\fcf20a8ace0687f0b70e81eb8dd099a4
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]    0.0s, 0.0min: Loading _fit_transform_one from C:\Users\MADHU~1.KOL\AppData\Local\Temp\tmpygvedhna\joblib\sklearn\pipeline\_fit_transform_one\c1a32c97a527764ff1a7e5f7508e827c
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]    0.0s, 0.0min: Loading _fit_transform_one from C:\Users\MADHU~1.KOL\AppData\Local\Temp\tmpygvedhna\joblib\sklearn\pipeline\_fit_transform_one\b52e600e343a88e8867077a7ced44903
_____________

[Memory]    0.0s, 0.0min: Loading _fit_transform_one from C:\Users\MADHU~1.KOL\AppData\Local\Temp\tmpygvedhna\joblib\sklearn\pipeline\_fit_transform_one\5549b38dd6b88109bebdf2528db86695
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]    0.0s, 0.0min: Loading _fit_transform_one from C:\Users\MADHU~1.KOL\AppData\Local\Temp\tmpygvedhna\joblib\sklearn\pipeline\_fit_transform_one\fe462dbac0712a0bb6bb08741a277388
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]    0.0s, 0.0min: Loading _fit_transform_one from C:\Users\MADHU~1.KOL\AppData\Local\Temp\tmpygvedhna\joblib\sklearn\pipeline\_fit_transform_one\bcc0ec5a59297bb10696dfc5f7cb5296
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]    0.0s, 0.0min: Loading _fit_transform_one from C:\Users\MADHU~1.KOL\AppData\Local\Temp\tmpygvedhna\joblib\sklearn\pipeline\_fit_transform_one\03d523934ede9d076eb2cd69f41c10ba
_____________

[Memory]    0.0s, 0.0min: Loading _fit_transform_one from C:\Users\MADHU~1.KOL\AppData\Local\Temp\tmpygvedhna\joblib\sklearn\pipeline\_fit_transform_one\7f3376f0ff58277f896d0545d8440d9e
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]    0.0s, 0.0min: Loading _fit_transform_one from C:\Users\MADHU~1.KOL\AppData\Local\Temp\tmpygvedhna\joblib\sklearn\pipeline\_fit_transform_one\ee4b8c3405aabcbee8e31f16e611c703
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]    0.0s, 0.0min: Loading _fit_transform_one from C:\Users\MADHU~1.KOL\AppData\Local\Temp\tmpygvedhna\joblib\sklearn\pipeline\_fit_transform_one\e6ce2ff0ba69a80acaf5ed76980b7fd7
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]    0.0s, 0.0min: Loading _fit_transform_one from C:\Users\MADHU~1.KOL\AppData\Local\Temp\tmpygvedhna\joblib\sklearn\pipeline\_fit_transform_one\5549b38dd6b88109bebdf2528db86695
_____________

[Memory]    0.0s, 0.0min: Loading _fit_transform_one from C:\Users\MADHU~1.KOL\AppData\Local\Temp\tmpygvedhna\joblib\sklearn\pipeline\_fit_transform_one\8266d64d3e81d577da1fe8657c85036d
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]    0.0s, 0.0min: Loading _fit_transform_one from C:\Users\MADHU~1.KOL\AppData\Local\Temp\tmpygvedhna\joblib\sklearn\pipeline\_fit_transform_one\abbe5ec70f8920037ec9fc4b87d0801c
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]    0.0s, 0.0min: Loading _fit_transform_one from C:\Users\MADHU~1.KOL\AppData\Local\Temp\tmpygvedhna\joblib\sklearn\pipeline\_fit_transform_one\6fdbc8cc50b6d44533987e80568112de
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]    0.0s, 0.0min: Loading _fit_transform_one from C:\Users\MADHU~1.KOL\AppData\Local\Temp\tmpygvedhna\joblib\sklearn\pipeline\_fit_transform_one\7f3376f0ff58277f896d0545d8440d9e
_____________

[Memory]    0.0s, 0.0min: Loading _fit_transform_one from C:\Users\MADHU~1.KOL\AppData\Local\Temp\tmpygvedhna\joblib\sklearn\pipeline\_fit_transform_one\9e1aaa1f43ed3ee3d2dd6d27a12e5342
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]    0.0s, 0.0min: Loading _fit_transform_one from C:\Users\MADHU~1.KOL\AppData\Local\Temp\tmpygvedhna\joblib\sklearn\pipeline\_fit_transform_one\517e18070e423d4fe7f536dc22121529
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]    0.0s, 0.0min: Loading _fit_transform_one from C:\Users\MADHU~1.KOL\AppData\Local\Temp\tmpygvedhna\joblib\sklearn\pipeline\_fit_transform_one\108e8aff371e27d87736b2ec0042e98d
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]    0.0s, 0.0min: Loading _fit_transform_one from C:\Users\MADHU~1.KOL\AppData\Local\Temp\tmpygvedhna\joblib\sklearn\pipeline\_fit_transform_one\8266d64d3e81d577da1fe8657c85036d
_____________

[Memory]    0.0s, 0.0min: Loading _fit_transform_one from C:\Users\MADHU~1.KOL\AppData\Local\Temp\tmpygvedhna\joblib\sklearn\pipeline\_fit_transform_one\5ff567dfe1cca325be338fe89ae7b073
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]    0.0s, 0.0min: Loading _fit_transform_one from C:\Users\MADHU~1.KOL\AppData\Local\Temp\tmpygvedhna\joblib\sklearn\pipeline\_fit_transform_one\feb000bc1f64cc6fed85d066263598c4
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]    0.0s, 0.0min: Loading _fit_transform_one from C:\Users\MADHU~1.KOL\AppData\Local\Temp\tmpygvedhna\joblib\sklearn\pipeline\_fit_transform_one\a0726489c3a2d097a4d694915f67a6e0
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]    0.0s, 0.0min: Loading _fit_transform_one from C:\Users\MADHU~1.KOL\AppData\Local\Temp\tmpygvedhna\joblib\sklearn\pipeline\_fit_transform_one\9e1aaa1f43ed3ee3d2dd6d27a12e5342
_____________

[Memory]    0.0s, 0.0min: Loading _fit_transform_one from C:\Users\MADHU~1.KOL\AppData\Local\Temp\tmpygvedhna\joblib\sklearn\pipeline\_fit_transform_one\43bb13b9bf8912f7ae416cac77710fed
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]    0.0s, 0.0min: Loading _fit_transform_one from C:\Users\MADHU~1.KOL\AppData\Local\Temp\tmpygvedhna\joblib\sklearn\pipeline\_fit_transform_one\a403d75b538873e1d5edc3bbf2c15851
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]    0.0s, 0.0min: Loading _fit_transform_one from C:\Users\MADHU~1.KOL\AppData\Local\Temp\tmpygvedhna\joblib\sklearn\pipeline\_fit_transform_one\b17855d4948b5cb46eae621786cb1964
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]    0.0s, 0.0min: Loading _fit_transform_one from C:\Users\MADHU~1.KOL\AppData\Local\Temp\tmpygvedhna\joblib\sklearn\pipeline\_fit_transform_one\5ff567dfe1cca325be338fe89ae7b073
_____________

[Memory]    0.0s, 0.0min: Loading _fit_transform_one from C:\Users\MADHU~1.KOL\AppData\Local\Temp\tmpygvedhna\joblib\sklearn\pipeline\_fit_transform_one\096c0afa7f240b78f81d960016347821
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]    0.0s, 0.0min: Loading _fit_transform_one from C:\Users\MADHU~1.KOL\AppData\Local\Temp\tmpygvedhna\joblib\sklearn\pipeline\_fit_transform_one\e6dcdbb734a3fd3a503e41d0dd3b9ecd
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]    0.0s, 0.0min: Loading _fit_transform_one from C:\Users\MADHU~1.KOL\AppData\Local\Temp\tmpygvedhna\joblib\sklearn\pipeline\_fit_transform_one\cb4621fee35492e16c454ca20430054a
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]    0.0s, 0.0min: Loading _fit_transform_one from C:\Users\MADHU~1.KOL\AppData\Local\Temp\tmpygvedhna\joblib\sklearn\pipeline\_fit_transform_one\43bb13b9bf8912f7ae416cac77710fed
_____________

[Memory]    0.0s, 0.0min: Loading _fit_transform_one from C:\Users\MADHU~1.KOL\AppData\Local\Temp\tmpygvedhna\joblib\sklearn\pipeline\_fit_transform_one\ad8c2540c51ae61facb8c71bbcd46d36
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]    0.0s, 0.0min: Loading _fit_transform_one from C:\Users\MADHU~1.KOL\AppData\Local\Temp\tmpygvedhna\joblib\sklearn\pipeline\_fit_transform_one\6949539987189486dc3097454e5e0a0b
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]    0.0s, 0.0min: Loading _fit_transform_one from C:\Users\MADHU~1.KOL\AppData\Local\Temp\tmpygvedhna\joblib\sklearn\pipeline\_fit_transform_one\08472f96e8a607f1b0b2d1e982e797a0
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]    0.0s, 0.0min: Loading _fit_transform_one from C:\Users\MADHU~1.KOL\AppData\Local\Temp\tmpygvedhna\joblib\sklearn\pipeline\_fit_transform_one\096c0afa7f240b78f81d960016347821
_____________

[Memory]    0.0s, 0.0min: Loading _fit_transform_one from C:\Users\MADHU~1.KOL\AppData\Local\Temp\tmpygvedhna\joblib\sklearn\pipeline\_fit_transform_one\7860e71abcba7172486c1af962fe0aa9
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]    0.0s, 0.0min: Loading _fit_transform_one from C:\Users\MADHU~1.KOL\AppData\Local\Temp\tmpygvedhna\joblib\sklearn\pipeline\_fit_transform_one\4796c30779583812d601648c47209e42
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]    0.0s, 0.0min: Loading _fit_transform_one from C:\Users\MADHU~1.KOL\AppData\Local\Temp\tmpygvedhna\joblib\sklearn\pipeline\_fit_transform_one\7ec3885a1b75f48d66aaee4b75540181
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]    0.0s, 0.0min: Loading _fit_transform_one from C:\Users\MADHU~1.KOL\AppData\Local\Temp\tmpygvedhna\joblib\sklearn\pipeline\_fit_transform_one\ad8c2540c51ae61facb8c71bbcd46d36
_____________

[Memory]    0.0s, 0.0min: Loading _fit_transform_one from C:\Users\MADHU~1.KOL\AppData\Local\Temp\tmpygvedhna\joblib\sklearn\pipeline\_fit_transform_one\66c9b470b74b5f45068cc5936670d37b
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]    0.0s, 0.0min: Loading _fit_transform_one from C:\Users\MADHU~1.KOL\AppData\Local\Temp\tmpygvedhna\joblib\sklearn\pipeline\_fit_transform_one\a1937cc5cdcd56b3a304c7e06742b725
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]    0.0s, 0.0min: Loading _fit_transform_one from C:\Users\MADHU~1.KOL\AppData\Local\Temp\tmpygvedhna\joblib\sklearn\pipeline\_fit_transform_one\57d2e910a4b41120ebedcfa59b140b9d
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]    0.0s, 0.0min: Loading _fit_transform_one from C:\Users\MADHU~1.KOL\AppData\Local\Temp\tmpygvedhna\joblib\sklearn\pipeline\_fit_transform_one\7860e71abcba7172486c1af962fe0aa9
_____________

[Memory]    0.0s, 0.0min: Loading _fit_transform_one from C:\Users\MADHU~1.KOL\AppData\Local\Temp\tmpygvedhna\joblib\sklearn\pipeline\_fit_transform_one\b52e600e343a88e8867077a7ced44903
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]    0.0s, 0.0min: Loading _fit_transform_one from C:\Users\MADHU~1.KOL\AppData\Local\Temp\tmpygvedhna\joblib\sklearn\pipeline\_fit_transform_one\36d058230f04d5523db98139ee09d429
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]    0.0s, 0.0min: Loading _fit_transform_one from C:\Users\MADHU~1.KOL\AppData\Local\Temp\tmpygvedhna\joblib\sklearn\pipeline\_fit_transform_one\9fa5254aab811397fc76b999d5ac58c6
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]    0.0s, 0.0min: Loading _fit_transform_one from C:\Users\MADHU~1.KOL\AppData\Local\Temp\tmpygvedhna\joblib\sklearn\pipeline\_fit_transform_one\66c9b470b74b5f45068cc5936670d37b
_____________

[Memory]    0.0s, 0.0min: Loading _fit_transform_one from C:\Users\MADHU~1.KOL\AppData\Local\Temp\tmpygvedhna\joblib\sklearn\pipeline\_fit_transform_one\03d523934ede9d076eb2cd69f41c10ba
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]    0.0s, 0.0min: Loading _fit_transform_one from C:\Users\MADHU~1.KOL\AppData\Local\Temp\tmpygvedhna\joblib\sklearn\pipeline\_fit_transform_one\fcf20a8ace0687f0b70e81eb8dd099a4
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]    0.0s, 0.0min: Loading _fit_transform_one from C:\Users\MADHU~1.KOL\AppData\Local\Temp\tmpygvedhna\joblib\sklearn\pipeline\_fit_transform_one\3e0de9155d02b590d7e6d5ea6c9ecaf6
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]    0.0s, 0.0min: Loading _fit_transform_one from C:\Users\MADHU~1.KOL\AppData\Local\Temp\tmpygvedhna\joblib\sklearn\pipeline\_fit_transform_one\b52e600e343a88e8867077a7ced44903
_____________

________________________________________________fit_transform_one - 0.1s, 0.0min


In [103]:
import pandas as pd
pd.set_option('display.max_colwidth', -1)
pd.DataFrame(grid.cv_results_) # With headers and footers





Unnamed: 0,mean_fit_time,mean_score_time,mean_test_score,mean_train_score,param_clf,param_clf__C,param_reduce_dim,param_reduce_dim__n_components,param_vect__analyzer,param_vect__min_df,...,split0_test_score,split0_train_score,split1_test_score,split1_train_score,split2_test_score,split2_train_score,std_fit_time,std_score_time,std_test_score,std_train_score
0,0.597575,0.069352,0.928392,0.937808,"LinearSVC(C=1000, class_weight=None, dual=True, fit_intercept=True,\n intercept_scaling=1, loss='squared_hinge', max_iter=1000,\n multi_class='ovr', penalty='l2', random_state=None, tol=0.0001,\n verbose=0)",1000,"TruncatedSVD(algorithm='randomized', n_components=50, n_iter=5,\n random_state=None, tol=0.0)",10,word,3,...,0.902256,0.928302,0.932075,0.956685,0.950943,0.928437,0.052436,0.014782,0.020052,0.013348
1,0.626285,0.082050,0.900754,0.917071,"LinearSVC(C=1000, class_weight=None, dual=True, fit_intercept=True,\n intercept_scaling=1, loss='squared_hinge', max_iter=1000,\n multi_class='ovr', penalty='l2', random_state=None, tol=0.0001,\n verbose=0)",1000,"TruncatedSVD(algorithm='randomized', n_components=50, n_iter=5,\n random_state=None, tol=0.0)",10,word,5,...,0.864662,0.894340,0.879245,0.924670,0.958491,0.932203,0.014957,0.019341,0.041220,0.016365
2,9.295043,4.570321,0.915829,0.928400,"LinearSVC(C=1000, class_weight=None, dual=True, fit_intercept=True,\n intercept_scaling=1, loss='squared_hinge', max_iter=1000,\n multi_class='ovr', penalty='l2', random_state=None, tol=0.0001,\n verbose=0)",1000,"TruncatedSVD(algorithm='randomized', n_components=50, n_iter=5,\n random_state=None, tol=0.0)",10,<function lem_rmv_digit at 0x000001DCD144B9D8>,3,...,0.913534,0.941509,0.905660,0.943503,0.928302,0.900188,0.393938,0.554896,0.009380,0.019965
3,8.286953,4.761483,0.896985,0.900769,"LinearSVC(C=1000, class_weight=None, dual=True, fit_intercept=True,\n intercept_scaling=1, loss='squared_hinge', max_iter=1000,\n multi_class='ovr', penalty='l2', random_state=None, tol=0.0001,\n verbose=0)",1000,"TruncatedSVD(algorithm='randomized', n_components=50, n_iter=5,\n random_state=None, tol=0.0)",10,<function lem_rmv_digit at 0x000001DCD144B9D8>,5,...,0.906015,0.924528,0.833962,0.851224,0.950943,0.926554,0.219834,0.993047,0.048154,0.035043
4,0.376099,0.082049,0.958543,0.995606,"LinearSVC(C=1000, class_weight=None, dual=True, fit_intercept=True,\n intercept_scaling=1, loss='squared_hinge', max_iter=1000,\n multi_class='ovr', penalty='l2', random_state=None, tol=0.0001,\n verbose=0)",1000,"TruncatedSVD(algorithm='randomized', n_components=50, n_iter=5,\n random_state=None, tol=0.0)",50,word,3,...,0.954887,1.000000,0.947170,0.988701,0.973585,0.998117,0.024337,0.032748,0.011084,0.004943
5,0.198426,0.063370,0.958543,0.997487,"LinearSVC(C=1000, class_weight=None, dual=True, fit_intercept=True,\n intercept_scaling=1, loss='squared_hinge', max_iter=1000,\n multi_class='ovr', penalty='l2', random_state=None, tol=0.0001,\n verbose=0)",1000,"TruncatedSVD(algorithm='randomized', n_components=50, n_iter=5,\n random_state=None, tol=0.0)",50,word,5,...,0.939850,0.996226,0.958491,0.998117,0.977358,0.998117,0.004180,0.003849,0.015318,0.000891
6,0.256382,4.112712,0.958543,0.994343,"LinearSVC(C=1000, class_weight=None, dual=True, fit_intercept=True,\n intercept_scaling=1, loss='squared_hinge', max_iter=1000,\n multi_class='ovr', penalty='l2', random_state=None, tol=0.0001,\n verbose=0)",1000,"TruncatedSVD(algorithm='randomized', n_components=50, n_iter=5,\n random_state=None, tol=0.0)",50,<function lem_rmv_digit at 0x000001DCD144B9D8>,3,...,0.947368,0.988679,0.954717,1.000000,0.973585,0.994350,0.013699,0.174841,0.011042,0.004622
7,0.217856,4.202346,0.958543,0.991837,"LinearSVC(C=1000, class_weight=None, dual=True, fit_intercept=True,\n intercept_scaling=1, loss='squared_hinge', max_iter=1000,\n multi_class='ovr', penalty='l2', random_state=None, tol=0.0001,\n verbose=0)",1000,"TruncatedSVD(algorithm='randomized', n_components=50, n_iter=5,\n random_state=None, tol=0.0)",50,<function lem_rmv_digit at 0x000001DCD144B9D8>,5,...,0.962406,0.996226,0.962264,0.998117,0.950943,0.981168,0.009846,0.169242,0.005369,0.007584
8,0.492947,0.059679,0.878141,0.884443,"LinearSVC(C=1000, class_weight=None, dual=True, fit_intercept=True,\n intercept_scaling=1, loss='squared_hinge', max_iter=1000,\n multi_class='ovr', penalty='l2', random_state=None, tol=0.0001,\n verbose=0)",1000,"NMF(alpha=0.0, beta_loss='frobenius', init=None, l1_ratio=0.0, max_iter=200,\n n_components=50, random_state=None, shuffle=False, solver='cd',\n tol=0.0001, verbose=0)",10,word,3,...,0.902256,0.916981,0.822642,0.838041,0.909434,0.898305,0.155257,0.002135,0.039316,0.033685
9,0.428101,0.061174,0.891960,0.893230,"LinearSVC(C=1000, class_weight=None, dual=True, fit_intercept=True,\n intercept_scaling=1, loss='squared_hinge', max_iter=1000,\n multi_class='ovr', penalty='l2', random_state=None, tol=0.0001,\n verbose=0)",1000,"NMF(alpha=0.0, beta_loss='frobenius', init=None, l1_ratio=0.0, max_iter=200,\n n_components=50, random_state=None, shuffle=False, solver='cd',\n tol=0.0001, verbose=0)",10,word,5,...,0.879699,0.915094,0.883019,0.892655,0.913208,0.871940,0.071626,0.002947,0.015071,0.017622
