In [1]:
import pandas as pd
from textblob import TextBlob
import string


In [2]:
df = pd.read_csv("Data/train.csv")

In [3]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 31962 entries, 0 to 31961
Data columns (total 3 columns):
id       31962 non-null int64
label    31962 non-null int64
tweet    31962 non-null object
dtypes: int64(2), object(1)
memory usage: 749.2+ KB


In [4]:
df.head()

Unnamed: 0,id,label,tweet
0,1,0,@user when a father is dysfunctional and is s...
1,2,0,@user @user thanks for #lyft credit i can't us...
2,3,0,bihday your majesty
3,4,0,#model i love u take with u all the time in ...
4,5,0,factsguide: society now #motivation


In [5]:
import re 

def preprocess_word(word):
    # Remove punctuation
    word = word.strip('\'"?!,.():;')
    # Convert more than 2 letter repetitions to 2 letter
    # funnnnny --> funny
    word = re.sub(r'(.)\1+', r'\1\1', word)
    # Remove - & '
    word = re.sub(r'(-|\')', '', word)
    return word


def is_valid_word(word):
    # Check if word begins with an alphabet
    return (re.search(r'^[a-zA-Z][a-z0-9A-Z\._]*$', word) is not None)


def handle_emojis(tweet):
    # Smile -- :), : ), :-), (:, ( :, (-:, :')
    tweet = re.sub(r'(:\s?\)|:-\)|\(\s?:|\(-:|:\'\))', ' EMO_POS ', tweet)
    # Laugh -- :D, : D, :-D, xD, x-D, XD, X-D
    tweet = re.sub(r'(:\s?D|:-D|x-?D|X-?D)', ' EMO_POS ', tweet)
    # Love -- <3, :*
    tweet = re.sub(r'(<3|:\*)', ' EMO_POS ', tweet)
    # Wink -- ;-), ;), ;-D, ;D, (;,  (-;
    tweet = re.sub(r'(;-?\)|;-?D|\(-?;)', ' EMO_POS ', tweet)
    # Sad -- :-(, : (, :(, ):, )-:
    tweet = re.sub(r'(:\s?\(|:-\(|\)\s?:|\)-:)', ' EMO_NEG ', tweet)
    # Cry -- :,(, :'(, :"(
    tweet = re.sub(r'(:,\(|:\'\(|:"\()', ' EMO_NEG ', tweet)
    return tweet


def process_tweet(tweet):
    processed_tweet = []
    # Convert to lower case
    tweet = tweet.lower()
    # Replaces URLs with the word URL
    tweet = re.sub(r'((www\.[\S]+)|(https?://[\S]+))', ' URL ', tweet)
    # Replace @handle with the word USER_MENTION
    tweet = re.sub(r'@[\S]+', '', tweet)
    # Replaces #hashtag with hashtag
    tweet = re.sub(r'#(\S+)', r' \1 ', tweet)
    # Remove RT (retweet)
    tweet = re.sub(r'\brt\b', '', tweet)
    # Replace 2+ dots with space
    tweet = re.sub(r'\.{2,}', ' ', tweet)
    # Strip space, " and ' from tweet
    tweet = tweet.strip(' "\'')
    # Replace emojis with either EMO_POS or EMO_NEG
    tweet = handle_emojis(tweet)
    # Replace multiple spaces with a single space
    tweet = re.sub(r'\s+', ' ', tweet)
    words = tweet.split()

    for word in words:
        word = preprocess_word(word)
        if is_valid_word(word):
            processed_tweet.append(word)

    return ' '.join(processed_tweet)

In [6]:
df['processed_tweets'] = df['tweet'].apply(process_tweet)
df.drop('tweet',axis=1,inplace=True)

In [7]:
df['processed_tweets'].head()

0    when a father is dysfunctional and is so selfi...
1    thanks for lyft credit i cant use cause they d...
2                                  bihday your majesty
3           model i love u take with u all the time in
4                    factsguide society now motivation
Name: processed_tweets, dtype: object

In [8]:
df['char_count'] = df['processed_tweets'].apply(len)
df['word_count'] = df['processed_tweets'].apply(lambda x: len(x.split()))
df['word_density'] = df['char_count'] / (df['word_count']+1)

In [9]:
string.punctuation

'!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~'

In [10]:
df.head()

Unnamed: 0,id,label,processed_tweets,char_count,word_count,word_density
0,1,0,when a father is dysfunctional and is so selfi...,91,17,5.055556
1,2,0,thanks for lyft credit i cant use cause they d...,101,17,5.611111
2,3,0,bihday your majesty,19,3,4.75
3,4,0,model i love u take with u all the time in,42,11,3.5
4,5,0,factsguide society now motivation,33,4,6.6


In [11]:
TextBlob(df['processed_tweets'][0]).tags

[('when', 'WRB'),
 ('a', 'DT'),
 ('father', 'NN'),
 ('is', 'VBZ'),
 ('dysfunctional', 'JJ'),
 ('and', 'CC'),
 ('is', 'VBZ'),
 ('so', 'RB'),
 ('selfish', 'JJ'),
 ('he', 'PRP'),
 ('drags', 'VBZ'),
 ('his', 'PRP$'),
 ('kids', 'NNS'),
 ('into', 'IN'),
 ('his', 'PRP$'),
 ('dysfunction', 'NN'),
 ('run', 'VB')]

In [12]:
pos_family = {
    'noun' : ['NN','NNS','NNP','NNPS'],
    'pron' : ['PRP','PRP$','WP','WP$'],
    'verb' : ['VB','VBD','VBG','VBN','VBP','VBZ'],
    'adj' :  ['JJ','JJR','JJS'],
    'adv' : ['RB','RBR','RBS','WRB']
}

# function to check and get the part of speech tag count of a words in a given sentence
def check_pos_tag(x, flag):
    cnt = 0
    wiki = TextBlob(x)
    for tup in wiki.tags:
        ppo = list(tup)[1]
        if ppo in pos_family[flag]:
            cnt += 1

    return cnt

df['noun_count'] = df['processed_tweets'].apply(lambda x: check_pos_tag(x, 'noun'))
df['verb_count'] = df['processed_tweets'].apply(lambda x: check_pos_tag(x, 'verb'))
df['adj_count'] = df['processed_tweets'].apply(lambda x: check_pos_tag(x, 'adj'))
df['adv_count'] = df['processed_tweets'].apply(lambda x: check_pos_tag(x, 'adv'))
df['pron_count'] = df['processed_tweets'].apply(lambda x: check_pos_tag(x, 'pron'))

In [13]:
df.head()

Unnamed: 0,id,label,processed_tweets,char_count,word_count,word_density,noun_count,verb_count,adj_count,adv_count,pron_count
0,1,0,when a father is dysfunctional and is so selfi...,91,17,5.055556,3,4,2,2,3
1,2,0,thanks for lyft credit i cant use cause they d...,101,17,5.611111,7,5,2,0,1
2,3,0,bihday your majesty,19,3,4.75,1,0,0,1,1
3,4,0,model i love u take with u all the time in,42,11,3.5,4,1,2,0,0
4,5,0,factsguide society now motivation,33,4,6.6,2,0,1,1,0


In [14]:
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
df[['char_count','word_count','word_density', 'noun_count','verb_count','adj_count','adv_count','pron_count']] = \
 scaler.fit_transform(df[['char_count','word_count','word_density', 'noun_count','verb_count','adj_count','adv_count','pron_count']])

In [15]:
df.head()

Unnamed: 0,id,label,processed_tweets,char_count,word_count,word_density,noun_count,verb_count,adj_count,adv_count,pron_count
0,1,0,when a father is dysfunctional and is so selfi...,0.65942,0.53125,0.324074,0.166667,0.363636,0.166667,0.222222,0.375
1,2,0,thanks for lyft credit i cant use cause they d...,0.731884,0.53125,0.359687,0.388889,0.454545,0.166667,0.0,0.125
2,3,0,bihday your majesty,0.137681,0.09375,0.304487,0.055556,0.0,0.0,0.111111,0.125
3,4,0,model i love u take with u all the time in,0.304348,0.34375,0.224359,0.222222,0.090909,0.166667,0.0,0.0
4,5,0,factsguide society now motivation,0.23913,0.125,0.423077,0.111111,0.0,0.083333,0.111111,0.0


In [16]:
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
y = df['label']
df.drop('id',axis=1,inplace=True)
df.drop('label',axis=1,inplace=True)
X = df
#X_train, X_test, y_train, y_test = train_test_split(X,y, test_size = 0.2, random_state = 42)

In [17]:
X.head()

Unnamed: 0,processed_tweets,char_count,word_count,word_density,noun_count,verb_count,adj_count,adv_count,pron_count
0,when a father is dysfunctional and is so selfi...,0.65942,0.53125,0.324074,0.166667,0.363636,0.166667,0.222222,0.375
1,thanks for lyft credit i cant use cause they d...,0.731884,0.53125,0.359687,0.388889,0.454545,0.166667,0.0,0.125
2,bihday your majesty,0.137681,0.09375,0.304487,0.055556,0.0,0.0,0.111111,0.125
3,model i love u take with u all the time in,0.304348,0.34375,0.224359,0.222222,0.090909,0.166667,0.0,0.0
4,factsguide society now motivation,0.23913,0.125,0.423077,0.111111,0.0,0.083333,0.111111,0.0


In [42]:

from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer, TfidfVectorizer

from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC, LinearSVC
from sklearn.feature_selection import SelectKBest, chi2, f_classif

count_vect = CountVectorizer(df['processed_tweets'], stop_words='english', ngram_range=(1,3), max_df=0.9, strip_accents="unicode")
tfidf = TfidfVectorizer(use_idf=True)
tfidf = tfidf.fit(df['processed_tweets'])
tfidf = tfidf.transform(X)
svc = LinearSVC()
type(tfidf)
svc.fit(X, tfidf)


# text_clf = Pipeline([('vect', CountVectorizer(stop_words='english')),
#                       ('tfidf', TfidfTransformer()),
#                      ('chi2', SelectKBest(chi2)),
#                      ('clf', LinearSVC(verbose=True, )),])

scipy.sparse.csr.csr_matrix

In [19]:
params = {"vect__ngram_range": [(1, 3)],
          "vect__max_df": [0.9],
          "vect__strip_accents": ["unicode"],
        "tfidf__use_idf": [True],
          "chi2__k" : [25000],
          "clf__C": [2.4],
          "clf__max_iter" : [6],
          "clf__loss" : ["hinge"],
          "clf__tol":[0.00001]
         }

# #Random Forest

# params = {"vect__ngram_range": [(1, 2)],
#           "vect__max_df": [0.1],
#           "vect__strip_accents": ["ascii"],
#         "tfidf__use_idf": [True],
#           "chi2__k" :[27000],
#          }

In [20]:
from sklearn.model_selection import GridSearchCV
text_clf = GridSearchCV(text_clf, params, verbose=2, n_jobs=-1)

In [30]:
y = y.to_frame()


AttributeError: 'DataFrame' object has no attribute 'to_frame'

In [36]:
X.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 31962 entries, 0 to 31961
Data columns (total 9 columns):
processed_tweets    31962 non-null object
char_count          31962 non-null float64
word_count          31962 non-null float64
word_density        31962 non-null float64
noun_count          31962 non-null float64
verb_count          31962 non-null float64
adj_count           31962 non-null float64
adv_count           31962 non-null float64
pron_count          31962 non-null float64
dtypes: float64(8), object(1)
memory usage: 2.2+ MB


In [35]:
y.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 31962 entries, 0 to 31961
Data columns (total 1 columns):
label    31962 non-null int64
dtypes: int64(1)
memory usage: 249.8 KB


In [28]:
model = text_clf.fit(X,y)

Fitting 3 folds for each of 1 candidates, totalling 3 fits


JoblibValueError: JoblibValueError
___________________________________________________________________________
Multiprocessing exception:
...........................................................................
C:\ProgramData\Anaconda3\lib\runpy.py in _run_module_as_main(mod_name='ipykernel_launcher', alter_argv=1)
    188         sys.exit(msg)
    189     main_globals = sys.modules["__main__"].__dict__
    190     if alter_argv:
    191         sys.argv[0] = mod_spec.origin
    192     return _run_code(code, main_globals, None,
--> 193                      "__main__", mod_spec)
        mod_spec = ModuleSpec(name='ipykernel_launcher', loader=<_f...nda3\\lib\\site-packages\\ipykernel_launcher.py')
    194 
    195 def run_module(mod_name, init_globals=None,
    196                run_name=None, alter_sys=False):
    197     """Execute a module's code without importing it

...........................................................................
C:\ProgramData\Anaconda3\lib\runpy.py in _run_code(code=<code object <module> at 0x000001C1AA674F60, fil...lib\site-packages\ipykernel_launcher.py", line 5>, run_globals={'__annotations__': {}, '__builtins__': <module 'builtins' (built-in)>, '__cached__': r'C:\ProgramData\Anaconda3\lib\site-packages\__pycache__\ipykernel_launcher.cpython-36.pyc', '__doc__': 'Entry point for launching an IPython kernel.\n\nTh...orts until\nafter removing the cwd from sys.path.\n', '__file__': r'C:\ProgramData\Anaconda3\lib\site-packages\ipykernel_launcher.py', '__loader__': <_frozen_importlib_external.SourceFileLoader object>, '__name__': '__main__', '__package__': '', '__spec__': ModuleSpec(name='ipykernel_launcher', loader=<_f...nda3\\lib\\site-packages\\ipykernel_launcher.py'), 'app': <module 'ipykernel.kernelapp' from 'C:\\ProgramD...a3\\lib\\site-packages\\ipykernel\\kernelapp.py'>, ...}, init_globals=None, mod_name='__main__', mod_spec=ModuleSpec(name='ipykernel_launcher', loader=<_f...nda3\\lib\\site-packages\\ipykernel_launcher.py'), pkg_name='', script_name=None)
     80                        __cached__ = cached,
     81                        __doc__ = None,
     82                        __loader__ = loader,
     83                        __package__ = pkg_name,
     84                        __spec__ = mod_spec)
---> 85     exec(code, run_globals)
        code = <code object <module> at 0x000001C1AA674F60, fil...lib\site-packages\ipykernel_launcher.py", line 5>
        run_globals = {'__annotations__': {}, '__builtins__': <module 'builtins' (built-in)>, '__cached__': r'C:\ProgramData\Anaconda3\lib\site-packages\__pycache__\ipykernel_launcher.cpython-36.pyc', '__doc__': 'Entry point for launching an IPython kernel.\n\nTh...orts until\nafter removing the cwd from sys.path.\n', '__file__': r'C:\ProgramData\Anaconda3\lib\site-packages\ipykernel_launcher.py', '__loader__': <_frozen_importlib_external.SourceFileLoader object>, '__name__': '__main__', '__package__': '', '__spec__': ModuleSpec(name='ipykernel_launcher', loader=<_f...nda3\\lib\\site-packages\\ipykernel_launcher.py'), 'app': <module 'ipykernel.kernelapp' from 'C:\\ProgramD...a3\\lib\\site-packages\\ipykernel\\kernelapp.py'>, ...}
     86     return run_globals
     87 
     88 def _run_module_code(code, init_globals=None,
     89                     mod_name=None, mod_spec=None,

...........................................................................
C:\ProgramData\Anaconda3\lib\site-packages\ipykernel_launcher.py in <module>()
     11     # This is added back by InteractiveShellApp.init_path()
     12     if sys.path[0] == '':
     13         del sys.path[0]
     14 
     15     from ipykernel import kernelapp as app
---> 16     app.launch_new_instance()

...........................................................................
C:\ProgramData\Anaconda3\lib\site-packages\traitlets\config\application.py in launch_instance(cls=<class 'ipykernel.kernelapp.IPKernelApp'>, argv=None, **kwargs={})
    653 
    654         If a global instance already exists, this reinitializes and starts it
    655         """
    656         app = cls.instance(**kwargs)
    657         app.initialize(argv)
--> 658         app.start()
        app.start = <bound method IPKernelApp.start of <ipykernel.kernelapp.IPKernelApp object>>
    659 
    660 #-----------------------------------------------------------------------------
    661 # utility functions, for convenience
    662 #-----------------------------------------------------------------------------

...........................................................................
C:\ProgramData\Anaconda3\lib\site-packages\ipykernel\kernelapp.py in start(self=<ipykernel.kernelapp.IPKernelApp object>)
    481         if self.poller is not None:
    482             self.poller.start()
    483         self.kernel.start()
    484         self.io_loop = ioloop.IOLoop.current()
    485         try:
--> 486             self.io_loop.start()
        self.io_loop.start = <bound method BaseAsyncIOLoop.start of <tornado.platform.asyncio.AsyncIOMainLoop object>>
    487         except KeyboardInterrupt:
    488             pass
    489 
    490 launch_new_instance = IPKernelApp.launch_instance

...........................................................................
C:\ProgramData\Anaconda3\lib\site-packages\tornado\platform\asyncio.py in start(self=<tornado.platform.asyncio.AsyncIOMainLoop object>)
    122         except (RuntimeError, AssertionError):
    123             old_loop = None
    124         try:
    125             self._setup_logging()
    126             asyncio.set_event_loop(self.asyncio_loop)
--> 127             self.asyncio_loop.run_forever()
        self.asyncio_loop.run_forever = <bound method BaseEventLoop.run_forever of <_Win...EventLoop running=True closed=False debug=False>>
    128         finally:
    129             asyncio.set_event_loop(old_loop)
    130 
    131     def stop(self):

...........................................................................
C:\ProgramData\Anaconda3\lib\asyncio\base_events.py in run_forever(self=<_WindowsSelectorEventLoop running=True closed=False debug=False>)
    417             sys.set_asyncgen_hooks(firstiter=self._asyncgen_firstiter_hook,
    418                                    finalizer=self._asyncgen_finalizer_hook)
    419         try:
    420             events._set_running_loop(self)
    421             while True:
--> 422                 self._run_once()
        self._run_once = <bound method BaseEventLoop._run_once of <_Windo...EventLoop running=True closed=False debug=False>>
    423                 if self._stopping:
    424                     break
    425         finally:
    426             self._stopping = False

...........................................................................
C:\ProgramData\Anaconda3\lib\asyncio\base_events.py in _run_once(self=<_WindowsSelectorEventLoop running=True closed=False debug=False>)
   1427                         logger.warning('Executing %s took %.3f seconds',
   1428                                        _format_handle(handle), dt)
   1429                 finally:
   1430                     self._current_handle = None
   1431             else:
-> 1432                 handle._run()
        handle._run = <bound method Handle._run of <Handle BaseAsyncIOLoop._handle_events(488, 1)>>
   1433         handle = None  # Needed to break cycles when an exception occurs.
   1434 
   1435     def _set_coroutine_wrapper(self, enabled):
   1436         try:

...........................................................................
C:\ProgramData\Anaconda3\lib\asyncio\events.py in _run(self=<Handle BaseAsyncIOLoop._handle_events(488, 1)>)
    140             self._callback = None
    141             self._args = None
    142 
    143     def _run(self):
    144         try:
--> 145             self._callback(*self._args)
        self._callback = <bound method BaseAsyncIOLoop._handle_events of <tornado.platform.asyncio.AsyncIOMainLoop object>>
        self._args = (488, 1)
    146         except Exception as exc:
    147             cb = _format_callback_source(self._callback, self._args)
    148             msg = 'Exception in callback {}'.format(cb)
    149             context = {

...........................................................................
C:\ProgramData\Anaconda3\lib\site-packages\tornado\platform\asyncio.py in _handle_events(self=<tornado.platform.asyncio.AsyncIOMainLoop object>, fd=488, events=1)
    112             self.writers.remove(fd)
    113         del self.handlers[fd]
    114 
    115     def _handle_events(self, fd, events):
    116         fileobj, handler_func = self.handlers[fd]
--> 117         handler_func(fileobj, events)
        handler_func = <function wrap.<locals>.null_wrapper>
        fileobj = <zmq.sugar.socket.Socket object>
        events = 1
    118 
    119     def start(self):
    120         try:
    121             old_loop = asyncio.get_event_loop()

...........................................................................
C:\ProgramData\Anaconda3\lib\site-packages\tornado\stack_context.py in null_wrapper(*args=(<zmq.sugar.socket.Socket object>, 1), **kwargs={})
    271         # Fast path when there are no active contexts.
    272         def null_wrapper(*args, **kwargs):
    273             try:
    274                 current_state = _state.contexts
    275                 _state.contexts = cap_contexts[0]
--> 276                 return fn(*args, **kwargs)
        args = (<zmq.sugar.socket.Socket object>, 1)
        kwargs = {}
    277             finally:
    278                 _state.contexts = current_state
    279         null_wrapper._wrapped = True
    280         return null_wrapper

...........................................................................
C:\ProgramData\Anaconda3\lib\site-packages\zmq\eventloop\zmqstream.py in _handle_events(self=<zmq.eventloop.zmqstream.ZMQStream object>, fd=<zmq.sugar.socket.Socket object>, events=1)
    445             return
    446         zmq_events = self.socket.EVENTS
    447         try:
    448             # dispatch events:
    449             if zmq_events & zmq.POLLIN and self.receiving():
--> 450                 self._handle_recv()
        self._handle_recv = <bound method ZMQStream._handle_recv of <zmq.eventloop.zmqstream.ZMQStream object>>
    451                 if not self.socket:
    452                     return
    453             if zmq_events & zmq.POLLOUT and self.sending():
    454                 self._handle_send()

...........................................................................
C:\ProgramData\Anaconda3\lib\site-packages\zmq\eventloop\zmqstream.py in _handle_recv(self=<zmq.eventloop.zmqstream.ZMQStream object>)
    475             else:
    476                 raise
    477         else:
    478             if self._recv_callback:
    479                 callback = self._recv_callback
--> 480                 self._run_callback(callback, msg)
        self._run_callback = <bound method ZMQStream._run_callback of <zmq.eventloop.zmqstream.ZMQStream object>>
        callback = <function wrap.<locals>.null_wrapper>
        msg = [<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>]
    481         
    482 
    483     def _handle_send(self):
    484         """Handle a send event."""

...........................................................................
C:\ProgramData\Anaconda3\lib\site-packages\zmq\eventloop\zmqstream.py in _run_callback(self=<zmq.eventloop.zmqstream.ZMQStream object>, callback=<function wrap.<locals>.null_wrapper>, *args=([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],), **kwargs={})
    427         close our socket."""
    428         try:
    429             # Use a NullContext to ensure that all StackContexts are run
    430             # inside our blanket exception handler rather than outside.
    431             with stack_context.NullContext():
--> 432                 callback(*args, **kwargs)
        callback = <function wrap.<locals>.null_wrapper>
        args = ([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],)
        kwargs = {}
    433         except:
    434             gen_log.error("Uncaught exception in ZMQStream callback",
    435                           exc_info=True)
    436             # Re-raise the exception so that IOLoop.handle_callback_exception

...........................................................................
C:\ProgramData\Anaconda3\lib\site-packages\tornado\stack_context.py in null_wrapper(*args=([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],), **kwargs={})
    271         # Fast path when there are no active contexts.
    272         def null_wrapper(*args, **kwargs):
    273             try:
    274                 current_state = _state.contexts
    275                 _state.contexts = cap_contexts[0]
--> 276                 return fn(*args, **kwargs)
        args = ([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],)
        kwargs = {}
    277             finally:
    278                 _state.contexts = current_state
    279         null_wrapper._wrapped = True
    280         return null_wrapper

...........................................................................
C:\ProgramData\Anaconda3\lib\site-packages\ipykernel\kernelbase.py in dispatcher(msg=[<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>])
    278         if self.control_stream:
    279             self.control_stream.on_recv(self.dispatch_control, copy=False)
    280 
    281         def make_dispatcher(stream):
    282             def dispatcher(msg):
--> 283                 return self.dispatch_shell(stream, msg)
        msg = [<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>]
    284             return dispatcher
    285 
    286         for s in self.shell_streams:
    287             s.on_recv(make_dispatcher(s), copy=False)

...........................................................................
C:\ProgramData\Anaconda3\lib\site-packages\ipykernel\kernelbase.py in dispatch_shell(self=<ipykernel.ipkernel.IPythonKernel object>, stream=<zmq.eventloop.zmqstream.ZMQStream object>, msg={'buffers': [], 'content': {'allow_stdin': True, 'code': 'model = text_clf.fit(X,y)', 'silent': False, 'stop_on_error': True, 'store_history': True, 'user_expressions': {}}, 'header': {'date': datetime.datetime(2018, 9, 9, 4, 21, 11, 236007, tzinfo=tzutc()), 'msg_id': '6bbabcabdc584bdd9bccfd746ee4321c', 'msg_type': 'execute_request', 'session': 'dcc93219c0274af88d1892c389ab5a27', 'username': 'username', 'version': '5.2'}, 'metadata': {}, 'msg_id': '6bbabcabdc584bdd9bccfd746ee4321c', 'msg_type': 'execute_request', 'parent_header': {}})
    228             self.log.warn("Unknown message type: %r", msg_type)
    229         else:
    230             self.log.debug("%s: %s", msg_type, msg)
    231             self.pre_handler_hook()
    232             try:
--> 233                 handler(stream, idents, msg)
        handler = <bound method Kernel.execute_request of <ipykernel.ipkernel.IPythonKernel object>>
        stream = <zmq.eventloop.zmqstream.ZMQStream object>
        idents = [b'dcc93219c0274af88d1892c389ab5a27']
        msg = {'buffers': [], 'content': {'allow_stdin': True, 'code': 'model = text_clf.fit(X,y)', 'silent': False, 'stop_on_error': True, 'store_history': True, 'user_expressions': {}}, 'header': {'date': datetime.datetime(2018, 9, 9, 4, 21, 11, 236007, tzinfo=tzutc()), 'msg_id': '6bbabcabdc584bdd9bccfd746ee4321c', 'msg_type': 'execute_request', 'session': 'dcc93219c0274af88d1892c389ab5a27', 'username': 'username', 'version': '5.2'}, 'metadata': {}, 'msg_id': '6bbabcabdc584bdd9bccfd746ee4321c', 'msg_type': 'execute_request', 'parent_header': {}}
    234             except Exception:
    235                 self.log.error("Exception in message handler:", exc_info=True)
    236             finally:
    237                 self.post_handler_hook()

...........................................................................
C:\ProgramData\Anaconda3\lib\site-packages\ipykernel\kernelbase.py in execute_request(self=<ipykernel.ipkernel.IPythonKernel object>, stream=<zmq.eventloop.zmqstream.ZMQStream object>, ident=[b'dcc93219c0274af88d1892c389ab5a27'], parent={'buffers': [], 'content': {'allow_stdin': True, 'code': 'model = text_clf.fit(X,y)', 'silent': False, 'stop_on_error': True, 'store_history': True, 'user_expressions': {}}, 'header': {'date': datetime.datetime(2018, 9, 9, 4, 21, 11, 236007, tzinfo=tzutc()), 'msg_id': '6bbabcabdc584bdd9bccfd746ee4321c', 'msg_type': 'execute_request', 'session': 'dcc93219c0274af88d1892c389ab5a27', 'username': 'username', 'version': '5.2'}, 'metadata': {}, 'msg_id': '6bbabcabdc584bdd9bccfd746ee4321c', 'msg_type': 'execute_request', 'parent_header': {}})
    394         if not silent:
    395             self.execution_count += 1
    396             self._publish_execute_input(code, parent, self.execution_count)
    397 
    398         reply_content = self.do_execute(code, silent, store_history,
--> 399                                         user_expressions, allow_stdin)
        user_expressions = {}
        allow_stdin = True
    400 
    401         # Flush output before sending the reply.
    402         sys.stdout.flush()
    403         sys.stderr.flush()

...........................................................................
C:\ProgramData\Anaconda3\lib\site-packages\ipykernel\ipkernel.py in do_execute(self=<ipykernel.ipkernel.IPythonKernel object>, code='model = text_clf.fit(X,y)', silent=False, store_history=True, user_expressions={}, allow_stdin=True)
    203 
    204         self._forward_input(allow_stdin)
    205 
    206         reply_content = {}
    207         try:
--> 208             res = shell.run_cell(code, store_history=store_history, silent=silent)
        res = undefined
        shell.run_cell = <bound method ZMQInteractiveShell.run_cell of <ipykernel.zmqshell.ZMQInteractiveShell object>>
        code = 'model = text_clf.fit(X,y)'
        store_history = True
        silent = False
    209         finally:
    210             self._restore_input()
    211 
    212         if res.error_before_exec is not None:

...........................................................................
C:\ProgramData\Anaconda3\lib\site-packages\ipykernel\zmqshell.py in run_cell(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, *args=('model = text_clf.fit(X,y)',), **kwargs={'silent': False, 'store_history': True})
    532             )
    533         self.payload_manager.write_payload(payload)
    534 
    535     def run_cell(self, *args, **kwargs):
    536         self._last_traceback = None
--> 537         return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
        self.run_cell = <bound method ZMQInteractiveShell.run_cell of <ipykernel.zmqshell.ZMQInteractiveShell object>>
        args = ('model = text_clf.fit(X,y)',)
        kwargs = {'silent': False, 'store_history': True}
    538 
    539     def _showtraceback(self, etype, evalue, stb):
    540         # try to preserve ordering of tracebacks and print statements
    541         sys.stdout.flush()

...........................................................................
C:\ProgramData\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py in run_cell(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, raw_cell='model = text_clf.fit(X,y)', store_history=True, silent=False, shell_futures=True)
   2657         -------
   2658         result : :class:`ExecutionResult`
   2659         """
   2660         try:
   2661             result = self._run_cell(
-> 2662                 raw_cell, store_history, silent, shell_futures)
        raw_cell = 'model = text_clf.fit(X,y)'
        store_history = True
        silent = False
        shell_futures = True
   2663         finally:
   2664             self.events.trigger('post_execute')
   2665             if not silent:
   2666                 self.events.trigger('post_run_cell', result)

...........................................................................
C:\ProgramData\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py in _run_cell(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, raw_cell='model = text_clf.fit(X,y)', store_history=True, silent=False, shell_futures=True)
   2780                 self.displayhook.exec_result = result
   2781 
   2782                 # Execute the user code
   2783                 interactivity = 'none' if silent else self.ast_node_interactivity
   2784                 has_raised = self.run_ast_nodes(code_ast.body, cell_name,
-> 2785                    interactivity=interactivity, compiler=compiler, result=result)
        interactivity = 'last_expr'
        compiler = <IPython.core.compilerop.CachingCompiler object>
   2786                 
   2787                 self.last_execution_succeeded = not has_raised
   2788                 self.last_execution_result = result
   2789 

...........................................................................
C:\ProgramData\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py in run_ast_nodes(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, nodelist=[<_ast.Assign object>], cell_name='<ipython-input-28-2213270f5880>', interactivity='none', compiler=<IPython.core.compilerop.CachingCompiler object>, result=<ExecutionResult object at 1c1b4d82fd0, executio...rue silent=False shell_futures=True> result=None>)
   2898 
   2899         try:
   2900             for i, node in enumerate(to_run_exec):
   2901                 mod = ast.Module([node])
   2902                 code = compiler(mod, cell_name, "exec")
-> 2903                 if self.run_code(code, result):
        self.run_code = <bound method InteractiveShell.run_code of <ipykernel.zmqshell.ZMQInteractiveShell object>>
        code = <code object <module> at 0x000001C1B4D7ED20, file "<ipython-input-28-2213270f5880>", line 1>
        result = <ExecutionResult object at 1c1b4d82fd0, executio...rue silent=False shell_futures=True> result=None>
   2904                     return True
   2905 
   2906             for i, node in enumerate(to_run_interactive):
   2907                 mod = ast.Interactive([node])

...........................................................................
C:\ProgramData\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py in run_code(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, code_obj=<code object <module> at 0x000001C1B4D7ED20, file "<ipython-input-28-2213270f5880>", line 1>, result=<ExecutionResult object at 1c1b4d82fd0, executio...rue silent=False shell_futures=True> result=None>)
   2958         outflag = True  # happens in more places, so it's easier as default
   2959         try:
   2960             try:
   2961                 self.hooks.pre_run_code_hook()
   2962                 #rprint('Running code', repr(code_obj)) # dbg
-> 2963                 exec(code_obj, self.user_global_ns, self.user_ns)
        code_obj = <code object <module> at 0x000001C1B4D7ED20, file "<ipython-input-28-2213270f5880>", line 1>
        self.user_global_ns = {'CountVectorizer': <class 'sklearn.feature_extraction.text.CountVectorizer'>, 'ExtraTreesClassifier': <class 'sklearn.ensemble.forest.ExtraTreesClassifier'>, 'GradientBoostingClassifier': <class 'sklearn.ensemble.gradient_boosting.GradientBoostingClassifier'>, 'GridSearchCV': <class 'sklearn.model_selection._search.GridSearchCV'>, 'In': ['', 'import pandas as pd\nfrom textblob import TextBlob\nimport string', 'df = pd.read_csv("Data/train.csv")', 'df.info()', 'df.head()', "import re \n\ndef preprocess_word(word):\n    # Rem...ppend(word)\n\n    return ' '.join(processed_tweet)", "df['processed_tweets'] = df['tweet'].apply(process_tweet)\ndf.drop('tweet',axis=1,inplace=True)", "df['processed_tweets'].head()", "df['char_count'] = df['processed_tweets'].apply(...nsity'] = df['char_count'] / (df['word_count']+1)", 'string.punctuation', 'df.head()', "TextBlob(df['processed_tweets'][0]).tags", "pos_family = {\n    'noun' : ['NN','NNS','NNP','N...weets'].apply(lambda x: check_pos_tag(x, 'pron'))", 'df.head()', "from sklearn.preprocessing import MinMaxScaler\ns...rb_count','adj_count','adv_count','pron_count']])", 'df.head()', 'from sklearn.model_selection import train_test_s...st_split(X,y, test_size = 0.2, random_state = 42)', 'X.head()', "\nfrom sklearn.feature_extraction.text import Cou...            ('clf', LinearSVC(verbose=True, )),])", 'params = {"vect__ngram_range": [(1, 3)],\n       ...ue],\n#           "chi2__k" :[27000],\n#          }', ...], 'LinearSVC': <class 'sklearn.svm.classes.LinearSVC'>, 'LogisticRegression': <class 'sklearn.linear_model.logistic.LogisticRegression'>, 'MinMaxScaler': <class 'sklearn.preprocessing.data.MinMaxScaler'>, 'Out': {4:    id  label                                    ...           factsguide: society now    #motivation, 7: 0    when a father is dysfunctional and is so se... motivation
Name: processed_tweets, dtype: object, 9: '!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~', 10:    id  label                                   p... 11      0.224359  
4           4      0.423077  , 11: [('when', 'WRB'), ('a', 'DT'), ('father', 'NN'), ('is', 'VBZ'), ('dysfunctional', 'JJ'), ('and', 'CC'), ('is', 'VBZ'), ('so', 'RB'), ('selfish', 'JJ'), ('he', 'PRP'), ('drags', 'VBZ'), ('his', 'PRP$'), ('kids', 'NNS'), ('into', 'IN'), ('his', 'PRP$'), ('dysfunction', 'NN'), ('run', 'VB')], 13:    id  label                                   p... 
2           1  
3           0  
4           0  , 15:    id  label                                   p... 
2       0.125  
3       0.000  
4       0.000  , 17:                                     processed_tw...1    0.000000   0.083333   0.111111       0.000  , 23: (31962,), 24: (31962, 9), ...}, 'Pipeline': <class 'sklearn.pipeline.Pipeline'>, ...}
        self.user_ns = {'CountVectorizer': <class 'sklearn.feature_extraction.text.CountVectorizer'>, 'ExtraTreesClassifier': <class 'sklearn.ensemble.forest.ExtraTreesClassifier'>, 'GradientBoostingClassifier': <class 'sklearn.ensemble.gradient_boosting.GradientBoostingClassifier'>, 'GridSearchCV': <class 'sklearn.model_selection._search.GridSearchCV'>, 'In': ['', 'import pandas as pd\nfrom textblob import TextBlob\nimport string', 'df = pd.read_csv("Data/train.csv")', 'df.info()', 'df.head()', "import re \n\ndef preprocess_word(word):\n    # Rem...ppend(word)\n\n    return ' '.join(processed_tweet)", "df['processed_tweets'] = df['tweet'].apply(process_tweet)\ndf.drop('tweet',axis=1,inplace=True)", "df['processed_tweets'].head()", "df['char_count'] = df['processed_tweets'].apply(...nsity'] = df['char_count'] / (df['word_count']+1)", 'string.punctuation', 'df.head()', "TextBlob(df['processed_tweets'][0]).tags", "pos_family = {\n    'noun' : ['NN','NNS','NNP','N...weets'].apply(lambda x: check_pos_tag(x, 'pron'))", 'df.head()', "from sklearn.preprocessing import MinMaxScaler\ns...rb_count','adj_count','adv_count','pron_count']])", 'df.head()', 'from sklearn.model_selection import train_test_s...st_split(X,y, test_size = 0.2, random_state = 42)', 'X.head()', "\nfrom sklearn.feature_extraction.text import Cou...            ('clf', LinearSVC(verbose=True, )),])", 'params = {"vect__ngram_range": [(1, 3)],\n       ...ue],\n#           "chi2__k" :[27000],\n#          }', ...], 'LinearSVC': <class 'sklearn.svm.classes.LinearSVC'>, 'LogisticRegression': <class 'sklearn.linear_model.logistic.LogisticRegression'>, 'MinMaxScaler': <class 'sklearn.preprocessing.data.MinMaxScaler'>, 'Out': {4:    id  label                                    ...           factsguide: society now    #motivation, 7: 0    when a father is dysfunctional and is so se... motivation
Name: processed_tweets, dtype: object, 9: '!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~', 10:    id  label                                   p... 11      0.224359  
4           4      0.423077  , 11: [('when', 'WRB'), ('a', 'DT'), ('father', 'NN'), ('is', 'VBZ'), ('dysfunctional', 'JJ'), ('and', 'CC'), ('is', 'VBZ'), ('so', 'RB'), ('selfish', 'JJ'), ('he', 'PRP'), ('drags', 'VBZ'), ('his', 'PRP$'), ('kids', 'NNS'), ('into', 'IN'), ('his', 'PRP$'), ('dysfunction', 'NN'), ('run', 'VB')], 13:    id  label                                   p... 
2           1  
3           0  
4           0  , 15:    id  label                                   p... 
2       0.125  
3       0.000  
4       0.000  , 17:                                     processed_tw...1    0.000000   0.083333   0.111111       0.000  , 23: (31962,), 24: (31962, 9), ...}, 'Pipeline': <class 'sklearn.pipeline.Pipeline'>, ...}
   2964             finally:
   2965                 # Reset our crash handler in place
   2966                 sys.excepthook = old_excepthook
   2967         except SystemExit as e:

...........................................................................
D:\My Works\Twitter Sentiment Analysis\<ipython-input-28-2213270f5880> in <module>()
----> 1 model = text_clf.fit(X,y)

...........................................................................
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\model_selection\_search.py in fit(self=GridSearchCV(cv=None, error_score='raise',
     ...ain_score='warn',
       scoring=None, verbose=2), X=                                        processe...0  
31961       0.250  

[31962 rows x 9 columns], y=       label
0          0
1          0
2        ...960      1
31961      0

[31962 rows x 1 columns], groups=None, **fit_params={})
    634                                   return_train_score=self.return_train_score,
    635                                   return_n_test_samples=True,
    636                                   return_times=True, return_parameters=False,
    637                                   error_score=self.error_score)
    638           for parameters, (train, test) in product(candidate_params,
--> 639                                                    cv.split(X, y, groups)))
        cv.split = <bound method StratifiedKFold.split of Stratifie...ld(n_splits=3, random_state=None, shuffle=False)>
        X =                                         processe...0  
31961       0.250  

[31962 rows x 9 columns]
        y =        label
0          0
1          0
2        ...960      1
31961      0

[31962 rows x 1 columns]
        groups = None
    640 
    641         # if one choose to see train score, "out" will contain train score info
    642         if self.return_train_score:
    643             (train_score_dicts, test_score_dicts, test_sample_counts, fit_time,

...........................................................................
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in __call__(self=Parallel(n_jobs=-1), iterable=<generator object BaseSearchCV.fit.<locals>.<genexpr>>)
    784             if pre_dispatch == "all" or n_jobs == 1:
    785                 # The iterable was consumed all at once by the above for loop.
    786                 # No need to wait for async callbacks to trigger to
    787                 # consumption.
    788                 self._iterating = False
--> 789             self.retrieve()
        self.retrieve = <bound method Parallel.retrieve of Parallel(n_jobs=-1)>
    790             # Make sure that we get a last message telling us we are done
    791             elapsed_time = time.time() - self._start_time
    792             self._print('Done %3i out of %3i | elapsed: %s finished',
    793                         (len(self._output), len(self._output),

---------------------------------------------------------------------------
Sub-process traceback:
---------------------------------------------------------------------------
ValueError                                         Sun Sep  9 09:51:15 2018
PID: 5448                 Python 3.6.5: C:\ProgramData\Anaconda3\python.exe
...........................................................................
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in __call__(self=<sklearn.externals.joblib.parallel.BatchedCalls object>)
    126     def __init__(self, iterator_slice):
    127         self.items = list(iterator_slice)
    128         self._size = len(self.items)
    129 
    130     def __call__(self):
--> 131         return [func(*args, **kwargs) for func, args, kwargs in self.items]
        self.items = [(<function _fit_and_score>, (Pipeline(memory=None,
     steps=[('vect', Count...', random_state=None, tol=1e-05, verbose=True))]),                                         processe...   0.000000       0.250

[31962 rows x 9 columns],        label
0          0
1          0
2        ...960      1
31961      0

[31962 rows x 1 columns], {'score': <function _passthrough_scorer>}, array([10655, 10656, 10657, ..., 31959, 31960, 31961]), array([    0,     1,     2, ..., 10652, 10653, 10654]), 2, {'chi2__k': 25000, 'clf__C': 2.4, 'clf__loss': 'hinge', 'clf__max_iter': 6, 'clf__tol': 1e-05, 'tfidf__use_idf': True, 'vect__max_df': 0.9, 'vect__ngram_range': (1, 3), 'vect__strip_accents': 'unicode'}), {'error_score': 'raise', 'fit_params': {}, 'return_n_test_samples': True, 'return_parameters': False, 'return_times': True, 'return_train_score': 'warn'})]
    132 
    133     def __len__(self):
    134         return self._size
    135 

...........................................................................
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in <listcomp>(.0=<list_iterator object>)
    126     def __init__(self, iterator_slice):
    127         self.items = list(iterator_slice)
    128         self._size = len(self.items)
    129 
    130     def __call__(self):
--> 131         return [func(*args, **kwargs) for func, args, kwargs in self.items]
        func = <function _fit_and_score>
        args = (Pipeline(memory=None,
     steps=[('vect', Count...', random_state=None, tol=1e-05, verbose=True))]),                                         processe...   0.000000       0.250

[31962 rows x 9 columns],        label
0          0
1          0
2        ...960      1
31961      0

[31962 rows x 1 columns], {'score': <function _passthrough_scorer>}, array([10655, 10656, 10657, ..., 31959, 31960, 31961]), array([    0,     1,     2, ..., 10652, 10653, 10654]), 2, {'chi2__k': 25000, 'clf__C': 2.4, 'clf__loss': 'hinge', 'clf__max_iter': 6, 'clf__tol': 1e-05, 'tfidf__use_idf': True, 'vect__max_df': 0.9, 'vect__ngram_range': (1, 3), 'vect__strip_accents': 'unicode'})
        kwargs = {'error_score': 'raise', 'fit_params': {}, 'return_n_test_samples': True, 'return_parameters': False, 'return_times': True, 'return_train_score': 'warn'}
    132 
    133     def __len__(self):
    134         return self._size
    135 

...........................................................................
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\model_selection\_validation.py in _fit_and_score(estimator=Pipeline(memory=None,
     steps=[('vect', Count...', random_state=None, tol=1e-05, verbose=True))]), X=                                        processe...   0.000000       0.250

[31962 rows x 9 columns], y=       label
0          0
1          0
2        ...960      1
31961      0

[31962 rows x 1 columns], scorer={'score': <function _passthrough_scorer>}, train=array([10655, 10656, 10657, ..., 31959, 31960, 31961]), test=array([    0,     1,     2, ..., 10652, 10653, 10654]), verbose=2, parameters={'chi2__k': 25000, 'clf__C': 2.4, 'clf__loss': 'hinge', 'clf__max_iter': 6, 'clf__tol': 1e-05, 'tfidf__use_idf': True, 'vect__max_df': 0.9, 'vect__ngram_range': (1, 3), 'vect__strip_accents': 'unicode'}, fit_params={}, return_train_score='warn', return_parameters=False, return_n_test_samples=True, return_times=True, error_score='raise')
    453 
    454     try:
    455         if y_train is None:
    456             estimator.fit(X_train, **fit_params)
    457         else:
--> 458             estimator.fit(X_train, y_train, **fit_params)
        estimator.fit = <bound method Pipeline.fit of Pipeline(memory=No..., random_state=None, tol=1e-05, verbose=True))])>
        X_train =                                         processe...   0.000000       0.250

[21307 rows x 9 columns]
        y_train =        label
10655      0
10656      0
10657    ...960      1
31961      0

[21307 rows x 1 columns]
        fit_params = {}
    459 
    460     except Exception as e:
    461         # Note fit time as time until error
    462         fit_time = time.time() - start_time

...........................................................................
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\pipeline.py in fit(self=Pipeline(memory=None,
     steps=[('vect', Count...', random_state=None, tol=1e-05, verbose=True))]), X=                                        processe...   0.000000       0.250

[21307 rows x 9 columns], y=       label
10655      0
10656      0
10657    ...960      1
31961      0

[21307 rows x 1 columns], **fit_params={})
    243         Returns
    244         -------
    245         self : Pipeline
    246             This estimator
    247         """
--> 248         Xt, fit_params = self._fit(X, y, **fit_params)
        Xt = undefined
        fit_params = {}
        self._fit = <bound method Pipeline._fit of Pipeline(memory=N..., random_state=None, tol=1e-05, verbose=True))])>
        X =                                         processe...   0.000000       0.250

[21307 rows x 9 columns]
        y =        label
10655      0
10656      0
10657    ...960      1
31961      0

[21307 rows x 1 columns]
    249         if self._final_estimator is not None:
    250             self._final_estimator.fit(Xt, y, **fit_params)
    251         return self
    252 

...........................................................................
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\pipeline.py in _fit(self=Pipeline(memory=None,
     steps=[('vect', Count...', random_state=None, tol=1e-05, verbose=True))]), X=                                        processe...   0.000000       0.250

[21307 rows x 9 columns], y=       label
10655      0
10656      0
10657    ...960      1
31961      0

[21307 rows x 1 columns], **fit_params={})
    208                 else:
    209                     cloned_transformer = clone(transformer)
    210                 # Fit or load from cache the current transfomer
    211                 Xt, fitted_transformer = fit_transform_one_cached(
    212                     cloned_transformer, None, Xt, y,
--> 213                     **fit_params_steps[name])
        fit_params_steps = {'chi2': {}, 'clf': {}, 'tfidf': {}, 'vect': {}}
        name = 'chi2'
    214                 # Replace the transformer of the step with the fitted
    215                 # transformer. This is necessary when loading the transformer
    216                 # from the cache.
    217                 self.steps[step_idx] = (name, fitted_transformer)

...........................................................................
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\externals\joblib\memory.py in __call__(self=NotMemorizedFunc(func=<function _fit_transform_one at 0x0000018EC420F1E0>), *args=(SelectKBest(k=25000, score_func=<function chi2 at 0x0000018EC40462F0>), None, <9x9 sparse matrix of type '<class 'numpy.float6... stored elements in Compressed Sparse Row format>,        label
10655      0
10656      0
10657    ...960      1
31961      0

[21307 rows x 1 columns]), **kwargs={})
    357     # Should be a light as possible (for speed)
    358     def __init__(self, func):
    359         self.func = func
    360 
    361     def __call__(self, *args, **kwargs):
--> 362         return self.func(*args, **kwargs)
        self.func = <function _fit_transform_one>
        args = (SelectKBest(k=25000, score_func=<function chi2 at 0x0000018EC40462F0>), None, <9x9 sparse matrix of type '<class 'numpy.float6... stored elements in Compressed Sparse Row format>,        label
10655      0
10656      0
10657    ...960      1
31961      0

[21307 rows x 1 columns])
        kwargs = {}
    363 
    364     def call_and_shelve(self, *args, **kwargs):
    365         return NotMemorizedResult(self.func(*args, **kwargs))
    366 

...........................................................................
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\pipeline.py in _fit_transform_one(transformer=SelectKBest(k=25000, score_func=<function chi2 at 0x0000018EC40462F0>), weight=None, X=<9x9 sparse matrix of type '<class 'numpy.float6... stored elements in Compressed Sparse Row format>, y=       label
10655      0
10656      0
10657    ...960      1
31961      0

[21307 rows x 1 columns], **fit_params={})
    576 
    577 
    578 def _fit_transform_one(transformer, weight, X, y,
    579                        **fit_params):
    580     if hasattr(transformer, 'fit_transform'):
--> 581         res = transformer.fit_transform(X, y, **fit_params)
        res = undefined
        transformer.fit_transform = <bound method TransformerMixin.fit_transform of ...core_func=<function chi2 at 0x0000018EC40462F0>)>
        X = <9x9 sparse matrix of type '<class 'numpy.float6... stored elements in Compressed Sparse Row format>
        y =        label
10655      0
10656      0
10657    ...960      1
31961      0

[21307 rows x 1 columns]
        fit_params = {}
    582     else:
    583         res = transformer.fit(X, y, **fit_params).transform(X)
    584     # if we have a weight for this transformer, multiply output
    585     if weight is None:

...........................................................................
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\base.py in fit_transform(self=SelectKBest(k=25000, score_func=<function chi2 at 0x0000018EC40462F0>), X=<9x9 sparse matrix of type '<class 'numpy.float6... stored elements in Compressed Sparse Row format>, y=       label
10655      0
10656      0
10657    ...960      1
31961      0

[21307 rows x 1 columns], **fit_params={})
    515         if y is None:
    516             # fit method of arity 1 (unsupervised transformation)
    517             return self.fit(X, **fit_params).transform(X)
    518         else:
    519             # fit method of arity 2 (supervised transformation)
--> 520             return self.fit(X, y, **fit_params).transform(X)
        self.fit = <bound method _BaseFilter.fit of SelectKBest(k=2...core_func=<function chi2 at 0x0000018EC40462F0>)>
        X = <9x9 sparse matrix of type '<class 'numpy.float6... stored elements in Compressed Sparse Row format>
        y =        label
10655      0
10656      0
10657    ...960      1
31961      0

[21307 rows x 1 columns]
        fit_params.transform = undefined
    521 
    522 
    523 class DensityMixin(object):
    524     """Mixin class for all density estimators in scikit-learn."""

...........................................................................
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\feature_selection\univariate_selection.py in fit(self=SelectKBest(k=25000, score_func=<function chi2 at 0x0000018EC40462F0>), X=<9x9 sparse matrix of type '<class 'numpy.float6... stored elements in Compressed Sparse Row format>, y=       label
10655      0
10656      0
10657    ...960      1
31961      0

[21307 rows x 1 columns])
    336         Returns
    337         -------
    338         self : object
    339             Returns self.
    340         """
--> 341         X, y = check_X_y(X, y, ['csr', 'csc'], multi_output=True)
        X = <9x9 sparse matrix of type '<class 'numpy.float6... stored elements in Compressed Sparse Row format>
        y =        label
10655      0
10656      0
10657    ...960      1
31961      0

[21307 rows x 1 columns]
    342 
    343         if not callable(self.score_func):
    344             raise TypeError("The score function should be a callable, %s (%s) "
    345                             "was passed."

...........................................................................
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\utils\validation.py in check_X_y(X=<9x9 sparse matrix of type '<class 'numpy.float6... stored elements in Compressed Sparse Row format>, y=array([[0],
       [0],
       [0],
       ...,
       [0],
       [1],
       [0]], dtype=int64), accept_sparse=['csr', 'csc'], dtype='numeric', order=None, copy=False, force_all_finite=True, ensure_2d=True, allow_nd=False, multi_output=True, ensure_min_samples=1, ensure_min_features=1, y_numeric=False, warn_on_dtype=False, estimator=None)
    578         y = column_or_1d(y, warn=True)
    579         _assert_all_finite(y)
    580     if y_numeric and y.dtype.kind == 'O':
    581         y = y.astype(np.float64)
    582 
--> 583     check_consistent_length(X, y)
        X = <9x9 sparse matrix of type '<class 'numpy.float6... stored elements in Compressed Sparse Row format>
        y = array([[0],
       [0],
       [0],
       ...,
       [0],
       [1],
       [0]], dtype=int64)
    584 
    585     return X, y
    586 
    587 

...........................................................................
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\utils\validation.py in check_consistent_length(*arrays=(<9x9 sparse matrix of type '<class 'numpy.float6... stored elements in Compressed Sparse Row format>, array([[0],
       [0],
       [0],
       ...,
       [0],
       [1],
       [0]], dtype=int64)))
    199 
    200     lengths = [_num_samples(X) for X in arrays if X is not None]
    201     uniques = np.unique(lengths)
    202     if len(uniques) > 1:
    203         raise ValueError("Found input variables with inconsistent numbers of"
--> 204                          " samples: %r" % [int(l) for l in lengths])
        lengths = [9, 21307]
    205 
    206 
    207 def indexable(*iterables):
    208     """Make arrays indexable for cross-validation.

ValueError: Found input variables with inconsistent numbers of samples: [9, 21307]
___________________________________________________________________________

In [None]:
model.best_params_

In [None]:
predicted = model.predict(X_test)

In [None]:
from sklearn.metrics import f1_score

In [None]:
f1_score(y_test, predicted)

In [None]:
submission = pd.read_csv('Data/test.csv')
submission.info()

submission['processed_tweet'] = submission['tweet'].apply(process_tweet)
submission.head()

submission.drop('tweet', axis=1,inplace=True)
submission.head()

submission['char_count'] = submission['processed_tweets'].apply(len)
submission['word_count'] = submission['processed_tweets'].apply(lambda x: len(x.split()))
submission['word_density'] = submission['char_count'] / (submission['word_count']+1)
submission['noun_count'] = submission['processed_tweets'].apply(lambda x: check_pos_tag(x, 'noun'))
submission['verb_count'] = submission['processed_tweets'].apply(lambda x: check_pos_tag(x, 'verb'))
submission['adj_count'] = submission['processed_tweets'].apply(lambda x: check_pos_tag(x, 'adj'))
submission['adv_count'] = submission['processed_tweets'].apply(lambda x: check_pos_tag(x, 'adv'))
submission['pron_count'] = submission['processed_tweets'].apply(lambda x: check_pos_tag(x, 'pron'))

predicted = text_clf.predict(submission)

predicted

final_predict = pd.DataFrame(predicted,columns=['label'])
result = pd.DataFrame(submission['id'],columns=['id'])
result = pd.concat([result,final_predict],axis=1)
result.to_csv('final_predictions.csv',index=False)

result['label'].value_counts()