In [1]:
import os
import re
import pandas as pd
import numpy as np
import pickle
import joblib
from sqlalchemy import create_engine
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.model_selection import train_test_split, GridSearchCV, KFold
from sklearn.multioutput import MultiOutputClassifier
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, make_scorer, classification_report
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
import xgboost as xgb

# Fetch the NLTK resources this notebook relies on: 'punkt' (tokenizers),
# 'wordnet' (lemmatizer), 'averaged_perceptron_tagger' (POS tagger) and
# 'stopwords' (backs the nltk.corpus.stopwords import above; it was previously
# missing from this list, so stopwords.words(...) failed on a fresh machine).
nltk.download(['punkt', 'wordnet', 'averaged_perceptron_tagger', 'stopwords'])

[nltk_data] Remote end closed connection without response


False

In [None]:
# Open the SQLite database file (path is relative to the notebook's working
# directory) and read the whole table named 'df' into a DataFrame.
# Note: despite its name, `conn` is a SQLAlchemy Engine, not an open connection.
conn = create_engine('sqlite:///data/disaster_response.db')
df = pd.read_sql_table('df', conn)

In [None]:
# pd.read_sql_query('SELECT * FROM df;', conn)

In [None]:
# Show the first row as a quick check of the loaded columns.
df.head(1)

In [None]:
# X = df['message'][:5]
# y = df.drop(['id', 'message', 'original', 'genre_news', 'genre_social', 'genre'], axis = 1).iloc[:5,]

In [None]:
# Model input: the raw message text.
X = df['message']
# Targets: every column left after dropping the id, the text columns and the
# genre columns (presumably one label column per category — verify against
# the table schema).
y = df.drop(['id', 'message', 'original', 'genre_news', 'genre_social', 'genre'], axis = 1)

In [48]:
# Raw string so that `\(` and `\)` reach the regex engine unchanged instead of
# being treated as (invalid) Python string escapes.
url_regex = r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+'

def tokenize(text):
    """
    Normalize a message and split it into lemmatized word tokens.

    Steps:
      1. Replace every URL matched by ``url_regex`` with a placeholder word.
      2. Lower-case the text and turn every non-alphanumeric character into a
         space.
      3. Tokenize with ``word_tokenize`` and lemmatize each token with
         ``WordNetLemmatizer``.

    Stop words are kept. The previous version built a stop-word-filtered list
    but never used it, so dropping that dead code (and its per-call load of the
    stop-word corpus) does not change the returned tokens.

    INPUT:
    --text : str, a single message

    OUTPUT:
    --- list of str, the cleaned tokens in their original order
    """
    # The placeholder contains no underscore on purpose: step 2 below replaces
    # every non-alphanumeric character with a space, which used to split
    # 'url_placeholder' into two unrelated tokens ('url', 'placeholder').
    text = re.sub(url_regex, 'urlplaceholder', text)

    text = re.sub(r"[^a-zA-Z0-9]", " ", text.lower())

    lemmatizer = WordNetLemmatizer()
    return [
        lemmatizer.lemmatize(token).lower().strip()
        for token in word_tokenize(text)
    ]

def build_model_pipeline():
    """
    Assemble the (unfitted) multi-output text classification pipeline.

    Feature side: a union of
      * 'text_pipeline' - bag-of-words counts from ``tokenize`` followed by a
        TF-IDF transform, and
      * 'starting_verb' - the ``StartingVerbExtractor`` flag.
    Model side: one default ``XGBClassifier`` per target column, wrapped in a
    ``MultiOutputClassifier``.

    OUTPUT:
    --- sklearn Pipeline with steps 'features' and 'clf'
    """
    text_pipeline = Pipeline([
        ('vect', CountVectorizer(tokenizer=tokenize)),
        ('tfidf', TfidfTransformer()),
    ])

    features = FeatureUnion([
        ('text_pipeline', text_pipeline),
        ('starting_verb', StartingVerbExtractor()),
    ])

    classifier = MultiOutputClassifier(xgb.XGBClassifier())

    return Pipeline([
        ('features', features),
        ('clf', classifier),
    ])

class StartingVerbExtractor(BaseEstimator, TransformerMixin):
    """
    Stateless transformer producing one boolean feature per message: whether
    any sentence in the message begins with a token tagged 'VB' or 'VBP'.
    """

    def starting_verb(self, text):
        """
        Return True if at least one sentence of ``text`` starts with a verb.

        Each sentence is cleaned with ``tokenize`` before POS tagging, so the
        tag is assigned to the first *cleaned* token. Sentences that produce no
        tokens are skipped.
        """
        for sentence in nltk.sent_tokenize(text):
            tagged = nltk.pos_tag(tokenize(sentence))

            # Nothing left after cleaning (e.g. punctuation only): no first tag.
            if not tagged:
                continue

            _, leading_tag = tagged[0]
            if leading_tag in ['VB', 'VBP']:
                return True

        return False

    def fit(self, x, y=None):
        """Nothing to learn; returns self so the transformer fits the sklearn API."""
        return self

    def transform(self, X):
        """
        Apply ``starting_verb`` to every message in ``X``.

        Returns a single-column DataFrame of booleans, one row per message.
        """
        flags = pd.Series(X).apply(self.starting_verb)
        return pd.DataFrame(flags)
            

def eval_metrics(actual, predicted, column_names):
    """
    Score multi-output predictions one output column at a time.

    For every position in ``column_names`` the matching column of ``actual``
    and ``predicted`` is scored, and sklearn's ``classification_report`` for
    that column is printed.

    INPUT:
    --actual : DataFrame of true labels (columns are accessed by position)
    --predicted: 2-D array of predicted labels, columns aligned with ``actual``
    --column_names: labels used as the index of the result; its length sets
        how many columns are scored

    OUTPUT:
    --- Pandas DataFrame indexed by ``column_names`` with the columns
        accuracy, precision, recall and f1 (the last three weighted-averaged).
    """
    scores = {'accuracy': [], 'precision': [], 'recall': [], 'f1': []}

    for col_idx, _ in enumerate(column_names):
        truth = actual.iloc[:, col_idx]
        guess = predicted[:, col_idx]

        scores['accuracy'].append(accuracy_score(truth, guess))
        scores['precision'].append(precision_score(truth, guess, average='weighted'))
        scores['recall'].append(recall_score(truth, guess, average='weighted'))
        scores['f1'].append(f1_score(truth, guess, average='weighted'))

        print(classification_report(truth, guess))

    return pd.DataFrame(scores, index=column_names)

def gridsCV_scorer(actual, predicted):
    """
    Calculate the median weighted F1 score across all output classifiers.

    INPUT:
    --actual : DataFrame of actual labels, one column per output
    --predicted: 2-D array of predicted labels, columns aligned with ``actual``

    OUTPUT: median of the per-column weighted F1 scores (float)
    """
    # Score every output column first. The previous version returned from
    # inside the loop, so only the first column was ever scored.
    f1_per_output = [
        f1_score(actual.iloc[:, i], predicted[:, i], average='weighted')
        for i in range(actual.shape[1])
    ]
    return np.median(f1_per_output)

In [49]:
%%time
# Hold out a test set (train_test_split's default split size, fixed seed so
# the split is repeatable), then fit the baseline pipeline on the training part.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=19)
pipeline = build_model_pipeline()
pipeline.fit(X_train, y_train)

[('unicef', 'JJ'), ('ha', 'NN'), ('offered', 'VBD'), ('20', 'CD'), ('squatting', 'VBG'), ('pan', 'NN'), ('to', 'TO'), ('the', 'DT'), ('mosque', 'NN'), ('and', 'CC'), ('a', 'DT'), ('temporary', 'JJ'), ('building', 'NN'), ('is', 'VBZ'), ('being', 'VBG'), ('constructed', 'VBN'), ('to', 'TO'), ('supplement', 'VB'), ('the', 'DT'), ('existing', 'VBG'), ('shelter', 'NN'), ('at', 'IN'), ('the', 'DT'), ('mosque', 'NN'), ('with', 'IN'), ('four', 'CD'), ('semi', 'NN'), ('permanent', 'JJ'), ('toilet', 'NN'), ('and', 'CC'), ('a', 'DT'), ('bathing', 'NN'), ('site', 'NN')]
[('what', 'WDT'), ('are', 'VBP'), ('the', 'DT'), ('authority', 'NN'), ('saying', 'VBG'), ('about', 'IN'), ('the', 'DT'), ('functioning', 'NN'), ('of', 'IN'), ('the', 'DT'), ('school', 'NN'), ('in', 'IN'), ('the', 'DT'), ('rest', 'NN'), ('of', 'IN'), ('the', 'DT'), ('country', 'NN')]
[('i', 'NNS'), ('have', 'VBP'), ('a', 'DT'), ('lot', 'NN'), ('of', 'IN'), ('problem', 'NN'), ('and', 'CC'), ('i', 'NN'), ('do', 'VBP'), ('not', 'RB'), 

[('he', 'PRP'), ('further', 'RB'), ('hoped', 'VBD'), ('that', 'IN'), ('the', 'DT'), ('recent', 'JJ'), ('increased', 'VBN'), ('participation', 'NN'), ('by', 'IN'), ('the', 'DT'), ('united', 'JJ'), ('state', 'NN'), ('in', 'IN'), ('climate', 'NN'), ('debate', 'NN'), ('indicated', 'VBD'), ('it', 'PRP'), ('intention', 'NN'), ('to', 'TO'), ('play', 'VB'), ('a', 'DT'), ('major', 'JJ'), ('role', 'NN')]
[('we', 'PRP'), ('need', 'VBP'), ('help', 'VB'), ('in', 'IN'), ('croix', 'NN'), ('de', 'FW'), ('bouquet', 'FW'), ('call', 'VB'), ('me', 'PRP'), ('for', 'IN'), ('more', 'JJR'), ('info', 'NN')]
[('poliomyelitis', 'NN'), ('according', 'VBG'), ('to', 'TO'), ('the', 'DT'), ('world', 'NN'), ('initiative', 'NN'), ('for', 'IN'), ('the', 'DT'), ('eradication', 'NN'), ('of', 'IN'), ('polio4', 'NN'), ('there', 'EX'), ('is', 'VBZ'), ('a', 'DT'), ('potential', 'JJ'), ('threat', 'NN'), ('of', 'IN'), ('a', 'DT'), ('polio', 'JJ'), ('epidemic', 'NN'), ('in', 'IN'), ('west', 'NN'), ('africa', 'NN')]
[('note', 'NN

[('and', 'CC'), ('the', 'DT'), ('overall', 'JJ'), ('number', 'NN'), ('of', 'IN'), ('death', 'NN'), ('attributed', 'VBN'), ('to', 'TO'), ('pro', 'VB'), ('government', 'NN'), ('force', 'NN'), ('which', 'WDT'), ('include', 'VBP'), ('the', 'DT'), ('ana', 'NN'), ('afghan', 'JJ'), ('national', 'JJ'), ('army', 'NN'), ('anp', 'NN'), ('afghan', 'IN'), ('national', 'JJ'), ('police', 'NNS'), ('nd', 'JJ'), ('national', 'JJ'), ('directorate', 'NN'), ('of', 'IN'), ('security', 'NN'), ('and', 'CC'), ('international', 'JJ'), ('military', 'JJ'), ('force', 'NN'), ('marginally', 'RB'), ('exceeds', 'VBZ'), ('that', 'IN'), ('caused', 'VBN'), ('by', 'IN'), ('antigovernment', 'JJ'), ('force', 'NN')]
[('would', 'MD'), ('like', 'VB'), ('to', 'TO'), ('know', 'VB'), ('if', 'IN'), ('school', 'NN'), ('will', 'MD'), ('resume', 'VB'), ('again', 'RB'), ('or', 'CC'), ('not', 'RB')]
[('help', 'VB'), ('me', 'PRP'), ('because', 'IN'), ('i', 'NN'), ('am', 'VBP'), ('alone', 'RB'), ('during', 'IN'), ('the', 'DT'), ('earthqu

[('can', 'MD'), ('you', 'PRP'), ('help', 'VB'), ('me', 'PRP')]
[('good', 'JJ'), ('evening', 'NN'), ('when', 'WRB'), ('you', 'PRP'), ('active', 'JJ'), ('50', 'CD'), ('g', 'NN'), ('sm', 'NN'), ('is', 'VBZ'), ('it', 'PRP'), ('for', 'IN'), ('international', 'JJ'), ('thank', 'NN'), ('you', 'PRP')]
[('the', 'DT'), ('route', 'JJ'), ('north', 'NN'), ('and', 'CC'), ('south', 'NN'), ('can', 'MD'), ('handle', 'VB'), ('two', 'CD'), ('lane', 'NN'), ('traffic', 'NN'), ('on', 'IN'), ('most', 'JJS'), ('of', 'IN'), ('the', 'DT'), ('coastal', 'JJ'), ('road', 'NN'), ('from', 'IN'), ('e', '$'), ('04', 'CD'), ('b043', 'NN'), ('53', 'CD'), ('8', 'CD'), ('n', 'JJ'), ('95', 'CD'), ('b030', 'NN'), ('27', 'CD'), ('4', 'CD'), ('to', 'TO'), ('e', 'VB'), ('04', 'CD'), ('b032', 'NN'), ('32', 'CD'), ('4', 'CD'), ('n', 'JJ'), ('95', 'CD'), ('b043', 'NN'), ('02', 'CD'), ('0', 'CD'), ('with', 'IN'), ('area', 'NN'), ('that', 'WDT'), ('are', 'VBP'), ('reduced', 'VBN'), ('to', 'TO'), ('single', 'JJ'), ('lane', 'NN'), ('tr

[('no', 'DT'), ('gps', 'NN'), ('coordinate', 'NN'), ('available', 'JJ'), ('from', 'IN'), ('here', 'RB')]
[('rt', 'NN'), ('cnn', 'NN'), ('find', 'VB'), ('out', 'RP'), ('how', 'WRB'), ('you', 'PRP'), ('can', 'MD'), ('help', 'VB'), ('make', 'VB'), ('a', 'DT'), ('difference', 'NN'), ('to', 'TO'), ('haiti', 'VB'), ('u2019s', 'JJ'), ('earthquake', 'NN'), ('victim', 'NN'), ('please', 'NN'), ('visit', 'NN'), ('cnn', 'NN'), ('com', 'NN'), ('impact', 'NN')]
[('the', 'DT'), ('team', 'NN'), ('previously', 'RB'), ('tackled', 'VBD'), ('two', 'CD'), ('other', 'JJ'), ('cholera', 'NN'), ('epidemic', 'NN'), ('in', 'IN'), ('kamanyola', 'NN'), ('and', 'CC'), ('uvira', 'VB'), ('a', 'DT'), ('malaria', 'NN'), ('emergency', 'NN'), ('in', 'IN'), ('lulingo', 'NN'), ('and', 'CC'), ('two', 'CD'), ('measles', 'NNS'), ('emergency', 'NN'), ('one', 'CD'), ('in', 'IN'), ('kitutu', 'NN'), ('and', 'CC'), ('another', 'DT'), ('in', 'IN'), ('bunyakiri', 'NN'), ('which', 'WDT'), ('included', 'VBD'), ('a', 'DT'), ('vaccinati

[('all', 'DT'), ('our', 'PRP$'), ('heart', 'NN'), ('go', 'VB'), ('out', 'IN'), ('to', 'TO'), ('everyone', 'NN'), ('in', 'IN'), ('santiago', 'NN')]
[('rt', 'NN'), ('myfoxdc', 'NN'), ('myfoxdc', 'NN'), ('photo', 'NN'), ('haiti', 'JJ'), ('earthquake', 'NN'), ('aftermath', 'IN'), ('a', 'DT'), ('major', 'JJ'), ('earthquake', 'NN'), ('left', 'VBD'), ('thousand', 'CD'), ('of', 'IN'), ('people', 'NNS'), ('in', 'IN'), ('and', 'CC'), ('around', 'IN'), ('nport', 'JJ'), ('o', 'NN'), ('http', 'NN'), ('tiny', 'JJ')]
[('this', 'DT'), ('is', 'VBZ'), ('the', 'DT'), ('founder', 'NN'), ('of', 'IN'), ('college', 'NN'), ('jerusalem', 'NN'), ('in', 'IN'), ('la', 'NN'), ('gonave', 'VBP')]
[('we', 'PRP'), ('are', 'VBP'), ('use', 'IN'), ('the', 'DT'), ('school', 'NN'), ('a', 'DT'), ('a', 'DT'), ('shelter', 'NN'), ('for', 'IN'), ('people', 'NNS'), ('to', 'TO'), ('sleep', 'VB'), ('in', 'IN')]
[('these', 'DT'), ('people', 'NNS'), ('need', 'VBP'), ('aid', 'NN'), ('too', 'RB')]
[('he', 'PRP'), ('then', 'RB'), ('exp

[('hi', 'NN'), ('i', 'VBZ'), ('great', 'JJ'), ('you', 'PRP'), ('for', 'IN'), ('all', 'PDT'), ('the', 'DT'), ('good', 'JJ'), ('work', 'NN'), ('that', 'IN'), ('you', 'PRP'), ('have', 'VBP'), ('done', 'VBN'), ('but', 'CC'), ('i', 'RB'), ('would', 'MD'), ('like', 'VB'), ('to', 'TO'), ('know', 'VB'), ('when', 'WRB'), ('st', 'JJ'), ('gerard', 'NN'), ('university', 'NN'), ('will', 'MD'), ('re', 'VB'), ('opening', 'JJ'), ('please', 'NN')]
[('to', 'TO'), ('me', 'PRP'), ('it', 'PRP'), ('sound', 'VBD'), ('a', 'DT'), ('long', 'JJ'), ('time', 'NN'), ('to', 'TO'), ('go', 'VB'), ('to', 'TO'), ('a', 'DT'), ('nearby', 'JJ'), ('pond', 'NN'), ('water', 'NN'), ('pump', 'NN'), ('who', 'WP'), ('is', 'VBZ'), ('seeking', 'VBG'), ('assistance', 'NN'), ('in', 'IN'), ('sourcing', 'VBG'), ('large', 'JJ'), ('size', 'NN'), ('pump', 'NN'), ('to', 'TO'), ('drain', 'VB'), ('stagnant', 'JJ'), ('water', 'NN'), ('in', 'IN'), ('various', 'JJ'), ('area', 'NN')]
[('please', 'VB'), ('help', 'VB'), ('me', 'PRP'), ('to', 'TO')

[('battery', 'NN'), ('tunnel', 'NN'), ('still', 'RB'), ('closed', 'VBD'), ('holland', 'NN'), ('tunnel', 'NN'), ('open', 'JJ'), ('to', 'TO'), ('bus', 'VB'), ('only', 'RB')]
[('sandy', 'JJ'), ('hurricanesandy', 'NN'), ('nyc', 'JJ'), ('brooklyn', 'NN'), ('newjersey', 'NN')]
[('for', 'IN'), ('live', 'JJ'), ('tweeting', 'NN'), ('from', 'IN'), ('haiti', 'NN'), ('follow', 'VBP'), ('fredodupoux', 'NN'), ('carelpedre', 'NN'), ('internethaiti', 'NN'), ('photomorel', 'NN'), ('haiti', 'NN'), ('eq', 'JJ'), ('haitiquake', 'NN'), ('earthquake', 'NN')]
[('r', 'NN'), ('u', 'JJ'), ('angry', 'JJ'), ('to', 'TO'), ('me', 'PRP'), ('daddy', 'VB')]
[('i', 'NN'), ('call', 'VBP'), ('you', 'PRP'), ('you', 'PRP'), ('don', 'VBP'), ('t', 'JJ'), ('take', 'VBP'), ('the', 'DT'), ('telephone', 'NN')]
[('my', 'PRP$'), ('family', 'NN'), ('in', 'IN'), ('ti', 'NN'), ('goave', 'NN'), ('and', 'CC'), ('my', 'PRP$'), ('mother', 'NN'), ('who', 'WP'), ('is', 'VBZ'), ('there', 'RB'), ('need', 'VBN'), ('food', 'NN'), ('and', 'CC')

[('is', 'VBZ'), ('looking', 'VBG'), ('to', 'TO'), ('communicate', 'VB'), ('with', 'IN'), ('cauquenes', 'NNS'), ('maule', 'JJ'), ('region', 'NN'), ('chile', 'NN'), ('after', 'IN'), ('the', 'DT'), ('earthquake', 'NN')]
[('are', 'VBP'), ('the', 'DT'), ('people', 'NNS'), ('who', 'WP'), ('left', 'VBD'), ('to', 'TO'), ('the', 'DT'), ('country', 'NN'), ('side', 'NN'), ('will', 'MD'), ('not', 'RB'), ('be', 'VB'), ('able', 'JJ'), ('to', 'TO'), ('return', 'VB')]
[('at', 'IN'), ('enough', 'JJ'), ('one', 'CD'), ('way', 'NN'), ('we', 'PRP'), ('have', 'VBP'), ('suggested', 'VBN'), ('overcoming', 'VBG'), ('this', 'DT'), ('divide', 'NN'), ('is', 'VBZ'), ('by', 'IN'), ('engaging', 'VBG'), ('in', 'IN'), ('robust', 'JJ'), ('public', 'JJ'), ('opinion', 'NN'), ('polling', 'NN'), ('to', 'TO'), ('allow', 'VB'), ('these', 'DT'), ('community', 'NN'), ('to', 'TO'), ('weigh', 'VB'), ('in', 'IN'), ('on', 'IN'), ('key', 'JJ'), ('issue', 'NN'), ('related', 'VBN'), ('to', 'TO'), ('the', 'DT'), ('transition', 'NN')]


[('we', 'PRP'), ('have', 'VBP'), ('no', 'DT'), ('food', 'NN'), ('clothes', 'NNS'), ('money', 'NN')]
[('what', 'WP'), ('will', 'MD'), ('we', 'PRP'), ('do', 'VB')]
[('there', 'EX'), ('are', 'VBP'), ('people', 'NNS'), ('with', 'IN'), ('typhoid', 'NN'), ('who', 'WP'), ('are', 'VBP'), ('becoming', 'VBG'), ('very', 'RB'), ('sick', 'JJ')]
[('please', 'VB'), ('help', 'NN'), ('u', 'VB')]
[('twitter', 'NN'), ('carry', 'VBP'), ('day', 'NN'), ('once', 'RB'), ('again', 'RB'), ('on', 'IN'), ('haiti', 'NNS'), ('earthquake', 'NN'), ('a', 'DT'), ('we', 'PRP'), ('ve', 'VBP'), ('seen', 'VBN'), ('in', 'IN'), ('another', 'DT'), ('quick', 'JJ'), ('breaking', 'NN'), ('story', 'NN'), ('and', 'CC'), ('trage', 'NN'), ('http', 'JJ'), ('bit', 'NN'), ('ly', 'JJ'), ('4ya2rq', 'CD')]
[('the', 'DT'), ('namibian', 'JJ'), ('government', 'NN'), ('said', 'VBD'), ('it', 'PRP'), ('objected', 'VBD'), ('to', 'TO'), ('the', 'DT'), ('summit', 'NN'), ('taking', 'VBG'), ('place', 'NN'), ('in', 'IN'), ('lome', 'NN'), ('because', 

[('ap', 'RB'), ('1', 'CD'), ('9', 'CD'), ('10', 'CD'), ('gal', 'JJ'), ('4', 'CD'), ('10', 'CD'), ('11', 'CD'), ('col', 'NN'), ('2', 'CD'), ('14', 'CD'), ('18', 'CD')]
[('we', 'PRP'), ('are', 'VBP'), ('afraid', 'JJ'), ('of', 'IN'), ('a', 'DT'), ('huge', 'JJ'), ('mudslide', 'NN'), ('triggered', 'VBN'), ('by', 'IN'), ('heavy', 'JJ'), ('rain', 'NN')]
[('according', 'VBG'), ('to', 'TO'), ('statistical', 'JJ'), ('rainfall', 'NN'), ('record', 'NN'), ('the', 'DT'), ('city', 'NN'), ('of', 'IN'), ('jiangyou', 'NN'), ('had', 'VBD'), ('129', 'CD'), ('4', 'CD'), ('mm', 'NN'), ('and', 'CC'), ('in', 'IN'), ('some', 'DT'), ('district', 'NN'), ('they', 'PRP'), ('attained', 'VBD'), ('338', 'CD'), ('7', 'CD'), ('mm', 'NN')]
[('farmer', 'NN'), ('drought', 'NN'), ('reduction', 'NN'), ('in', 'IN'), ('rain', 'NN'), ('fed', 'NN'), ('and', 'CC'), ('karez', 'NN'), ('irrigated', 'VBN'), ('harvest', 'NN')]
[('powerful', 'JJ'), ('wave', 'NN'), ('also', 'RB'), ('damaged', 'VBD'), ('3', 'CD'), ('km', 'NN'), ('of', '

[('unique', 'JJ'), ('hospital', 'NN'), ('centre', 'NN'), ('is', 'VBZ'), ('affording', 'VBG'), ('too', 'RB'), ('much', 'JJ'), ('people', 'NNS'), ('already', 'RB')]
[]

Index Error

[('while', 'IN'), ('the', 'DT'), ('rebel', 'NN'), ('in', 'IN'), ('particular', 'JJ'), ('made', 'VBN'), ('headline', 'NN'), ('worldwide', 'NN'), ('with', 'IN'), ('decapitation', 'NN'), ('of', 'IN'), ('buddhist', 'NN'), ('around', 'IN'), ('60', 'CD'), ('percent', 'NN'), ('of', 'IN'), ('the', 'DT'), ('people', 'NNS'), ('violently', 'RB'), ('put', 'VBD'), ('to', 'TO'), ('death', 'NN'), ('in', 'IN'), ('patani', 'NN'), ('are', 'VBP'), ('muslim', 'JJ')]
[('help', 'NN'), ('delmas', 'VB'), ('4', 'CD'), ('st', 'NN'), ('martin', 'NNS'), ('area', 'NN')]
[('spent', 'NN'), ('an', 'DT'), ('entire', 'JJ'), ('day', 'NN'), ('w', 'VBD'), ('o', 'JJ'), ('tv', 'NN'), ('or', 'CC'), ('a', 'DT'), ('computer', 'NN'), ('and', 'CC'), ('it', 'PRP'), ('wa', 'VBZ'), ('nice', 'JJ'), ('sandy', 'NN')]
[('most', 'JJS'), ('of', 'IN'), ('the', '

[('good', 'JJ'), ('morning', 'NN'), ('4636', 'CD')]
[('we', 'PRP'), ('are', 'VBP'), ('really', 'RB'), ('hungry', 'JJ'), ('we', 'PRP'), ('are', 'VBP'), ('dying', 'VBG'), ('here', 'RB')]
[('we', 'PRP'), ('do', 'VBP'), ('not', 'RB'), ('have', 'VB'), ('anything', 'NN')]
[('please', 'VB'), ('help', 'NN'), ('u', 'VB')]
[('hi', 'NN'), ('we', 'PRP'), ('live', 'VBP'), ('in', 'IN'), ('carrefour', 'NN'), ('feuilles', 'NNS')]
[('we', 'PRP'), ('need', 'VBP'), ('aid', 'JJ'), ('tent', 'NN'), ('food', 'NN')]
[('good', 'JJ'), ('evening', 'NN')]
[('please', 'NN'), ('we', 'PRP'), ('need', 'VBP'), ('help', 'VB'), ('in', 'IN'), ('delma', 'JJ'), ('16', 'CD'), ('number', 'NN'), ('27', 'CD')]
[('we', 'PRP'), ('don', 'VBP'), ('t', 'NNS'), ('have', 'VBP'), ('water', 'NN'), ('nor', 'CC'), ('food', 'NN')]
[('we', 'PRP'), ('don', 'VBP'), ('t', 'NNS'), ('have', 'VBP'), ('tent', 'NN')]
[('thank', 'NN'), ('you', 'PRP'), ('for', 'IN'), ('your', 'PRP$'), ('understanding', 'NN')]
[('just', 'RB'), ('getting', 'VBG'), ('h

[('lower', 'JJR'), ('cotton', 'NN'), ('production', 'NN'), ('will', 'MD'), ('also', 'RB'), ('impact', 'VB'), ('on', 'IN'), ('the', 'DT'), ('availability', 'NN'), ('of', 'IN'), ('protein', 'JJ'), ('additive', 'NN'), ('for', 'IN'), ('animal', 'JJ'), ('feed', 'NN'), ('and', 'CC'), ('the', 'DT'), ('availability', 'NN'), ('of', 'IN'), ('vegetable', 'JJ'), ('oil', 'NN'), ('for', 'IN'), ('human', 'JJ'), ('consumption', 'NN')]
[('under', 'IN'), ('the', 'DT'), ('approved', 'JJ'), ('plan', 'NN'), ('reconstruction', 'NN'), ('in', 'IN'), ('the', 'DT'), ('quake', 'NN'), ('zone', 'NN'), ('will', 'MD'), ('be', 'VB'), ('mainly', 'RB'), ('funded', 'VBN'), ('by', 'IN'), ('the', 'DT'), ('central', 'JJ'), ('government', 'NN')]
[('to', 'TO'), ('whom', 'WP'), ('should', 'MD'), ('ask', 'VB'), ('for', 'IN'), ('help', 'NN')]
[('i', 'NN'), ('am', 'VBP'), ('waiting', 'VBG'), ('for', 'IN'), ('your', 'PRP$'), ('response', 'NN')]
[('the', 'DT'), ('programme', 'NN'), ('doe', 'VBZ'), ('not', 'RB'), ('deliver', 'JJ'),

[('their', 'PRP$'), ('is', 'VBZ'), ('someone', 'NN'), ('that', 'WDT'), ('left', 'VBD'), ('their', 'PRP$'), ('home', 'NN'), ('and', 'CC'), ('never', 'RB'), ('remembered', 'VBN'), ('to', 'TO'), ('call', 'VB'), ('their', 'PRP$'), ('family', 'NN'), ('they', 'PRP'), ('will', 'MD'), ('never', 'RB'), ('know', 'VB'), ('what', 'WP'), ('happen', 'VB')]
[('can', 'MD'), ('of', 'IN'), ('olive', 'JJ'), ('can', 'MD'), ('of', 'IN'), ('chickpea', 'JJ'), ('bag', 'NN'), ('of', 'IN'), ('pasta', 'JJ'), ('bag', 'NN'), ('of', 'IN'), ('hard', 'JJ'), ('pretzel', 'NN'), ('also', 'RB'), ('have', 'VBP'), ('sneaker', 'NN'), ('i', 'NN'), ('can', 'MD'), ('donate', 'VB')]
[('sandyhelp', 'NN'), ('please', 'VB'), ('help', 'NN')]
[('sandy', 'JJ'), ('litterly', 'RB'), ('broke', 'VBD'), ('atlantic', 'JJ'), ('city', 'NN')]
[('ventnor', 'NN'), ('is', 'VBZ'), ('like', 'IN'), ('a', 'DT'), ('ghost', 'NN'), ('town', 'NN')]
[('this', 'DT'), ('hurt', 'NN'), ('like', 'IN'), ('hell', 'NN'), ('i', 'VB'), ('my', 'PRP$'), ('heart', 'N

[('sweet', 'JJ'), ('potato', 'NN'), ('starch', 'NN'), ('is', 'VBZ'), ('suitable', 'JJ'), ('for', 'IN'), ('production', 'NN'), ('of', 'IN'), ('noodle', 'JJ')]
[('ifad', 'NN'), ('funded', 'VBD'), ('cereal', 'NN'), ('bank', 'NN'), ('in', 'IN'), ('niger', 'NN'), ('were', 'VBD'), ('traditionally', 'RB'), ('used', 'VBN'), ('to', 'TO'), ('store', 'VB'), ('crop', 'NN'), ('immediately', 'RB'), ('after', 'IN'), ('harvest', 'NN'), ('so', 'RB'), ('farmer', 'RB'), ('could', 'MD'), ('sell', 'VB'), ('them', 'PRP'), ('during', 'IN'), ('the', 'DT'), ('dry', 'JJ'), ('season', 'NN'), ('when', 'WRB'), ('market', 'NN'), ('price', 'NN'), ('tend', 'NN'), ('to', 'TO'), ('rise', 'VB')]
[('out', 'IN'), ('of', 'IN'), ('fear', 'NN'), ('that', 'IN'), ('local', 'JJ'), ('water', 'NN'), ('source', 'NN'), ('could', 'MD'), ('be', 'VB'), ('contaminated', 'VBN'), ('unicef', 'JJ'), ('wa', 'NN'), ('collaborating', 'VBG'), ('with', 'IN'), ('the', 'DT'), ('somali', 'NN'), ('red', 'JJ'), ('crescent', 'NN'), ('society', 'NN'),

[('the', 'DT'), ('drc', 'NN'), ('s', 'JJ'), ('eastern', 'JJ'), ('province', 'NN'), ('of', 'IN'), ('north', 'JJ'), ('and', 'CC'), ('south', 'JJ'), ('kivu', 'NNS'), ('have', 'VBP'), ('witnessed', 'VBN'), ('increased', 'VBN'), ('fighting', 'VBG'), ('between', 'IN'), ('government', 'NN'), ('troop', 'NN'), ('and', 'CC'), ('the', 'DT'), ('so', 'RB'), ('called', 'VBN'), ('march', 'NN'), ('23', 'CD'), ('movement', 'NN'), ('m23', 'NN'), ('which', 'WDT'), ('is', 'VBZ'), ('composed', 'VBN'), ('of', 'IN'), ('renegade', 'NN'), ('soldier', 'NN'), ('who', 'WP'), ('mutinied', 'VBD'), ('in', 'IN'), ('april', 'NN')]
[('saed', 'NN'), ('khan', 'NN'), ('rakh', 'NN'), ('azmt', 'NN'), ('wala', 'JJ'), ('tahsel', 'NN'), ('jampy', 'NN'), ('ds', 'NN'), ('rajanpur', 'NN'), ('punjab', 'NN')]
[('haig', 'NN'), ('that', 'WDT'), ('suck', 'VBD'), ('dude', 'NN')]
[('at', 'IN'), ('least', 'JJS'), ('the', 'DT'), ('49ers', 'NNS'), ('are', 'VBP'), ('dominating', 'VBG')]
[('sandyso', 'JJ'), ('glad', 'NN'), ('we', 'PRP'), ('h

[('i', 'JJ'), ('wrote', 'VBD'), ('you', 'PRP'), ('and', 'CC'), ('got', 'VBD'), ('no', 'DT'), ('answer', 'NN')]
[('will', 'MD'), ('the', 'DT'), ('21st', 'CD'), ('of', 'IN'), ('december', 'NN'), ('be', 'VB'), ('the', 'DT'), ('end', 'NN'), ('of', 'IN'), ('the', 'DT'), ('world', 'NN')]
[('flood', 'NN'), ('victim', 'NN'), ('in', 'IN'), ('assam', 'JJ'), ('sought', 'JJ'), ('refuge', 'NN'), ('atop', 'NN'), ('mud', 'NN'), ('and', 'CC'), ('straw', 'JJ'), ('hut', 'NN'), ('and', 'CC'), ('clambered', 'VBD'), ('onto', 'IN'), ('mud', 'NN'), ('embankment', 'NN'), ('and', 'CC'), ('high', 'JJ'), ('lying', 'NN'), ('ground', 'NN')]
[('through', 'IN'), ('the', 'DT'), ('oti', 'NN'), ('funded', 'VBD'), ('debris', 'JJ'), ('clearing', 'NN'), ('grant', 'NN'), ('muslim', 'NN'), ('and', 'CC'), ('tamil', 'NN'), ('youth', 'NN'), ('within', 'IN'), ('the', 'DT'), ('mutur', 'JJ'), ('peace', 'NN'), ('committee', 'NN'), ('mobilized', 'VBD'), ('other', 'JJ'), ('young', 'JJ'), ('sri', 'NNS'), ('lankans', 'VBZ'), ('includi

[('i', 'NN'), ('wan', 'VBP'), ('na', 'TO'), ('know', 'VB'), ('all', 'DT'), ('information', 'NN'), ('about', 'IN'), ('the', 'DT'), ('country', 'NN')]
[('one', 'CD'), ('hundred', 'CD'), ('percent', 'NN'), ('of', 'IN'), ('your', 'PRP$'), ('gift', 'NN'), ('go', 'VBP'), ('to', 'TO'), ('relief', 'NN'), ('and', 'CC'), ('recovery', 'NN'), ('effort', 'NN')]
[('in', 'IN'), ('jacmel', 'PDT'), ('the', 'DT'), ('aid', 'NN'), ('is', 'VBZ'), ('poorly', 'RB'), ('organized', 'VBN')]
[('they', 'PRP'), ('only', 'RB'), ('go', 'VBP'), ('to', 'TO'), ('one', 'CD'), ('place', 'NN'), ('park', 'NN'), ('pinchinat', 'NN'), ('they', 'PRP'), ('don', 'VBP'), ('t', 'JJ'), ('come', 'VBN'), ('by', 'IN'), ('to', 'TO'), ('see', 'VB'), ('the', 'DT'), ('rest', 'NN'), ('of', 'IN'), ('the', 'DT'), ('people', 'NNS'), ('who', 'WP'), ('are', 'VBP'), ('left', 'VBN')]
[('it', 'PRP'), ('s', 'VBZ'), ('only', 'RB'), ('the', 'DT'), ('rasta', 'NN'), ('and', 'CC'), ('the', 'DT'), ('strongman', 'NN'), ('who', 'WP'), ('get', 'VBP'), ('aid

[('hello', 'NN')]
[('i', 'JJ'), ('live', 'VBP'), ('in', 'IN'), ('leogane', 'NN')]
[('my', 'PRP$'), ('house', 'NN'), ('is', 'VBZ'), ('destroyed', 'VBN'), ('and', 'CC'), ('my', 'PRP$'), ('family', 'NN'), ('and', 'CC'), ('i', 'NN'), ('are', 'VBP'), ('sleeping', 'VBG'), ('in', 'IN'), ('the', 'DT'), ('street', 'NN')]
[('we', 'PRP'), ('are', 'VBP'), ('hungry', 'JJ'), ('and', 'CC'), ('haven', 'JJ'), ('t', 'NN'), ('found', 'VBD'), ('food', 'NN'), ('to', 'TO'), ('eat', 'VB')]
[('help', 'VB'), ('me', 'PRP'), ('please', 'VB')]
[('this', 'DT'), ('week', 'NN'), ('the', 'DT'), ('government', 'NN'), ('launched', 'VBD'), ('a', 'DT'), ('100', 'CD'), ('million', 'CD'), ('dollar', 'NN'), ('climate', 'NN'), ('resilience', 'NN'), ('programme', 'NN'), ('that', 'WDT'), ('will', 'MD'), ('spend', 'VB'), ('the', 'DT'), ('bulk', 'NN'), ('of', 'IN'), ('it', 'PRP'), ('fund', 'VBP'), ('around', 'RB'), ('90', 'CD'), ('million', 'CD'), ('dollar', 'NN'), ('on', 'IN'), ('infrastructure', 'NN'), ('upgrade', 'NN')]
[('st

[('the', 'DT'), ('wounded', 'VBN'), ('in', 'IN'), ('the', 'DT'), ('saint', 'NN'), ('michely', 'RB'), ('hospital', 'JJ'), ('have', 'VBP'), ('no', 'DT'), ('medicine', 'NN')]
[('this', 'DT'), ('look', 'NN'), ('like', 'IN'), ('part', 'NN'), ('of', 'IN'), ('a', 'DT'), ('message', 'NN'), ('that', 'WDT'), ('got', 'VBD'), ('cut', 'VBN'), ('off', 'RP')]
[('in', 'IN'), ('several', 'JJ'), ('country', 'NN'), ('these', 'DT'), ('national', 'JJ'), ('forest', 'NN'), ('programme', 'NN'), ('have', 'VBP'), ('been', 'VBN'), ('implemented', 'VBN'), ('with', 'IN'), ('the', 'DT'), ('support', 'NN'), ('of', 'IN'), ('the', 'DT'), ('international', 'JJ'), ('donor', 'NN'), ('community', 'NN'), ('undp', 'JJ'), ('global', 'JJ'), ('environment', 'NN'), ('facility', 'NN'), ('gef', 'NN'), ('mobilizing', 'VBG'), ('financial', 'JJ'), ('national', 'JJ'), ('and', 'CC'), ('external', 'JJ'), ('resource', 'NN'), ('for', 'IN'), ('programme', 'NN'), ('and', 'CC'), ('project', 'NN'), ('relevant', 'NN'), ('to', 'TO'), ('the', '

[('rickywussup', 'JJ'), ('fuckit', 'NN'), ('who', 'WP'), ('tryna', 'VBZ'), ('party', 'NN'), ('tonight', 'NN'), ('sippin', 'NN'), ('on', 'IN'), ('hurricane', 'NN'), ('through', 'IN'), ('out', 'IN'), ('the', 'DT'), ('hurricane', 'NN')]
[('rt', 'NN'), ('protagonistas', 'NNS'), ('impresionante', 'VBP'), ('fotograf', 'JJ'), ('u00eda', 'JJ'), ('del', 'NN'), ('palacio', 'NN'), ('presidencial', 'JJ'), ('en', 'NN'), ('hait', 'NN'), ('u00ed', 'JJ'), ('ante', 'NN'), ('y', 'NN'), ('despu', 'NN'), ('u00e9s', 'JJ'), ('http', 'NN'), ('twitpic', 'NN'), ('com', 'NN'), ('xvzzt', 'NNP'), ('haiti', 'VBD'), ('earthquake', 'NN')]
[('info', 'NN'), ('for', 'IN'), ('the', 'DT'), ('possibility', 'NN'), ('of', 'IN'), ('employment', 'NN'), ('for', 'IN'), ('a', 'DT'), ('civil', 'JJ'), ('engineer', 'NN')]
[('today', 'NN'), ('there', 'EX'), ('is', 'VBZ'), ('a', 'DT'), ('lot', 'NN'), ('of', 'IN'), ('civil', 'JJ'), ('unrest', 'NN')]
[('we', 'PRP'), ('need', 'VBP'), ('police', 'NNS'), ('assistance', 'VB')]
[('i', 'NNS'

[('what', 'WP'), ('should', 'MD'), ('we', 'PRP'), ('do', 'VB'), ('when', 'WRB'), ('people', 'NNS'), ('are', 'VBP'), ('talking', 'VBG'), ('back', 'RB'), ('to', 'TO'), ('u', 'VB'), ('thank', 'NN'), ('you', 'PRP')]
[('according', 'VBG'), ('to', 'TO'), ('the', 'DT'), ('agreement', 'NN'), ('the', 'DT'), ('grant', 'NN'), ('will', 'MD'), ('be', 'VB'), ('used', 'VBN'), ('to', 'TO'), ('purchase', 'VB'), ('fertilizer', 'NN'), ('that', 'WDT'), ('aimed', 'VBD'), ('at', 'IN'), ('improving', 'VBG'), ('the', 'DT'), ('food', 'NN'), ('security', 'NN'), ('for', 'IN'), ('underprivileged', 'JJ'), ('farmer', 'NN'), ('in', 'IN'), ('the', 'DT'), ('country', 'NN')]
[('my', 'PRP$'), ('love', 'NN'), ('i', 'NN'), ('can', 'MD'), ('t', 'VB'), ('live', 'VB'), ('without', 'IN'), ('you', 'PRP'), ('but', 'CC'), ('ala', 'VB')]
[('tell', 'VB'), ('me', 'PRP'), ('why', 'WRB'), ('you', 'PRP'), ('are', 'VBP'), ('angry', 'JJ'), ('and', 'CC'), ('you', 'PRP'), ('are', 'VBP'), ('humiliating', 'VBG'), ('me', 'PRP')]
[('i', 'NN')

UnboundLocalError: local variable 'first_tag' referenced before assignment

In [None]:
# Score the baseline pipeline on the data it was trained on.
predictions = pipeline.predict(X_train)
eval_metrics(y_train, predictions, y.columns.values)

In [None]:
# Score the baseline pipeline on the held-out test split.
predictions = pipeline.predict(X_test)
eval_metrics(y_test, predictions, y.columns.values)

In [None]:
# Hyper-parameter search over a fresh pipeline. Only the vectorizer's n-gram
# range is searched; the other two candidate parameters are commented out.
# Keys follow sklearn's nested `step__substep__param` naming.
param_grid = {'features__text_pipeline__vect__ngram_range': ((1, 1), (1, 2)),
#               'features__text_pipeline__vect__max_df': (0.75, 1.0),
#               'clf__estimator__n_estimators': [100, 250, 500]
             }
model = build_model_pipeline()
# Wrap the custom multi-output F1 function so GridSearchCV can use it as the
# model-selection score.
scorer = make_scorer(gridsCV_scorer)
grid_model = GridSearchCV(model, param_grid=param_grid, scoring=scorer, verbose=20)
grid_model.fit(X_train, y_train)

In [None]:
# Parameter combination chosen by the grid search.
best = grid_model.best_params_
best

In [None]:
# Score the tuned model on the held-out test split and keep the per-column
# metrics table for the summary in the next cell.
predictions = grid_model.predict(X_test)
eval_m= eval_metrics(y_test, predictions, y.columns.values)

In [None]:
# Summary statistics of each metric across the output columns.
eval_m.describe()

In [None]:
# Persist the whole fitted GridSearchCV object to the working directory.
# NOTE(review): the pipeline references `tokenize` and `StartingVerbExtractor`
# defined in this notebook; whatever loads the file presumably needs those
# definitions available too — confirm before deploying.
joblib.dump(grid_model, 'DisasterResponse.pkl')

In [None]:
# # load the model from disk
# loaded_model = joblib.load('DisasterResponse.pkl')
# result = loaded_model.score(X_test, y_test)
# print(result)

In [None]:
# Display the fitted search object.
grid_model