In [9]:
import sys,pickle
from nltk.stem import WordNetLemmatizer
import nltk
from sklearn.pipeline import Pipeline
from sklearn.neighbors import KNeighborsClassifier
# nltk.download('wordnet')
# nltk.download('punkt')
# nltk.download('stopwords')
nltk.download('omw-1.4')
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
from sklearn.multioutput import MultiOutputClassifier
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split, GridSearchCV
from nltk.corpus import stopwords
from sqlalchemy import create_engine
import pandas as pd
from nltk.tokenize import word_tokenize
import numpy as np
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score



[nltk_data] Downloading package omw-1.4 to
[nltk_data]     C:\Users\Donia\AppData\Roaming\nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


In [46]:
def load_data(database_filepath):
    engine = create_engine(f'sqlite:///'+database_filepath)
    df = pd.read_sql_table("last", engine)
    X = df['message']
    y = df.drop(['message','original',"id",'genre'],axis =1)
    category_names = y.columns
    return X,y,category_names

In [47]:
def tokenize(text):
    words = word_tokenize(text)
    lemmatizer = WordNetLemmatizer()
    lem = []
    # stop_words = set(stopwords.words('english'))
    # words = [w for w in words if w not in stop_words]
    for w in words:
        word = lemmatizer.lemmatize(w).lower().strip() 
        lem.append(word)
    return lem


In [48]:

def build_model():
    pipeline = Pipeline([('vect', CountVectorizer(tokenizer=tokenize)),
                ('tfidf', TfidfTransformer()),
                   ('clf', MultiOutputClassifier(KNeighborsClassifier()))])
    
    parameters = {'clf__estimator__n_neighbors':[50,100]
             }
    cv =  GridSearchCV(estimator = pipeline,param_grid = parameters)
    return cv

def evaluate_model(model, X_test, Y_test, category_names):
    y_pred = model.predict(X_test)
    print('Accuracy Score: {}'.format(np.mean(Y_test.values == y_pred)))

def save_model(model, model_filepath):
    pickle.dump(model, open(model_filepath, 'wb'))


In [49]:
X, Y, category_names = load_data('data/dis.db')

In [50]:
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2)


In [51]:

X_train

15594    Weathermen forecast another heavy rainstorm wo...
1181     Good evening. Is it possible for an aftershake...
9597     Code Black! This is Quil. Among the new admini...
8997     Give me information on the fault of the Cap-Ha...
11832    RT ikePortnoyDT: Well, the Santiago show is on...
                               ...                        
23417    Some of the most critical needs for families i...
10700    any kind of cleanup , food distribution , cook...
17537    Livestock have drowned in "inland seas" up 40 ...
16340    On top of those drowned or killed by mudslides...
9318     Is there a fair in the department of South-east? 
Name: message, Length: 20972, dtype: object

In [52]:
print('Building model...')
model = build_model()  
print('Training model...')
model.fit(X_train, Y_train)

Building model...
Training model...


GridSearchCV(estimator=Pipeline(steps=[('vect',
                                        CountVectorizer(tokenizer=<function tokenize at 0x0000020E811E4430>)),
                                       ('tfidf', TfidfTransformer()),
                                       ('clf',
                                        MultiOutputClassifier(estimator=KNeighborsClassifier()))]),
             param_grid={'clf__estimator__n_neighbors': [50, 100]})

In [18]:
Y_train

Unnamed: 0,genre,related,request,offer,aid_related,medical_help,medical_products,search_and_rescue,security,military,...,aid_centers,other_infrastructure,weather_related,floods,storm,fire,earthquake,cold,other_weather,direct_report
10487,social,1,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,1,0,0,0
12251,direct,1,1,0,1,0,0,0,0,0,...,0,0,1,1,0,0,0,0,0,1
8928,direct,1,1,0,1,0,0,0,0,0,...,0,0,1,0,0,0,1,0,0,1
3290,direct,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4661,direct,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12319,direct,1,0,0,1,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
10964,social,1,0,0,1,0,0,0,0,0,...,0,0,1,0,1,0,0,0,0,1
315,direct,1,1,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
21770,news,1,0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [53]:
print('Evaluating model...')
evaluate_model(model, X_test, Y_test, category_names)


Evaluating model...
Accuracy Score: 0.9394175353843546


In [54]:
# print('Saving model...\n  =================  MODEL: {}'.format(model_filepath))
save_model(model,'models/mod.pkl')

In [55]:
model = pickle.load(open('models/model.pkl','rb'))


In [17]:
    classification_labels = model.predict(["we are more than 50 people sleeping on street,food,tent"])[0]


In [18]:
classification_labels

array(['direct', '1', '1', '0', '1', '0', '0', '0', '0', '0', '0', '0',
       '0', '1', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0',
       '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0'],
      dtype=object)

In [19]:
engine = create_engine('sqlite:///data//DisasterResponse.db')
df = pd.read_sql_table('new', engine)
classification_results = dict(zip(df.columns[4:], classification_labels))


In [20]:
classification_results

{'related': 'direct',
 'request': '1',
 'offer': '1',
 'aid_related': '0',
 'medical_help': '1',
 'medical_products': '0',
 'search_and_rescue': '0',
 'security': '0',
 'military': '0',
 'child_alone': '0',
 'water': '0',
 'food': '0',
 'shelter': '0',
 'clothing': '1',
 'money': '0',
 'missing_people': '0',
 'refugees': '0',
 'death': '0',
 'other_aid': '0',
 'infrastructure_related': '0',
 'transport': '0',
 'buildings': '0',
 'electricity': '0',
 'tools': '0',
 'hospitals': '0',
 'shops': '0',
 'aid_centers': '0',
 'other_infrastructure': '0',
 'weather_related': '0',
 'floods': '0',
 'storm': '0',
 'fire': '0',
 'earthquake': '0',
 'cold': '0',
 'other_weather': '0',
 'direct_report': '0'}

In [28]:
for category, classification in classification_results.items():
    if classification =='1':
        print (classification)

1
1
1
1
