This notebook aims to find better parameters for the evaluation model.
For details on the construction and decision-making process, take a look at the ML-Pipeline notebook.


Import the required libraries and load the data

In [1]:
import numpy as np
import pandas as pd
import re
import nltk
from nltk.corpus import stopwords
from nltk.stem.wordnet import WordNetLemmatizer
from nltk.tokenize import word_tokenize
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.pipeline import Pipeline
import sqlite3
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.multioutput import MultiOutputClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix
from sklearn.metrics import roc_auc_score
from sklearn.metrics import classification_report
from sklearn.metrics import f1_score
from sklearn.metrics import precision_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import recall_score
import statistics




def load_data(db_path='Messages.db'):
    '''Load the Messages table and split it into model input and labels.

    Parameters
    ----------
    db_path : str, optional
        Path of the SQLite file to read. Defaults to 'Messages.db', the
        file this notebook has always used.

    Returns
    -------
    X : numpy.ndarray
        The values of the ``message`` column.
    y : pandas.DataFrame
        Every column except ``message``, ``genre_news`` and ``genre_social``.
        The columns are selected with ``Index.difference``, so they come back
        in sorted order rather than in table order.

    Raises
    ------
    KeyError
        If the table has no ``index`` or ``message`` column.
    '''

    # Close the connection in `finally` so it is released even when the
    # query or a later step raises; previously it was only closed on success.
    conn = sqlite3.connect(db_path)
    try:
        df = pd.read_sql('SELECT * FROM Messages', conn)
    finally:
        conn.close()

    # presumably the row index written when the table was created - verify upstream
    df = df.drop(columns=['index'])

    # first scenario will ignore the genre feature
    X = df['message'].values
    y = df[df.columns.difference(['message', 'genre_news', 'genre_social'])]

    return X, y



X, y = load_data()

In [2]:
stop_words = stopwords.words("english")
lemmatizer = WordNetLemmatizer()

def tokenize(text):
    '''Turn a raw message into a list of lemmatized tokens.

    The text is lower-cased, every character that is not a letter is replaced
    by a space, and the result is split with ``word_tokenize``. Tokens found
    in ``stop_words`` are discarded; each remaining token is lemmatized with
    the lemmatizer's default part of speech and then as a verb, an adjective
    and an adverb, in that order.
    '''
    # normalize case, remove punctuation and numbers
    letters_only = re.sub(r"[^a-zA-Z]", " ", text.lower())

    lemmas = []
    for token in word_tokenize(letters_only):
        # stop words are matched against the raw token, before lemmatization
        if token in stop_words:
            continue

        # default lemma first, then verb / adjective / adverb on its result
        lemma = lemmatizer.lemmatize(token)
        for part_of_speech in ('v', 'a', 'r'):
            lemma = lemmatizer.lemmatize(lemma, pos=part_of_speech)

        lemmas.append(lemma)

    return lemmas

In [3]:
def model_pipeline():
    '''Build the grid-search object around the text-classification pipeline.

    The pipeline counts tokens produced by ``tokenize``, applies a TF-IDF
    transform and feeds the result to a ``MultiOutputClassifier`` wrapping a
    ``RandomForestClassifier``.

    Returns
    -------
    GridSearchCV
        An unfitted search over the pipeline (verbose=1, n_jobs=3). The grid
        currently holds a single candidate: 100 trees, random_state 42.
    '''
    forest_per_label = MultiOutputClassifier(estimator=RandomForestClassifier())
    steps = [
        ('vect', CountVectorizer(tokenizer=tokenize)),
        ('tfidf', TfidfTransformer()),
        ('clf', forest_per_label),
    ]
    pipeline = Pipeline(steps)

    # grid searched at present; the commented entries are further candidates
    # that are currently switched off
    parameters = {
        #'vect__ngram_range': ((1, 1), (1, 2)),
        #'vect__max_df': (0.5, 0.75, 1.0),
        #'vect__max_features': (None, 5000, 10000),
        #'tfidf__use_idf': (True, False),
        'clf__estimator__n_estimators': [100],
        #'clf__estimator__max_depth': [220],
        'clf__estimator__random_state': [42],
    }

    return GridSearchCV(pipeline, param_grid=parameters, verbose=1, n_jobs=3)

In [4]:
# Fixed seed so the train/test partition is the same on every run.
random_state=42
# Hold out 20% of the samples for evaluation.
X_train, X_test, y_train, y_test = train_test_split(X, y,test_size=0.2,random_state=random_state)

# model_pipeline() returns a GridSearchCV object, so fit() runs the
# cross-validated search on the training split (12 minutes in the recorded run).
model = model_pipeline()
model.fit(X_train, y_train)
# Output of predict() for the held-out messages; this is the y_pred that the
# scoring calls further down receive.
y_pred = model.predict(X_test)

Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=3)]: Using backend LokyBackend with 3 concurrent workers.
[Parallel(n_jobs=3)]: Done   5 out of   5 | elapsed: 12.0min finished


In [38]:
def model_scores(y_test,y_pred):
    '''Print the mean, maximum and minimum of five per-label scores.

    For each of AUC, f1, precision, accuracy and recall (in that order) the
    metric is computed separately for every label column, and one summary
    line with the mean, max and min across labels is printed.

    Parameters
    ----------
    y_test : pandas.DataFrame
        True labels, one column per label (accessed with ``.iloc[:, i]``).
    y_pred : array supporting ``y_pred[:, i]``
        Predictions, columns in the same order as ``y_test``.

    Returns
    -------
    None
    '''
    # (name used in the printed line, metric function) - one table instead of
    # five copy-pasted loops, so a new metric is a one-line change.
    metrics = (
        ('AUC', roc_auc_score),
        ('f1 score', f1_score),
        ('precision score', precision_score),
        ('accuracy score', accuracy_score),
        ('recall score', recall_score),
    )

    n_labels = y_test.shape[1]
    for name, metric in metrics:
        per_label = [metric(y_test.iloc[:, i], y_pred[:, i]) for i in range(n_labels)]
        print('Mean %s:' % name, "%.2f" % statistics.mean(per_label),
              ' Max %s:' % name, "%.2f" % max(per_label),
              ' Min %s:' % name, "%.2f" % min(per_label))
    
    

def AUC_ROC(y_test,y_pred):
    '''Print the area under the ROC curve for every label.

    Parameters
    ----------
    y_test : pandas.DataFrame
        True labels, one column per label. The printed label names are taken
        from this frame's columns, not from any notebook-level variable.
    y_pred : array supporting ``y_pred[:, i]``
        Predictions, columns in the same order as ``y_test``.

    Returns
    -------
    None
    '''
    # NOTE(review): the notebook passes the output of predict() here; confirm
    # whether per-label scores/probabilities were intended for the AUC.
    auc = []
    for i, label in enumerate(y_test.columns):
        auc_score_column = roc_auc_score(y_test.iloc[:,i],y_pred[:,i])
        auc.append(auc_score_column)
        print('The AUC for',label,' was: ',"%.2f" % auc_score_column,'.')


    #return auc; remove the comment if you want to return the list



def f1_score_labels(y_test,y_pred):
    '''Print the f1 score for every label.

    Parameters
    ----------
    y_test : pandas.DataFrame
        True labels, one column per label. The printed label names are taken
        from this frame's columns, not from any notebook-level variable.
    y_pred : array supporting ``y_pred[:, i]``
        Predictions, columns in the same order as ``y_test``.

    Returns
    -------
    None
    '''

    f1_score_model = []
    for i, label in enumerate(y_test.columns):
        f1_score_column = f1_score(y_test.iloc[:,i],y_pred[:,i])
        f1_score_model.append(f1_score_column)
        print('The f1 score for',label,' was: ',"%.2f" % f1_score_column,'.')

    #return f1_score_model; remove the comment if you want to return the list



def precision_score_labels(y_test,y_pred):
    '''Print the precision score for every label.

    Parameters
    ----------
    y_test : pandas.DataFrame
        True labels, one column per label. The printed label names are taken
        from this frame's columns, not from any notebook-level variable.
    y_pred : array supporting ``y_pred[:, i]``
        Predictions, columns in the same order as ``y_test``.

    Returns
    -------
    None
    '''

    precision_score_model = []
    for i, label in enumerate(y_test.columns):
        precision_score_column = precision_score(y_test.iloc[:,i],y_pred[:,i])
        precision_score_model.append(precision_score_column)
        print('The precision score for',label,' was: ',"%.2f" % precision_score_column,'.')

    #return precision_score_model; remove the comment if you want to return the list


def accuracy_score_labels(y_test,y_pred):
    '''Print the accuracy score for every label.

    Parameters
    ----------
    y_test : pandas.DataFrame
        True labels, one column per label. The printed label names are taken
        from this frame's columns, not from any notebook-level variable.
    y_pred : array supporting ``y_pred[:, i]``
        Predictions, columns in the same order as ``y_test``.

    Returns
    -------
    None
    '''

    accuracy_score_model = []
    for i, label in enumerate(y_test.columns):
        accuracy_score_column = accuracy_score(y_test.iloc[:,i],y_pred[:,i])
        accuracy_score_model.append(accuracy_score_column)
        print('The accuracy score for',label,' was: ',"%.2f" % accuracy_score_column,'.')

    #return accuracy_score_model; remove the comment if you want to return the list
    


def recall_score_labels(y_test,y_pred):
    '''Print the recall score for every label.

    Parameters
    ----------
    y_test : pandas.DataFrame
        True labels, one column per label. The printed label names are taken
        from this frame's columns, not from any notebook-level variable.
    y_pred : array supporting ``y_pred[:, i]``
        Predictions, columns in the same order as ``y_test``.

    Returns
    -------
    None
    '''

    recall_score_model = []
    for i, label in enumerate(y_test.columns):
        recall_score_column = recall_score(y_test.iloc[:,i],y_pred[:,i])
        recall_score_model.append(recall_score_column)
        print('The recall score for',label,' was: ',"%.2f" % recall_score_column,'.')

    #return recall_score_model; remove the comment if you want to return the list



#if you want the score values for just one label run the following functions    
def AUC_ROC_column(y_test,y_pred, column):
    '''Print the area under the ROC curve for a single label.

    ``column`` is looked up in ``y_test.columns``; the same position is used
    to pick the matching column of ``y_pred``. Nothing is returned.
    '''
    position = y_test.columns.get_loc(column)
    truth = y_test.iloc[:, position]
    predicted = y_pred[:, position]

    score = roc_auc_score(truth, predicted)
    print('The AUC for',column,'was: ',"%.2f" % score,'.')

    #return score; remove the comment if you want to return the value
    
def f1_score_column(y_test,y_pred,column):
    '''Print the f1 score for a single label.

    ``column`` is looked up in ``y_test.columns``; the same position is used
    to pick the matching column of ``y_pred``. Nothing is returned.
    '''
    position = y_test.columns.get_loc(column)
    truth = y_test.iloc[:, position]
    predicted = y_pred[:, position]

    # local deliberately not named after the function itself
    score = f1_score(truth, predicted)
    print('The f1 score for',column,'was: ',"%.2f" % score,'.')

    #return score; remove the comment if you want to return the value



def precision_score_column(y_test,y_pred,column):
    '''Print the precision score for a single label.

    ``column`` is looked up in ``y_test.columns``; the same position is used
    to pick the matching column of ``y_pred``. Nothing is returned.
    '''
    position = y_test.columns.get_loc(column)
    truth = y_test.iloc[:, position]
    predicted = y_pred[:, position]

    # local deliberately not named after the function itself
    score = precision_score(truth, predicted)
    print('The precision score for',column,'was: ',"%.2f" % score,'.')

    #return score; remove the comment if you want to return the value


def accuracy_score_column (y_test,y_pred,column):
    '''Print the accuracy score for a single label.

    ``column`` is looked up in ``y_test.columns``; the same position is used
    to pick the matching column of ``y_pred``. Nothing is returned.
    '''
    position = y_test.columns.get_loc(column)
    truth = y_test.iloc[:, position]
    predicted = y_pred[:, position]

    # local deliberately not named after the function itself
    score = accuracy_score(truth, predicted)
    print('The accuracy score for',column,'was: ',"%.2f" % score,'.')

    #return score; remove the comment if you want to return the value
    


def recall_score_column (y_test,y_pred,column):
    '''Print the recall score for a single label.

    ``column`` is looked up in ``y_test.columns``; the same position is used
    to pick the matching column of ``y_pred``. Nothing is returned.
    '''
    position = y_test.columns.get_loc(column)
    truth = y_test.iloc[:, position]
    predicted = y_pred[:, position]

    # local deliberately not named after the function itself
    score = recall_score(truth, predicted)
    print('The recall score for',column,'was: ',"%.2f" % score,'.')

    #return score; remove the comment if you want to return the value


In [6]:
AUC_ROC(y_test,y_pred)

The AUC for aid_centers  was:  0.50 .
The AUC for aid_related  was:  0.73 .
The AUC for buildings  was:  0.57 .
The AUC for clothing  was:  0.59 .
The AUC for cold  was:  0.58 .
The AUC for death  was:  0.64 .
The AUC for direct_report  was:  0.70 .
The AUC for earthquake  was:  0.90 .
The AUC for electricity  was:  0.52 .
The AUC for fire  was:  0.51 .
The AUC for floods  was:  0.76 .
The AUC for food  was:  0.85 .
The AUC for hospitals  was:  0.51 .
The AUC for infrastructure_related  was:  0.50 .
The AUC for medical_help  was:  0.57 .
The AUC for medical_products  was:  0.55 .
The AUC for military  was:  0.58 .
The AUC for missing_people  was:  0.51 .
The AUC for money  was:  0.53 .
The AUC for offer  was:  0.50 .
The AUC for other_aid  was:  0.52 .
The AUC for other_infrastructure  was:  0.50 .
The AUC for other_weather  was:  0.54 .
The AUC for refugees  was:  0.55 .
The AUC for request  was:  0.78 .
The AUC for search_and_rescue  was:  0.52 .
The AUC for security  was:  0.50 .
Th

In [7]:
f1_score_labels(y_test,y_pred)

The f1 score for aid_centers  was:  0.00 .
The f1 score for aid_related  was:  0.76 .
The f1 score for buildings  was:  0.25 .
The f1 score for clothing  was:  0.30 .
The f1 score for cold  was:  0.27 .
The f1 score for death  was:  0.42 .
The f1 score for direct_report  was:  0.56 .
The f1 score for earthquake  was:  0.85 .
The f1 score for electricity  was:  0.08 .
The f1 score for fire  was:  0.03 .
The f1 score for floods  was:  0.66 .
The f1 score for food  was:  0.77 .
The f1 score for hospitals  was:  0.04 .
The f1 score for infrastructure_related  was:  0.01 .
The f1 score for medical_help  was:  0.24 .
The f1 score for medical_products  was:  0.18 .
The f1 score for military  was:  0.26 .
The f1 score for missing_people  was:  0.03 .
The f1 score for money  was:  0.11 .
The f1 score for offer  was:  0.00 .
The f1 score for other_aid  was:  0.09 .
The f1 score for other_infrastructure  was:  0.00 .
The f1 score for other_weather  was:  0.14 .
The f1 score for refugees  was:  0.

In [8]:
precision_score_labels(y_test,y_pred)

The precision score for aid_centers  was:  0.00 .
The precision score for aid_related  was:  0.72 .
The precision score for buildings  was:  0.76 .
The precision score for clothing  was:  0.76 .
The precision score for cold  was:  0.89 .
The precision score for death  was:  0.80 .
The precision score for direct_report  was:  0.72 .
The precision score for earthquake  was:  0.90 .
The precision score for electricity  was:  0.83 .
The precision score for fire  was:  1.00 .
The precision score for floods  was:  0.90 .
The precision score for food  was:  0.84 .
The precision score for hospitals  was:  1.00 .
The precision score for infrastructure_related  was:  0.11 .
The precision score for medical_help  was:  0.65 .
The precision score for medical_products  was:  0.82 .
The precision score for military  was:  0.71 .
The precision score for missing_people  was:  1.00 .
The precision score for money  was:  0.67 .
The precision score for offer  was:  0.00 .
The precision score for other_aid

  _warn_prf(average, modifier, msg_start, len(result))


In [9]:
accuracy_score_labels (y_test,y_pred)

The accuracy score for aid_centers  was:  0.98 .
The accuracy score for aid_related  was:  0.73 .
The accuracy score for buildings  was:  0.94 .
The accuracy score for clothing  was:  0.98 .
The accuracy score for cold  was:  0.98 .
The accuracy score for death  was:  0.96 .
The accuracy score for direct_report  was:  0.82 .
The accuracy score for earthquake  was:  0.96 .
The accuracy score for electricity  was:  0.97 .
The accuracy score for fire  was:  0.99 .
The accuracy score for floods  was:  0.95 .
The accuracy score for food  was:  0.94 .
The accuracy score for hospitals  was:  0.99 .
The accuracy score for infrastructure_related  was:  0.92 .
The accuracy score for medical_help  was:  0.90 .
The accuracy score for medical_products  was:  0.94 .
The accuracy score for military  was:  0.97 .
The accuracy score for missing_people  was:  0.98 .
The accuracy score for money  was:  0.98 .
The accuracy score for offer  was:  0.99 .
The accuracy score for other_aid  was:  0.83 .
The ac

In [10]:
recall_score_labels (y_test,y_pred)

The recall score for aid_centers  was:  0.00 .
The recall score for aid_related  was:  0.80 .
The recall score for buildings  was:  0.15 .
The recall score for clothing  was:  0.19 .
The recall score for cold  was:  0.16 .
The recall score for death  was:  0.28 .
The recall score for direct_report  was:  0.46 .
The recall score for earthquake  was:  0.81 .
The recall score for electricity  was:  0.04 .
The recall score for fire  was:  0.02 .
The recall score for floods  was:  0.52 .
The recall score for food  was:  0.72 .
The recall score for hospitals  was:  0.02 .
The recall score for infrastructure_related  was:  0.00 .
The recall score for medical_help  was:  0.15 .
The recall score for medical_products  was:  0.10 .
The recall score for military  was:  0.16 .
The recall score for missing_people  was:  0.02 .
The recall score for money  was:  0.06 .
The recall score for offer  was:  0.00 .
The recall score for other_aid  was:  0.05 .
The recall score for other_infrastructure  was: 

In [16]:
model_scores(y_test,y_pred)

Mean AUC: 0.61  Max AUC: 0.90  Min AUC: 0.50
Mean f1 score: 0.29  Max f1 score: 0.85  Min f1 score: 0.00
Mean precision score: 0.64  Max precision score: 1.00  Min precision score: 0.00
Mean accuracy score: 0.94  Max accuracy score: 0.99  Min accuracy score: 0.73
Mean recall score: 0.24  Max recall score: 0.81  Min recall score: 0.00


  _warn_prf(average, modifier, msg_start, len(result))


In [31]:
AUC_ROC_column(y_test,y_pred,'aid_centers')

The AUC for aid_centers  was:  0.50 .


In [36]:
f1_score_column(y_test,y_pred,'aid_centers')

The f1 score for aid_centers was:  0.00 .


In [39]:
precision_score_column(y_test,y_pred,'aid_centers')

The precision score for aid_centers was:  0.00 .


In [40]:
accuracy_score_column(y_test,y_pred,'aid_centers')

The accuracy score for aid_centers was:  0.98 .


In [41]:
recall_score_column(y_test,y_pred,'aid_centers')

The recall score for aid_centers was:  0.00 .
