This notebook aims to find better parameters for the evaluation model.
For details on the construction and decision-making process, take a look at the ML-Pipeline notebook.


Importing the required libraries and loading the data

In [1]:
import numpy as np
import pandas as pd
import re
import nltk
from nltk.corpus import stopwords
from nltk.stem.wordnet import WordNetLemmatizer
from nltk.tokenize import word_tokenize
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.pipeline import Pipeline
import sqlite3
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.multioutput import MultiOutputClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix
from sklearn.metrics import roc_auc_score
from sklearn.metrics import classification_report
from sklearn.metrics import f1_score
from sklearn.metrics import precision_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import recall_score
import statistics




def load_data(db_path='Messages.db'):
    '''Load the Messages table and split it into features and labels.

    Parameters
    ----------
    db_path : str, optional
        Path of the SQLite database file to read. Defaults to 'Messages.db',
        the file the notebook has always used.

    Returns
    -------
    X : numpy.ndarray
        Values of the 'message' column.
    y : pandas.DataFrame
        Every column except 'message', 'genre_news' and 'genre_social',
        in the order produced by ``Index.difference``.

    Raises
    ------
    KeyError
        If the table has no 'index' or no 'message' column.
    '''

    conn = sqlite3.connect(db_path)
    try:
        df = pd.read_sql('SELECT * FROM Messages', conn)
    finally:
        # Release the connection even when the query fails, so the database
        # file is not left open by a half-executed cell.
        conn.close()

    # 'index' is the row index that was written to the table alongside the data
    df = df.drop(columns=['index'])

    # First scenario ignores the genre feature: only the text is used as input
    X = df['message'].values
    y = df[df.columns.difference(['message', 'genre_news', 'genre_social'])]

    return X, y



# Load the message texts (X) and the label frame (y) used by the cells below
X, y = load_data()

In [2]:
# Kept as a set: tokenize() only tests membership, once per token of every
# message, and a set answers that in constant time instead of scanning a list.
stop_words = set(stopwords.words("english"))
# Shared WordNet lemmatizer instance reused by tokenize()
lemmatizer = WordNetLemmatizer()

def tokenize(text):
    '''Turn a raw message into a list of lemmatized tokens.

    The text is lower-cased, every character that is not a letter is replaced
    by a space, the result is word-tokenized and stop words are dropped. Each
    remaining token is lemmatized with the lemmatizer's default part of speech
    and then, in order, as a verb, an adjective and an adverb.
    '''
    letters_only = re.sub(r"[^a-zA-Z]", " ", text.lower())

    lemmas = []
    for token in word_tokenize(letters_only):
        if token in stop_words:
            continue

        # default lemmatization first, then verb -> adjective -> adverb
        lemma = lemmatizer.lemmatize(token)
        for part_of_speech in ('v', 'a', 'r'):
            lemma = lemmatizer.lemmatize(lemma, pos=part_of_speech)

        lemmas.append(lemma)

    return lemmas

In [98]:
def model_pipeline():
    '''Build the grid search wrapped around the text-classification pipeline.

    The pipeline chains a CountVectorizer (using tokenize), a TfidfTransformer
    and a MultiOutputClassifier around a RandomForestClassifier. The grid
    currently holds a single candidate per parameter.

    Returns the unfitted GridSearchCV object.
    '''
    steps = [
        ('vect', CountVectorizer(tokenizer=tokenize)),
        ('tfidf', TfidfTransformer()),
        ('clf', MultiOutputClassifier(estimator=RandomForestClassifier())),
    ]
    text_pipeline = Pipeline(steps)

    # Values explored by the grid search. Further knobs that are currently
    # switched off:
    #   'vect__ngram_range': ((1, 1), (1, 2)),
    #   'vect__max_df': (0.5, 0.75, 1.0),
    #   'vect__max_features': (None, 5000, 10000),
    #   'tfidf__use_idf': (True, False),
    search_space = {
        'clf__estimator__n_estimators': [150],
        'clf__estimator__max_depth': [220],
        'clf__estimator__random_state': [42],
    }

    return GridSearchCV(text_pipeline, param_grid=search_space, verbose=1, n_jobs=3)

In [99]:
# Fixed seed so the train/test split is the same on every run
random_state=42
# Hold out 20% of the samples for evaluation
X_train, X_test, y_train, y_test = train_test_split(X, y,test_size=0.2,random_state=random_state)

# Run the grid search on the training split, then predict labels for the held-out messages
model = model_pipeline()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=3)]: Using backend LokyBackend with 3 concurrent workers.
[Parallel(n_jobs=3)]: Done   5 out of   5 | elapsed: 18.3min finished


In [100]:
def AUC_ROC(y_test,y_pred):
    '''Compute the area under the ROC curve for each label column.

    y_test is indexed by position with .iloc and y_pred with [:, i], one
    column per label. Prints the mean, maximum and minimum of the scores and
    returns the per-label scores as a list.
    '''
    label_count = y_test.shape[1]
    scores = [
        roc_auc_score(y_test.iloc[:, col], y_pred[:, col])
        for col in range(label_count)
    ]

    mean_text = "%.2f" % statistics.mean(scores)
    max_text = "%.2f" % max(scores)
    min_text = "%.2f" % min(scores)
    print('Mean AUC: ', mean_text, 'Max AUC:', max_text, 'Min AUC:', min_text)

    return scores



def f1_score_labels(y_test,y_pred):
    '''Calculates the f1 score for every label, displays it and returns the list
    Also displays the mean, maximum and minimum values.

    Parameters
    ----------
    y_test : pandas.DataFrame
        True labels, one column per label; the column names are used in the
        printed report.
    y_pred : array indexable as y_pred[:, i]
        Predicted labels, columns in the same order as y_test.

    Returns
    -------
    list
        The f1 score of each label, in column order.
    '''

    f1_score_model = []
    # Label names come from y_test itself, not from a notebook-level global,
    # so the function works on any frame it is handed.
    for i, label in enumerate(y_test.columns):
        f1_score_column = f1_score(y_test.iloc[:,i],y_pred[:,i])
        f1_score_model.append(f1_score_column)
        print('The f1 score for',label,' was: ',"%.2f" % f1_score_column,'.')

    print('Mean f1 score: ',"%.2f" % statistics.mean(f1_score_model),'Max f1 score:',"%.2f" % max(f1_score_model),'Min f1 score:',"%.2f" % min (f1_score_model))

    return f1_score_model
    



def precision_score_labels(y_test,y_pred):
    '''Calculates the precision score for every label, displays it and returns the list
    Also displays the mean, maximum and minimum values.

    Parameters
    ----------
    y_test : pandas.DataFrame
        True labels, one column per label; the column names are used in the
        printed report.
    y_pred : array indexable as y_pred[:, i]
        Predicted labels, columns in the same order as y_test.

    Returns
    -------
    list
        The precision score of each label, in column order.
    '''

    precision_score_model = []
    # Label names come from y_test itself, not from a notebook-level global,
    # so the function works on any frame it is handed.
    for i, label in enumerate(y_test.columns):
        precision_score_column = precision_score(y_test.iloc[:,i],y_pred[:,i])
        precision_score_model.append(precision_score_column)
        print('The precision score for',label,' was: ',"%.2f" % precision_score_column,'.')

    print('Mean precision score: ',"%.2f" % statistics.mean(precision_score_model),'Max precision score:',"%.2f" % max(precision_score_model),'Min precision score:',"%.2f" % min (precision_score_model))

    return precision_score_model
    
    


def accuracy_score_labels(y_test,y_pred):
    '''Calculates the accuracy score for every label, displays it and returns the list
    Also displays the mean, maximum and minimum values.

    Parameters
    ----------
    y_test : pandas.DataFrame
        True labels, one column per label; the column names are used in the
        printed report.
    y_pred : array indexable as y_pred[:, i]
        Predicted labels, columns in the same order as y_test.

    Returns
    -------
    list
        The accuracy score of each label, in column order.
    '''

    accuracy_score_model = []
    # Label names come from y_test itself, not from a notebook-level global,
    # so the function works on any frame it is handed.
    for i, label in enumerate(y_test.columns):
        accuracy_score_column = accuracy_score(y_test.iloc[:,i],y_pred[:,i])
        accuracy_score_model.append(accuracy_score_column)
        print('The accuracy score for',label,' was: ',"%.2f" % accuracy_score_column,'.')

    print('Mean accuracy score: ',"%.2f" % statistics.mean(accuracy_score_model),'Max accuracy score:',"%.2f" % max(accuracy_score_model),'Min accuracy score:',"%.2f" % min (accuracy_score_model))

    return accuracy_score_model
    



def recall_score_labels(y_test,y_pred):
    '''Calculates the recall score for every label, displays it and returns the list
    Also displays the mean, maximum and minimum values.

    Parameters
    ----------
    y_test : pandas.DataFrame
        True labels, one column per label; the column names are used in the
        printed report.
    y_pred : array indexable as y_pred[:, i]
        Predicted labels, columns in the same order as y_test.

    Returns
    -------
    list
        The recall score of each label, in column order.
    '''

    recall_score_model = []
    # Label names come from y_test itself, not from a notebook-level global,
    # so the function works on any frame it is handed.
    for i, label in enumerate(y_test.columns):
        recall_score_column = recall_score(y_test.iloc[:,i],y_pred[:,i])
        recall_score_model.append(recall_score_column)
        print('The recall score for',label,' was: ',"%.2f" % recall_score_column,'.')

    print('Mean recall score: ',"%.2f" % statistics.mean(recall_score_model),'Max recall score:',"%.2f" % max(recall_score_model),'Min recall score:',"%.2f" % min (recall_score_model))

    return recall_score_model
    

In [101]:
# Per-label ROC AUC on the held-out split; the returned list is shown as the cell output
AUC_ROC(y_test,y_pred)

Mean AUC:  0.61 Max AUC: 0.89 Min AUC: 0.50


[0.4998723838693211,
 0.7283671518614524,
 0.5809552174697049,
 0.5828195907183835,
 0.5745079572073704,
 0.6485529579794032,
 0.694093046771085,
 0.8910624667545504,
 0.5248212815452775,
 0.5084931060879359,
 0.7541051945402909,
 0.8517406142984758,
 0.5,
 0.4995904995904996,
 0.5692454181560169,
 0.5625456243890972,
 0.5912776991168824,
 0.5079365079365079,
 0.5244841372194997,
 0.5,
 0.5187232576106577,
 0.4996022275258552,
 0.5249435561811495,
 0.5438805685395401,
 0.7757916609959696,
 0.527841615869785,
 0.4998710343048749,
 0.7177773632699447,
 0.5,
 0.7947708154608711,
 0.5,
 0.5623849459158422,
 0.7489759892468127,
 0.8423310288521912]

In [102]:
# Per-label f1 report on the held-out split
f1_score_labels(y_test,y_pred)

The f1 score for aid_centers  was:  0.00 .
The f1 score for aid_related  was:  0.76 .
The f1 score for buildings  was:  0.27 .
The f1 score for clothing  was:  0.27 .
The f1 score for cold  was:  0.26 .
The f1 score for death  was:  0.44 .
The f1 score for direct_report  was:  0.55 .
The f1 score for earthquake  was:  0.84 .
The f1 score for electricity  was:  0.09 .
The f1 score for fire  was:  0.03 .
The f1 score for floods  was:  0.66 .
The f1 score for food  was:  0.78 .
The f1 score for hospitals  was:  0.00 .
The f1 score for infrastructure_related  was:  0.00 .
The f1 score for medical_help  was:  0.24 .
The f1 score for medical_products  was:  0.22 .
The f1 score for military  was:  0.29 .
The f1 score for missing_people  was:  0.03 .
The f1 score for money  was:  0.09 .
The f1 score for offer  was:  0.00 .
The f1 score for other_aid  was:  0.08 .
The f1 score for other_infrastructure  was:  0.00 .
The f1 score for other_weather  was:  0.10 .
The f1 score for refugees  was:  0.

An f1 score of 0 for some labels indicates that those labels are imbalanced; conducting a grid search will help us gain further insight into this behaviour.

In [103]:
# Per-label precision report on the held-out split
precision_score_labels(y_test,y_pred)

The precision score for aid_centers  was:  0.00 .
The precision score for aid_related  was:  0.72 .
The precision score for buildings  was:  0.77 .
The precision score for clothing  was:  0.78 .
The precision score for cold  was:  0.89 .
The precision score for death  was:  0.78 .
The precision score for direct_report  was:  0.72 .
The precision score for earthquake  was:  0.90 .
The precision score for electricity  was:  0.67 .
The precision score for fire  was:  0.50 .
The precision score for floods  was:  0.91 .
The precision score for food  was:  0.83 .
The precision score for hospitals  was:  0.00 .
The precision score for infrastructure_related  was:  0.00 .
The precision score for medical_help  was:  0.72 .
The precision score for medical_products  was:  0.87 .
The precision score for military  was:  0.72 .
The precision score for missing_people  was:  1.00 .
The precision score for money  was:  0.56 .
The precision score for offer  was:  0.00 .
The precision score for other_aid

  _warn_prf(average, modifier, msg_start, len(result))


In [104]:
# Per-label accuracy report on the held-out split
accuracy_score_labels (y_test,y_pred)

The accuracy score for aid_centers  was:  0.98 .
The accuracy score for aid_related  was:  0.73 .
The accuracy score for buildings  was:  0.94 .
The accuracy score for clothing  was:  0.98 .
The accuracy score for cold  was:  0.98 .
The accuracy score for death  was:  0.96 .
The accuracy score for direct_report  was:  0.82 .
The accuracy score for earthquake  was:  0.96 .
The accuracy score for electricity  was:  0.97 .
The accuracy score for fire  was:  0.99 .
The accuracy score for floods  was:  0.95 .
The accuracy score for food  was:  0.94 .
The accuracy score for hospitals  was:  0.99 .
The accuracy score for infrastructure_related  was:  0.92 .
The accuracy score for medical_help  was:  0.90 .
The accuracy score for medical_products  was:  0.94 .
The accuracy score for military  was:  0.97 .
The accuracy score for missing_people  was:  0.98 .
The accuracy score for money  was:  0.98 .
The accuracy score for offer  was:  0.99 .
The accuracy score for other_aid  was:  0.83 .
The ac

In [105]:
# Per-label recall report on the held-out split
recall_score_labels (y_test,y_pred)

The recall score for aid_centers  was:  0.00 .
The recall score for aid_related  was:  0.81 .
The recall score for buildings  was:  0.17 .
The recall score for clothing  was:  0.17 .
The recall score for cold  was:  0.15 .
The recall score for death  was:  0.30 .
The recall score for direct_report  was:  0.44 .
The recall score for earthquake  was:  0.79 .
The recall score for electricity  was:  0.05 .
The recall score for fire  was:  0.02 .
The recall score for floods  was:  0.51 .
The recall score for food  was:  0.73 .
The recall score for hospitals  was:  0.00 .
The recall score for infrastructure_related  was:  0.00 .
The recall score for medical_help  was:  0.15 .
The recall score for medical_products  was:  0.13 .
The recall score for military  was:  0.19 .
The recall score for missing_people  was:  0.02 .
The recall score for money  was:  0.05 .
The recall score for offer  was:  0.00 .
The recall score for other_aid  was:  0.04 .
The recall score for other_infrastructure  was: 

In [106]:
# Confusion matrices for the first two label columns.
# NOTE(review): cm_y1 / cm_y2 hold columns 0 and 1, whereas the cr_y* names
# below carry the column index itself — keep the offset in mind when reading.
cm_y1 = confusion_matrix(y_test.iloc[:,0],y_pred[:,0])
cm_y2 = confusion_matrix(y_test.iloc[:,1],y_pred[:,1])



# Classification reports for selected label columns; the number in each name
# is the column position in y_test / y_pred.
# NOTE(review): presumably these are the labels with the weakest scores above — confirm.
cr_y0 = classification_report(y_test.iloc[:,0],y_pred[:,0])
cr_y9 = classification_report(y_test.iloc[:,9],y_pred[:,9])
cr_y13 = classification_report(y_test.iloc[:,13],y_pred[:,13])
cr_y19 = classification_report(y_test.iloc[:,19],y_pred[:,19])
cr_y21 = classification_report(y_test.iloc[:,21],y_pred[:,21])
cr_y26 = classification_report(y_test.iloc[:,26],y_pred[:,26])
cr_y28 = classification_report(y_test.iloc[:,28],y_pred[:,28])
cr_y30 = classification_report(y_test.iloc[:,30],y_pred[:,30])
cr_y31 = classification_report(y_test.iloc[:,31],y_pred[:,31])

  _warn_prf(average, modifier, msg_start, len(result))


In [107]:
# classification_report returned a pre-formatted string here, so print keeps its table layout
print (cr_y31)

              precision    recall  f1-score   support

           0       0.95      1.00      0.97      3748
           1       0.81      0.13      0.22       229

    accuracy                           0.95      3977
   macro avg       0.88      0.56      0.60      3977
weighted avg       0.94      0.95      0.93      3977



In [112]:
# Parameter combination selected by the fitted grid search
model.best_params_

{'clf__estimator__max_depth': 220,
 'clf__estimator__n_estimators': 150,
 'clf__estimator__random_state': 42}

So far the following values have been tested. max_depth: 5, 6, 50, 100, 150, 200, 220, 250 — 220 was the best one. n_estimators: 50, 100, 150 — 150 proved to be the best. However, compared with the already implemented model there is no significant difference, so there is no reason to overwrite it for now.