In [1]:
from nltk.tokenize import RegexpTokenizer
from sklearn.metrics import accuracy_score, confusion_matrix,classification_report
from sklearn.svm import LinearSVC
from sklearn.neural_network import MLPClassifier
from time import time
import numpy as np
import os 
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer 
from math import floor
from ipy_table import *

In [2]:
TOKENIZER=RegexpTokenizer(r'([a-zA-Z]+|\?)')

In [3]:
def tokenize(input_,stop_words):
    ## recibe un string, y retorna una lista con los tokens que le pertenece
    tokens=TOKENIZER.tokenize(input_)
    stemmer = PorterStemmer()
    st=stemmer.stem
    lowered_tokens=[st(token.lower()) for token in tokens if token not in stop_words]
    return lowered_tokens

def parse_stories(story):
    substories=[]
    stop_words=[]
    # stop_words=set(stopwords.words('english'))
    for line in story:
        line_id,statement=line.split(" ",maxsplit=1)
        if line_id=="1":
            substory=[]
        if "\t" in statement:
            ## maneajr Q and Answer
            query,answer,_=statement.split("\t")
            query=tokenize(query,stop_words)
            ## hacemos una copia de la lista porque es mutable
            substories.append((substory[:],query,answer))
        else:
            ## manejar un fact
            statement=tokenize(statement,stop_words)
            substory.append(statement)
    return substories

def get_max_substory_length(stories):
    # for i in stories:
    #     print(i)
    #     print(type(i))

    substories_lenght=[len(substory) for substory,_,_ in stories]
    return max(substories_lenght)

PAD_TOKEN="_PAD"
def pad_substories(stories,max_lenght):
    for substory,_,_ in stories:
        for _ in  range(max_lenght-len(substory)):
            substory.append([PAD_TOKEN])
    return stories
## n tamano del vocavu

def get_vocabs(stories):
    stories_tokens=[]
    answers_tokens=[]
    for substory,query,answer in stories:
        stories_tokens +=[token for fact in substory for token in fact]
        stories_tokens +=[token for token in query]
        answers_tokens +=[answer]
    stories_vocab=sorted(set(stories_tokens))
    answers_vocab=sorted(set(answers_tokens))
    stories_token_map={token: i for i,token in enumerate(stories_vocab)}
    answers_token_map={token: i for i,token in enumerate(answers_vocab)}
    return stories_vocab, stories_token_map, answers_vocab, answers_token_map

def one_hot_vector(i,dim):
    vector=np.zeros(dim) ## vector de puros zeors *dim
    vector[i]=1
    return vector

def one_hot_encode(stories,stories_token_map,answers_token_map):
    stories_encoded=[]
    stories_vocab_size=len(stories_token_map)
    # answers_vocab_size=len(answers_token_map)
    for substory,query,answer in stories:
        statements_encoded=[]
        for statement in substory:
            tokens_encoded=[one_hot_vector(stories_token_map[token],stories_vocab_size) for token in statement]
            ## ahora la suma de bag of words
            statements_encoded.append(sum(tokens_encoded))
        question_encoded=(sum([one_hot_vector(stories_token_map[token],stories_vocab_size) for token in query]))
        statements_encoded.append(question_encoded) ## agregarle la pregunta al final de los statements
        answer_encoded=answers_token_map[answer]
        ## statmens encoded es una lista de vecotres, np. conactenate apila los vectores
        stories_encoded.append((np.concatenate(statements_encoded),answer_encoded))
    return stories_encoded



def evaluate_classifier(clf,test_stores,test_answers,target_names):
    test_predicted=clf.predict(test_stores)
    ## (respuestas reales
    accuracy=accuracy_score(test_answers,test_predicted)
    conf_matr=confusion_matrix(test_answers,test_predicted)
    print(classification_report(test_answers,test_predicted,labels=clf.classes_,target_names=target_names))
    print(conf_matr)
    print(accuracy)
    return accuracy

In [4]:
def load_qa(DATASET_PATH="IIC2613_Course\en\qa6_yes-no-questions",other=False):
    if other:
        lista_dir=os.listdir("D:/IA Github/IIC2613_Course/en/")
        train_stories=[]
        test_stories=[]
        for el in lista_dir:
            if "qa" in el and "train" in el:
                with open("IIC2613_Course\en\{}".format(el)) as file:
                    train_stories+=parse_stories(file.readlines())
            elif "qa" in el and "test" in el:
                with open("IIC2613_Course\en\{}".format(el)) as file:
                    test_stories+=parse_stories(file.readlines())
        test_stories=np.array(test_stories)
        train_stories=np.array(train_stories)
    else:     
        with open(DATASET_PATH+"_train.txt") as train_file:
            train_stories=parse_stories(train_file.readlines())
        with open(DATASET_PATH+"_test.txt") as test_file:
            test_stories=parse_stories(test_file.readlines())
    max_substory_length=get_max_substory_length(np.concatenate((train_stories,test_stories)))
    padded_train_stories=pad_substories(train_stories,max_substory_length)
    padded_test_stories=pad_substories(test_stories,max_substory_length)
    stories_vocab,stories_token_map,answers_vocab,answers_token_map=get_vocabs(padded_train_stories+padded_test_stories)
    encoded_train_stories=one_hot_encode(padded_train_stories,stories_token_map,answers_token_map)
    encoded_test_stories=one_hot_encode(padded_test_stories,stories_token_map,answers_token_map)
    stories_vocab_size=len(stories_vocab)
    answers_vocab_size=len(answers_vocab)
    feature_space_size=len(encoded_train_stories[0][0])
    return encoded_train_stories,encoded_test_stories,stories_token_map,answers_token_map,max_substory_length

In [5]:
 def multiple_nn(train_stories,train_answers,test_stories,test_answers):
    results=[]
    hidden_layer_mean=floor((len(train_stories)+len(train_answers))/2) ## hidden layer es mean of input/output layer
    out_four_step=floor((hidden_layer_mean)/8)
    for number_atributes in range(out_four_step,hidden_layer_mean,out_four_step):
        clf = MLPClassifier(max_iter=100,solver='sgd',hidden_layer_sizes=(number_atributes,), random_state=2018,learning_rate="adaptive") #random state = seed
        clf.fit(train_stories,train_answers)
        
        train_predicted=clf.predict(train_stories)
        ## (respuestas reales
        accuracy_t=accuracy_score(train_answers,train_predicted)


        test_predicted=clf.predict(test_stories)
        ## (respuestas reales

        accuracy=accuracy_score(test_answers,test_predicted)
        results.append((accuracy,number_atributes,clf))

    return max(results,key=lambda x:x[0])

In [6]:
def multiple_svm(train_stories,train_answers,test_stories,test_answers):
    negative_exp=[-i for i in range(1,6)] ## -5 
    positive_exp=[i for i in range(6,16)]
    results=[]
    for i in range(len(negative_exp)):  
        c=2**negative_exp[i]
        svm2=LinearSVC(
            penalty="l2",
            loss="squared_hinge",
            C=c,
            max_iter=10000
        )
        svm2.fit(train_stories,train_answers)

        train_predicted=svm2.predict(train_stories)
        ## (respuestas reales
        accuracy_t=accuracy_score(train_answers,train_predicted)


        test_predicted=svm2.predict(test_stories)
        ## (respuestas reales
        accuracy=accuracy_score(test_answers,test_predicted)
        results.append((abs(accuracy-accuracy_t),negative_exp[i],svm2))

    for i in range(len(positive_exp)):  
        svm=LinearSVC(
            penalty="l2",
            loss="squared_hinge",
            C=2**positive_exp[i],
            max_iter=10000
        )
        svm.fit(train_stories,train_answers)

        train_predicted=svm.predict(train_stories)
        ## (respuestas reales
        accuracy_t=accuracy_score(train_answers,train_predicted)

        test_predicted=svm.predict(test_stories)
        ## (respuestas reales
        accuracy=accuracy_score(test_answers,test_predicted)
        results.append((abs(accuracy-accuracy_t),positive_exp[i],svm))
    return max(results,key=lambda x:x[0])

In [7]:
def main_svm():
    # lista_dir=os.listdir("D:/IA Github/IIC2613_Course/en/")
    lista_dir=["qa20_agents-motivations_train.txt"]
    train_stories=[]
    test_stories=[]
    statistics={}
    for el in lista_dir:
        if "qa" in el and "train" in el:
            official=el
            el="IIC2613_Course\en\{}".format(el.replace("_train.txt",""))
            train_split,test_split,stories_token_map,answers_token_map,number_atributes=load_qa(el,False)
            train_stories,train_answers=zip(*train_split)
            test_stories,test_answers=zip(*test_split)
            best_svm=multiple_svm(train_stories,train_answers,test_stories,test_answers)
            # svm=LinearSVC(
                # penalty="l2",
                # loss="squared_hinge",
                # C=1
            # )
            # svm.fit(train_stories,train_answers)
            print(best_svm)
            svm=best_svm[2]
            print("Evaluar sobre el split de training")
            train_ac=evaluate_classifier(svm,train_stories,train_answers,answers_token_map.keys())

            print("Evaluar sobre el split de testing")
            test_ac=evaluate_classifier(svm,test_stories,test_answers,answers_token_map.keys())
            qa_string=official.split("_")[0]
            number_qa=int(qa_string.split("qa")[-1])
            statistics[number_qa]={"C":svm.C,"test_accuracy":test_ac,"train_accuracy":train_ac}
    print(statistics)


In [8]:
def main_nn():
    lista_dir=os.listdir("D:/IA Github/IIC2613_Course/en/")
    # lista_dir=["qa20_agents-motivations_train.txt"]
    train_stories=[]
    test_stories=[]
    statistics={}
    for el in lista_dir:
        if "qa" in el and "train" in el:
            official=el
            el="IIC2613_Course\en\{}".format(el.replace("_train.txt",""))
            # el="{}".format(el.replace("_train.txt",""))

            train_split,test_split,stories_token_map,answers_token_map,number_atributes=load_qa(el,False)
            train_stories,train_answers=zip(*train_split)
            test_stories,test_answers=zip(*test_split)
            best=multiple_nn(train_stories,train_answers,test_stories,test_answers)
            print(best)
            clf=best[-1]
            clf.fit(train_stories,train_answers)

            print("Evaluar sobre el split de training")
            train_ac=evaluate_classifier(clf,train_stories,train_answers,answers_token_map.keys())

            print("Evaluar sobre el split de testing")
            test_ac=evaluate_classifier(clf,test_stories,test_answers,answers_token_map.keys())
            qa_string=official.split("_")[0]
            number_qa=int(qa_string.split("qa")[-1])
            print(test_ac,train_ac)
            statistics[number_qa]={"Size":best[1],"test_accuracy":test_ac,"train_accuracy":train_ac}
    return statistics

In [9]:
diction=main_nn()
lista=[[i,diction[i]["Size"],diction[i]["train_accuracy"],diction[i]["test_accuracy"]] for i in sorted(diction.keys())]
make_table([["Task","Hidden Layer Size","Train Accuracy","Test Accuracy"]]+lista)
apply_theme('basic')



(0.48099999999999998, 500, MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(500,), learning_rate='adaptive',
       learning_rate_init=0.001, max_iter=100, momentum=0.9,
       n_iter_no_change=10, nesterovs_momentum=True, power_t=0.5,
       random_state=2018, shuffle=True, solver='sgd', tol=0.0001,
       validation_fraction=0.1, verbose=False, warm_start=False))


  'precision', 'predicted', average, warn_for)


Evaluar sobre el split de training
              precision    recall  f1-score   support

          no       0.00      0.00      0.00       122
         yes       0.57      0.78      0.66       466
       maybe       0.65      0.57      0.61       412

   micro avg       0.60      0.60      0.60      1000
   macro avg       0.40      0.45      0.42      1000
weighted avg       0.53      0.60      0.55      1000

[[  0  99  23]
 [  0 362 104]
 [  0 178 234]]
0.596
Evaluar sobre el split de testing
              precision    recall  f1-score   support

          no       0.00      0.00      0.00       150
         yes       0.46      0.68      0.55       437
       maybe       0.52      0.44      0.48       413

   micro avg       0.48      0.48      0.48      1000
   macro avg       0.33      0.37      0.34      1000
weighted avg       0.42      0.48      0.44      1000

[[  0 116  34]
 [  1 299 137]
 [  1 230 182]]
0.481
0.481 0.596




(0.44700000000000001, 625, MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(625,), learning_rate='adaptive',
       learning_rate_init=0.001, max_iter=100, momentum=0.9,
       n_iter_no_change=10, nesterovs_momentum=True, power_t=0.5,
       random_state=2018, shuffle=True, solver='sgd', tol=0.0001,
       validation_fraction=0.1, verbose=False, warm_start=False))




Evaluar sobre el split de training
              precision    recall  f1-score   support

      office       0.69      0.45      0.55       147
      garden       0.56      0.70      0.62       183
    bathroom       0.63      0.54      0.58       153
     kitchen       0.49      0.69      0.57       196
     hallway       0.68      0.47      0.56       158
     bedroom       0.59      0.56      0.58       163

   micro avg       0.58      0.58      0.58      1000
   macro avg       0.61      0.57      0.58      1000
weighted avg       0.60      0.58      0.58      1000

[[ 66  26  11  25   7  12]
 [  3 129  12  21   7  11]
 [  5  24  83  24   4  13]
 [  7  20  10 135   8  16]
 [ 10  10   8  43  75  12]
 [  4  21   8  28  10  92]]
0.58
Evaluar sobre el split de testing
              precision    recall  f1-score   support

      office       0.47      0.30      0.37       153
      garden       0.46      0.53      0.49       171
    bathroom       0.45      0.45      0.45       164
   



(0.28299999999999997, 750, MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(750,), learning_rate='adaptive',
       learning_rate_init=0.001, max_iter=100, momentum=0.9,
       n_iter_no_change=10, nesterovs_momentum=True, power_t=0.5,
       random_state=2018, shuffle=True, solver='sgd', tol=0.0001,
       validation_fraction=0.1, verbose=False, warm_start=False))




Evaluar sobre el split de training
              precision    recall  f1-score   support

      office       0.45      0.44      0.44       164
      garden       0.46      0.25      0.32       157
    bathroom       0.37      0.62      0.46       182
     kitchen       0.47      0.38      0.42       160
     hallway       0.40      0.31      0.35       160
     bedroom       0.48      0.54      0.51       177

   micro avg       0.43      0.43      0.43      1000
   macro avg       0.44      0.42      0.42      1000
weighted avg       0.44      0.43      0.42      1000

[[ 72   6  42   8  16  20]
 [ 21  39  34  17  14  32]
 [ 17  11 112  14  14  14]
 [ 17  13  35  61  16  18]
 [ 18   7  50  15  50  20]
 [ 16   8  30  14  14  95]]
0.429
Evaluar sobre el split de testing
              precision    recall  f1-score   support

      office       0.31      0.33      0.32       179
      garden       0.30      0.17      0.22       163
    bathroom       0.25      0.51      0.34       152
  



(0.441, 750, MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(750,), learning_rate='adaptive',
       learning_rate_init=0.001, max_iter=100, momentum=0.9,
       n_iter_no_change=10, nesterovs_momentum=True, power_t=0.5,
       random_state=2018, shuffle=True, solver='sgd', tol=0.0001,
       validation_fraction=0.1, verbose=False, warm_start=False))




Evaluar sobre el split de training
              precision    recall  f1-score   support

      office       0.69      0.59      0.64       163
      garden       0.62      0.58      0.60       158
    bathroom       0.76      0.57      0.65       152
     kitchen       0.51      0.82      0.63       200
     hallway       0.67      0.62      0.65       163
     bedroom       0.65      0.52      0.58       164

   micro avg       0.62      0.62      0.62      1000
   macro avg       0.65      0.61      0.62      1000
weighted avg       0.65      0.62      0.62      1000

[[ 96   9   7  36   7   8]
 [  8  91   5  34   9  11]
 [  8  13  86  27   9   9]
 [  7  13   5 164   7   4]
 [ 10  13   1  25 101  13]
 [ 10   8   9  35  17  85]]
0.623
Evaluar sobre el split de testing
              precision    recall  f1-score   support

      office       0.48      0.34      0.40       172
      garden       0.53      0.34      0.41       183
    bathroom       0.57      0.40      0.47       179
  



(0.27000000000000002, 750, MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(750,), learning_rate='adaptive',
       learning_rate_init=0.001, max_iter=100, momentum=0.9,
       n_iter_no_change=10, nesterovs_momentum=True, power_t=0.5,
       random_state=2018, shuffle=True, solver='sgd', tol=0.0001,
       validation_fraction=0.1, verbose=False, warm_start=False))




Evaluar sobre el split de training
              precision    recall  f1-score   support

        park       0.50      0.50      0.50       178
      office       0.51      0.47      0.49       159
      school       0.52      0.64      0.57       173
      cinema       0.49      0.55      0.52       172
     kitchen       0.52      0.57      0.54       169
     bedroom       0.60      0.35      0.44       149

   micro avg       0.52      0.52      0.52      1000
   macro avg       0.52      0.51      0.51      1000
weighted avg       0.52      0.52      0.51      1000

[[ 89  16  21  21  19  12]
 [ 17  74  15  23  23   7]
 [ 12  13 110  14  17   7]
 [ 18  16  22  94  17   5]
 [ 20  14  17  19  96   3]
 [ 23  11  28  22  13  52]]
0.515
Evaluar sobre el split de testing
              precision    recall  f1-score   support

        park       0.28      0.33      0.30       162
      office       0.26      0.30      0.28       171
      school       0.24      0.27      0.25       176
  



(0.39300000000000002, 500, MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(500,), learning_rate='adaptive',
       learning_rate_init=0.001, max_iter=100, momentum=0.9,
       n_iter_no_change=10, nesterovs_momentum=True, power_t=0.5,
       random_state=2018, shuffle=True, solver='sgd', tol=0.0001,
       validation_fraction=0.1, verbose=False, warm_start=False))




Evaluar sobre el split de training
              precision    recall  f1-score   support

       mouse       0.54      0.68      0.60       290
       sheep       0.60      0.61      0.60       252
         cat       0.55      0.51      0.53       242
        wolf       0.63      0.44      0.51       216

   micro avg       0.57      0.57      0.57      1000
   macro avg       0.58      0.56      0.56      1000
weighted avg       0.57      0.57      0.57      1000

[[197  41  36  16]
 [ 47 154  31  20]
 [ 68  30 124  20]
 [ 55  33  34  94]]
0.569
Evaluar sobre el split de testing
              precision    recall  f1-score   support

       mouse       0.31      0.47      0.37       213
       sheep       0.41      0.50      0.45       239
         cat       0.35      0.33      0.34       207
        wolf       0.57      0.30      0.40       341

   micro avg       0.39      0.39      0.39      1000
   macro avg       0.41      0.40      0.39      1000
weighted avg       0.43      0.39



(0.46800000000000003, 625, MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(625,), learning_rate='adaptive',
       learning_rate_init=0.001, max_iter=100, momentum=0.9,
       n_iter_no_change=10, nesterovs_momentum=True, power_t=0.5,
       random_state=2018, shuffle=True, solver='sgd', tol=0.0001,
       validation_fraction=0.1, verbose=False, warm_start=False))




Evaluar sobre el split de training
              precision    recall  f1-score   support

        gray       0.53      0.43      0.47       226
       green       0.57      0.56      0.56       247
       white       0.54      0.47      0.50       247
      yellow       0.54      0.68      0.60       280

   micro avg       0.54      0.54      0.54      1000
   macro avg       0.54      0.54      0.53      1000
weighted avg       0.54      0.54      0.54      1000

[[ 97  33  35  61]
 [ 23 139  38  47]
 [ 30  45 117  55]
 [ 34  29  28 189]]
0.542
Evaluar sobre el split de testing
              precision    recall  f1-score   support

        gray       0.53      0.40      0.46       251
       green       0.50      0.44      0.47       257
       white       0.46      0.43      0.45       256
      yellow       0.42      0.61      0.49       236

   micro avg       0.47      0.47      0.47      1000
   macro avg       0.48      0.47      0.47      1000
weighted avg       0.48      0.47



(0.52000000000000002, 875, MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(875,), learning_rate='adaptive',
       learning_rate_init=0.001, max_iter=100, momentum=0.9,
       n_iter_no_change=10, nesterovs_momentum=True, power_t=0.5,
       random_state=2018, shuffle=True, solver='sgd', tol=0.0001,
       validation_fraction=0.1, verbose=False, warm_start=False))




Evaluar sobre el split de training
              precision    recall  f1-score   support

         yes       0.59      0.55      0.57       495
          no       0.59      0.64      0.61       505

   micro avg       0.59      0.59      0.59      1000
   macro avg       0.59      0.59      0.59      1000
weighted avg       0.59      0.59      0.59      1000

[[270 225]
 [184 321]]
0.591
Evaluar sobre el split de testing
              precision    recall  f1-score   support

         yes       0.50      0.47      0.48       480
          no       0.54      0.57      0.55       520

   micro avg       0.52      0.52      0.52      1000
   macro avg       0.52      0.52      0.52      1000
weighted avg       0.52      0.52      0.52      1000

[[225 255]
 [225 295]]
0.52
0.52 0.591




(0.52100000000000002, 500, MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(500,), learning_rate='adaptive',
       learning_rate_init=0.001, max_iter=100, momentum=0.9,
       n_iter_no_change=10, nesterovs_momentum=True, power_t=0.5,
       random_state=2018, shuffle=True, solver='sgd', tol=0.0001,
       validation_fraction=0.1, verbose=False, warm_start=False))




Evaluar sobre el split de training
              precision    recall  f1-score   support

         yes       0.65      0.59      0.62       495
          no       0.63      0.69      0.66       505

   micro avg       0.64      0.64      0.64      1000
   macro avg       0.64      0.64      0.64      1000
weighted avg       0.64      0.64      0.64      1000

[[292 203]
 [158 347]]
0.639
Evaluar sobre el split de testing
              precision    recall  f1-score   support

         yes       0.55      0.54      0.55       531
          no       0.49      0.50      0.49       469

   micro avg       0.52      0.52      0.52      1000
   macro avg       0.52      0.52      0.52      1000
weighted avg       0.52      0.52      0.52      1000

[[287 244]
 [235 234]]
0.521
0.521 0.639




(0.10299999999999999, 500, MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(500,), learning_rate='adaptive',
       learning_rate_init=0.001, max_iter=100, momentum=0.9,
       n_iter_no_change=10, nesterovs_momentum=True, power_t=0.5,
       random_state=2018, shuffle=True, solver='sgd', tol=0.0001,
       validation_fraction=0.1, verbose=False, warm_start=False))




Evaluar sobre el split de training
              precision    recall  f1-score   support

         w,w       0.20      0.42      0.27        92
         s,w       0.14      0.22      0.17        91
         n,w       0.18      0.14      0.16        85
         e,n       0.15      0.08      0.10        79
         w,n       0.19      0.24      0.21        85
         e,e       0.20      0.21      0.20        86
         s,e       0.21      0.19      0.20        83
         e,s       0.22      0.24      0.23        79
         n,n       0.16      0.10      0.12        87
         n,e       0.00      0.00      0.00        70
         s,s       0.22      0.21      0.22        84
         w,s       0.22      0.14      0.17        79

   micro avg       0.19      0.19      0.19      1000
   macro avg       0.17      0.18      0.17      1000
weighted avg       0.17      0.19      0.17      1000

[[39 12  4  2  6  9  3  5  4  0  2  6]
 [20 20  7  2 10  3  6  9  3  0  6  5]
 [14 12 12  2  9 10 



(0.29699999999999999, 875, MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(875,), learning_rate='adaptive',
       learning_rate_init=0.001, max_iter=100, momentum=0.9,
       n_iter_no_change=10, nesterovs_momentum=True, power_t=0.5,
       random_state=2018, shuffle=True, solver='sgd', tol=0.0001,
       validation_fraction=0.1, verbose=False, warm_start=False))




Evaluar sobre el split de training
              precision    recall  f1-score   support

      office       0.58      0.36      0.45       154
      garden       0.58      0.43      0.49       155
    bathroom       0.47      0.53      0.49       177
     kitchen       0.45      0.63      0.52       175
     hallway       0.45      0.47      0.46       173
     bedroom       0.52      0.49      0.50       166

   micro avg       0.49      0.49      0.49      1000
   macro avg       0.51      0.49      0.49      1000
weighted avg       0.50      0.49      0.49      1000

[[ 56   6  11  40  24  17]
 [  9  66  26  18  25  11]
 [  8  12  93  23  25  16]
 [  6   7  22 111  12  17]
 [  8  15  26  26  82  16]
 [  9   8  21  31  15  82]]
0.49
Evaluar sobre el split de testing
              precision    recall  f1-score   support

      office       0.17      0.11      0.13       149
      garden       0.40      0.17      0.24       171
    bathroom       0.38      0.47      0.42       187
   



(0.67000000000000004, 875, MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(875,), learning_rate='adaptive',
       learning_rate_init=0.001, max_iter=100, momentum=0.9,
       n_iter_no_change=10, nesterovs_momentum=True, power_t=0.5,
       random_state=2018, shuffle=True, solver='sgd', tol=0.0001,
       validation_fraction=0.1, verbose=False, warm_start=False))




Evaluar sobre el split de training
              precision    recall  f1-score   support

      garden       0.89      0.09      0.16        91
       bored       0.89      0.99      0.93       164
     thirsty       0.60      0.06      0.11        98
     kitchen       0.82      0.75      0.78       151
       tired       0.52      0.98      0.68       186
      hungry       0.83      0.82      0.83       158
     bedroom       0.96      0.96      0.96       152

   micro avg       0.75      0.75      0.75      1000
   macro avg       0.79      0.66      0.64      1000
weighted avg       0.78      0.75      0.70      1000

[[  8   0   1   0  82   0   0]
 [  0 162   0   0   0   0   2]
 [  1   2   6   0  89   0   0]
 [  0   8   0 113   0  26   4]
 [  0   1   3   0 182   0   0]
 [  0   5   0  23   0 130   0]
 [  0   5   0   1   0   0 146]]
0.747
Evaluar sobre el split de testing
              precision    recall  f1-score   support

      garden       0.33      0.02      0.04        97
 



(0.20000000000000001, 750, MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(750,), learning_rate='adaptive',
       learning_rate_init=0.001, max_iter=100, momentum=0.9,
       n_iter_no_change=10, nesterovs_momentum=True, power_t=0.5,
       random_state=2018, shuffle=True, solver='sgd', tol=0.0001,
       validation_fraction=0.1, verbose=False, warm_start=False))




Evaluar sobre el split de training
              precision    recall  f1-score   support

      office       0.70      0.51      0.59       145
      garden       0.75      0.54      0.63       158
    bathroom       0.55      0.82      0.66       210
     kitchen       0.83      0.49      0.62       138
     hallway       0.75      0.47      0.58       147
     bedroom       0.55      0.79      0.65       202

   micro avg       0.63      0.63      0.63      1000
   macro avg       0.69      0.60      0.62      1000
weighted avg       0.67      0.63      0.62      1000

[[ 74   7  28   5   4  27]
 [  6  86  33   0   2  31]
 [  8   4 172   4   6  16]
 [  7   6  22  68   6  29]
 [  7   8  31   3  69  29]
 [  4   3  28   2   5 160]]
0.629
Evaluar sobre el split de testing
              precision    recall  f1-score   support

      office       0.21      0.11      0.14       175
      garden       0.22      0.12      0.15       167
    bathroom       0.22      0.50      0.31       187
  



(0.183, 125, MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(125,), learning_rate='adaptive',
       learning_rate_init=0.001, max_iter=100, momentum=0.9,
       n_iter_no_change=10, nesterovs_momentum=True, power_t=0.5,
       random_state=2018, shuffle=True, solver='sgd', tol=0.0001,
       validation_fraction=0.1, verbose=False, warm_start=False))




Evaluar sobre el split de training
              precision    recall  f1-score   support

      office       0.64      0.87      0.74       198
      garden       0.77      0.62      0.69       144
    bathroom       0.73      0.71      0.72       171
     kitchen       0.75      0.73      0.74       183
     hallway       0.76      0.71      0.73       146
     bedroom       0.74      0.64      0.68       158

   micro avg       0.72      0.72      0.72      1000
   macro avg       0.73      0.71      0.72      1000
weighted avg       0.73      0.72      0.72      1000

[[172   3   6   7   4   6]
 [ 21  89  14   6   5   9]
 [ 21   6 122  11   7   4]
 [ 15   4  10 134   8  12]
 [ 15   8   8   7 103   5]
 [ 23   5   7  14   8 101]]
0.721
Evaluar sobre el split de testing
              precision    recall  f1-score   support

      office       0.16      0.34      0.22       185
      garden       0.17      0.06      0.09       154
    bathroom       0.20      0.18      0.19       215
  



(0.52600000000000002, 375, MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(375,), learning_rate='adaptive',
       learning_rate_init=0.001, max_iter=100, momentum=0.9,
       n_iter_no_change=10, nesterovs_momentum=True, power_t=0.5,
       random_state=2018, shuffle=True, solver='sgd', tol=0.0001,
       validation_fraction=0.1, verbose=False, warm_start=False))




Evaluar sobre el split de training
              precision    recall  f1-score   support

      office       0.55      0.58      0.56       159
      garden       0.54      0.38      0.45       160
    bathroom       0.58      0.49      0.53       162
     kitchen       0.52      0.57      0.54       169
     hallway       0.50      0.65      0.57       180
     bedroom       0.63      0.59      0.61       170

   micro avg       0.55      0.55      0.55      1000
   macro avg       0.55      0.54      0.54      1000
weighted avg       0.55      0.55      0.54      1000

[[ 92  12   8  17  22   8]
 [ 16  61  17  27  21  18]
 [ 15  11  79  19  24  14]
 [ 19   9   8  97  29   7]
 [ 11  15  11  13 117  13]
 [ 15   6  14  14  20 101]]
0.547
Evaluar sobre el split de testing
              precision    recall  f1-score   support

      office       0.60      0.48      0.53       177
      garden       0.55      0.46      0.50       156
    bathroom       0.50      0.49      0.50       167
  



(0.434, 750, MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(750,), learning_rate='adaptive',
       learning_rate_init=0.001, max_iter=100, momentum=0.9,
       n_iter_no_change=10, nesterovs_momentum=True, power_t=0.5,
       random_state=2018, shuffle=True, solver='sgd', tol=0.0001,
       validation_fraction=0.1, verbose=False, warm_start=False))




Evaluar sobre el split de training
              precision    recall  f1-score   support

        Fred       0.74      0.65      0.69       151
       apple       0.70      0.75      0.72       167
    football       0.72      0.63      0.67       144
        milk       0.66      0.74      0.70       171
        Bill       0.89      0.93      0.91       136
        Mary       0.84      0.95      0.89       136
        Jeff       0.95      0.73      0.82        95

   micro avg       0.77      0.77      0.77      1000
   macro avg       0.78      0.77      0.77      1000
weighted avg       0.77      0.77      0.76      1000

[[ 98  17  14  21   0   1   0]
 [ 12 125   8  22   0   0   0]
 [ 14  15  91  23   0   0   1]
 [  9  21  14 127   0   0   0]
 [  0   0   0   0 127   7   2]
 [  0   0   0   0   6 129   1]
 [  0   0   0   0  10  16  69]]
0.766
Evaluar sobre el split de testing
              precision    recall  f1-score   support

        Fred       0.33      0.24      0.28       164
 



(0.52400000000000002, 875, MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(875,), learning_rate='adaptive',
       learning_rate_init=0.001, max_iter=100, momentum=0.9,
       n_iter_no_change=10, nesterovs_momentum=True, power_t=0.5,
       random_state=2018, shuffle=True, solver='sgd', tol=0.0001,
       validation_fraction=0.1, verbose=False, warm_start=False))




Evaluar sobre el split de training
              precision    recall  f1-score   support

         yes       0.63      0.56      0.59       492
          no       0.62      0.69      0.65       508

   micro avg       0.62      0.62      0.62      1000
   macro avg       0.63      0.62      0.62      1000
weighted avg       0.62      0.62      0.62      1000

[[275 217]
 [159 349]]
0.624
Evaluar sobre el split de testing
              precision    recall  f1-score   support

         yes       0.53      0.44      0.48       503
          no       0.52      0.61      0.56       497

   micro avg       0.52      0.52      0.52      1000
   macro avg       0.53      0.52      0.52      1000
weighted avg       0.53      0.52      0.52      1000

[[223 280]
 [196 301]]
0.524
0.524 0.624




(0.61899999999999999, 875, MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(875,), learning_rate='adaptive',
       learning_rate_init=0.001, max_iter=100, momentum=0.9,
       n_iter_no_change=10, nesterovs_momentum=True, power_t=0.5,
       random_state=2018, shuffle=True, solver='sgd', tol=0.0001,
       validation_fraction=0.1, verbose=False, warm_start=False))




Evaluar sobre el split de training


  'precision', 'predicted', average, warn_for)


              precision    recall  f1-score   support

        none       0.69      0.70      0.69       468
       three       0.67      0.73      0.70       488
         one       0.00      0.00      0.00         1
         two       0.00      0.00      0.00        43

   micro avg       0.68      0.68      0.68      1000
   macro avg       0.34      0.36      0.35      1000
weighted avg       0.65      0.68      0.66      1000

[[326 142   0   0]
 [134 354   0   0]
 [  0   1   0   0]
 [ 15  28   0   0]]
0.68
Evaluar sobre el split de testing


  'precision', 'predicted', average, warn_for)


              precision    recall  f1-score   support

        none       0.61      0.60      0.61       468
       three       0.62      0.70      0.66       488
         one       0.00      0.00      0.00         2
         two       0.00      0.00      0.00        42

   micro avg       0.62      0.62      0.62      1000
   macro avg       0.31      0.32      0.32      1000
weighted avg       0.59      0.62      0.60      1000

[[279 189   0   0]
 [148 340   0   0]
 [  2   0   0   0]
 [ 25  17   0   0]]
0.619
0.619 0.68




(0.435, 750, MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(750,), learning_rate='adaptive',
       learning_rate_init=0.001, max_iter=100, momentum=0.9,
       n_iter_no_change=10, nesterovs_momentum=True, power_t=0.5,
       random_state=2018, shuffle=True, solver='sgd', tol=0.0001,
       validation_fraction=0.1, verbose=False, warm_start=False))




Evaluar sobre el split de training


  .format(len(labels), len(target_names))
  'precision', 'predicted', average, warn_for)


                     precision    recall  f1-score   support

           football       0.65      0.37      0.47       194
apple,football,milk       0.00      0.00      0.00        10
              apple       0.00      0.00      0.00         3
football,apple,milk       0.00      0.00      0.00        22
            nothing       0.00      0.00      0.00         2
         apple,milk       0.68      0.41      0.51       188
     apple,football       0.00      0.00      0.00        12
      milk,football       0.00      0.00      0.00         9
apple,milk,football       0.69      0.34      0.45       198
         milk,apple       0.00      0.00      0.00         6
milk,football,apple       0.00      0.00      0.00        11
               milk       0.00      0.00      0.00         1
     football,apple       0.45      0.90      0.60       344

          micro avg       0.52      0.52      0.52      1000
          macro avg       0.19      0.15      0.16      1000
       weighted avg   

  .format(len(labels), len(target_names))
  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)


                     precision    recall  f1-score   support

           football       0.52      0.24      0.33       204
apple,football,milk       0.00      0.00      0.00        11
              apple       0.00      0.00      0.00         1
football,apple,milk       0.00      0.00      0.00        10
            nothing       0.00      0.00      0.00         0
         apple,milk       0.46      0.25      0.32       186
     apple,football       0.00      0.00      0.00        18
      milk,football       0.00      0.00      0.00         7
apple,milk,football       0.57      0.20      0.30       207
         milk,apple       0.00      0.00      0.00         9
milk,football,apple       0.00      0.00      0.00        10
               milk       0.00      0.00      0.00         0
     football,apple       0.41      0.89      0.56       336

          micro avg       0.43      0.44      0.44       999
          macro avg       0.15      0.12      0.12       999
       weighted avg   



(0.64200000000000002, 375, MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(375,), learning_rate='adaptive',
       learning_rate_init=0.001, max_iter=100, momentum=0.9,
       n_iter_no_change=10, nesterovs_momentum=True, power_t=0.5,
       random_state=2018, shuffle=True, solver='sgd', tol=0.0001,
       validation_fraction=0.1, verbose=False, warm_start=False))
Evaluar sobre el split de training
              precision    recall  f1-score   support

         yes       0.65      0.98      0.78       633
          no       0.78      0.10      0.17       367

   micro avg       0.66      0.66      0.66      1000
   macro avg       0.72      0.54      0.48      1000
weighted avg       0.70      0.66      0.56      1000

[[623  10]
 [332  35]]
0.658
Evaluar sobre el split de testing
              precision    recall  f1-score   support

         yes       0.64      0.99     



0,1,2,3
Task,Hidden Layer Size,Train Accuracy,Test Accuracy
1,875,0.4900,0.2970
2,750,0.6290,0.2000
3,125,0.7210,0.1830
4,375,0.5470,0.5260
5,750,0.7660,0.4340
6,875,0.6240,0.5240
7,875,0.6800,0.6190
8,750,0.5230,0.4350
9,375,0.6580,0.6420
