In [None]:
from google.colab import drive
drive.mount('/content/drive')

#### Imports and TPU setting

In [1]:
! pip install --upgrade kaggle -q
! pip install transformers -q
! pip install emoji -qq
! pip install googletrans -qq

[?25l[K     |█████▌                          | 10kB 22.9MB/s eta 0:00:01[K     |███████████                     | 20kB 2.2MB/s eta 0:00:01[K     |████████████████▋               | 30kB 2.7MB/s eta 0:00:01[K     |██████████████████████▏         | 40kB 3.0MB/s eta 0:00:01[K     |███████████████████████████▊    | 51kB 2.4MB/s eta 0:00:01[K     |████████████████████████████████| 61kB 2.0MB/s 
[?25h  Building wheel for kaggle (setup.py) ... [?25l[?25hdone
  Building wheel for slugify (setup.py) ... [?25l[?25hdone
[K     |████████████████████████████████| 890kB 3.5MB/s 
[K     |████████████████████████████████| 1.1MB 10.9MB/s 
[K     |████████████████████████████████| 890kB 20.7MB/s 
[K     |████████████████████████████████| 3.0MB 27.6MB/s 
[?25h  Building wheel for sacremoses (setup.py) ... [?25l[?25hdone
[K     |████████████████████████████████| 51kB 1.7MB/s 
[?25h  Building wheel for emoji (setup.py) ... [?25l[?25hdone
[K     |████████████████████████████████

In [2]:
import os
import re
import time
import numpy as np
import pandas as pd
import transformers
from tqdm import tqdm
import tensorflow as tf
from google.colab import files
import tensorflow_datasets as tfds
from transformers import BertTokenizer
from tensorflow.keras.models import Model
from keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers import Adam
from transformers import TFAutoModel, AutoTokenizer
from tensorflow.keras.layers import Dense, Input, Dropout, GlobalAveragePooling1D, Concatenate


from setup import set_TPU
from text_models import XLMRobertaInputs, BertInputs

import matplotlib.pyplot as plt
%matplotlib inline

tf.get_logger().setLevel('ERROR')

#### Load the data

In [3]:
files.upload()
! mkdir ~/.kaggle
! cp kaggle.json ~/.kaggle/
! chmod 600 ~/.kaggle/kaggle.json
! kaggle competitions download -c google-quest-challenge

Saving kaggle.json to kaggle.json
Downloading google-quest-challenge.zip to /content
100% 4.85M/4.85M [00:00<00:00, 34.6MB/s]
100% 4.85M/4.85M [00:00<00:00, 34.5MB/s]


In [4]:
!unzip '/content/google-quest-challenge.zip'

Archive:  /content/google-quest-challenge.zip
  inflating: sample_submission.csv   
  inflating: test.csv                
  inflating: train.csv               


In [5]:
train = pd.read_csv('/content/train.csv')
test = pd.read_csv('/content/test.csv')

In [4]:
train.head()

Unnamed: 0,qa_id,question_title,question_body,question_user_name,question_user_page,answer,answer_user_name,answer_user_page,url,category,host,question_asker_intent_understanding,question_body_critical,question_conversational,question_expect_short_answer,question_fact_seeking,question_has_commonly_accepted_answer,question_interestingness_others,question_interestingness_self,question_multi_intent,question_not_really_a_question,question_opinion_seeking,question_type_choice,question_type_compare,question_type_consequence,question_type_definition,question_type_entity,question_type_instructions,question_type_procedure,question_type_reason_explanation,question_type_spelling,question_well_written,answer_helpful,answer_level_of_information,answer_plausible,answer_relevance,answer_satisfaction,answer_type_instructions,answer_type_procedure,answer_type_reason_explanation,answer_well_written
0,0,What am I losing when using extension tubes in...,After playing around with macro photography on...,ysap,https://photo.stackexchange.com/users/1024,"I just got extension tubes, so here's the skin...",rfusca,https://photo.stackexchange.com/users/1917,http://photo.stackexchange.com/questions/9169/...,LIFE_ARTS,photo.stackexchange.com,1.0,0.333333,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.666667,1.0,1.0,0.8,1.0,0.0,0.0,1.0
1,1,What is the distinction between a city and a s...,I am trying to understand what kinds of places...,russellpierce,https://rpg.stackexchange.com/users/8774,It might be helpful to look into the definitio...,Erik Schmidt,https://rpg.stackexchange.com/users/1871,http://rpg.stackexchange.com/questions/47820/w...,CULTURE,rpg.stackexchange.com,1.0,1.0,0.0,0.5,1.0,1.0,0.444444,0.444444,0.666667,0.0,0.0,0.666667,0.666667,0.0,0.333333,0.0,0.0,0.0,0.333333,0.0,0.888889,0.888889,0.555556,0.888889,0.888889,0.666667,0.0,0.0,0.666667,0.888889
2,2,Maximum protusion length for through-hole comp...,I'm working on a PCB that has through-hole com...,Joe Baker,https://electronics.stackexchange.com/users/10157,Do you even need grooves? We make several pro...,Dwayne Reid,https://electronics.stackexchange.com/users/64754,http://electronics.stackexchange.com/questions...,SCIENCE,electronics.stackexchange.com,0.888889,0.666667,0.0,1.0,1.0,1.0,0.666667,0.444444,0.333333,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,1.0,0.333333,0.333333,0.0,0.777778,0.777778,0.555556,1.0,1.0,0.666667,0.0,0.333333,1.0,0.888889
3,3,Can an affidavit be used in Beit Din?,"An affidavit, from what i understand, is basic...",Scimonster,https://judaism.stackexchange.com/users/5151,"Sending an ""affidavit"" it is a dispute between...",Y e z,https://judaism.stackexchange.com/users/4794,http://judaism.stackexchange.com/questions/551...,CULTURE,judaism.stackexchange.com,0.888889,0.666667,0.666667,1.0,1.0,1.0,0.444444,0.444444,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.888889,0.833333,0.333333,0.833333,1.0,0.8,0.0,0.0,1.0,1.0
4,5,How do you make a binary image in Photoshop?,I am trying to make a binary image. I want mor...,leigero,https://graphicdesign.stackexchange.com/users/...,Check out Image Trace in Adobe Illustrator. \n...,q2ra,https://graphicdesign.stackexchange.com/users/...,http://graphicdesign.stackexchange.com/questio...,LIFE_ARTS,graphicdesign.stackexchange.com,1.0,0.666667,0.0,1.0,1.0,1.0,0.666667,0.666667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,1.0,0.666667,1.0,1.0,0.8,1.0,0.0,1.0,1.0


In [5]:
test.head()

Unnamed: 0,qa_id,question_title,question_body,question_user_name,question_user_page,answer,answer_user_name,answer_user_page,url,category,host
0,39,Will leaving corpses lying around upset my pri...,I see questions/information online about how t...,Dylan,https://gaming.stackexchange.com/users/64471,There is no consequence for leaving corpses an...,Nelson868,https://gaming.stackexchange.com/users/97324,http://gaming.stackexchange.com/questions/1979...,CULTURE,gaming.stackexchange.com
1,46,Url link to feature image in the portfolio,I am new to Wordpress. i have issue with Featu...,Anu,https://wordpress.stackexchange.com/users/72927,I think it is possible with custom fields.\n\n...,Irina,https://wordpress.stackexchange.com/users/27233,http://wordpress.stackexchange.com/questions/1...,TECHNOLOGY,wordpress.stackexchange.com
2,70,"Is accuracy, recoil or bullet spread affected ...","To experiment I started a bot game, toggled in...",Konsta,https://gaming.stackexchange.com/users/37545,You do not have armour in the screenshots. Thi...,Damon Smithies,https://gaming.stackexchange.com/users/70641,http://gaming.stackexchange.com/questions/2154...,CULTURE,gaming.stackexchange.com
3,132,Suddenly got an I/O error from my external HDD,I have used my Raspberry Pi as a torrent-serve...,robbannn,https://raspberrypi.stackexchange.com/users/17341,Your Western Digital hard drive is disappearin...,HeatfanJohn,https://raspberrypi.stackexchange.com/users/1311,http://raspberrypi.stackexchange.com/questions...,TECHNOLOGY,raspberrypi.stackexchange.com
4,200,Passenger Name - Flight Booking Passenger only...,I have bought Delhi-London return flights for ...,Amit,https://travel.stackexchange.com/users/29089,I called two persons who work for Saudia (tick...,Nean Der Thal,https://travel.stackexchange.com/users/10051,http://travel.stackexchange.com/questions/4704...,CULTURE,travel.stackexchange.com


In [5]:
print(f"train shape: {train.shape} \nvalidation shape: {test.shape}")

train shape: (6079, 41) 
validation shape: (476, 11)


#### Preprocess

In [6]:
output_categories = list(train.columns[11:])
input_categories = list(train.columns[[1,2,5]])

In [7]:
train['question_title_body']  = train['question_title'] + '. ' + train['question_body']

#### Modelling

##### Model by CLS token

###### Build model inputs

In [8]:
strategy = set_TPU()

INFO:absl:Entering into master device scope: /job:worker/replica:0/task:0/device:CPU:0


Running on TPU: grpc://10.112.218.106:8470
REPLICAS: 8


In [9]:
# Configuration
MODEL = 'jplu/tf-xlm-roberta-large'
EPOCHS = 3
MAX_LEN = 96

BATCH_SIZE = 16 * strategy.num_replicas_in_sync

In [10]:
xlmroberta_inputs_train = XLMRobertaInputs(train[['question_title_body','answer']].iloc[:5000].values.tolist(), train[output_categories].iloc[:5000].values, max_length=MAX_LEN, batch_size=BATCH_SIZE)
%time train_inputs = xlmroberta_inputs_train.process_examples(train=True)

xlmroberta_inputs_val = XLMRobertaInputs(train[['question_title_body','answer']].iloc[5000:].values.tolist(), train[output_categories].iloc[5000:].values, max_length=MAX_LEN, batch_size=BATCH_SIZE)
%time validation_inputs = xlmroberta_inputs_val.process_examples(train=False)



CPU times: user 8.64 s, sys: 159 ms, total: 8.8 s
Wall time: 8.82 s




CPU times: user 2.09 s, sys: 8.73 ms, total: 2.1 s
Wall time: 2.11 s


###### Build model

In [11]:
def build_model(transformer,max_len):
    
    input_ids = Input(shape=(max_len,), dtype=tf.int32, name="input_ids")
    sequence_output = transformer(input_ids)[0]
    cls_token = sequence_output[:, 0, :]
    cls_token = Dropout(0.2)(cls_token)
    cls_token = Dense(512,activation='relu')(cls_token)
    out = Dense(len(output_categories), activation='sigmoid')(cls_token)

    model = Model(inputs=input_ids, outputs=out)
    model.compile(Adam(lr=2e-5),  loss='binary_crossentropy')
    
    return model

In [12]:
transformer_layer = TFAutoModel.from_pretrained(MODEL)
model = build_model(transformer_layer, max_len=MAX_LEN)
model.summary()

Some weights of the model checkpoint at jplu/tf-xlm-roberta-large were not used when initializing TFXLMRobertaModel: ['lm_head']
- This IS expected if you are initializing TFXLMRobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing TFXLMRobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of TFXLMRobertaModel were initialized from the model checkpoint at jplu/tf-xlm-roberta-large.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFXLMRobertaModel for predictions without further training.


Model: "functional_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_ids (InputLayer)       [(None, 96)]              0         
_________________________________________________________________
tfxlm_roberta_model (TFXLMRo ((None, 96, 1024), (None, 559890432 
_________________________________________________________________
tf_op_layer_strided_slice (T [(None, 1024)]            0         
_________________________________________________________________
dropout_74 (Dropout)         (None, 1024)              0         
_________________________________________________________________
dense (Dense)                (None, 512)               524800    
_________________________________________________________________
dense_1 (Dense)              (None, 30)                15390     
Total params: 560,430,622
Trainable params: 560,430,622
Non-trainable params: 0
________________________________________

###### Training

In [13]:
n_steps = train.shape[0]//BATCH_SIZE

train_history = model.fit(train_inputs, validation_data=validation_inputs, steps_per_epoch=n_steps, epochs=EPOCHS, verbose=1)

Epoch 1/3
Epoch 2/3
Epoch 3/3


In [14]:
text = [['its a nice day', 'the weather outside seems to be good']]
token = xlmroberta_inputs_train.tokenizer.batch_encode_plus(text,
                        max_length = MAX_LEN, # max length of the text that can go to BERT
                        pad_to_max_length = True, # add [PAD] tokens
                        truncation = True
                    )
xlmroberta_inputs_pred = np.array(token['input_ids'])

model.predict(xlmroberta_inputs_pred)



array([[0.79775965, 0.5225271 , 0.12118098, 0.63759834, 0.5774739 ,
        0.6357765 , 0.58222264, 0.63370115, 0.28555083, 0.02138367,
        0.54605   , 0.24992555, 0.13134766, 0.04937392, 0.08555287,
        0.1759719 , 0.22008052, 0.25450706, 0.39080173, 0.03756759,
        0.77141625, 0.8313895 , 0.5324839 , 0.8038827 , 0.9214682 ,
        0.8090356 , 0.25202483, 0.11678043, 0.40678966, 0.70252013]],
      dtype=float32)

##### BERT model

###### Build model inputs

In [15]:
Bert_inputs_train = BertInputs(train[['question_title_body','answer']].iloc[:5000].values.tolist(), train[output_categories].iloc[:5000].values, max_length=MAX_LEN, batch_size=BATCH_SIZE)
%time train_inputs = Bert_inputs_train.process_examples(train=True)

Bert_inputs_val = BertInputs(train[['question_title_body','answer']].iloc[5000:].values.tolist(), train[output_categories].iloc[5000:].values, max_length=MAX_LEN, batch_size=BATCH_SIZE)
%time validation_inputs = Bert_inputs_val.process_examples(train=False)



CPU times: user 24.9 s, sys: 98.5 ms, total: 25 s
Wall time: 25.1 s




CPU times: user 5.64 s, sys: 35.3 ms, total: 5.68 s
Wall time: 5.69 s


###### Build model

In [16]:
def build_model(transformer,max_len):
    
    input_ids = Input(shape=(max_len,), dtype=tf.int32, name="input_ids")
    
    input_masks = Input((max_len,), dtype=tf.int32, name="input_masks")
    
    input_attention = Input((max_len,), dtype=tf.int32, name="input_attention")
    
    sequence_output = transformer(input_ids, attention_mask=input_masks, token_type_ids=input_attention)[0]

    cls_token = sequence_output[:, 0, :]
    cls_token = Dropout(0.2)(cls_token)
    cls_token = Dense(512,activation='relu')(cls_token)
    out = Dense(len(output_categories), activation='sigmoid')(cls_token)

    model = Model(inputs=[input_ids, input_masks, input_attention], outputs=out)
    model.compile(Adam(lr=2e-5),  loss='binary_crossentropy')
    
    return model

In [17]:
transformer_layer = TFAutoModel.from_pretrained("bert-base-uncased")
model = build_model(transformer_layer, max_len=MAX_LEN)
model.summary()

Some weights of the model checkpoint at bert-base-uncased were not used when initializing TFBertModel: ['nsp___cls', 'mlm___cls']
- This IS expected if you are initializing TFBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing TFBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of TFBertModel were initialized from the model checkpoint at bert-base-uncased.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions without further training.


Model: "functional_3"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_ids (InputLayer)          [(None, 96)]         0                                            
__________________________________________________________________________________________________
input_masks (InputLayer)        [(None, 96)]         0                                            
__________________________________________________________________________________________________
input_attention (InputLayer)    [(None, 96)]         0                                            
__________________________________________________________________________________________________
tf_bert_model (TFBertModel)     ((None, 96, 768), (N 109482240   input_ids[0][0]                  
                                                                 input_masks[0][0]     

###### Training

In [18]:
n_steps = train.shape[0]//BATCH_SIZE

train_history = model.fit(train_inputs, validation_data=validation_inputs, steps_per_epoch=n_steps, epochs=EPOCHS, verbose=1)

Epoch 1/3
Epoch 2/3
Epoch 3/3


In [19]:
text = ['its a nice day', 'the weather outside seems to be good']

def convert_example_to_feature(text):

    return Bert_inputs_train.tokenizer.batch_encode_plus(text,
                    add_special_tokens = True, # add [CLS], [SEP]
                    max_length = MAX_LEN, # max length of the text that can go to BERT
                    pad_to_max_length = True, # add [PAD] tokens
                    return_attention_mask = True, # add attention mask to not focus on pad tokens
                    return_token_type_ids = True,
                    truncation=True
                )
    
def map_example_to_dict(input_ids, attention_masks, token_type_ids):

    return {
        "input_ids": input_ids,
        "token_type_ids": token_type_ids,
        "attention_mask": attention_masks,
    }

def encode_examples(text):

    input_ids_list = []
    token_type_ids_list = []
    attention_mask_list = []
        
    bert_input = convert_example_to_feature(text)

    input_ids_list.append(bert_input['input_ids'])
    token_type_ids_list.append(bert_input['token_type_ids'])
    attention_mask_list.append(bert_input['attention_mask'])

    return tf.data.Dataset.from_tensor_slices((input_ids_list, attention_mask_list, token_type_ids_list)).map(map_example_to_dict)


%time pred_inputs = encode_examples(text)

model.predict(pred_inputs)

CPU times: user 39.5 ms, sys: 4.11 ms, total: 43.6 ms
Wall time: 46 ms




array([[0.9026169 , 0.5946907 , 0.04686373, 0.70602137, 0.81683004,
        0.81802714, 0.571542  , 0.49712488, 0.21049258, 0.00722891,
        0.39576966, 0.2787671 , 0.03532407, 0.01146644, 0.02369136,
        0.05350515, 0.4742013 , 0.13612297, 0.33123317, 0.006648  ,
        0.80012405, 0.9467379 , 0.65032214, 0.96606654, 0.9729286 ,
        0.8701431 , 0.4988237 , 0.11869973, 0.48908088, 0.9162179 ],
       [0.9027573 , 0.5924587 , 0.04710203, 0.7063874 , 0.8147415 ,
        0.81925344, 0.5713013 , 0.49628526, 0.21291155, 0.00738299,
        0.39430138, 0.27792934, 0.03568569, 0.01170361, 0.02402282,
        0.05393511, 0.47370946, 0.13593972, 0.33096486, 0.00679541,
        0.7994795 , 0.9466686 , 0.64903724, 0.9656198 , 0.97275305,
        0.8701525 , 0.49765217, 0.11880806, 0.48939478, 0.9157262 ]],
      dtype=float32)

##### Model by BERT siamese network

###### Build model inputs

In [24]:
import random
from transformers import TFBertModel

def convert_to_transformer_inputs(str1, str2, tokenizer, max_sequence_length):
    
    def return_id(str1, str2, truncation_strategy, length):

        inputs = tokenizer.encode_plus(str1, str2,
            add_special_tokens=True,
            max_length=length,
            truncation_strategy=truncation_strategy,
            truncation=True)
        
        input_ids =  inputs["input_ids"]
        input_masks = [1] * len(input_ids)
        input_segments = inputs["token_type_ids"]
        
        padding_length = length - len(input_ids)
        padding_id = tokenizer.pad_token_id
        
        input_ids = input_ids + ([padding_id] * padding_length)
        input_masks = input_masks + ([0] * padding_length)
        input_segments = input_segments + ([0] * padding_length)
        
        return [input_ids, input_masks, input_segments]
        
    input_ids_1, input_masks_1, input_segments_1 = return_id(
        str1, None, 'longest_first', max_sequence_length)

    input_ids_2, input_masks_2, input_segments_2 = return_id(
        str2, None, 'longest_first', max_sequence_length)

    return [input_ids_1, input_masks_1, input_segments_1,
            input_ids_2, input_masks_2, input_segments_2]     

def compute_input_arrays(df, columns, tokenizer, max_sequence_length):
    
    input_ids_1, input_masks_1, input_segments_1 = [], [], []
    input_ids_2, input_masks_2, input_segments_2 = [], [], []
    for _, instance in tqdm(df[columns].iterrows(), total=len(df)):
        str1, str2 = instance[columns[0]], instance[columns[1]]

        ids_1, masks_1, segments_1, ids_2, masks_2, segments_2 = \
        convert_to_transformer_inputs(str1, str2, tokenizer, max_sequence_length)
        
        input_ids_1.append(ids_1)
        input_masks_1.append(masks_1)
        input_segments_1.append(segments_1)

        input_ids_2.append(ids_2)
        input_masks_2.append(masks_2)
        input_segments_2.append(segments_2)
        
        
    return [np.asarray(input_ids_1, dtype=np.int32), 
            np.asarray(input_masks_1, dtype=np.int32), 
            np.asarray(input_segments_1, dtype=np.int32),
            np.asarray(input_ids_2, dtype=np.int32), 
            np.asarray(input_masks_2, dtype=np.int32), 
            np.asarray(input_segments_2, dtype=np.int32)]

tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")

input_train = compute_input_arrays(train[['question_title_body','answer']], ['question_title_body','answer'], tokenizer, MAX_LEN)

100%|██████████| 6079/6079 [00:42<00:00, 144.65it/s]


###### Build model

In [11]:
def siamese_bert():
    
    opt = Adam(learning_rate=2e-5)
    
    id1 = Input((MAX_LEN,), dtype=tf.int32)
    id2 = Input((MAX_LEN,), dtype=tf.int32)
    
    mask1 = Input((MAX_LEN,), dtype=tf.int32)
    mask2 = Input((MAX_LEN,), dtype=tf.int32)
    
    atn1 = Input((MAX_LEN,), dtype=tf.int32)
    atn2 = Input((MAX_LEN,), dtype=tf.int32)
    
    bert_model = TFBertModel.from_pretrained("bert-base-uncased")
    bert_model.config.output_hidden_states = False
    
    embedding1 = bert_model(id1, attention_mask=mask1, token_type_ids=atn1)[0]
    embedding2 = bert_model(id2, attention_mask=mask2, token_type_ids=atn2)[0]
    
    x1 = GlobalAveragePooling1D()(embedding1)
    x2 = GlobalAveragePooling1D()(embedding2)
    
    x = Concatenate()([x1, x2])
    x = Dropout(0.2)(x)
    x = Dense(512, activation='relu')(x)
    out = Dense(len(output_categories), activation='sigmoid')(x)

    model = Model(inputs=[id1, mask1, atn1, id2, mask2, atn2], outputs=out)
    model.compile(loss='binary_crossentropy', optimizer=opt)
    
    return model

In [25]:
model = siamese_bert()
model.summary()

Some weights of the model checkpoint at bert-base-uncased were not used when initializing TFBertModel: ['mlm___cls', 'nsp___cls']
- This IS expected if you are initializing TFBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing TFBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of TFBertModel were initialized from the model checkpoint at bert-base-uncased.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions without further training.


Model: "functional_7"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_19 (InputLayer)           [(None, 96)]         0                                            
__________________________________________________________________________________________________
input_21 (InputLayer)           [(None, 96)]         0                                            
__________________________________________________________________________________________________
input_23 (InputLayer)           [(None, 96)]         0                                            
__________________________________________________________________________________________________
input_20 (InputLayer)           [(None, 96)]         0                                            
_______________________________________________________________________________________

###### Training

In [26]:
train_history = model.fit(x=input_train, y=train[output_categories], 
                          validation_split=0.2, 
                          epochs=EPOCHS, verbose=1)

Epoch 1/3
Epoch 2/3
Epoch 3/3
