# Design Pattern 4: Multimodal Input

> Aborda o problema de representar dados complexos e de tipos diferentes (por exemplo, texto e valores numéricos), concatenando todas as representações em uma única entrada para o modelo.

### Bibliotecas

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, Flatten, Dense, concatenate

2024-01-04 11:21:02.468591: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-01-04 11:21:02.470521: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2024-01-04 11:21:02.513158: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2024-01-04 11:21:02.514365: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


### Base de exemplo

In [2]:
# Load the restaurant reviews and discard every row that has at least one
# missing value. Chaining `dropna()` (instead of `inplace=True`) keeps the
# cell free of in-place mutation while producing the same frame.
df = pd.read_csv('data/review.csv').dropna()
print(df.shape)
df.head()

(24505, 6)


Unnamed: 0,Author,Rating,Review,Restaurant,Location,classe
0,Jia Pin Lee,4.0,Came here for the High Tea. Great service espe...,Cuisines Restaurant,"Jalan Haven The Haven All Suite Resort Tambun,...",good
1,Chui Yi Lum,2.0,"5 stars for the service, even though some of t...",Cuisines Restaurant,"Jalan Haven The Haven All Suite Resort Tambun,...",bad
2,liezel wong,1.0,"Hi, thank you for your service. But! i feel so...",Cuisines Restaurant,"Jalan Haven The Haven All Suite Resort Tambun,...",bad
3,Nazri Nor,1.0,I have the worse buffer dinner ever so far. Th...,Cuisines Restaurant,"Jalan Haven The Haven All Suite Resort Tambun,...",bad
4,Fakru Imran's Channel,5.0,"That's are Known 5 Elmark "" 9H72 "" & KDK "" 3 K...",Cuisines Restaurant,"Jalan Haven The Haven All Suite Resort Tambun,...",good


### Design Pattern 4: Multimodal Input

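- Converta cada característica (texto e valor numérico) para um formato numérico aceito pelo modelo
- Combine (concatene) as saídas da etapa anterior em um único vetor
- Use o vetor combinado como entrada das camadas seguintes do modelo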

In [3]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)

# The vocabulary is learned from the training reviews only.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(train_df['Review'])
# Keras word indices start at 1 (0 is reserved), hence the extra slot.
vocab_size = 1 + len(tokenizer.word_index)

# Every review is represented by exactly `max_length` token ids.
max_length = 20

# Turn the review text of each split into lists of integer token ids.
train_sequences, test_sequences = [
    tokenizer.texts_to_sequences(split['Review'])
    for split in (train_df, test_df)
]

# Short reviews are zero-padded at the end; longer ones are cut using
# pad_sequences' default truncation (which drops tokens from the front).
train_padded, test_padded = [
    pad_sequences(sequences, maxlen=max_length, padding='post')
    for sequences in (train_sequences, test_sequences)
]

In [4]:
# Text branch: a sequence of `max_length` token ids is embedded into
# 8-dimensional vectors and flattened into one feature vector.
text_input = Input(shape=(max_length,))
# `input_length` is intentionally not passed: the sequence length is already
# fixed by `text_input`, and the argument is deprecated in newer Keras releases.
embedding_layer = Embedding(input_dim=vocab_size, output_dim=8)(text_input)
flattened_text = Flatten()(embedding_layer)

# Numeric branch: a single scalar per example projected onto 8 ReLU units.
# NOTE(review): presumably fed with the `Rating` column — confirm where the model is fit.
numeric_input = Input(shape=(1,))
dense_rating = Dense(8, activation='relu')(numeric_input)

# Multimodal input pattern: join both representations into a single vector.
concatenated_input = concatenate([flattened_text, dense_rating])

In [5]:
# Shared head on top of the concatenated text + numeric representation:
# one hidden ReLU layer followed by a single sigmoid unit.
dense_combined = Dense(units=16, activation='relu')(concatenated_input)
output_layer = Dense(units=1, activation='sigmoid')(dense_combined)

# The model takes both inputs, in this order: token ids first, numeric value second.
model = Model(
    inputs=[text_input, numeric_input],
    outputs=output_layer,
)
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_1 (InputLayer)        [(None, 20)]                 0         []                            
                                                                                                  
 embedding (Embedding)       (None, 20, 8)                163312    ['input_1[0][0]']             
                                                                                                  
 input_2 (InputLayer)        [(None, 1)]                  0         []                            
                                                                                                  
 flatten (Flatten)           (None, 160)                  0         ['embedding[0][0]']           
                                                                                              