In [24]:
# https://jovianlin.io/embeddings-in-keras/
# 참고사이트
import re
import numpy as np
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import one_hot
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Flatten, Dense, Bidirectional, LSTM
docs = ['Well done!', 'Good work', 'Great effort', 'nice work', 'Excellent!',
        'Weak', 'Poor effort!', 'not good', 'poor work', 'Could have done better.']

labels = np.array([1, 1, 1, 1, 1, 0, 0, 0, 0, 0])

In [37]:
own_embedding_vocab_size = 10
encoded_docs_oe = [one_hot(d, own_embedding_vocab_size) for d in docs]
print(encoded_docs_oe)
print(len(encoded_docs_oe))

[[5, 3], [3, 9], [9, 4], [7, 9], [7], [5], [6, 4], [7, 3], [6, 9], [3, 8, 3, 2]]
10


In [39]:
maxlen = 5
padded_decs_oe = pad_sequences(encoded_docs_oe, maxlen=maxlen, padding='post')
print(padded_decs_oe)
print(padded_decs_oe.shape)

[[5 3 0 0 0]
 [3 9 0 0 0]
 [9 4 0 0 0]
 [7 9 0 0 0]
 [7 0 0 0 0]
 [5 0 0 0 0]
 [6 4 0 0 0]
 [7 3 0 0 0]
 [6 9 0 0 0]
 [3 8 3 2 0]]
(10, 5)


In [27]:
model = Sequential()
model.add(Embedding(input_dim=own_embedding_vocab_size, output_dim=32, input_length=maxlen))
model.add(Bidirectional(LSTM(32,activation='relu',recurrent_dropout=0.1,return_sequences)))
model.add(Flatten())
model.add(Dense(1, activation='sigmoid'))

In [29]:
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['acc'])  # Compile the model
print(model.summary())  # Summarize the model

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_3 (Embedding)      (None, 5, 32)             320       
_________________________________________________________________
bidirectional_1 (Bidirection (None, 64)                16640     
_________________________________________________________________
flatten_3 (Flatten)          (None, 64)                0         
_________________________________________________________________
dense_3 (Dense)              (None, 1)                 65        
Total params: 17,025
Trainable params: 17,025
Non-trainable params: 0
_________________________________________________________________
None


In [40]:
model.fit(padded_decs_oe, labels, epochs=50, verbose=0)  # Fit the model
loss, accuracy = model.evaluate(padded_decs_oe, labels, verbose=0)  # Evaluate the model
print('loss : %0.3f'%loss ,'Accuracy: %0.3f' % accuracy)

loss : 0.153 Accuracy: 0.900


In [31]:
padded_decs_oe[1:2]

array([[3, 9, 0, 0, 0]])

In [42]:
word = "Good work"
word_docs_oe = [one_hot(word, own_embedding_vocab_size)]
word_oe = pad_sequences(word_docs_oe, maxlen=maxlen, padding='post')
pred = model.predict(word_oe)
np.round(pred)

array([[1.]], dtype=float32)

In [43]:
s = padded_decs_oe
a = model.predict(s)
np.round(a)

array([[0.],
       [1.],
       [1.],
       [1.],
       [1.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.]], dtype=float32)

In [33]:
word_oe

array([[3, 9, 0, 0, 0]])

In [34]:
s = padded_decs_oe
a = model.predict(s)
a

array([[0.25155085],
       [0.88942003],
       [0.9993516 ],
       [0.9988058 ],
       [0.76390016],
       [0.39815846],
       [0.0150933 ],
       [0.34821892],
       [0.0957509 ],
       [0.00982311]], dtype=float32)

In [35]:
np.round(a)

array([[0.],
       [1.],
       [1.],
       [1.],
       [1.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.]], dtype=float32)

In [None]:
# LSTM
- 분류 : 감정분류 ( 긍 부 정 )

In [12]:
import numpy as np
from tensorflow.keras.datasets import imdb
from tensorflow.keras.models import Sequential,load_model
from tensorflow.keras.layers import Dense,Dropout,LSTM, Embedding,Flatten
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.callbacks import ModelCheckpoint

vocab_size = 2000
maxlen=80
dictionary = imdb.get_word_index(path='imdb_word_index.json')
dictionary = {value:key for key,value in zip(dictionary.keys(),dictionary.values())}

(x_train, y_train), (x_test,y_test) = imdb.load_data(num_words = vocab_size)

x_train = sequence.pad_sequences(x_train, maxlen=maxlen,padding='post')
x_test = sequence.pad_sequences(x_test, maxlen=maxlen,padding='post')
print(x_train[1])

[ 125   68    2    2   15  349  165    2   98    5    4  228    9   43
    2 1157   15  299  120    5  120  174   11  220  175  136   50    9
    2  228    2    5    2  656  245    2    5    4    2  131  152  491
   18    2   32    2 1212   14    9    6  371   78   22  625   64 1382
    9    8  168  145   23    4 1690   15   16    4 1355    5   28    6
   52  154  462   33   89   78  285   16  145   95]


In [22]:
for i in x_train[1]:
    print(i, ':', dictionary[i], end='\t')

125 : better	68 : were	2 : and	2 : and	15 : for	349 : budget	165 : look	2 : and	98 : any	5 : to	4 : of	228 : making	9 : it	43 : out	2 : and	1157 : follows	15 : for	299 : effects	120 : show	5 : to	120 : show	174 : cast	11 : this	220 : family	175 : us	136 : scenes	50 : more	9 : it	2 : and	228 : making	2 : and	5 : to	2 : and	656 : finds	245 : tv	2 : and	5 : to	4 : of	2 : and	131 : these	152 : thing	491 : wants	18 : but	2 : and	32 : an	2 : and	1212 : cult	14 : as	9 : it	6 : is	371 : video	78 : do	22 : you	625 : david	64 : see	1382 : scenery	9 : it	8 : in	168 : few	145 : those	23 : are	4 : of	1690 : ship	15 : for	16 : with	4 : of	1355 : wild	5 : to	28 : one	6 : is	52 : very	154 : work	462 : dark	33 : they	89 : don't	78 : do	285 : dvd	16 : with	145 : those	95 : them	

In [19]:
model = Sequential()
model.add(Embedding(input_dim=vocab_size,output_dim=128,input_length=maxlen))
model.add(LSTM(128,dropout = 0.2, recurrent_dropout=0.2))
model.add(Dense(1,activation='sigmoid'))
model.compile(loss='binary_crossentropy',optimizer='adam', metrics=['acc'])

che = 'keras_model1'
point = ModelCheckpoint(filepath=che , monitor='val_loss', verbose=1, save_best_only=True)

In [20]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, 80, 128)           256000    
_________________________________________________________________
lstm (LSTM)                  (None, 128)               131584    
_________________________________________________________________
dense (Dense)                (None, 1)                 129       
Total params: 387,713
Trainable params: 387,713
Non-trainable params: 0
_________________________________________________________________


In [None]:
model.fit(x_train,y_train,batch_size=32, epochs=10,validation_data = (x_test, y_test),callbacks=[point])

Epoch 1/10

In [None]:
model = load_model('keras_model1')

In [None]:
pred = model.predict(x_train[1])
print(np.round(pred),y_train[1])
