<a href="https://colab.research.google.com/github/Normal-case/DeepLearning/blob/master/RNN_emotion.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, SimpleRNN
from tensorflow.keras.optimizers import Adam
from keras_preprocessing.text import Tokenizer
import numpy as np

In [2]:
# Toy sentiment corpus: one negative and one positive sentence.
X_train = [
    'Traffic ticket fines',
    'Traffic is fine',
]
# Labels aligned with X_train by position: 0 = negative, 1 = positive.
y_train = np.asarray([0, 1], dtype = np.float32)

In [3]:
# Create a Keras text tokenizer with its default settings (no arguments passed);
# it has not seen any text yet.
tokenizer = Tokenizer()
tokenizer

<keras_preprocessing.text.Tokenizer at 0x7ff519e33b70>

In [4]:
# Scan the training sentences to collect word frequencies, so that the
# sentences can be converted to numbers in the following cells.
tokenizer.fit_on_texts(X_train)

In [5]:
# Convert each training sentence into a list of integer word indices
# using the vocabulary built by fit_on_texts.
X_tokens = tokenizer.texts_to_sequences(X_train)
X_tokens

[[1, 2, 3], [1, 4, 5]]

In [6]:
# Display the word -> integer index mapping learned by the tokenizer.
tokenizer.word_index

{'fine': 5, 'fines': 3, 'is': 4, 'ticket': 2, 'traffic': 1}

In [7]:
len(tokenizer.word_index) # vocabulary size (number of distinct words)

5

In [8]:
from keras.utils import to_categorical

In [9]:
# One-hot encode X_tokens and keep the result.
# The word indices start at 1, so the encoding is one column wider than the
# vocabulary and column 0 stays unused (visible in the output below).
# NOTE(review): presumably this requires every sentence to have the same
# number of tokens — confirm before adding sentences of other lengths.
X_onehot = to_categorical(X_tokens)
X_onehot

array([[[0., 1., 0., 0., 0., 0.],
        [0., 0., 1., 0., 0., 0.],
        [0., 0., 0., 1., 0., 0.]],

       [[0., 1., 0., 0., 0., 0.],
        [0., 0., 0., 0., 1., 0.],
        [0., 0., 0., 0., 0., 1.]]], dtype=float32)

In [10]:
# Shape is (sentences, tokens per sentence, one-hot width).
X_onehot.shape

(2, 3, 6)

In [11]:
# Binary sentiment classifier: a 3-unit SimpleRNN reads the one-hot sequence
# and a single sigmoid unit outputs one value in (0, 1) per sentence.
model = Sequential()
# Take (timesteps, features) from the data instead of hard-coding (3, 6), so
# the model stays in sync if the sentences or the vocabulary change.
model.add(SimpleRNN(units = 3, input_shape = X_onehot.shape[1:]))
model.add(Dense(1, activation = 'sigmoid'))
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
simple_rnn (SimpleRNN)       (None, 3)                 30        
_________________________________________________________________
dense (Dense)                (None, 1)                 4         
Total params: 34
Trainable params: 34
Non-trainable params: 0
_________________________________________________________________


In [12]:
# `learning_rate` is the supported keyword; the old `lr` alias is deprecated
# and is rejected by newer Keras releases.
model.compile(loss = 'binary_crossentropy', optimizer=Adam(learning_rate=0.1), metrics = ['acc'])

In [13]:
# Train for 10 epochs on the one-hot encoded sentences and their 0/1 labels.
model.fit(X_onehot, y_train, epochs = 10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7ff4d02cf668>

In [14]:
# Run the trained model on the same sentences it was trained on; each row of
# the result holds a single sigmoid output.
pred = model.predict(X_onehot)
pred

array([[0.07372171],
       [0.93809474]], dtype=float32)

In [15]:
# Threshold the sigmoid outputs at 0.5 and flatten (n, 1) -> (n,) so that each
# label is a plain string; indexing the 2-D result printed "['unhappy']".
predict = np.where(pred > 0.5, 1, 0).ravel()
emotion = np.where(predict == 0, 'unhappy', 'happy')
print('predict')
# Iterate over all sentences instead of hard-coding indices 0 and 1.
for sentence, label in zip(X_train, emotion):
  print('"{}" is {}'.format(sentence, label))

predict
"Traffic ticket fines" is ['unhappy']
"Traffic is fine" is ['happy']


+ RNN auto-completion: predict the next letter of a word from its first three letters

In [16]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, SimpleRNN
from tensorflow.keras.optimizers import Adam
import numpy as np

In [17]:
# Three-letter prefixes; the network learns to predict the letter that follows.
X_train = 'wor woo dee div col coo loa lov kis kin'.split()
X_train

['wor', 'woo', 'dee', 'div', 'col', 'coo', 'loa', 'lov', 'kis', 'kin']

In [18]:
# Target letter for each prefix in X_train, aligned by position.
y_train = [
    'd', 'd', 'p', 'e', 'd',
    'k', 'd', 'e', 's', 'd',
]
# List holding the lowercase English alphabet, 'a' through 'z'.
char_arr = [chr(code) for code in range(ord('a'), ord('z') + 1)]
print(char_arr)

['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']


In [19]:
# Print every letter next to its position in char_arr.
for i in range(len(char_arr)):
  char = char_arr[i]
  print(i, char)

0 a
1 b
2 c
3 d
4 e
5 f
6 g
7 h
8 i
9 j
10 k
11 l
12 m
13 n
14 o
15 p
16 q
17 r
18 s
19 t
20 u
21 v
22 w
23 x
24 y
25 z


In [20]:
# Letter -> position lookup table built by pairing each letter with its index.
num_dic = dict(zip(char_arr, range(len(char_arr))))
num_dic

{'a': 0,
 'b': 1,
 'c': 2,
 'd': 3,
 'e': 4,
 'f': 5,
 'g': 6,
 'h': 7,
 'i': 8,
 'j': 9,
 'k': 10,
 'l': 11,
 'm': 12,
 'n': 13,
 'o': 14,
 'p': 15,
 'q': 16,
 'r': 17,
 's': 18,
 't': 19,
 'u': 20,
 'v': 21,
 'w': 22,
 'x': 23,
 'y': 24,
 'z': 25}

In [21]:
# Look up the alphabet index of each letter in 'wor'.
# NOTE: the name `input` shadows the builtin input(); it is kept here because
# the next cell reads it.
input = [num_dic[letter] for letter in 'wor']
input

[22, 14, 17]

In [22]:
# np.eye(n)[input]: one-hot encode the integer indices stored in `input`
# (row i of the identity matrix is the one-hot vector for index i).
# n = len(char_arr) = 26, so each one-hot vector has one slot per letter
# (the earlier comment said 25, which did not match the code).
onehot = np.eye(len(char_arr))[input]
onehot

array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 1., 0., 0., 0., 0., 0., 0., 0., 0.]])

In [23]:
X_train_onehot = []
# Encode every string in X_train as a matrix with one one-hot row per letter.
for seq in X_train:
  print('seq:', seq)
  # Convert the letters of seq to alphabet indices. Stored in `indices`
  # rather than `input` so this loop does not rebind the builtin input().
  indices = [num_dic[char] for char in seq]
  print('input:', indices)
  # One-hot encode the indices; the width follows the alphabet size
  # instead of a hard-coded 26.
  onehot = np.eye(len(char_arr))[indices]
  print('one hot:', onehot)
  X_train_onehot.append(onehot)
  print('='*30)

seq: wor
input: [22, 14, 17]
one hot: [[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0.
  0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0.
  0. 0.]]
seq: woo
input: [22, 14, 14]
one hot: [[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0.
  0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0.]]
seq: dee
input: [3, 4, 4]
one hot: [[0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0.]
 [0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0.]
 [0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0.]]
seq: div
input: [3, 8, 21]
one hot: [[0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0.]
 [0. 0. 0. 0. 0. 0. 

In [24]:
# Stack the per-word one-hot matrices into a single float32 array.
X_train_onehot = np.asarray(X_train_onehot).astype('float32')
X_train_onehot

array([[[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 1., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 1., 0., 0., 0., 0., 0., 0., 0., 0.]],

       [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 1., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],

       [[0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0.,

In [25]:
# Translate every target letter into its alphabet index via num_dic.
y_train_tokens = [
    num_dic[target_letter]
    for target_letter in y_train
]
y_train_tokens

[3, 3, 15, 4, 3, 10, 3, 4, 18, 3]

In [26]:
# One-hot encode the target indices; the width follows the alphabet size
# instead of a hard-coded 26.
y_train_onehot = np.eye(len(char_arr))[y_train_tokens]
y_train_onehot

array([[0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 

In [27]:
# Shape is (words, letters per word, alphabet size).
X_train_onehot.shape

(10, 3, 26)

In [28]:
# Next-letter classifier: a 3-unit SimpleRNN reads the one-hot letters and a
# softmax layer scores every letter of the alphabet.
model = Sequential()
# Take (timesteps, features) from the data instead of hard-coding (3, 26).
model.add(SimpleRNN(units = 3, input_shape = X_train_onehot.shape[1:]))
# One output unit per letter in char_arr (26), matching the one-hot targets.
model.add(Dense(len(char_arr), activation = 'softmax'))
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
simple_rnn_1 (SimpleRNN)     (None, 3)                 90        
_________________________________________________________________
dense_1 (Dense)              (None, 26)                104       
Total params: 194
Trainable params: 194
Non-trainable params: 0
_________________________________________________________________


In [29]:
# `learning_rate` is the supported keyword; the old `lr` alias is deprecated
# and is rejected by newer Keras releases.
model.compile(loss = 'categorical_crossentropy', optimizer = Adam(learning_rate = 0.1), metrics = ['acc'])

In [30]:
# Train for 30 epochs on the one-hot encoded prefixes and one-hot targets.
model.fit(X_train_onehot, y_train_onehot, epochs = 30)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<tensorflow.python.keras.callbacks.History at 0x7ff48a024518>

In [44]:
# Prefixes that do not appear in X_train, used to try out the trained model.
new_data = 'hel mul goo'.split()

In [45]:
new_data_onehot = []
# Encode every string in new_data as a matrix with one one-hot row per letter.
for seq in new_data:
  print('seq:', seq)
  # Convert the letters of seq to alphabet indices. Stored in `indices`
  # rather than `input` so this loop does not rebind the builtin input().
  indices = [num_dic[char] for char in seq]
  print('input:', indices)
  # One-hot encode the indices; the width follows the alphabet size
  # instead of a hard-coded 26.
  onehot = np.eye(len(char_arr))[indices]
  print('one hot:', onehot)
  new_data_onehot.append(onehot)
  print('='*30)

seq: hel
input: [7, 4, 11]
one hot: [[0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0.]
 [0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0.]]
seq: mul
input: [12, 20, 11]
one hot: [[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0.
  0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0.]]
seq: goo
input: [6, 14, 14]
one hot: [[0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0.]]


In [46]:
# Stack the per-word one-hot matrices into one float32 array for the model.
new_data_onehot = np.array(new_data_onehot, dtype = 'float32')
pred = model.predict(new_data_onehot)
# For every input row, pick the index of the highest softmax score.
predict = np.argmax(pred, axis = 1)
# Map each predicted index back to its letter. The stray expression
# `char_arr[predict[0]]` was removed: it was not the last line of the cell,
# so its value was never displayed. The printed label means "predicted value".
for pred_num in predict:
  print('예측값 :', char_arr[pred_num])

예측값 : p
예측값 : d
예측값 : d
