In [1]:
import tarfile
import os

In [2]:
if not os.path.exists('aclImdb'):
    tffile = tarfile.open("aclImdb_v1.tar.gz",'r:gz')
    result = tffile.extract('aclImdb')

## 读取IMDb数据

In [3]:
from keras.preprocessing import sequence
from keras.preprocessing.text import Tokenizer

Using TensorFlow backend.


In [4]:
import re
def rm_tags(text):
    re_tag = re.compile(r'<[^>]+>')
    return re_tag.sub('', text)

In [5]:
import os
def read_files(filetype):
    path = 'aclImdb/'
    file_list = []
    
    positive_path = path +filetype+'/pos/'
    for f in os.listdir(positive_path):
        file_list += [positive_path+f]
        
    negative_path = path+filetype+'/neg/'
    for f in os.listdir(negative_path):
        file_list += [negative_path+f]
        
    print('read', filetype, 'files:', len(file_list))
    all_labels = ([1]*12500+[0]*12500)
    
    all_texts = []
    for fi in file_list:
        with open(fi) as file_input:
            all_texts += [rm_tags(" ".join(file_input.readlines()))]
            
    return all_labels, all_texts

In [6]:
#使用read_files函数传入参数train读取训练数据
y_train, train_text = read_files('train')

('read', 'train', 'files:', 25000)


In [7]:
#使用read_files函数传入参数test读取测试数据
y_test, test_text = read_files('test')

('read', 'test', 'files:', 25000)


## 查看IMDb数据

In [9]:
train_text[0]

'I love this movie. It is great film that combines English and Indian cultures with feminist-type issues, such as girls wanting to play sports that were previously reserved for men. It shows the struggles of both an Indian person wanting to break outside her cultural barriers and women wanting to break outside the gender restrictions found in sports, especially in England at the time. I feel that the cultural struggles are more emphasized than the other issues.In contrast to the other comment, I do not think this movie is anything like Dirty Dancing or any other such chick flick. This move is loved by many types of people, men and women, young and old alike.'

In [10]:
y_train[0]

1

In [11]:
train_text[12501]

'From watching only the trailer to Theodore Rex, you would think this is a bad buddy cop comedy with Whoopi Goldberg and a guy in a dinosaur costume. That is true, but this is mostly a futuristic story, which looks a lot like Batman Forever with it\'s direction style and weird character designs. It was mismarketed, and should have been marketed as a futuristic tale, instead of just a lame cop comedy. Whether or not this movie is mismarketed, it\'s still a horrible movie.In the future, dinosaurs have been brought back to life through amazing technology, and they talk and walk around like humans. Teddy is a dinosaur detective who is never taken seriously, but after a dinosaur is murdered, he\'s given the case to work on, but he has to be partners with the toughest cop of them all, Katie Coltrane (Whoopi Goldberg). It\'s up to this mismatched duo to solve the murder, and it\'s up to the audience to stay awake long enough to make it through this piece of crud.Teddy starts the picture as a 

In [12]:
y_train[12501]

0

## 建立token

In [13]:
token = Tokenizer(num_words=2000)
token.fit_on_texts(train_text)

In [14]:
print(token.document_count)

25000


In [15]:
print(token.word_index)



## 使用Token将"影评文字"转换成"数字列表"

In [17]:
x_train_seq = token.texts_to_sequences(train_text)
x_test_seq = token.texts_to_sequences(test_text)

In [18]:
print(train_text[0])

I love this movie. It is great film that combines English and Indian cultures with feminist-type issues, such as girls wanting to play sports that were previously reserved for men. It shows the struggles of both an Indian person wanting to break outside her cultural barriers and women wanting to break outside the gender restrictions found in sports, especially in England at the time. I feel that the cultural struggles are more emphasized than the other issues.In contrast to the other comment, I do not think this movie is anything like Dirty Dancing or any other such chick flick. This move is loved by many types of people, men and women, young and old alike.


In [19]:
print(x_train_seq)

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.


## 让转换后的数字长度相同

In [20]:
x_train = sequence.pad_sequences(x_train_seq, maxlen=100)
x_test = sequence.pad_sequences(x_test_seq, maxlen=100)

In [21]:
#长度大于指定长度的时候,截去头部多余的部分
print('before pad_sequences length=', len(x_train_seq[0]))
print(x_train_seq[0])

('before pad_sequences length=', 98)
[9, 115, 10, 16, 8, 6, 83, 18, 11, 627, 2, 1392, 15, 548, 1336, 137, 13, 535, 1779, 5, 293, 11, 67, 14, 345, 8, 283, 1, 4, 195, 31, 1392, 410, 1779, 5, 986, 1002, 37, 2, 368, 1779, 5, 986, 1002, 1, 254, 7, 257, 7, 1821, 29, 1, 54, 9, 230, 11, 1, 22, 49, 70, 1, 81, 1336, 7, 5, 1, 81, 923, 9, 78, 20, 100, 10, 16, 6, 228, 36, 1643, 1101, 38, 97, 81, 137, 504, 10, 843, 6, 442, 30, 107, 4, 80, 345, 2, 368, 181, 2, 150]


In [22]:
print('after pad_sequences length=', len(x_train[0]))
print(x_train[0])

('after pad_sequences length=', 100)
[   0    0    9  115   10   16    8    6   83   18   11  627    2 1392   15
  548 1336  137   13  535 1779    5  293   11   67   14  345    8  283    1
    4  195   31 1392  410 1779    5  986 1002   37    2  368 1779    5  986
 1002    1  254    7  257    7 1821   29    1   54    9  230   11    1   22
   49   70    1   81 1336    7    5    1   81  923    9   78   20  100   10
   16    6  228   36 1643 1101   38   97   81  137  504   10  843    6  442
   30  107    4   80  345    2  368  181    2  150]


In [23]:
#长度小于指定长度的时候,在头部添加多余的部分
print('before pad_sequences length=', len(x_train_seq[1]))
print(x_train_seq[1])

('before pad_sequences length=', 313)
[704, 301, 301, 237, 6, 1, 1530, 697, 761, 35, 907, 9, 1026, 215, 10, 16, 170, 99, 20, 315, 8, 7, 107, 149, 9, 373, 82, 54, 9, 215, 8, 9, 156, 65, 97, 1393, 29, 28, 2, 67, 765, 30, 10, 1746, 2, 541, 438, 16, 6, 8, 124, 70, 7, 107, 766, 418, 147, 281, 612, 75, 8, 118, 45, 707, 579, 4, 31, 48, 225, 2, 75, 59, 89, 1483, 4, 47, 896, 3, 869, 18, 549, 41, 323, 5, 63, 2, 170, 7, 1, 92, 11, 71, 38, 430, 68, 442, 40, 94, 7, 10, 18, 7, 32, 67, 336, 615, 2, 9, 156, 419, 1, 2, 197, 1078, 10, 18, 96, 710, 24, 73, 3, 374, 27, 17, 1, 1053, 156, 558, 1, 351, 466, 2, 300, 5, 93, 10, 18, 29, 207, 572, 638, 46, 22, 3, 167, 633, 11, 1466, 66, 353, 19, 64, 201, 1, 599, 6, 175, 48, 46, 22, 103, 157, 686, 59, 22, 2, 103, 157, 101, 1625, 30, 2, 1691, 59, 22, 733, 1, 1851, 196, 2, 22, 995, 1, 915, 135, 22, 69, 7, 1, 61, 2, 148, 230, 42, 4, 269, 59, 8, 96, 24, 710, 220, 257, 7, 1286, 17, 6, 46, 138, 11, 10, 427, 35, 393, 160, 35, 68, 418, 46, 6, 258, 32, 148, 1483, 95, 72, 

In [24]:
print('after pad_sequences length=', len(x_train[1]))
print(x_train[1])

('after pad_sequences length=', 100)
[ 393  160   35   68  418   46    6  258   32  148 1483   95   72   17  636
  755   94  301  107    4    1  135   35    1   82   18   22  338    7   10
   27 1269    8   12  908    5   63    1  150  135   15    1  157  659  237
  311    1 1322   45    4    1  633  454  831  339   10    6   61    2  258
    1 1053  349    5   23  780   15    8  162    1  876  230    3  113   29
  207   17 1399  144 1367  148 1483   10  896 1134   16    8    6    1   60
   62  907  750   59  140   25    7  171   16 1587]


# 建立MLP,RNN,LSTM模型进行IMDB情感分析

## 加入嵌入层

In [25]:
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation, Flatten
from keras.layers.embeddings import Embedding

In [26]:
model = Sequential()

In [27]:
'''
input_dim=2000:输入的维数是2000,因为之前建立的字典有2000个单词
output_dim=32:将每个单词的数字转成32维实数向量
input_length=100:每个样例的数字列表的长度
'''
model.add(Embedding(output_dim=32,input_dim=2000,input_length=100))
model.add(Dropout(0.2))

## 建立多层感知器模型

In [28]:
model.add(Flatten())

In [29]:
model.add(Dense(units=256,
               activation='relu'))
model.add(Dropout(0.35))

In [30]:
model.add(Dense(units=1,activation='sigmoid'))

In [31]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (None, 100, 32)           64000     
_________________________________________________________________
dropout_1 (Dropout)          (None, 100, 32)           0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 3200)              0         
_________________________________________________________________
dense_1 (Dense)              (None, 256)               819456    
_________________________________________________________________
dropout_2 (Dropout)          (None, 256)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 257       
Total params: 883,713
Trainable params: 883,713
Non-trainable params: 0
_________________________________________________________________


## 训练模型

In [32]:
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [33]:
train_history = model.fit(x_train, y_train, batch_size=100, epochs=10, verbose=2, validation_split=0.2)

Train on 20000 samples, validate on 5000 samples
Epoch 1/10
10s - loss: 0.4775 - acc: 0.7582 - val_loss: 0.4436 - val_acc: 0.7998
Epoch 2/10
8s - loss: 0.2672 - acc: 0.8911 - val_loss: 0.5450 - val_acc: 0.7622
Epoch 3/10
8s - loss: 0.1612 - acc: 0.9412 - val_loss: 0.6631 - val_acc: 0.7504
Epoch 4/10
8s - loss: 0.0832 - acc: 0.9713 - val_loss: 0.9423 - val_acc: 0.7274
Epoch 5/10
8s - loss: 0.0486 - acc: 0.9848 - val_loss: 0.9478 - val_acc: 0.7676
Epoch 6/10
9s - loss: 0.0367 - acc: 0.9867 - val_loss: 1.1657 - val_acc: 0.7426
Epoch 7/10
7s - loss: 0.0296 - acc: 0.9890 - val_loss: 1.1710 - val_acc: 0.7592
Epoch 8/10
7s - loss: 0.0294 - acc: 0.9891 - val_loss: 1.4109 - val_acc: 0.7270
Epoch 9/10
7s - loss: 0.0232 - acc: 0.9919 - val_loss: 1.4333 - val_acc: 0.7380
Epoch 10/10
7s - loss: 0.0241 - acc: 0.9917 - val_loss: 1.8831 - val_acc: 0.6888


## 评估模型准确率

In [34]:
scores = model.evaluate(x_test, y_test, verbose=1)
scores[1]



0.80047999999999997

## 进行预测

In [35]:
predict = model.predict_classes(x_test)



In [36]:
predict[:10]

array([[1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1]], dtype=int32)

In [37]:
predict_classes = predict.reshape(-1)
predict_classes[:10]

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1], dtype=int32)

## 查看测试数据预测结果

In [38]:
SentimentDict = {1:'正面的',0:'负面的'}

def display_test_Sentiment(i):
    print(test_text[i])
    print(u'label真实值:', SentimentDict[y_test[i]],u'预测结果:', SentimentDict[predict_classes[i]])

In [39]:
display_test_Sentiment(2)

I would like to know if anyone know how I can get a copy of the movie, "That's the way of the World". It's been about 30 years since I've seen this movie, and I would like to see it again. Earth Wind & Fire transcend the nation globally with their inspirational music and themes. It was unfortunate that this group didn't take off like their counterparts in the early 70's, but as previously stated, racial tension existed in the United States which prohibited equalized exposure for the African American musical groups. It is good to see that Earth Wind & Fire continuing their success. I would like to add this movie to my collection. Someone please help me if possible. Thank you for your attention. Milton Shaw
(u'label\u771f\u5b9e\u503c:', '\xe6\xad\xa3\xe9\x9d\xa2\xe7\x9a\x84', u'\u9884\u6d4b\u7ed3\u679c:', '\xe6\xad\xa3\xe9\x9d\xa2\xe7\x9a\x84')


In [40]:
display_test_Sentiment(12502)

This movie was just horrendous. How could anybody like this movie, and for the ones who liked it because of the jokes, they should really take a long hard look in the mirror and ask themselves if stereotypes are not bad. Ignoring the face of the racial stereotypes, this was just awful. It never had its moments, if it paid homage to 1980's, "Airplane!", it needs to pay some more. Awful acting, terrible script writing, even for a movie with Mo'Nique or Tom Arnold. This movie was bad from the beginning, but people might have seen the whole thing, by the thought that the plane would crash. One of the worse movies ever, stay away.
(u'label\u771f\u5b9e\u503c:', '\xe8\xb4\x9f\xe9\x9d\xa2\xe7\x9a\x84', u'\u9884\u6d4b\u7ed3\u679c:', '\xe8\xb4\x9f\xe9\x9d\xa2\xe7\x9a\x84')


## 查看<美女与野兽>的影评

In [41]:
input_text = '''This movie is very overrated and the reason why the film is popular because of its promotion. Emma Watson performance is really boring as hell and her voice is not that good. I suggest you to watch the original version (1991). Dan Stevens and Luke Evans performance is pretty good tho. Overall, its the worst Disney film i've ever watched.'''

In [42]:
input_seq = token.texts_to_sequences([input_text])

In [43]:
print(input_seq[0])

[10, 16, 6, 51, 2, 1, 279, 134, 1, 18, 6, 1059, 84, 4, 90, 235, 6, 62, 354, 13, 605, 2, 37, 540, 6, 20, 11, 48, 9, 1460, 21, 5, 102, 1, 200, 306, 2, 235, 6, 180, 48, 443, 90, 1, 245, 907, 18, 203, 122, 292]


In [44]:
len(input_seq[0])

50

In [45]:
pad_input_seq = sequence.pad_sequences(input_seq, maxlen=100)

In [46]:
len(pad_input_seq)

1

In [47]:
predict_result = model.predict_classes(pad_input_seq)



In [48]:
print predict_result

[[0]]


In [49]:
predict_result[0][0]

0

In [50]:
SentimentDict[predict_result[0][0]]

'\xe8\xb4\x9f\xe9\x9d\xa2\xe7\x9a\x84'

In [51]:
def predict_review(input_text):
    input_seq = token.texts_to_sequences([input_text])
    pad_input_seq = sequence.pad_sequences(input_seq, maxlen=100)
    predict_result = model.predict_classes(pad_input_seq)
    print(SentimentDict[predict_result[0][0]])

In [52]:
input_text = '''As a fan of the original Disney film (Personally I feel it's their masterpiece) I was taken aback to the fact that a new version was in the making. Still excited I had high hopes for the film. Most of was shattered in the first 10 minutes. Campy acting with badly performed singing starts off a long journey holding hands with some of the worst CGI Hollywood have managed to but to screen in ages.

A film that is over 50% GCI, should focus on making that part believable, unfortunately for this film, it's far from that. It looks like the original film was ripped apart frame by frame and the beautiful hand-painted drawings have been replaced with digital caricatures. Besides CGI that is bad, it's mostly creepy. '''

In [53]:
predict_review(input_text)

负面的


In [54]:
input_text = '''Very much like the cartoon! The singing was really good ... Emma Watson ... what a star! The acting was great. I was in two minds about seeing this as it's my favorite fairy story and my favorite Disney cartoon. I was in tears at the end, even though I knew the story backwards. Why didn't I give it 10 ... The thing that let it down a little for me was the make up of The Beast, I thought it was a little too scary for the film and the wolves were quite a bit nastier than the cartoon version. Young children may be scared by these things.'''

In [55]:
predict_review(input_text)

正面的


## 使用更大的字典提取更多的文字

In [56]:
token = Tokenizer(num_words=3800)
token.fit_on_texts(train_text)

In [57]:
#将文字转为数字序列
x_train_seq =token.texts_to_sequences(train_text)
x_test_seq = token.texts_to_sequences(test_text)
#截长补短,让所有影评所产生的数字序列长度一样
x_train = sequence.pad_sequences(x_train_seq, maxlen=380)
x_test = sequence.pad_sequences(x_test_seq, maxlen=380)

In [58]:
model = Sequential()
model.add(Embedding(output_dim=32, input_dim=3800, input_length=380))
model.add(Dropout(0.2))
model.add(Flatten())
model.add(Dense(units=256, activation='relu'))
model.add(Dense(units=1, activation='sigmoid'))
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_2 (Embedding)      (None, 380, 32)           121600    
_________________________________________________________________
dropout_3 (Dropout)          (None, 380, 32)           0         
_________________________________________________________________
flatten_2 (Flatten)          (None, 12160)             0         
_________________________________________________________________
dense_3 (Dense)              (None, 256)               3113216   
_________________________________________________________________
dense_4 (Dense)              (None, 1)                 257       
Total params: 3,235,073
Trainable params: 3,235,073
Non-trainable params: 0
_________________________________________________________________


In [59]:
pad_input_seq = sequence.pad_sequences(input_seq, maxlen=380)

In [60]:
def predict_review(input_text):
    input_seq = token.texts_to_sequences([input_text])
    pad_input_seq = sequence.pad_sequences(input_seq, maxlen=380)
    predict_result = model.predict_classes(pad_input_seq)
    print(SentimentDict[predict_result[0][0]])

In [62]:
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [64]:
train_history = model.fit(x_train, y_train, batch_size=100, epochs=10, verbose=2, validation_split=0.2)

Train on 20000 samples, validate on 5000 samples
Epoch 1/10
33s - loss: 0.4729 - acc: 0.7571 - val_loss: 0.3087 - val_acc: 0.8760
Epoch 2/10
26s - loss: 0.1915 - acc: 0.9278 - val_loss: 0.5284 - val_acc: 0.7910
Epoch 3/10
25s - loss: 0.0745 - acc: 0.9770 - val_loss: 0.6801 - val_acc: 0.7836
Epoch 4/10
25s - loss: 0.0309 - acc: 0.9916 - val_loss: 0.8268 - val_acc: 0.7976
Epoch 5/10
26s - loss: 0.0143 - acc: 0.9959 - val_loss: 0.8221 - val_acc: 0.8128
Epoch 6/10
25s - loss: 0.0142 - acc: 0.9953 - val_loss: 1.0271 - val_acc: 0.7842
Epoch 7/10
25s - loss: 0.0118 - acc: 0.9963 - val_loss: 1.0786 - val_acc: 0.7938
Epoch 8/10
27s - loss: 0.0172 - acc: 0.9939 - val_loss: 1.0416 - val_acc: 0.8050
Epoch 9/10
26s - loss: 0.0237 - acc: 0.9914 - val_loss: 1.1499 - val_acc: 0.7912
Epoch 10/10
25s - loss: 0.0165 - acc: 0.9945 - val_loss: 0.8838 - val_acc: 0.8412


In [65]:
scores = model.evaluate(x_test, y_test, verbose=1)
scores[1]



0.85096000000000005

## RNN模型介绍

In [66]:
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation
from keras.layers.embeddings import Embedding
from keras.layers.recurrent import SimpleRNN

In [67]:
model = Sequential()
model.add(Embedding(output_dim=32, input_dim=3800, input_length=380))
model.add(Dropout(0.35))
model.add(SimpleRNN(units=16))
model.add(Dense(units=256, activation='relu'))
model.add(Dropout(0.35))
model.add(Dense(units=1, activation='sigmoid'))
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_3 (Embedding)      (None, 380, 32)           121600    
_________________________________________________________________
dropout_4 (Dropout)          (None, 380, 32)           0         
_________________________________________________________________
simple_rnn_1 (SimpleRNN)     (None, 16)                784       
_________________________________________________________________
dense_5 (Dense)              (None, 256)               4352      
_________________________________________________________________
dropout_5 (Dropout)          (None, 256)               0         
_________________________________________________________________
dense_6 (Dense)              (None, 1)                 257       
Total params: 126,993
Trainable params: 126,993
Non-trainable params: 0
_________________________________________________________________


In [69]:
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
train_history = model.fit(x_train, y_train, batch_size=100, epochs=10, verbose=2, validation_split=0.2)

Train on 20000 samples, validate on 5000 samples
Epoch 1/10
22s - loss: 0.5171 - acc: 0.7415 - val_loss: 0.6779 - val_acc: 0.7142
Epoch 2/10
20s - loss: 0.3267 - acc: 0.8658 - val_loss: 0.3968 - val_acc: 0.8198
Epoch 3/10
18s - loss: 0.2787 - acc: 0.8918 - val_loss: 0.4017 - val_acc: 0.8398
Epoch 4/10
18s - loss: 0.2373 - acc: 0.9076 - val_loss: 0.4254 - val_acc: 0.8258
Epoch 5/10
18s - loss: 0.2056 - acc: 0.9217 - val_loss: 0.5425 - val_acc: 0.7710
Epoch 6/10
18s - loss: 0.1826 - acc: 0.9296 - val_loss: 0.4000 - val_acc: 0.8640
Epoch 7/10
19s - loss: 0.1739 - acc: 0.9324 - val_loss: 0.4764 - val_acc: 0.8280
Epoch 8/10
23s - loss: 0.1347 - acc: 0.9509 - val_loss: 0.5607 - val_acc: 0.8094
Epoch 9/10
19s - loss: 0.1214 - acc: 0.9550 - val_loss: 0.6494 - val_acc: 0.8038
Epoch 10/10
18s - loss: 0.1023 - acc: 0.9609 - val_loss: 0.9864 - val_acc: 0.7594


In [74]:
scores = model.evaluate(x_test, y_test, verbose=1)



In [75]:
scores[1]

0.84116000000000002

## LSTM模型介绍
- RNN的长期依赖问题

    RNN在训练时会有长期依赖问题,这时由于RNN模型在训练时会遇到梯度消失或爆炸的问题.
    
    训练时计算和反向传播,梯度倾向于在每一时刻递增或递减,经过一段时间后,会发散到无穷大或收敛到0.
    
    简单来说,长期依赖问题就是在每一个时间的间隔不断增大时,RNN会丧失学习到的连接到远处的信息的能力.
    
- LSTM介绍

    简单来说,RNN只有短期记忆,没有长期记忆.
    
    LSTM使用闸门机制,使得它有了长期记忆,分别有三种闸门:遗忘门,输入门,输出门.

In [76]:
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation, Flatten
from keras.layers.embeddings import Embedding
from keras.layers.recurrent import LSTM

In [79]:
model = Sequential()
model.add(Embedding(output_dim=32,input_dim=3800, input_length=380))
model.add(Dropout(0.2))
model.add(LSTM(32))
model.add(Dense(units=256,activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(units=1, activation='sigmoid'))
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_4 (Embedding)      (None, 380, 32)           121600    
_________________________________________________________________
dropout_6 (Dropout)          (None, 380, 32)           0         
_________________________________________________________________
lstm_1 (LSTM)                (None, 32)                8320      
_________________________________________________________________
dense_7 (Dense)              (None, 256)               8448      
_________________________________________________________________
dropout_7 (Dropout)          (None, 256)               0         
_________________________________________________________________
dense_8 (Dense)              (None, 1)                 257       
Total params: 138,625
Trainable params: 138,625
Non-trainable params: 0
_________________________________________________________________


In [80]:
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
train_history = model.fit(x_train, y_train, batch_size=100, epochs=10, verbose=2, validation_split=0.2)

Train on 20000 samples, validate on 5000 samples
Epoch 1/10
206s - loss: 0.4839 - acc: 0.7562 - val_loss: 0.4817 - val_acc: 0.7806
Epoch 2/10
245s - loss: 0.2744 - acc: 0.8916 - val_loss: 0.5515 - val_acc: 0.7522
Epoch 3/10
202s - loss: 0.2283 - acc: 0.9099 - val_loss: 0.3210 - val_acc: 0.8778
Epoch 4/10
193s - loss: 0.2056 - acc: 0.9198 - val_loss: 0.3126 - val_acc: 0.8758
Epoch 5/10
198s - loss: 0.1865 - acc: 0.9287 - val_loss: 0.4749 - val_acc: 0.8120
Epoch 6/10
214s - loss: 0.1635 - acc: 0.9367 - val_loss: 0.5348 - val_acc: 0.7542
Epoch 7/10
194s - loss: 0.1528 - acc: 0.9435 - val_loss: 0.6294 - val_acc: 0.8182
Epoch 8/10
208s - loss: 0.1301 - acc: 0.9514 - val_loss: 0.5522 - val_acc: 0.8318
Epoch 9/10
187s - loss: 0.1191 - acc: 0.9570 - val_loss: 0.4646 - val_acc: 0.8710
Epoch 10/10
192s - loss: 0.1156 - acc: 0.9567 - val_loss: 0.4025 - val_acc: 0.8750


In [81]:
scores = model.evaluate(x_test, y_test, verbose=1)
scores[1]



0.86155999999999999