### 对keras内置路透社文本数据集进行内容分类（要求模型至少包含有两要素：CNN 、 RNN、 注意力机制）
即：CNN + RNN，或 RNN + 注意力机制，或 CNN + 注意力机制
路透社数据集：<BR>
路透社数据集包含许多短新闻及其对应的主题，由路透社在1986年发布。包括46个不同的主题，其中某些主题的样本更多，但是训练集中的每个主题都有至少10个样本。<BR>
与IMDB数据集一样，路透社数据集也内置到了Keras库中，并且已经经过了预处理。<BR>
#### 提示：
由于文本较长，先用CNN卷积下采样到较短长度，再用RNN处理是一个避免梯度消失的方案。<BR>
    (由于卷积核为一维，卷积核大小要相应增大到5或7，stride增加到3或5)。<BR>
引入注意力机制是另一种克服遗忘的方案。<BR>
采用pytorch框架的同学，也利用keras读取数据集内容后进行训练
#### 要求：
利用callback将最佳模型保存到文件(注意：在"save"目录下建立以自己学号命名的子目录，然后在该子目录下保存文件)，
最后对最佳模型进行指标评估，展示混淆矩阵
#### 数据读取方法：
(x_train, y_train), (x_test, y_test) = keras.datasets.reuters.load_data(num_words=10000, test_split=0.2)

#### 考核办法：
1）程序功能完成度<BR>
2）计算得到的准确率为指标，准确率达到0.7为及格成绩起点，0.8以上优秀<BR>
score = model.evaluate(x_test, y_test)


In [None]:
import tensorflow as tf
from tensorflow import keras
from keras.datasets import reuters
import numpy as np

# Load the Reuters newswire dataset bundled with Keras, with the same
# num_words / test_split settings the assignment text prescribes.
train_data, test_data = reuters.load_data(num_words=10000, test_split=0.2)
x_train, y_train = train_data
x_test, y_test = test_data
# Notebook display: number of training and test samples.
len(x_train), len(x_test)

In [None]:
# import random

# def eda(data,y,maxlen):
#     # new_data = data.copy()
#     for i in range(len(data)):
#         if len(data[i])>30:
#             data_ex = list(data[i].copy())
#             change_list = random.sample(range(len(data[i])-1),4)
#             temp = data_ex[change_list[0]]
#             data_ex[change_list[0]] = data_ex[change_list[1]]
#             data_ex[change_list[1]] = temp
#             del data_ex[change_list[2]]
#             del data_ex[change_list[3]]
#             # new_data[i] = data_ex
#         data = np.append(data,data_ex)
#         y = np.append(y,y[i])
#     # data = np.vstack((data,new_data))
    
#     # for i in range(len(data)):
#     #     while len(data[i])<maxlen:
#     #         data[i] = np.append(data[i],data[i])
#     return data,y

# # tx,ty = eda(x_train[:2],y_train[:2],350)
# # tx,ty
# np.array(x_train)

In [None]:
# Word -> index vocabulary shipped with the dataset, plus the inverse mapping
# (index -> word) for turning encoded newswires back into text.
# NOTE(review): load_data() offsets token ids by `index_from` (3 by default
# per the Keras docs), so decoding presumably needs `idx_to_word[i - 3]` -- confirm.
word_idx = reuters.get_word_index()
idx_to_word = {index: word for word, index in word_idx.items()}


from keras_preprocessing import sequence
from keras.utils.np_utils import to_categorical
import numpy as np

# Fixed sequence length fed to the network.
max_len = 350


def _repeat_to_length(seq, min_len):
    """Repeat ``seq`` a whole number of times until it has >= ``min_len`` tokens.

    Sequences that are already long enough are returned unchanged. Empty
    sequences are also returned unchanged (repeating them can never reach
    ``min_len``; the previous ``while`` loop would have spun forever).
    """
    if len(seq) == 0 or len(seq) >= min_len:
        return seq
    repeats = -(-min_len // len(seq))  # ceiling division
    # One allocation instead of re-copying the array on every np.append.
    return np.tile(seq, repeats)


# Short newswires are filled by repeating their own content rather than with
# zeros, so the network sees real tokens across the whole window.
x_train_ex = [_repeat_to_length(seq, max_len) for seq in x_train]
x_test_ex = [_repeat_to_length(seq, max_len) for seq in x_test]

# pad_sequences cuts every sequence down to exactly max_len (and zero-pads the
# empty ones, if any).
x_train_pad = sequence.pad_sequences(x_train_ex, maxlen=max_len)
x_test_pad = sequence.pad_sequences(x_test_ex, maxlen=max_len)

# One-hot encode the integer topic labels for categorical cross-entropy.
y_train_onehot = to_categorical(y_train)
y_test_onehot = to_categorical(y_test)

In [None]:
from keras.layers import Dense, Lambda, dot, Activation, concatenate
from keras.layers import Layer

class Attention(Layer):
    """Attention pooling over a sequence of RNN hidden states.

    The last time step is used as the query: every hidden state is linearly
    projected and dotted with the last hidden state to produce one score per
    time step; the softmax of those scores weights the hidden states into a
    context vector, which is concatenated with the last hidden state and
    passed through a tanh Dense layer.

    Input:  tensor indexed as (batch, time, hidden).
    Output: tensor of shape (batch, units).

    Args:
        units: width of the output attention vector (default 128, the value
            that used to be hard-coded).
        **kwargs: forwarded to ``keras.layers.Layer``.
    """

    def __init__(self, units=128, **kwargs):
        super().__init__(**kwargs)
        self.units = units

    def build(self, input_shape):
        # Sub-layers are created once here (not on every call) and stored as
        # attributes so the base Layer can track their weights.
        hidden_size = int(input_shape[-1])
        self.score_dense = Dense(hidden_size, use_bias=False, name='attention_score_vec')
        self.softmax = Activation('softmax', name='attention_weight')
        self.output_dense = Dense(self.units, use_bias=False, activation='tanh',
                                  name='attention_vector')
        super().build(input_shape)

    def call(self, hidden_states):
        # Implementing call() (instead of overriding __call__) keeps the
        # standard Layer.__call__ entry point in charge of invoking build().
        score_first_part = self.score_dense(hidden_states)

        # Last hidden state, used as the attention query.
        h_t = hidden_states[:, -1, :]
        # One score per time step: <W * h_s, h_t>.
        score = dot([score_first_part, h_t], [2, 1])
        attention_weights = self.softmax(score)

        # Weighted sum of the hidden states over the time axis.
        context_vector = dot([hidden_states, attention_weights], [1, 1])
        pre_activation = concatenate([context_vector, h_t])
        return self.output_dense(pre_activation)

    def get_config(self):
        # Makes the layer serialisable together with its `units` setting.
        config = super().get_config()
        config.update({'units': self.units})
        return config

In [None]:
from keras.models import Sequential
from keras.layers import Dense,Dropout,Embedding,Bidirectional
from keras.layers import GRU,LSTM,Conv1D,MaxPooling1D
from keras import regularizers

# Model variant 1: Embedding -> strided Conv1D -> MaxPooling1D -> BiLSTM -> Attention -> softmax.
model = Sequential()
# input_dim must be larger than the biggest token id. The dataset is loaded
# with num_words=10000, so ids go up to 9999; the previous value (8192) made
# the embedding lookup receive out-of-range indices.
model.add(Embedding(input_dim=10000,output_dim=1024,input_length=max_len))
# Kernel 7 / stride 5 shortens the sequence before the recurrent layer.
model.add(Conv1D(512,kernel_size=7,strides=5,activation='relu'))
# model.add(Dropout(0.5))
model.add(MaxPooling1D(pool_size=32))
# model.add(Dense(64,activation='relu'))
# return_sequences=True: the attention layer needs every time step.
model.add(Bidirectional(LSTM(64,dropout=0.5,return_sequences=True)))
model.add(Attention())
# 46 output units, one per Reuters topic.
model.add(Dense(46,activation='softmax'))

model.summary()

In [None]:
from keras.models import Sequential
from keras.layers import Dense,Dropout,Embedding,Bidirectional,Attention
from keras.layers import GRU,LSTM,Conv1D,MaxPooling1D

# Model variant 2: Embedding -> strided Conv1D -> MaxPooling1D -> GRU -> softmax.
model = Sequential()
# input_dim must be larger than the biggest token id. The dataset is loaded
# with num_words=10000, so ids go up to 9999; the previous value (8000) made
# the embedding lookup receive out-of-range indices.
model.add(Embedding(input_dim=10000,output_dim=128,input_length=max_len))
# Kernel 7 / stride 5 shortens the sequence before the recurrent layer.
model.add(Conv1D(64,kernel_size=7,strides=5,activation='relu'))
model.add(MaxPooling1D(pool_size=4))
model.add(Dropout(0.2))
# model.add(GRU(32,return_sequences=True,dropout=0.1))
# Only the final GRU state is passed on to the classifier.
model.add(GRU(64,dropout=0.1))
# 46 output units, one per Reuters topic.
model.add(Dense(46,activation='softmax'))

model.summary()

In [None]:
from keras.models import Sequential
from keras.layers import Dense,Dropout,Embedding,Bidirectional,Attention
from keras.layers import GRU,LSTM,Conv1D,MaxPooling1D
from keras import regularizers

# Model variant 3: Embedding -> strided Conv1D -> Dense -> BiLSTM -> softmax.
model = Sequential()
# input_dim must be larger than the biggest token id. The dataset is loaded
# with num_words=10000, so ids go up to 9999; the previous value (1024) made
# the embedding lookup receive out-of-range indices.
model.add(Embedding(input_dim=10000,output_dim=128,input_length=max_len))
# Kernel 7 / stride 5 shortens the sequence before the recurrent layer.
model.add(Conv1D(64,kernel_size=7,strides=5,activation='relu'))
# model.add(MaxPooling1D(pool_size=4))
# Dense on a 3-D tensor is applied to the last (feature) axis of each time step.
model.add(Dense(64,activation='relu'))
model.add(Dropout(0.2))
# Only the final forward/backward LSTM states are passed on.
model.add(Bidirectional(LSTM(64)))
model.add(Dropout(0.2))
# 46 output units, one per Reuters topic.
model.add(Dense(46,activation='softmax'))

model.summary()

In [None]:
from tensorflow.keras.optimizers import Adam,SGD

# Alternative optimizer that was tried:
# opt = SGD(learning_rate=0.1,decay=0.07,momentum=0.9,nesterov=True)
opt = Adam(learning_rate=1e-3, decay=1e-2)

# Labels are one-hot encoded, hence categorical (not sparse) cross-entropy.
model.compile(
    loss='categorical_crossentropy',
    optimizer=opt,
    metrics=['accuracy'],
)

In [None]:
from keras.callbacks import ModelCheckpoint

# Destination of the best model, in the per-student sub-directory of "save".
savepath = 'save/3200102349/project2.h5'

# Write a checkpoint only when the monitored validation accuracy improves.
checkpoint = ModelCheckpoint(
    savepath,
    monitor='val_accuracy',
    save_best_only=True,
    verbose=0,
)

In [None]:
# Hold out part of the *training* data for validation. The checkpoint callback
# picks the best epoch by val_accuracy; validating on the test set (as before)
# would select the model on the very data used for the final score and so
# inflate the reported test accuracy.
# validation_split takes the last 10% of the training arrays, before shuffling.
history = model.fit(
    x_train_pad, y_train_onehot,
    validation_split=0.1,
    shuffle=True, batch_size=32, epochs=40, verbose=1,
    callbacks=[checkpoint],
)

In [None]:
import matplotlib.pyplot as plt
def show_train_history(history, train, validation):
    """Plot a training metric and its validation counterpart against the epoch.

    Args:
        history: object whose ``history`` attribute maps metric names to
            per-epoch value sequences (e.g. the return value of ``model.fit``).
        train: key of the training metric; also used as the y-axis label.
        validation: key of the matching validation metric.
    """
    curves = history.history
    # Training curve first, validation second -- matches the legend order.
    for metric_key in (train, validation):
        plt.plot(curves[metric_key])
    plt.title('train history')
    plt.xlabel('epoch')
    plt.ylabel(train)
    plt.legend(['train', 'validation'], loc='upper left')
    plt.grid()
    plt.show()

# One figure per metric: accuracy first, then loss.
for train_key, val_key in (('accuracy', 'val_accuracy'), ('loss', 'val_loss')):
    show_train_history(history, train_key, val_key)

In [None]:
from keras.models import load_model

# Alternative that was tried: rebuild the whole model from the checkpoint file.
# bestmodel = load_model(filepath=savepath)
# score = bestmodel.evaluate(x_test_pad,y_test_onehot)

# Restore the best checkpointed weights into the current model, then evaluate
# on the padded test set.
model.load_weights(savepath)
score = model.evaluate(x_test_pad, y_test_onehot)
# evaluate() returns [loss, accuracy] for the metrics passed to compile().
test_accuracy = score[1]
print('score: %.3f' % test_accuracy)

In [None]:
import numpy as np
import pandas as pd

# `bestmodel` is never defined (its load_model call is commented out), so the
# original line raised NameError. `model` already holds the best checkpointed
# weights after load_weights(savepath), so predict with it instead.
# argmax over the last axis turns the per-class probabilities into a class id.
prediction = np.argmax(model.predict(x_test_pad),axis=-1)

# Confusion matrix: rows are the true topic labels, columns the predicted ones.
test_labels = y_test
pd.crosstab(test_labels,prediction,rownames=['label'],colnames=['predict'])