In [5]:
import tensorflow as tf
import os

# Restrict the process to the first GPU. This has to happen before the first
# device query below: the CUDA runtime reads CUDA_VISIBLE_DEVICES when it is
# initialised, so assigning it after TensorFlow has already enumerated the
# GPUs has no effect.
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

# Print the name of the GPU device TensorFlow will use.
gpu_device_name = tf.test.gpu_device_name()
print(gpu_device_name)

import pandas as pd

# Load the 'All' sheet as strings; keep_default_na = False keeps empty cells as
# '' instead of NaN so the emptiness check below is a plain string comparison.
dataset = pd.read_excel(
    '1617241934831197.xlsx',
    sheet_name = 'All',
    header = 0,
    names = ['content', 'channelName', 'title'],
    keep_default_na = False,
    engine = 'openpyxl',
).astype(str)

# Remove articles without any body text. DataFrame.drop returns a new frame
# (it does not modify in place by default), so the result must be reassigned;
# otherwise the empty rows silently stay in the data set.
indices = dataset.loc[dataset['content'] == ''].index.values
dataset = dataset.drop(indices, axis = 0)

# The model input is the title and the body joined by '!'.
dataset['news'] = dataset['title'] + '!' + dataset['content']

import sklearn
from sklearn.model_selection import train_test_split
import jieba
from tensorflow import keras
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

def process(x):
    """Segment the string *x* with jieba and return the tokens as a list."""
    return list(jieba.cut(x))


# Segment every article into a list of word tokens.
dataset['tokenized'] = dataset['news'].apply(process)

# Fit the Keras tokenizer on the whole tokenised corpus and keep its
# word -> integer id mapping for building the embedding matrix later.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(dataset['tokenized'])
vocab = tokenizer.word_index

# Channel name -> class id. The three sports channels all map to class 6, so
# there are 9 distinct classes (0..8).
kind = {
    '财经': 0,
    '房产': 1,
    '教育': 2,
    '科技': 3,
    '军事': 4,
    '汽车': 5,
    '体育': 6,
    '综合体育最新': 6,
    '体育焦点': 6,
    '游戏': 7,
    '娱乐': 8,
}

# One class id per row, in row order; a channel name missing from `kind`
# raises KeyError.
label = [kind[channel] for channel in dataset['channelName']]

# Hold out 10% of the tokenised articles (and their labels) for evaluation.
x_train, x_test, y_train, y_test = train_test_split(
    dataset['tokenized'],
    label,
    test_size = 0.1,
)

# Translate token lists into integer id sequences with the fitted tokenizer.
x_train_word_ids, x_test_word_ids = (
    tokenizer.texts_to_sequences(split) for split in (x_train, x_test)
)

# Bring every sequence to a fixed length of 128 ids (pad_sequences defaults
# decide where padding/truncation happens).
x_train_padded_seqs, x_test_padded_seqs = (
    pad_sequences(ids, maxlen = 128)
    for ids in (x_train_word_ids, x_test_word_ids)
)

import gensim
from gensim.models.word2vec import Word2Vec
import numpy as np

# Pretrained vectors stored in binary word2vec format.
model = gensim.models.KeyedVectors.load_word2vec_format(
    'model/baike_26g_news_13g_novel_229g.bin',
    binary = True,
)

# One 128-wide row per tokenizer id plus one extra row for index 0.
# NOTE(review): assumes the pretrained vectors are 128-dimensional - confirm.
embedding_matrix = np.zeros((len(vocab) + 1, 128))

# Copy the pretrained vector of every known word into its row; words absent
# from the pretrained model keep an all-zero row.
for token, row in vocab.items():
    key = str(token)
    if key in model:
        embedding_matrix[row] = model[key]
        
from tensorflow.keras import layers, optimizers, metrics, regularizers
import tensorflow.keras.backend as K
from tensorflow.keras.utils import plot_model

def mish(x):
    """Mish activation: ``x * tanh(softplus(x))``, applied element-wise."""
    gate = K.tanh(K.softplus(x))
    return x * gate

def my_loss(y_true, y_pred, e = 0.1):
    """Categorical cross-entropy blended with a uniform-target term.

    The result is ``(1 - e) * CE(y_true, y_pred) + e * CE(uniform, y_pred)``,
    where ``uniform`` assigns probability ``1 / num_classes`` to every class.

    Args:
        y_true: Target class distribution, same shape as ``y_pred``.
        y_pred: Predicted class probabilities; classes are on the last axis.
        e: Weight of the uniform-target term.

    Returns:
        The per-sample loss tensor.
    """
    # Read the class count from the prediction itself instead of hard-coding
    # it, so the loss stays correct if the number of output classes changes.
    num_classes = K.cast(K.shape(y_pred)[-1], K.dtype(y_pred))
    uniform = K.ones_like(y_pred) / num_classes

    loss1 = K.categorical_crossentropy(y_true, y_pred)
    loss2 = K.categorical_crossentropy(uniform, y_pred)

    return (1 - e) * loss1 + e * loss2

def cosine_decay_with_warmup(global_step, learning_rate_base, total_steps, warmup_learning_rate = 0.0, warmup_steps = 0, hold_base_rate_steps = 0):
    """Learning rate for ``global_step`` under warm-up, hold and cosine decay.

    The schedule ramps linearly from ``warmup_learning_rate`` to
    ``learning_rate_base`` over ``warmup_steps`` steps, optionally holds the
    base rate for ``hold_base_rate_steps`` steps, then follows half a cosine
    period down to zero at ``total_steps``. Past ``total_steps`` the rate is 0.

    Args:
        global_step: Index of the current training step.
        learning_rate_base: Peak learning rate reached after warm-up.
        total_steps: Total number of training steps; must differ from
            ``warmup_steps + hold_base_rate_steps`` (it is used as a divisor).
        warmup_learning_rate: Learning rate at step 0 of the warm-up ramp.
        warmup_steps: Length of the linear warm-up; 0 disables warm-up.
        hold_base_rate_steps: Steps to stay at the base rate after warm-up.

    Returns:
        The learning rate as a NumPy value (result of ``np.where``).
    """
    decay_start = warmup_steps + hold_base_rate_steps
    learning_rate = 0.5 * learning_rate_base * (1 + np.cos(np.pi * (global_step - warmup_steps - hold_base_rate_steps) / float(total_steps - warmup_steps - hold_base_rate_steps)))

    if hold_base_rate_steps > 0:
        # Until the hold phase is over, stay at the base rate.
        learning_rate = np.where(global_step > decay_start, learning_rate, learning_rate_base)

    # Only build the ramp when there is one: with the default warmup_steps = 0
    # the slope would divide by zero.
    if warmup_steps > 0:
        slope = (learning_rate_base - warmup_learning_rate) / warmup_steps
        warmup_rate = slope * global_step + warmup_learning_rate
        learning_rate = np.where(global_step < warmup_steps, warmup_rate, learning_rate)

    return np.where(global_step > total_steps, 0.0, learning_rate)

class WarmUpCosineDecayScheduler(keras.callbacks.Callback):
    """Keras callback that sets the optimizer learning rate before each batch.

    The rate comes from ``cosine_decay_with_warmup`` evaluated at the
    callback's own step counter, which advances by one after every batch.
    Each rate actually used is recorded in ``learning_rates``.
    """

    def __init__(self, learning_rate_base, total_steps, global_step_init = 0, warmup_learning_rate = 0.0, warmup_steps = 0, hold_base_rate_steps = 0, verbose = 0):
        super().__init__()
        # Schedule parameters, forwarded unchanged to cosine_decay_with_warmup.
        self.learning_rate_base = learning_rate_base
        self.total_steps = total_steps
        self.warmup_learning_rate = warmup_learning_rate
        self.warmup_steps = warmup_steps
        self.hold_base_rate_steps = hold_base_rate_steps
        # Stored only; no method of this class reads it.
        self.verbose = verbose
        # Running state: current step index and history of applied rates.
        self.global_step = global_step_init
        self.learning_rates = []

    def on_batch_begin(self, batch, logs = None):
        """Compute the scheduled rate for the current step and apply it."""
        scheduled = cosine_decay_with_warmup(
            global_step = self.global_step,
            learning_rate_base = self.learning_rate_base,
            total_steps = self.total_steps,
            warmup_learning_rate = self.warmup_learning_rate,
            warmup_steps = self.warmup_steps,
            hold_base_rate_steps = self.hold_base_rate_steps,
        )
        K.set_value(self.model.optimizer.lr, scheduled)

    def on_batch_end(self, batch, logs = None):
        """Advance the step counter and log the rate the batch ran with."""
        self.global_step += 1
        self.learning_rates.append(K.get_value(self.model.optimizer.lr))
        
def _conv_branch(embed, kernel_size, pool_size):
    """Build one Conv1D -> BatchNormalization -> mish -> MaxPooling1D branch."""
    branch = layers.Conv1D(256, kernel_size, padding = 'same', strides = 1, kernel_regularizer = regularizers.l2(0.01))(embed)
    branch = layers.BatchNormalization()(branch)
    branch = layers.Activation(mish)(branch)
    return layers.MaxPooling1D(pool_size = pool_size)(branch)


def text_cnn(x_train_padded_seqs, y_train, x_test_padded_seqs, y_test, embedding_matrix):
    """Build, train, save and evaluate the TextCNN classifier.

    The network embeds 128-id sequences with the frozen ``embedding_matrix``,
    runs three parallel convolution branches (kernel sizes 3, 4 and 5),
    concatenates them and classifies into 9 classes with a softmax layer.

    Side effects: writes the architecture diagram to ``model/model.png``,
    saves the trained model to ``model/textcnn.h5`` and prints accuracy and
    weighted F1 on the test split.

    Args:
        x_train_padded_seqs: Training id sequences, 128 ids each.
        y_train: Integer class ids for the training sequences.
        x_test_padded_seqs: Test id sequences, 128 ids each.
        y_test: Integer class ids for the test sequences.
        embedding_matrix: 2-D array of pretrained vectors, one row per word id.

    Returns:
        None.
    """
    num_classes = 9
    seq_len = 128
    # Size the embedding layer from the matrix itself rather than from the
    # module-level `vocab`, so the function only depends on its arguments.
    vocab_size, embed_dim = embedding_matrix.shape

    # NOTE(review): the input carries integer word ids but is declared float64;
    # kept as in the original - consider an integer dtype.
    inlet = keras.Input(shape = (seq_len,), dtype = 'float64')
    embedder = layers.Embedding(vocab_size, embed_dim, input_length = seq_len, weights = [embedding_matrix], trainable = False)
    embed = embedder(inlet)

    # (kernel size, pool size) per branch. With 128 input steps each pool size
    # leaves 4 steps, so the branches can be concatenated on the channel axis.
    branches = [_conv_branch(embed, kernel_size, pool_size) for kernel_size, pool_size in ((3, 32), (4, 31), (5, 30))]

    # Use the Concatenate layer rather than the backend op so the merge is a
    # proper layer of the functional model.
    cnn = layers.Concatenate(axis = -1)(branches)

    flat = layers.Flatten()(cnn)
    drop = layers.Dropout(0.5)(flat)
    outlet = layers.Dense(num_classes, activation = 'softmax')(drop)
    model = keras.Model(inputs = inlet, outputs = outlet)
    # The initial rate given here is overwritten before every batch by the
    # warm-up/cosine callback below.
    opt = optimizers.SGD(lr = 0.001, momentum = 0.9, decay = 0.0, nesterov = True)
    model.compile(loss = my_loss, optimizer = opt, metrics = ['accuracy'])
    plot_model(model, to_file = 'model/model.png', show_shapes = True, show_layer_names = False)

    # Derive the schedule length from the actual training set instead of a
    # hard-coded sample count, so it stays right when the data changes.
    sample_count = len(x_train_padded_seqs)
    epochs = 16
    warmup_epoch = 8
    batch_size = 16
    learning_rate_base = 0.01
    total_steps = int(epochs * sample_count / batch_size)
    warmup_steps = int(warmup_epoch * sample_count / batch_size)

    warm_up_lr = WarmUpCosineDecayScheduler(learning_rate_base = learning_rate_base, total_steps = total_steps, warmup_learning_rate = 4e-06, warmup_steps = warmup_steps, hold_base_rate_steps = 5)

    one_hot_labels = keras.utils.to_categorical(y_train, num_classes = num_classes)
    # Pass the same batch_size/epochs the schedule was computed from.
    model.fit(x_train_padded_seqs, one_hot_labels, batch_size = batch_size, epochs = epochs, callbacks = [warm_up_lr])
    model.save('model/textcnn.h5')

    result = model.predict(x_test_padded_seqs)
    result_labels = np.argmax(result, axis = 1)
    print('accuracy', sklearn.metrics.accuracy_score(y_test, result_labels))
    print('average f1-score:', sklearn.metrics.f1_score(y_test, result_labels, average = 'weighted'))
    
# Entry point: build, train and evaluate the TextCNN on the arrays prepared
# by the top-level statements above.
if __name__=='__main__':
    text_cnn(x_train_padded_seqs, y_train, x_test_padded_seqs, y_test, embedding_matrix)

/device:GPU:0
Train on 13168 samples
Epoch 1/16
Epoch 2/16
Epoch 3/16
Epoch 4/16
Epoch 5/16
Epoch 6/16
Epoch 7/16
Epoch 8/16
Epoch 9/16
Epoch 10/16
Epoch 11/16
Epoch 12/16
Epoch 13/16
Epoch 14/16
Epoch 15/16
Epoch 16/16
accuracy 0.9426229508196722
average f1-score: 0.9423758849528252
