In [None]:
# coding:utf-8
import keras
from keras import backend as K
from keras.models import Model
from keras.layers import Dense, Input, Activation, Concatenate, Flatten, Dropout, Reshape, BatchNormalization
from keras.layers.convolutional import Conv2D, Conv1D
from keras.layers import LSTM, GRU
from keras.layers import Bidirectional, GlobalMaxPooling1D, GlobalAveragePooling1D
from keras.layers.pooling import MaxPool2D, MaxPool1D
from keras.optimizers import Adam
from keras.metrics import mae, categorical_accuracy
from keras_metric import neg_sparse_categorical_crossentropy, precision, recall
from keras.callbacks import ModelCheckpoint, EarlyStopping
from gensim.models import Word2Vec as w2v

import sys, os
import pickle as pkl
import pandas as pd
import numpy as np
import time

from dataloder import encode_samples, pad_samples
from mymetric import my_confusion_matrix, summary

In [None]:
class LSGNN():
    """Multi-task text CNN with a shared feature generator and two heads.

    The combined model maps an embedded sentence of shape
    ``(sent_len, sent_dim)`` to two outputs:

    * ``rd`` -- rumor detector: a single sigmoid unit trained with
      ``binary_crossentropy``.
    * ``kd`` -- knowledge guidance: a softmax over ``extra_knowledge`` units
      trained with ``kullback_leibler_divergence``.

    Both heads consume the feature vector produced by
    ``build_feature_generator_CNN``.

    Args:
        sent_len: Number of time steps in one input sample.
        sent_dim: Embedding dimension of each time step.
        extra_knowledge: Width of the knowledge-guidance output.
        filter_num: Number of filters in every ``Conv1D`` layer.
        filter_lengths: Kernel widths of the first-level convolutions; one
            convolution branch is built per entry.
    """

    def __init__(self, sent_len=100, sent_dim=32, extra_knowledge=8,
                 filter_num=40, filter_lengths=(2, 3, 4, 5)):
        # Input shape
        self.sent_len = sent_len
        self.sent_dim = sent_dim
        self.sent_shape = (self.sent_len, self.sent_dim)
        self.extra_knowledge = extra_knowledge
        # `drop` and `hidden_num` are not read by any method of this class;
        # they are kept so existing attribute access keeps working.
        self.drop = 0.4
        self.filter_num = filter_num
        # Attribute name keeps its original spelling for compatibility.
        self.fileter_length = list(filter_lengths)
        self.hidden_num = 40
        # Derived from the CNN layout instead of a hand-maintained constant
        # (equals 320 with the default settings).
        self.cnn_features_shape = (self._cnn_feature_dim(),)

        optimizer = Adam(lr=0.0002, beta_1=0.8)

        # Build the knowledge guidance subnetwork (named 'kd').
        self.knowledge_guidance = self.build_knowledge_guidance()

        # Build the rumor detector (named 'rd').
        self.rumor_detector = self.build_rumor_detector()

        # Build the feature generator.
        self.feature_generator = self.build_feature_generator_CNN()

        # Build the combined model: shared features feed both heads.
        text = Input(shape=self.sent_shape, name='combined_input')
        text_feature = self.feature_generator(text)

        # The combined model outputs the rumor-detector prediction and the
        # knowledge-guidance prediction, in that order.
        is_rumor = self.rumor_detector(text_feature)
        re_constract_knowledge = self.knowledge_guidance(text_feature)

        # Compile with one loss per output; the metrics dict is keyed by the
        # names given to the two head models ('rd' and 'kd').
        self.combined = Model(text, [is_rumor, re_constract_knowledge])
        self.combined.compile(loss=['binary_crossentropy', 'kullback_leibler_divergence'],
                              optimizer=optimizer,
                              metrics={'rd': 'acc',
                                       'kd': 'mae'})

    def _cnn_feature_dim(self):
        """Return the length of the vector produced by the feature generator.

        Every kernel width contributes two pooled vectors of ``filter_num``
        values each (one after the first convolution, one after the second).
        """
        return 2 * len(self.fileter_length) * self.filter_num

    def build_feature_generator_CNN(self):
        """Build the shared text feature extractor.

        For each kernel width in ``self.fileter_length`` a ``Conv1D`` is
        applied to the input, followed by a second ``Conv1D`` with kernel
        size 3. The outputs of both levels are global-max-pooled and all
        pooled vectors are concatenated (first-level pools first).

        Returns:
            A Keras ``Model`` mapping ``(sent_len, sent_dim)`` inputs to a
            vector of length ``self.cnn_features_shape[0]``.
        """
        # NOTE(review): the convolutions use the default padding, so sent_len
        # presumably has to be at least max(kernel width) + 2 -- confirm.
        text = Input(shape=self.sent_shape)

        # Layers are created level by level (convs, pools, convs, pools) so
        # the creation order matches the original hand-written layout.
        first_convs = [
            Conv1D(self.filter_num, kernel_size=width, activation="relu")(text)
            for width in self.fileter_length
        ]
        first_pools = [GlobalMaxPooling1D()(conv) for conv in first_convs]

        second_convs = [
            Conv1D(self.filter_num, kernel_size=3, activation="relu")(conv)
            for conv in first_convs
        ]
        second_pools = [GlobalMaxPooling1D()(conv) for conv in second_convs]

        text_feature = Concatenate(axis=1, name='cnn_feature')(first_pools + second_pools)
        model = Model(inputs=text, outputs=text_feature)
        return model

    def build_rumor_detector(self):
        """Build the rumor-detection head.

        Returns:
            A Keras ``Model`` named ``'rd'``: Dense(64, relu) followed by
            Dense(1, sigmoid) on top of the CNN feature vector.
        """
        text_feature = Input(shape=self.cnn_features_shape)

        x = Dense(64, activation='relu')(text_feature)
        output = Dense(1, activation='sigmoid')(x)

        # The name is passed to the constructor rather than assigned
        # afterwards, because `name` is read-only in some Keras versions.
        model = Model(inputs=text_feature, outputs=output, name='rd')
        return model

    def build_knowledge_guidance(self):
        """Build the knowledge-guidance head.

        Returns:
            A Keras ``Model`` named ``'kd'``: a linear Dense layer followed by
            a softmax Dense layer, both ``self.extra_knowledge`` units wide.
        """
        text_feature = Input(shape=self.cnn_features_shape)

        x = Dense(self.extra_knowledge, activation='linear')(text_feature)
        x = Dense(self.extra_knowledge, activation='softmax')(x)
        model = Model(inputs=text_feature, outputs=x, name='kd')
        return model

    # ============================ Train Methods ================================ #
    def train_global(self, X_train, y_rumor_train, y_ek_train, X_val, y_rumor_val, y_ek_val,
                     epoches=200, batch_size=128, param_path='./'):
        """Train the combined model on both tasks at once.

        The weights with the lowest ``val_loss`` are saved to
        ``<param_path>/<MMDD@HH>_lsgnn.hdf5`` and training stops once
        ``val_loss`` has not improved for 10 epochs.

        Args:
            X_train: Training inputs for the combined model.
            y_rumor_train: Training targets for the ``rd`` output.
            y_ek_train: Training targets for the ``kd`` output.
                NOTE(review): trained with KL divergence, so each row is
                presumably expected to be a probability distribution -- confirm.
            X_val, y_rumor_val, y_ek_val: Validation counterparts.
            epoches: Maximum number of epochs.
            batch_size: Mini-batch size.
            param_path: Directory in which the checkpoint file is written.

        Returns:
            The object returned by ``Model.fit``.
        """
        timestr = time.strftime('%m%d@%H')
        global_param_path = os.path.join(param_path, timestr + '_lsgnn.hdf5')

        earlystop = EarlyStopping(monitor='val_loss', patience=10, verbose=1, mode='auto')
        global_checkpoint = ModelCheckpoint(global_param_path, monitor='val_loss',
                                            verbose=0, save_best_only=True, mode='auto')
        global_callbacks_list = [global_checkpoint, earlystop]

        # validation_data is passed as a tuple, the form Keras documents.
        g_loss = self.combined.fit(X_train, [y_rumor_train, y_ek_train],
                                   batch_size=batch_size,
                                   validation_data=(X_val, [y_rumor_val, y_ek_val]),
                                   epochs=epoches, verbose=2, shuffle=True,
                                   callbacks=global_callbacks_list)
        return g_loss

## Train LSGNN

In [None]:
def encode_text(text_cut, maxlen=100, w2v_path='../model/word_emb_32.ell', w2vmodel=None):
    """Turn segmented texts into a padded array of word embeddings.

    Args:
        text_cut: Segmented texts, passed straight to ``encode_samples``.
        maxlen: Length passed to ``pad_samples``.
        w2v_path: Location of the saved Word2Vec model; only read when
            ``w2vmodel`` is not supplied.
        w2vmodel: An already loaded Word2Vec model. Supplying it avoids
            reloading the model from disk on every call.

    Returns:
        ``np.ndarray`` built from the padded samples.
        NOTE(review): presumably shaped (n_samples, maxlen, emb_dim) --
        depends on ``encode_samples`` / ``pad_samples``, confirm.
    """
    if w2vmodel is None:
        w2vmodel = w2v.load(w2v_path)
    X = encode_samples(text_cut, w2vmodel)
    X = pad_samples(X, maxlen)
    return np.array(X)

def encode_label(label):
    """Convert a (possibly nested) sequence of labels into a NumPy array."""
    label_array = np.array(label)
    return label_array

In [None]:
def load_features_name(path="../model/manual_feature.txt"):
    """Read feature names from a text file, one name per line.

    Args:
        path: File to read; defaults to the project's manual feature list.

    Returns:
        A list with one entry per line of the file, each stripped of
        surrounding whitespace. Blank lines yield empty strings.
    """
    # The context manager closes the file even if reading raises.
    with open(path, "r") as f:
        return [line.strip() for line in f]

## 1. Load Data

In [None]:
# Columns used as targets for the knowledge-guidance output.
high_features = [
    'interactivity', 'interestingness', 'moving', 'persuasive',
    'logic', 'readability', 'formality', 'Integrity1',
]
# Every manual feature that is not one of the high-level features above.
text_features = list(set(load_features_name()) - set(high_features))

# Training split: segmented text, rumor label and high-level feature targets.
data = pd.read_csv('../data/ictmcg_train.csv', header=0)
text_cut_train = data['seg'].values.tolist()
X_train = encode_text(text_cut_train)
y_rumor_train = encode_label(data['label'].values.tolist())
y_ek_train = encode_label(data[high_features].values.tolist())

In [None]:
# Validation split: same columns and encoding as the training split.
data_val = pd.read_csv('../data/ictmcg_val.csv', header=0)
text_cut_val = data_val['seg'].values.tolist()
X_val = encode_text(text_cut_val)
y_rumor_val = encode_label(data_val['label'].values.tolist())
y_ek_val = encode_label(data_val[high_features].values.tolist())

## 2. Train

In [None]:
# Build the model and train both heads jointly; `loss` holds what fit() returned.
lsgnn = LSGNN()
loss = lsgnn.train_global(
    X_train, y_rumor_train, y_ek_train,
    X_val, y_rumor_val, y_ek_val,
    epoches=100,
    batch_size=128,
)

## 3. Test

In [None]:
# Test split: same columns and encoding as the training split.
data_test = pd.read_csv('../data/ictmcg_test.csv', header=0)
text_cut_test = data_test['seg'].values.tolist()
X_test = encode_text(text_cut_test)
y_rumor_test = encode_label(data_test['label'].values.tolist())
y_ek_test = encode_label(data_test[high_features].values.tolist())

In [None]:
def show_eva(evalist, metrics_name):
    """Print a separator line, then one ``name: value`` line per metric.

    ``evalist[i]`` is reported under ``metrics_name[i]`` with four decimals.
    """
    print('===========================')
    for position, label in enumerate(metrics_name):
        print('%s: %.4f' % (label, evalist[position]))

In [None]:
# Evaluate the trained model on the test split and report rumor-detection
# quality. The model object is `lsgnn`; the previous `sgbnn` references
# pointed at a name this notebook never defines.
global_eva = lsgnn.combined.evaluate(X_test, [y_rumor_test, y_ek_test])
y_rumor_pred, y_ek_pred = lsgnn.combined.predict(X_test)
# Flatten the sigmoid outputs so each entry is a scalar, then threshold at 0.5.
y_rumor_predict = [1 if score > 0.5 else 0 for score in np.ravel(y_rumor_pred)]
show_eva(global_eva, lsgnn.combined.metrics_names)
summary(y_rumor_test, y_rumor_predict)
my_confusion_matrix(y_rumor_test, y_rumor_predict)