In [None]:
import csv
import numpy as np
import tensorflow as tf
from tensorflow import keras
from keras.utils import plot_model
from keras.models import Model
from keras.layers import *
from keras.layers.merge import concatenate
from keras.layers.wrappers import Bidirectional
from keras.callbacks import ModelCheckpoint
from preprocess import load_data
import keras.backend as K
from sklearn.metrics import f1_score
from sklearn.metrics import classification_report
from sklearn.metrics import precision_recall_fscore_support

In [None]:
from tensorflow.core.protobuf import rewriter_config_pb2
from tensorflow.keras.backend import set_session
# The model in this notebook is built with the standalone `keras` package
# (see the `from keras... import` lines), so the session has to be reset and
# registered through that package's backend (`K`). Going through
# `tensorflow.keras.backend` instead can leave the custom config below unused
# on Keras releases that keep their own session global.
K.clear_session()  # For easy reset of notebook state.

# Session config with Grappler's arithmetic optimization pass switched off.
config_proto = tf.ConfigProto()
off = rewriter_config_pb2.RewriterConfig.OFF
config_proto.graph_options.rewrite_options.arithmetic_optimization = off

# Register the configured session as the one Keras will use.
session = tf.Session(config=config_proto)
K.set_session(session)


In [None]:
# Model-size constants used by the cells below:
#   maxlen  - passed as the tweet embedding's input_length
#   hashlen - hashtag sequence length
#   emb_dim - output dimension of both embedding layers
maxlen, hashlen, emb_dim = 75, 3, 300

print("Loading data.....")

# `load_data` comes from the project-local `preprocess` module; it returns the
# vocabulary size, the train/val/test splits (tweet, hashtag, labels), the
# embedding weight matrix, and the validation-set metadata used when writing
# predictions.
(
    vocab_size,
    train_tweet, train_hash, train_labels,
    val_tweet, val_hash, val_labels,
    test_tweet, test_hash, test_labels,
    word_matrix,
    val_tweetId, val_userId, val_original_tweet,
) = load_data()

print("Data Loading Completed")

In [None]:

# Tweet branch: a sequence of `maxlen` token ids, embedded with a layer whose
# weights are initialised from `word_matrix`.
tweet = Input(shape=(maxlen,), name="tweet")
tweet_embeddings = Embedding(
    input_dim=vocab_size,
    output_dim=emb_dim,
    weights=[word_matrix],
    input_length=maxlen,
    name="Glove",
)(tweet)

# Hashtag branch: a sequence of `hashlen` token ids with its own embedding
# layer, initialised from the same `word_matrix`. The lengths come from the
# shared constants rather than repeated literals, so changing `maxlen` or
# `hashlen` keeps the input shapes and embeddings in sync.
hashtag = Input(shape=(hashlen,), name="hashtag")
hash_embeddings = Embedding(
    input_dim=vocab_size,
    output_dim=emb_dim,
    weights=[word_matrix],
    input_length=hashlen,
    name="Glove_",
)(hashtag)

In [None]:
# Layer settings shared by the tweet view and the hashtag view.
conv_args = dict(filters=256, kernel_size=3, strides=1, padding='same', activation='relu')
pool_args = dict(pool_size=2, padding='same')
lstm_args = dict(dropout=0.2, recurrent_dropout=0.1)

# Tweet view: Conv1D -> max-pool -> BiLSTM -> dropout -> 200-unit dense.
conv = Conv1D(**conv_args)(tweet_embeddings)
pool = MaxPooling1D(**pool_args)(conv)
lstm = Bidirectional(LSTM(256, **lstm_args))(pool)
drop = Dropout(0.2)(lstm)
dense1 = Dense(200)(drop)
# Per-view 3-way softmax head. NOTE(review): the merge cell in this notebook
# concatenates `dense1`, not `f1` - confirm whether this head is still needed.
f1 = Dense(3, activation="softmax")(dense1)

# Hashtag view: same stack, but with a 50-unit dense layer.
conv_h = Conv1D(**conv_args)(hash_embeddings)
pool_h = MaxPooling1D(**pool_args)(conv_h)
lstm_h = Bidirectional(LSTM(256, **lstm_args))(pool_h)
drop_h = Dropout(0.2)(lstm_h)
dense2 = Dense(50)(drop_h)
# Per-view softmax head; same caveat as `f1` above.
f2 = Dense(3, activation="softmax")(dense2)

In [None]:
# Fuse the two views by concatenating their dense representations, then
# classify into the three classes with a softmax layer.
dense = concatenate([dense1, dense2])

output = Dense(3, activation="softmax")(dense)

model = Model(inputs=[tweet, hashtag], outputs=output)

# A Keras model must be compiled before `fit` can be called; without this the
# training cell raises. The sparse categorical loss/metric match integer class
# ids, which is how the labels are consumed by `classification_report` later
# in the notebook (compared directly against argmax predictions).
# NOTE(review): assumes `train_labels` is encoded like `val_labels`; "adam" is
# a default choice - adjust if a specific optimizer was intended.
model.compile(
    loss="sparse_categorical_crossentropy",
    optimizer="adam",
    metrics=["sparse_categorical_accuracy"],
)
model.summary()

# Writes an architecture diagram to disk.
plot_model(model, to_file="saves/SingleView.png")

In [None]:
# Checkpointing and validation are currently disabled. To re-enable them,
# uncomment the three lines below and the two entries inside `fit_options`.
# path = "SingleView.hdf5"
# checkpoint = ModelCheckpoint(filepath=path, monitor='val_sparse_categorical_accuracy', verbose=1, save_best_only=True, mode='max')
# callbacks_list = [checkpoint]
train_inputs = [train_tweet, train_hash]
fit_options = dict(
    batch_size=250,
    epochs=10,
    # callbacks=callbacks_list,
    # validation_data=([val_tweet, val_hash], val_labels),
)

# `history` holds the object returned by `fit`.
history = model.fit(train_inputs, train_labels, **fit_options)


In [None]:
### predictions ###
# model.load_weights(path)
probability = model.predict([val_tweet, val_hash])
# Most probable class index per validation example.
predictions = np.argmax(probability, axis=1)
# Class ids shifted by one for the exported file; `predictions` itself stays
# unshifted because the evaluation cell compares it against `val_labels`.
predictionsa = predictions + 1

# One row per validation example: tweet id, user id, original text, class.
# The file is opened as UTF-8 explicitly so non-ASCII tweet text is written
# the same way regardless of the platform's default encoding. The `with`
# block closes the file, so no explicit close() is needed afterwards.
with open('L1/res/prediction_task5.tsv', 'w', newline='', encoding='utf-8') as f_output:
    tsv_output = csv.writer(f_output, delimiter='\t')
    tsv_output.writerow(["tweet_id", "user_id", "tweet", "Class"])
    for i, label in enumerate(predictionsa):
        tsv_output.writerow([val_tweetId[i], val_userId[i], val_original_tweet[i], label])

In [None]:
# Display names for the three classes, in the order passed to the report.
target_names = ['defect', 'possible defect', 'no defect']

# Per-class precision / recall / F1 of the validation predictions.
report = classification_report(
    val_labels,
    predictions,
    target_names=target_names,
)
print(report)