In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import tensorflow as tf
from keras import backend as K
import keras.layers as layers
from keras.models import Model, load_model
from keras.engine import Layer
import tensorflow_hub as hub
# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list the files in the input directory

import os
print(os.listdir("../input"))

# Any results you write to the current directory are saved as output.

In [None]:
# Create a custom layer that allows us to update weights (lambda layers do not have trainable parameters!)

class ElmoEmbeddingLayer(Layer):
    """Keras layer that wraps the TF-Hub ELMo v2 module.

    The layer receives a string tensor holding one raw sentence per row,
    feeds it to the hub module's ``'default'`` signature and returns the
    module's ``'default'`` output, which this layer reports as having shape
    ``(batch, 1024)`` (see ``compute_output_shape``).

    A custom layer is used so that the module's trainable variables can be
    registered in ``trainable_weights`` and updated by the Keras optimizer.
    """

    def __init__(self, **kwargs):
        """Set the output width and forward ``kwargs`` to ``Layer.__init__``."""
        # Width of the vector reported by compute_output_shape.
        self.dimensions = 1024
        self.trainable=True
        super(ElmoEmbeddingLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        """Instantiate the hub module and register its trainable variables."""
        self.elmo = hub.Module('https://tfhub.dev/google/elmo/2', trainable=self.trainable,
                               name="{}_module".format(self.name))

        # Collect the variables created under this layer's module scope.
        # `tf` is used directly (it is already imported by this file and used
        # in `call`) instead of reaching it through the `K.tf` alias.
        self.trainable_weights += tf.trainable_variables(scope="^{}_module/.*".format(self.name))
        super(ElmoEmbeddingLayer, self).build(input_shape)

    def call(self, x, mask=None):
        """Embed a batch of sentences.

        ``x`` is cast to ``tf.string`` and its axis 1 is squeezed away before
        being passed to the module; the ``'default'`` entry of the module's
        output dict is returned.
        """
        result = self.elmo(K.squeeze(K.cast(x, tf.string), axis=1),
                      as_dict=True,
                      signature='default',
                      )['default']
        return result

    def compute_mask(self, inputs, mask=None):
        """Return a boolean tensor that is False where the input equals '--PAD--'."""
        return K.not_equal(inputs, '--PAD--')

    def compute_output_shape(self, input_shape):
        """Report the output shape as ``(batch, self.dimensions)``."""
        return (input_shape[0], self.dimensions)

In [None]:
def build_model():
    """Build, compile and summarise the ELMo sentiment classifier.

    Architecture: a single-string input -> ``ElmoEmbeddingLayer`` ->
    256-unit ReLU dense layer -> 3-way softmax.  The model is compiled with
    categorical cross-entropy, the Adam optimizer and an accuracy metric, and
    its summary is printed before it is returned.
    """
    # One raw sentence (a single string) per sample.
    text_in = layers.Input(shape=(1,), dtype="string")
    elmo_vector = ElmoEmbeddingLayer()(text_in)
    hidden = layers.Dense(256, activation='relu')(elmo_vector)
    class_probs = layers.Dense(3, activation='softmax')(hidden)

    model = Model(inputs=[text_in], outputs=class_probs)
    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    model.summary()

    return model

In [None]:
# Load the train and test CSV files from the input directory.
# low_memory=False makes pandas parse each file in a single pass so that
# column dtypes are inferred consistently.
train_set  = pd.read_csv('../input/train.csv',low_memory = False)
test_set = pd.read_csv('../input/test.csv',low_memory = False)

In [None]:
# Preview the first rows of the test set as loaded from disk.
test_set.head()

In [None]:
def extract_relevant_text(text, drug_name):
    """Keep only the parts of ``text`` that mention ``drug_name``.

    ``text`` is lower-cased and split on ``'.'``; every fragment that contains
    the drug name is kept and the kept fragments are joined with single
    spaces.  The drug name is lower-cased as well, so the substring match is
    case-insensitive (the fragments themselves are already lower-case).

    Returns an empty string when no fragment mentions the drug.
    """
    needle = drug_name.lower()
    fragments = text.lower().split('.')
    return ' '.join(fragment for fragment in fragments if needle in fragment)


# Reduce every training document to the sentences about its own drug.
relevant_text_train = [
    extract_relevant_text(text, drug_name)
    for text, drug_name in zip(train_set['text'], train_set['drug'])
]
train_set['text'] = relevant_text_train

# Show the test text before it is filtered.
print(test_set['text'][0:5])

# Same reduction for the test documents (the column itself is replaced later).
relevant_text_test = [
    extract_relevant_text(text, drug_name)
    for text, drug_name in zip(test_set['text'], test_set['drug'])
]

In [None]:
# Replace the 'text' column of both frames with the drug-specific sentences
# collected in relevant_text_train / relevant_text_test.
train_set['text'] = relevant_text_train
test_set['text'] = relevant_text_test

In [None]:
# Preview the test set again at this point in the notebook.
test_set.head()

In [None]:
# Training inputs as an (n_samples, 1) object array of strings, one sentence
# per row, plus the sentiment labels as a plain Python list.
train_text = np.array(train_set['text'].tolist(), dtype=object)[:, np.newaxis]
train_label = train_set['sentiment'].tolist()


In [None]:
def onehot(y, num_classes=3):
    """One-hot encode class labels into a dense float matrix.

    Parameters
    ----------
    y : sized iterable
        Class labels.  Each label is looked up by its string form, so ``1``,
        ``np.int64(1)`` and ``'1'`` all select column 1.
    num_classes : int, default 3
        Number of columns in the result; valid labels are
        ``0 .. num_classes - 1``.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(y), num_classes)`` filled with zeros except for
        a single 1 per row in the column of that row's label.

    Raises
    ------
    KeyError
        If a label's string form is not one of the valid class labels.
    """
    column_of = {str(k): k for k in range(num_classes)}
    Y = np.zeros((len(y), num_classes))
    # Iterate over the values themselves so that any sized iterable works,
    # not only containers that support positional ``y[i]`` indexing.
    for row, label in enumerate(y):
        Y[row, column_of[str(label)]] = 1
    return Y

In [None]:
def reverse_mapping_predict(predict, top_k=1):
    """Turn per-class probabilities into a table of the most likely classes.

    Parameters
    ----------
    predict : iterable of 1-D numpy arrays
        One probability vector per sample (e.g. the output of
        ``model.predict``).  Any number of classes is accepted.
    top_k : int, default 1
        How many of the highest-probability classes to report per sample.

    Returns
    -------
    pandas.DataFrame
        One row per sample with columns ``Prediction1, Probability1, ...,
        Prediction<top_k>, Probability<top_k>``, ordered from the most to the
        least probable class.  Ties are resolved in the order produced by
        ``numpy.argsort``.

    Raises
    ------
    ValueError
        If ``top_k`` is smaller than 1 or larger than the number of classes
        in a probability vector.
    """
    # A slice of [-0:] would select every class, so reject it explicitly.
    if top_k < 1:
        raise ValueError("top_k must be at least 1, got {}".format(top_k))

    rows = []
    for probs in predict:
        if top_k > len(probs):
            raise ValueError(
                "top_k={} exceeds the number of classes ({})".format(top_k, len(probs))
            )
        # argsort is ascending: take the last top_k indices, best first.
        best_first = probs.argsort()[-top_k:][::-1]
        row = []
        for class_index in best_first:
            row.extend([int(class_index), probs[class_index]])
        rows.append(row)

    columns = []
    for rank in range(1, top_k + 1):
        columns.extend(['Prediction{}'.format(rank), 'Probability{}'.format(rank)])
    pred_df = pd.DataFrame(rows, columns=columns)
    return pred_df

In [None]:
#from keras.utils import to_categorical
# One-hot encode the training labels with the onehot helper defined in an
# earlier cell of this notebook.
train_label_one_hot = onehot(train_label)

In [None]:

# Build the classifier, train it on the filtered training sentences and
# write the trained model to an HDF5 file in the working directory.
model = build_model()
model.fit(x=train_text,
          y=train_label_one_hot,
          batch_size=1,
          epochs=3)
model.save('Elmo_Model_drug_sentiment_analysis.h5')


In [None]:
# Test inputs in the same (n_samples, 1) object-array layout as train_text.
test_text = np.array(test_set['text'].tolist(), dtype=object)[:, np.newaxis]

# Predict with the in-memory model, keep the top class and its probability
# for every row, and write them next to the test columns.
pre_save_preds = model.predict(test_text)
all_pred = reverse_mapping_predict(pre_save_preds)
df_pred = pd.concat(
    [test_set.reset_index(drop=True), all_pred.reset_index(drop=True)],
    axis=1,
)
df_pred.to_csv('AV_innoplexus_predictions.csv', index=False)

In [None]:
# List the contents of the input directory.
print(os.listdir("../input"))

In [None]:
# List the current working directory, where the model file and prediction CSV
# are written by earlier cells.
print(os.listdir(os.getcwd()))

In [None]:
# Rebuild the same architecture and restore the weights from the saved file.
model_2 = build_model()
model_2.load_weights('Elmo_Model_drug_sentiment_analysis.h5')

# Predict on the test text again with the restored model and export the
# result to a second CSV file.
test_text2 = np.array(test_set['text'].tolist(), dtype=object)[:, np.newaxis]

post_save_preds = model_2.predict(test_text2)
all_pred_2 = reverse_mapping_predict(post_save_preds)
df_pred_2 = pd.concat(
    [test_set.reset_index(drop=True), all_pred_2.reset_index(drop=True)],
    axis=1,
)
df_pred_2.to_csv('AV_innoplexus_predictions_2.csv', index=False)