In [1]:
import pandas as pd
import numpy as np

import tensorflow as tf
import tensorflow_hub as hub
import tensorflow_text as text
from keras.layers import Input, Dense
from keras.models import Model
from keras.callbacks import EarlyStopping
from sklearn.preprocessing import LabelEncoder

In [2]:
# Input files: the labelled training set is a regular comma-separated CSV,
# while the test set is tab-separated.
TRAIN_CSV = 'simplified_emotions.csv'
TEST_TSV = 'test.csv'

train = pd.read_csv(TRAIN_CSV)
test = pd.read_csv(TEST_TSV, sep='\t')

In [3]:
# Trainable (fine-tunable) copy of the uncased BERT-base encoder from TF Hub.
# NOTE(review): no later cell visible in this notebook references `bert_layer`;
# the model is built from `bert_encoder` instead. If nothing else needs it,
# this load only costs download time and memory - confirm and consider removing.
bert_layer = hub.KerasLayer(
    "https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/4",
    trainable=True,
)

In [4]:
# Positive class: every training row labelled 'happiness'.
train_happiness = train.loc[train['Emotions'].eq('happiness')]

In [5]:
# Negative class: every training row labelled 'other'.
train_other = train.loc[train['Emotions'].eq('other')]

In [6]:
# Downsample the 'other' rows to exactly as many rows as the 'happiness'
# subset so the two classes are balanced.
# A fixed random_state makes the drawn subset (and everything trained on it)
# reproducible across kernel restarts; without it each run samples new rows.
train_other = train_other.sample(n=train_happiness.shape[0], random_state=42)

In [7]:
# Sanity check: both class subsets should now have the same number of rows.
print(*(subset.shape for subset in (train_happiness, train_other)))

(179858, 3) (179858, 3)


In [8]:
# Stack the two equally sized class subsets row-wise: all 'happiness' rows
# first, followed by all sampled 'other' rows (original index labels are kept).
train_balanced = pd.concat((train_happiness, train_other), axis=0)

In [9]:
# Display the balanced frame to eyeball the concatenated rows and labels.
train_balanced

Unnamed: 0.1,Unnamed: 0,Sentences,Emotions
0,1,Dorian Gray with Rainbow Scarf #LoveWins (from...,happiness
1,2,@SelectShowcase @Tate_StIves ... Replace with ...,happiness
2,3,@Sofabsports thank you for following me back. ...,happiness
3,4,@britishmuseum @TudorHistory What a beautiful ...,happiness
4,5,@NationalGallery @ThePoldarkian I have always ...,happiness
...,...,...,...
420598,26853,Your 17. End of conversation. You don’t love him,other
283920,10105,i asked him feeling a bit rejected,other
36963,35334,i feel i am forever depressed and there is a c...,other
223300,49595,i feel really shitty bout them at first but th...,other


In [10]:
# Encode the target in place: 1 for 'happiness', 0 for every other label.
# The already-encoded value 1 is accepted as well, which makes this cell
# idempotent: re-running it on the 0/1 column leaves the labels unchanged
# instead of collapsing every row to 0 (since 1 != 'happiness').
# `isin` is vectorized, replacing the per-row Python lambda.
train_balanced['Emotions'] = (
    train_balanced['Emotions'].isin(['happiness', 1]).astype('int64')
)

In [11]:
# Model inputs are the raw sentences; targets are the values of the
# 'Emotions' column of the balanced frame.
X_train, y_train = train_balanced['Sentences'], train_balanced['Emotions']

# Sentences from the test file that predictions will be made for.
X_test = test['sentence']

In [12]:
# Display the target Series to check its values, length and dtype.
y_train

0         1
1         1
2         1
3         1
4         1
         ..
420598    0
283920    0
36963     0
223300    0
16153     0
Name: Emotions, Length: 359716, dtype: int64

In [13]:
# TF Hub layers used to build the classifier:
#   bert_preprocess - turns raw strings into the encoder's input dictionary
#   bert_encoder    - uncased BERT-base encoder, loaded without trainable=True
bert_preprocess = hub.KerasLayer(
    "https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3"
)
bert_encoder = hub.KerasLayer(
    "https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/4"
)

In [14]:
# Input: one scalar string per example (raw, untokenized text).
text_input = tf.keras.Input(shape=(), dtype=tf.string, name='text')

# Raw text -> BERT preprocessing -> BERT encoder outputs.
preprocessed_text = bert_preprocess(text_input)
outputs = bert_encoder(preprocessed_text)

# Classification head on the encoder's pooled output: light dropout followed
# by a single sigmoid unit for the binary target.
pooled_output = outputs['pooled_output']
dropout = tf.keras.layers.Dropout(rate=0.1, name="dropout")(pooled_output)
output = tf.keras.layers.Dense(units=1, activation='sigmoid', name="output")(dropout)

model = tf.keras.Model(inputs=text_input, outputs=output)

model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 text (InputLayer)              [(None,)]            0           []                               
                                                                                                  
 keras_layer_1 (KerasLayer)     {'input_mask': (Non  0           ['text[0][0]']                   
                                e, 128),                                                          
                                 'input_type_ids':                                                
                                (None, 128),                                                      
                                 'input_word_ids':                                                
                                (None, 128)}                                                  

In [15]:
# Binary classification setup: cross-entropy loss on the sigmoid output,
# Adam with its default settings, accuracy reported during training.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [16]:
# Early-stopping callback keyed on validation loss: tolerate one epoch
# without improvement, then restore the weights from the best epoch.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=1,
    restore_best_weights=True,
)

In [17]:
# Keras carves `validation_split` off the *end* of the provided arrays, before
# any shuffling. X_train / y_train come from a frame that is ordered "all
# happiness rows, then all other rows", so an unshuffled 20% tail would be a
# single-class validation set (and leave the training part imbalanced).
# Shuffle features and labels together, positionally, with a fixed seed.
shuffle_order = np.random.default_rng(42).permutation(len(X_train))

history = model.fit(
    X_train.iloc[shuffle_order],
    y_train.iloc[shuffle_order],
    epochs=10,
    batch_size=64,
    validation_split=0.2,
    # The early-stopping callback only has an effect when it is passed here;
    # previously it was created but never used, so all 10 epochs always ran.
    callbacks=[early_stopping],
)

Epoch 1/10
  19/4497 [..............................] - ETA: 51:53 - loss: 0.6869 - accuracy: 0.5822

KeyboardInterrupt: 

In [None]:
# Sigmoid scores for the test sentences, flattened from (n, 1) to (n,).
probabilities = model.predict(X_test).flatten()

# Scores strictly above 0.5 map to 'happiness', everything else to 'other';
# .tolist() yields a plain list of Python strings.
predictions = np.where(probabilities > 0.5, 'happiness', 'other').tolist()

In [None]:
# Two-column result frame: the test ids alongside the predicted label,
# matched positionally row by row.
submission = test[['id']].assign(emotion=predictions)

submission