In [41]:
!pip install tensorflow_text
!pip install tensorflow_addons



In [42]:
import tensorflow as tf
import tensorflow_hub as hub
import tensorflow_text as text
import pickle
import numpy as np
import pandas as pd
import tensorflow_addons as tfa

In [43]:
gpus = tf.config.list_physical_devices('GPU')

In [44]:
gpus

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

In [45]:
df = pd.read_csv("DesireDBPreprocessed.csv")
df.head(5)

Unnamed: 0.1,Unnamed: 0,Content,Label
0,0,"Ah, it's the weekend again. This has become a ...",Fulfilled
1,1,It was hectic. Then on top of that the one and...,Unfulfilled
2,2,"Being a groomsman, I really didn't get the cha...",Fulfilled
3,3,Before justice started school I had an idea to...,Fulfilled
4,4,So for some reason (I dunno wad) I was under t...,Unfulfilled


In [46]:
df.groupby('Label').describe()

Unnamed: 0_level_0,Unnamed: 0,Unnamed: 0,Unnamed: 0,Unnamed: 0,Unnamed: 0,Unnamed: 0,Unnamed: 0,Unnamed: 0
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max
Label,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
Fulfilled,1950.0,1915.909744,1052.956578,0.0,992.5,1968.5,2854.75,3586.0
Unfulfilled,1126.0,1633.002664,946.576897,1.0,851.5,1590.5,2365.75,3587.0
Unknown,512.0,1680.259766,1097.551201,6.0,643.25,1618.5,2730.5,3584.0


In [47]:
df['Label'].value_counts()

Fulfilled      1950
Unfulfilled    1126
Unknown         512
Name: Label, dtype: int64

In [48]:
X = df.iloc[:, 1 :-1].values
y = df.iloc[:, -1].values

In [49]:
LABELS = [
    'admiration',
    'amusement',
    'anger',
    'annoyance',
    'approval',
    'caring',
    'confusion',
    'curiosity',
    'desire',
    'disappointment',
    'disapproval',
    'disgust',
    'embarrassment',
    'excitement',
    'fear',
    'gratitude',
    'grief',
    'joy',
    'love',
    'nervousness',
    'optimism',
    'pride',
    'realization',
    'relief',
    'remorse',
    'sadness',
    'surprise',
    'neutral',
]

In [50]:
def values_to_label(values):

  maxim = -1 
  index = -1

  for i in range(len(values)):
    if values[i] > maxim: 
      maxim = values[i] 
      index = i

  # print(LABELS[index])

  return LABELS[index]


In [51]:
X_with_emotion = []

In [52]:
with open('out_visual.pickle', 'rb') as fd:
    w = pickle.load(fd)

    for i in range(len(X)):

      label = values_to_label(w[i][1])
      x_simple = X[i].tolist()
      x_simple[0] = x_simple[0] + " " + label
      X_with_emotion.append(x_simple)


In [53]:
markers = {"accordingly": 0, "so": 0,"ultimately": 0,"finally": 0, "rather": 0, "yet": 0, "although": 0, "but": 0}

In [54]:
for story in X_with_emotion:
  word_list = story[0].split()
  for word in word_list:
    if word in markers:
      markers[word] += 1
  story[0] += " "+str(sum(list(markers.values())[:4]))
  story[0] += " "+str(sum(list(markers.values())[-4:]))

  markers = {k: 0 for k in markers}

In [55]:
X_with_emotion[:2]

[["Ah, it's the weekend again. This has become a sort of weekend blog, hasn't it? For Saturday morning's activity, I decided to organize a small team for trekking the nice and simple route of Kranji Memorial trek once again. However, as the weather gods would have it, I woke up to ominous looking skies. Soon, it started pouring and one by one, people decided to back out. Finally, only Jane, Felix, Jervais and I were left. But hey, the weather turned out to be really awsome! Raving to go, people! neutral 0 0"],
 ["It was hectic. Then on top of that the one and ONLY day I have ever worn a skirt and pantyhose to work, I had to spend the afternoon crawling on the floor under my desk plugging shit in. I am not a dressy person, I don't like dresses and skirts. I would much rather wear pants, and I certainly do not like pantyhoes or stockings of any kind. But I live in the south, and ladies are expected to wear them if they are showing any leg at all, it is just not socially acceptable in my 

In [56]:
X_with_emotion = [x for x,label in zip(X_with_emotion,y) if label =='Fulfilled' or label =='Unfulfilled']
y = [label for label in y if label =='Fulfilled' or label =='Unfulfilled']

In [57]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X_with_emotion,y,test_size=0.2, random_state=42)

In [58]:
len(X_train)

2460

In [59]:
label_to_id= {'Fulfilled' : 0, 'Unfulfilled' : 1}

In [60]:
y_train = [label_to_id[label] for label in y_train]

In [61]:
y_train = tf.one_hot(y_train, depth = 2)

In [62]:
handle_encoder = "https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/4"
handle_preprocess = "https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3"

In [63]:
tf.random.set_seed(0)

In [64]:
def build_classifier_model():
  text_input = tf.keras.layers.Input(shape=(), dtype=tf.string, name='text')
  preprocessing_layer = hub.KerasLayer(handle_preprocess, name='preprocessing')
  encoder_inputs = preprocessing_layer(text_input)
  encoder = hub.KerasLayer(handle_encoder, trainable=True, name='BERT_encoder')
  outputs = encoder(encoder_inputs)
  net = outputs['pooled_output']
  net = tf.keras.layers.Dropout(0.2)(net)
  # net = tf.keras.layers.Dense(1000, activation = 'relu', kernel_initializer=tf.keras.initializers.HeNormal())(net)
  net = tf.keras.layers.Dense(2, activation='softmax', name='classifier')(net)
  return tf.keras.Model(text_input, net)

In [65]:
loss = tf.keras.losses.CategoricalCrossentropy()

In [66]:
METRICS = [
      'accuracy',
      tf.keras.metrics.Precision(name='precision'),
      tf.keras.metrics.Recall(name='recall')
]

In [67]:
MyAdamW = tfa.optimizers.extend_with_decoupled_weight_decay(tf.keras.optimizers.Adam)


In [68]:
# 600 15 epoci 
# 500 15 epoci 77%
# 500 30 epoci 78% 6.1649e-04

In [69]:
epochs = 5
steps_per_epoch = len(X_train)
num_train_steps = steps_per_epoch * epochs
num_warmup_steps = int(0.1*num_train_steps)

init_lr = 5e-5
# optimizer = tf.keras.optimizers.Adam(learning_rate=3e-5)
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    init_lr,
    decay_steps=500,
    decay_rate=0.85,
    staircase=True)
optimizer = tf.keras.optimizers.Adam(learning_rate=lr_schedule)

# optimizer = MyAdamW(weight_decay=0.0001, learning_rate=5e-5)


In [70]:
model = build_classifier_model()
model.compile(
    optimizer = optimizer,
    loss = loss, 
    metrics = METRICS
)

In [71]:
history = model.fit(np.array(X_train), np.array(y_train), batch_size=32, epochs=20)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [73]:
model.save_weights('BertTunedOptimizedBinary4')

In [74]:
y_test = [label_to_id[label] for label in y_test]

In [75]:
y_test = tf.one_hot(y_test, depth = 2)

In [76]:
model.evaluate(np.array(X_test), np.array(y_test))



[1.7333556413650513,
 0.7483766078948975,
 0.7483766078948975,
 0.7483766078948975]

In [77]:
y_predicted = model.predict(X_test)

In [78]:
y_predicted_modeled = tf.math.argmax(y_predicted,axis=1)

In [79]:
np.unique(y_predicted_modeled)

array([0, 1])

In [80]:
y_test_modeled =  tf.math.argmax(y_test,axis=1)

In [81]:
from sklearn.metrics import confusion_matrix, classification_report

cm = confusion_matrix(y_test_modeled, y_predicted_modeled)
cm

array([[313,  83],
       [ 72, 148]])