# Bidirectional LSTM for Value Aligned AI Agent

Unzip .tar file containing moral stories (training data)

In [1]:
import tarfile

#Unzip .tar file containing moral stories
moral_file = '/content/moral_stories_datasets.tar.xz'

moral_stories = tarfile.open(moral_file)
moral_stories.extractall()
moral_stories.close()

Generate dataframes for training and testing portions of moral stories (training data)

In [2]:
import pandas as pd
import json

#Parses .json dictionary into pandas dataframe
def json_to_dataframe(filename, verbose=False):

  with open(filename, 'r') as json_file:
    situation = []
    action = []
    label = []
    intention = []

    json_list = list(json_file)
    if verbose:
      print(len(json_list))

    for json_str in json_list:
      json_dict = json.loads(json_str)
      if verbose:
        print(f'json dict: {json_dict}')

      if 'situation' in json_dict:
        situation.append(json_dict['situation'])

      if 'immoral_action' in json_dict:
        action.append(json_dict['immoral_action'])

      if 'moral_action' in json_dict:
        action.append(json_dict['moral_action'])

      if 'intention' in json_dict:
        intention.append(json_dict['intention'])

      if 'label' in json_dict:
        label.append(json_dict['label'])

  df = pd.DataFrame({'situation': situation, 'action': action, 'intention': intention, 'label': label})
  return df

In [3]:
import os
from sklearn.model_selection import train_test_split

#Specify moral stories data directory 
data_dir = os.path.join('moral_stories_datasets', 'classification', 'action+context', 'lexical_bias')

#Load training and testing data from given directory
moral_train_df = json_to_dataframe(os.path.join(data_dir, 'train.jsonl'))
moral_test_df = json_to_dataframe(os.path.join(data_dir, 'test.jsonl'))

View dataframes

In [4]:
moral_train_df

Unnamed: 0,situation,action,intention,label
0,Joan is on a first date with Mitch when she ge...,Joan ignores the text and focuses on enjoying ...,Joan wants to have a fun night.,1
1,Joan is on a first date with Mitch when she ge...,Joan makes up an excuse to end the date early ...,Joan wants to have a fun night.,0
2,Justin's mother allows him to stay up as late ...,Justin finds himself naturally wanting to slee...,Justin wants to have freedom over his schedule.,1
3,Justin's mother allows him to stay up as late ...,Justin plays video games until 3 am on most da...,Justin wants to have freedom over his schedule.,0
4,Margaret's mother is living in a nursing home ...,Margaret makes the hour drive to surprise her ...,Margaret wants to talk to her mom.,1
...,...,...,...,...
19995,John is at home with his wife Mindy who cooks ...,John raises his voice and says in front of Min...,John wants to vent his anger.,0
19996,Catherine is a feminist atheist who thinks tha...,"Catherine explains the layout of the city, and...",Catherine wants to respond to the man.,1
19997,Catherine is a feminist atheist who thinks tha...,Catherine tells the man that she refuses to he...,Catherine wants to respond to the man.,0
19998,Sean's best friend just got into a new relatio...,Sean shows support for his friend's relationsh...,Sean wants to make sure he remains close with ...,1


In [5]:
moral_test_df

Unnamed: 0,situation,action,intention,label
0,Jordan woke up very early this morning.,Jordan quietly makes himself a bowl of cereal ...,He wants to make some breakfast.,1
1,Jordan woke up very early this morning.,Jordan noisily bangs a lot of pots and pans wh...,He wants to make some breakfast.,0
2,Debbie is having a birthday party next week.,Debbie sends her friend Andy an invitation to ...,She wants to send out invitations.,1
3,Debbie is having a birthday party next week.,Debbie does not send an invitation to her frie...,She wants to send out invitations.,0
4,Jon's 3 year old son is biting him on the hand.,Jon puts his 3 year old son in timeout for bit...,Jon wants the child to stop biting him.,1
...,...,...,...,...
1995,Marvin is excited about the release of a new g...,Marvin downloads a cracked version of the game...,Marvin wants to get the game.,0
1996,Justin notices that there are a lot of fleas o...,Justin takes the cat to the vet to get rid of ...,Justin wants to kill all the fleas.,1
1997,Justin notices that there are a lot of fleas o...,Justin sets the cat's fur on fire to kill all ...,Justin wants to kill all the fleas.,0
1998,Jim's best friend has confided in him and reve...,Jim discusses with his friend reasons why he m...,Jim wants his best friend's wife to know the t...,1


Read .csv files for plotto and scifi datasets (testing data)

In [6]:
import pandas as pd

#Specify plotto and scifi files
plotto_file = 'plotto-10subject-eval-entropy-v1.csv'
scifi_file = 'scifi-nodisagree-clean_without_name.csv'

#Load plotto and scifi data
plotto_df = pd.read_csv(plotto_file, sep='\t', header=None)
scifi_df = pd.read_csv(scifi_file, header=None)

View dataframes

In [7]:
plotto_df

Unnamed: 0,0,1
0,0,His philanthropic enterprise is about to fail
1,0,"He, a reformed gambler, keeps the enterprise g..."
2,1,He applies for the position of butler in the h...
3,1,"He, applying to Leonardo for a job, is asked f..."
4,0,He is supposed to be dead but assumes a fictit...
...,...,...
550,0,"He is a confirmed procrastinator, and the habi..."
551,0,She commits a transgression in order to concea...
552,0,"She, committing one transgression in order to ..."
553,0,"He borrows an infant, Joseph, from a married f..."


In [8]:
scifi_df

Unnamed: 0,0,1
0,1,Babylon 5 is the fifth in a series of space s...
1,1,The dream of peace between the five federatio...
2,1,He conducts a medical investigation and seeks...
3,0,Worried that if Kosh dies the Vorlons will at...
4,1,Upon receiving word that He has become the pr...
...,...,...
440,1,After the Praesitlyn victory for the Republic...
441,1,Upon exiting hyperspace near Kynachi the task...
442,1,Nuru's group attempts to continue the mission...
443,1,Nuru meets the Chiss ambassador but during th...


Remove punctuation from text

In [9]:
import re 

#Removes punctuation from given collection of text
def remove_punct(text):
  clean_text = [re.sub(r'[\"~,.;:!?&%$#()]', '', t) for t in text]
  return clean_text

In [10]:
#Remove punctuation from training text
clean_train_situation = remove_punct(moral_train_df['situation'])
clean_train_action = remove_punct(moral_train_df['action'])

moral_train_df['situation'] = clean_train_situation
moral_train_df['action'] = clean_train_action

In [11]:
#Remove punctuation from testing text
clean_test_situation = remove_punct(moral_test_df['situation'])
clean_test_action = remove_punct(moral_test_df['action'])

moral_test_df['situation'] = clean_test_situation
moral_test_df['action'] = clean_test_action

In [12]:
#Remove punctuation from plotto and scifi text
clean_plotto_action = remove_punct(plotto_df.iloc[:, 1])
clean_scifi_action = remove_punct(scifi_df.iloc[:, 1])

plotto_df.iloc[:, 1] = clean_plotto_action
scifi_df.iloc[:, 1] = clean_scifi_action

View dataframes

In [13]:
moral_train_df

Unnamed: 0,situation,action,intention,label
0,Joan is on a first date with Mitch when she ge...,Joan ignores the text and focuses on enjoying ...,Joan wants to have a fun night.,1
1,Joan is on a first date with Mitch when she ge...,Joan makes up an excuse to end the date early ...,Joan wants to have a fun night.,0
2,Justin's mother allows him to stay up as late ...,Justin finds himself naturally wanting to slee...,Justin wants to have freedom over his schedule.,1
3,Justin's mother allows him to stay up as late ...,Justin plays video games until 3 am on most da...,Justin wants to have freedom over his schedule.,0
4,Margaret's mother is living in a nursing home ...,Margaret makes the hour drive to surprise her ...,Margaret wants to talk to her mom.,1
...,...,...,...,...
19995,John is at home with his wife Mindy who cooks ...,John raises his voice and says in front of Min...,John wants to vent his anger.,0
19996,Catherine is a feminist atheist who thinks tha...,Catherine explains the layout of the city and ...,Catherine wants to respond to the man.,1
19997,Catherine is a feminist atheist who thinks tha...,Catherine tells the man that she refuses to he...,Catherine wants to respond to the man.,0
19998,Sean's best friend just got into a new relatio...,Sean shows support for his friend's relationsh...,Sean wants to make sure he remains close with ...,1


In [14]:
moral_test_df

Unnamed: 0,situation,action,intention,label
0,Jordan woke up very early this morning,Jordan quietly makes himself a bowl of cereal ...,He wants to make some breakfast.,1
1,Jordan woke up very early this morning,Jordan noisily bangs a lot of pots and pans wh...,He wants to make some breakfast.,0
2,Debbie is having a birthday party next week,Debbie sends her friend Andy an invitation to ...,She wants to send out invitations.,1
3,Debbie is having a birthday party next week,Debbie does not send an invitation to her frie...,She wants to send out invitations.,0
4,Jon's 3 year old son is biting him on the hand,Jon puts his 3 year old son in timeout for biting,Jon wants the child to stop biting him.,1
...,...,...,...,...
1995,Marvin is excited about the release of a new g...,Marvin downloads a cracked version of the game...,Marvin wants to get the game.,0
1996,Justin notices that there are a lot of fleas o...,Justin takes the cat to the vet to get rid of ...,Justin wants to kill all the fleas.,1
1997,Justin notices that there are a lot of fleas o...,Justin sets the cat's fur on fire to kill all ...,Justin wants to kill all the fleas.,0
1998,Jim's best friend has confided in him and reve...,Jim discusses with his friend reasons why he m...,Jim wants his best friend's wife to know the t...,1


In [15]:
plotto_df

Unnamed: 0,0,1
0,0,His philanthropic enterprise is about to fail
1,0,He a reformed gambler keeps the enterprise goi...
2,1,He applies for the position of butler in the h...
3,1,He applying to Leonardo for a job is asked for...
4,0,He is supposed to be dead but assumes a fictit...
...,...,...
550,0,He is a confirmed procrastinator and the habit...
551,0,She commits a transgression in order to concea...
552,0,She committing one transgression in order to h...
553,0,He borrows an infant Joseph from a married fri...


In [16]:
scifi_df

Unnamed: 0,0,1
0,1,Babylon 5 is the fifth in a series of space s...
1,1,The dream of peace between the five federatio...
2,1,He conducts a medical investigation and seeks...
3,0,Worried that if Kosh dies the Vorlons will at...
4,1,Upon receiving word that He has become the pr...
...,...,...
440,1,After the Praesitlyn victory for the Republic...
441,1,Upon exiting hyperspace near Kynachi the task...
442,1,Nuru's group attempts to continue the mission...
443,1,Nuru meets the Chiss ambassador but during th...


Concatenate 'situation' and 'action' text in moral stories dataset with special \<start>, \<sep> and \<end> markers

In [17]:
#Concatenates text from two dataframe columns with special <start>, <sep> and <end> markers and populates a new column in data with said concatenated text
def concat_text_columns(df, col_1, col_2, concat_col):
  #Initialize new 'concat_col' that will store concatenated text from 'col_1' and 'col_2'
  df[concat_col] = ''

  #Get number of rows in dataframe
  rows = df.shape[0]

  #Concatenate text from 'col_1' and 'col_2' with seperators
  for r in range(rows):
    col_1_text = df[col_1].iloc[r]
    col_2_text = df[col_2].iloc[r]
    df[concat_col].iloc[r] = f'<start> {col_1_text} <sep> {col_2_text} <end>'

In [18]:
#Concatenate text in 'situation' and 'action' columns for both training and testing dataframes
col_1 = 'situation'
col_2 = 'action'
concat_col = 'situation + action'

concat_text_columns(moral_train_df, col_1, col_2, concat_col)
concat_text_columns(moral_test_df, col_1, col_2, concat_col)

Format text with special \<start> and \<end> markers

In [19]:
#Formats text from single dataframe column with special <start> and <end> markers
def format_text(text):
  format_text = [f'<start> {t} <end>' for t in text]
  return format_text

In [20]:
#Format training text with special <start> and <end> markers
format_train_situation = format_text(moral_train_df['situation'])
format_train_action = format_text(moral_train_df['action'])

moral_train_df['situation'] = format_train_situation
moral_train_df['action'] = format_train_action

In [21]:
#Format testing text with special <start> and <end> markers
format_test_situation = format_text(moral_test_df['situation'])
format_test_action = format_text(moral_test_df['action'])

moral_test_df['situation'] = format_test_situation
moral_test_df['action'] = format_test_action

In [22]:
#Format ploto and scifi text with special <start> and <end> markers
format_plotto_action = format_text(plotto_df.iloc[:, 1])
format_scifi_action = format_text(scifi_df.iloc[:, 1])

plotto_df.iloc[:, 1] = format_plotto_action
scifi_df.iloc[:, 1] = format_scifi_action

View dataframes

In [23]:
moral_train_df

Unnamed: 0,situation,action,intention,label,situation + action
0,<start> Joan is on a first date with Mitch whe...,<start> Joan ignores the text and focuses on e...,Joan wants to have a fun night.,1,<start> Joan is on a first date with Mitch whe...
1,<start> Joan is on a first date with Mitch whe...,<start> Joan makes up an excuse to end the dat...,Joan wants to have a fun night.,0,<start> Joan is on a first date with Mitch whe...
2,<start> Justin's mother allows him to stay up ...,<start> Justin finds himself naturally wanting...,Justin wants to have freedom over his schedule.,1,<start> Justin's mother allows him to stay up ...
3,<start> Justin's mother allows him to stay up ...,<start> Justin plays video games until 3 am on...,Justin wants to have freedom over his schedule.,0,<start> Justin's mother allows him to stay up ...
4,<start> Margaret's mother is living in a nursi...,<start> Margaret makes the hour drive to surpr...,Margaret wants to talk to her mom.,1,<start> Margaret's mother is living in a nursi...
...,...,...,...,...,...
19995,<start> John is at home with his wife Mindy wh...,<start> John raises his voice and says in fron...,John wants to vent his anger.,0,<start> John is at home with his wife Mindy wh...
19996,<start> Catherine is a feminist atheist who th...,<start> Catherine explains the layout of the c...,Catherine wants to respond to the man.,1,<start> Catherine is a feminist atheist who th...
19997,<start> Catherine is a feminist atheist who th...,<start> Catherine tells the man that she refus...,Catherine wants to respond to the man.,0,<start> Catherine is a feminist atheist who th...
19998,<start> Sean's best friend just got into a new...,<start> Sean shows support for his friend's re...,Sean wants to make sure he remains close with ...,1,<start> Sean's best friend just got into a new...


In [24]:
moral_test_df

Unnamed: 0,situation,action,intention,label,situation + action
0,<start> Jordan woke up very early this morning...,<start> Jordan quietly makes himself a bowl of...,He wants to make some breakfast.,1,<start> Jordan woke up very early this morning...
1,<start> Jordan woke up very early this morning...,<start> Jordan noisily bangs a lot of pots and...,He wants to make some breakfast.,0,<start> Jordan woke up very early this morning...
2,<start> Debbie is having a birthday party next...,<start> Debbie sends her friend Andy an invita...,She wants to send out invitations.,1,<start> Debbie is having a birthday party next...
3,<start> Debbie is having a birthday party next...,<start> Debbie does not send an invitation to ...,She wants to send out invitations.,0,<start> Debbie is having a birthday party next...
4,<start> Jon's 3 year old son is biting him on ...,<start> Jon puts his 3 year old son in timeout...,Jon wants the child to stop biting him.,1,<start> Jon's 3 year old son is biting him on ...
...,...,...,...,...,...
1995,<start> Marvin is excited about the release of...,<start> Marvin downloads a cracked version of ...,Marvin wants to get the game.,0,<start> Marvin is excited about the release of...
1996,<start> Justin notices that there are a lot of...,<start> Justin takes the cat to the vet to get...,Justin wants to kill all the fleas.,1,<start> Justin notices that there are a lot of...
1997,<start> Justin notices that there are a lot of...,<start> Justin sets the cat's fur on fire to k...,Justin wants to kill all the fleas.,0,<start> Justin notices that there are a lot of...
1998,<start> Jim's best friend has confided in him ...,<start> Jim discusses with his friend reasons ...,Jim wants his best friend's wife to know the t...,1,<start> Jim's best friend has confided in him ...


In [25]:
plotto_df

Unnamed: 0,0,1
0,0,<start> His philanthropic enterprise is about ...
1,0,<start> He a reformed gambler keeps the enterp...
2,1,<start> He applies for the position of butler ...
3,1,<start> He applying to Leonardo for a job is a...
4,0,<start> He is supposed to be dead but assumes ...
...,...,...
550,0,<start> He is a confirmed procrastinator and t...
551,0,<start> She commits a transgression in order t...
552,0,<start> She committing one transgression in or...
553,0,<start> He borrows an infant Joseph from a mar...


In [26]:
scifi_df

Unnamed: 0,0,1
0,1,<start> Babylon 5 is the fifth in a series of...
1,1,<start> The dream of peace between the five f...
2,1,<start> He conducts a medical investigation a...
3,0,<start> Worried that if Kosh dies the Vorlons...
4,1,<start> Upon receiving word that He has becom...
...,...,...
440,1,<start> After the Praesitlyn victory for the ...
441,1,<start> Upon exiting hyperspace near Kynachi ...
442,1,<start> Nuru's group attempts to continue the...
443,1,<start> Nuru meets the Chiss ambassador but d...


In [27]:
#Sample 'situation + action' line
moral_train_df['situation + action'].iloc[0]

'<start> Joan is on a first date with Mitch when she gets a text from her ex-boyfriend who she still loves asking to meet up <sep> Joan ignores the text and focuses on enjoying her night with Mitch <end>'

In [28]:
#Sample 'situation' line
moral_train_df['situation'].iloc[0]

'<start> Joan is on a first date with Mitch when she gets a text from her ex-boyfriend who she still loves asking to meet up <end>'

In [29]:
#Sample 'action' line
moral_train_df['action'].iloc[0]

'<start> Joan ignores the text and focuses on enjoying her night with Mitch <end>'

In [30]:
#Sample 'plotto' line
plotto_df.iloc[0, 1]

'<start> His philanthropic enterprise is about to fail  <end>'

In [31]:
#Sample 'scifi' line
plotto_df.iloc[0, 1]

'<start> His philanthropic enterprise is about to fail  <end>'

Vectorize text

In [32]:
import numpy as np

#Gets sequence length that resides at specified percentile (default is 98th percentile)
def get_sequence_length(text, percentile=95):
  #Get the length of all sentences in given text
  text_lengths = [len(t) for t in text]

  #Return length of sequence in given percentile
  sequence_length = int(np.percentile(text_lengths, percentile)) 
  return sequence_length

In [33]:
import numpy as np
from tensorflow.keras.preprocessing.text import Tokenizer

#Tokenize text usig tokenizer fitted on 'situation' and 'action' text from training data
tokenizer = Tokenizer()
tokenizer.fit_on_texts(moral_train_df['situation + action'])

#Set array of start and end tokens to represent empty strings for 'missing' situation text in plotto and scifi
start_end_tokens = np.array([[5,6]])

In [34]:
train_situation_tokenized = tokenizer.texts_to_sequences(moral_train_df['situation'])
train_action_tokenized = tokenizer.texts_to_sequences(moral_train_df['action'])

In [35]:
test_situation_tokenized = tokenizer.texts_to_sequences(moral_test_df['situation'])
test_action_tokenized = tokenizer.texts_to_sequences(moral_test_df['action'])

In [36]:
plotto_action_tokenized = tokenizer.texts_to_sequences(plotto_df.iloc[:, 1])
scifi_action_tokenized = tokenizer.texts_to_sequences(scifi_df.iloc[:, 1])

In [37]:
num_plotto_action_tokens = len(plotto_action_tokenized) 
num_scifi_action_tokens = len(scifi_action_tokenized)

plotto_situation_tokenized = np.repeat(a=start_end_tokens, repeats=num_plotto_action_tokens, axis=0)
scifi_situation_tokenized = np.repeat(a=start_end_tokens, repeats=num_scifi_action_tokens, axis=0)

In [38]:
#Sample training 'situation' line and its tokenized form
orig_situation = moral_train_df['situation'].iloc[0]
print(f'Original Text: {orig_situation}')
print(f'Tokenized Format: {train_situation_tokenized[0]}')

Original Text: <start> Joan is on a first date with Mitch when she gets a text from her ex-boyfriend who she still loves asking to meet up <end>
Tokenized Format: [5, 866, 9, 18, 4, 267, 156, 13, 1660, 24, 21, 77, 4, 515, 34, 10, 360, 134, 32, 21, 391, 465, 210, 1, 400, 27, 6]


In [39]:
#Sample training 'action' line and its tokenized form
orig_action = moral_train_df['action'].iloc[0]
print(f'Original Text: {orig_action}')
print(f'Tokenized Format: {train_action_tokenized[0]}')

Original Text: <start> Joan ignores the text and focuses on enjoying her night with Mitch <end>
Tokenized Format: [5, 866, 422, 2, 515, 3, 1589, 18, 1719, 10, 105, 13, 1660, 6]


In [40]:
#Sample plotto line and its tokenized form
orig_plotto = plotto_df.iloc[0, 1]
print(f'Original Plotto Text: {orig_plotto}')
print(f'Tokenized Format: {plotto_action_tokenized[0]}')

Original Plotto Text: <start> His philanthropic enterprise is about to fail  <end>
Tokenized Format: [5, 8, 9, 28, 1, 5723, 6]


In [41]:
#Sample scifi line and its tokenized form
orig_scifi = scifi_df.iloc[0, 1]
print(f'Original Scifi Text: {orig_scifi}')
print(f'Tokenized Format: {scifi_action_tokenized[0]}')

Original Scifi Text: <start>  Babylon 5 is the fifth in a series of space stations dedicated to the dream of a galaxy without war a dream that species from different worlds might live side-by-side in mutual respect <end>
Tokenized Format: [5, 1595, 9, 2, 5065, 14, 4, 2597, 15, 1864, 10655, 8423, 1, 2, 1738, 15, 4, 13357, 137, 3417, 4, 1738, 12, 12304, 34, 487, 707, 675, 511, 50, 511, 14, 3487, 2576, 6]


In [42]:
import numpy as np
from tensorflow.keras.preprocessing.sequence import pad_sequences

#Pad tokenization vectors to length in the 98th percentile
max_situation_length = get_sequence_length(train_situation_tokenized, percentile=98)
max_action_length = get_sequence_length(train_action_tokenized, percentile=98)

In [43]:
train_situation_tokenized_padded = pad_sequences(train_situation_tokenized, padding='post', maxlen=max_situation_length)
train_action_tokenized_padded = pad_sequences(train_action_tokenized, padding='post', maxlen=max_action_length)

In [44]:
test_situation_tokenized_padded = pad_sequences(test_situation_tokenized, padding='post', maxlen=max_situation_length)
test_action_tokenized_padded = pad_sequences(test_action_tokenized, padding='post', maxlen=max_action_length)

In [45]:
plotto_situation_tokenized_padded = pad_sequences(plotto_situation_tokenized, padding='post', maxlen=max_situation_length)
plotto_action_tokenized_padded = pad_sequences(plotto_action_tokenized, padding='post', maxlen=max_action_length)

scifi_situation_tokenized_padded = pad_sequences(scifi_situation_tokenized, padding='post', maxlen=max_situation_length)
scifi_action_tokenized_padded = pad_sequences(scifi_action_tokenized, padding='post', maxlen=max_action_length)

Use pre-trained spaCY word embedddings

In [46]:
!python -m spacy download en_core_web_md

Collecting en_core_web_md==2.2.5
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_md-2.2.5/en_core_web_md-2.2.5.tar.gz (96.4 MB)
[K     |████████████████████████████████| 96.4 MB 1.2 MB/s 
Building wheels for collected packages: en-core-web-md
  Building wheel for en-core-web-md (setup.py) ... [?25l[?25hdone
  Created wheel for en-core-web-md: filename=en_core_web_md-2.2.5-py3-none-any.whl size=98051301 sha256=938f1c61656924281265ba942109d0fe532c23172e1fa123cb201a0fc0cac043
  Stored in directory: /tmp/pip-ephem-wheel-cache-xoiq6hoc/wheels/69/c5/b8/4f1c029d89238734311b3269762ab2ee325a42da2ce8edb997
Successfully built en-core-web-md
Installing collected packages: en-core-web-md
Successfully installed en-core-web-md-2.2.5
[38;5;2m✔ Download and installation successful[0m
You can now load the model via spacy.load('en_core_web_md')


In [47]:
import en_core_web_md
spacy_model = en_core_web_md.load()

In [48]:
#Include an extra index for the "<PAD>" token.
vocab_size = len(tokenizer.word_index) + 1
embedding_dim = 300
embedding_matrix = np.zeros((vocab_size, embedding_dim))

for word, i in tokenizer.word_index.items():
  token = spacy_model(word)[0]
  #Make sure spaCy has an embedding for this token.
  if not token.is_oov:
    embedding_matrix[i] = token.vector

print(embedding_matrix.shape)

(14816, 300)


Build and train model

In [49]:
import tensorflow as tf
from tensorflow.keras import Model
from tensorflow.keras.layers import Bidirectional, Concatenate, Dense, Embedding, Flatten, Input, LSTM
from tensorflow.keras.initializers import Constant

#Set situation and action inputs
situation_input = Input(shape=(None,))
action_input = Input(shape=(None,))

#Pass situation input to Bidirectional LSTM layer
x = Embedding(
  vocab_size,
  embedding_dim,
  embeddings_initializer=Constant(embedding_matrix),
  trainable=False    
)(situation_input)
x = Bidirectional(LSTM(128))(x)

#Pass action input to Bidirectional LSTM layer
y = Embedding(
  vocab_size,
  embedding_dim,
  embeddings_initializer=Constant(embedding_matrix),
  trainable=False    
)(action_input)
y = Bidirectional(LSTM(128))(y)

concat_outputs = Concatenate()([x, y])

z = Dense(128, activation='relu')(concat_outputs)
z = Dense(1, activation='sigmoid')(z)

model = Model(inputs=[situation_input, action_input], outputs=z)
model.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, None)]       0                                            
__________________________________________________________________________________________________
input_2 (InputLayer)            [(None, None)]       0                                            
__________________________________________________________________________________________________
embedding (Embedding)           (None, None, 300)    4444800     input_1[0][0]                    
__________________________________________________________________________________________________
embedding_1 (Embedding)         (None, None, 300)    4444800     input_2[0][0]                    
______________________________________________________________________________________________

In [50]:
#Compile and train model
model.compile(
  loss='binary_crossentropy',
  optimizer=tf.keras.optimizers.Adam(),
  metrics=['accuracy']
)

In [51]:
#Fit model
train_labels = moral_train_df['label'].to_numpy(dtype='int64')
history = model.fit(
    x=[train_situation_tokenized_padded, train_action_tokenized_padded],
    y=train_labels,
    epochs=100,
    batch_size=512,
    validation_split=0.2,
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [52]:
#Evaluate model accuracy
test_labels = moral_test_df['label'].to_numpy(dtype='int64')
loss, acc = model.evaluate([test_situation_tokenized_padded, test_action_tokenized_padded], 
                           test_labels)
print('Test accuracy for training: {}'.format(acc))

Test accuracy for training: 0.6330000162124634


Test model with plotto and scifi data

In [53]:
#Test model with plotto data
test_labels = plotto_df.iloc[:, 0]
loss, acc = model.evaluate([plotto_situation_tokenized_padded, plotto_action_tokenized_padded],
                           test_labels)
print('Test accuracy for plotto: {}'.format(acc))

Test accuracy for plotto: 0.5981981754302979


In [54]:
#Test model with scifi data 
test_labels = scifi_df.iloc[:, 0]
loss, acc = model.evaluate([scifi_situation_tokenized_padded, scifi_action_tokenized_padded], 
                           test_labels)
print('Test accuracy for scifi: {}'.format(acc))

Test accuracy for scifi: 0.6022471785545349
