# BioBERT on Clinical Trial Notes 
## To check gender bias

In [1]:
import os
import warnings
import tensorflow as tf
warnings.filterwarnings(action='ignore')
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' 
#tf.logging.set_verbosity(tf.logging.ERROR)
tf.compat.v1.disable_eager_execution()

In [2]:
import pandas as pd
import numpy as np  
import re
import pickle

import codecs
from tqdm import tqdm
import shutil

In [3]:
import keras as keras
from keras.models import load_model
from keras import backend as K
from keras import Input, Model
from keras import optimizers
from keras.layers import Lambda

from keras_bert import load_trained_model_from_checkpoint, load_vocabulary
from keras_bert import Tokenizer
from keras_bert import AdamWarmup, calc_train_steps

from keras_radam import RAdam # import revised version of Adam optimizer, keras-adam

In [4]:
import xml.etree.ElementTree as ET

In [5]:
tree = ET.parse('./train/100.xml')
root = tree.getroot()

In [6]:
[elem.tag for elem in root.iter()]

['PatientMatching',
 'TEXT',
 'TAGS',
 'ABDOMINAL',
 'ADVANCED-CAD',
 'ALCOHOL-ABUSE',
 'ASP-FOR-MI',
 'CREATININE',
 'DIETSUPP-2MOS',
 'DRUG-ABUSE',
 'ENGLISH',
 'HBA1C',
 'KETO-1YR',
 'MAJOR-DIABETES',
 'MAKES-DECISIONS',
 'MI-6MOS']

# Get Datasets
<b> we would retreive the data under 'TEXT' tag, which is clinician notes.</b>

In [7]:
for description in root.iter('TEXT'):
    print(description.text)




Record date: 2106-02-12

Campbell Orthopedic Associates
4 Madera Circle
Omak, GA 28172
 
Habib Valenzuela, M.D.
 
 
                                             Valdez, Harlan Jr.  
                                           845-41-54-4
                                             February 12, 2106 
Har is a 43 year old 6' 214 pound gentleman who is referred for
consultation by Dr. Harlan Oneil.  About a week ago he slipped on
the driveway at home and sustained an injury to his left ankle. 
He was seen at Tri-City Hospital and was told he had a
fracture.  He was placed in an air splint and advised to be
partial weight bearing, and he is using a cane.  He is here for
routine follow-up. 
Past medical history is notable for no ankle injuries previously. 
He has a history of diabetes and sleep apnea.  He takes Prozac,
Cardizem, Glucophage and Amaryl.  He is also followed by Dr. Harold
Nutter for an arrhythmia.  He does not smoke.  He drinks
minimally.  He is a set designer at Columbia Pi

# Data Preprocessing

See if there are gender identifying words from each sentence and label them as 0-male 1-female.
Used whitespaces to check individual words.
Used regular expression to recognize sentences and replaced multiple \n to a whitespace.
(Lots of \n due to xml format file characteristics.</b>

In [8]:
import re
import glob
import xml.etree.ElementTree as ET

filenames = glob.glob("./train/[0-9][0-9][0-9].xml")  # change the pattern to match your case
lines = []
temp_label = 0
total_labels = []
for filename in filenames:

    with open(filename, 'r', encoding="utf-8") as content:

        tree = ET.parse(content)
#        lines.append(ET.tostring(tree.getroot(), encoding='utf-8', method='text'))
        for description in root.iter('TEXT'):
            ##use regular expression to replace multiple tabs with a whitespace
            ##xml format uses multiple '\n's to visualize rows or columns
            ##use regular expression to only retrieve sentences
            temp =re.sub(' [\n]{2,}',' ',description.text)
            sentence = re.findall('[0-9]*[A-Z][^.!?]+[.!?][\n]',temp) ##find sentences
            for sen in sentence:
                ##check if sentence has gender specific word 
                ##for ex, the patient - she/he, his/her report, a female/male patient
                ##if not, consider the sentence is from the same report as previous one
                if ' She ' in (' ' + sen + ' '):
                    temp_label = 1
                elif ' she ' in (' ' + sen + ' '):
                    temp_label = 1
                elif ' her ' in (' ' + sen + ' '):
                    temp_label = 1
                elif ' Her ' in (' ' + sen + ' '):
                    temp_label = 1
                elif ' female ' in (' ' + sen + ' '):
                    temp_label = 1
                elif ' He ' in (' ' + sen + ' '):
                    temp_label = 0
                elif ' he ' in (' ' + sen + ' '):
                    temp_label = 0
                elif ' his ' in (' ' + sen + ' '):
                    temp_label = 0
                elif ' His ' in (' ' + sen + ' '):
                    temp_label = 0
                elif ' male ' in (' ' + sen + ' '):
                    temp_label = 0
                else:
                    temp_label = temp_label
                total_labels.append(temp_label)
            lines.extend(sentence)
            

In [9]:
lines[:10]

['He is a set designer at Columbia Pictures.\n',
 'Diagnosis:  Left ankle fracture.\n',
 'This is a brief addendum to the medical record.\n',
 'The patient has no chest pain at the time of evaluation in the emergency department and no shortness of breath.\n',
 'REVIEW OF SYSTEMS:  As indicated and otherwise negative.\n',
 'PAST MEDICAL HISTORY:  As indicated in the chart.\n',
 'SOCIAL HISTORY AND FAMILY HISTORY:  As indicated in the chart.\n',
 'A CT is pending at the time of this dictation.\n',
 'His complaint of some mild chest pain (not same as anginal pain day before) thought to be from defibrillation.\n',
 'Esmolol drip as well as amio was stopped and BB was escalated and patient has remained in NSR.\n']

In [10]:
total_labels[:10]

[0, 0, 0, 0, 1, 0, 0, 0, 0, 0]

In [12]:
total_lines = lines

In [13]:
train_ratio = int(len(total_lines)*0.7)
train_lines = total_lines[:train_ratio]
train_label = total_labels[:train_ratio]
valid_lines = total_lines[train_ratio:]
valid_label = total_labels[train_ratio:]
train_set = np.column_stack((train_lines, train_label))
valid_set = np.column_stack((valid_lines, valid_label))

In [14]:
train_data = pd.DataFrame(train_set)
train_data.to_csv('./train.txt', sep='\t', index=False)
valid_data = pd.DataFrame(valid_set)
valid_data.to_csv('./valid.txt', sep='\t', index=False)

In [15]:
filenames = glob.glob("./test/[0-9][0-9][0-9].xml")  # change the pattern to match your case
lines = []
temp_label = 0
total_labels = []
for filename in filenames:

    with open(filename, 'r', encoding="utf-8") as content:

        tree = ET.parse(content)
#        lines.append(ET.tostring(tree.getroot(), encoding='utf-8', method='text'))

        for description in root.iter('TEXT'):
            ##use regular expression to replace multiple tabs with a whitespace
            ##xml format uses multiple '\n's to visualize rows or columns
            ##use regular expression to only retrieve sentences
            temp =re.sub(' [\n]{2,}',' ',description.text)
            sentence = re.findall('[0-9]*[A-Z][^.!?]+[.!?][\n]',temp) ##find sentences
            ##check if sentence has gender specific word 
            ##for ex, the patient - she/he, his/her report, a female/male patient
            ##if not, consider the sentence is from the same report as previous one
            ##us below format of if statement to check for individual words
            for sen in sentence:
                if ' She ' in (' ' + sen + ' '):
                    temp_label = 1
                elif ' she ' in (' ' + sen + ' '):
                    temp_label = 1
                elif ' her ' in (' ' + sen + ' '):
                    temp_label = 1
                elif ' Her ' in (' ' + sen + ' '):
                    temp_label = 1
                elif ' female ' in (' ' + sen + ' '):
                    temp_label = 1
                elif ' He ' in (' ' + sen + ' '):
                    temp_label = 0
                elif ' he ' in (' ' + sen + ' '):
                    temp_label = 0
                elif ' his ' in (' ' + sen + ' '):
                    temp_label = 0
                elif ' His ' in (' ' + sen + ' '):
                    temp_label = 0
                elif ' male ' in (' ' + sen + ' '):
                    temp_label = 0
                else:
                    temp_label = temp_label
                total_labels.append(temp_label)
            lines.extend(sentence)

In [16]:
lines[:10]

['He is a set designer at Columbia Pictures.\n',
 'Diagnosis:  Left ankle fracture.\n',
 'This is a brief addendum to the medical record.\n',
 'The patient has no chest pain at the time of evaluation in the emergency department and no shortness of breath.\n',
 'REVIEW OF SYSTEMS:  As indicated and otherwise negative.\n',
 'PAST MEDICAL HISTORY:  As indicated in the chart.\n',
 'SOCIAL HISTORY AND FAMILY HISTORY:  As indicated in the chart.\n',
 'A CT is pending at the time of this dictation.\n',
 'His complaint of some mild chest pain (not same as anginal pain day before) thought to be from defibrillation.\n',
 'Esmolol drip as well as amio was stopped and BB was escalated and patient has remained in NSR.\n']

In [18]:
test_lines = lines

In [19]:
test_set = np.column_stack((test_lines,total_labels))

In [20]:
test_data = pd.DataFrame(test_set)
test_data.to_csv('./test.txt', sep='\t', index=False)

# Word Piece Tokenizer
### Word Piece embedding
#### Convert text to token pieces using tokenizer based on vocab
#### Word embeddings are a type of word representation that allows words with similar meaning to have a similar representation.

example:

Input Text: the man jumped up, put his basket on philammon's head  
Word piece: ["the", "man", "jump", "##ed", "up", "put", "his", "basket", "on", "phil", "##am", "##mon","'", "s", "head"]

In [21]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import collections
import unicodedata
import six


def convert_to_unicode(text):
    """transform not unicode file to unicode file"""
    if six.PY3:
        if isinstance(text, str):
            return text
        elif isinstance(text, bytes):
            return text.decode("utf-8", "ignore")
        else:
            raise ValueError("Unsupported string type: %s" % (type(text)))
    elif six.PY2:
        if isinstance(text, str):
            return text.decode("utf-8", "ignore")
        elif isinstance(text, unicode):
            return text
        else:
            raise ValueError("Unsupported string type: %s" % (type(text)))
    else:
        raise ValueError("Not running on Python2 or Python 3?")

def load_vocab(vocab_file):
    """read vocabs from vocab.txt of BioBERT pretraining model 
       make dictionary -> key: word, value: unique index """
    vocab = collections.OrderedDict()
    index = 0
    with tf.gfile.GFile(vocab_file, "r") as reader:
        while True:
            token = convert_to_unicode(reader.readline())
            if not token:
                break
            token = token.strip()
            vocab[token] = index
            index += 1
    return vocab

def convert_by_vocab(vocab, items):
    """ge input of token sequence and use vocab dictionary to return tokens' index sequence"""
    output = []
    for item in items:
        output.append(vocab[item])
    return output

def whitespace_tokenize(text):
    """get rid of white space and separate plural tokens"""
    text = text.strip()
    if not text:
        return []
    tokens = text.split()
    return tokens

class FullTokenizer(object):
    """end-to-end tokenziation."""
    def __init__(self, vocab_file, do_lower_case=True):
        self.vocab = load_vocab(vocab_file)
        self.inv_vocab = {v: k for k, v in self.vocab.items()}
        self.basic_tokenizer = BasicTokenizer(do_lower_case=do_lower_case)
        self.wordpiece_tokenizer = WordpieceTokenizer(vocab=self.vocab)

    def tokenize(self, text):
        split_tokens = []
        for token in self.basic_tokenizer.tokenize(text):
            for sub_token in self.wordpiece_tokenizer.tokenize(token):
                split_tokens.append(sub_token)

        return split_tokens

    def convert_tokens_to_ids(self, tokens):
        return convert_by_vocab(self.vocab, tokens)

    def convert_ids_to_tokens(self, ids):
        return convert_by_vocab(self.inv_vocab, ids)

class BasicTokenizer(object):
    """basic tokenization"""

    def __init__(self, do_lower_case=True):
        """
        Args:
          do_lower_case: will you lower case todkens?(True/False)
        """
        self.do_lower_case = do_lower_case

    def tokenize(self, text):
        """Tokenizes a piece of text."""
        text = convert_to_unicode(text)
        text = self._clean_text(text)

        orig_tokens = whitespace_tokenize(text)
        split_tokens = []
        for token in orig_tokens:
            if self.do_lower_case:
                token = token.lower()
                token = self._run_strip_accents(token)
            split_tokens.extend(self._run_split_on_punc(token))

        output_tokens = whitespace_tokenize(" ".join(split_tokens))
        return output_tokens

    def _run_strip_accents(self, text):
        """Strips accents from a piece of text."""
        text = unicodedata.normalize("NFD", text)
        output = []
        for char in text:
            cat = unicodedata.category(char)
            if cat == "Mn":
                continue
            output.append(char)
        return "".join(output)

    def _run_split_on_punc(self, text):
        """Splits punctuation on a piece of text."""
        chars = list(text)
        i = 0
        start_new_word = True
        output = []
        while i < len(chars):
            char = chars[i]
            if _is_punctuation(char):
                output.append([char])
                start_new_word = True
            else:
                if start_new_word:
                    output.append([])
                start_new_word = False
                output[-1].append(char)
            i += 1
        return ["".join(x) for x in output]


    def _clean_text(self, text):
        """Performs invalid character removal and whitespace cleanup on text."""
        output = []
        for char in text:
            cp = ord(char)
            if cp == 0 or cp == 0xfffd or _is_control(char):
                continue
            if _is_whitespace(char):
                output.append(" ")
            else:
                output.append(char)
        return "".join(output)


class WordpieceTokenizer(object):
    """WordPiece tokenziation."""

    def __init__(self, vocab, unk_token="[UNK]", max_input_chars_per_word=200):
        self.vocab = vocab
        self.unk_token = unk_token
        self.max_input_chars_per_word = max_input_chars_per_word

    def tokenize(self, text):
        """

        For example:
          input = "unaffable"
          output = ["un", "##aff", "##able"]

        Args:
          text: A single token or whitespace separated tokens. This should have
            already been passed through `BasicTokenizer.

        Returns:
          A list of wordpiece tokens.
        """

        text = convert_to_unicode(text)

        output_tokens = []
        for token in whitespace_tokenize(text):
            chars = list(token)
            if len(chars) > self.max_input_chars_per_word:
                output_tokens.append(self.unk_token)
                continue

            is_bad = False
            start = 0
            sub_tokens = []
            while start < len(chars):
                end = len(chars)
                cur_substr = None
                while start < end:
                    substr = "".join(chars[start:end])
                    if start > 0:
                        substr = "##" + substr
                    if substr in self.vocab:
                        cur_substr = substr
                        break
                    end -= 1
                if cur_substr is None:
                    is_bad = True
                    break
                sub_tokens.append(cur_substr)
                start = end

            if is_bad:
                output_tokens.append(self.unk_token)
            else:
                output_tokens.extend(sub_tokens)
        return output_tokens
    
def _is_whitespace(char):
    """check if input is white space"""
    # \t, \n, and \r are technically contorl characters but we treat them
    # as whitespace since they are generally considered as such.
    if char == " " or char == "\t" or char == "\n" or char == "\r":
        return True
    cat = unicodedata.category(char)
    if cat == "Zs":
        return True
    return False
    
def _is_control(char):
    """check if input is control character"""
    # These are technically control characters but we count them as whitespace
    # characters.
    if char == "\t" or char == "\n" or char == "\r":
        return False
    cat = unicodedata.category(char)
    if cat.startswith("C"):
        return True
    return False

def _is_punctuation(char):
    """check if input character is punctuation character"""
    cp = ord(char)
    # We treat all non-letter/number ASCII as punctuation.
    # Characters such as "^", "$", and "`" are not in the Unicode
    # Punctuation class but we treat them as punctuation anyways, for
    # consistency.
    if ((cp >= 33 and cp <= 47) or (cp >= 58 and cp <= 64) or
        (cp >= 91 and cp <= 96) or (cp >= 123 and cp <= 126)):
        return True
    cat = unicodedata.category(char)
    if cat.startswith("P"):
        return True
    return False

In [22]:
model_path = './models2'
vocab = model_path+'/vocab.txt'

from pytorch_pretrained_bert import BertTokenizer, BertConfig
from pytorch_pretrained_bert import BertAdam, BertForSequenceClassification

max_seq_length= 128
tokenizer = BertTokenizer.from_pretrained(vocab, do_lower_case=True) 

## Get final input data

In [23]:
import math
import spacy
import numpy as np
import csv
from sklearn.manifold import TSNE
from sklearn import linear_model
import matplotlib.pyplot as plt
%matplotlib inline
from IPython.display import clear_output
import en_core_web_lg
nlp = en_core_web_lg.load()

def convert_single_example(text, max_seq_length, tokenizer, seg_id): 
    
    textlist = text[0].split() # sentenceA in words
    label = [text[1]]
    # word piece tokenize them and put them in tokensA and tokensB
    tokens = []
    for word in textlist:
        token = tokenizer.tokenize(word)
        tokens.extend(token)
        
    """model gets an input for max_seq_length of token
       input: [CLS] tokens [SEP]
       so, (the number of sentence tokens) <= (max_seq_length-2)
       
       if it's longer,
       remove tokens with the length of (total number of tokens)-(max_seq_length-2)."""
    if len(tokens) > max_seq_length-2:
        cut_len = len(tokens)-(max_seq_length-2)
        len_tokens = math.ceil(len(tokens)/(len(tokens)*cut_len))
        tokens = tokens[len_tokens:]
            
        
    # make input_ids, segment_ids -> will be used in model as inputs
    ntokens = []
    segment_ids = []
    ntokens.append("[CLS]") # example start token
    segment_ids.append(0)
    for token in tokens:
        # add tokensA 
        ntokens.append(token)
        segment_ids.append(seg_id)
    ntokens.append("[SEP]") # example end token
    segment_ids.append(0)
    
    input_ids = tokenizer.convert_tokens_to_ids(ntokens) # convert token to matching index
 
    # pad til max_seq_length
    while len(input_ids) < max_seq_length:
        input_ids.append(0)
        segment_ids.append(0)
        # we don't concerned about it!
        ntokens.append("**NULL**")
        
    input_ids = input_ids[:max_seq_length]
    segment_ids = segment_ids[:max_seq_length]
    
    # errors made when the lengths of input_ids and segment_ids are not
    # max_seq_length
    assert len(input_ids) == max_seq_length   
    assert len(segment_ids) == max_seq_length 
    
    return input_ids, segment_ids, label

In [24]:
def data_loader(data_set):

    result_input_ids=[]
    result_seg_ids=[]
    y=[]
    seg_id = 0
    
    for example in data_set:
        input_ids, segment_ids, label = convert_single_example(example, max_seq_length, tokenizer, seg_id)
        result_input_ids.append(input_ids)
        result_seg_ids.append(segment_ids)
        y.append(label)
        ## set segment id for next sentence
        if seg_id == 0:
            seg_id = 1
        else:
            seg_id = 0
        
    data_x = []
    data_y = []
    data_x.append(np.array(result_input_ids))
    data_x.append(np.array(result_seg_ids))

    data_y.append(np.array(y))
        
    return data_x, data_y

# convert datasets to the shape of input_feature and y(label)
train_x, train_y = data_loader(train_set)
valid_x, valid_y = data_loader(valid_set)
test_x, test_y = data_loader(test_set)

In [25]:
print("train dataset's shape")
print(np.array(train_x).shape)
print(np.array(train_y).shape)
print("-"*15)
print("valid dataset' shape")
print(np.array(valid_x).shape)
print(np.array(valid_y).shape)
print("-"*15)
print("test dataset' shape")
print(np.array(test_x).shape)
print(np.array(test_y).shape)

train dataset's shape
(2, 3959, 128)
(1, 3959, 1)
---------------
valid dataset' shape
(2, 1697, 128)
(1, 1697, 1)
---------------
test dataset' shape
(2, 2408, 128)
(1, 2408, 1)


In [26]:
print(train_x)
print(train_y)

[array([[  101,  1119,  1110, ...,     0,     0,     0],
       [  101, 12645,   131, ...,     0,     0,     0],
       [  101,  1142,  1110, ...,     0,     0,     0],
       ...,
       [  101,  1117, 12522, ...,     0,     0,     0],
       [  101, 13936,  3702, ...,     0,     0,     0],
       [  101,  1119,  1108, ...,     0,     0,     0]]), array([[0, 1, 1, ..., 0, 0, 0],
       [0, 1, 1, ..., 0, 0, 0],
       [0, 1, 1, ..., 0, 0, 0],
       ...,
       [0, 1, 1, ..., 0, 0, 0],
       [0, 1, 1, ..., 0, 0, 0],
       [0, 1, 1, ..., 0, 0, 0]])]
[array([['0'],
       ['0'],
       ['0'],
       ...,
       ['0'],
       ['0'],
       ['0']], dtype='<U1')]


In [27]:
SEQ_LEN = 128 
BATCH_SIZE = 16
EPOCHS=3
LR=1e-5 

## Load pretrained BioBERT model

In [28]:
config_path = os.path.join(model_path, 'bert_config.json')
checkpoint_path = os.path.join(model_path, 'model.ckpt-1000000')
vocab_path = os.path.join(model_path, 'vocab.txt')

layer_num = 12
model = load_trained_model_from_checkpoint(
    config_path,
    checkpoint_path,
    training= False,
    trainable=True,
    seq_len=SEQ_LEN)

## Fine-tune the model

In [29]:
def get_bert_finetuning_model(model):
    inputs = model.inputs[:2] #two inputs of segment, token 
    dense = model.output # bioBERT model's output sensor
    out = keras.layers.Dropout(0.2)(dense)
    
    first_tensors = Lambda(lambda x : x[:, 0, :])(out) # only used by [CLS] tensor
    hidden_size = int(first_tensors.shape[-1])
    pooled_output = keras.layers.Dense(hidden_size, activation='tanh')(first_tensors)
    output_layer=keras.layers.Dense(1, activation='sigmoid')(pooled_output) # take output of bioBERT pretraining model as an input
    
    model = keras.models.Model(inputs, output_layer) # final model that takes Gender specification layer as a final layer

    # callback = StopTrainingClassComplete()
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

    return model

In [30]:
bert_model = get_bert_finetuning_model(model)

In [31]:
bert_model.summary()

Model: "model_2"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
Input-Token (InputLayer)        [(None, 128)]        0                                            
__________________________________________________________________________________________________
Input-Segment (InputLayer)      [(None, 128)]        0                                            
__________________________________________________________________________________________________
Embedding-Token (TokenEmbedding [(None, 128, 768), ( 22268928    Input-Token[0][0]                
__________________________________________________________________________________________________
Embedding-Segment (Embedding)   (None, 128, 768)     1536        Input-Segment[0][0]              
____________________________________________________________________________________________

## Training the model

In [35]:
checkpointName = os.path.join(checkpoint_path)
cp_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpointName,
                                                     save_weights_only=True,
                                                     verbose=1)
history = bert_model.fit(
        train_x,
        train_y,
        epochs=EPOCHS,
        validation_data=(valid_x, valid_y),
        verbose=1,
        callbacks=[cp_callback],
        batch_size = BATCH_SIZE
    )

Train on 3959 samples, validate on 1697 samples
Epoch 1/3
Epoch 00001: saving model to ./models2/model.ckpt-1000000
Epoch 2/3
Epoch 00002: saving model to ./models2/model.ckpt-1000000
Epoch 3/3
Epoch 00003: saving model to ./models2/model.ckpt-1000000


## Testing the model and get predictions

In [36]:
preds = np.array(bert_model.predict(test_x)) # use fine-tuned bioBERT model to predict on test set
preds = np.reshape(preds, [-1]) # [[pred1],[pred2],[pred3],..] -> [pred1,pred2,pred3,..]

In [114]:
predictions = []
for pred in preds:
    pred_temp = int(round(pred))
    if pred_temp >= 0.5:
        predictions.append(1) 
    else:
        predictions.append(0)# pred value would be 0 (male) or 1 (female)

In [115]:
len(predictions)

2408

In [126]:
actual_labels = total_labels # [label1, label2, label3, ...], which are actual values with total_labels from above

## Show performance scores
#### accuracy - check how many predictions are made correctly
#### precision - check how many female specificied predictions are correct

In [129]:
from sklearn.metrics import accuracy_score
print("accuracy: %0.4f" %accuracy_score(actual_labels, predictions))

accuracy: 0.7143


In [130]:
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
print("precision-score: %0.4f" % precision_score(actual_labels, predictions, average='macro'))
print("recall-score: %0.4f" % recall_score(actual_labels, predictions, average='macro'))
print("f1-score: %0.4f" % f1_score(actual_labels, predictions, average='macro'))

precision-score: 0.3571
recall-score: 0.5000
f1-score: 0.4167


## Result 
### It shows gender bias. Predictions on male are made more accurately than predictions on female.