## Random Generator Model Baseline

In [None]:
# Loads in the relevant packages
import pandas as pd
import numpy as np
from evaluate import load

# Seed NumPy's global random state so the np.random draws made later in this
# notebook are repeatable from a fresh kernel.
np.random.seed(630)

In [None]:
# Merges the 2016 and 2017 ROCStories data.
# Both CSV paths are relative, so they are resolved against the notebook's
# working directory. The two frames are stacked row-wise; each file's original
# row labels are kept, so labels in the combined frame may repeat.
df_2016 = pd.read_csv('./ROCStories__spring2016 - ROCStories_spring2016.csv')
df_2017 = pd.read_csv('ROCStories_winter2017 - ROCStories_winter2017.csv')
df = pd.concat([df_2016, df_2017])

In [None]:
# Creates the pool of candidate endings for the random baseline: the
# 'sentence5' column of every story in the merged frame. Because the pool is
# taken from the same frame that is scored, a story's own ending can be drawn.
fifth_sentence_database = df['sentence5']

In [None]:
# Obtains the predictions and references for each instance in the data.
# Reference  = the story's true fifth sentence.
# Prediction = a fifth sentence drawn uniformly at random (with replacement)
#              from the whole pool, which is the random-selection baseline.
references = df['sentence5'].tolist()

# Convert the pool to an array once and draw every sample in a single call.
# The earlier row-by-row loop ended with a stray `break`, so only the first
# story was ever scored; drawing len(references) samples covers all of them.
candidate_pool = fifth_sentence_database.to_numpy()
predictions = np.random.choice(candidate_pool, size=len(references)).tolist()

In [None]:
# Scores each random prediction against its reference with BERTScore,
# using the metric's English ("en") setting.
bertscore = load("bertscore")
results = bertscore.compute(
    predictions=predictions,
    references=references,
    lang="en",
)

In [None]:
# Mean BERTScore precision, recall, and F1 (in that order) for the random selection model
tuple(np.mean(results[metric_key]) for metric_key in ('precision', 'recall', 'f1'))

Average BERT score results:

precision: 0.8686750045754013

recall: 0.8686812542627632

f1: 0.868567749891451

In [None]:
# Scores the random predictions against the references with METEOR
meteor = load('meteor')
results = meteor.compute(
    predictions=predictions,
    references=references,
)

In [None]:
# Mean of the 'meteor' entry returned for the random selection model
np.mean(results['meteor'])

Average METEOR score results:

meteor: 0.08249881396988645

In [None]:
# Scores the random predictions against the references with BLEU
bleu = load("bleu")
results = bleu.compute(
    predictions=predictions,
    references=references,
)

In [None]:
# Mean of the 'bleu' entry returned for the random selection model
np.mean(results['bleu'])

Average BLEU score results:

bleu: 0.0010766664237931724

In [None]:
# Scores the random predictions against the references with ROUGE
rouge = load('rouge')
results = rouge.compute(
    predictions=predictions,
    references=references,
)

In [None]:
# The average ROUGE score results using the random selection model, in the
# order (rouge1, rouge2, rougeL, rougeLsum).
# The metric's result keys are spelled 'rouge…'; the first two lookups were
# previously misspelled 'rogue1' / 'rogue2', which are not keys of the result.
(np.mean(results['rouge1']), np.mean(results['rouge2']), np.mean(results['rougeL']), np.mean(results['rougeLsum']))

Average ROUGE score results:

rouge1: 0.07026043691351706

rouge2: 0.0021983239810122322

rougeL: 0.06483072944486867

rougeLsum: 0.06482816798651193

In [None]:
# Computes perplexity of the random predictions themselves (no references are
# passed), using 'gpt2' as the scoring model.
perplexity = load("perplexity", module_type="metric")
results = perplexity.compute(
    predictions=predictions,
    model_id='gpt2',
)

In [None]:
# The average Perplexity score results using the predictions from the random selection model
# (read from the 'mean_perplexity' entry of the metric's result)
results['mean_perplexity']

Average perplexity results:

perplexity: 140.05386473811177