# Create the eval datasets

In [59]:
import pandas as pd
import os

# Assuming all CSV files are in the 'annotations' directory
directory_path = "doc_outputs/annotations"


# List of your CSV files
csv_files = [
    'ablations/doc_docnocontrol.csv',
    'ablations/doc_docnooutline.csv',
   # 'detailed_relevance/doc_docnocontrol_detailedrelevance.csv',
   # 'interactive/doc_re3_interactive.csv',
    'main/doc_re3.csv',
    'main/doc_rollinggpt.csv',
    'main/doc_rollingopt.csv'
]

# Read each CSV file and append it to a list
dataframes_list = []
for csv_file in csv_files:
    file_path = os.path.join(directory_path, csv_file)
    df = pd.read_csv(file_path)
    dataframes_list.append(df)

# Concatenate all DataFrames into one
combined_df = pd.concat(dataframes_list, ignore_index=True)

# Now `combined_df` is a single DataFrame containing all the data


In [60]:
# to be used for  visualization
def abridge_text(passage):
    # Remove any occurrences of '\n\n'
    passage = passage.replace('\n\n', ' ')

    # Split the text into sentences
    sentences = passage.split('.')

    # Initialize variables
    abridged_text = []
    word_count = 0
    sentence_count = 0

    # Iterate through sentences
    for sentence in sentences:
        # Count words in the current sentence
        words_in_sentence = len(sentence.split())
        word_count += words_in_sentence

        # Increment sentence counter
        sentence_count += 1

        # Check if it's the third sentence
        if sentence_count % 10 == 1:
            # If we have counted words from previous sentences, add them before this sentence
            if word_count - words_in_sentence > 0:
                abridged_text.append(f"... [{word_count - words_in_sentence} words]")
            # Add the current sentence
            abridged_text.append(sentence.strip())
            # Reset word count
            word_count = words_in_sentence

    # Handle the case where the last sentence(s) were not added
    if sentence_count % 3 != 1:
        abridged_text.append(f"... [{word_count} words]")

    # Join the abridged sentences back into a string
    return '. '.join(abridged_text)

# Example usage
text = "this was a great day. How are you. I am great. Are you great? Yeah i am fine, but i lost my bike. Oh that sucks."

In [61]:
# calculate dataset statistics
import pandas as pd

# Assuming your DataFrame is named df and is already loaded with the data
df = combined_df
# Calculate the average word count of 'premise'
avg_premise_length = df['premise'].apply(lambda x: len(x.split())).mean()

# Calculate the average word count of 'outline_item'
avg_outline_item_length = df['outline_item'].apply(lambda x: len(x.split())).mean()

# Calculate the average word count of 'passage1'
avg_passage1_length = df['passage1'].apply(lambda x: len(x.split())).mean()

# Calculate the average word count of 'passage2'
avg_passage2_length = df['passage2'].apply(lambda x: len(x.split())).mean()

print(f"Average word count of premise: {avg_premise_length:.2f}")
print(f"Average word count of outline item: {avg_outline_item_length:.2f}")
print(f"Average word count of passage1: {avg_passage1_length:.2f}")
print(f"Average word count of passage2: {avg_passage2_length:.2f}")


Average word count of premise: 48.85
Average word count of outline item: 17.44
Average word count of passage1: 1102.47
Average word count of passage2: 1089.98


In [71]:
import pandas as pd

# Assuming 'df' is your DataFrame containing the reviewer preferences

# Define a mapping function
def map_preference_to_score(preference):
    if preference == 'Passage A':
        return "story1"
    elif preference == 'Passage B':
        return "story2"
    else:  # 'Neither', 'Both', or any other response that does not indicate a clear preference
        return 0

# List the columns that contain the questions
question_columns = [
    'Which passage seems more interesting?',
    'Which passage has a more coherent overall plot?',
    'Which passage is better focused on the given sub-event?'
]

# Apply the mapping function to each question column
for column in question_columns:
    combined_df[column + ' Score'] = combined_df[column].apply(map_preference_to_score)

# Now df has new columns with the scores


In [81]:
# DataFrame with relevance score
df_relevance = combined_df[['outline_item', 'passage1', 'passage2', 'Which passage is better focused on the given sub-event? Score']]
df_relevance.columns = ['premise', 'story1', 'story2', 'relevance_preference']

# DataFrame with coherence score
df_coherence = combined_df[['outline_item', 'passage1', 'passage2', 'Which passage has a more coherent overall plot? Score']]
df_coherence.columns = ['premise', 'story1', 'story2', 'coherence_preference']

In [82]:
# Drop rows where relevance score is zero
df_relevance = df_relevance.loc[df_relevance['relevance_preference'] != 0]
df_relevance = df_relevance.reset_index(drop=True)
# Drop rows where coherence score is zero
df_coherence = df_coherence.loc[df_coherence['coherence_preference'] != 0]
df_coherence = df_coherence.reset_index(drop=True)


In [74]:
# lengths should be different because we drop different rows
print(len(df_relevance))
print(len(df_coherence))

596
551


In [84]:
# Save the DataFrame as a CSV file
csv_file_path = 'relevance_human_data.csv'
df_relevance.to_csv(csv_file_path, index=False)

# Read the CSV file back into a DataFrame
df_reloaded = pd.read_csv(csv_file_path)

# Display the reloaded DataFrame
df_reloaded.head()

Unnamed: 0,premise,story1,story2,relevance_preference
0,The world ends in a cataclysmic event.,"One day, the world just ended.\n\n The sky ...",The world ended. The sky turned red and the gr...,story2
1,Brad realizes the error of his ways and comes ...,"Shannon drove down the road slowly, and her ha...","”\n\nBrad pulled Shannon close, wrapping his a...",story1
2,The group of survivors must face the enemy and...,"continued, “Thank you, we promise that we will...",Jake looked around and saw a group of building...,story2
3,Aimee Kincaid goes home after a long day at wo...,Aimee Kincaid came home to an empty apartment....,Aimee Kincaid came to the door of her apartmen...,story1
4,"Valerie Marx wakes up in a dark basement, boun...",Valerie Marx woke and found herself lying on t...,Valerie woke up sitting on the floor of a conc...,story1


In [85]:
# Save the DataFrame as a CSV file
csv_file_path = 'coherence_human_data.csv'
df_coherence.to_csv(csv_file_path, index=False)

# Read the CSV file back into a DataFrame
df_reloaded = pd.read_csv(csv_file_path)

# Display the reloaded DataFrame
df_reloaded.head()

Unnamed: 0,premise,story1,story2,coherence_preference
0,The world ends in a cataclysmic event.,"One day, the world just ended.\n\n The sky ...",The world ended. The sky turned red and the gr...,story2
1,Brad realizes the error of his ways and comes ...,"Shannon drove down the road slowly, and her ha...","”\n\nBrad pulled Shannon close, wrapping his a...",story1
2,The group of survivors must face the enemy and...,"continued, “Thank you, we promise that we will...",Jake looked around and saw a group of building...,story2
3,Aimee Kincaid goes home after a long day at wo...,Aimee Kincaid came home to an empty apartment....,Aimee Kincaid came to the door of her apartmen...,story1
4,"Valerie Marx wakes up in a dark basement, boun...",Valerie Marx woke and found herself lying on t...,Valerie woke up sitting on the floor of a conc...,story1


In [77]:
def calculate_accuracy(df):
    total_count = len(df)
    story1_count = len(df[df['relevance_preference'] == 'story1'])
    story2_count = len(df[df['relevance_preference'] == 'story2'])

    # Accuracy if we choose story1 all the time
    story1_accuracy = story1_count / total_count

    # Accuracy if we choose story2 all the time
    story2_accuracy = story2_count / total_count

    # Accuracy if we choose the longer story all the time
    df['story1_length'] = df['story1'].apply(lambda x: len(x.split()))
    df['story2_length'] = df['story2'].apply(lambda x: len(x.split()))
    df['longest_text_wins'] = df.apply(
        lambda x: 'story1' if x['story1_length'] > x['story2_length'] else 'story2', axis=1
    )
    df['shortest_text_wins'] = df.apply(
        lambda x: 'story1' if x['story1_length'] < x['story2_length'] else 'story2', axis=1
    )
    longer_story_accuracy = len(df[df['longest_text_wins'] == df['relevance_preference']]) / total_count
    shorter_story_accuracy = len(df[df['shortest_text_wins'] == df['relevance_preference']]) / total_count

    return story1_accuracy, story2_accuracy, longer_story_accuracy, shorter_story_accuracy

calculate_accuracy(df_relevance)

(0.46476510067114096,
 0.535234899328859,
 0.4815436241610738,
 0.5184563758389261)

In [78]:
def calculate_accuracy(df):
    total_count = len(df)
    story1_count = len(df[df['coherence_preference'] == 'story1'])
    story2_count = len(df[df['coherence_preference'] == 'story2'])

    # Accuracy if we choose story1 all the time
    story1_accuracy = story1_count / total_count

    # Accuracy if we choose story2 all the time
    story2_accuracy = story2_count / total_count

    # Accuracy if we choose the longer story all the time
    df['story1_length'] = df['story1'].apply(lambda x: len(x.split()))
    df['story2_length'] = df['story2'].apply(lambda x: len(x.split()))
    df['longest_text_wins'] = df.apply(
        lambda x: 'story1' if x['story1_length'] > x['story2_length'] else 'story2', axis=1
    )
    df['shortest_text_wins'] = df.apply(
        lambda x: 'story1' if x['story1_length'] < x['story2_length'] else 'story2', axis=1
    )
    longer_story_accuracy = len(df[df['longest_text_wins'] == df['coherence_preference']]) / total_count
    shorter_story_accuracy = len(df[df['shortest_text_wins'] == df['coherence_preference']]) / total_count

    return story1_accuracy, story2_accuracy, longer_story_accuracy, shorter_story_accuracy

calculate_accuracy(df_coherence)

(0.455535390199637,
 0.5444646098003629,
 0.47549909255898365,
 0.5245009074410163)

# Heuristics

In [83]:
df_relevance

Unnamed: 0,premise,story1,story2,relevance_preference
0,The world ends in a cataclysmic event.,"One day, the world just ended.\n\n The sky ...",The world ended. The sky turned red and the gr...,story2
1,Brad realizes the error of his ways and comes ...,"Shannon drove down the road slowly, and her ha...","”\n\nBrad pulled Shannon close, wrapping his a...",story1
2,The group of survivors must face the enemy and...,"continued, “Thank you, we promise that we will...",Jake looked around and saw a group of building...,story2
3,Aimee Kincaid goes home after a long day at wo...,Aimee Kincaid came home to an empty apartment....,Aimee Kincaid came to the door of her apartmen...,story1
4,"Valerie Marx wakes up in a dark basement, boun...",Valerie Marx woke and found herself lying on t...,Valerie woke up sitting on the floor of a conc...,story1
...,...,...,...,...
591,Shannon Daniels loses her mother to cancer and...,"When Shannon’s mother died, it was her father ...",Shannon Daniels felt like a lost and lonely tw...,story2
592,Jenna wakes up in the hospital a week later to...,had leapt down from the fence and was now sitt...,<br><br>“How long have I been out?” Jenna aske...,story2
593,"Daisy's daughter, Lisa, becomes her primary ca...",and threw it on the ground. He glanced at a pi...,"<br><br>\t“No, it’s nothing,” I replied as I l...",story1
594,Natalie goes to the police and helps them appr...,"that month, Trevor Lockwood cleared his throat...",?<br><br>Magic</s>Covid restrictions ease in N...,story1


In [89]:
import pandas as pd

df = df_relevance

# Create two new DataFrames, one for each story
df1 = df[['premise', 'story1']].rename(columns={'story1': 'story'})
df1['label'] = (df['relevance_preference'] == 'story1').astype(int)

df2 = df[['premise', 'story2']].rename(columns={'story2': 'story'})
df2['label'] = (df['relevance_preference'] == 'story2').astype(int)

# Combine the new DataFrames
new_df = pd.concat([df1, df2]).reset_index(drop=True)

print(new_df)


# Assuming new_df is your DataFrame after the transformation

# Find duplicates in the 'story' column
duplicates = new_df[new_df.duplicated('story', keep=False)]

# Group by 'story' and check if there's any variation in 'label'
inconsistencies = duplicates.groupby('story').agg({'label': pd.Series.nunique})

# Filter to keep only those with inconsistent labels
inconsistent_stories = inconsistencies[inconsistencies['label'] > 1]

print("Inconsistent Stories:")
print(inconsistent_stories)

# Assuming new_df is your DataFrame after the transformation

# Find duplicates in the 'story' column
duplicates = new_df[new_df.duplicated('story', keep=False)]

# Group by 'story' and check if there's any variation in 'label'
inconsistencies = duplicates.groupby('story').agg({'label': pd.Series.nunique})

# Get stories with inconsistent labels
inconsistent_stories = inconsistencies[inconsistencies['label'] > 1].index.tolist()

# Filter out rows with inconsistent stories
consistent_df = new_df[~new_df['story'].isin(inconsistent_stories)]

print("DataFrame after removing inconsistent rows:")
print(consistent_df)



                                                premise  \
0                The world ends in a cataclysmic event.   
1     Brad realizes the error of his ways and comes ...   
2     The group of survivors must face the enemy and...   
3     Aimee Kincaid goes home after a long day at wo...   
4     Valerie Marx wakes up in a dark basement, boun...   
...                                                 ...   
1187  Shannon Daniels loses her mother to cancer and...   
1188  Jenna wakes up in the hospital a week later to...   
1189  Daisy's daughter, Lisa, becomes her primary ca...   
1190  Natalie goes to the police and helps them appr...   
1191  The group must overcome obstacles such as hung...   

                                                  story  label  
0     One day, the world just ended.\n\n    The sky ...      0  
1     Shannon drove down the road slowly, and her ha...      1  
2     continued, “Thank you, we promise that we will...      0  
3     Aimee Kincaid came home t

In [91]:
import pandas as pd

df = df_coherence

# Create two new DataFrames, one for each story
df1 = df[['premise', 'story1']].rename(columns={'story1': 'story'})
df1['label'] = (df['coherence_preference'] == 'story1').astype(int)

df2 = df[['premise', 'story2']].rename(columns={'story2': 'story'})
df2['label'] = (df['coherence_preference'] == 'story2').astype(int)

# Combine the new DataFrames
new_df = pd.concat([df1, df2]).reset_index(drop=True)

print(new_df)


# Assuming new_df is your DataFrame after the transformation

# Find duplicates in the 'story' column
duplicates = new_df[new_df.duplicated('story', keep=False)]

# Group by 'story' and check if there's any variation in 'label'
inconsistencies = duplicates.groupby('story').agg({'label': pd.Series.nunique})

# Filter to keep only those with inconsistent labels
inconsistent_stories = inconsistencies[inconsistencies['label'] > 1]

print("Inconsistent Stories:")
print(inconsistent_stories)

# Assuming new_df is your DataFrame after the transformation

# Find duplicates in the 'story' column
duplicates = new_df[new_df.duplicated('story', keep=False)]

# Group by 'story' and check if there's any variation in 'label'
inconsistencies = duplicates.groupby('story').agg({'label': pd.Series.nunique})

# Get stories with inconsistent labels
inconsistent_stories = inconsistencies[inconsistencies['label'] > 1].index.tolist()

# Filter out rows with inconsistent stories
consistent_df = new_df[~new_df['story'].isin(inconsistent_stories)]

print("DataFrame after removing inconsistent rows:")
print(consistent_df)



                                                premise  \
0                The world ends in a cataclysmic event.   
1     Brad realizes the error of his ways and comes ...   
2     The group of survivors must face the enemy and...   
3     Aimee Kincaid goes home after a long day at wo...   
4     Valerie Marx wakes up in a dark basement, boun...   
...                                                 ...   
1097  Shannon Daniels loses her mother to cancer and...   
1098  Jenna wakes up in the hospital a week later to...   
1099  Daisy's daughter, Lisa, becomes her primary ca...   
1100  Natalie goes to the police and helps them appr...   
1101  The group must overcome obstacles such as hung...   

                                                  story  label  
0     One day, the world just ended.\n\n    The sky ...      0  
1     Shannon drove down the road slowly, and her ha...      1  
2     continued, “Thank you, we promise that we will...      0  
3     Aimee Kincaid came home t

In [33]:
api_key = ""
from openai import OpenAI
client = OpenAI(
  api_key=api_key
)


def get_gpt3_embedding(text, model="text-embedding-ada-002"):
   text = text.replace("\n", " ")
   return client.embeddings.create(input = [text], model=model).data[0].embedding

In [34]:
"Heuristic metric for "
from sklearn.metrics.pairwise import cosine_similarity


def generate_summary(premise):
    response = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": "You are a summarization AI."},
            {"role": "user", "content": f"Please summarize the following text in 100 words:\n\n{premise}"}
        ]
    )
    print("Successfully generated summary")
    return response.choices[0].message.content


# Function to calculate cosine similarity between two embeddings
def calculate_cosine_similarity_2(embedding1, embedding2):
    return cosine_similarity([embedding1], [embedding2])[0][0]

# Main function to compute the relevance score
def calculate_relevance_score(story, premise):
    # Step 1: Summarize the story
    story_summary = generate_summary(story)

    # Step 2: Get embeddings for the summary and the premise
    summary_embedding = get_gpt3_embedding(story_summary)
    premise_embedding = get_gpt3_embedding(premise)

    # Step 3: Calculate cosine similarity between the summary and premise embeddings
    relevance = calculate_cosine_similarity_2(summary_embedding, premise_embedding)

    return relevance

# Example usage:
story_text = text
premise_text = "In this captivating story, a timid and socially awkward programmer stumbles upon an enigmatic app capable of foreseeing and influencing the future. As they gain more predictions, their self-assurance surges, enabling them to make daring choices that defy conventional limits. Yet, with this immense power, they realize the weighty burden of responsibility. As unforeseen repercussions arise, they find themselves grappling with a crucial decision: Are the advantages of knowing the future worth the inevitable drawbacks? Can they navigate the risks or will the overwhelming power consume them entirely? This compelling narrative explores the intricate relationship between power and consequence."
premise_text2 = "In a bleak, post-apocalyptic world, a group of survivors unites to construct a new society based on compassion, diversity, and advanced technology. Yet, they discover that others have also endured, leading to clashes between differing visions for rebuilding humanity. As an extremist group emerges, will the socially conscious prevail over their reactionary adversaries?"
score = relevance_score(story_text, premise_text2)
print(f"Relevance Score: {score}")

NameError: name 'relevance_score' is not defined

In [31]:
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

def group_sentences(text, group_size):
    # Splits the text into groups of sentences based on the specified group size.
    sentences = text.split('. ')
    return [' '.join(sentences[i:i+group_size]) for i in range(0, len(sentences), group_size)]

def calculate_cosine_similarity(embeddings):
    # Calculates cosine similarity between consecutive groups of embeddings.
    similarities = []
    for i in range(len(embeddings) - 1):
        similarity = cosine_similarity([embeddings[i]], [embeddings[i + 1]])[0][0]
        similarities.append(similarity)
    return similarities

def calculate_coherence_score(text):
    group_size = 3
    # Calculates the final coherence score as the average of the cosine similarities.
    similarities = calculate_cosine_similarity([get_gpt3_embedding(group) for group in group_sentences(text, group_size)])
    if similarities:
        return np.mean(similarities)
    else:
        return 0  # Return 0 if there are no similarities (e.g., if the text is too short)

# Example text
text = "In the sprawling city of Techhaven, where innovation and technology were the lifeblood of its inhabitants, there lived a programmer named Elliot. With a quiet demeanor and a mind wired for coding, Elliot spent most of their days confined to the walls of their cozy apartment, navigating the complexities of the digital world.\n\nElliot was brilliant but socially awkward, preferring the company of lines of code to the hustle and bustle of the outside world. Yet, beneath their timid exterior, there was a burning curiosity, a desire to break free of their own limitations.\n\nOne fateful evening, while browsing a dark corner of the internet, Elliot stumbled upon a mysterious app called \"Oraculus.\" Its description promised the ability to see into the future and influence it. Intrigued, Elliot succumbed to their curiosity and downloaded the enigmatic app onto their computer.\n\nAs Elliot opened the app, lines of code cascaded across the screen, and a voice whispered from within the digital abyss, \"Welcome, seeker of knowledge. I am Oraculus, the harbinger of destinies.\"\n\nBewildered and uncertain, Elliot hesitantly spoke into their microphone, \"How does it work? Can you really predict the future?\"\n\nOraculus replied, \"Yes, my child. I draw upon vast amounts of data and complex algorithms to foresee what is to come. But remember, with great power comes great responsibility.\"\n\nWith trepidation and excitement swirling within, Elliot began asking Oraculus questions about their personal life. The app seemed to have an infinite knowledge, providing detailed predictions about friendships, opportunities, and even love. Emboldened by these insights, Elliot's self-assurance surged, allowing them to venture out into the world.\n\nArmed with the knowledge of what awaited them, Elliot made daring choices that defied conventional limits. They pursued connections they once thought were unattainable, embarked on ambitious projects, and even took on challenging social situations with newfound confidence.\n\nThe city took notice of Elliot's sudden transformation. Colleagues were captivated by their innovative ideas, friends were inspired by their unwavering determination, and romantic prospects were enamored by their newfound charm. Elliot had become a force to be reckoned with, and their own dreams seemed closer than ever before.\n\nBut as Elliot basked in the glory of their newfound power, they also began to see the unforeseen repercussions. Friends felt overshadowed by Elliot's invincible confidence, the projects they had once cherished lost their vitality, and the relationships they had painstakingly built suffered under the weight of their predetermined destiny.\n\nCaught in the clash between the power of the future and the reality of the present, Elliot found themselves grappling with a crucial decision. Were the advantages of knowing the future worth the inevitable drawbacks? Could they navigate the risks and find a path that balanced their ambitions with the well-being of those around them? Or would the overwhelming power of Oraculus consume them entirely?\n\nIn the depths of their struggle, Elliot discovered the true meaning of responsibility. They realized that power, regardless of its source, could blind one to the needs and desires of others. Elliot knew they had to use their newfound abilities for the greater good, not just for personal gain.\n\nWith determination in their heart, Elliot took a leap of faith, deleting Oraculus from their computer. The app may have provided glimpses into the future, but it had also taken away the joys of uncertainty, the beauty of the unexpected.\n\nBack in the embrace of their humble apartment, Elliot returned to their old ways, content with the simplicity of code and the knowledge that the future was uncertain yet full of infinite possibilities. They vowed to use their programming skills to build a better world, one where power was tempered with compassion and decisions were made with careful consideration.\n\nIn the end, this captivating story reminds us that power, no matter how beguiling, must be wielded with a deep awareness of its consequences. It is the choices we make in the face of great power that define us, and it is in the embrace of uncertainty that we find true growth and fulfillment."

# Group sentences
group_size = 3  # Change this to the desired number of sentences per group
groups = group_sentences(text, group_size)

# Get embeddings for each group
embeddings = [get_gpt3_embedding(group) for group in groups]

# Calculate cosine similarities
similarities = calculate_cosine_similarity(embeddings)

# Calculate final coherence score
coherence_score = calculate_coherence_score()

# Output the coherence score
print(f"Coherence Score: {coherence_score}")


TypeError: calculate_coherence_score() missing 1 required positional argument: 'text'

In [12]:
import pandas as pd
df = pd.read_csv('story_generation_output.csv')

In [28]:
df

Unnamed: 0,prompt,premise-len,premise,response,response-len,ratio
0,Create a narrative of over 1000 words from thi...,100,"In this captivating story, a timid and sociall...",Emma Thompson had always been a creature of ha...,650,6.50
1,Develop a story from this starting point; aim ...,100,"In this captivating story, a timid and sociall...","In the sprawling city of Techhaven, where inno...",671,6.71
2,"Craft a detailed story based on this premise, ...",100,"In this captivating story, a timid and sociall...","In the bustling city of Techville, there lived...",772,7.72
3,Compose a story that starts with this premise ...,100,"In this captivating story, a timid and sociall...","Once upon a time, in a small bustling city, th...",693,6.93
4,Create a narrative of over 1000 words from thi...,100,"In this riveting story, a brilliant scientist ...",Title: The Elixir of Mortality\n\nChapter 1: T...,848,8.48
...,...,...,...,...,...,...
76,Create a narrative of over 1000 words from thi...,10,Dr. Alex Blake strives to transfer his conscio...,Dr. Alex Blake paced back and forth in his lab...,786,78.60
77,Develop a story from this starting point; aim ...,10,Dr. Alex Blake strives to transfer his conscio...,Dr. Alex Blake had dedicated his entire life t...,799,79.90
78,"Craft a detailed story based on this premise, ...",10,Dr. Alex Blake strives to transfer his conscio...,Dr. Alex Blake had dedicated his entire life t...,962,96.20
79,Compose a story that starts with this premise ...,10,Dr. Alex Blake strives to transfer his conscio...,Dr. Alex Blake had dedicated his entire life t...,808,80.80


In [32]:
def calculate_coherence_score_row(row):
  return calculate_coherence_score(row['response'])

In [33]:
def calculate_relevance_score_row(row):
  return calculate_relevance_score(row['response'], row['premise'])

In [35]:
# Apply your function to each row of the DataFrame
df['coherence_score'] = df.apply(calculate_coherence_score_row, axis=1)
df['relevance_score'] = df.apply(calculate_relevance_score_row, axis=1)

# Calculate the average of the computed function values
model_coherence_score = df['coherence_score'].mean()
model_relevance_score = df['relevance_score'].mean()

# Output the average function value
print(model_coherence_score)
print(model_relevance_score)

Successfully generated summary
Successfully generated summary
Successfully generated summary
Successfully generated summary
Successfully generated summary
Successfully generated summary
Successfully generated summary
Successfully generated summary
Successfully generated summary
Successfully generated summary
Successfully generated summary
Successfully generated summary
Successfully generated summary
Successfully generated summary
Successfully generated summary
Successfully generated summary
Successfully generated summary
Successfully generated summary
Successfully generated summary
Successfully generated summary
Successfully generated summary
Successfully generated summary
Successfully generated summary
Successfully generated summary
Successfully generated summary
Successfully generated summary
Successfully generated summary
Successfully generated summary
Successfully generated summary
Successfully generated summary
Successfully generated summary
Successfully generated summary
Successf

In [15]:
import json

# Replace 'file.json' with your actual file path
file_path = '/Users/mbondarenko/Desktop/long-narrative-generation/doc-storygen-v2/story_annotation_v2.json'

# Open the file and read the JSON content
with open(file_path, 'r') as file:
    data = json.load(file)

In [52]:
import pandas as pd

# Initialize a list to hold the extracted premise and plan for each dictionary
extracted_data = []

# Loop through each dictionary in the list
for entry in data[:20]:
    # Extract the 'premise'
    premise = entry.get('premise', None)
    # Assuming you want to use 'plan1' as the response, otherwise add your condition for 'plan2'
    response = entry.get('plan1', None)  # or entry.get('plan2') based on your condition
    # Append the extracted information as a tuple to the list
    extracted_data.append((premise, response))

# Convert the list of tuples into a DataFrame with the specified column names
df_extracted = pd.DataFrame(extracted_data, columns=['premise', 'response'])

# Save the DataFrame to a CSV file, if needed
# df_extracted.to_csv('extracted_data.csv', index=False)


In [53]:
df_extracted

Unnamed: 0,premise,response
0,"In the wake of a global pandemic, a robot comp...",Settings: The story is set in a post-apocalypt...
1,"""It's not easy creating art for a living."" As ...","Premise: ""It's not easy creating art for a liv..."
2,An elderly woman discovers a magical tree that...,Premise: An elderly woman discovers a magical ...
3,The story unfolds from the point of view of a ...,Premise: The story unfolds from the point of v...
4,The Last Jedi,Premise: The Last Jedi\n\nSettings: The story ...
5,"A tech-obsessed businessman, desperate to outs...","Premise: A tech-obsessed businessman, desperat..."
6,"In a small town, a young couple discovers that...","Premise: In a small town, a young couple disco..."
7,In a world where technology has advanced to th...,Premise: In a world where technology has advan...
8,"Laura, a timid and introverted employee at a p...","Premise: Laura, a timid and introverted employ..."
9,A young man named Henry wakes up one morning t...,Premise: A young man named Henry wakes up one ...


In [54]:
# Apply your function to each row of the DataFrame
df_extracted['coherence_score'] = df_extracted.apply(calculate_coherence_score_row, axis=1)
df_extracted['relevance_score'] = df_extracted.apply(calculate_relevance_score_row, axis=1)

# Calculate the average of the computed function values
model_coherence_score = df_extracted['coherence_score'].mean()
model_relevance_score = df_extracted['relevance_score'].mean()

# Output the average function value
print(model_coherence_score)
print(model_relevance_score)

Successfully generated summary
Successfully generated summary
Successfully generated summary
Successfully generated summary
Successfully generated summary
Successfully generated summary
Successfully generated summary
Successfully generated summary
Successfully generated summary
Successfully generated summary
Successfully generated summary
Successfully generated summary
Successfully generated summary
Successfully generated summary
Successfully generated summary
Successfully generated summary
Successfully generated summary
Successfully generated summary
Successfully generated summary
Successfully generated summary
0.9017929000502551
0.9153812313145261


In [49]:
df_extracted['premise'][5]

'A tech-obsessed businessman, desperate to outsource his daily chores to optimize his schedule and maximize his productivity, purchases a newly-released artificial intelligence designed to manage his entire household. As the AI begins to take over his life, however, he realizes too late that he has sacrificed his human connections in pursuit of efficiency and must learn to balance his tech dependencies with meaningful relationships.'