In [2]:
import pandas as pd
import re

# Input locations for the cleaned prompts and the per-word score table
final_cleaned_prompts_path = 'final_cleaned_prompts.csv'  # Adjust path if needed
word_score_path = 'Word Score.csv'  # Adjust path if needed

# Load both CSV files into DataFrames
final_cleaned_prompts_df = pd.read_csv(final_cleaned_prompts_path)
word_score_df = pd.read_csv(word_score_path)

# Pair the "word" column with the "frequency" column in a dict so that each
# token lookup is a single dictionary access
word_score_dict = {
    word: frequency
    for word, frequency in zip(word_score_df["word"], word_score_df["frequency"])
}

# Token pattern: a maximal run of word characters. Compiled once here because
# the scoring function is called once per prompt.
_WORD_PATTERN = re.compile(r'\b\w+\b')


# Function to calculate score and average score for each prompt
def calculate_prompt_score_and_average(prompt, word_score_dict):
    """Return the total and per-word average score of a prompt.

    The prompt is lowercased and split into word tokens. Each token is looked
    up in ``word_score_dict``; tokens without an entry contribute 0.

    Args:
        prompt: Prompt text. ``None`` and float NaN (the value pandas uses for
            an empty CSV cell) are treated as an empty prompt. Any other
            non-string value is converted with ``str()``.
        word_score_dict: Mapping from word to its numeric score. Lookups use
            the lowercased token.

    Returns:
        A ``(score, average_score)`` tuple. ``score`` is the sum of the token
        scores and ``average_score`` is ``score`` divided by the number of
        tokens. Both are 0 when the prompt has no tokens.
    """
    # Without this guard a missing cell is stringified to "nan" (or "none")
    # and scored as if the prompt were that single word.
    if prompt is None or (isinstance(prompt, float) and pd.isna(prompt)):
        return 0, 0

    words = _WORD_PATTERN.findall(str(prompt).lower())  # Tokenize and convert to lowercase
    if not words:
        return 0, 0

    score = sum(word_score_dict.get(word, 0) for word in words)  # Unknown words score 0
    return score, score / len(words)

# Score every prompt. Each row's (score, average_score) pair is wrapped in a
# Series so that apply() expands the results into two columns, which are then
# assigned to the "score" and "average_score" columns in that order.
score_columns = ["score", "average_score"]
prompt_scores = final_cleaned_prompts_df["cleaned_prompt"].apply(
    lambda prompt_text: pd.Series(
        calculate_prompt_score_and_average(prompt_text, word_score_dict)
    )
)
final_cleaned_prompts_df[score_columns] = prompt_scores

# Write the augmented table to a new CSV file without the index column
output_path = 'final_cleaned_prompts_with_scores_and_average.csv'
final_cleaned_prompts_df.to_csv(output_path, index=False)

print(f"Scores and average scores have been calculated and saved to {output_path}")


Scores and average scores have been calculated and saved to final_cleaned_prompts_with_scores_and_average.csv
