In [9]:
import ast

import pandas as pd

# Purpose: count how often each adjective occurs per (Model, Variation,
# Character) and write the counts to a CSV.

# Load the CSV for Adjectives
df_adjectives = pd.read_csv("character_adjectives_by_variation.csv")

# Clean column names (strip stray whitespace around the header labels)
df_adjectives.columns = [col.strip() for col in df_adjectives.columns]

# Parse each "Adjectives" cell from text into a Python object
# (presumably the repr of a list of strings -- TODO confirm against the CSV).
# ast.literal_eval replaces the previous bare eval(): it accepts only Python
# literals, so a malformed or malicious cell cannot execute arbitrary code.
df_adjectives["Adjectives"] = df_adjectives["Adjectives"].apply(ast.literal_eval)

# Explode the lists so each adjective gets its own row, and drop rows that
# end up without an adjective.
df_exploded_adjectives = df_adjectives.explode("Adjectives").dropna(subset=["Adjectives"])

# Normalize: lowercase, remove literal "*" characters, strip whitespace
df_exploded_adjectives["Adjectives"] = (
    df_exploded_adjectives["Adjectives"]
    .str.lower()
    .str.replace("*", "", regex=False)
    .str.strip()
)

# Non-string list elements come back as NaN from the .str accessor; drop them
# so the boolean masks below never contain missing values.
df_exploded_adjectives = df_exploded_adjectives.dropna(subset=["Adjectives"])

# Remove stopwords (e.g., "other", "likely"). Add more stopwords as needed.
stop_words = {"other", "no-nonsense", "likely", "possible", "methodical", "etc."}
df_exploded_adjectives = df_exploded_adjectives[~df_exploded_adjectives["Adjectives"].isin(stop_words)]

# Keep purely alphabetic adjectives only. This also removes empty strings and
# any entry containing spaces, hyphens, digits or punctuation.
df_exploded_adjectives = df_exploded_adjectives[df_exploded_adjectives["Adjectives"].str.isalpha()]

# Count occurrences of each adjective per model, variation, and character
adjective_counts = (
    df_exploded_adjectives
    .groupby(["Model", "Variation", "Character", "Adjectives"])
    .size()
    .reset_index(name="Count")
)

# Save the results to CSV for later use
adjective_counts.to_csv("adjective_counts_by_variation_model_cleaned.csv", index=False)

print("✅ Quantitative adjective count complete. Results saved to 'adjective_counts_by_variation_model_cleaned.csv'.")





✅ Quantitative adjective count complete. Results saved to 'adjective_counts_by_variation_model_cleaned.csv'.


In [None]:
import pandas as pd

# Read the adjectives CSV and trim surrounding whitespace from every header
# label in a single chained step.
df_adjectives = (
    pd.read_csv("character_adjectives_by_variation.csv")
    .rename(columns=str.strip)
)

# Print the leading rows as plain text to get a feel for the table layout.
print(df_adjectives.head())


In [12]:
import ast

import pandas as pd
from nltk.corpus import stopwords
from nltk import download

# Purpose: count adjectives per (Model, Scenario, Variation, Character) and
# pivot the counts so that each model becomes its own column.

# Download stopwords if not already downloaded
download('stopwords')

# Combine NLTK's English stop words with the project-specific ones.
# Previously the NLTK set was built and then overwritten by the custom list,
# so it was never actually applied to the data.
custom_stop_words = {"other", "no-nonsense", "likely", "possible", "methodical", "etc."}  # Add more stopwords as needed
stop_words = set(stopwords.words('english')) | custom_stop_words

# Load the CSV for Adjectives
df_adjectives = pd.read_csv("character_adjectives_by_variation.csv")

# Clean column names (strip stray whitespace around the header labels)
df_adjectives.columns = [col.strip() for col in df_adjectives.columns]

# Parse each "Adjectives" cell from text into a Python object
# (presumably the repr of a list of strings -- TODO confirm against the CSV).
# ast.literal_eval replaces the previous bare eval(): it accepts only Python
# literals, so a malformed or malicious cell cannot execute arbitrary code.
df_adjectives["Adjectives"] = df_adjectives["Adjectives"].apply(ast.literal_eval)

# Explode the lists so each adjective gets its own row, and drop rows that
# end up without an adjective.
df_exploded_adjectives = df_adjectives.explode("Adjectives").dropna(subset=["Adjectives"])

# Normalize: lowercase, remove literal "*" characters, strip whitespace
df_exploded_adjectives["Adjectives"] = (
    df_exploded_adjectives["Adjectives"]
    .str.lower()
    .str.replace("*", "", regex=False)
    .str.strip()
)

# Non-string list elements come back as NaN from the .str accessor; drop them
# so the boolean masks below never contain missing values.
df_exploded_adjectives = df_exploded_adjectives.dropna(subset=["Adjectives"])

# Remove stopwords (NLTK English list plus the custom additions above)
df_exploded_adjectives = df_exploded_adjectives[~df_exploded_adjectives["Adjectives"].isin(stop_words)]

# Keep purely alphabetic adjectives only. This also removes empty strings and
# any entry containing spaces, hyphens, digits or punctuation.
df_exploded_adjectives = df_exploded_adjectives[df_exploded_adjectives["Adjectives"].str.isalpha()]

# Count occurrences of each adjective per model, scenario, variation, and character
adjective_counts = (
    df_exploded_adjectives
    .groupby(["Model", "Scenario", "Variation", "Character", "Adjectives"])
    .size()
    .reset_index(name="Count")
)

# Pivot the table to compare adjectives across models: one row per
# (Scenario, Variation, Character, Adjectives), one column per model,
# with 0 where a model never used the adjective.
adjective_pivot = adjective_counts.pivot_table(index=["Scenario", "Variation", "Character", "Adjectives"],
                                              columns="Model",
                                              values="Count",
                                              aggfunc="sum",
                                              fill_value=0)

# Save the result to a CSV for easier inspection and analysis
adjective_pivot.to_csv("adjective_comparison_across_models.csv", index=True)

print("✅ Quantitative adjective comparison complete. Results saved to 'adjective_comparison_across_models.csv'.")


[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\SGhanbariHaez\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


✅ Quantitative adjective comparison complete. Results saved to 'adjective_comparison_across_models.csv'.


In [1]:
import pandas as pd

# Purpose: turn the per-model adjective counts into per-row shares and save them.

# Load the CSV file containing adjective frequency comparisons across models
df = pd.read_csv("adjective_comparison_across_models.csv")

# Clean column names (strip stray whitespace around the header labels)
df.columns = [col.strip() for col in df.columns]

# List of stopwords to exclude
stop_words = {"other", "no-nonsense", "likely", "possible", "methodical", "etc."}  # Add more stopwords as needed

# Remove stopwords from the Adjectives column
df_filtered = df[~df["Adjectives"].isin(stop_words)]

# Row keys and the per-model count columns expected in the input file.
key_columns = ["Scenario", "Variation", "Character", "Adjectives"]
model_columns = ["GPT-4o", "LLaMA 3.1", "Mistral", "Phi-4", "Qwen2.5"]

# Aggregate to one row per (Scenario, Variation, Character, Adjectives) with
# one count column per model. "Adjectives" is already a row key, so it is no
# longer also passed as `columns`: doing so spread every row across one column
# per (model, adjective) pair and produced a very wide, mostly-zero table.
pivot_df = df_filtered.pivot_table(index=key_columns,
                                   values=model_columns,
                                   aggfunc="sum", fill_value=0)

# Total count of each row's adjective summed over all model columns
pivot_df_total = pivot_df.sum(axis=1)

# Normalize row-wise: each value becomes that model's share of the row total.
# NOTE(review): a row whose total is 0 would yield NaN here -- confirm such
# rows cannot occur in the input.
pivot_df_normalized = pivot_df.div(pivot_df_total, axis=0)

# Save the normalized results to a CSV for further analysis
pivot_df_normalized.to_csv("normalized_adjective_comparison_by_model.csv")

# Print summary
print("✅ Quantitative analysis complete. Results saved to 'normalized_adjective_comparison_by_model.csv'.")


✅ Quantitative analysis complete. Results saved to 'normalized_adjective_comparison_by_model.csv'.
