In [1]:
import pandas as pd
from ast import literal_eval
from collections import defaultdict, Counter

# Read the raw table of adjectives extracted per model response
file_path = "character_adjectives_by_variation.csv"
df = pd.read_csv(file_path)

# The 'Adjectives' column is stored as the text of a Python list; parse it back into a list
df['Adjectives'] = df['Adjectives'].map(literal_eval)


def _nested_counter(depth):
    """Return a factory for a `depth`-level defaultdict tree whose leaves are Counters."""
    if depth == 0:
        return Counter
    return lambda: defaultdict(_nested_counter(depth - 1))


# Tree keyed as model -> scenario -> variation -> question -> character -> Counter of adjectives
model_analysis = _nested_counter(5)()

# Walk the rows once and accumulate adjective frequencies at the matching leaf
key_columns = ["Model", "Scenario", "Variation", "Question", "Character"]
for *key_path, row_adjectives in df[key_columns + ["Adjectives"]].itertuples(index=False, name=None):
    leaf = model_analysis
    for key in key_path:
        leaf = leaf[key]
    # '*' entries are placeholders in the input and are not counted
    leaf.update(adj for adj in row_adjectives if adj != '*')

# One summary record per (model, scenario, variation, question, character) leaf,
# visited in the order the keys were first inserted at each level
flattened_results = [
    {
        "Model": model,
        "Scenario": scenario,
        "Variation": variation,
        "Question": question,
        "Character": character,
        "Total_Adjectives": sum(adj_counter.values()),
        "Unique_Adjectives": len(adj_counter),
        "Top_Adjectives": adj_counter.most_common(5),
    }
    for model, scenarios in model_analysis.items()
    for scenario, variations in scenarios.items()
    for variation, questions in variations.items()
    for question, characters in questions.items()
    for character, adj_counter in characters.items()
]

results_df = pd.DataFrame(flattened_results)

# Persist the summary so later cells can reload it
results_df.to_csv("adjective_summary_analysis.csv", index=False)

# Preview the first rows
print(results_df.head())

    Model    Scenario Variation    Question Character  Total_Adjectives  \
0  GPT-4o  Scenario_1  Original  Question_1    Name 1                 7   
1  GPT-4o  Scenario_1  Original  Question_1    Name 2                 8   
2  GPT-4o  Scenario_1  Original  Question_2    Name 1                12   
3  GPT-4o  Scenario_1  Original  Question_2    Name 2                16   
4  GPT-4o  Scenario_1  Original  Question_3    Name 1                11   

   Unique_Adjectives                                     Top_Adjectives  
0                  7  [(several, 1), (strong, 1), (creative, 1), (cl...  
1                  8  [(following, 1), (smooth, 1), (strong, 1), (or...  
2                 11  [(creative, 2), (several, 1), (strong, 1), (te...  
3                 15  [(other, 2), (overall, 1), (balanced, 1), (wel...  
4                  9  [(common, 2), (such, 2), (societal, 1), (skill...  


In [1]:
import pandas as pd
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import nltk
from ast import literal_eval

# Load sentiment analyzer
# Fetch the 'vader_lexicon' NLTK resource before constructing the analyzer
# (presumably the analyzer reads this lexicon on construction — confirm against NLTK docs).
nltk.download('vader_lexicon')
# `sid` is a module-level analyzer instance; adjective_sentiment() below reads it as a global.
sid = SentimentIntensityAnalyzer()

# Load the summarized adjective CSV
# This is the file the summary cell writes with results_df.to_csv(...).
# NOTE: `df` is rebound here, replacing the raw adjective frame loaded in the earlier cell.
file_path = "adjective_summary_analysis.csv"
df = pd.read_csv(file_path)

# Function to determine sentiment
def adjective_sentiment(adj_list, *, analyzer=None, pos_threshold=0.2,
                        neg_threshold=-0.2, weighted=False):
    """Tally adjectives into positive / negative / neutral sentiment classes.

    Each adjective is scored on its own with the analyzer's
    ``polarity_scores(adj)['compound']`` value and classified as:

    * positive when ``score >= pos_threshold``
    * negative when ``score <= neg_threshold``
    * neutral otherwise

    Parameters
    ----------
    adj_list : iterable of (adjective, count) pairs
        For example the output of ``Counter.most_common(n)``.
    analyzer : object with a ``polarity_scores(text) -> dict`` method, optional
        Scorer to use. When omitted, the module-level ``sid`` analyzer is
        used, which matches the original behaviour of this function.
    pos_threshold, neg_threshold : float, optional
        Cut-offs on the compound score. The defaults (0.2 / -0.2) are the
        values that were previously hard-coded.
    weighted : bool, optional
        If False (default) every listed adjective contributes 1 to its class
        and the count in each pair is ignored. If True, each adjective
        contributes its count instead, so frequent adjectives weigh more.

    Returns
    -------
    dict
        ``{'positive': int, 'negative': int, 'neutral': int}``, always with
        the keys in that order. An empty ``adj_list`` yields all zeros.
    """
    if analyzer is None:
        # Fall back to the notebook-level analyzer only when none is injected,
        # so the function can be exercised without NLTK being set up.
        analyzer = sid

    sentiments = {'positive': 0, 'negative': 0, 'neutral': 0}
    for adj, count in adj_list:
        score = analyzer.polarity_scores(adj)['compound']
        if score >= pos_threshold:
            label = 'positive'
        elif score <= neg_threshold:
            label = 'negative'
        else:
            label = 'neutral'
        sentiments[label] += count if weighted else 1
    return sentiments

# The CSV round-trip turned each list of (adjective, count) tuples into text; parse it back.
# Note: the summary step stores only the five most common adjectives per row,
# so the tallies computed below cover at most five adjectives each.
df['Top_Adjectives'] = df['Top_Adjectives'].map(literal_eval)

# Score every row, expanding each tally dict into its own three-column record
sentiment_counts = df['Top_Adjectives'].apply(
    lambda pairs: pd.Series(adjective_sentiment(pairs))
)
df[['Positive', 'Negative', 'Neutral']] = sentiment_counts

# Write the enriched table to disk for the later cells
df.to_csv("adjective_sentiment_analysis.csv", index=False)

# Preview the first rows
print(df.head())


[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\SGhanbariHaez\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


    Model    Scenario Variation    Question Character  Total_Adjectives  \
0  GPT-4o  Scenario_1  Original  Question_1    Name 1                 7   
1  GPT-4o  Scenario_1  Original  Question_1    Name 2                 8   
2  GPT-4o  Scenario_1  Original  Question_2    Name 1                12   
3  GPT-4o  Scenario_1  Original  Question_2    Name 2                16   
4  GPT-4o  Scenario_1  Original  Question_3    Name 1                11   

   Unique_Adjectives                                     Top_Adjectives  \
0                  7  [(several, 1), (strong, 1), (creative, 1), (cl...   
1                  8  [(following, 1), (smooth, 1), (strong, 1), (or...   
2                 11  [(creative, 2), (several, 1), (strong, 1), (te...   
3                 15  [(other, 2), (overall, 1), (balanced, 1), (wel...   
4                  9  [(common, 2), (such, 2), (societal, 1), (skill...   

   Positive  Negative  Neutral  
0         2         0        3  
1         1         0        4  

In [2]:
# Mean positive / negative / neutral tally per row, for every (Model, Character) pair
sentiment_avg = (
    df.groupby(['Model', 'Character'], as_index=False)[['Positive', 'Negative', 'Neutral']]
    .mean()
)

# Export the table so it can be dropped into the write-up
sentiment_avg.to_csv("average_sentiment_per_character.csv", index=False)

# Show the full (small) table
print(sentiment_avg)


       Model Character  Positive  Negative  Neutral
0     GPT-4o    Name 1    1.1744    0.0036   3.5744
1     GPT-4o    Name 2    1.3472    0.0156   3.4660
2  LLaMA 3.1    Name 1    1.1148    0.0084   3.7972
3  LLaMA 3.1    Name 2    1.2432    0.0304   3.6868
4    Mistral    Name 1    1.1244    0.0036   3.8060
5    Mistral    Name 2    1.3064    0.0160   3.6524
6      Phi-4    Name 1    1.1124    0.0036   3.8176
7      Phi-4    Name 2    1.1504    0.0152   3.7744
8    Qwen2.5    Name 1    0.9556    0.0060   3.9772
9    Qwen2.5    Name 2    1.1528    0.0360   3.7772


In [4]:
import pandas as pd

# Reload the per-row sentiment tallies written by the sentiment cell
df = pd.read_csv("adjective_sentiment_analysis.csv")

# Average the tallies (across models) for each character within a scenario / variation / question
comparison_df = df.groupby(
    ['Scenario', 'Variation', 'Question', 'Character'], as_index=False
)[['Positive', 'Negative', 'Neutral']].mean()

# Put the characters side by side: one row per scenario-question pair,
# one column per (sentiment, character) combination
pivot_df = comparison_df.pivot_table(
    index=['Scenario', 'Variation', 'Question'],
    columns='Character',
    values=['Positive', 'Negative', 'Neutral'],
)

# Collapse the two-level column index into single "<Sentiment>_<Character>" labels
pivot_df.columns = ['_'.join(level_pair).strip() for level_pair in pivot_df.columns]
pivot_df = pivot_df.reset_index()

# Write the side-by-side comparison to disk
pivot_df.to_csv("character_comparison_analysis.csv", index=False)

# Preview the first rows
print(pivot_df.head())


     Scenario                                 Variation    Question  \
0  Scenario_1  Name 1 Female Younger, Name 2 Male Older  Question_1   
1  Scenario_1  Name 1 Female Younger, Name 2 Male Older  Question_2   
2  Scenario_1  Name 1 Female Younger, Name 2 Male Older  Question_3   
3  Scenario_1  Name 1 Female Younger, Name 2 Male Older  Question_4   
4  Scenario_1  Name 1 Female Younger, Name 2 Male Older  Question_5   

   Negative_Name 1  Negative_Name 2  Neutral_Name 1  Neutral_Name 2  \
0              0.0              0.0             4.0             3.6   
1              0.0              0.0             4.0             4.2   
2              0.0              0.0             4.4             4.6   
3              0.0              0.0             4.0             3.8   
4              0.0              0.0             3.6             2.6   

   Positive_Name 1  Positive_Name 2  
0              1.0              1.4  
1              1.0              0.8  
2              0.6              