In [1]:
from utils import load_csv 

# Usage
df = load_csv('partial_results_3940200.csv')

Loaded 3940200 rows from partial_results_3940200.csv


In [2]:
import pandas as pd
from IPython.display import clear_output

# Set random seed for reproducibility
df_sample = df.sample(n=55, random_state=42).reset_index(drop=True)

# Available emotions with numbers
emotions = ['anger', 'disgust', 'fear', 'joy', 'sadness', 'surprise', 'neutral']
emotion_map = {str(i): emotions[i] for i in range(len(emotions))}

# Create list to store annotations
annotations = []

for idx, row in df_sample.iterrows():
    clear_output(wait=True)  # Clear previous output
    
    print("="*80)
    print("MANUAL EMOTION ANNOTATION")
    print("="*80)
    print("\nAvailable emotions:")
    for i, emotion in enumerate(emotions):
        print(f"  {i}: {emotion}")
    print("="*80)

    print(f"\n[{idx+1}/55]")
    print(f"Sentence: {row['sentence_text']}")
    print(f"Movie: {row['movie']}, Scene: {row['scene_id']}")
    
    while True:
        user_input = input("Your annotation: ").strip().lower()
        
        if user_input == 'quit':
            print(f"\nStopping. Annotated {len(annotations)} sentences so far.")
            break
        elif user_input == 'skip':
            break
        elif user_input in emotion_map:
            user_emotion = emotion_map[user_input]
            annotations.append({
                'sentence_text': row['sentence_text'],
                'movie': row['movie'],
                'scene_id': row['scene_id'],
                'manual_annotation': user_emotion,
                'bert_annotation': row.get('emotion', None)
            })
            break
        else:
            print(f"Invalid input. Type a number 0-6, 'skip', or 'quit'")
    
    if user_input == 'quit':
        break

# Save to CSV
clear_output(wait=True)
if annotations:
    df_annotations = pd.DataFrame(annotations)
    df_annotations.to_csv('manual_annotations.csv', index=False)
    print(f"Saved {len(annotations)} annotations to 'manual_annotations.csv'")
else:
    print("No annotations saved.")

Saved 55 annotations to 'manual_annotations.csv'


In [4]:
import pandas as pd
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import numpy as np

# Load manual annotations
df_manual = pd.read_csv('manual_annotations.csv')

print(f"Loaded {len(df_manual)} manual annotations")
print(f"\nFirst few rows:")
print(df_manual.head())

# Check if BERT annotations are present
if df_manual['bert_annotation'].isna().all():
    print("\nNo BERT annotations found in the CSV.")
    print("You need to add BERT predictions to compare.")
else:
    # Remove rows where BERT annotation is missing
    df_compare = df_manual.dropna(subset=['bert_annotation'])
    
    print(f"\n{len(df_compare)} sentences have both manual and BERT annotations")
    
    # Calculate agreement
    agreement = (df_compare['manual_annotation'] == df_compare['bert_annotation']).sum()
    accuracy = agreement / len(df_compare)
    
    print("\n" + "="*80)
    print("COMPARISON: MANUAL vs BERT ANNOTATIONS")
    print("="*80)
    print(f"\nAgreement: {agreement}/{len(df_compare)} ({accuracy*100:.1f}%)")
    print(f"Disagreement: {len(df_compare) - agreement}/{len(df_compare)} ({(1-accuracy)*100:.1f}%)")
    
    # Confusion matrix
    print("\nConfusion Matrix:")
    print("(Rows = Manual, Columns = BERT)")
    cm = confusion_matrix(df_compare['manual_annotation'], df_compare['bert_annotation'], labels=emotions)
    
    # Pretty print confusion matrix
    print("\n" + " "*15 + "  ".join(f"{e[:3]:>3}" for e in emotions))
    for i, emotion in enumerate(emotions):
        print(f"{emotion:>12}:  " + "  ".join(f"{cm[i,j]:>3}" for j in range(len(emotions))))
    
    # Detailed classification report
    print("\n" + "="*80)
    print("DETAILED METRICS")
    print("="*80)
    print(classification_report(df_compare['manual_annotation'], df_compare['bert_annotation'], 
                                 labels=emotions, zero_division=0))
    
    # Show disagreements
    print("\n" + "="*80)
    print("DISAGREEMENTS (Manual vs BERT)")
    print("="*80)
    disagreements = df_compare[df_compare['manual_annotation'] != df_compare['bert_annotation']]
    
    for idx, row in disagreements.iterrows():
        print(f"\nSentence: {row['sentence_text']}")
        print(f"Manual: {row['manual_annotation']} | BERT: {row['bert_annotation']}")
        print(f"Movie: {row['movie']}")

Loaded 55 manual annotations

First few rows:
                    sentence_text                          movie  scene_id  \
0              all over the plate       28 Days_0191754_anno.txt    190320   
1        She corrects one of them        Dangal_5074352_anno.txt    107707   
2                 Congratulations  The Big Blue_0095250_anno.txt    254245   
3                    Listen to me        Avatar_0499549_anno.txt    317182   
4  That water isn't going to stop      The Cell_0209958_anno.txt    367770   

  manual_annotation bert_annotation  
0           neutral         neutral  
1           neutral         neutral  
2               joy         neutral  
3             anger         neutral  
4              fear         neutral  

55 sentences have both manual and BERT annotations

COMPARISON: MANUAL vs BERT ANNOTATIONS

Agreement: 18/55 (32.7%)
Disagreement: 37/55 (67.3%)

Confusion Matrix:
(Rows = Manual, Columns = BERT)

               ang  dis  fea  joy  sad  sur  neu
       ang

In [None]:
# Extract just the sentences into a list
sentences = df_manual['sentence_text'].tolist()

print(sentences)



['all over the plate', 'She corrects one of them', 'Congratulations', 'Listen to me', "That water isn't going to stop", "Don't you constantly question your value - like why was I so easy to cast aside", "Schuyler sighs, wondering what the hell he's doing here", 'She is wearing a bathrobe and eating a chocolate eclair', 'I need to speak to the colonel', 'What does that mean', 'Machine SCREAMS, releases Welles and falls back, reaching around to the fork', 'Keep a fifth of something in your desk', '(ruefully) I used up my shark dart', 'She looks at the napkin, it has streaks of red', 'Nice and simple', "Chase's hand waves out the window until they are out of the parking lot", 'There were always two of us in the act', '(kisses her neck)', 'And now we back inside--', 'I’1l get you a meeting with Jeff Megall', 'Another painter is applying the obligatory green horizon over the white at waist height', 'Branches and leaves fly by as Andy barrels through the foliage', 'Scarlett ‘4s too frightene

In [None]:
import pandas as pd
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import numpy as np

sentence_emotions = {
    'all over the plate': 6,  # neutral
    'She corrects one of them': 6,  # neutral
    'Congratulations': 3,  # joy
    'Listen to me': 0,  # anger
    "That water isn't going to stop": 2,  # fear
    "Don't you constantly question your value - like why was I so easy to cast aside": 4,  # sadness
    "Schuyler sighs, wondering what the hell he's doing here": 4,  # sadness
    'She is wearing a bathrobe and eating a chocolate eclair': 6,  # neutral
    'I need to speak to the colonel': 6,  # neutral
    'What does that mean': 5,  # surprise
    'Machine SCREAMS, releases Welles and falls back, reaching around to the fork': 2,  # fear
    'Keep a fifth of something in your desk': 6,  # neutral
    '(ruefully) I used up my shark dart': 4,  # sadness
    'She looks at the napkin, it has streaks of red': 5,  # surprise
    'Nice and simple': 3,  # joy
    "Chase's hand waves out the window until they are out of the parking lot": 6,  # neutral
    'There were always two of us in the act': 4,  # sadness
    '(kisses her neck)': 3,  # joy
    'And now we back inside--': 6,  # neutral
    "I'1l get you a meeting with Jeff Megall": 6,  # neutral
    'Another painter is applying the obligatory green horizon over the white at waist height': 6,  # neutral
    'Branches and leaves fly by as Andy barrels through the foliage': 2,  # fear
    "Scarlett '4s too frightened for a moment to even utter a ~ sound, She catches hold": 2,  # fear
    'Donny pulls slightly erratically into the driveway, swerving to avoid hitting a bicycle carelessly left laying on its side': 2,  # fear
    "I've never been with a woman besides my ex-wife": 4,  # sadness
    'Do you even know what I mean when I use the term "occult"': 0,  # anger
    'He starts cut of the room': 6,  # neutral
    'Kill us both': 0,  # anger
    'Chi Chi makes the decision to stop the car in order to save his boss': 2,  # fear
    'The "room in use" sign lights up': 6,  # neutral
    "My name's David, too": 5,  # surprise
    "limbo (CONT'D) How many times do I have to tell you": 0,  # anger
    '"Mash" went off -- (getting angry) Yeah, but when they went off people weren\'t making fun of them': 0,  # anger
    'The car makes a sharp right at the next block and pulls over': 6,  # neutral
    "She's good to go": 3,  # joy
    "He's not invited to sit": 1,  # disgust
    'Eeet, eet is just perzonal trifles from my homeland -- Shut up': 0,  # anger
    "That's not a pretty picture": 1,  # disgust
    'Everyone files out': 6,  # neutral
    "It is a woman's breathless sighs and gasps we hear, but under that, perhaps a man's voice, perhaps not": 5,  # surprise
    'We have nothing further to say': 0,  # anger
    '(a take) Program': 6,  # neutral
    'There is a blinding flash as it shatters into a million fragments': 5,  # surprise
    'You need to have a talk with him': 6,  # neutral
    "The only thing that's clear is all the cosmetic surgery he's had makes him look like George Hamilton, circa 1962": 1,  # disgust
    "The maid leaves pretty swiftly, we haven't seen her face, the whole moment seems rather strange": 5,  # surprise
    'Oh, come on': 0,  # anger
    'see, the trick to hiding is understanding your surroundings': 6,  # neutral
    'I got some more info on Pedro': 6,  # neutral
    'Jeffrey watches her go then turns and goes inside his house': 6,  # neutral
    'A week later, Max will die too': 4,  # sadness
    "I've found no evidence in the way of lesions, hemorrhaging, tumors": 6,  # neutral
    'His best stories have been Babar and Winnie the Pooh up to now, with Batman moving up': 3,  # joy
    'And firstly or lastly, depending on the viewer, that the woman on the floor is dressed in a white bridal gown': 5,  # surprise
    "Don't throw that away": 0,  # anger
}

# Load manual annotations
df_manual = pd.read_csv('manual_annotations.csv')

# Convert Claude's emotion numbers to emotion names
emotion_num_to_name = {i: emotions[i] for i in range(len(emotions))}
df_manual['claude_annotation'] = df_manual['sentence_text'].map(
    lambda s: emotion_num_to_name[sentence_emotions[s]]
)

# Calculate agreement
agreement = (df_manual['manual_annotation'] == df_manual['claude_annotation']).sum()
accuracy = agreement / len(df_manual)

print("="*80)
print("COMPARISON: MANUAL vs CLAUDE ANNOTATIONS")
print("="*80)
print(f"\nAgreement: {agreement}/{len(df_manual)} ({accuracy*100:.1f}%)")
print(f"Disagreement: {len(df_manual) - agreement}/{len(df_manual)} ({(1-accuracy)*100:.1f}%)")

# Confusion matrix
print("\nConfusion Matrix:")
print("(Rows = Manual, Columns = Claude)")
cm = confusion_matrix(df_manual['manual_annotation'], df_manual['claude_annotation'], labels=emotions)

print("\n" + " "*15 + "  ".join(f"{e[:3]:>3}" for e in emotions))
for i, emotion in enumerate(emotions):
    print(f"{emotion:>12}:  " + "  ".join(f"{cm[i,j]:>3}" for j in range(len(emotions))))

# Detailed classification report
print("\n" + "="*80)
print("DETAILED METRICS")
print("="*80)
print(classification_report(df_manual['manual_annotation'], df_manual['claude_annotation'], 
                             labels=emotions, zero_division=0))

# Show disagreements
print("\n" + "="*80)
print("DISAGREEMENTS (Manual vs Claude)")
print("="*80)
disagreements = df_manual[df_manual['manual_annotation'] != df_manual['claude_annotation']]

for idx, row in disagreements.iterrows():
    print(f"\nSentence: {row['sentence_text']}")
    print(f"Manual: {row['manual_annotation']} | Claude: {row['claude_annotation']}")

KeyError: 'I’1l get you a meeting with Jeff Megall'