In [1]:
import pandas as pd
import json

# Input/output file locations (edit these to match your environment)
csv_path = "test_labels_subset.csv"  # CSV holding the Utterance / Emotion columns
json_path = "revised_final_annotations.json"  # JSON annotations to be enriched
output_json_path = "updated_annotations.json"  # where the enriched JSON is written

In [2]:
# Load the labels CSV and print a five-row preview under a heading
csv_df = pd.read_csv(csv_path)
print("CSV file preview:", csv_df.head(), sep="\n")

CSV file preview:
   Dialogue_ID  Utterance_ID Speaker  \
0          240             2    Dina   
1           59             2  Rachel   
2           12            11  Monica   
3          119             0  Monica   
4           46             3  Leslie   

                                           Utterance  Emotion  
0  What are you going to do? Kill him? Like you d...    anger  
1      Whoa, I'm sorry. Excuse me. We had this cart.    anger  
2                         Love is the best medicine.      joy  
3  Okay, I got that. Ill escape over there. Ill...  neutral  
4                        Dont care what people say.      joy  


In [3]:
# Read the JSON and show the first 2 items.
# JSON text is UTF-8 by specification, so decode it explicitly instead of
# relying on the platform's locale encoding (which is not UTF-8 everywhere,
# e.g. cp1252 on many Windows setups) and would garble non-ASCII text.
with open(json_path, 'r', encoding='utf-8') as f:
    json_data = json.load(f)

print("\nJSON file preview:")
# Slicing (rather than indexing) keeps this safe when fewer than 2 items exist
for item in json_data[:2]:
    print(json.dumps(item, indent=2))


JSON file preview:
{
  "video_id": "dia233_utt4.mp4",
  "peak_time": 0.13333333333333333,
  "visual_expression_description": [
    "slightly Cheek Raiser",
    "moderately Lid Tightener",
    "moderately Upper Lip Raiser",
    "slightly Lip Corner Puller",
    "barely Chin Raiser",
    "moderately Lips Part",
    "moderately Jaw Drop"
  ],
  "visual_objective_description": "In this video frame, two women are engaged in a conversation inside a cozy, residential kitchen. The environment has a warm and inviting feel, marked by the presence of a classic refrigerator adorned with colorful magnets and drawings, suggesting a lived-in atmosphere.\n\nOne woman, positioned slightly to the right, has long, straight hair that falls past her shoulders. She wears a distinctive, colorful sweater featuring a patchwork of bright squares, including pink, yellow, and black. Her expression is animated; her mouth is open as she speaks passionately, and her eyebrows are raised, conveying intensity. Her pos

In [9]:
# Step 1: Load the CSV file
csv_df = pd.read_csv(csv_path)

# Step 2: Create a new column 'video_id' to match the JSON format
# ("dia<Dialogue_ID>_utt<Utterance_ID>.mp4"). Built column-wise rather than
# with a row-by-row apply; astype(int) mirrors the int() coercion used before.
csv_df["video_id"] = (
    "dia" + csv_df["Dialogue_ID"].astype(int).astype(str)
    + "_utt" + csv_df["Utterance_ID"].astype(int).astype(str)
    + ".mp4"
)

# The lookup below keeps only one row per video_id (the last one), so make
# any duplicate ids visible instead of letting them be overwritten silently.
duplicate_ids = csv_df.loc[csv_df["video_id"].duplicated(), "video_id"].unique()
if len(duplicate_ids):
    print(
        f"Warning: {len(duplicate_ids)} video_id(s) occur more than once in the CSV; "
        "the last row is used for each."
    )

# Step 3: Create a dictionary for fast lookup:
# video_id -> {"transcript": Utterance, "true_label": Emotion}
utterance_lookup = {
    vid: {"transcript": utterance, "true_label": emotion}
    for vid, utterance, emotion in zip(
        csv_df["video_id"], csv_df["Utterance"], csv_df["Emotion"]
    )
}

# Step 4: Load the JSON file (decoded explicitly as UTF-8, the JSON standard
# encoding, instead of the locale-dependent default)
with open(json_path, 'r', encoding='utf-8') as f:
    json_data = json.load(f)

# Step 5: Add 'transcript' and 'true_label' to each JSON item whose video_id
# has a CSV row; remember the ids that had no match so they can be reported.
unmatched_ids = []
for item in json_data:
    video_id = item.get("video_id")
    labels = utterance_lookup.get(video_id)
    if labels is None:
        unmatched_ids.append(video_id)
        continue
    item.update(labels)

# Items without a CSV row are left unchanged; report how many so that a
# partially merged file does not go unnoticed.
print(
    f"Matched {len(json_data) - len(unmatched_ids)} of {len(json_data)} JSON items; "
    f"{len(unmatched_ids)} had no CSV row."
)


In [8]:
# Spot-check: print the first annotation record as a plain Python dict
first_record = json_data[0]
print(first_record)

{'video_id': 'dia233_utt4.mp4', 'peak_time': 0.13333333333333333, 'visual_expression_description': ['slightly Cheek Raiser', 'moderately Lid Tightener', 'moderately Upper Lip Raiser', 'slightly Lip Corner Puller', 'barely Chin Raiser', 'moderately Lips Part', 'moderately Jaw Drop'], 'visual_objective_description': 'In this video frame, two women are engaged in a conversation inside a cozy, residential kitchen. The environment has a warm and inviting feel, marked by the presence of a classic refrigerator adorned with colorful magnets and drawings, suggesting a lived-in atmosphere.\n\nOne woman, positioned slightly to the right, has long, straight hair that falls past her shoulders. She wears a distinctive, colorful sweater featuring a patchwork of bright squares, including pink, yellow, and black. Her expression is animated; her mouth is open as she speaks passionately, and her eyebrows are raised, conveying intensity. Her posture is upright, with her head slightly tilted forward, indic

In [10]:
# Step 6: Write the enriched annotations to output_json_path (4-space indent)
with open(output_json_path, "w") as out_file:
    json.dump(json_data, fp=out_file, indent=4)

print(f"Updated JSON saved to: {output_json_path}")

Updated JSON saved to: updated_annotations.json


In [12]:
import json

# Source and destination files for the key-reordering pass
input_path = "updated_annotations.json"
output_path = "sorted_annotations.json"

# Preferred key order for every record. The reordering loop skips keys a
# record does not have and appends any unlisted keys afterwards; no key is
# renamed.
desired_order = [
    "video_id",
    "transcript",
    "audio_description",
    "visual_expression_description",
    "visual_objective_description",
    "raw_AU_values_at_peak",
    "peak_time",
    "coarse-grained_summary",
    "fine-grained_summary",
    "true_label",
]

# Read the annotations to be reordered
with open(input_path, "r") as src:
    data = json.load(src)

# Rebuild each record: keys named in desired_order come first (only those the
# record actually has), followed by every other key in its original order.
sorted_data = []
for record in data:
    head = {name: record[name] for name in desired_order if name in record}
    tail = {name: value for name, value in record.items() if name not in head}
    sorted_data.append({**head, **tail})

# Write the reordered records out (4-space indent)
with open(output_path, "w") as dst:
    json.dump(sorted_data, dst, indent=4)

print(f"Reordered JSON saved to: {output_path}")


Reordered JSON saved to: sorted_annotations.json
