<a href="https://colab.research.google.com/github/Agnieszkachr/DH-AI/blob/main/Create_table_sentiments_REV.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# ------------------ STEP 1: Import Libraries ------------------
import json
import numpy as np
import pandas as pd

# ------------------ STEP 2: Load JSON File ------------------
# Upload your JSON file manually in Colab using the file upload UI.
from google.colab import files
uploaded = files.upload()

# files.upload() returns a dict mapping each uploaded file name to its raw
# bytes. Fail with a clear message when nothing came back (for example, the
# upload dialog was dismissed) instead of a bare StopIteration below.
if not uploaded:
    raise ValueError("No file was uploaded; run this cell again and select a JSON file.")

filename = next(iter(uploaded))  # Assumes a single file is uploaded

# Parse the uploaded bytes directly rather than re-opening `filename` from
# disk: Colab may store the upload under a de-duplicated name (such as
# "data (1).json") when a file with the same name already exists, in which
# case opening `filename` could silently read the older file.
data = json.loads(uploaded[filename].decode('utf-8'))

# ------------------ STEP 3: Define Emotion Mapping Parameters ------------------
# Category held at each position of the 10-character annotation string.
ealph_positions = (
    "anger", "anticipation", "disgust", "fear", "joy",
    "negative", "positive", "sadness", "surprise", "trust",
)
# The two polarity categories are excluded; only the 8 emotions are analysed.
polarity_labels = ("negative", "positive")

# Positions of the 8 emotions inside the annotation string
# -> [0, 1, 2, 3, 4, 7, 8, 9]
emotion_indices = [
    pos for pos, label in enumerate(ealph_positions)
    if label not in polarity_labels
]
# Matching emotion names, in the same order as emotion_indices
emotion_labels = [ealph_positions[pos] for pos in emotion_indices]

# ------------------ STEP 4: Aggregate and Normalize Emotions ------------------
# For every unit: add up the 8-emotion vectors of all words that carry at
# least one emotion, then rescale so the unit's emotion shares sum to 1.
# Units without any emotion-bearing word get an all-zero vector.
results = {}
unit_titles = {} # Dictionary to store unit titles
malformed_count = 0  # Annotations whose characters could not be read as digits

for unit_key, unit_data in data.items():
    emotion_sums = np.zeros(len(emotion_indices))
    total_lemma_count = 0

    # Extract the title
    unit_titles[unit_key] = unit_data.get("title", "") # Get title, default to empty string if not found

    # A unit with no "content" is treated as empty (it ends up as a zero
    # vector below) instead of aborting the run with a KeyError, mirroring
    # the tolerant lookup used for the title.
    verses = unit_data.get("content") or {}

    for verse in verses.values():
        for word in verse.values():
            # Apply precedence: ealph > ealph_en > ealph_ai
            ealph = word.get("ealph") or word.get("ealph_en") or word.get("ealph_ai")
            if not ealph or len(ealph) != 10:
                continue

            # One bad annotation should not discard the whole analysis:
            # skip it, but keep count so the problem is reported.
            try:
                vec = np.array([int(ealph[i]) for i in emotion_indices])
            except (TypeError, ValueError):
                malformed_count += 1
                continue

            # Words with no emotion flagged do not count towards the total
            if vec.sum() > 0:
                emotion_sums += vec
                total_lemma_count += 1

    # Normalize per methodology
    if total_lemma_count > 0:
        norm_vector = emotion_sums / total_lemma_count
        norm_vector /= norm_vector.sum()
        results[unit_key] = norm_vector
    else:
        results[unit_key] = np.zeros(len(emotion_indices))  # Zero vector for annotation/logging

if malformed_count:
    print(f"Warning: skipped {malformed_count} word(s) with a non-numeric emotion annotation.")

# ------------------ STEP 5: Output Table ------------------
# One row per unit, one column per emotion; values are percentages rounded
# to two decimals for readability. The unit keys move from the index into
# a regular 'unit' column.
df = (
    pd.DataFrame(results, index=emotion_labels)
    .T
    .mul(100)
    .round(2)
    .reset_index()
    .rename(columns={"index": "unit"})
)

# Look up each unit's title by its key
df['title'] = df['unit'].map(unit_titles)

# Identifying columns first, emotion columns after
leading = ['unit', 'title']
cols = leading + [name for name in df.columns if name not in leading]
df = df[cols]


# Lift the row limit so the whole table is printed
pd.set_option('display.max_rows', None)
print(df)

# Persist the table as CSV (without the positional index)
df.to_csv('emotion_results.csv', index=False)

Saving RE_units_19_nested_cleaned (1).json to RE_units_19_nested_cleaned (1).json
        unit                                              title  anger  \
0   unit_001                          Prologue and Introduction   4.81   
1   unit_002                           Vision of the Son of Man   8.82   
2   unit_003                      Letters to the Seven Churches   2.75   
3   unit_004                                   Throne in Heaven   3.23   
4   unit_005                            The Scroll and the Lamb   6.19   
5   unit_006                                    The Seven Seals   1.61   
6   unit_007                The 144,000 and the Great Multitude   8.40   
7   unit_008            The Seventh Seal and the Seven Trumpets   6.50   
8   unit_009  Interlude: The Mighty Angel and the Little Scroll   7.89   
9   unit_010               The Two Witnesses and the Earthquake   7.56   
10  unit_011           The Seventh Trumpet and Heavenly Worship   9.14   
11  unit_012                  