In [None]:
import json
import os

from afinn import Afinn
import numpy as np
import pandas as pd
from tqdm import tqdm

# Book to analyse; it names the per-book sub-directory under each data root.
book = 'worm'

# Per-book input directory (read by the sentiment pass) and output directory
# (alias table, relations array, interactions file).
ner_coref_data_dir, characters_data_dir = (
    os.path.join(data_root, book) for data_root in ('booknlp_output', 'characters')
)


def analyse_semantics(
    ner_coref_data_dir: str,
    characters_data_dir: str
) -> None:
    """Score relevant sentences and accumulate per-chapter character-pair sentiment.

    For every chapter directory under ``ner_coref_data_dir`` this reads
    ``relevant_sentences.csv``, scores its ``words`` column with AFINN, writes
    the scores back to the same CSV as a ``Sentiment`` column, and adds each
    sentence's score to every ordered pair of distinct characters attached to
    that sentence (the ``characters`` column plus, when present, ``speaker``).

    The accumulated array is saved as ``character-relations.npy`` in
    ``characters_data_dir`` with shape
    ``(num_chars, num_chars, 2, num_chapters)``: index 0 on the third axis is
    the summed sentiment, index 1 the number of contributing sentences.

    Entries of ``ner_coref_data_dir`` that are not directories, or whose name
    does not carry a chapter number as its second dash-separated field, are
    ignored rather than aborting the run.

    Args:
        ner_coref_data_dir: Directory holding one sub-directory per chapter.
        characters_data_dir: Directory holding ``main_characters_aliases.json``;
            the relations array is written here.

    Raises:
        FileNotFoundError: If ``main_characters_aliases.json`` is missing.
    """
    main_characters_aliases_file_path = os.path.join(characters_data_dir, 'main_characters_aliases.json')

    if not os.path.exists(main_characters_aliases_file_path):
        raise FileNotFoundError('Missing main_characters_aliases.json in given directory!')

    with open(main_characters_aliases_file_path, 'r', encoding='utf-8') as file:
        main_char_list = json.load(file)

    # List the directory once and keep only real chapter directories, so stray
    # entries (e.g. .ipynb_checkpoints, .DS_Store) neither crash the name
    # parsing nor inflate the chapter axis.
    chapters = []
    for entry in sorted(os.listdir(ner_coref_data_dir)):
        chapter_num = _chapter_index(entry)
        if chapter_num is None:
            continue
        if not os.path.isdir(os.path.join(ner_coref_data_dir, entry)):
            continue
        chapters.append((entry, chapter_num))

    num_chars = len(main_char_list)
    # Size the chapter axis so that every parsed index fits, even when the
    # chapter numbers on disk have gaps.
    num_chapters = max([len(chapters)] + [chapter_num + 1 for _, chapter_num in chapters])

    relations_arr = np.zeros((num_chars, num_chars, 2, num_chapters))

    afinn = Afinn()

    for chapter, chapter_num in tqdm(chapters):
        relevant_sentences_file_path = os.path.join(ner_coref_data_dir, chapter, 'relevant_sentences.csv')
        df = pd.read_csv(relevant_sentences_file_path)

        # Score all sentences in one pass instead of a row-by-row .loc write.
        df['Sentiment'] = df['words'].map(afinn.score).astype(float)

        for characters, speaker, sentiment in zip(df['characters'], df['speaker'], df['Sentiment']):
            involved = set(json.loads(characters))
            # A missing speaker is read back from the CSV as NaN (a float), not a string.
            if isinstance(speaker, str):
                involved.update(json.loads(speaker))

            if len(involved) < 2:
                continue

            # Every ordered pair is updated, so the array stays symmetric in
            # its first two axes.
            for char1 in involved:
                for char2 in involved:
                    if char1 == char2:
                        continue

                    relations_arr[char1, char2, 0, chapter_num] += sentiment
                    relations_arr[char1, char2, 1, chapter_num] += 1

        # Persist the per-sentence scores next to the sentences they belong to.
        df.to_csv(relevant_sentences_file_path, index=False)

    character_relations_file_path = os.path.join(characters_data_dir, 'character-relations.npy')
    np.save(character_relations_file_path, relations_arr)

    print("Completed Sentiment Analysis!")


def _chapter_index(entry_name: str):
    """Return the zero-based chapter index encoded in ``entry_name``, or None.

    The chapter number is taken from the second dash-separated field of the
    name and is treated as one-based. Names without such a field, or with a
    non-numeric one, yield None.
    """
    try:
        return int(entry_name.split('-')[1]) - 1
    except (IndexError, ValueError):
        return None


# Run the sentiment pass for the configured book. This rewrites every chapter's
# relevant_sentences.csv in place and saves character-relations.npy.
analyse_semantics(ner_coref_data_dir, characters_data_dir)

  0%|          | 0/304 [00:00<?, ?it/s]

100%|██████████| 304/304 [03:00<00:00,  1.68it/s]

Completed Sentiment Analysis!





In [None]:
def collate_relations(
    characters_data_dir: str
) -> None:
    """Collapse the saved relations array into one score per ordered character pair.

    Loads ``character-relations.npy`` from ``characters_data_dir`` and writes
    ``interactions.json`` to the same directory. The JSON is a square nested
    list indexed ``[i][j]``:

    * ``None`` on the diagonal (``i == j``);
    * ``[0, 0]`` when the pair has no recorded interactions;
    * otherwise ``[score, interaction_count]``, where ``score`` is the pair's
      mean sentiment over all chapters minus character ``j``'s mean sentiment
      over all of its interactions.

    Args:
        characters_data_dir: Directory holding ``main_characters_aliases.json``
            and ``character-relations.npy``.

    Raises:
        FileNotFoundError: If either input file is missing.
    """
    main_characters_aliases_file_path = os.path.join(characters_data_dir, 'main_characters_aliases.json')

    if not os.path.exists(main_characters_aliases_file_path):
        raise FileNotFoundError('Missing main_characters_aliases.json in given directory!')

    with open(main_characters_aliases_file_path, 'r', encoding='utf-8') as file:
        main_char_list = json.load(file)

    arr_fp = os.path.join(characters_data_dir, 'character-relations.npy')

    if not os.path.exists(arr_fp):
        raise FileNotFoundError('Missing character-relations.npy in given directory!')

    arr = np.load(arr_fp)

    num_chars = len(main_char_list)
    store = [[[0, 0] for _ in range(num_chars)] for _ in range(num_chars)]

    # Per character: [summed sentiment, interaction count] over every partner
    # and every chapter.
    char_avgs = np.sum(arr, axis=(1, 3))

    for i in range(num_chars):
        for j in range(num_chars):
            if i == j:
                store[i][j] = None
                continue

            interaction_count = int(np.sum(arr[i, j, 1]))
            if not interaction_count:
                # Keep the [0, 0] placeholder. The baseline is deliberately not
                # computed here: for a character with no interactions it would
                # be 0/0 and only produce a RuntimeWarning.
                continue

            # NOTE(review): assumes the array is symmetric in its first two
            # axes, so a non-zero pair count implies a non-zero total for j.
            opposing_avg = char_avgs[j][0] / char_avgs[j][1]

            store[i][j][0] = float(np.sum(arr[i, j, 0]) / interaction_count - opposing_avg)
            store[i][j][1] = interaction_count

    interactions_fp = os.path.join(characters_data_dir, 'interactions.json')

    with open(interactions_fp, 'w', encoding='utf-8') as file:
        json.dump(store, file, indent=4)

# Build interactions.json from the character-relations.npy saved in characters_data_dir.
collate_relations(characters_data_dir)

In [None]:
# Load the alias table and the saved relations array into notebook-level
# variables so they can be inspected interactively.
main_characters_aliases_file_path = os.path.join(
    characters_data_dir, 'main_characters_aliases.json'
)
with open(main_characters_aliases_file_path, 'r') as file:
    main_char_list = json.load(file)

arr_fp = os.path.join(characters_data_dir, 'character-relations.npy')
arr = np.load(arr_fp)

# One [0, 0] placeholder per ordered character pair.
store = [
    [[0, 0] for _ in main_char_list]
    for _ in main_char_list
]

# Sum over the second-character axis and the chapter axis, leaving one
# row per character.
char_avgs = arr.sum(axis=(1, 3))

In [None]:
# Index (into main_char_list) of the character whose relations are printed.
focus_char = 5


def _mean_or_nan(total, count):
    """Return ``total / count``, or NaN when ``count`` is zero.

    Guarding the zero case explicitly avoids NumPy's 0/0 RuntimeWarning for
    characters or pairs that have no recorded interactions.
    """
    return total / count if count else float('nan')


focus_char_avg = _mean_or_nan(char_avgs[focus_char][0], char_avgs[focus_char][1])

# For each character: mean sentiment of sentences shared with the focus
# character, minus that character's own mean over all of its interactions.
# Pairs with no shared sentences (including the focus character itself) print nan.
for i in range(len(main_char_list)):
    opposing_avg = _mean_or_nan(char_avgs[i][0], char_avgs[i][1])
    pair_avg = _mean_or_nan(np.sum(arr[focus_char][i][0]), np.sum(arr[focus_char][i][1]))

    print(i, main_char_list[i], pair_avg - opposing_avg)


0 ['NARRATOR', 'Taylor', 'Taylor Hebert', 'Ms. Hebert', 'Skitter', 'Weaver'] 0.21136947844256185
1 ['Tattletale', 'Lisa'] 0.5996662524861434
2 ['Grue', 'Brian'] 0.3829009336528134
3 ['Bitch', 'Rachel', 'Rachel Lindt'] 0.8238651807556403
4 ['Krouse', 'Francis', 'Trickster'] 0.37036404398419986
5 ['Coil', 'Thomas Calvert', 'Thomas', 'Calvert', 'Director Calvert', 'Commander Calvert'] nan
6 ['Lung', 'Kenta'] 0.4861294583883752
7 ['Noelle', 'Echidna'] 0.6453364147734706
8 ['Imp', 'Aisha'] 0.48904302755440376
9 ['Regent', 'Alec'] 1.1658628841607566
10 ['Jack', 'Jack Slash'] -2.9745476847592762
11 ['Miss Militia', 'Hannah', 'Hana'] 0.19495512764659761
12 ['Scion', 'the Warrior', 'The golden man', 'the golden man'] nan
13 ['Chevalier'] nan
14 ['Bonesaw', 'Riley'] -0.8950953678474114
15 ['Defiant', 'Armsmaster', 'Collin', 'Colin'] -1.4016337644656227
16 ['Amy', 'Amy Dallon', 'Panacea', 'Amelia', 'Ames'] 0.20331186752529898
17 ['Golem', 'Theo', 'Theodore Anders', 'Theodore'] nan
18 ['Weld'] -0.

  print(i, main_char_list[i], np.sum(arr[focus_char][i][0]) / np.sum(arr[focus_char][i][1]) - opposing_avg)
