### NOTE TO SELF

Search for the main characters across the whole book first, then reassign character references to the 50–70 most common characters.

In [None]:
import os

# Book whose pipeline outputs are analysed in this notebook.
book = 'worm'

# Both data directories share the layout '<root>/<book>'; build them together.
ner_coref_data_dir, characters_data_dir = (
    os.path.join(root, book) for root in ('output', 'temp_files')
)

In [14]:
# Quick sanity check of TextBlob's sentiment scoring on a single sentence.
blob = TextBlob("Lisa answered my question, ignoring her.")
# Bare expression on the last line: the notebook shows its value as the cell output.
blob.sentiment

Sentiment(polarity=0.0, subjectivity=0.0)

In [19]:
import json
import os

import numpy as np
import pandas as pd
from textblob import TextBlob 
from tqdm import tqdm

# Book whose pipeline outputs are analysed by collate_relations below.
bookname = 'worm'

# Both data directories share the layout '<root>/<bookname>'; build them together.
ner_coref_data_dir, characters_data_dir = (
    os.path.join(root, bookname) for root in ('output', 'temp_files')
)


def collate_relations(
    ner_coref_data_dir: str,
    characters_data_dir: str
) -> None:
    """Aggregate sentence-level sentiment into character-pair relation matrices.

    Every entry of ``ner_coref_data_dir`` is treated as a chapter folder that
    contains ``relevant_sentences.csv``. Each sentence involving at least two
    distinct characters (the ``characters`` column plus, when present, the
    ``speaker`` column) is scored with TextBlob's polarity. The score is
    written back to the CSV in a ``Sentiment`` column (rows with fewer than two
    characters keep ``0.0``) and accumulated for every ordered character pair.

    Three arrays are saved into ``characters_data_dir``:

    * ``character-relations.npy`` -- shape ``(chars, chars, 2, chapters)``;
      index 0 of the third axis holds the summed polarity, index 1 the number
      of contributing sentences.
    * ``character-relations_avg.npy`` -- summed polarity divided by sentence
      count over all chapters; ``0`` for pairs that never co-occur.
    * ``interactions.npy`` -- total sentence count per pair.

    The chapter axis follows the order returned by ``os.listdir``, which is
    not guaranteed to be sorted.

    Args:
        ner_coref_data_dir: Directory with one sub-folder per chapter.
        characters_data_dir: Directory containing
            ``main_characters_aliases.json``; also receives the ``.npy`` files.

    Raises:
        FileNotFoundError: If ``main_characters_aliases.json`` is missing.
    """
    main_characters_aliases_file_path = os.path.join(characters_data_dir, 'main_characters_aliases.json')

    if not os.path.exists(main_characters_aliases_file_path):
        raise FileNotFoundError('Missing main_characters_aliases.json in given directory!')

    # JSON is UTF-8 by specification; don't depend on the platform default.
    with open(main_characters_aliases_file_path, 'r', encoding='utf-8') as file:
        main_char_list = json.load(file)

    num_chars = len(main_char_list)

    # List the directory once so the array size and the iteration below can
    # never disagree.
    chapters = os.listdir(ner_coref_data_dir)
    num_chapters = len(chapters)

    relations_arr = np.zeros((num_chars, num_chars, 2, num_chapters))

    for chapter_num, chapter in tqdm(enumerate(chapters), total=num_chapters):
        relevant_sentences_file_path = os.path.join(ner_coref_data_dir, chapter, 'relevant_sentences.csv')
        df = pd.read_csv(relevant_sentences_file_path)

        df['Sentiment'] = 0.0

        for idx, row in df.iterrows():
            # The JSON-decoded values are used directly as indices into
            # relations_arr, so they are presumably positions in
            # main_characters_aliases.json.
            char_ids = set(json.loads(row['characters']))

            # A missing speaker is read back by pandas as a non-string (NaN).
            speaker = row['speaker']
            if isinstance(speaker, str):
                char_ids.update(json.loads(speaker))

            if len(char_ids) < 2:
                continue

            sentiment = TextBlob(row['words']).sentiment.polarity
            df.loc[idx, 'Sentiment'] = sentiment

            # Credit the sentence to every ordered pair of distinct characters,
            # which keeps the matrices symmetric and the diagonal at zero.
            for char1 in char_ids:
                for char2 in char_ids:
                    if char1 == char2:
                        continue

                    relations_arr[char1, char2, 0, chapter_num] += sentiment
                    relations_arr[char1, char2, 1, chapter_num] += 1

        df.to_csv(relevant_sentences_file_path, index=False)

    character_relations_file_path = os.path.join(characters_data_dir, 'character-relations.npy')
    np.save(character_relations_file_path, relations_arr)

    print("Completed Sentiment Analysis! Computing averages...")

    # Collapse the chapter axis in one vectorised step instead of a Python
    # double loop over every character pair.
    sentiment_totals = relations_arr[:, :, 0, :].sum(axis=-1)
    interactions_arr = relations_arr[:, :, 1, :].sum(axis=-1)

    # Pairs with no shared sentences keep an average of 0 rather than
    # triggering a division by zero.
    relations_arr_avg = np.divide(
        sentiment_totals,
        interactions_arr,
        out=np.zeros_like(sentiment_totals),
        where=interactions_arr != 0,
    )

    character_relations_avg_file_path = os.path.join(characters_data_dir, 'character-relations_avg.npy')
    np.save(character_relations_avg_file_path, relations_arr_avg)

    interactions_file_path = os.path.join(characters_data_dir, 'interactions.npy')
    np.save(interactions_file_path, interactions_arr)

    print("Completed!")


# Run the aggregation for the book configured above.
collate_relations(
    ner_coref_data_dir=ner_coref_data_dir,
    characters_data_dir=characters_data_dir,
)

  0%|          | 0/304 [00:00<?, ?it/s]

100%|██████████| 304/304 [00:19<00:00, 15.69it/s]


Completed Sentiment Analysis! Computing averages...
Completed!


In [None]:
# Spot-check a single row of the 'Part-304-Interlude__End' chapter.
relevant_sentences_file_path = os.path.join(
    ner_coref_data_dir,
    'Part-304-Interlude__End',
    'relevant_sentences.csv',
)
df = pd.read_csv(relevant_sentences_file_path)

# Show how many characters the row references, then its text, one per line.
row = df.iloc[4]
print(len(json.loads(row['characters'])), row['words'], sep='\n')

1
“I’ve killed before.


In [20]:
# Location of the pairwise interaction counts that collate_relations saves
# as 'interactions.npy' under characters_data_dir.
interactions_file_path = os.path.join(characters_data_dir, 'interactions.npy')

# Load the saved array back into memory for inspection in the next cell.
interactions_arr = np.load(interactions_file_path)

In [24]:
main_characters_aliases_file_path = os.path.join(characters_data_dir, 'main_characters_aliases.json')

# JSON is UTF-8 by specification; don't depend on the platform default encoding.
with open(main_characters_aliases_file_path, 'r', encoding='utf-8') as file:
    main_char_list = json.load(file)

# Row of the interaction matrix to inspect, given as a position in main_char_list.
focus_char_idx = 16

# For every character, print its index, its alias list and the number of
# interactions recorded with the focus character.
for i, aliases in enumerate(main_char_list):
    print(i, aliases, interactions_arr[focus_char_idx][i])


0 ['NARRATOR', 'Taylor', 'Taylor Hebert', 'Ms. Hebert', 'Skitter', 'Weaver'] 358.0
1 ['Tattletale', 'Lisa'] 94.0
2 ['Grue', 'Brian'] 20.0
3 ['Bitch', 'Rachel', 'Rachel Lindt'] 9.0
4 ['Krouse', 'Francis', 'Trickster'] 5.0
5 ['Coil', 'Thomas Calvert', 'Thomas', 'Calvert', 'Director Calvert', 'Commander Calvert'] 1.0
6 ['Lung', 'Kenta'] 19.0
7 ['Noelle', 'Echidna'] 0.0
8 ['Imp', 'Aisha'] 5.0
9 ['Regent', 'Alec'] 1.0
10 ['Jack', 'Jack Slash'] 45.0
11 ['Miss Militia', 'Hannah', 'Hana'] 3.0
12 ['Scion', 'the Warrior', 'The golden man', 'the golden man'] 2.0
13 ['Chevalier'] 1.0
14 ['Bonesaw', 'Riley'] 85.0
15 ['Defiant', 'Armsmaster', 'Collin', 'Colin'] 5.0
16 ['Amy', 'Amy Dallon', 'Panacea', 'Amelia', 'Ames'] 0.0
17 ['Golem', 'Theo', 'Theodore Anders', 'Theodore'] 0.0
18 ['Weld'] 0.0
19 ['Shadow Stalker', 'Sophia'] 3.0
20 ['Dinah', 'Dinah Alcott'] 0.0
21 ['my dad', 'Dad', 'dad', 'Danny', 'Mr. Hebert'] 0.0
22 ['Emma', 'Emma Barnes'] 0.0
23 ['Leviathan'] 1.0
24 ['Eidolon'] 0.0
25 ['Dragon'] 4