In [None]:
# Step 1: Install the required packages
# Lines starting with "!" are shell commands: they need an IPython/Jupyter
# kernel and are not valid in a plain Python script.
!pip install booknlp
# NOTE(review): transformers is pinned to 4.30.0 — presumably for compatibility
# with BookNLP's models; confirm before changing the version.
!pip install transformers==4.30.0
!python -m spacy download en_core_web_sm

# Step 2: Download the text from Gutenberg
# wget's -O option saves the download as pride_and_prejudice.txt in the
# current working directory.
!wget https://www.gutenberg.org/files/1342/1342-0.txt -O pride_and_prejudice.txt

# Step 3: Import necessary libraries and set up BookNLP
from booknlp.booknlp import BookNLP
import json
from collections import Counter, defaultdict
import os

# Initialize BookNLP with parameters
model_params = {
    # Comma-separated list of the pipeline components requested from BookNLP.
    "pipeline": "entity,quote,supersense,event,coref",
    # NOTE(review): "big" presumably selects the larger pretrained model set —
    # confirm against the BookNLP documentation.
    "model": "big",
}
# "en" is the language argument handed to BookNLP.
booknlp = BookNLP("en", model_params)

# Step 4: Process the novel with BookNLP
# inputFile:  the text saved by the wget command in Step 2.
# outputDir:  output directory; written with a trailing slash so it can be
#             prefixed directly onto a file name.
# idd:        identifier for this run; the result read back afterwards is
#             "<idd>.book" inside outputDir.
inputFile = "pride_and_prejudice.txt"
outputDir = "pride_and_prejudice/"
idd = "pride_and_prejudice"
booknlp.process(inputFile, outputDir, idd)

# Step 5: Define the function that loads the processed BookNLP output (a JSON file)
def load_data(filename):
    """Load a JSON document from ``filename`` and return the parsed object.

    Args:
        filename: Path of the JSON file to read.

    Returns:
        The object produced by ``json.load`` for the file's contents.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the contents are not valid JSON.
    """
    # Decode explicitly as UTF-8 instead of relying on the platform's locale
    # encoding, which would mis-decode non-ASCII text on some systems.
    with open(filename, encoding="utf-8") as file:
        return json.load(file)

# Load the processed data
# The path is built with os.path.join so the lookup works whether or not
# outputDir ends with a path separator.
data = load_data(os.path.join(outputDir, f"{idd}.book"))

# Step 6: Extract character mentions and their line numbers
# defaultdict(list): any key that is looked up starts out as an empty list.
character_mentions = defaultdict(list)

# Read the text file to get line numbers
# readlines() keeps each line's trailing newline; lines[0] is the first line.
with open(inputFile, 'r', encoding='utf-8') as text_file:
    lines = text_file.readlines()

def get_counter_from_dependency_list(dep_list):
    """Count how often each word occurs in a list of token records.

    Args:
        dep_list: Iterable of dicts, each holding the token's text under the
            key "w".

    Returns:
        A ``Counter`` mapping every "w" value to its number of occurrences.

    Raises:
        KeyError: If a record has no "w" entry.
    """
    # Only the word text is counted; a record's "i" field is not needed here,
    # so records without it are accepted as well.
    return Counter(token["w"] for token in dep_list)

# Print a short report for every character that has at least one proper-name
# mention: its id, count, name and referential gender, followed by the most
# frequent words found in the character's "poss", "agent", "patient" and "mod"
# lists.
for character in data["characters"]:

    agentList = character["agent"]
    patientList = character["patient"]
    possList = character["poss"]
    modList = character["mod"]

    character_id = character["id"]
    count = character["count"]

    # Reset both gender fields on every iteration so a character without
    # gender information is reported as "unknown" rather than raising a
    # NameError or inheriting the previous character's value.
    referential_gender_distribution = referential_gender = "unknown"

    if character["g"] is not None and character["g"] != "unknown":
        referential_gender_distribution = character["g"]["inference"]
        referential_gender = character["g"]["argmax"]

    mentions = character["mentions"]
    proper_mentions = mentions["proper"]
    max_proper_mention = ""

    # just print out information about named characters
    if proper_mentions:
        # NOTE(review): presumably the proper mentions are ordered with the
        # most frequent name first — confirm against the BookNLP output format.
        max_proper_mention = proper_mentions[0]["n"]

        print(character_id, count, max_proper_mention, referential_gender)
        print()

        # Number of most-common words shown per dependency list.
        printTop = 10
        for label, dep_list in (
            ("poss", possList),
            ("agent", agentList),
            ("patient", patientList),
            ("mod", modList),
        ):
            # most_common yields (word, count); the count is printed first.
            for k, v in get_counter_from_dependency_list(dep_list).most_common(printTop):
                print(f"\t{label}\t{v} {k}")
            print()
