# Micro-narrative analysis of _The Lord of the Rings_

In [1]:
import glob
import os.path

def load_sections(pattern="input/*.txt", encoding="utf-8"):
    """Read every file matching ``pattern`` and return its non-empty lines.

    Each file is split on newlines; every line is stripped of surrounding
    whitespace and blank lines are dropped.

    Args:
        pattern: Glob pattern selecting the input text files.
        encoding: Text encoding used to decode the files. Passed explicitly so
            the result does not depend on the platform's default encoding
            (the corpus contains non-ASCII names).

    Returns:
        A list of stripped, non-empty lines, in file order and then line order.
    """
    sections = []
    # glob.glob returns paths in arbitrary order; sorting keeps the document
    # order (and anything derived from it) stable across machines and runs.
    for path in sorted(glob.glob(pattern)):
        with open(path, encoding=encoding) as f:
            for line in f.read().split("\n"):
                line = line.strip()
                if line:
                    sections.append(line)
    return sections


docs = load_sections()

In [2]:
from narrativegraph import NarrativeGraph
from narrativegraph.nlp.extraction.spacy import NaiveSpacyTripletExtractor

# Fitting is only done when no database file exists yet; otherwise the model
# stored in that file is loaded instead of being rebuilt.
# NOTE(review): the (1, None) / (2, None) tuples look like (min, max) bounds
# for named entities and noun chunks -- confirm against the extractor's docs.
db_path = "lotr.sqlite"
if not os.path.exists(db_path):
    model = NarrativeGraph(
        triplet_extractor=NaiveSpacyTripletExtractor(
            named_entities=(1, None),
            noun_chunks=(2, None),
        ),
        sqlite_db_path=db_path,
    ).fit(docs)
else:
    model = NarrativeGraph.load(db_path)

In [4]:
from narrativegraph import GraphFilter

# Communities found with the "connected_components" method on the filtered graph.
component_filter = GraphFilter(
    minimum_edge_frequency=1,
    minimum_node_frequency=2,
    maximum_node_frequency=50,
)
comms = model.graph.find_communities(
    graph_filter=component_filter,
    min_weight=2,
    community_detection_method="connected_components",
)

# Highest-scoring communities first; communities with a single member are skipped.
ranked = sorted(comms, key=lambda c: c.score, reverse=True)
for comm in ranked:
    if len(comm.members) >= 2:
        labels = [member.label for member in comm.members]
        # One comma-separated line per community, followed by a blank line.
        print(*labels, sep=", ", end="\n\n")

the elf-tongue, noro lim

the northerly wind, some protection

a glad water, a bright morning

my count, the last Orc

your fair kindred, keen eyes

low well-laid hedges, a huge turnip-field

all the country, the Shadowy Mountains

their full name, as long as years

lembas_, a few mouthfuls

the dull stump, their own feet

the nearest point, a vast crescent

the wretched man, several minutes, his long slow way

the dwellers, the giant shepherds, the deep forests

O Orofarnë, Carnimírië, Lassemista

the wide hurrying waters, their long way

That accident, my news

A new road, a secret gate

all her life, the bitter watches

the marsh-reeks, sullen morning

the first stir, the high stars

the last march, our doom

the dead city, the swiftest way

the great stone slab, six feet

a run, a wry smile

dark rumour, the actual dread

your muddy cloaks, your weary faces

a tumbled waste, grey limestone-boulders

an ancient stone basin, the broken ruins, a shallow dell

the beleaguered city grea

In [5]:
from narrativegraph import GraphFilter

# Same filter as the connected-components run, but with a lower weight threshold.
# No community_detection_method is passed, so the library default applies.
# NOTE(review): k=3 is presumably a parameter of that default method -- confirm.
comms = model.graph.find_communities(
    graph_filter=GraphFilter(
        minimum_edge_frequency=1,
        minimum_node_frequency=2,
        maximum_node_frequency=50,
    ),
    min_weight=0.5,
    community_detection_method_args={"k": 3},
)

# Walk the communities from highest to lowest score, keeping those with 2+ members.
by_score = sorted(comms, key=lambda c: c.score, reverse=True)
for comm in (c for c in by_score if not len(c.members) < 2):
    print(*(entity.label for entity in comm.members), sep=", ")
    print()

my precious, Fissh, nice fissh

the wretched man, several minutes, his long slow way

the dwellers, the giant shepherds, the deep forests

O Orofarnë, Carnimírië, Lassemista

an ancient stone basin, the broken ruins, a shallow dell

a loose veil, his reach, the upper portion

the dark greasy surfaces, livid weed, the scum

the bare branches, the dark pines, The East Wind

its folded limbs, the upper exit, her lair, a horrible speed

a tempest, dark corn, every ear, a great field

only a dun, the covert, shadowless world

the Hobbiton Road, the ugly new houses, Pool Side

their traffic, their use, their domain, West-door

summer night, the beech, some northern glade

unfriendly places, all save birds, all the races

'Saruman, no dint, your hats

their tales, their finery, their songs, a great dash

full use, the production, the material, an accessory volume

great jagged pinnacles, weathered rocks, some nook

Sancho Proudfoot, the cellars, a tussle

his large mouth, dark scornful eyes, 

In [44]:
from narrativegraph import GraphFilter

# Louvain communities on a stricter graph (edges seen at least twice).
# NOTE(review): Louvain is typically randomised and no seed is passed here --
# check whether the library exposes one if the output must be reproducible.
louvain_settings = dict(
    graph_filter=GraphFilter(
        minimum_edge_frequency=2,
        minimum_node_frequency=2,
        maximum_node_frequency=100,
    ),
    min_weight=1.5,
    community_detection_method="louvain",
    community_detection_method_args={"resolution": 5},
)
comms = model.graph.find_communities(**louvain_settings)

# Print member labels per community, best score first, ignoring singletons.
for comm in sorted(comms, key=lambda c: c.score, reverse=True):
    if len(comm.members) < 2:
        continue
    member_labels = [e.label for e in comm.members]
    print(*member_labels, sep=", ")
    print()

the vile creature, this chance

Círdan, Shipwright

our house, the eldest son

the great eagle, Gwaihir, Windlord

their branches, their leaves

great bundles, Elbereth

both torches, The spray

a clearing, tree-trunks

a ship, a glass

Samwise, Hamfast

Slinker, Stinker

great praise, Beorn

moon, The cow

the carcase, fell beasts

Old Forest, Fredegar

Nine Riders, Messenger

white bread, yellow cream

the Mayor, Michel Delving

the wilderness, a beggar

the spoon, A dish

Herblore, his _

Landroval, Meneldor

The dogs, a walk

the bottle, Ho

a warrior, the twilight

no coat, a tunic

the monster, no heed

the link, Isengard and Mordor

the preparations, S.R.

Noman, the Dead Marshes

his armies, his slaves

her people, Evenstar

O Orofarnë, Carnimírië, Lassemista

the wolves, The howling

Ghân, other púkel-shapes

Fair, Hirluin

Elrohir, Elladan

the Ettenmoors, Hoarwell

Min-Rimmon, Erelas

long arms, Nan Curunír

the turf, small flowers

intaken breath, a soft hiss

high seats, t

In [None]:
# Co-occurrence table of the fitted model; display only rows whose
# `frequency` column is greater than 1.
coocs = model.co_occurrences_
seen_more_than_once = coocs["frequency"] > 1
coocs[seen_more_than_once]

The heatmap below shows the mean PMI of entity pairs, binned by the frequency of each of the two entities: pairs made up of low-frequency entities tend to have a higher PMI.

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Create frequency bins with power-of-two edges: (0, 1], (1, 2], (2, 4], ...
# Edges are doubled until the last one is >= the largest frequency. Stopping at
# 2**floor(log2(max_freq)) would leave every frequency above that edge outside
# all bins, where pd.cut turns it into NaN and groupby silently drops it.
max_freq = max(coocs['entity_one_frequency'].max(), coocs['entity_two_frequency'].max())
freq_bins = [0, 1]
while freq_bins[-1] < max_freq:
    freq_bins.append(freq_bins[-1] * 2)

# Bin the data: one row per co-occurrence with the bin of each entity's
# frequency and the pair's PMI.
df = pd.DataFrame({
    'x_bin': pd.cut(coocs.entity_one_frequency, bins=freq_bins),
    'y_bin': pd.cut(coocs.entity_two_frequency, bins=freq_bins),
    'pmi': coocs.pmi,
})

# Calculate mean PMI for each cell (rows: entity-one bin, columns: entity-two bin).
# observed=False keeps empty bins as NaN rows/columns, so the grid always has
# exactly one row and one column per bin in freq_bins, in bin order.
heatmap_data = df.groupby(['x_bin', 'y_bin'], observed=False)['pmi'].mean().unstack()


# Create labels from bin edges
def format_bin_label(val):
    """Format a bin edge as a compact tick label.

    Values below 1000 are shown as a plain integer; larger values are shown
    as whole thousands (truncated) followed by "k", e.g. 2048 -> "2k".
    """
    return str(int(val)) if val < 1000 else f"{int(val / 1000)}k"


# heatmap_data has entity-one bins as rows and entity-two bins as columns, and
# imshow draws rows along the y-axis. Transpose so that entity one runs along
# the x-axis and entity two along the y-axis, matching the axis labels below.
grid = heatmap_data.T

# Build tick labels from the bins actually present in the grid (upper edge of
# each interval), so the number and position of labels always match the ticks.
x_labels = [format_bin_label(interval.right) for interval in grid.columns]
y_labels = [format_bin_label(interval.right) for interval in grid.index]

# Plot
fig, ax = plt.subplots(figsize=(10, 8))
# 'gray' is the canonical colormap name; the 'grey' spelling is only accepted
# as an alias by newer Matplotlib releases.
im = ax.imshow(grid.to_numpy(), cmap='gray', aspect='auto', origin='lower')

# Labels
ax.set_xlabel('Frequency of entity 1', fontsize=20)
ax.set_ylabel('Frequency of entity 2', fontsize=20)
ax.set_xticks(range(len(x_labels)))
ax.set_yticks(range(len(y_labels)))
ax.set_xticklabels(x_labels, rotation=45, ha='right')
ax.set_yticklabels(y_labels)

# Colorbar
cbar = plt.colorbar(im, ax=ax)
cbar.set_label('Mean PMI', rotation=270, labelpad=20, fontsize=20)

plt.tight_layout()
plt.show()

In [None]:
# Run community detection with no arguments (no graph filter, weight threshold
# or method given) and display the result.
# NOTE(review): the earlier cells call model.graph.find_communities(...);
# confirm that find_communities is also available directly on the model.
model.find_communities()

In [None]:
# Display the fitted model's `entities_` attribute.
# NOTE(review): presumably a table of the extracted entities -- confirm.
model.entities_

In [None]:
# Start the visualizer server so the graph can be explored in your own browser.
# This call blocks: no other cell can run until the server is stopped.
# (In the recorded run below it was served at http://127.0.0.1:8001.)
model.serve_visualizer()

INFO:     Started server process [26649]
INFO:     Waiting for application startup.
INFO:root:Database engine provided to state before startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://127.0.0.1:8001 (Press CTRL+C to quit)


INFO:     127.0.0.1:60573 - "GET / HTTP/1.1" 307 Temporary Redirect
INFO:     127.0.0.1:60573 - "GET /vis HTTP/1.1" 307 Temporary Redirect
INFO:     127.0.0.1:60573 - "GET /vis/ HTTP/1.1" 200 OK
INFO:     127.0.0.1:60576 - "GET /graph/bounds HTTP/1.1" 200 OK
INFO:     127.0.0.1:60576 - "POST /graph HTTP/1.1" 307 Temporary Redirect
INFO:     127.0.0.1:60576 - "POST /graph/ HTTP/1.1" 200 OK
INFO:     127.0.0.1:60576 - "POST /graph HTTP/1.1" 307 Temporary Redirect
INFO:     127.0.0.1:60576 - "POST /graph/ HTTP/1.1" 200 OK
INFO:     127.0.0.1:60576 - "POST /graph HTTP/1.1" 307 Temporary Redirect
INFO:     127.0.0.1:60576 - "POST /graph/ HTTP/1.1" 200 OK
INFO:     127.0.0.1:60576 - "POST /graph/communities HTTP/1.1" 200 OK
INFO:     127.0.0.1:60593 - "POST /graph HTTP/1.1" 307 Temporary Redirect
INFO:     127.0.0.1:60593 - "POST /graph/ HTTP/1.1" 200 OK
INFO:     127.0.0.1:60593 - "POST /graph/communities HTTP/1.1" 200 OK
INFO:     127.0.0.1:60598 - "POST /graph HTTP/1.1" 307 Temporary Redi