# Demo of _narrative graphs_


In [1]:
import kagglehub
import pandas as pd

# Kaggle dataset handle and the single file inside it that this demo needs.
DATASET_HANDLE = "rmisra/news-category-dataset"
DATASET_FILE = "News_Category_Dataset_v3.json"

# Download just that file; the returned location is passed straight to pandas.
path = kagglehub.dataset_download(DATASET_HANDLE, DATASET_FILE)

# The file is read as JSON Lines (one record per line).
data = pd.read_json(path, lines=True)

# Preview the first rows: link, headline, category, short_description, authors, date.
data.head()

Unnamed: 0,link,headline,category,short_description,authors,date
0,https://www.huffpost.com/entry/covid-boosters-...,Over 4 Million Americans Roll Up Sleeves For O...,U.S. NEWS,Health experts said it is too early to predict...,"Carla K. Johnson, AP",2022-09-23
1,https://www.huffpost.com/entry/american-airlin...,"American Airlines Flyer Charged, Banned For Li...",U.S. NEWS,He was subdued by passengers and crew when he ...,Mary Papenfuss,2022-09-23
2,https://www.huffpost.com/entry/funniest-tweets...,23 Of The Funniest Tweets About Cats And Dogs ...,COMEDY,"""Until you have a dog you don't understand wha...",Elyse Wanshel,2022-09-23
3,https://www.huffpost.com/entry/funniest-parent...,The Funniest Tweets From Parents This Week (Se...,PARENTING,"""Accidentally put grown-up toothpaste on my to...",Caroline Bologna,2022-09-23
4,https://www.huffpost.com/entry/amy-cooper-lose...,Woman Who Called Cops On Black Bird-Watcher Lo...,U.S. NEWS,Amy Cooper accused investment firm Franklin Te...,Nina Golgowski,2022-09-22


In [2]:
# Work on a fixed-size random subset of the articles; the seed keeps the draw
# identical from run to run.
SAMPLE_SIZE = 5000
RANDOM_STATE = 42
sample = data.sample(n=SAMPLE_SIZE, random_state=RANDOM_STATE)

# One text per article: headline, a blank line, then the short description.
headlines, descriptions = sample["headline"], sample["short_description"]
docs = headlines + "\n\n" + descriptions

# Per-document metadata that is handed to the model together with the texts.
categories, timestamps = sample["category"], sample["date"]

In [4]:
from pathlib import Path

from narrativegraph import NarrativeGraph

# The graph is persisted to a SQLite file. Create the parent directory up front
# so this cell also works on a fresh checkout where `output/` does not exist yet
# (SQLite itself does not create missing directories).
DB_PATH = "output/demo_db.sqlite"
Path(DB_PATH).parent.mkdir(parents=True, exist_ok=True)

model = NarrativeGraph(
    sqlite_db_path=DB_PATH,
    # NOTE(review): "overwrite" presumably replaces a database left behind by an
    # earlier run, which keeps re-running this cell repeatable -- confirm in the
    # library docs.
    on_existing_db="overwrite",
)

# Build the graph from the sampled texts, passing the category and date of each
# document as metadata. The call is left as the last expression so the fitted
# model's repr is displayed, as before.
model.fit(docs, categories=categories, timestamps=timestamps)

INFO:narrativegraph.pipeline:Adding 5000 documents to database
INFO:narrativegraph.pipeline:Extracting triplets
Extracting triplets: 100%|██████████| 5000/5000 [00:17<00:00, 284.84it/s]
INFO:narrativegraph.pipeline:Mapping entities and relations
INFO:narrativegraph.pipeline:Mapping triplets
Mapping triplets: 100%|██████████| 10831/10831 [00:04<00:00, 2281.96it/s]
Updating entity info: 100%|██████████| 13682/13682 [00:06<00:00, 2035.79it/s]
Updating predicate info: 100%|██████████| 4976/4976 [00:02<00:00, 2135.86it/s]
Updating relation info: 100%|██████████| 10794/10794 [00:04<00:00, 2273.20it/s]
Updating co-occurrence info: 100%|██████████| 10761/10761 [00:07<00:00, 1488.93it/s]


<narrativegraph.narrativegraph.NarrativeGraph at 0x13cc0dd60>

In [4]:
# Display the entity table of the fitted model: one row per entity with its
# label, frequency counts, first/last occurrence dates, alternative labels and
# the categories it appeared in.
model.entities_

Unnamed: 0,id,label,frequency,doc_frequency,adjusted_tf_idf,first_occurrence,last_occurrence,alt_labels,category
0,1,new opportunities,1,1,0.0,2014-06-20,2014-06-20,"[""new opportunities""]",[IMPACT]
1,2,our families,4,4,3000.0,2014-06-20,2017-06-13,"[""our families"",""our family""]","[QUEER VOICES, IMPACT, POLITICS]"
2,3,a glorious week,1,1,0.0,2014-02-08,2014-02-08,"[""a glorious week""]",[BUSINESS]
3,4,Tim Armstrong,1,1,0.0,2014-02-08,2014-02-08,"[""Tim Armstrong""]",[BUSINESS]
4,5,One Glimpse,1,1,0.0,2014-05-14,2014-05-14,"[""One Glimpse""]",[GREEN]
...,...,...,...,...,...,...,...,...,...
13677,13678,an incredibly important accessory,1,1,0.0,2012-08-31,2012-08-31,"[""an incredibly important accessory""]",[STYLE & BEAUTY]
13678,13679,Olympic Career,2,1,2500.0,2016-08-14,2016-08-14,"[""Olympic Career""]",[SPORTS]
13679,13680,One More Gold Medal,1,1,0.0,2016-08-14,2016-08-14,"[""One More Gold Medal""]",[SPORTS]
13680,13681,The winningest Olympian,1,1,0.0,2016-08-14,2016-08-14,"[""The winningest Olympian""]",[SPORTS]


In [7]:
from narrativegraph.dto.filter import GraphFilter
# Restrict community detection to entities that occur between 2 and 10 times.
community_filter = GraphFilter(minimum_node_frequency=2, maximum_node_frequency=10)

for comm in model.find_communities(graph_filter=community_filter):
    # Skip communities with at most one member.
    if len(comm) <= 1:
        continue
    print(comm)
    print()

INFO:narrativegraph.service:Running community detection on 4346 entities and 2692 edges


[EntityLabel(id=33, label='the law'), EntityLabel(id=34, label='my major concern'), EntityLabel(id=10851, label='conflicting approaches'), EntityLabel(id=6420, label='Congressional Republicans'), EntityLabel(id=6421, label='the trappings'), EntityLabel(id=6422, label='a badly written soap opera')]

[EntityLabel(id=96, label='distracted driving'), EntityLabel(id=97, label='cell phones'), EntityLabel(id=98, label='18 percent')]

[EntityLabel(id=105, label='big costume budgets'), EntityLabel(id=106, label='big Halloween parties')]

[EntityLabel(id=7040, label='Rolling Stone'), EntityLabel(id=580, label='Serena Williams'), EntityLabel(id=846, label='Ryan Lochte'), EntityLabel(id=8719, label="the country's rigid stance"), EntityLabel(id=847, label='Olympian Gifted'), EntityLabel(id=848, label='Gillette Gold-Plated'), EntityLabel(id=2389, label='The Pelletier Case'), EntityLabel(id=9431, label='Mario Bergoglio'), EntityLabel(id=9432, label='Pope Francis'), EntityLabel(id=9177, label='Lawsuit

In [None]:
# Option 1: run the visualizer server in the foreground and open the URL from
# the log output in your own browser. This call blocks, so no other cell can
# run until the kernel is interrupted -- skip this cell when using "Run All".
model.serve_visualizer()

INFO:     Started server process [82788]
INFO:     Waiting for application startup.
INFO:root:Database engine provided to state before startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://127.0.0.1:8001 (Press CTRL+C to quit)


INFO:     127.0.0.1:55866 - "GET / HTTP/1.1" 307 Temporary Redirect
INFO:     127.0.0.1:55866 - "GET /vis HTTP/1.1" 307 Temporary Redirect
INFO:     127.0.0.1:55869 - "GET /graph/bounds HTTP/1.1" 200 OK
INFO:     127.0.0.1:55869 - "OPTIONS /graph HTTP/1.1" 200 OK
INFO:     127.0.0.1:55869 - "POST /graph HTTP/1.1" 307 Temporary Redirect
INFO:     127.0.0.1:55869 - "OPTIONS /graph/ HTTP/1.1" 200 OK
INFO:     127.0.0.1:55869 - "POST /graph/ HTTP/1.1" 200 OK
INFO:     127.0.0.1:55872 - "GET /vis HTTP/1.1" 307 Temporary Redirect
INFO:     127.0.0.1:55872 - "GET /graph/bounds HTTP/1.1" 200 OK
INFO:     127.0.0.1:55872 - "POST /graph HTTP/1.1" 307 Temporary Redirect
INFO:     127.0.0.1:55872 - "POST /graph/ HTTP/1.1" 200 OK
INFO:     127.0.0.1:55874 - "GET /entities/79 HTTP/1.1" 200 OK
INFO:     127.0.0.1:55874 - "POST /graph HTTP/1.1" 307 Temporary Redirect
INFO:     127.0.0.1:55874 - "POST /graph/ HTTP/1.1" 200 OK
INFO:     127.0.0.1:55874 - "GET /entities/79/docs HTTP/1.1" 200 OK
INFO:    

In [8]:
# Option 2: run the visualizer server in the background (block=False) so that
# later cells keep executing and the visualizer can be viewed inside this
# notebook. The returned handle is kept so the server can be shown and stopped.
server = model.serve_visualizer(block=False)

INFO:root:Server started in background on port 8001
INFO:     Started server process [37625]
INFO:     Waiting for application startup.
INFO:root:Database engine provided to state before startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://127.0.0.1:8001 (Press CTRL+C to quit)


INFO:     127.0.0.1:52795 - "GET / HTTP/1.1" 307 Temporary Redirect
INFO:     127.0.0.1:52795 - "GET /vis HTTP/1.1" 307 Temporary Redirect
INFO:     127.0.0.1:52795 - "GET /vis/ HTTP/1.1" 304 Not Modified
INFO:     127.0.0.1:52796 - "GET /vis/static/css/main.66685047.css HTTP/1.1" 200 OK
INFO:     127.0.0.1:52795 - "GET /vis/static/js/main.7e003561.js HTTP/1.1" 200 OK
INFO:     127.0.0.1:52803 - "GET /graph/bounds HTTP/1.1" 200 OK
INFO:     127.0.0.1:52795 - "GET /favicon.ico HTTP/1.1" 404 Not Found
INFO:     127.0.0.1:52803 - "OPTIONS /graph HTTP/1.1" 200 OK
INFO:     127.0.0.1:52803 - "POST /graph HTTP/1.1" 307 Temporary Redirect
INFO:     127.0.0.1:52803 - "OPTIONS /graph/ HTTP/1.1" 200 OK
INFO:     127.0.0.1:52803 - "POST /graph/ HTTP/1.1" 200 OK
INFO:     127.0.0.1:52803 - "GET /entities/1919 HTTP/1.1" 200 OK
INFO:     127.0.0.1:52803 - "GET /entities/1919/docs HTTP/1.1" 200 OK
INFO:     127.0.0.1:53615 - "POST /graph HTTP/1.1" 307 Temporary Redirect
INFO:     127.0.0.1:53615 - "P

In [7]:
# Show the background server's visualizer inside the notebook
# (presumably rendered as an embedded iframe -- confirm in the library docs).
server.show_iframe()

In [8]:
# Shut down the background visualizer server once you are done with it.
server.stop()

INFO:root:Background server stopped
