# Demo of _narrative graphs_


In [1]:
import kagglehub
import pandas as pd

# Fetch one file of the Kaggle news-category dataset; the value returned by
# kagglehub is used below as the location of the downloaded file.
DATASET_HANDLE = "rmisra/news-category-dataset"
DATASET_FILE = "News_Category_Dataset_v3.json"
path = kagglehub.dataset_download(DATASET_HANDLE, DATASET_FILE)

# The file is parsed as line-delimited JSON (one record per line).
data = pd.read_json(path, lines=True)
data.head()

Unnamed: 0,link,headline,category,short_description,authors,date
0,https://www.huffpost.com/entry/covid-boosters-...,Over 4 Million Americans Roll Up Sleeves For O...,U.S. NEWS,Health experts said it is too early to predict...,"Carla K. Johnson, AP",2022-09-23
1,https://www.huffpost.com/entry/american-airlin...,"American Airlines Flyer Charged, Banned For Li...",U.S. NEWS,He was subdued by passengers and crew when he ...,Mary Papenfuss,2022-09-23
2,https://www.huffpost.com/entry/funniest-tweets...,23 Of The Funniest Tweets About Cats And Dogs ...,COMEDY,"""Until you have a dog you don't understand wha...",Elyse Wanshel,2022-09-23
3,https://www.huffpost.com/entry/funniest-parent...,The Funniest Tweets From Parents This Week (Se...,PARENTING,"""Accidentally put grown-up toothpaste on my to...",Caroline Bologna,2022-09-23
4,https://www.huffpost.com/entry/amy-cooper-lose...,Woman Who Called Cops On Black Bird-Watcher Lo...,U.S. NEWS,Amy Cooper accused investment firm Franklin Te...,Nina Golgowski,2022-09-22


In [2]:
# List every distinct category label, in order of first appearance.
data.loc[:, "category"].unique()

array(['U.S. NEWS', 'COMEDY', 'PARENTING', 'WORLD NEWS', 'CULTURE & ARTS',
       'TECH', 'SPORTS', 'ENTERTAINMENT', 'POLITICS', 'WEIRD NEWS',
       'ENVIRONMENT', 'EDUCATION', 'CRIME', 'SCIENCE', 'WELLNESS',
       'BUSINESS', 'STYLE & BEAUTY', 'FOOD & DRINK', 'MEDIA',
       'QUEER VOICES', 'HOME & LIVING', 'WOMEN', 'BLACK VOICES', 'TRAVEL',
       'MONEY', 'RELIGION', 'LATINO VOICES', 'IMPACT', 'WEDDINGS',
       'COLLEGE', 'PARENTS', 'ARTS & CULTURE', 'STYLE', 'GREEN', 'TASTE',
       'HEALTHY LIVING', 'THE WORLDPOST', 'GOOD NEWS', 'WORLDPOST',
       'FIFTY', 'ARTS', 'DIVORCE'], dtype=object)

In [5]:
# Restrict the corpus to two categories and draw a reproducible random sample.
in_scope = data["category"].isin(["U.S. NEWS", "POLITICS"])
sample = data.loc[in_scope].sample(n=5000, random_state=42)

# One document per article: headline, a blank line, then the short description.
docs = sample["headline"] + "\n\n" + sample["short_description"]

# Per-document metadata, sharing the sample's index with `docs`.
categories, timestamps = sample["category"], sample["date"]

In [6]:
from narrativegraph import NarrativeGraph

from pathlib import Path

# SQLite creates the database file on demand but not its parent directory,
# so make sure the output folder exists before the model opens the database.
# Without this, a fresh checkout can fail with "unable to open database file".
DB_PATH = "output/demo_db.sqlite"
Path(DB_PATH).parent.mkdir(parents=True, exist_ok=True)

# Build the model on the sampled documents. `on_existing_db="overwrite"` asks
# for any database already at DB_PATH to be replaced, so the cell can be re-run.
model = NarrativeGraph(sqlite_db_path=DB_PATH, on_existing_db="overwrite")

# Categories and timestamps are supplied alongside the documents. The call is
# left as the last expression so the fitted model's repr is displayed.
model.fit(docs, categories=categories, timestamps=timestamps)

INFO:narrativegraph.pipeline:Adding 5000 documents to database
INFO:narrativegraph.pipeline:Extracting triplets
Extracting triplets: 100%|██████████| 5000/5000 [00:15<00:00, 323.52it/s]
INFO:narrativegraph.pipeline:Mapping entities and predicates
INFO:narrativegraph.pipeline:Mapping triplets


<narrativegraph.narrativegraph.NarrativeGraph at 0x11ef0c0e0>

In [8]:
# Display the table exposed by the fitted model under `entities_`.
# NOTE(review): the saved output lists subject/predicate/object triplet columns
# rather than entities — confirm this is the intended attribute, or re-run the
# cell in case the recorded output is stale.
model.entities_

Unnamed: 0,id,subject,predicate,object,frequency,doc_frequency,adjusted_tf_idf,first_occurrence,last_occurrence,subject_entity_id,predicate_id,object_entity_id,alt_pred_labels,category
0,1,the early hours of Election Day,", before dawn reaches",the nation's capital,1,1,0.0,2014-11-02,2014-11-02,6909,3492,2404,"["", before dawn reaches""]",[POLITICS]
1,2,the nation's capital,", Justice",Anthony Kennedy,1,1,0.0,2014-11-02,2014-11-02,2404,3992,1077,"["", Justice""]",[POLITICS]
2,3,the ghost,of,Election Day Past,1,1,0.0,2014-11-02,2014-11-02,4264,719,4699,"[""of""]",[POLITICS]
3,4,Deportation Agents,Are Using,An App,1,1,0.0,2022-03-11,2022-03-11,5598,4652,1678,"[""Are Using""]",[POLITICS]
4,5,The Staggering Economic And Human Costs,of,Nuclear Weapons,1,1,0.0,2017-12-11,2017-12-11,4678,719,3396,"[""Of""]",[POLITICS]
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10914,10915,Donald Trump,points to,Russian-aligned Wikileaks,1,1,0.0,2017-01-04,2017-01-04,8464,4219,570,"[""points to""]",[POLITICS]
10915,10916,Russian-aligned Wikileaks,to counter,the U.S. intelligence community’s analysis,1,1,0.0,2017-01-04,2017-01-04,570,2876,11102,"[""to counter""]",[POLITICS]
10916,10917,James M. Dorsey,US president,Donald J. Trump’s,1,1,0.0,2017-12-16,2017-12-16,2721,1974,12204,"[""US president""]",[POLITICS]
10917,10918,Special Force,to deport,Undocumented Immigrants,1,1,0.0,2016-01-10,2016-01-10,1061,2751,45,"[""To Deport""]",[POLITICS]


In [None]:
# Start the visualizer server so it can be opened in a separate browser tab.
# This call blocks: no other cell can execute until the server is stopped.
model.serve_visualizer()

INFO:     Started server process [76342]
INFO:     Waiting for application startup.
INFO:root:Database engine provided to state before startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://127.0.0.1:8001 (Press CTRL+C to quit)


In [8]:
# Start the visualizer server in the background (block=False) so that it can be
# viewed inside this notebook while later cells keep running. The returned
# handle is kept so the server can be shown and stopped afterwards.
server = model.serve_visualizer(block=False)

INFO:root:Server started in background on port 8001
INFO:     Started server process [37625]
INFO:     Waiting for application startup.
INFO:root:Database engine provided to state before startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://127.0.0.1:8001 (Press CTRL+C to quit)


INFO:     127.0.0.1:52795 - "GET / HTTP/1.1" 307 Temporary Redirect
INFO:     127.0.0.1:52795 - "GET /vis HTTP/1.1" 307 Temporary Redirect
INFO:     127.0.0.1:52795 - "GET /vis/ HTTP/1.1" 304 Not Modified
INFO:     127.0.0.1:52796 - "GET /vis/static/css/main.66685047.css HTTP/1.1" 200 OK
INFO:     127.0.0.1:52795 - "GET /vis/static/js/main.7e003561.js HTTP/1.1" 200 OK
INFO:     127.0.0.1:52803 - "GET /graph/bounds HTTP/1.1" 200 OK
INFO:     127.0.0.1:52795 - "GET /favicon.ico HTTP/1.1" 404 Not Found
INFO:     127.0.0.1:52803 - "OPTIONS /graph HTTP/1.1" 200 OK
INFO:     127.0.0.1:52803 - "POST /graph HTTP/1.1" 307 Temporary Redirect
INFO:     127.0.0.1:52803 - "OPTIONS /graph/ HTTP/1.1" 200 OK
INFO:     127.0.0.1:52803 - "POST /graph/ HTTP/1.1" 200 OK
INFO:     127.0.0.1:52803 - "GET /entities/1919 HTTP/1.1" 200 OK
INFO:     127.0.0.1:52803 - "GET /entities/1919/docs HTTP/1.1" 200 OK
INFO:     127.0.0.1:53615 - "POST /graph HTTP/1.1" 307 Temporary Redirect
INFO:     127.0.0.1:53615 - "P

In [7]:
# Show the visualizer inside the notebook.
# NOTE(review): presumably embeds the background server's page as an iframe,
# so the server started with block=False must still be running — confirm.
server.show_iframe()

In [8]:
# Shut down the background visualizer server once it is no longer needed.
server.stop()

INFO:root:Background server stopped
