# Demo of _narrative graphs_


In [1]:
import kagglehub
import pandas as pd

# Identify the Kaggle dataset and the single file inside it that this demo uses.
DATASET_HANDLE = "rmisra/news-category-dataset"
DATASET_FILE = "News_Category_Dataset_v3.json"

# Download that file; the returned value is handed to pandas below as the file location.
path = kagglehub.dataset_download(DATASET_HANDLE, DATASET_FILE)

# Parse the file as line-delimited JSON (one record per line).
data = pd.read_json(path, lines=True)

# Preview the first rows.
data.head()



Unnamed: 0,link,headline,category,short_description,authors,date
0,https://www.huffpost.com/entry/covid-boosters-...,Over 4 Million Americans Roll Up Sleeves For O...,U.S. NEWS,Health experts said it is too early to predict...,"Carla K. Johnson, AP",2022-09-23
1,https://www.huffpost.com/entry/american-airlin...,"American Airlines Flyer Charged, Banned For Li...",U.S. NEWS,He was subdued by passengers and crew when he ...,Mary Papenfuss,2022-09-23
2,https://www.huffpost.com/entry/funniest-tweets...,23 Of The Funniest Tweets About Cats And Dogs ...,COMEDY,"""Until you have a dog you don't understand wha...",Elyse Wanshel,2022-09-23
3,https://www.huffpost.com/entry/funniest-parent...,The Funniest Tweets From Parents This Week (Se...,PARENTING,"""Accidentally put grown-up toothpaste on my to...",Caroline Bologna,2022-09-23
4,https://www.huffpost.com/entry/amy-cooper-lose...,Woman Who Called Cops On Black Bird-Watcher Lo...,U.S. NEWS,Amy Cooper accused investment firm Franklin Te...,Nina Golgowski,2022-09-22


In [2]:
# Draw a fixed-size random subset of the articles; the seed keeps the draw repeatable.
sample = data.sample(n=5000, random_state=42)

# One document per article: headline, a blank line, then the short description.
docs = sample["headline"] + "\n\n" + sample["short_description"]

# Region keywords are looked up as plain lowercase substrings of each document.
REGION_KEYWORDS = ("u.s.", "europe", "asia", "africa", "america", "australia")


def _regions_mentioned(document):
    """Return the region keywords (in REGION_KEYWORDS order) found in `document`, ignoring case."""
    lowered = document.lower()
    return [keyword for keyword in REGION_KEYWORDS if keyword in lowered]


# Two category dimensions: the dataset's own label (one per document) and a
# possibly empty list of regions per document.
categories = {
    "category": sample["category"],
    "region": [_regions_mentioned(document) for document in docs],
}

# Publication date of each sampled article.
timestamps = sample["date"]

In [3]:
from narrativegraph import NarrativeGraph
from narrativegraph.nlp.extraction.spacy.naive import NaiveSpacyTripletExtractor

# Triplet extractor used by the pipeline (the spaCy-based "naive" implementation).
triplet_extractor = NaiveSpacyTripletExtractor()

# Configure the graph with a SQLite file as its database and fit it on the sampled
# documents together with their categories and timestamps.
# NOTE(review): on_existing_db="overwrite" presumably replaces a database left over
# from an earlier run — confirm against the narrativegraph documentation.
model = (
    NarrativeGraph(
        triplet_extractor=triplet_extractor,
        sqlite_db_path="output/demo_db.sqlite",
        on_existing_db="overwrite",
    )
    .fit(docs, categories=categories, timestamps=timestamps)
)

INFO:narrativegraph.pipeline:Adding 5000 documents to database
INFO:narrativegraph.pipeline:Extracting triplets
Extracting triplets: 100%|██████████| 5000/5000 [00:21<00:00, 233.09it/s] 
INFO:narrativegraph.pipeline:Mapping entities and relations
INFO:narrativegraph.pipeline:Mapping triplets
Mapping triplets: 100%|██████████| 12144/12144 [00:03<00:00, 3320.80it/s]
Updating entity info: 100%|██████████| 12744/12744 [00:06<00:00, 2010.16it/s]
Updating relation info: 100%|██████████| 12102/12102 [00:05<00:00, 2330.25it/s]


In [4]:
# Display the entity table of the fitted model: one row per extracted entity, with its
# label, term/document frequencies and first/last occurrence dates.
model.entities_

Unnamed: 0,id,label,supernode_id,is_supernode,term_frequency,doc_frequency,first_occurrence,last_occurrence
0,1,AOL,,0,3,1,2014-02-08,2014-02-08
1,2,Employee Benefit Cuts,,0,1,1,2014-02-08,2014-02-08
2,3,a glorious week,,0,1,1,2014-02-08,2014-02-08
3,4,Tim Armstrong,,0,1,1,2014-02-08,2014-02-08
4,5,One,,0,171,120,2012-01-29,2020-10-10
...,...,...,...,...,...,...,...,...
12739,12740,the derrière,,0,1,1,2012-08-31,2012-08-31
12740,12741,Guide On Seizing The Moment\n\n,,0,1,1,2016-03-01,2016-03-01
12741,12742,Olympic Career,,0,2,1,2016-08-14,2016-08-14
12742,12743,Financial Stability Board Reports,,0,1,1,2012-11-18,2012-11-18


In [5]:
# Serve the visualizer in blocking mode: open the URL printed in the log in your own browser.
# This call does not return until the server is stopped, so no other cell can run meanwhile.
model.serve_visualizer()

INFO:     Started server process [76810]
INFO:     Waiting for application startup.
INFO:root:Database engine provided to state before startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://127.0.0.1:8001 (Press CTRL+C to quit)


INFO:     127.0.0.1:51975 - "GET / HTTP/1.1" 307 Temporary Redirect
INFO:     127.0.0.1:51975 - "GET /vis HTTP/1.1" 307 Temporary Redirect
INFO:     127.0.0.1:51975 - "GET /vis/ HTTP/1.1" 304 Not Modified
INFO:     127.0.0.1:51975 - "GET /vis/static/js/main.5dedd391.js HTTP/1.1" 304 Not Modified
INFO:     127.0.0.1:51978 - "GET /graph/bounds HTTP/1.1" 200 OK
INFO:     127.0.0.1:51978 - "OPTIONS /graph HTTP/1.1" 200 OK
INFO:     127.0.0.1:51978 - "POST /graph HTTP/1.1" 307 Temporary Redirect
INFO:     127.0.0.1:51978 - "OPTIONS /graph/ HTTP/1.1" 200 OK
INFO:     127.0.0.1:51978 - "POST /graph/ HTTP/1.1" 200 OK
INFO:     127.0.0.1:51978 - "POST /graph HTTP/1.1" 307 Temporary Redirect
INFO:     127.0.0.1:51978 - "POST /graph/ HTTP/1.1" 200 OK
INFO:     127.0.0.1:51978 - "POST /graph HTTP/1.1" 307 Temporary Redirect
INFO:     127.0.0.1:51978 - "POST /graph/ HTTP/1.1" 200 OK
INFO:     127.0.0.1:51978 - "GET /entities/1305 HTTP/1.1" 200 OK
INFO:     127.0.0.1:51978 - "GET /entities/284 HTTP/

INFO:     Shutting down
INFO:     Waiting for application shutdown.
INFO:     Application shutdown complete.
INFO:     Finished server process [76810]
INFO:root:Server stopped by user


In [5]:
# Serve the visualizer in the background (block=False) so it can be viewed inside this notebook.
# The returned handle is kept so the server can be shown and stopped from later cells.
server = model.serve_visualizer(block=False)

INFO:root:Server started in background on port 8001
INFO:     Started server process [14940]
INFO:     Waiting for application startup.
INFO:root:Database engine provided to state before startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://127.0.0.1:8001 (Press CTRL+C to quit)
INFO:     Shutting down
INFO:     Waiting for application shutdown.
INFO:     Application shutdown complete.
INFO:     Finished server process [14940]


In [6]:
# Show the background visualizer inline in this notebook (via an iframe, per the method name).
server.show_iframe()

In [7]:
# Shut down the background visualizer server once it is no longer needed.
server.stop()

INFO:root:Background server stopped
