<a href="https://colab.research.google.com/github/SpaceTimeNarratives/demo/blob/main/spatial_narrative_demo3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Spatial Narratives - Demo**
---



## Setting up...

In [None]:
import os

# Fetch the demo repository (helper script, requirements file, data).
# Only clone when the `demo` directory is not already present, so re-running
# this cell (e.g. Restart & Run All) does not fail with
# "destination path 'demo' already exists".
if not os.path.isdir('demo'):
    !git clone https://github.com/SpaceTimeNarratives/demo.git

In [None]:
# Remove any pre-installed spaCy before installing the demo requirements
# (presumably so the version listed in demo/requirements.txt is used - confirm
# against that file).
# `%pip` is used instead of `!pip` so the command acts on the environment of
# the running kernel rather than whichever `pip` is first on the shell PATH.
%pip uninstall -y spacy

In [None]:
# Quietly install the packages listed in the cloned repository's requirements
# file. `%pip` (not `!pip`) guarantees they are installed into the environment
# of the running kernel.
%pip install -q -r demo/requirements.txt

In [None]:
# Execute the repository's helper script; `%run` loads the names it defines
# into this notebook's namespace.
# NOTE(review): `extract_entities`, `get_tagged_list` and `visualize`, used in
# later cells, are presumably defined in this script - confirm.
%run demo/functions.py

## Importing `spaCy`
We need to import the `spaCy` library and load `en_core_web_sm`, its small English pipeline, which provides tokenization, tagging, parsing and named entity recognition.

In [None]:
import spacy
from lxml import etree
import matplotlib.pyplot as plt
from collections import Counter
import numpy as np

In [None]:
# Name of the spaCy pipeline package to load (the small English model).
SPACY_MODEL = "en_core_web_sm"

# Shared pipeline object used by later cells to process the selected text.
nlp = spacy.load(SPACY_MODEL)

## Extracting and Analysing Placenames

In [None]:
#@title ### Select file {run: "auto"}

#@markdown You can select a file from the `gold_standard` folder.
Filename = "Ruskin_cqp_55.xml" #@param ["Anon_cqp_66.xml", "Brown_cqp_10.xml", "Clarke_cqp_63.xml", "Cockin_cqp_19.xml", "Coleridge_cqp_33.xml", "Defoe_cqp_4.xml", "Garnett_cqp_62.xml", "Gray_cqp_13.xml", "Keats_cqp_44.xml", "Lt.Hammond._cqp_2.xml", "Otley__cqp_49.xml", "Pennant_cqp_12.xml", "Pennant_cqp_15.xml", "Phillips_cqp_38.xml", "Rix_cqp_78.xml", "Ruskin_cqp_55.xml", "Rutland_cqp_42.xml", "Shaw_cqp_24.xml", "Smith_cqp_5.xml", "Smith_cqp_6.xml", "Smith_cqp_7.xml", "Sullivan_cqp_20.xml", "Wakefield_cqp_37.xml", "Wesley_cqp_9.xml", "West_cqp_17.xml", "Wordsworth_cqp_47.xml", "Wordsworth_cqp_58.xml", "Young_cqp_11.xml"]

# NOTE(review): the data paths below are relative to the working directory,
# whereas the setup cells reference `demo/...`. Confirm that the working
# directory really contains `gold_standard/` and `LD_placenames.txt`.

# --- Load the selected document as plain text -------------------------------
# `method='text'` drops the XML markup. `encoding='unicode'` makes lxml return
# a `str`; with a byte encoding such as 'utf8' it returns `bytes`, and calling
# `str()` on that yields the "b'...'" repr with escaped newlines and escaped
# non-ASCII characters instead of the real text.
xml_tree = etree.parse(f'gold_standard/{Filename}')
plain_text = etree.tostring(xml_tree, encoding='unicode', method='text')
spacy_processed = nlp(plain_text)

# --- Load the list of known place names --------------------------------------
# One name per line; the file is closed deterministically and blank lines are
# skipped so that no empty "place name" is passed on.
with open('LD_placenames.txt', encoding='utf8') as placenames_file:
    ld_place_names = [line.strip() for line in placenames_file if line.strip()]

# `extract_entities` returns a mapping whose values unpack as 2-tuples with
# the place name first (that is how they are consumed below).
spacy_entities = extract_entities(spacy_processed.text, ld_place_names)

# --- Count and plot the 25 most frequently mentioned place names -------------
top_mentions = Counter(
    plname for plname, _ in spacy_entities.values()
).most_common(25)

if top_mentions:
    names, counts = zip(*top_mentions)

    # Cycle through a five-colour palette, producing exactly one colour per bar.
    palette = ['red', 'blue', 'green', 'orange', 'purple']
    bar_colors = [f'tab:{palette[i % len(palette)]}' for i in range(len(names))]

    fig, ax = plt.subplots(figsize=(20, 8))
    # No `label=` here: a list of labels must match the number of bars (which
    # can be fewer than 25), and no legend is drawn anyway.
    ax.bar(names, counts, color=bar_colors)
    ax.set_ylabel('Mentions')
    ax.set_title(f'Place name mentions in {Filename}')
    ax.tick_params(axis='both', labelsize=14)
    ax.tick_params(axis='x', labelrotation=45)
    plt.show()
else:
    # Nothing to plot; keep the names defined for any later cell that reads them.
    names, counts, bar_colors = (), (), []
    print(f'No place names found in {Filename}')

### Visualizing the place names

In [None]:
# Build the tagged representation of the document text from the extracted
# entities, then hand it to the display helper. Both helpers are defined
# outside this cell (presumably in demo/functions.py - confirm).
tagged_tokens = get_tagged_list(spacy_processed.text, spacy_entities)
visualize(tagged_tokens)