# Load newspaper data to Datasette/Spatialite

This notebook loads the dataset created by the [process newspaper data](process_newspapers.ipynb) notebook into an SQLite database so it can be deployed online using Datasette and Spatialite. Separate linked tables are created for titles and places.

In [19]:
from geojson import Feature, FeatureCollection, MultiLineString
import shapely
import geojson
import json
from pathlib import Path
from sqlite_utils import Database
import pandas as pd

In [20]:
# Read the combined newspapers CSV into a DataFrame; every later cell
# works from this `df`.
df = pd.read_csv(filepath_or_buffer="newspapers_combined.csv")

In [21]:
def clean_title(title):
    """
    Choose a single display title from a " | "-separated list of variants.

    The first variant that contains no opening parenthesis is preferred.
    If every variant contains one, the whole original string is used
    instead. Leading and trailing full stops are removed from whichever
    value is returned.

    Parameters:
        title: string holding one or more title variants joined by " | "

    Returns:
        The selected title with surrounding "." characters stripped.
    """
    # Lazily scan the variants; fall back to the untouched input when
    # none of them is free of parentheses.
    unqualified = (variant for variant in title.split(" | ") if "(" not in variant)
    chosen = next(unqualified, title)
    return chosen.strip(".")

Add the data to the database.

In [22]:
# Open the target database; recreate=True asks sqlite-utils to start from
# a fresh file rather than adding to an existing one.
db = Database("spatial-datasette/newspapers.db", recreate=True)

# Rows are processed one id-group at a time. Every row in a group upserts
# the same "titles" record (keyed on id) and links it to that row's place
# through the many-to-many relationship with the "places" table.
for nid, group in df.groupby("id"):
    for row in group.itertuples():
        title_record = dict(
            id=nid,
            alma_id=row.alma_id,
            title=clean_title(row.title),
            date=row.date,
            publisher=row.publisher,
            trove_url=row.trove_url,
        )
        place_record = dict(
            placename=row.placename,
            latitude=row.latitude,
            longitude=row.longitude,
        )
        # Places are keyed on placename, so a place shared by several
        # titles is stored once and linked many times.
        db["titles"].upsert(title_record, pk="id").m2m(
            "places", place_record, pk="placename"
        )

Add a geometry column and create a spatial index on the place coordinates.

In [23]:
import sqlite3

# Work on the database file through the standard sqlite3 module so the
# SpatiaLite extension can be loaded into the connection.
conn = sqlite3.connect("spatial-datasette/newspapers.db")
try:
    # Load the spatialite extension.
    # NOTE: this library path is specific to x86_64 Linux installs; adjust
    # it if the notebook is run on another platform.
    conn.enable_load_extension(True)
    conn.load_extension("/usr/lib/x86_64-linux-gnu/mod_spatialite.so")
    # Initialize spatial metadata for this database:
    conn.execute("select InitSpatialMetadata(1)")
    # Add a 2-dimensional POINT geometry column called point_geom
    # (SRID 4326) to the places table:
    conn.execute(
        "SELECT AddGeometryColumn('places', 'point_geom', 4326, 'POINT', 2);"
    )
    # Now update that geometry column with the lat/lon points. The WKT
    # text is built as POINT(longitude latitude), i.e. x first, then y.
    conn.execute(
        """
    UPDATE places SET
    point_geom = GeomFromText('POINT('||"longitude"||' '||"latitude"||')',4326);
"""
    )
    # Now add a spatial index to that column. The table and column names
    # are passed as single-quoted SQL string literals; double quotes denote
    # identifiers in SQLite and only work as strings via a legacy fallback.
    conn.execute(
        "select CreateSpatialIndex('places', 'point_geom');"
    )
    # If you don't commit your changes will not be persisted:
    conn.commit()
finally:
    # Always release the connection, even if a step above raised, so a
    # failed run does not leave the database file held open by the kernel.
    conn.close()