In [74]:
import geopandas as gpd
import pathlib
import sqlite3
# Data directory: "../data", resolved against the current working directory.
data_dir = pathlib.Path("..").resolve().joinpath("data")

# Open the SQLite database and read the full `schools` table into a
# GeoDataFrame, using the "GEOMETRY" column as the geometry column.
db_con = sqlite3.connect(str(data_dir.joinpath("ppm.sqlite")))
schools = gpd.read_postgis(
    "SELECT * FROM schools;",
    db_con,
    geom_col="GEOMETRY",
)

In [15]:
# Rebuild the geometry column by parsing the WKT strings held in `location`.
schools["GEOMETRY"] = gpd.GeoSeries.from_wkt(schools["location"])

In [24]:
# Declare the coordinate reference system as EPSG:4326 (no reprojection is done here).
schools = schools.set_crs(epsg=4326)

In [25]:
import geoalchemy2
# Persist the `schools` layer to both the SQLite database and a GeoPackage.
# (A `schools.to_postgis("schools", db_con, if_exists="replace")` write is
# deliberately left disabled; the file-based export below is used instead.)
schools.to_file(
    str(data_dir / "ppm.sqlite"),
    layer="schools",
    driver="SQLite",
    if_exists="replace",
)
schools.to_file(
    str(data_dir / "ppm.gpkg"),
    layer="schools",
    driver="GPKG",
)

In [26]:
# Render the schools on an interactive map as the cell's output.
schools.explore()

In [32]:
# Rows that have no value in the `school` link column yet.
missing_school_links = schools.loc[schools["school"].isna()]
missing_school_links.head()

Unnamed: 0,ogc_fid,GEOMETRY,school,location,schoollabel,status
69,70,POINT (133.86502 -23.70902),,Point(133.865016 -23.709023),Alice Springs High School,
73,74,POINT (144.98861 -37.78556),,Point(144.988611 -37.785556),Fitzroy High School,
74,75,POINT (115.76817 -32.06839),,Point(115.768175 -32.06839),South Fremantle Senior High School,
75,76,POINT (149.09500 -35.32400),,Point(149.095 -35.324),Alfred Deakin High School,
76,77,POINT (146.33394 -41.16875),,Point(146.3339412 -41.1687511),Devonport Technical College,


In [77]:
import requests

# English Wikipedia API query that returns the `wikibase_item` page property
# (the Wikidata Q-id) for a page title, following redirects.
WIKI_URL = "https://en.wikipedia.org/w/api.php?action=query&prop=pageprops&ppprop=wikibase_item&redirects=1&titles={title}&format=json"

def get_wikipedia_entity_id(title, timeout=30):
    """Return the Wikidata entity URI for an English Wikipedia page title.

    Parameters
    ----------
    title : str
        Page title to look up (e.g. a school label). Non-string or blank
        values (such as NaN coming from a pandas column) are not looked up.
    timeout : float, optional
        Seconds to wait for the HTTP request before giving up.

    Returns
    -------
    str or None
        ``"http://www.wikidata.org/entity/<Q-id>"`` when the first page in the
        response carries a ``wikibase_item`` page property, otherwise ``None``.

    Raises
    ------
    requests.HTTPError
        If the API responds with an error status (via ``raise_for_status``).
    """
    # Local import so the function does not rely on another cell's imports.
    from urllib.parse import quote

    # Missing labels would otherwise be formatted as "nan" and looked up as
    # if that were a real page title.
    if not isinstance(title, str) or not title.strip():
        return None

    # Percent-encode the title: characters such as "&", "#", "+" or "%" would
    # otherwise be interpreted as part of the query string and corrupt the request.
    url = WIKI_URL.format(title=quote(title, safe=""))

    r = requests.get(url, timeout=timeout)
    r.raise_for_status()

    pages = r.json().get("query", {}).get("pages", {})
    # Only the first returned page is inspected, as a single title is queried.
    first_page = next(iter(pages.values()), {})
    entity_id = first_page.get("pageprops", {}).get("wikibase_item")
    if entity_id:
        return f"http://www.wikidata.org/entity/{entity_id}"
    return None

69          http://www.wikidata.org/entity/None
73      http://www.wikidata.org/entity/Q5455702
74      http://www.wikidata.org/entity/Q7567316
75      http://www.wikidata.org/entity/Q4722527
76          http://www.wikidata.org/entity/None
77      http://www.wikidata.org/entity/Q7595451
80      http://www.wikidata.org/entity/Q5377304
81          http://www.wikidata.org/entity/None
82          http://www.wikidata.org/entity/None
83      http://www.wikidata.org/entity/Q7587346
84     http://www.wikidata.org/entity/Q24090296
85      http://www.wikidata.org/entity/Q1145497
86          http://www.wikidata.org/entity/None
87      http://www.wikidata.org/entity/Q7824345
88      http://www.wikidata.org/entity/Q6704966
89      http://www.wikidata.org/entity/Q7271133
90      http://www.wikidata.org/entity/Q1202292
91      http://www.wikidata.org/entity/Q5209908
92          http://www.wikidata.org/entity/None
93      http://www.wikidata.org/entity/Q6680935
94      http://www.wikidata.org/entity/Q

69          http://www.wikidata.org/entity/None
73      http://www.wikidata.org/entity/Q5455702
74      http://www.wikidata.org/entity/Q7567316
75      http://www.wikidata.org/entity/Q4722527
76          http://www.wikidata.org/entity/None
77      http://www.wikidata.org/entity/Q7595451
80      http://www.wikidata.org/entity/Q5377304
81          http://www.wikidata.org/entity/None
82          http://www.wikidata.org/entity/None
83      http://www.wikidata.org/entity/Q7587346
84     http://www.wikidata.org/entity/Q24090296
85      http://www.wikidata.org/entity/Q1145497
86          http://www.wikidata.org/entity/None
87      http://www.wikidata.org/entity/Q7824345
88      http://www.wikidata.org/entity/Q6704966
89      http://www.wikidata.org/entity/Q7271133
90      http://www.wikidata.org/entity/Q1202292
91      http://www.wikidata.org/entity/Q5209908
92          http://www.wikidata.org/entity/None
93      http://www.wikidata.org/entity/Q6680935
94      http://www.wikidata.org/entity/Q

In [81]:
# Look up a Wikidata entity URI for each school label that lacks a link
# (the result keeps the index of `missing_school_links`).
s = missing_school_links["schoollabel"].apply(get_wikipedia_entity_id)

In [90]:
# Fill the missing `school` links with the looked-up values in `s` (aligned
# on index). The result is assigned back to the column explicitly: calling
# `fillna(..., inplace=True)` on `schools.school` operates on an intermediate
# Series, which warns on recent pandas and leaves `schools` unchanged under
# Copy-on-Write.
schools["school"] = schools["school"].fillna(s)

In [91]:
# Write the updated `schools` layer back to the SQLite database and the GeoPackage.
schools.to_file(
    str(data_dir / "ppm.sqlite"),
    layer="schools",
    driver="SQLite",
    if_exists="replace",
)
schools.to_file(
    str(data_dir / "ppm.gpkg"),
    layer="schools",
    driver="GPKG",
)

In [84]:
# Spot-check the row at position 69 (selected by position, not by index label).
schools.iloc[69]

ogc_fid                                                       70
GEOMETRY                           POINT (133.865016 -23.709023)
school         https://www.australianschoolsdirectory.com.au/...
location                            Point(133.865016 -23.709023)
schoollabel                            Alice Springs High School
status                                                      None
Name: 69, dtype: object

{'entities': {'Q4722527': {'pageid': 4512807,
   'ns': 0,
   'title': 'Q4722527',
   'lastrevid': 1548876666,
   'modified': '2021-12-23T17:28:41Z',
   'type': 'item',
   'id': 'Q4722527',
   'labels': {'en': {'language': 'en', 'value': 'Alfred Deakin High School'},
    'zh-hant': {'language': 'zh-hant', 'value': '阿爾佛雷德迪肯中學'},
    'zh': {'language': 'zh', 'value': '阿尔弗雷德迪肯中学'}},
   'descriptions': {'nl': {'language': 'nl',
     'value': 'high school in Australian Capital Territory, Australië'},
    'en': {'language': 'en', 'value': 'school in Canberra, Australia'},
    'de': {'language': 'de', 'value': 'Highschool in Australien'},
    'ar': {'language': 'ar',
     'value': 'ثانوية عامة في مقاطعة العاصمة الأسترالية، أستراليا'}},
   'aliases': {},
   'claims': {'P625': [{'mainsnak': {'snaktype': 'value',
       'property': 'P625',
       'hash': 'd94d0f2a1fa2b22affa2d1e74062c29d2e3161c8',
       'datavalue': {'value': {'latitude': -35.324,
         'longitude': 149.095,
         'altitud