In [1]:
import sqlite3
from typing import List, Optional


class WikiMapper:
    """Uses a precomputed database created by `create_wikipedia_wikidata_mapping_db`."""

    def __init__(self, path_to_db: str):
        self._path_to_db = path_to_db
        self.conn = sqlite3.connect(self._path_to_db)

    def title_to_id(self, page_title: str) -> Optional[str]:
        """Given a Wikipedia page title, returns the corresponding Wikidata ID.
        The page title is the last part of a Wikipedia url **unescaped** and spaces
        replaced by underscores , e.g. for `https://en.wikipedia.org/wiki/Fermat%27s_Last_Theorem`,
        the title would be `Fermat's_Last_Theorem`.
        Args:
            page_title: The page title of the Wikipedia entry, e.g. `Manatee`.
        Returns:
            Optional[str]: If a mapping could be found for `wiki_page_title`, then return
                           it, else return `None`.
        """

        c = self.conn.execute("SELECT wikidata_id FROM mapping WHERE wikipedia_title=?", (page_title,))
        result = c.fetchone()

        if result is not None and result[0] is not None:
            return result[0]
        else:
            return None

    def url_to_id(self, wiki_url: str) -> Optional[str]:
        """Given an URL to a Wikipedia page, returns the corresponding Wikidata ID.
        This is just a convenience function. It is not checked whether the index and
        URL are from the same dump.
        Args:
            wiki_url: The URL to a Wikipedia entry.
        Returns:
            Optional[str]: If a mapping could be found for `wiki_url`, then return
                           it, else return `None`.
        """

        title = wiki_url.rsplit("/", 1)[-1]
        return self.title_to_id(title)

    def id_to_titles(self, wikidata_id: str) -> List[str]:
        """Given a Wikidata ID, return a list of corresponding pages that are linked to it.
        Due to redirects, the mapping from Wikidata ID to Wikipedia title is not unique.
        Args:
            wikidata_id (str): The Wikidata ID to map, e.g. `Q42797`.
        Returns:
            List[str]: A list of Wikipedia pages that are linked to this Wikidata ID.
        """

        c = self.conn.execute(
            "SELECT DISTINCT wikipedia_title FROM mapping WHERE wikidata_id =?", (wikidata_id,)
        )
        results = c.fetchall()

        return [e[0] for e in results]

map = WikiMapper('/Users/lucyhorowitz/Documents/MathGloss/wikidata/index_enwiki-20190420.db')


In [None]:
import csv
import requests

wikicats = ['_(mathematics)', '_(category_theory)', '_(linear_algebra)', '_(algebraic_geometry)', '_(algebraic_topology)',
             '_(commutative_algebra)', '_(field_theory)', '_(game_theory)', '_(topology)', '_(differential_geometry)', '_(graph_theory)', 
             '_(group_theory)', '_(invariant_theory)', '_(module_theory)', '_(order_theory)', '_(ring_theory)',
             '_(representation_theory)', '_(set_theory)', '_(string_theory)', '_(symplectic geometry)', '_(tensor_theory)']


            # Function to check if a title is a disambiguation page
def is_disambiguation_page(wikidata_id):
    url = f"https://www.wikidata.org/w/api.php"
    params = {
        "action": "wbgetentities",
        "ids": wikidata_id,
        "format": "json"
    }
    response = requests.get(url, params=params).json()
    claims = response.get("entities", {}).get(wikidata_id, {}).get("claims", {})
    if "P31" in claims:
        for claim in claims["P31"]:
            if claim.get("mainsnak", {}).get("datavalue", {}).get("value", {}).get("id") == "Q4167410":
                return True
    return False

In [19]:
import os


# Define the folder containing the files
folder_path = 'context'

# Open the context_mappings.csv file to write the results
with open('context_mappings.csv', mode='w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(['Wikidata ID', 'Context'])

    # Iterate over each file in the folder
    for filename in os.listdir(folder_path):
        if filename.endswith('.md'):  # Assuming the files are markdown files
            with open(os.path.join(folder_path, filename), 'r') as f:
                for line in f:
                    if line.startswith('SUGGESTION'):
                        suggestion = line.split(':', 1)[1].strip().replace(' ', '_').capitalize()  # Read the suggestion line and format
                        break
            found = False
            for cat in wikicats:
                suggestion_with_cat = suggestion + cat
                wikidata_id = map.title_to_id(suggestion_with_cat)
                if wikidata_id and not is_disambiguation_page(wikidata_id):
                    writer.writerow([f"[{wikidata_id}](https://www.wikidata.org/wiki/{wikidata_id})", f"[{filename}](https://mathgloss.github.io/MathGloss/context/{filename})"])
                    print(f"SUGGCAT {suggestion_with_cat} found: {wikidata_id}")
                    found = True
                    break
            if not found:
                wikidata_id = map.title_to_id(suggestion)
                if wikidata_id and not is_disambiguation_page(wikidata_id):
                    writer.writerow([f"[{wikidata_id}](https://www.wikidata.org/wiki/{wikidata_id})", f"[{filename}](https://mathgloss.github.io/MathGloss/context/{filename})"])
                    print(f"SUGG {suggestion} found: {wikidata_id}")
                    found = True
                else:    
                    title = filename.replace('.md','').replace(' ', '_').capitalize()
                    for cat in wikicats:
                        title_with_cat = title + cat
                        wikidata_id = map.title_to_id(title_with_cat)
                        if wikidata_id and not is_disambiguation_page(wikidata_id):
                            writer.writerow([f"[{wikidata_id}](https://www.wikidata.org/wiki/{wikidata_id})", f"[{filename}](https://mathgloss.github.io/MathGloss/context/{filename})"])
                            print(f"REGCAT {suggestion_with_cat} found: {wikidata_id}")
                            found = True
                            break
                    if not found:
                        wikidata_id = map.title_to_id(title)
                        if wikidata_id and not is_disambiguation_page(wikidata_id):
                            writer.writerow([f"[{wikidata_id}](https://www.wikidata.org/wiki/{wikidata_id})", f"[{filename}](https://mathgloss.github.io/MathGloss/context/{filename})"])
                            print(f"REG {title} found: {wikidata_id}")
                            found = True
        if not found:
            print(f"NOT FOUND {filename}")
                        


SUGG Simplicial_set found: Q1467124
SUGG Full_subcategory found: Q541563
NOT FOUND satisfied_by.md
SUGG Split_epimorphism found: Q17103180
SUGG Cocomplete_category found: Q4370335
SUGG G-torsor found: Q3755543
REG Effective found: Q1296024
NOT FOUND space_of_configurations_of_n_points.md
SUGG List_monad found: Q27948
SUGGCAT Kernel_(category_theory) found: Q2920416
SUGGCAT Coproduct_(category_theory) found: Q692689
REGCAT Multiplication_natural_transformation_(tensor_theory) found: Q40276
SUGGCAT Cokernel_(category_theory) found: Q2156511
SUGG Fiber_space found: Q5446381
SUGG Small_category found: Q719395
SUGG Initial_object found: Q529752
SUGGCAT Lattice_(group_theory) found: Q6497088
NOT FOUND strictly_creates.md
NOT FOUND right_inverse.md
SUGG Sierpinski_space found: Q2119470
NOT FOUND limit_cone.md
SUGG Kernel_operator found: Q10564851
NOT FOUND continuation_monads.md
SUGG Unital_ring found: Q161172
SUGGCAT Sheaf_(mathematics) found: Q595298
REGCAT Unit_natural_transformation_(tens

In [11]:
for cat in wikicats:
    suggestion_with_cat = "Simplicial_set" + cat
    wikidata_id = map.title_to_id(suggestion_with_cat)
    if wikidata_id:
        print(f"Simplicial_set{cat}: {wikidata_id}")
    else:
        print(f"Simplicial_set{cat}: NOT FOUND")

Simplicial_set_(mathematics): NOT FOUND
Simplicial_set_(category_theory): NOT FOUND
Simplicial_set_(linear_algebra): NOT FOUND
Simplicial_set_(algebraic_geometry): NOT FOUND
Simplicial_set_(algebraic_topology)_(commutative_algebra): NOT FOUND
Simplicial_set_(field_theory): NOT FOUND
Simplicial_set_(game_theory): NOT FOUND
Simplicial_set_(topology): NOT FOUND
Simplicial_set_(differential_geometry): NOT FOUND
Simplicial_set_(graph_theory): NOT FOUND
Simplicial_set_(group_theory): NOT FOUND
Simplicial_set_(invariant_theory): NOT FOUND
Simplicial_set_(module_theory): NOT FOUND
Simplicial_set_(order_theory): NOT FOUND
Simplicial_set_(ring_theory): NOT FOUND
Simplicial_set_(representation_theory): NOT FOUND
Simplicial_set_(set_theory): NOT FOUND
Simplicial_set_(string_theory): NOT FOUND
Simplicial_set_(symplectic geometry): NOT FOUND
Simplicial_set_(tensor_theory): NOT FOUND


In [15]:
print(map.title_to_id("Simplicial_set"))

Q1467124


In [None]:
pr