In [1]:
import sqlite3
from typing import List, Optional


class WikiMapper:
    """Uses a precomputed database created by `create_wikipedia_wikidata_mapping_db`.

    Every lookup opens its own short-lived SQLite connection to `path_to_db`
    and closes it again before returning.
    """

    def __init__(self, path_to_db: str):
        self._path_to_db = path_to_db

    def _query(self, sql: str, params: tuple) -> list:
        """Run a read-only query against the mapping database and return all rows.

        `sqlite3.Connection` used as a context manager only commits or rolls
        back the transaction; it does not close the connection. The connection
        is therefore closed explicitly so that repeated lookups do not leak
        file handles.

        Args:
            sql: The SQL statement, using `?` placeholders.
            params: The values bound to the placeholders.

        Returns:
            list: All result rows as tuples.

        """
        conn = sqlite3.connect(self._path_to_db)
        try:
            return conn.execute(sql, params).fetchall()
        finally:
            conn.close()

    def title_to_id(self, page_title: str) -> Optional[str]:
        """Given a Wikipedia page title, returns the corresponding Wikidata ID.

        The page title is the last part of a Wikipedia url **unescaped** and spaces
        replaced by underscores, e.g. for `https://en.wikipedia.org/wiki/Fermat%27s_Last_Theorem`,
        the title would be `Fermat's_Last_Theorem`.

        Args:
            page_title: The page title of the Wikipedia entry, e.g. `Manatee`.

        Returns:
            Optional[str]: If a mapping could be found for `page_title`, then return
                           it, else return `None`.

        """

        rows = self._query(
            "SELECT wikidata_id FROM mapping WHERE wikipedia_title=?", (page_title,)
        )

        # The first matching row decides; a row whose ID column is NULL counts as "no mapping".
        if rows and rows[0][0] is not None:
            return rows[0][0]
        return None

    def url_to_id(self, wiki_url: str) -> Optional[str]:
        """Given an URL to a Wikipedia page, returns the corresponding Wikidata ID.

        This is just a convenience function. It is not checked whether the index and
        URL are from the same dump. The part after the last `/` is looked up as-is,
        i.e. it is not unescaped.

        Args:
            wiki_url: The URL to a Wikipedia entry.

        Returns:
            Optional[str]: If a mapping could be found for `wiki_url`, then return
                           it, else return `None`.

        """

        title = wiki_url.rsplit("/", 1)[-1]
        return self.title_to_id(title)

    def id_to_titles(self, wikidata_id: str) -> List[str]:
        """Given a Wikidata ID, return a list of corresponding pages that are linked to it.

        Due to redirects, the mapping from Wikidata ID to Wikipedia title is not unique.

        Args:
            wikidata_id (str): The Wikidata ID to map, e.g. `Q42797`.

        Returns:
            List[str]: A list of Wikipedia pages that are linked to this Wikidata ID.

        """

        rows = self._query(
            "SELECT DISTINCT wikipedia_title FROM mapping WHERE wikidata_id =?", (wikidata_id,)
        )

        return [row[0] for row in rows]

In [2]:
# Build `dictlist`, a dict keyed by QID whose values are dicts mapping a source
# name ('france', 'chicago', 'mulima', 'nlab') to the term that source's mapping
# file associates with the QID.


def _read_lines(path):
    """Return all lines of the text file at `path`, closing the file afterwards."""
    with open(path, 'r') as handle:
        return handle.readlines()


dictlist = dict()

# france: each line is split on ': ' into (name, QID). Only the first character
# of the name is lower-cased; the rest is kept verbatim.
francelist = _read_lines('/Users/lucyhorowitz/Documents/spaCy-Obsidian project/wikidata/france-4.txt')
for line in francelist:
    if not line.strip():
        continue  # tolerate blank lines (e.g. a trailing newline at end of file)
    data = line.split(': ')
    qid = data[1].strip()
    # Slicing instead of indexing keeps an empty name from raising IndexError.
    name = data[0][:1].lower() + data[0][1:]
    dictlist[qid] = dict(france=name)

# chicago: each line is split on ': ' into (QID, term). The stored value is the
# tail of a markdown link: "<term>](<MathGloss URL>)".
chicagolist = _read_lines('/Users/lucyhorowitz/Documents/spaCy-Obsidian project/wikidata/August/chicago-maps.txt')
for line in chicagolist:
    if not line.strip():
        continue
    data = line.split(': ')
    qid = data[0]
    # Drop only the line terminator. Unconditionally slicing off the last
    # character would truncate the slug when the final line has no newline.
    term = data[1].rstrip('\n')
    name = term.strip().lower() + "](https://mathgloss.github.io/MathGloss/chicago/" + term.replace(' ', '_').lower() + ")"
    dictlist.setdefault(qid, {})['chicago'] = name

# mulima and nlab: each line is split on ': ' into (name, QID); underscores in
# the name become spaces and the name is lower-cased.
mulimalist = _read_lines('/Users/lucyhorowitz/Documents/spaCy-Obsidian project/wikidata/August/mulima-maps.txt')
nlablist = _read_lines('/Users/lucyhorowitz/Documents/spaCy-Obsidian project/wikidata/August/nlab-maps.txt')
for source, source_lines in (('mulima', mulimalist), ('nlab', nlablist)):
    for line in source_lines:
        if not line.strip():
            continue
        data = line.split(': ')
        qid = data[1].strip()
        name = data[0].replace('_', ' ').lower()
        dictlist.setdefault(qid, {})[source] = name
print(dictlist)


{'Q125977': {'france': 'vector space](https://leanprover-community.github.io/mathlib4_docs/./Mathlib/Algebra/Module/Basic.html#Module)', 'chicago': 'vector space](https://mathgloss.github.io/MathGloss/chicago/vector_space)', 'hosgood': 'vector space', 'nlab': 'vector space](https://ncatlab.org/nlab/show/vector+space)'}, 'Q728435': {'france': 'vector subspace](https://leanprover-community.github.io/mathlib4_docs/./Mathlib/Algebra/Module/Submodule/Basic.html#Subspace)', 'chicago': 'vector subspace](https://mathgloss.github.io/MathGloss/chicago/vector_subspace)', 'nlab': 'subspace (linear algebra)](https://ncatlab.org/nlab/show/subspace)'}, 'Q1393796': {'france': 'quotient space (linear algebra)](https://leanprover-community.github.io/mathlib4_docs/./Mathlib/GroupTheory/GroupAction/Quotient.html#MulAction.orbitEquivQuotientStabilizer)', 'chicago': 'quotient vector space](https://mathgloss.github.io/MathGloss/chicago/quotient_vector_space)', 'nlab': 'quotient space (linear algebra)](https:

In [3]:
import re

# Write one comma-separated row per QID in `dictlist`:
#     QID,chicago,france,mulima,nlab
# A QID whose only source is nlab keeps its nlab name only if the QID also
# occurs in the TAC file; otherwise the nlab column is left empty.
with open("/Users/lucyhorowitz/Documents/spaCy-Obsidian project/tac/tac-wikidata.txt", "r") as tac_file:
    tac = tac_file.read()

# Compare whole QIDs. A plain substring test on the file text would report
# "Q12" as present whenever the text contains e.g. "Q125977".
tac_qids = set(re.findall(r'Q\d+', tac))

# Mode 'w' so that re-running the cell regenerates the file instead of
# appending a second copy of every row.
with open('commas-4.csv', 'w') as opener:
    for qid, sources in dictlist.items():
        raw_cells = [sources.get(key) for key in ('chicago', 'france', 'mulima')]
        # True when at least one source other than nlab has a name for this QID.
        others = any(raw_cells)
        cells = [cell.strip() if cell else '' for cell in raw_cells]

        nlab = sources.get('nlab')
        if nlab and (others or qid in tac_qids):
            cells.append(nlab.strip())
        else:
            cells.append('')

        opener.write(qid + ',' + ','.join(cells) + '\n')

In [4]:
# Copy commas-4.csv to commas-lean4.csv, leaving out rows that carry a QID but
# no name in any of the four source columns.
# Both files are managed by `with`, so the output is flushed and closed before
# any later cell reads it. Mode 'w' keeps re-runs from appending duplicates.
with open('commas-4.csv', 'r') as opener, open('commas-lean4.csv', 'w') as newfile:
    for line in opener:
        # Test the columns themselves: the substring ",,,," also occurs in rows
        # whose only non-empty column is the last one (e.g. "Q1,,,,name").
        if not any(field.strip() for field in line.split(',')[1:]):
            continue
        newfile.write(line)

In [5]:
with open('/Users/lucyhorowitz/Documents/spaCy-Obsidian project/wikidata/commas-lean4.csv', 'r') as csv_file:
    tuples = csv_file.readlines()

# Give each QID a string representative and sort the QIDs alphabetically by it.
# Columns are QID,chicago,france,mulima,nlab; the first non-empty one in the
# order chicago, mulima, france, nlab is used.
# NOTE(review): an earlier comment here gave the order as chicago, nlab, mulima,
# france, which is not what the code does - confirm which order is intended.
tosort = []
for line in tuples:
    if not line.strip():
        continue  # skip blank lines instead of failing on the missing columns
    data = line.split(',')
    # Strip before testing: the last field always carries the line's '\n', so an
    # unstripped truthiness check can never see it as empty.
    columns = [field.strip() for field in data[1:5]]
    columns += [''] * (4 - len(columns))  # pad short rows so unpacking is safe
    chicago, france, mulima, nlab = columns

    representative = chicago or mulima or france
    if representative:
        tosort.append((data[0], representative))
    elif nlab:
        tosort.append((data[0], nlab))
        # Report the QIDs whose representative comes from the nlab column.
        print(data[0] + ": " + data[4])

fixed = sorted(tosort, key=lambda pair: pair[1])
print(fixed)


[('Q14481419', '0-1 law](https://leanprover-community.github.io/mathlib4_docs/./Mathlib/Probability/Independence/ZeroOne.html#ProbabilityTheory.measure_zero_or_one_of_measurableSet_limsup_atTop)'), ('Q318737', 'abelian category'), ('Q181296', 'abelian](https://mathgloss.github.io/MathGloss/chicago/abelian)'), ('Q318598', 'abelianization](https://mathgloss.github.io/MathGloss/chicago/abelianization)'), ('Q515874', 'abscissa'), ('Q20827138', 'absolute continuity of measure](https://mathgloss.github.io/MathGloss/chicago/absolute_continuity_of_measure)'), ('Q332504', 'absolute continuity](https://mathgloss.github.io/MathGloss/chicago/absolute_continuity)'), ('Q332465', 'absolute convergence'), ('Q120812', 'absolute value](https://mathgloss.github.io/MathGloss/chicago/absolute_value)'), ('Q91134251', 'absolutely convergent series'), ('Q844451', 'acnode'), ('Q32043', 'addition'), ('Q4681343', 'additive category'), ('Q357858', 'adjoint'), ('Q2858846', 'adjoint of a linear transformation](http

In [6]:
def _as_link(cell):
    """Strip `cell` and prefix '[' when it holds the tail of a markdown link.

    Values stored as "text](url)" lack only the opening bracket; values without
    a ']' are plain text and are returned stripped but otherwise unchanged.
    """
    cell = cell.strip()
    return '[' + cell if ']' in cell else cell


# Write the rows in sorted order as
#     [QID](wikidata URL),chicago,france,mulima,nlab
# Mode 'w' so that re-running the cell regenerates the file instead of
# appending a second copy of every row.
with open('commas-sorted4.csv', 'w') as opener:
    for x in fixed:
        qid = x[0]
        sources = dictlist[qid]
        cells = [
            '[' + qid + '](https://www.wikidata.org/wiki/' + qid + ')',
            _as_link(sources.get('chicago') or ''),
            _as_link(sources.get('france') or ''),
            # mulima values are written as plain text, never as links
            (sources.get('mulima') or '').strip(),
            # nlab gets the same link handling as chicago and france, and a
            # missing value is an empty last column, not an extra separator
            _as_link(sources.get('nlab') or ''),
        ]
        opener.write(','.join(cells) + '\n')