In [23]:
import requests as r
from bs4 import BeautifulSoup

### Getting translations from Wikidata based on entity ids and desired languages

In [52]:
import json

def get_json(wiki_id, timeout=30):
    '''Fetch the Wikidata entity document for ``wiki_id`` and return it decoded.

    Simple helper for getting the raw JSON (not really needed anymore).

    Args:
        wiki_id: Wikidata entity id, e.g. "Q733195".
        timeout: Seconds to wait for the server before giving up; forwarded to
            the HTTP client so a stalled connection cannot hang the notebook.

    Returns:
        The decoded JSON document.

    Raises:
        Whatever the HTTP client raises for connection problems, an error
        status code, or a body that is not valid JSON.
    '''
    response = r.get(
        f'https://www.wikidata.org/wiki/Special:EntityData/{wiki_id}.json',
        timeout=timeout,
    )
    # Surface 4xx/5xx as an HTTP error instead of trying to decode an error page.
    response.raise_for_status()

    return response.json()


def get_translations_from_id(wiki_id: str, desired_translations: list = None, timeout: float = 30):
    '''Return the labels (translations) stored on Wikidata for ``wiki_id``.

    Args:
        wiki_id: Wikidata entity id, e.g. "Q110246392".
        desired_translations: Optional list of language codes (must match the
            language keys used in the entity's ``labels``, e.g. 'fr', 'zh-tw').
            When omitted or empty, every available label is returned.
        timeout: Seconds to wait for the server; forwarded to the HTTP client.

    Returns:
        A dict mapping language code to the label entry exactly as found in the
        entity document. Requested languages that have no label are reported on
        stdout and left out of the result.

    Raises:
        Whatever the HTTP client raises for connection problems, an error
        status code, or a non-JSON body. KeyError when no language filter is
        given and the document has no ``labels`` for ``wiki_id``.
    '''
    response = r.get(
        f'https://www.wikidata.org/wiki/Special:EntityData/{wiki_id}.json',
        timeout=timeout,
    )
    # Surface 4xx/5xx as an HTTP error instead of trying to decode an error page.
    response.raise_for_status()
    payload = response.json()

    if not desired_translations:
        return payload['entities'][wiki_id]['labels']

    found = {}
    for translation in desired_translations:
        try:
            found[translation] = payload['entities'][wiki_id]['labels'][translation]
        except (KeyError, TypeError):
            # Best effort: only a failed lookup is skipped. Anything else
            # (e.g. KeyboardInterrupt) is no longer swallowed by a bare except.
            print(f"Encountered error retrieving {translation}")

    return found
            


In [50]:
# Fetch only the French, German and Thai labels for a single entity.
# NOTE(review): the recorded output below shows bare strings, while the function as
# defined above stores the whole label entry per language — re-run to refresh it.
es = get_translations_from_id("Q110246392", ['fr', 'de', 'th'])
es

{'fr': 'Little Women', 'de': 'Little Women', 'th': 'สามพี่น้อง'}

In [51]:
# With no language list, every label available for the entity is returned.
all_of_em = get_translations_from_id("Q110246392")
all_of_em

{'en': {'language': 'en', 'value': 'Little Women'},
 'ar': {'language': 'ar', 'value': 'نساء صغيرات'},
 'id': {'language': 'id', 'value': 'Little Women'},
 'ko': {'language': 'ko', 'value': '작은 아씨들'},
 'es': {'language': 'es', 'value': 'Las hermanas'},
 'zh': {'language': 'zh', 'value': '小女子'},
 'vi': {'language': 'vi', 'value': 'Little Women'},
 'ja': {'language': 'ja', 'value': '新・若草物語'},
 'fa': {'language': 'fa', 'value': 'زنان کوچک'},
 'ms': {'language': 'ms', 'value': 'Little Women'},
 'ru': {'language': 'ru', 'value': 'Маленькие женщины'},
 'zh-tw': {'language': 'zh-tw', 'value': '小女子'},
 'fr': {'language': 'fr', 'value': 'Little Women'},
 'de': {'language': 'de', 'value': 'Little Women'},
 'th': {'language': 'th', 'value': 'สามพี่น้อง'},
 'uz': {'language': 'uz', 'value': 'Little Women'},
 'it': {'language': 'it', 'value': 'Piccole donne'},
 'zh-hans': {'language': 'zh-hans', 'value': '小女子'}}

### Query for multiple items at once

> This does not save resources (one request is still sent per id); it only makes things easier on our end.
> We may want to look into a better way to reduce the number of requests sent to Wikidata.

In [41]:
def aggregate_multiple(wiki_ids: list[str], translations: list[str]=None):
    '''Collect translations for several wiki ids in a single call.

    Each id is looked up independently with ``get_translations_from_id``. An id
    whose lookup raises is reported on stdout and omitted from the result, so
    one failure does not abort the rest.

    Returns:
        A dict mapping each successfully queried wiki id to its translations.
    '''
    collected = {}
    for entity_id in wiki_ids:
        try:
            collected[entity_id] = get_translations_from_id(entity_id, translations)
        except Exception as err:
            print(f"Unable to get translations for {entity_id}\n{err}")

    return collected


In [45]:
# Fetch Italian, French, Russian and Japanese labels for three entities in one call.
# A language with no label is reported on stdout and left out of that entity's dict.
t = aggregate_multiple(["Q110246392", "Q910656", "Q733195"], ['it', 'fr', 'ru', 'ja'])
t

Encountered error retrieving ru


{'Q110246392': {'it': {'language': 'it', 'value': 'Piccole donne'},
  'fr': {'language': 'fr', 'value': 'Little Women'},
  'ru': {'language': 'ru', 'value': 'Маленькие женщины'},
  'ja': {'language': 'ja', 'value': '新・若草物語'}},
 'Q910656': {'it': {'language': 'it', 'value': 'olio di neem'},
  'fr': {'language': 'fr', 'value': 'Huile de neem'},
  'ja': {'language': 'ja', 'value': 'ニームオイル'}},
 'Q733195': {'it': {'language': 'it', 'value': 'Repubblica Galattica'},
  'fr': {'language': 'fr', 'value': 'République galactique'},
  'ru': {'language': 'ru', 'value': 'Галактическая Республика'},
  'ja': {'language': 'ja', 'value': '銀河共和国'}}}

In [38]:
# Re-display the aggregated translations held in `t`.
t

### Getting Ids from string queries

> Returns the first search result, assuming it is the intended entity (not necessarily true).

In [66]:
# Now need to find a good way to query / find the ids from a name
#  (ID is also provided via training data but it might not hurt to have this feature)
from bs4 import BeautifulSoup
import urllib.parse

def query_for_id(name: str, timeout: float = 30):
    '''Attempt to resolve a plain-text name to a wiki id via the Wikidata search page.

    Naively treats the first search result as the "interpreted" match.

    Args:
        name: Free-text name to search for, e.g. "Galactic Republic".
        timeout: Seconds to wait for the server; forwarded to the HTTP client.

    Returns:
        The id taken from the first result's link, or None when the lookup
        fails (error status, no results, unexpected markup). Failures are
        reported on stdout rather than raised.

    Raises:
        Whatever the HTTP client raises when the request itself cannot be made;
        only problems with the response are handled here.
    '''
    encoded = urllib.parse.urlencode({"search": name})
    req = r.get(f'https://www.wikidata.org/w/index.php?{encoded}', timeout=timeout)

    try:
        # An error page is reported as such, not as a confusing parsing failure.
        req.raise_for_status()

        soup = BeautifulSoup(req.text, 'html.parser')
        result = soup.find('div', class_="mw-search-result-heading")
        if result is None:
            raise LookupError("no search results found")

        link = result.find('a')
        if link is None:
            raise LookupError("first search result has no link")

        # Expects a site-relative href of the form "/wiki/<id>", so the id is
        # the third "/"-separated piece.
        return link['href'].split("/")[2]

    except Exception as e:
        print(f"Encountered : {e}\n\t when querying for {name}")
        return None

In [67]:
# Example: resolve a plain-text name to the id of its first search result.
query_for_id("Galactic Republic")

'Q733195'