# Get Available Art Movements in Wikidata

In [1]:
from utils.wikidata_querier import get_all_instaces_of_category
import pandas as pd

# Wikidata identifier of the category to enumerate: Q968159 ("Art movement").
category_id = "wd:Q968159"

# Ask the project's Wikidata helper for the instances of that category and
# load whatever it returns into a DataFrame for inspection.
movements = get_all_instaces_of_category(category_id)
df_mov = pd.DataFrame(movements)

# Report the number of results and preview the first five rows
# (five is the default row count of DataFrame.head()).
print(f"Found {len(movements)} movements!")
print(df_mov.head())

Found 801 movements!
   category_name                            category_id
0    Renaissance   http://www.wikidata.org/entity/Q4692
1           Dada   http://www.wikidata.org/entity/Q6034
2  neoclassicism  http://www.wikidata.org/entity/Q14378
3     street art  http://www.wikidata.org/entity/Q17516
4    Art Nouveau  http://www.wikidata.org/entity/Q34636


# Get All People Involved with the Art Nouveau Movement

In [4]:
from utils.wikidata_querier import get_wiki_persons_from_movement
# Movement under study: human-readable label plus its Wikidata identifier
# (Q34636 is listed as "Art Nouveau" in the movements table printed earlier).
movement_label = "Art Nouveau"
movement_id = "wd:Q34636"

# The helper returns two collections; only the "essential" one is tabulated.
people_all, people_essential = get_wiki_persons_from_movement(movement_id)
df_per = pd.DataFrame(people_essential)

# Summarise the result and preview the first five rows.
print(f"Found {len(people_essential)} people involved with {movement_label}!")
print(df_per.head(5))

Found 122 people involved with Art Nouveau!
                  person date_of_birth date_of_death  \
0      Adolfo Hohenstein    1854-03-18    1928-04-13   
1            Otto Wagner    1841-07-13    1918-04-11   
2         Hede von Trapp    1877-11-18    1947-12-29   
3  Charles l'Eplattenier    1874-10-09    1946-06-07   
4      Ludwig Fahrenkrog    1867-10-20    1952-10-27   

                                           wiki_page  
0    https://en.wikipedia.org/wiki/Adolfo_Hohenstein  
1          https://en.wikipedia.org/wiki/Otto_Wagner  
2       https://en.wikipedia.org/wiki/Hede_von_Trapp  
3  https://en.wikipedia.org/wiki/Charles_L%27%C3%...  
4    https://en.wikipedia.org/wiki/Ludwig_Fahrenkrog  


# Get the Wikipedia Biographies of Each Participant Involved in the Movement

In [5]:
import os, time
from utils.utils_wiki import get_wikipedia_article, save_wikipedia_page

# Upper bound on the number of biographies saved in one run of this cell.
MAX_PAGES = 5
# Pause between two consecutive downloads, in seconds
# (presumably to avoid hammering Wikipedia; tune as needed).
REQUEST_DELAY_S = 5

# One sub-directory per movement, e.g. data/wikipedia/Art_Nouveau
output_dir = f"data/wikipedia/{movement_label.replace(' ','_')}"
# makedirs creates missing parent directories (data/, data/wikipedia/) as well
# and does nothing when the directory already exists, so the cell is re-runnable
# on a fresh checkout.
os.makedirs(output_dir, exist_ok=True)

stop = 0  # number of pages saved so far
for wiki_url in df_per["wiki_page"]:
    page = get_wikipedia_article(query_url=wiki_url)
    if not page:
        # Nothing usable was retrieved for this URL; try the next person.
        continue

    # File name is the page title, lower-cased, with spaces turned into underscores.
    file_name = f"{page.title.replace(' ', '_').lower()}.txt"
    save_wikipedia_page(
        page,
        output_path=f"{output_dir}/{file_name}",
        include_metadata=True,
        include_sections=True,
        include_infobox=True,
    )
    stop += 1

    # Stop as soon as the quota is reached, before sleeping, so the cell does
    # not wait needlessly after the final download.
    if stop == MAX_PAGES:
        break
    time.sleep(REQUEST_DELAY_S)

Options: {'Adolfo Hohenstein', 'La bohème', 'Hohenstein'}
Ordered Options Compund Metric: [RankedArticle(wikipage_title='Adolfo Hohenstein', queried_name='Adolfo Hohenstein', lev_similarity=1.0, token_overlap=1.0, dates_confidence=-1), RankedArticle(wikipage_title='Hohenstein', queried_name='Adolfo Hohenstein', lev_similarity=0.7407407407407407, token_overlap=0.5, dates_confidence=-1)]

Retrieving page for Adolfo Hohenstein
Wiki Life Data = (1854 - 1928)
Page Chosen! Confidence Score = 1
Options: {'Otto Wagner', 'Otto-Wagner-Spital', 'Vienna Secession'}
Ordered Options Compund Metric: [RankedArticle(wikipage_title='Otto Wagner', queried_name='Otto Wagner', lev_similarity=1.0, token_overlap=1.0, dates_confidence=-1)]

Retrieving page for Otto Wagner
Wiki Life Data = (1841 - 1918)
Page Chosen! Confidence Score = 1
Options: {'Hede von Trapp', 'Trapp', 'Georg von Trapp'}
Ordered Options Compund Metric: [RankedArticle(wikipage_title='Hede von Trapp', queried_name='Hede von Trapp', lev_simil

In [None]:
! python en_text_to_json_flair.py 'data/wikipedia/Art_Nouveau'

In [17]:
import requests
def get_wikidata_id_from_wikipedia_url(wiki_url: str, timeout: float = 3):
    """Resolve a Wikipedia article URL to the Wikidata entity it is about.

    Sends a SPARQL query to the Wikidata query endpoint asking for the
    ``schema:about`` target of ``wiki_url``.

    Args:
        wiki_url: Full article URL, e.g.
            ``"https://en.wikipedia.org/wiki/Albert_Einstein"``.
        timeout: Seconds to wait for the HTTP response. Defaults to 3.

    Returns:
        The entity URI as a string (e.g.
        ``"http://www.wikidata.org/entity/Q937"``), or ``None`` when the URL
        cannot be embedded in a query, the request fails, the endpoint replies
        with a non-200 status, the response has an unexpected shape, or no
        binding is returned. If several bindings come back, the last one wins.
    """
    url = 'https://query.wikidata.org/sparql'

    # The URL is spliced into the query as an IRI (<...>). Reject anything that
    # cannot legally appear there (SPARQL IRIREF forbids <>"{}|^`\ and
    # characters up to U+0020) so a malformed value cannot alter the query.
    forbidden = set('<>"{}|^`\\')
    if not wiki_url or any(ch in forbidden or ch <= ' ' for ch in wiki_url):
        print(f"Invalid Wikipedia URL: {wiki_url!r}")
        return None

    query = f"""
    SELECT ?wikidataID
    WHERE {{
    <{wiki_url}> schema:about ?wikidataID .
    }}
    """

    # Call API. Only network/HTTP-layer and JSON-decoding failures are treated
    # as "lookup failed"; anything else (e.g. KeyboardInterrupt) propagates.
    try:
        r = requests.get(url, params={'format': 'json', 'query': query}, timeout=timeout)
        if r.status_code != 200:
            print(f"Failed to query Wikidata (HTTP {r.status_code})")
            return None
        data = r.json()
    except (requests.RequestException, ValueError):
        print("Failed to query Wikidata")
        return None

    # Feed data from Wikidata Response
    wikidata_id = None
    try:
        for item in data["results"]["bindings"]:
            wikidata_id = item["wikidataID"]["value"]
    except (KeyError, TypeError):
        # The payload was valid JSON but not shaped like a SPARQL result set.
        print("Unexpected response format from Wikidata")
        return None

    return wikidata_id

# Example lookup: resolve the English Wikipedia article on Albert Einstein;
# the cell's value (the returned entity URI, or None) is shown as its output.
get_wikidata_id_from_wikipedia_url("https://en.wikipedia.org/wiki/Albert_Einstein")

'http://www.wikidata.org/entity/Q937'