# import libraries

In [None]:
import sqlite3
import pandas as pd
from wikidata.client import Client
import requests
import time
import urllib.error
import urllib


# helper functions

In [None]:
def fetch_wikidata(params):
    """
    Fetch data from the Wikidata API, retrying for as long as requests time out.

    Args:
        params (dict): The query parameters to be sent with the GET request.

    Returns:
        requests.Response: The response object returned by the API. The HTTP
        status code is not inspected here.

    Notes:
        ``requests.Timeout`` is handled internally: the request is repeated
        after a 5-second pause, with no upper bound on the number of attempts.
        Any other ``requests`` exception propagates to the caller.
    """
    url = 'https://www.wikidata.org/w/api.php'
    # Retry in a loop rather than by recursion so that a long outage cannot
    # exhaust the interpreter's recursion limit.
    while True:
        try:
            return requests.get(url, params=params, timeout=100)  # Timeout set to 100 seconds
        except requests.Timeout:
            print("Timeout error occurred. Retrying...")
            time.sleep(5)  # Wait a while before requesting again


def name_to_place_of_birth(query, prop_id='P19'):
    """
    Look up a property of the first Wikidata entity matching a name.

    Despite its name, the function serves any property: the callers in this
    file use it for 'P19' (place of birth) and 'P569' (date of birth).

    Args:
        query (str): The name to search for.
        prop_id (str): The Wikidata property ID to read from the entity.
            Defaults to 'P19' (place of birth).

    Returns:
        The label of the property value when prop_id is 'P19', the raw
        property value for any other property, or None when the search has no
        results, the property cannot be read, or the value is falsy.

    Raises:
        Exception: Errors raised while searching for or loading the entity
            itself are not handled here and propagate to the caller.
    """
    params = {
        'action': 'wbsearchentities',
        'format': 'json',
        'search': query,
        'language': 'en'
    }

    # Fetch API
    data = fetch_wikidata(params)
    if data is None:
        return None

    # Decode the response body as JSON
    data = data.json()
    if not data.get('search'):
        return None

    # Get the Wikidata ID of the first (best-ranked) search hit
    entity_id = data['search'][0]['id']

    # Fetch the client
    client = Client()

    # Get the entity by ID
    entity = client.get(entity_id, load=True)
    print(f"{query} is {entity.description}.")

    try:
        prop = client.get(prop_id, load=True)
        # Get the value stored under the requested property
        location = entity[prop]
    except Exception:
        # Best effort: any failure to read the property is reported as "missing".
        # `Exception` (not a bare except) so Ctrl-C / SystemExit still work.
        print(f"The property {prop_id} is missing for the entity {entity_id}.")
        return None

    if not location:
        return None
    # A place of birth is itself an entity, so return its label; other
    # properties (e.g. a date) are returned as-is.
    return location.label if prop_id == 'P19' else location




def get_country_by_city(city_name, username='elper.ilya', timeout=30):
    """
    Return the country name GeoNames reports for the best match of a city name.

    Args:
        city_name (str): The city (or place) name to search for.
        username (str): The GeoNames account name sent with the request.
        timeout (float): Seconds to wait for the API before giving up.

    Returns:
        str or None: The ``countryName`` of the first search result, or None
        when there are no results, the HTTP status is not 200, or any error
        occurs (errors are printed, not raised).
    """
    # Base URL for requests to GeoNames API
    base_url = 'http://api.geonames.org/searchJSON'

    # Request parameters
    # NOTE: only the account name belongs here; credentials must never be
    # stored in source comments.
    params = {
        'q': city_name,        # Requested city
        'maxRows': 1,          # Maximum number of results
        'username': username,  # GeoNames account name
    }
    try:
        # A timeout keeps a stalled connection from hanging the whole run;
        # a timeout error is reported by the handler below like any other.
        response = requests.get(base_url, params=params, timeout=timeout)

        # Check the success of the request
        if response.status_code != 200:
            print(f"Error: {response.status_code}")
            return None

        # Convert the response to JSON
        data = response.json()

        # Check for results
        if 'geonames' in data and data['geonames']:
            # Extract the country name from the first result
            return data['geonames'][0]['countryName']

        print("No results found.")
        return None
    except Exception as e:
        # Deliberately best-effort: report the problem and carry on with None
        print(f"An error occurred: {e}")
        return None


# load the database

In [None]:
# Read the whole `filtered_rows` table into a DataFrame.
conn = sqlite3.connect('2012_to_2017_deputat_speakers.db')
try:
    query = "SELECT * FROM filtered_rows"
    filtered_rows = pd.read_sql(query, conn)
finally:
    # Close the connection even if the query fails
    conn.close()

# city of birth from wikidata

In [None]:
# Title-case the speaker names, then query Wikidata once per unique name.
filtered_rows['speaker_name'] = filtered_rows['speaker_name'].str.title()
speakers = filtered_rows['speaker_name'].unique()
places_of_birth = []

for name in speakers:
    # Keep asking until the lookup finishes without a URL error
    while True:
        try:
            print(name)
            place_of_birth = name_to_place_of_birth(name, 'P19')
        except urllib.error.URLError as err:
            print(f"Error when requesting for {name}: {err}")
            # Back off for 5 seconds before the next attempt
            time.sleep(5)
        else:
            print(f"was born in {place_of_birth}")
            places_of_birth.append(place_of_birth)
            break
    print('#' * 50)

In [None]:
# Pair every speaker with the place of birth found on Wikidata.
speakers_place_of_birth = pd.DataFrame({'speakers': speakers, 'place_of_birth': places_of_birth})

# Manually supplied places of birth; each entry overwrites whatever the
# Wikidata lookup produced for that speaker.
manual_places_of_birth = {
    'Tamara Venrooy-Van Ark': "The Hague",
    'Paulus Jansen': "Roermond",
    'Eddy Van Hijum': "Delft",
    'René Leegte': "The Hague",
    'Foort Van Oosten': "Dordrecht",
    'Albert De Vries': "Middelburg",
    'Johan Houwers': "Winterswijk",
    'Joost Taverne': "Amstelveen",
    'Van Der Stoep': "Delft",
    'Hayke Siebold Veldman': "Zoetermeer",
    'Jock Geselschap': "Johannesburg",
    'Zihni Özdil': "Kozakli",
    'Tom Van Der Lee': "Silvolde",
    'Sophia Theodora Marianne Hermans': "Nijmegen",
    'Rens Raemakers': "Neer",  # stray leading space removed
    'Maarten Groothuizen': "Nijmegen",
    'Isabelle Diks': "Heerlen",
    'Harry Van Der Molen': "Kootstertille",
    'Suzanne Kröger': "Amsterdam",
    'Zohair El Yassini': "Utrecht",
    'Bart Snels': "Roosendaal",
    'Jan Middendorp': "Amsterdam",
    # NOTE(review): "Grunnen" looks like a regional spelling of Groningen;
    # confirm that the GeoNames lookup resolves it as intended.
    'Antje Diertens': "Grunnen",
    'Albert Van Den Bosch': "Zeist",
    'Tjeenk Willink': "Amsterdam",
    'Martin Wörsdörfer': "Enschede",
}
for speaker, place in manual_places_of_birth.items():
    # A speaker that is not present in the frame matches no rows (no-op)
    speakers_place_of_birth.loc[speakers_place_of_birth['speakers'] == speaker, 'place_of_birth'] = place

# Drop speakers whose place of birth is still unknown. The explicit copy makes
# the column assignment below act on an independent frame.
speakers_place_of_birth = speakers_place_of_birth.dropna().copy()
speakers_place_of_birth['place_of_birth'] = speakers_place_of_birth['place_of_birth'].astype(str)


# country of birth from geonames

In [None]:
# Resolve each place of birth to a country via GeoNames (None when the lookup fails).
country_of_birth = []
for city in speakers_place_of_birth['place_of_birth']:
    while True:  # Repeat the request until it completes without a URL error
        try:
            print(city)
            country = get_country_by_city(city)
            if country:
                print(f"The country of {city} is {country}")
            else:
                print("Failed to retrieve country information.")
            country_of_birth.append(country)
            break  # Exit the loop if the request is successful
        except urllib.error.URLError as e:
            # Report the city being looked up (the original printed a stale
            # `name` left over from an earlier loop)
            print(f"Error when requesting for {city}: {e}")
            # Add a delay before retrying the request
            time.sleep(5)  # Wait 5 seconds before trying again
speakers_place_of_birth['country_of_birth'] = country_of_birth


# date of birth from wikidata

In [None]:
# Query Wikidata for the date of birth (property P569) of every remaining speaker.
dates_of_birth = []

for name in speakers_place_of_birth['speakers']:
    # Keep asking until the lookup finishes without a URL error
    while True:
        try:
            print(name)
            date_of_birth = name_to_place_of_birth(name, 'P569')
        except urllib.error.URLError as err:
            print(f"Error when requesting for {name}: {err}")
            # Back off for 5 seconds before the next attempt
            time.sleep(5)
        else:
            print(f"was born in {date_of_birth}")
            dates_of_birth.append(date_of_birth)
            break
    print('#' * 50)
speakers_place_of_birth['dates_of_birth'] = dates_of_birth


# filtering by country of birth

In [None]:
# Flag every speaker whose country of birth is anything other than 'The Netherlands'.
# Rows with a missing country (failed lookup) compare as "not equal" and are flagged too.
speakers_place_of_birth['target person'] = speakers_place_of_birth['country_of_birth'].ne('The Netherlands')


# save to the db table speakers_place_of_birth

In [None]:
# Establish connection to the database
conn = sqlite3.connect('2012_to_2017_deputat_speakers.db')
try:
    # Save the DataFrame, replacing the table if it already exists
    speakers_place_of_birth.to_sql('speakers_place_of_birth', conn, if_exists='replace', index=False)
finally:
    # Close the connection even if the write fails
    conn.close()