## Context
The previous dataset didn't provide the sufficient details for the feature.
This web scraper will create a dataset with the results for each party in each parish ('freguesia') in Portugal (inland and autonomous territories) as well as the votes from the Europe and Outside of Europe circles

### Setup

In [68]:
#%pip install pandas
import pandas as pd
import os

In [69]:
# Base url
site = 'https://www.legislativas2024.mai.gov.pt/assets/static'

# Local path
tchild = site +'/territory-children/territory-children-' # children territory codes url
tresult = site +'/territory-results/territory-results-' # voting results url

portugal =  {
                'compensation': [False], 
                'name': ['Portugal'], 
                'territoryKey': ['LOCAL-500000'],
                'distrito': False,
                'concelho': False,
                'level': ['pais']
            }

foreign =   {
                'compensation': [False], 
                'name': ['Estrangeiro'], 
                'territoryKey': ['FOREIGN-600000'],
                'distrito': False,
                'concelho': False,
                'level': ['pais']
            }

codes = pd.DataFrame(portugal) # creates codes dataframe, and populates with highest-tier level

path = ''

### Methods

In [70]:
# Saves children json from url          
def get_children(territoryKey):
    url = tchild + territoryKey + '.json'
    return pd.read_json(url)

# Saves result json from url
def get_result(territoryKey):
    url = tresult + territoryKey + '-AR.json'
    return pd.read_json(url)

# Populates codes dataframe
def get_codes(row, district, county, level):
    return  {
                'compensation':     row['compensation'],
                'name':             row['name'], 
                'territoryKey':     row['territoryKey'],
                'distrito':         district,
                'concelho':         county,
                'level':            level
            }

# Creates directories
def mkdir(lv2_codes):
    for row in lv2_codes.iterrows():
        tkey = row[1]['territoryKey']
        dist = row[1]['distrito']
        county = row[1]['name']
        loc = tkey.split('-')[0].capitalize()

        path = '/datasets/' + loc + '/' + dist + '/' + county + '/'

        working_dir = os.getcwd()
        newpath = working_dir + path

        if not os.path.exists(newpath):
            os.makedirs(newpath)

In [71]:
# Drill down to districts
def drill_lv1(parent_lv0_codes):
    tkey = parent_lv0_codes['territoryKey'][0]
    districts = get_children(tkey)

    return pd.DataFrame(get_codes(districts, False, False, 'distrito'))

# Drill down to counties
def drill_lv2(parent_lv1_codes):
    county_codes = pd.DataFrame()


    for row in parent_lv1_codes.iterrows():
        tkey = row[1]['territoryKey']
        dist = row[1]['name']
        county = False
        
        counties = get_children(tkey)
        codes = pd.DataFrame(get_codes(counties, dist, county, 'concelho'))
        county_codes = pd.concat([county_codes, codes])

    return county_codes.reset_index(drop=True)

# Drill down to parishes
def drill_lv3(parent_lv2_codes):
    parish_codes = pd.DataFrame()

    for row in parent_lv2_codes.iterrows():
        tkey = row[1]['territoryKey']
        dist = row[1]['distrito']
        county = row[1]['name']
        loc = tkey.startswith('LOCAL')

        # Run the below code once to create the dataset folders
        mkdir(dist, county, tkey)
        
        parishes = get_children(tkey)
        codes = pd.DataFrame(get_codes(parishes, dist, county, 'freguesia'))
        parish_codes = pd.concat([parish_codes, codes])

    return parish_codes.reset_index(drop=True)

# Saves the results
def fetch_results(parent_lv3_codes):

    for row in parent_lv3_codes.iterrows():
        tkey = row[1]['territoryKey']
        dist = row[1]['distrito']
        county = row[1]['concelho']
        parish = row[1]['name']
        
        result = pd.DataFrame(get_result(tkey))
        
        loc = tkey.split('-')[0].capitalize()
        path = 'datasets/' + loc + '/' + dist + '/' + county + '/'
                
        result.to_csv(path + parish + '.csv')

### Scraping National Territory

In [54]:
districts = drill_lv1(portugal)
districts

Unnamed: 0,compensation,name,territoryKey,distrito,concelho,level
0,False,Açores,LOCAL-400000,False,False,distrito
1,False,Aveiro,LOCAL-010000,False,False,distrito
2,False,Beja,LOCAL-020000,False,False,distrito
3,False,Braga,LOCAL-030000,False,False,distrito
4,False,Bragança,LOCAL-040000,False,False,distrito
5,False,Castelo Branco,LOCAL-050000,False,False,distrito
6,False,Coimbra,LOCAL-060000,False,False,distrito
7,False,Évora,LOCAL-070000,False,False,distrito
8,False,Faro,LOCAL-080000,False,False,distrito
9,False,Guarda,LOCAL-090000,False,False,distrito


In [55]:
counties = drill_lv2(districts)
counties

Unnamed: 0,compensation,name,territoryKey,distrito,concelho,level
0,False,Angra do Heroísmo,LOCAL-430100,Açores,False,concelho
1,False,Calheta,LOCAL-450100,Açores,False,concelho
2,False,Corvo,LOCAL-490100,Açores,False,concelho
3,False,Horta,LOCAL-470100,Açores,False,concelho
4,False,Lagoa,LOCAL-420100,Açores,False,concelho
...,...,...,...,...,...,...
303,False,Tarouca,LOCAL-182000,Viseu,False,concelho
304,False,Tondela,LOCAL-182100,Viseu,False,concelho
305,False,Vila Nova de Paiva,LOCAL-182200,Viseu,False,concelho
306,False,Viseu,LOCAL-182300,Viseu,False,concelho


In [56]:
mkdir(counties)

In [57]:
parishes = drill_lv3(counties)
parishes

TypeError: mkdir() takes 1 positional argument but 3 were given

In [None]:
parish_dict =   {
                    'Longueira/Almograve': 'Longueira, Almograve',
                    'Alverca da Beira/Bouça Cova': 'Alverca da Beira, Bouça Cova',
                    'Valbom/Bogalhal': 'Valbom, Bogalhal',
                    'Santa Cruz/Trindade e Sanjurge': 'Santa Cruz, Trindade e Sanjurge',
                    'Vila Cova do Covelo/Mareco': 'Vila Cova do Covelo, Mareco'
                }

parishes =  parishes.replace(parish_dict).reset_index(drop=True)

In [None]:
fetch_results(parishes)

OSError: Cannot save file into a non-existent directory: 'datasets\Local\Açores\Angra do Heroísmo'

### Scraping Europe and Out of Europe

In [75]:
circles = drill_lv1(foreign)
circles

Unnamed: 0,compensation,name,territoryKey,distrito,concelho,level
0,False,Europa,FOREIGN-800000,False,False,distrito
1,False,Fora da Europa,FOREIGN-900000,False,False,distrito


In [76]:
countries = drill_lv2(circles)
countries

Unnamed: 0,compensation,name,territoryKey,distrito,concelho,level
0,False,Alemanha,FOREIGN-810200,Europa,False,concelho
1,False,Bélgica,FOREIGN-810400,Europa,False,concelho
2,False,Espanha,FOREIGN-810800,Europa,False,concelho
3,False,França,FOREIGN-811000,Europa,False,concelho
4,False,Luxemburgo,FOREIGN-811700,Europa,False,concelho
5,False,Reino Unido da Grã-bretanha e Irlanda do Norte,FOREIGN-812000,Europa,False,concelho
6,False,Restantes Países da Europa,FOREIGN-819900,Europa,False,concelho
7,False,Suíça,FOREIGN-813000,Europa,False,concelho
8,False,Brasil,FOREIGN-920300,Fora da Europa,False,concelho
9,False,Canadá,FOREIGN-920400,Fora da Europa,False,concelho


In [77]:
mkdir(countries)

In [78]:
embassies = drill_lv3(countries)
embassies

TypeError: mkdir() takes 1 positional argument but 3 were given

In [None]:
fetch_results(embassies)