## Context
The previous dataset did not provide sufficient detail for the feature.
This web scraper creates a dataset with the results for each party in each parish ('freguesia') in Portugal (mainland and autonomous regions), as well as the votes from the Europe and Outside of Europe electoral circles.

### Running this Notebook
You can run all cells to get the full dataset, or, after running the Setup and Methods cells, execute Group 1 or Group 2 separately:

- Group 1 - National Territory (3,000+ files; can take several minutes to run)
- Group 2 - Europe and Out of Europe

### Setup

In [47]:
#%pip install pandas
import pandas as pd
import os

In [48]:
# Base URL of the static assets published by the election results site
site = 'https://www.eleicoes.mai.gov.pt/legislativas2024/assets/static'

# Remote endpoint prefixes (the territory key and file suffix are appended later)
tchild = f'{site}/territory-children/territory-children-'  # children territory codes url
tresult = f'{site}/territory-results/territory-results-'  # voting results url

# Root record for the national territory
portugal = {
    'compensation': [False],
    'name': ['Portugal'],
    'territoryKey': ['LOCAL-500000'],
    'distrito': False,
    'concelho': False,
    'level': ['pais'],
}

# Root record for the foreign territories
foreign = {
    'compensation': [False],
    'name': ['Estrangeiro'],
    'territoryKey': ['FOREIGN-600000'],
    'distrito': False,
    'concelho': False,
    'level': ['pais'],
}

# Parish names containing "/" are rewritten, since "/" would act as a path
# separator when the name is used as a file name
parish_dict = {
    'Longueira/Almograve': 'Longueira, Almograve',
    'Alverca da Beira/Bouça Cova': 'Alverca da Beira, Bouça Cova',
    'Valbom/Bogalhal': 'Valbom, Bogalhal',
    'Santa Cruz/Trindade e Sanjurge': 'Santa Cruz, Trindade e Sanjurge',
    'Vila Cova do Covelo/Mareco': 'Vila Cova do Covelo, Mareco',
}

# Codes dataframe, seeded with the highest-tier level (Portugal)
codes = pd.DataFrame(portugal)

### Methods

In [49]:
# Loads the children json of a territory from its url
def get_children(territoryKey):
    """Read the children listing of ``territoryKey``.

    The address is the ``tchild`` prefix followed by the key and ``.json``;
    it is printed before being read so progress can be followed.
    Returns whatever ``pd.read_json`` parses from that address.
    """
    children_url = ''.join((tchild, territoryKey, '.json'))
    print(children_url)
    children = pd.read_json(children_url)
    return children

# Loads the result json of a territory from its url
def get_result(territoryKey):
    """Read the voting results of ``territoryKey``.

    The address is the ``tresult`` prefix followed by the key and ``-AR.json``.
    Returns whatever ``pd.read_json`` parses from that address.
    """
    result_url = ''.join((tresult, territoryKey, '-AR.json'))
    results = pd.read_json(result_url)
    return results

# Builds the record used to populate a codes dataframe
def get_codes(row, district, county, level):
    """Return a dict combining fields of ``row`` with its parent labels.

    ``compensation``, ``name`` and ``territoryKey`` are copied from ``row``;
    ``district``, ``county`` and ``level`` are stored under ``distrito``,
    ``concelho`` and ``level`` respectively.
    """
    record = {field: row[field] for field in ('compensation', 'name', 'territoryKey')}
    record['distrito'] = district
    record['concelho'] = county
    record['level'] = level
    return record

# Creates directories
def mkdir(lv2_codes):
    """Create the output folder for every row of ``lv2_codes``.

    Each row must provide 'territoryKey', 'distrito' and 'name'. The folder
    created is ``<cwd>/datasets/<Loc>/<distrito>/<name>``, where ``<Loc>`` is
    the capitalized prefix of the territory key (the text before the first
    '-'). Folders that already exist are left untouched.
    """
    base_dir = os.path.join(os.getcwd(), 'datasets')

    for _, row in lv2_codes.iterrows():
        loc = row['territoryKey'].split('-')[0].capitalize()
        target = os.path.join(base_dir, loc, row['distrito'], row['name'])
        # exist_ok avoids the check-then-create race and makes re-runs a no-op
        os.makedirs(target, exist_ok=True)

In [50]:
# Drill down to districts
def drill_lv1(parent_lv0_codes):
    """Return the codes of the territories directly under the root record.

    Only the first entry of ``parent_lv0_codes['territoryKey']`` is used.
    The children are labelled with level 'distrito' and have no parent
    district or county (both stored as False).
    """
    root_key = parent_lv0_codes['territoryKey'][0]
    children = get_children(root_key)
    district_codes = get_codes(children, False, False, 'distrito')
    return pd.DataFrame(district_codes)

# Drill down to counties
def drill_lv2(parent_lv1_codes):
    """Collect the codes of every territory one level below the given districts.

    Parameters
    ----------
    parent_lv1_codes : DataFrame
        One row per district; 'territoryKey' and 'name' are read from each row.

    Returns
    -------
    DataFrame
        One row per child territory, with the parent's name in 'distrito',
        False in 'concelho' and 'concelho' in 'level', indexed from 0.
        An empty DataFrame when there are no parent rows.
    """
    frames = []

    for _, parent in parent_lv1_codes.iterrows():
        children = get_children(parent['territoryKey'])
        frames.append(pd.DataFrame(get_codes(children, parent['name'], False, 'concelho')))

    # pd.concat raises on an empty list, so keep the original empty-frame result
    if not frames:
        return pd.DataFrame()

    # Concatenate once instead of re-copying the accumulated frame on every iteration
    return pd.concat(frames, ignore_index=True)

# Drill down to parishes
def drill_lv3(parent_lv2_codes):
    """Collect the codes of every territory one level below the given counties.

    Parameters
    ----------
    parent_lv2_codes : DataFrame
        One row per county; 'territoryKey', 'distrito' and 'name' are read
        from each row.

    Returns
    -------
    DataFrame
        One row per child territory, with the parent's 'distrito' carried
        over, the parent's name in 'concelho' and 'freguesia' in 'level',
        indexed from 0. An empty DataFrame when there are no parent rows.
    """
    frames = []

    for _, parent in parent_lv2_codes.iterrows():
        children = get_children(parent['territoryKey'])
        frames.append(
            pd.DataFrame(get_codes(children, parent['distrito'], parent['name'], 'freguesia'))
        )

    # pd.concat raises on an empty list, so keep the original empty-frame result
    if not frames:
        return pd.DataFrame()

    # Concatenate once instead of re-copying the accumulated frame on every iteration
    return pd.concat(frames, ignore_index=True)

# Saves the results
def fetch_results(parent_lv3_codes):
    """Fetch the results of every listed territory and save each one as a CSV.

    Each row must provide 'territoryKey', 'distrito', 'concelho' and 'name'.
    The file is written to ``datasets/<Loc>/<distrito>/<concelho>/<name>.csv``
    (relative to the current working directory), where ``<Loc>`` is the
    capitalized prefix of the territory key. The row index is saved in a
    column named 'index'.
    """
    for _, row in parent_lv3_codes.iterrows():
        tkey = row['territoryKey']

        # rename_axis returns a new frame, so the fetched object is not modified
        result = get_result(tkey).rename_axis('index')

        loc = tkey.split('-')[0].capitalize()
        out_dir = os.path.join('datasets', loc, row['distrito'], row['concelho'])

        # Do not rely on the folders having been created beforehand
        os.makedirs(out_dir, exist_ok=True)

        result.to_csv(os.path.join(out_dir, row['name'] + '.csv'))

### Test

In [51]:
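# Manual re-run of the download from a saved codes file (kept disabled).
# NOTE: this notebook writes 'territory_codes_local.csv' and
# 'territory_codes_foreign.csv'; adjust the file name below before enabling.
# df = pd.read_csv('datasets/territory_codes.csv')
#
# for row in df.iterrows():
#     tkey = row[1]['territoryKey']
#     dist = row[1]['distrito']
#     county = row[1]['concelho']
#     parish = row[1]['name']
#
#     result = pd.DataFrame(get_result(tkey))
#
#     loc = tkey.split('-')[0].capitalize()
#     path = 'datasets/' + loc + '/' + dist + '/' + county + '/'
#
#     result.to_csv(path + parish + '.csv', index=False)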

### Scraping National Territory

In [52]:
# Fetch the territories directly under the Portugal root (labelled 'distrito') and display them
districts = drill_lv1(portugal)
districts

https://www.eleicoes.mai.gov.pt/legislativas2024/assets/static/territory-children/territory-children-LOCAL-500000.json


Unnamed: 0,compensation,name,territoryKey,distrito,concelho,level
0,False,Açores,LOCAL-400000,False,False,distrito
1,False,Aveiro,LOCAL-010000,False,False,distrito
2,False,Beja,LOCAL-020000,False,False,distrito
3,False,Braga,LOCAL-030000,False,False,distrito
4,False,Bragança,LOCAL-040000,False,False,distrito
5,False,Castelo Branco,LOCAL-050000,False,False,distrito
6,False,Coimbra,LOCAL-060000,False,False,distrito
7,False,Évora,LOCAL-070000,False,False,distrito
8,False,Faro,LOCAL-080000,False,False,distrito
9,False,Guarda,LOCAL-090000,False,False,distrito


In [53]:
# Fetch the children of every district (labelled 'concelho') and display them
counties = drill_lv2(districts)
counties

https://www.eleicoes.mai.gov.pt/legislativas2024/assets/static/territory-children/territory-children-LOCAL-400000.json
https://www.eleicoes.mai.gov.pt/legislativas2024/assets/static/territory-children/territory-children-LOCAL-010000.json
https://www.eleicoes.mai.gov.pt/legislativas2024/assets/static/territory-children/territory-children-LOCAL-020000.json
https://www.eleicoes.mai.gov.pt/legislativas2024/assets/static/territory-children/territory-children-LOCAL-030000.json
https://www.eleicoes.mai.gov.pt/legislativas2024/assets/static/territory-children/territory-children-LOCAL-040000.json
https://www.eleicoes.mai.gov.pt/legislativas2024/assets/static/territory-children/territory-children-LOCAL-050000.json
https://www.eleicoes.mai.gov.pt/legislativas2024/assets/static/territory-children/territory-children-LOCAL-060000.json
https://www.eleicoes.mai.gov.pt/legislativas2024/assets/static/territory-children/territory-children-LOCAL-070000.json
https://www.eleicoes.mai.gov.pt/legislativas2024

Unnamed: 0,compensation,name,territoryKey,distrito,concelho,level
0,False,Angra do Heroísmo,LOCAL-430100,Açores,False,concelho
1,False,Calheta,LOCAL-450100,Açores,False,concelho
2,False,Corvo,LOCAL-490100,Açores,False,concelho
3,False,Horta,LOCAL-470100,Açores,False,concelho
4,False,Lagoa,LOCAL-420100,Açores,False,concelho
...,...,...,...,...,...,...
303,False,Tarouca,LOCAL-182000,Viseu,False,concelho
304,False,Tondela,LOCAL-182100,Viseu,False,concelho
305,False,Vila Nova de Paiva,LOCAL-182200,Viseu,False,concelho
306,False,Viseu,LOCAL-182300,Viseu,False,concelho


In [54]:
# Create the datasets/Local/<distrito>/<concelho>/ output folders
mkdir(counties)

In [55]:
# Fetch the children of every county (labelled 'freguesia') and display them
parishes = drill_lv3(counties)
parishes


https://www.eleicoes.mai.gov.pt/legislativas2024/assets/static/territory-children/territory-children-LOCAL-430100.json
https://www.eleicoes.mai.gov.pt/legislativas2024/assets/static/territory-children/territory-children-LOCAL-450100.json
https://www.eleicoes.mai.gov.pt/legislativas2024/assets/static/territory-children/territory-children-LOCAL-490100.json
https://www.eleicoes.mai.gov.pt/legislativas2024/assets/static/territory-children/territory-children-LOCAL-470100.json
https://www.eleicoes.mai.gov.pt/legislativas2024/assets/static/territory-children/territory-children-LOCAL-420100.json
https://www.eleicoes.mai.gov.pt/legislativas2024/assets/static/territory-children/territory-children-LOCAL-480100.json
https://www.eleicoes.mai.gov.pt/legislativas2024/assets/static/territory-children/territory-children-LOCAL-460100.json
https://www.eleicoes.mai.gov.pt/legislativas2024/assets/static/territory-children/territory-children-LOCAL-460200.json
https://www.eleicoes.mai.gov.pt/legislativas2024

Unnamed: 0,compensation,name,territoryKey,distrito,concelho,level
0,False,Altares,LOCAL-430101,Açores,Angra do Heroísmo,freguesia
1,False,Angra (Nossa Senhora da Conceição),LOCAL-430102,Açores,Angra do Heroísmo,freguesia
2,False,Angra (Santa Luzia),LOCAL-430103,Açores,Angra do Heroísmo,freguesia
3,False,Angra (São Pedro),LOCAL-430104,Açores,Angra do Heroísmo,freguesia
4,False,Angra (Sé),LOCAL-430105,Açores,Angra do Heroísmo,freguesia
...,...,...,...,...,...,...
3087,False,Fornelo do Monte,LOCAL-182407,Viseu,Vouzela,freguesia
3088,False,Queirã,LOCAL-182409,Viseu,Vouzela,freguesia
3089,False,São Miguel do Mato,LOCAL-182410,Viseu,Vouzela,freguesia
3090,False,Ventosa,LOCAL-182411,Viseu,Vouzela,freguesia


In [56]:
# Swap the values listed in parish_dict (names containing "/") for their
# comma-separated form, so the name can later be used as a file name.
# The replacement is applied to every column of the frame, on exact matches.
parishes =  (
    parishes.replace(parish_dict)
    .reset_index(drop=True)
)

In [57]:
# Persist the parish codes next to the results.
# to_csv returns None when given a path, so the result is not bound to a name.
parishes[['territoryKey', 'name', 'concelho', 'distrito']].to_csv(
    'datasets/territory_codes_local.csv', index=False
)

In [58]:
# Download the results of every parish; one CSV per parish is written under
# datasets/Local/<distrito>/<concelho>/
fetch_results(parishes)

### Scraping Europe and Out of Europe

In [59]:
# Fetch the territories directly under the foreign root and display them
# (drill_lv1 labels them 'distrito')
circles = drill_lv1(foreign)
circles

https://www.eleicoes.mai.gov.pt/legislativas2024/assets/static/territory-children/territory-children-FOREIGN-600000.json


Unnamed: 0,compensation,name,territoryKey,distrito,concelho,level
0,False,Europa,FOREIGN-800000,False,False,distrito
1,False,Fora da Europa,FOREIGN-900000,False,False,distrito


In [60]:
# Fetch the children of every circle and display them
# (drill_lv2 labels them 'concelho')
countries = drill_lv2(circles)
countries

https://www.eleicoes.mai.gov.pt/legislativas2024/assets/static/territory-children/territory-children-FOREIGN-800000.json
https://www.eleicoes.mai.gov.pt/legislativas2024/assets/static/territory-children/territory-children-FOREIGN-900000.json


Unnamed: 0,compensation,name,territoryKey,distrito,concelho,level
0,False,Alemanha,FOREIGN-810200,Europa,False,concelho
1,False,Bélgica,FOREIGN-810400,Europa,False,concelho
2,False,Espanha,FOREIGN-810800,Europa,False,concelho
3,False,França,FOREIGN-811000,Europa,False,concelho
4,False,Luxemburgo,FOREIGN-811700,Europa,False,concelho
5,False,Reino Unido da Grã-bretanha e Irlanda do Norte,FOREIGN-812000,Europa,False,concelho
6,False,Restantes Países da Europa,FOREIGN-819900,Europa,False,concelho
7,False,Suíça,FOREIGN-813000,Europa,False,concelho
8,False,Brasil,FOREIGN-920300,Fora da Europa,False,concelho
9,False,Canadá,FOREIGN-920400,Fora da Europa,False,concelho


In [61]:
# Create the datasets/Foreign/<circle>/<country>/ output folders
mkdir(countries)

In [62]:
# Fetch the children of every country and display them
# (drill_lv3 labels them 'freguesia')
embassies = drill_lv3(countries)
embassies

https://www.eleicoes.mai.gov.pt/legislativas2024/assets/static/territory-children/territory-children-FOREIGN-810200.json
https://www.eleicoes.mai.gov.pt/legislativas2024/assets/static/territory-children/territory-children-FOREIGN-810400.json
https://www.eleicoes.mai.gov.pt/legislativas2024/assets/static/territory-children/territory-children-FOREIGN-810800.json
https://www.eleicoes.mai.gov.pt/legislativas2024/assets/static/territory-children/territory-children-FOREIGN-811000.json
https://www.eleicoes.mai.gov.pt/legislativas2024/assets/static/territory-children/territory-children-FOREIGN-811700.json
https://www.eleicoes.mai.gov.pt/legislativas2024/assets/static/territory-children/territory-children-FOREIGN-812000.json
https://www.eleicoes.mai.gov.pt/legislativas2024/assets/static/territory-children/territory-children-FOREIGN-819900.json
https://www.eleicoes.mai.gov.pt/legislativas2024/assets/static/territory-children/territory-children-FOREIGN-813000.json
https://www.eleicoes.mai.gov.pt/

Unnamed: 0,compensation,name,territoryKey,distrito,concelho,level
0,False,Postos Consulares da Alemanha,FOREIGN-810299,Europa,Alemanha,freguesia
1,False,Bruxelas,FOREIGN-810401,Europa,Bélgica,freguesia
2,False,Postos Consulares da Espanha,FOREIGN-810899,Europa,Espanha,freguesia
3,False,Bordéus,FOREIGN-811002,Europa,França,freguesia
4,False,Estrasburgo,FOREIGN-811004,Europa,França,freguesia
5,False,Lyon,FOREIGN-811006,Europa,França,freguesia
6,False,Marselha,FOREIGN-811007,Europa,França,freguesia
7,False,Paris,FOREIGN-811012,Europa,França,freguesia
8,False,Toulouse,FOREIGN-811015,Europa,França,freguesia
9,False,Luxemburgo,FOREIGN-811701,Europa,Luxemburgo,freguesia


In [63]:
# Persist the foreign territory codes next to the results.
# to_csv returns None when given a path, so the result is not bound to a name.
embassies[['territoryKey', 'name', 'concelho', 'distrito']].to_csv(
    'datasets/territory_codes_foreign.csv', index=False
)

In [64]:
# Download the results of every foreign territory; one CSV each is written under
# datasets/Foreign/<distrito>/<concelho>/
fetch_results(embassies)