### Setup

In [3]:
import pandas as pd
import ast
import os

In [4]:
local = pd.read_csv('datasets/crawler/local_codes.csv')
foreign = pd.read_csv('datasets/crawler/foreign_codes.csv')
codes = (
    pd.concat([local, foreign])
    .rename(columns={'name': 'freguesia'})
    .reset_index(drop=True)
)

### Methods

In [64]:
# Returns resuts table df
def getResultsTable(file):
    resultsParty = file.loc['resultsParty', 'currentResults']

    results = (
        pd.DataFrame(ast.literal_eval(resultsParty))
        .drop(['absoluteMajority', 'constituenctyCounter', 'imageKey', 'mandates', 'presidents'], axis=1)
        .rename(columns={'acronym': 'party'})
        .set_index('party')
    )

    return results

# Returns metadata df
def getResults(file, row):
    md = pd.Series(file['currentResults'])
    md.index.name = None
    md = (
        md.drop(['availableMandates', 'compensation', 'displayMessage', 'hasNoVoting', 'resultsParty'
            , 'tie', 'tieMessage', 'totalBoycotts', 'totalForeignBoycotts', 'totalLocalBoycotts'
            , 'totalMandates', 'totalParishesApproved'])
        .to_frame()
        .T
        .assign(territoryKey = row['territoryKey'])
        .set_index('territoryKey')
    )
    
    loc = (
        pd.DataFrame(
            [row['parish'], row['county'], row['district'], row['territoryKey']], index=['parish', 'county', 'district', 'territoryKey'])
        .T
        .set_index('territoryKey')
    )
    
    r = getResultsTable(file)
    r['territoryKey'], r['parish'] = row['territoryKey'], row['parish']
    r = r.reset_index().set_index(['territoryKey', 'party'])

    return r, md, loc

# Creates directories
def mkdir():
        path = '/datasets/results/' 

        working_dir = os.getcwd()
        newpath = working_dir + path

        if not os.path.exists(newpath):
            os.makedirs(newpath)

#### Teste

In [63]:
# row = codes.iloc[0]
# results = pd.DataFrame()
# metadata = pd.DataFrame()
# location = pd.DataFrame()

# loc = row['territoryKey'].split('-')[0].capitalize()
# file = (
#     pd.read_csv('datasets/crawler/' + loc + '/' + row['district'] + '/' + row['county'] + '/' + row['parish'] + '.csv')
#     .set_index('index')
# )

# md = pd.Series(file['currentResults'])
# md.index.name = None
# md = (
#     md.drop(['availableMandates', 'compensation', 'displayMessage', 'hasNoVoting', 'resultsParty'
#         , 'tie', 'tieMessage', 'totalBoycotts', 'totalForeignBoycotts', 'totalLocalBoycotts'
#         , 'totalMandates', 'totalParishesApproved'])
#     .to_frame()
#     .T
#     .assign(territoryKey = row['territoryKey'])
#     .set_index('territoryKey')
# )

# md


Unnamed: 0,blankVotes,blankVotesPercentage,nullVotes,nullVotesPercentage,numberParishes,numberVoters,percentageVoters,subscribedVoters,totalVoters
currentResults,13,2.8200000000000003,3,0.65,1,461,54.36,848,461


### Create Results files

In [65]:
results = pd.DataFrame()
metadata = pd.DataFrame()
location = pd.DataFrame()

for i, row in codes.iterrows():
    file = (
        pd.read_csv('datasets/crawler/' + row['territoryKey'].split('-')[0].capitalize() + '/' + row['district'] + '/' + row['county'] + '/' + row['parish'] + '.csv')
        .set_index('index')
    )
    
    r, md, loc = getResults(file, row)

    results = pd.concat([results, r])    
    metadata = pd.concat([metadata, md])
    location = pd.concat([location, loc])

In [66]:
location

Unnamed: 0_level_0,parish,county,district
territoryKey,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
LOCAL-430101,Altares,Angra do Heroísmo,Açores
LOCAL-430102,Angra (Nossa Senhora da Conceição),Angra do Heroísmo,Açores
LOCAL-430103,Angra (Santa Luzia),Angra do Heroísmo,Açores
LOCAL-430104,Angra (São Pedro),Angra do Heroísmo,Açores
LOCAL-430105,Angra (Sé),Angra do Heroísmo,Açores
...,...,...,...
FOREIGN-930299,Postos Consulares da China,China,Fora da Europa
FOREIGN-920799,Postos Consulares dos Estados Unidos da América,Estados Unidos da América,Fora da Europa
FOREIGN-919999,Postos Consulares dos países de África,Países de África,Fora da Europa
FOREIGN-929999,Restantes Postos Consulares Dos Países da América,Restantes Países da América,Fora da Europa


In [67]:
metadata

Unnamed: 0_level_0,blankVotes,blankVotesPercentage,nullVotes,nullVotesPercentage,numberParishes,numberVoters,percentageVoters,subscribedVoters,totalVoters
territoryKey,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
LOCAL-430101,13,2.8200000000000003,3,0.65,1,461,54.36,848,461
LOCAL-430102,31,1.8,19,1.1,1,1720,46.24,3720,1720
LOCAL-430103,33,2.73,14,1.16,1,1210,52.95,2285,1210
LOCAL-430104,35,1.85,10,0.53,1,1887,56.96,3313,1887
LOCAL-430105,7,1.1400000000000001,9,1.46,1,615,55.91,1100,615
...,...,...,...,...,...,...,...,...,...
FOREIGN-930299,128,2.08,1937,31.46,1,6157,12.57,48982,6157
FOREIGN-920799,19,0.17,5592,51.26,1,10910,16.72,65242,10910
FOREIGN-919999,28,0.8,562,16.14,1,3483,5.48,63584,3483
FOREIGN-929999,92,2.68,1068,31.08,1,3436,5.47,62856,3436


In [68]:
results

Unnamed: 0_level_0,Unnamed: 1_level_0,percentage,validVotesPercentage,votes,parish
territoryKey,party,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
LOCAL-430101,PPD/PSD.CDS-PP.PPM,43.17,44.72,199,Altares
LOCAL-430101,PS,34.27,35.51,158,Altares
LOCAL-430101,CH,12.15,12.58,56,Altares
LOCAL-430101,B.E.,2.17,2.25,10,Altares
LOCAL-430101,IL,1.95,2.02,9,Altares
...,...,...,...,...,...
FOREIGN-939999,JPP,0.22,0.33,13,Restantes Postos Consulares Dos Países da Ásia...
FOREIGN-939999,MPT.A,0.17,0.26,10,Restantes Postos Consulares Dos Países da Ásia...
FOREIGN-939999,ND,0.13,0.21,8,Restantes Postos Consulares Dos Países da Ásia...
FOREIGN-939999,R.I.R.,0.12,0.18,7,Restantes Postos Consulares Dos Países da Ásia...


In [69]:
mkdir()

results.to_csv('datasets/results/results.csv')
metadata.to_csv('datasets/results/metadata.csv')
location.to_csv('datasets/results/location.csv')

### Explore Data

In [30]:
results = pd.read_csv('datasets/results/results.csv')
metadata = pd.read_csv('datasets/results/metadata.csv')

parties = results['party'].unique()
results = results.set_index(['parish', 'party'])

idx = pd.IndexSlice

In [31]:
results.loc['Angra (Sé)', 'PS']

  results.loc['Angra (Sé)', 'PS']


Unnamed: 0_level_0,Unnamed: 1_level_0,tkey,percentage,validVotesPercentage,votes
parish,party,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Angra (Sé),PS,LOCAL-430105,24.88,25.54,153


In [32]:
# Results by Party
# for i, party in parties:
#     results.loc[idx[:, party], idx[:]]

## Database

In [33]:
# results
# metadata


In [34]:
# # Send to SQL
# import mysql.connector
# from dotenv import load_dotenv

# load_dotenv()
# user = os.getenv('USER')
# password = os.getenv('PW')

# cnx = mysql.connector.connect(
#     user=user,
#     password=password,
#     host="localhost",
#     database="legislativas"
# )


