### Setup

In [11]:
import pandas as pd
import ast
import os

In [None]:
local = pd.read_csv('datasets/crawler/local_codes.csv')
foreign = pd.read_csv('datasets/crawler/foreign_codes.csv')
codes = (
    pd.concat([local, foreign])
    .rename(columns={'name': 'freguesia'})
    .reset_index(drop=True)
)

### Methods

In [None]:
# Returns resuts table df
def getResultsTable(file):
    resultsParty = file.loc['resultsParty', 'currentResults']

    results = (
        pd.DataFrame(ast.literal_eval(resultsParty))
        .drop(['absoluteMajority', 'constituenctyCounter', 'imageKey', 'mandates', 'presidents'], axis=1)
        .rename(columns={'acronym': 'party'})
        .set_index('party')
    )

    return results

# Returns metadata df
def getResults(file, row):
    md = pd.Series(file['currentResults'])
    md.index.name = None
    md = (
        md.drop(
            ['availableMandates', 'compensation', 'displayMessage', 'hasNoVoting', 'resultsParty'
            , 'tie', 'tieMessage', 'totalBoycotts', 'totalForeignBoycotts', 'totalLocalBoycotts'
            , 'totalMandates', 'totalParishesApproved'])
        .to_frame()
        .T
        .assign(territoryKey = row['territoryKey'])
        .set_index('territoryKey')
    )
    
    loc = (
        pd.DataFrame(
            [row['parish'], row['county'], row['district'], row['territoryKey']]
            , index=['parish', 'county', 'district', 'territoryKey'])
        .T
        .set_index('territoryKey')
    )
    
    r = getResultsTable(file)
    r['territoryKey'] = row['territoryKey']
    r = r.reset_index().set_index(['territoryKey', 'party'])

    return r, md, loc

# Creates directories
def mkdir():
        path = '/datasets/results/' 

        working_dir = os.getcwd()
        newpath = working_dir + path

        if not os.path.exists(newpath):
            os.makedirs(newpath)

In [12]:
def load_files():
    results = pd.read_csv('datasets/results/results.csv').set_index(['territoryKey', 'party'])
    metadata = pd.read_csv('datasets/results/metadata.csv').set_index('territoryKey')
    location = pd.read_csv('datasets/results/location.csv').set_index('territoryKey')

    return results, metadata, location

#### Teste

In [None]:
# row = codes.iloc[0]
# results = pd.DataFrame()
# metadata = pd.DataFrame()
# location = pd.DataFrame()

# loc = row['territoryKey'].split('-')[0].capitalize()
# file = (
#     pd.read_csv('datasets/crawler/' + loc + '/' + row['district'] + '/' + row['county'] + '/' + row['parish'] + '.csv')
#     .set_index('index')
# )

# md = pd.Series(file['currentResults'])
# md.index.name = None
# md = (
#     md.drop(['availableMandates', 'compensation', 'displayMessage', 'hasNoVoting', 'resultsParty'
#         , 'tie', 'tieMessage', 'totalBoycotts', 'totalForeignBoycotts', 'totalLocalBoycotts'
#         , 'totalMandates', 'totalParishesApproved'])
#     .to_frame()
#     .T
#     .assign(territoryKey = row['territoryKey'])
#     .set_index('territoryKey')
# )

# md


### Create Results files

In [2]:
results = pd.DataFrame()
metadata = pd.DataFrame()
location = pd.DataFrame()

for i, row in codes.iterrows():
    file = (
        pd.read_csv('datasets/crawler/' + row['territoryKey'].split('-')[0].capitalize() + '/' + row['district'] + '/' + row['county'] + '/' + row['parish'] + '.csv')
        .set_index('index')
    )
    
    r, md, loc = getResults(file, row)

    results = pd.concat([results, r])    
    metadata = pd.concat([metadata, md])
    location = pd.concat([location, loc])

NameError: name 'pd' is not defined

In [None]:
mkdir()

results.to_csv('datasets/results/results.csv')
metadata.to_csv('datasets/results/metadata.csv')
location.to_csv('datasets/results/location.csv')

### Explore Data

In [30]:
results, metadata, location = load_files()

parties = results.index.unique(level='party')

idx = pd.IndexSlice

#### Exploration

In [33]:
#results
# metadata
# location
#parties

array(['PPD/PSD.CDS-PP.PPM', 'PS', 'CH', 'B.E.', 'IL', 'ADN', 'L', 'PAN',
       'PCP-PEV', 'VP', 'E', 'JPP', 'R.I.R.', 'ND', 'PCTP/MRPP', 'MPT.A',
       'NC', 'PTP', 'PPD/PSD.CDS-PP', 'PPM'], dtype=object)

In [34]:
results.loc['Angra (Sé)', 'PS']

  results.loc['Angra (Sé)', 'PS']


Unnamed: 0_level_0,Unnamed: 1_level_0,territoryKey,percentage,validVotesPercentage,votes
parish,party,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Angra (Sé),PS,LOCAL-430105,24.88,25.54,153


In [None]:
# Results by Party
# for i, party in parties:
#     results.loc[idx[:, party], idx[:]]

## Database

In [13]:
# Send to SQL
from dotenv import load_dotenv
from sqlalchemy import (create_engine, Table, Column, Integer, String, MetaData)

In [14]:
load_dotenv()
user = os.getenv('USER')
password = os.getenv('PW')

results, metadata, location = load_files()

con = (
    create_engine('mysql+pymysql://' + user + ':' + password + '@localhost/legislativas', pool_recycle=3600)
    .connect()
)

### Old

In [9]:
# from sqlalchemy.sql import text

# con.execute(text('DROP TABLE IF EXISTS results, metadata, location;'))
# con.execute(text('''CREATE TABLE results(
#                                 territoryKey VARCHAR(16) PRIMARY KEY, 
#                                 party TEXT, 
#                                 percentage INTEGER,
#                                 validVotesPercentage INTEGER,
#                                 votes INTEGER
# )'''))
# con.execute(text('''CREATE TABLE metadata(
#                                 territoryKey VARCHAR(16) PRIMARY KEY, 
#                                 blankVotes INTEGER, 
#                                 blankVotesPercentage INTEGER,
#                                 nullVotes INTEGER,
#                                 nullVotesPercentage INTEGER,
#                                 numberParishes INTEGER,
#                                 numberVoters INTEGER,
#                                 percentageVoters INTEGER,
#                                 subscribedVoters INTEGER,
#                                 totalVoters INTEGER
# )'''))
# con.execute(text('''CREATE TABLE location(
#                                 territoryKey VARCHAR(16) PRIMARY KEY, 
#                                 parish TEXT, 
#                                 county TEXT,
#                                 district TEXT
# )'''))

<sqlalchemy.engine.cursor.CursorResult at 0x1fdf4428280>

### Table creation

In [15]:
meta = MetaData()
resultsTable = Table('results', meta,
    Column('territoryKey', String(16), primary_key=True),
    Column('party', String(16)),
    Column('percentage', Integer),
    Column('validVotesPercentage', Integer),
    Column('votes', Integer)
)
metadataTable = Table('metadata', meta,
    Column('territoryKey', String(16), primary_key=True),
    Column('blankVotes', Integer),
    Column('blankVotesPercentage', Integer),
    Column('nullVotes', Integer),
    Column('nullVotesPercentage', Integer),
    Column('numberParishes', Integer),
    Column('numberVoters', Integer),
    Column('percentageVoters', Integer),
    Column('subscribedVoters', Integer),
    Column('totalVoters', Integer)
)
locationTable = Table('location', meta,
    Column('territoryKey', String(16), primary_key=True),
    Column('parish', String),
    Column('county', String),
    Column('district', String)
)

In [18]:
resultsTable

Table('results', MetaData(), Column('territoryKey', String(length=16), table=<results>, primary_key=True, nullable=False), Column('party', String(length=16), table=<results>), Column('percentage', Integer(), table=<results>), Column('validVotesPercentage', Integer(), table=<results>), Column('votes', Integer(), table=<results>), schema=None)