# Import & Load Election results data

### Config

In [1]:
import sys
import configparser
# Load the shared project settings. The file is opened in a context manager so
# the handle is closed as soon as parsing finishes (read_file does not close it).
config = configparser.ConfigParser()
with open('../../../settings.ini') as settings_file:
    config.read_file(settings_file)

# Put the configured library directory first on the import path; the local
# helper modules imported further down (postgres.py, commons.py) are found there.
sys.path.insert(0, config.get('PATHS','libs_path'))
# Database connection string, handed to create_engine() in the ingest step.
engine_path = config.get('DATABASE','engine_path')

### Main params

In [2]:
# Local directory (relative to this notebook) passed to the download helpers.
local_path = '../data/'
# Base URL the source files are downloaded from.
remote_path = 'http://pacha.datawheel.us/'

### Imports

In [3]:
import postgres #from local file postgres.py
from commons import inline_table_xml, inline_dimension_xml, download_file, download_zip_file, extract_zip_file #from local file commons.py

import json
import pandas as pd
from sqlalchemy import create_engine

### Load file

In [4]:
# Download the zipped election results archive from remote_path into local_path.
download_zip_file(remote_path,
                  local_path,
                  "politics3.zip")

# NOTE(review): the archive is requested as "politics3.zip" but extracted as
# "temp.zip"; presumably download_zip_file always saves under that fixed name —
# confirm against commons.py.
extract_zip_file(local_path, "temp.zip")

Downloading... http://pacha.datawheel.us/politics3.zip
Unzipping... ../data/temp.zip


True

In [5]:
def _load_results(dataset):
    """Fetch '<dataset>-procesado.csv' via download_file and return its result."""
    return download_file(remote_path, local_path, dataset + '-procesado.csv')


# Used below for the municipal table.
m2016 = _load_results('m2016')

# s* and d* sets are stacked into the parliamentary table below
# (presumably senate and deputies results — confirm against the source data).
s2009 = _load_results('s2009')
s2013 = _load_results('s2013')
s2017 = _load_results('s2017')

d2013 = _load_results('d2013')
d2017 = _load_results('d2017')

# p* sets are stacked into the presidential table below
# (the trailing 1/2 presumably marks the voting round — confirm).
p20131 = _load_results('p20131')
p20132 = _load_results('p20132')
p20171 = _load_results('p20171')
p20172 = _load_results('p20172')


Already downloaded. Using: ../data/m2016-procesado.csv
Already downloaded. Using: ../data/s2009-procesado.csv
Already downloaded. Using: ../data/s2013-procesado.csv
Already downloaded. Using: ../data/s2017-procesado.csv
Already downloaded. Using: ../data/d2013-procesado.csv
Already downloaded. Using: ../data/d2017-procesado.csv
Already downloaded. Using: ../data/p20131-procesado.csv
Already downloaded. Using: ../data/p20132-procesado.csv
Already downloaded. Using: ../data/p20171-procesado.csv
Already downloaded. Using: ../data/p20172-procesado.csv


In [6]:
# Parliamentary elections: stack the s* and d* result sets, keep only the
# columns needed for the fact table, and switch to English column names.
dfParla = (
    pd.concat([s2009, s2013, s2017, d2013, d2017], axis=0, ignore_index=True)
    [['year', 'election_id', 'comuna_datachile_id', 'candidato_id',
      'partido_id', 'votos_candidato', 'electo']]
    .rename(columns={
        'votos_candidato': 'votes',
        'candidato_id': 'candidate_id',
        'partido_id': 'party_id',
        # Not among the columns selected above, so this entry has no effect.
        'circ_senatorial': 'circunscription_id',
        'electo': 'elected',
    })
)
# Show the resulting column names.
dfParla.columns.tolist()

['year',
 'election_id',
 'comuna_datachile_id',
 'candidate_id',
 'party_id',
 'votes',
 'elected']

In [7]:
# Presidential elections: stack the four p* result sets and switch to English
# column names. 'lugar' is mapped as well, although the recorded output of this
# cell shows no 'place' column in the result.
dfPresi = pd.concat(
    [p20131, p20132, p20171, p20172],
    axis=0,
    ignore_index=True,
).rename(columns={
    'votos_candidato': 'votes',
    'candidato_id': 'candidate_id',
    'partido_id': 'party_id',
    'electo': 'elected',
    'lugar': 'place',
})
# Show the resulting column names.
dfPresi.columns.tolist()

['candidate_id',
 'comuna_datachile_id',
 'election_id',
 'elected',
 'party_id',
 'votes',
 'year']

In [8]:
# Municipal (comuna-level) elections, 2016: switch to English column names.
dfMuni2016 = m2016.rename(
    columns=dict(
        votos_candidato='votes',
        candidato_id='candidate_id',
        partido_id='party_id',
        electo='elected',
        lugar='place',
    )
)
# Show the resulting column names.
dfMuni2016.columns.tolist()

['comuna_datachile_id',
 'candidate_id',
 'party_id',
 'votes',
 'elected',
 'year',
 'election_id']

In [9]:
# Combine parliamentary, municipal and presidential results into one frame and
# cast every fact-table column to integer.
# NOTE: the integer cast fails on missing values; an earlier revision filled
# 'elected', 'place' and 'circunscription_id' before casting.
df = pd.concat(
    [dfParla, dfMuni2016, dfPresi],
    axis=0,
    ignore_index=True,
).astype({
    column: 'int'
    for column in (
        'election_id',
        'votes',
        'year',
        'elected',
        'comuna_datachile_id',
        'party_id',
        'candidate_id',
    )
})
# Show the resulting column names.
df.columns.tolist()

['candidate_id',
 'comuna_datachile_id',
 'elected',
 'election_id',
 'party_id',
 'votes',
 'year']

### Ingest

In [10]:
# Create the SQLAlchemy engine from the connection string read from settings.ini.
engine = create_engine(engine_path)
# Project-local driver (postgres.py) built around that engine.
db = postgres.PostgresDriver(engine)
# Load df into politics.fact_election_results_update. Per the recorded output
# below, this drops and recreates the table and then bulk-loads the rows via COPY.
db.to_sql(df, 'politics', 'fact_election_results_update')

DROP TABLE IF EXISTS politics.fact_election_results_update;
CREATE TABLE "politics"."fact_election_results_update" (
"candidate_id" INTEGER,
  "comuna_datachile_id" INTEGER,
  "elected" INTEGER,
  "election_id" INTEGER,
  "party_id" INTEGER,
  "votes" INTEGER,
  "year" INTEGER
)
COPY "politics"."fact_election_results_update" ("candidate_id","comuna_datachile_id","elected","election_id","party_id","votes","year") FROM STDIN WITH CSV HEADER DELIMITER ',';


### Update empty values

In [11]:
#engine.execute("""
#UPDATE politics.fact_election_results_update SET place = NULL where place = -1;
#""")

### Indexes & FK

In [12]:
# Add an index on comuna_datachile_id to the fact table loaded in the ingest step.
# NOTE(review): Engine.execute() exists only in SQLAlchemy 1.x (removed in 2.0) —
# confirm the pinned version before upgrading.
engine.execute("""
CREATE INDEX fact_election_results_update_comuna_datachile_id 
ON politics.fact_election_results_update (comuna_datachile_id)
""")

<sqlalchemy.engine.result.ResultProxy at 0x11ae647f0>