In [1]:
import pandas as pd
import numpy as np
import os

import psycopg2

In [2]:
file = open("db_login.txt", 'r')
logins = file.readlines()

for i, line in enumerate(logins):
    logins[i] = line.split('\n')[0]
    
file.close()

connect_params = {
     "host"      : logins[0],
     "database"  : logins[1],
     "user"      : logins[2],
     "password"  : logins[3]
}

In [3]:
def connect(conn_params):
    conn = None
    
    try:
        print('Connecting to the PostgreSQL database...')
        conn = psycopg2.connect(**conn_params)
    except (Exception, psycopg2.DatabaseError) as error:
        print(error)
        sys.exit(1)
        
    print("Connection successful")
    
    return conn

In [4]:
conn = connect(connect_params)

Connecting to the PostgreSQL database...
Connection successful


In [5]:
def postgresql_to_dataframe(conn, select_query, column_headers):
    cursor = conn.cursor()
    
    try:
        cursor.execute(select_query)
    except (Exception, psycopg2.DatabaseError) as error:
        print("Error: %s" % error)
        cursor.close()
        return 1
    
    tuples = cursor.fetchall()
    cursor.close()
    
    df = pd.DataFrame(tuples, columns=column_headers)
    return df

## Candidates Database

#### Congressional Candidates

In [6]:
# All Congressional Candidates
query1 = """
SELECT cycle, cid, first_last_party, party, dist_id_run_for, current_candidate, cycle_candidate, recip_code,
raised_from_pacs, raised_from_individuals, raised_total, raised_unitemized
FROM candidates
WHERE dist_id_run_for NOT LIKE 'PRES' AND dist_id_run_for NOT LIKE '__S_'
"""

In [7]:
col_names_1 = ['cycle', 'id', 'candidate_name', 'party', 'district', 'current_candidate', 'cycle_candidate',
               'recip_code', 'raised_from_pacs', 'raised_from_individuals', 'raised_total', 'raised_unitemized']

In [8]:
all_congr_candids = postgresql_to_dataframe(conn, query1, col_names_1)
all_congr_candids

Unnamed: 0,cycle,id,candidate_name,party,district,current_candidate,cycle_candidate,recip_code,raised_from_pacs,raised_from_individuals,raised_total,raised_unitemized
0,2000,N00005009,Richmond A Soluade Sr (R),R,MO01,,Y,RL,,,0,
1,1996,N00004126,Mark Alan Behnke (R),R,MI07,,,RN,,,0,0.0
2,1996,N00000718,Jim Ford (R),R,NJ10,,,RN,,,0,0.0
3,1996,N00005551,Ernest J Istook (R),R,OK05,Y,Y,RW,130384.0,99600.0,399980,169996.0
4,1996,N00008523,Stephen Wayne Hofman (R),R,MI16,,Y,RL,,5576.0,18050,12474.0
...,...,...,...,...,...,...,...,...,...,...,...,...
51143,2016,N00033316,Joaquin Castro (D),D,TX20,Y,Y,DI,211035.0,188278.0,431521,32208.0
51144,2016,N00025284,Raul M Grijalva (D),D,AZ03,Y,Y,DI,102018.0,41449.0,184909,41442.0
51145,2016,N00034130,Randal Wallace (R),R,SC07,,,RN,1000.0,4800.0,7575,1775.0
51146,2016,N00013846,Jeff Miller (R),R,FL01,Y,Y,RI,319963.0,403622.0,730935,7350.0


In [9]:
all_congr_candids.to_csv('../data/all_congr_candids.csv')

#### Senate Candidates

In [10]:
# All Senate Candidates
query2 = """
SELECT cycle, cid, first_last_party, party, dist_id_run_for, current_candidate, cycle_candidate, recip_code,
raised_from_pacs, raised_from_individuals, raised_total, raised_unitemized
FROM candidates
WHERE dist_id_run_for NOT LIKE 'PRES' AND dist_id_run_for LIKE '__S_'
"""

In [11]:
col_names_2 = ['cycle', 'id', 'candidate_name', 'party', 'state', 'current_candidate', 'cycle_candidate',
               'recip_code', 'raised_from_pacs', 'raised_from_individuals', 'raised_total', 'raised_unitemized']

In [12]:
all_sen_candids = postgresql_to_dataframe(conn, query2, col_names_2)
all_sen_candids

Unnamed: 0,cycle,id,candidate_name,party,state,current_candidate,cycle_candidate,recip_code,raised_from_pacs,raised_from_individuals,raised_total,raised_unitemized
0,1996,N00004451,Stephen Bonsal Young (R),R,MNS1,,Y,RL,492.0,97408.0,264254,166354.0
1,1996,N00001670,Raymond J Clatworthy (R),R,DES2,Y,Y,RL,202815.0,618935.0,1636414,814664.0
2,1996,N00000525,Gavin Terence Mills (I),I,VTS1,,,3L,,266.0,266,0.0
3,1996,N00006284,James A McClure (R),R,IDS1,,,RN,,,0,0.0
4,1996,N00000421,Dick Swett (D),D,NHS2,Y,Y,DL,528713.0,1161345.0,3020794,1330736.0
...,...,...,...,...,...,...,...,...,...,...,...,...
10728,1996,N00005453,Mark Brown (D),D,ARS1,,,DL,,720.0,720,
10729,2016,N00037568,Sean Guthrie (I),I,FLS2,Y,Y,3O,,101.0,101,
10730,2016,N00013820,Chris Van Hollen (D),D,MDS2,,,DO,209184.0,4032126.0,4643648,402338.0
10731,2016,N00007836,Maria Cantwell (D),D,WAS1,,,DI,-1750.0,122060.0,302913,182603.0


In [13]:
all_sen_candids.to_csv('../data/all_sen_candids.csv')

## Individual Donations Database

In [44]:
cycle = 1990

#### Senate Campaigns

In [45]:
# All Senate Individual Donations
query3 = """
SELECT DISTINCT(i.fec_trans_id), i.cycle, i.date_donated, i.recipient_id, i.contributor_id, i.contributor_name,
i.org_name, i.ult_org, i.amount, i.city, i.state, i.recip_code, i.type, i.gender, i.occupation, i.employer
FROM individual_contributions AS i
INNER JOIN candidates AS c on i.recipient_id = c.cid
WHERE i.cycle="""+str(cycle)+""" AND i.recipient_id LIKE 'N%' AND
c.dist_id_run_for NOT LIKE 'PRES' AND c.dist_id_run_for LIKE '__S_'
"""

In [46]:
col_names_3 = ['transaction_id', 'cycle', 'date', 'recipient_id', 'contributor_id', 'contributor_name',
               'org_name', 'ult_org', 'amount', 'city', 'state', 'recip_code', 'type',
               'gender', 'occupation', 'employer']

In [47]:
df1 = postgresql_to_dataframe(conn, query3, col_names_3)
df1

Unnamed: 0,transaction_id,cycle,date,recipient_id,contributor_id,contributor_name,org_name,ult_org,amount,city,state,recip_code,type,gender,occupation,employer
0,0000103,1990,1989-01-08,N00003072,a0000000103,"WILLIAMSON, J H",Independent Oil Dealer,,500.0,ANNISTON,AL,DW,15,I,,
1,0000114,1990,1989-01-07,N00003072,a00000886641,"BUTTRAM, H DEAN JR",Attorney,,1000.0,CENTRE,AL,DW,15,M,,
2,0000132,1990,1989-01-03,N00003072,a0000000132,"TREDAWAY, FLOYD P",Retired,,500.0,JACKSONVILLE,AL,DW,15,M,,
3,0000133,1990,1989-01-08,N00003072,a00000001331,"KLIMASEWSKI, LINDA",Ft McClellan School,,250.0,JACKSONVILLE,AL,DW,15,F,,
4,0000135,1990,1989-01-08,N00003072,a00000001331,"KLIMASEWSKI, LINDA",Ft McClellan School,,250.0,JACKSONVILLE,AL,DW,15,F,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
176749,0777663,1990,1990-05-15,N00001871,,,[Candidate Contribution],,707.0,,,DI,15C,,,
176750,0777665,1990,1989-12-18,N00001871,,,[Candidate Contribution],,874.0,,,DI,15C,,,
176751,0777666,1990,1990-04-03,N00001871,,,[Candidate Contribution],,12932.0,,,DI,15C,,,
176752,0777667,1990,1990-05-15,N00001871,,,[Candidate Contribution],,707.0,,,DI,15C,,,


In [48]:
df1.to_csv('../data/individual_contributions/senate/sen_'+str(cycle)+'_individ_donations.csv')

#### Congressional Campaigns

In [49]:
# All Congressional Individual Donations
query4 = """
SELECT DISTINCT(i.fec_trans_id), i.cycle, i.date_donated, i.recipient_id, i.contributor_id, i.contributor_name, i.org_name,
i.ult_org, i.amount, i.city, i.state, i.recip_code, i.type, i.gender, i.occupation, i.employer
FROM individual_contributions AS i
INNER JOIN candidates AS c on i.recipient_id = c.cid
WHERE i.cycle="""+str(cycle)+""" AND i.recipient_id LIKE 'N%' AND
c.dist_id_run_for NOT LIKE 'PRES' AND c.dist_id_run_for NOT LIKE '__S_'
"""

In [50]:
df2 = postgresql_to_dataframe(conn, query4, col_names_3)
df2

Unnamed: 0,transaction_id,cycle,date,recipient_id,contributor_id,contributor_name,org_name,ult_org,amount,city,state,recip_code,type,gender,occupation,employer
0,0000099,1990,1989-02-06,N00010104,,,[Candidate Contribution],,699.0,,,DN,15C,,,
1,0000100,1990,1989-01-23,N00003644,,,[Candidate Contribution],,560.0,,,RN,15C,,,
2,0000101,1990,1989-01-27,N00012272,a0000174711,"BROWN, DICK",Consultant,,300.0,AUSTIN,TX,RN,15,M,,
3,0000102,1990,1989-01-30,N00012272,,,[Candidate Contribution],,1518.0,,,RN,15C,,,
4,0000103,1990,1989-01-08,N00003072,a0000000103,"WILLIAMSON, J H",Independent Oil Dealer,,500.0,ANNISTON,AL,DW,15,I,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
168083,0777711,1990,1989-06-23,N00005998,a0001130787,"NEAL, LOYD",Hrh Insurance,,1000.0,CORPUS CHRISTI,TX,DW,15,M,,
168084,0777712,1990,1989-06-20,N00005998,a0000933875,"REYES, JOSEPH",Reyes & Assoc,,500.0,POTOMAC,MD,DW,15,M,,
168085,0777713,1990,1989-06-28,N00005998,a0000961572,"STORM, JAMES C",Oil Drilling,,1000.0,CORPUS CHRISTI,TX,DW,15,M,,
168086,0777714,1990,1989-06-28,N00005998,a0001156677,"STORM, RALPH",Drilling,,300.0,CORPUS CHRISTI,TX,DW,15,M,,


In [51]:
df2.to_csv('../data/individual_contributions/house/congr_'+str(cycle)+'_individ_donations.csv')

In [53]:
conn.close()