In [1]:
# Import dependencies
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
import json
import pandas as pd
from pandas.io import sql
import numpy as np
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt
import datetime
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder, LabelEncoder, MinMaxScaler
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
from sklearn.linear_model import LogisticRegression
import numpy
from mpl_toolkits.mplot3d import Axes3D
import seaborn as sns
from consts import *

In [2]:
# Connecting to Postgres instance
engine = create_engine(CREATE_ENGINE_STR)

In [3]:
# Printing info for table names 
print (engine.table_names())

['committees', 'candidates', 'education', 'committee_summary_2020', 'donations', 'six_state_donations', 'agg_county_votes', 'agg_county_donors', 'fec_donor_az', 'health_metrics', 'birth_death_rate', 'postal_codes', 'fec_donor_mi', 'fec_donor_wi', 'fec_committee', 'fec_donor_pa', 'pres_votes_6t', 'unemployment', 'fec_donor_nc', 'fec_donor_fl']


In [4]:
#Get all donation records for a single state and return it in a dataframe
def donor_states_query(table_name):
    #Run queries to get all donation records from the states into dfs
    donor_select_sql = 'select * from {}'.format(table_name)
    donor_df = pd.read_sql_query(donor_select_sql,con=engine)
    return donor_df

In [None]:
#Get all donation records for a single state and return it in a dataframe
def donor_state_query(table_name, state):
    #Run queries to get all donation records from the states into dfs
    select_sql = f"select * from {table_name} where \"STATE\"= '{state.upper()}';"
    donor_df = pd.read_sql_query(select_sql, con=engine)
    return donor_df

In [5]:
def main():
    print("Main 1")
    #donor_df = donor_states_query("six_state_donations")
    print("Main 2")
    donor_df["trans_dt"] = pd.to_datetime(donor_df['TRANSACTION_DT'], errors='coerce')
    print("Main 3")
    donor_df["trans_date_str"] = donor_df["trans_dt"].dt.strftime('%m%d%Y')
    print("Main 4")
    donor_df["trans_date"] = pd.to_datetime(donor_df["trans_date_str"])
    print("Main 5")
    return donor_df

In [6]:
donor_df = main()

Main 1
Main 2
Main 3
Main 4
Main 5


In [7]:
donor_df.head()

Unnamed: 0,CMTE_ID,AMNDT_IND,RPT_TP,TRANSACTION_PGI,IMAGE_NUM,TRANSACTION_TP,ENTITY_TP,NAME,CITY,STATE,...,TRANSACTION_AMT,OTHER_ID,TRAN_ID,FILE_NUM,MEMO_CD,MEMO_TEXT,SUB_ID,trans_dt,trans_date_str,trans_date
0,C00577130,A,YE,P2016,201606159017971998,15,IND,"BUCKLEY, MATT",SOUDERTON,PA,...,15,,VPF7BGDBP39,1077572,,* EARMARKED CONTRIBUTION: SEE BELOW,4061720161299555548,NaT,,NaT
1,C00577130,A,YE,P2016,201606159017998100,15E,IND,"HALL, ALICE",DURHAM,NC,...,250,C00401224,VPF7BGNSF64,1077572,,* EARMARKED CONTRIBUTION: SEE BELOW,4061720161299633853,NaT,,NaT
2,C00577130,A,YE,P2016,201606159017971847,15,IND,"STANFORD, KENT",BARABOO,WI,...,35,,VPF7BGD85J6,1077572,,* EARMARKED CONTRIBUTION: SEE BELOW,4061720161299555094,NaT,,NaT
3,C00577130,A,YE,P2016,201606159017966546,15,IND,"MONKS, DENNIS",PITTSBURGH,PA,...,30,,VPF7BGCKFN6,1077572,,* EARMARKED CONTRIBUTION: SEE BELOW,4061720161299539190,NaT,,NaT
4,C00577130,A,YE,P2016,201606159017971825,15,IND,"SHEPELAVY, DAN",PHILADELPHIA,PA,...,20,,VPF7BGD5Q68,1077572,,* EARMARKED CONTRIBUTION: SEE BELOW,4061720161299555028,NaT,,NaT


In [8]:
donor_df['year'] = pd.DatetimeIndex(donor_df['trans_date']).year

In [9]:
donor_df.head()

Unnamed: 0,CMTE_ID,AMNDT_IND,RPT_TP,TRANSACTION_PGI,IMAGE_NUM,TRANSACTION_TP,ENTITY_TP,NAME,CITY,STATE,...,OTHER_ID,TRAN_ID,FILE_NUM,MEMO_CD,MEMO_TEXT,SUB_ID,trans_dt,trans_date_str,trans_date,year
0,C00577130,A,YE,P2016,201606159017971998,15,IND,"BUCKLEY, MATT",SOUDERTON,PA,...,,VPF7BGDBP39,1077572,,* EARMARKED CONTRIBUTION: SEE BELOW,4061720161299555548,NaT,,NaT,
1,C00577130,A,YE,P2016,201606159017998100,15E,IND,"HALL, ALICE",DURHAM,NC,...,C00401224,VPF7BGNSF64,1077572,,* EARMARKED CONTRIBUTION: SEE BELOW,4061720161299633853,NaT,,NaT,
2,C00577130,A,YE,P2016,201606159017971847,15,IND,"STANFORD, KENT",BARABOO,WI,...,,VPF7BGD85J6,1077572,,* EARMARKED CONTRIBUTION: SEE BELOW,4061720161299555094,NaT,,NaT,
3,C00577130,A,YE,P2016,201606159017966546,15,IND,"MONKS, DENNIS",PITTSBURGH,PA,...,,VPF7BGCKFN6,1077572,,* EARMARKED CONTRIBUTION: SEE BELOW,4061720161299539190,NaT,,NaT,
4,C00577130,A,YE,P2016,201606159017971825,15,IND,"SHEPELAVY, DAN",PHILADELPHIA,PA,...,,VPF7BGD5Q68,1077572,,* EARMARKED CONTRIBUTION: SEE BELOW,4061720161299555028,NaT,,NaT,


In [10]:
donor_df["TRANSACTION_DT"].head()

0          11192015
1          12152015
2          11182015
3          11152015
4          11182015
             ...   
4745975    11162015
4745976    11152015
4745977    11162015
4745978    11192015
4745979    11192015
Name: TRANSACTION_DT, Length: 4745980, dtype: object

In [19]:
donor_df["trans_dt"] = pd.to_datetime(donor_df['TRANSACTION_DT'], format='%m%d%Y', errors="ignore")

In [20]:
donor_df["trans_dt"].head()

0    11192015
1    12152015
2    11182015
3    11152015
4    11182015
Name: trans_dt, dtype: object

In [21]:
print(type(donor_df["trans_dt"][0]))

<class 'str'>


In [23]:
donor_df['trans_dt'] = donor_df['trans_dt'].astype('datetime64[ns]') 

ParserError: month must be in 1..12: 11192015

In [24]:
print(type(donor_df["trans_dt"][0]))

<class 'str'>


In [29]:
donor_df['trans_dt_2'] = pd.to_datetime(donor_df['TRANSACTION_DT'], format='%m%d%Y', errors='coerce')
print (donor_df)
print (donor_df.dtypes)

           CMTE_ID AMNDT_IND RPT_TP TRANSACTION_PGI           IMAGE_NUM  \
0        C00577130         A     YE           P2016  201606159017971998   
1        C00577130         A     YE           P2016  201606159017998100   
2        C00577130         A     YE           P2016  201606159017971847   
3        C00577130         A     YE           P2016  201606159017966546   
4        C00577130         A     YE           P2016  201606159017971825   
...            ...       ...    ...             ...                 ...   
4745975  C00577130         A     YE           P2016  201606159017969741   
4745976  C00577130         A     YE           P2016  201606159017968172   
4745977  C00577130         A     YE           P2016  201606159017969269   
4745978  C00577130         A     YE           P2016  201606159017972392   
4745979  C00577130         A     YE           P2016  201606159017972393   

        TRANSACTION_TP ENTITY_TP               NAME          CITY STATE  ...  \
0                  

In [40]:
donor_df['year'] = donor_df['trans_dt_2'].dt.year

In [39]:
print (donor_df.dtypes)

CMTE_ID                    object
AMNDT_IND                  object
RPT_TP                     object
TRANSACTION_PGI            object
IMAGE_NUM                  object
TRANSACTION_TP             object
ENTITY_TP                  object
NAME                       object
CITY                       object
STATE                      object
ZIP                        object
EMPLOYER                   object
OCCUPATION                 object
TRANSACTION_DT             object
TRANSACTION_AMT             int64
OTHER_ID                   object
TRAN_ID                    object
FILE_NUM                   object
MEMO_CD                    object
MEMO_TEXT                  object
SUB_ID                     object
trans_dt                   object
trans_date_str            float64
trans_date         datetime64[ns]
year                      float64
trans_dt_2         datetime64[ns]
dtype: object


In [45]:
donor_df.groupby("year").count().tail(30)

Unnamed: 0_level_0,CMTE_ID,AMNDT_IND,RPT_TP,TRANSACTION_PGI,IMAGE_NUM,TRANSACTION_TP,ENTITY_TP,NAME,CITY,STATE,...,OTHER_ID,TRAN_ID,FILE_NUM,MEMO_CD,MEMO_TEXT,SUB_ID,trans_dt,trans_date_str,trans_date,trans_dt_2
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1916.0,1,1,1,1,1,1,1,1,1,1,...,0,1,1,1,1,1,1,0,0,1
1994.0,2,2,2,0,2,2,0,2,2,2,...,0,0,0,0,0,2,2,0,0,2
1997.0,21,21,21,0,21,21,0,21,21,21,...,0,0,0,0,0,21,21,0,0,21
1998.0,35,35,35,0,35,35,0,35,35,35,...,0,0,0,0,0,35,35,0,0,35
1999.0,77192,77192,77192,1692,77192,77192,1,77192,77179,77192,...,96,1,2,1,1693,77192,77192,0,0,77192
2000.0,113926,113922,113926,2584,113926,113926,33,113926,113913,113926,...,212,33,36,29,2583,113926,113926,0,0,113926
2001.0,59175,59173,59175,834,59175,59175,9,59175,59172,59175,...,365,9,68,9,835,59175,59175,0,0,59175
2002.0,104056,104036,104056,2629,104056,104056,83,104052,104051,104056,...,2224,83,2221,73,2639,104056,104056,0,0,104056
2003.0,96945,95676,96945,19703,96945,96945,77140,96942,96938,96945,...,1239,77480,76995,2133,2281,96945,96945,0,0,96945
2004.0,193482,193482,193482,38922,193482,193482,154759,193482,193477,193482,...,3500,154771,153787,5258,5519,193482,193482,0,0,193482


In [None]:
donor_df["trans_dt"].head()

In [None]:
donor_df["trans_date_str"] = donor_df["trans_dt"].dt.strftime('%m%d%Y')

In [None]:
donor_df["trans_dt"] = pd.to_datetime(donor_df['TRANSACTION_DT'], errors='coerce')
    print("Main 3")
    donor_df["trans_date_str"] = donor_df["trans_dt"].dt.strftime('%m%d%Y')
    print("Main 4")
    donor_df["trans_date"] = pd.to_datetime(donor_df["trans_date_str"])
    print("Main 5")
    return donor_df