# Committee Spending Per Year

*Author: Alan Leggitt (leggitta3@gmail.com)*  
*Date: 2017-06-10*

Parses expenditures by committee, date, and amount

Writes output to file: **expenditures_by_committee.csv**

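NOTE: In its current state, this notebook will take several hours to run. The committee-tagged expenditure table is only generated when **expenditures_by_committee.csv** does not already exist; on later runs it is read back from that file instead of being rebuilt.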

In [1]:
from bokeh.plotting import figure, show
from bokeh.io import output_notebook
from bokeh.models import FixedTicker
import os
import sys
import numpy as np
import pandas as pd

In [2]:
# configure Bokeh so that plots passed to show() render inline in this notebook
output_notebook()

In [3]:
# directory holding the input files and the cached output
data_dir = "../data"

# path of the cached, committee-tagged expenditure table written by this
# notebook (one row per expenditure: date, amount and committee id)
exp_fname = "{}/expenditures_by_committee.csv".format(data_dir)

# load the filings table
filings = pd.read_csv("{}/filings.csv".format(data_dir))

In [4]:
# collect every distinct committee id present in the filings table,
# in order of first appearance, and count them
committee_ids = filings['fec_committee_id'].unique()
n_committees = len(committee_ids)

In [5]:
# read or generate file
# The committee-tagged expenditure table is cached at exp_fname; it is only
# rebuilt from the raw expenditures file when that cache does not exist.
if not os.path.exists(exp_fname):
    # read the expenditures data; column names are supplied explicitly, so
    # every line of the file is read as data, and only the three columns
    # needed below are kept
    col_names = (
        'filing_id', 'recip_name', 'address',
        'city', 'state', 'date', 'amount', 'purpose')
    expenditures = pd.read_csv(
        '%s/expenditures.csv' % data_dir, names=col_names,
        usecols=('filing_id', 'amount', 'date'))

    # build a filing_id -> committee_id lookup from the filings table
    filing_lookup = filings[['filing_id', 'fec_committee_id']].copy()

    # rank committees by first appearance (the same order as `unique()`), so
    # that a filing id listed under more than one committee resolves to the
    # committee appearing last in that order -- the same outcome as tagging
    # the rows one committee at a time
    filing_lookup['committee_rank'] = pd.factorize(
        filing_lookup['fec_committee_id'])[0]

    # rows lacking either id can never be matched to an expenditure
    filing_lookup = filing_lookup.dropna(
        subset=['filing_id', 'fec_committee_id'])

    # stable sort keeps the result deterministic; one row per filing id
    filing_lookup = (
        filing_lookup
        .sort_values('committee_rank', kind='mergesort')
        .drop_duplicates('filing_id', keep='last'))

    # object dtype keeps the committee ids exactly as they appear in filings
    filing_to_committee = (
        filing_lookup
        .set_index('filing_id')['fec_committee_id']
        .astype(object))

    # tag every expenditure with its committee id in a single pass (instead
    # of rescanning the whole table once per committee); expenditures whose
    # filing id is not in the filings table get an empty string
    expenditures['committee_id'] = (
        expenditures['filing_id'].map(filing_to_committee).fillna(''))

    # remove filing id column to save space
    expenditures = expenditures.drop('filing_id', axis=1)

    # write to file (the index is written too and restored by index_col=0)
    expenditures.to_csv(exp_fname)
else:
    # NOTE: unmatched rows hold '' when freshly generated but come back as
    # NaN here, because read_csv treats empty fields as missing values
    expenditures = pd.read_csv(exp_fname, index_col=0)