# Committee Spending

*Author: Alan Leggitt (leggitta3@gmail.com)*  
*Date: 2017-06-10*

Computes the total amount spent by each unique committee id.

Writes output to file: **committee_spending.csv**


In [1]:
from bokeh.plotting import figure, show
from bokeh.io import output_notebook
from bokeh.models import FixedTicker
import os
import sys
import numpy as np
import pandas as pd

In [None]:
# configure bokeh so that show() renders figures inline in the notebook
output_notebook()

In [None]:
# define the data directory
data_dir = "../data"

# cached, shortened version of the expenditure data
# (filing ids and amounts only)
exp_fname = "%s/expenditure_ids_amounts.csv" % data_dir

# read the cached file if present, otherwise build it from the raw data
if not os.path.exists(exp_fname):
    # look for the raw file in the data directory first (the same place
    # filings.csv is read from); fall back to the working directory,
    # which is where this notebook originally looked for it
    raw_fname = "%s/expenditures.csv" % data_dir
    if not os.path.exists(raw_fname):
        raw_fname = 'expenditures.csv'

    # column names are supplied explicitly, so the first line of the raw
    # file is parsed as data; only the two columns needed are loaded
    col_names = [
        'filing_id', 'recip_name', 'address',
        'city', 'state', 'date', 'amount', 'purpose']
    expenditures = pd.read_csv(
        raw_fname, names=col_names,
        usecols=['filing_id', 'amount'])

    # cache the shortened table so later runs can skip the raw file
    # (the index is written too, and restored by index_col=0 below)
    expenditures.to_csv(exp_fname)
else:
    expenditures = pd.read_csv(exp_fname, index_col=0)

In [None]:
# load the filings table from the data directory
filings_fname = '%s/filings.csv' % data_dir
filings = pd.read_csv(filings_fname)

In [None]:
# look for committee spending data file
committee_spending_fname = "%s/committee_spending.csv" % data_dir

if os.path.exists(committee_spending_fname):

    # load from already processed data
    committee_data = pd.read_csv(committee_spending_fname, index_col=0)

else:

    # number of distinct filings and distinct names per committee id
    # (rows with a missing committee id are dropped by groupby)
    by_committee = filings.groupby('fec_committee_id')
    n_filings = by_committee.filing_id.nunique()
    n_names = by_committee.committee_name.nunique()
    n_committees = len(n_filings)

    committee_data = pd.concat([n_filings, n_names], axis=1)
    committee_data.columns = ['n_filings', 'n_names']

    # one row per distinct (committee id, filing id) pair; dropping
    # duplicates keeps an expenditure from being counted twice when a
    # filing is listed more than once for the same committee, and dropping
    # missing values keeps NaN filing ids from being matched to each other
    committee_filings = (
        filings[['fec_committee_id', 'filing_id']]
        .dropna()
        .drop_duplicates())

    # attach a committee id to every expenditure through its filing id and
    # total the amounts per committee in a single pass, instead of
    # rescanning the whole expenditure table once per committee
    spent = (
        expenditures[['filing_id', 'amount']]
        .merge(committee_filings, on='filing_id', how='inner')
        .groupby('fec_committee_id').amount.sum())

    # committees with no matching expenditures get a total of zero
    committee_data['total_spent'] = (
        spent.reindex(committee_data.index, fill_value=0.0).astype(float))

    sys.stdout.write(
        '\r%d of %d processed' % (n_committees, n_committees))

    # save output file
    committee_data.to_csv(committee_spending_fname)

In [None]:
# total spent per committee, as a plain array
x = committee_data.total_spent.values

# 10 logarithmically spaced bin edges from 1 to 1e10 (i.e. 9 bins);
# np.histogram ignores values outside that range, so zero and negative
# totals are not counted here
log_bins = np.logspace(0, 10, 10)
hist, edges = np.histogram(x, bins=log_bins)

# build the figure with a logarithmic x axis
fig = figure(
    title="Committee Spending",
    x_axis_type="log",
    width=600,
    height=500,
    toolbar_location=None,
    background_fill_color='oldlace',
)
fig.title.text_font = "times"
fig.title.text_font_size = "16pt"

# label both axes with the same font settings
for axis, label in (
        (fig.xaxis, "Amount Spent"),
        (fig.yaxis, "Committee Count")):
    axis.axis_label = label
    axis.axis_label_text_font = "times"
    axis.axis_label_text_font_size = "12pt"

# one rectangle per bin, spanning its left and right edges
fig.quad(
    left=edges[:-1], right=edges[1:], bottom=0, top=hist,
    fill_color='seagreen', line_color='black')

show(fig)

In [None]:
# count the committees whose total falls outside the log histogram's range
n_zero = (x == 0).sum()
n_negative = (x < 0).sum()

print('%d committees spent zero dollars' % n_zero)
print('%d committees spent negative dollars (!!!)' % n_negative)