# Committee Filings

*Author: Alan Leggitt (leggitta3@gmail.com)*  
*Date: 2017-06-10*

Counts the number of filings and the number of distinct committee names per unique committee id (grouping is by id rather than by name, since some committee ids are linked to multiple names).  

Writes the output to **committee_filings.csv** in the data directory.

In [1]:
from bokeh.plotting import figure, show
from bokeh.io import output_notebook
from bokeh.models import FixedTicker
import numpy as np
import pandas as pd

In [2]:
# configure Bokeh so that show() displays plots inline in the notebook
output_notebook()

In [3]:
# load the filings table from the data directory
# (edit data_dir if your directory layout differs)
data_dir = "../data"
filings_path = '%s/filings.csv' % data_dir
filings = pd.read_csv(filings_path)

In [4]:
# group the filings by committee id once and reuse the grouping
by_committee = filings.groupby('fec_committee_id')

# distinct filings and distinct names recorded for each committee id
n_filings = by_committee['filing_id'].nunique()
n_names = by_committee['committee_name'].nunique()
n_committees = n_filings.shape[0]

# combine both counts into one table (one row per committee id) and save it
committee_data = pd.concat(
    [n_filings, n_names], axis=1, keys=['n_filings', 'n_names'])
committee_data.to_csv('%s/committee_filings.csv' % data_dir)

In [5]:
# compute the log histogram
# np.histogram ignores values outside the outermost bin edges, so a fixed top
# edge of 10**3 would silently drop any committee with more than 1000 filings;
# extend the top exponent to cover the largest count (never below 3, so the
# bins are unchanged whenever every count is <= 1000)
max_filings = max(n_filings.max(), 1) if len(n_filings) else 1
top_exponent = max(3, int(np.ceil(np.log10(max_filings))))
hist, edges = np.histogram(n_filings, bins=np.logspace(0, top_exponent, 15))

# draw the histogram on a log-scaled x axis
fig = figure(
    title="Filings Per Committee",
    x_axis_type="log",
    width=600,
    height=500,
    toolbar_location=None,
    background_fill_color='oldlace',
)
fig.title.text_font = "times"
fig.title.text_font_size = "16pt"

# one rectangle per bin: spans consecutive bin edges, height is the bin count
left_edges, right_edges = edges[:-1], edges[1:]
fig.quad(
    left=left_edges, right=right_edges, bottom=0, top=hist,
    fill_color='seagreen', line_color='black')

# both axes share the same label font settings
for axis, label in ((fig.xaxis, "Number of Filings"),
                    (fig.yaxis, "Committee Count")):
    axis.axis_label = label
    axis.axis_label_text_font = "times"
    axis.axis_label_text_font_size = "12pt"

show(fig)