# US Region vs Senate Party Timeline

Data obtained from https://github.com/unitedstates/congress-legislators (the `legislators-historical.json` and `legislators-current.json` files).

In [1]:
import json
from pprint import pprint

# Years outside the closed interval [START_YEAR, END_YEAR] are skipped when
# terms are collected.
START_YEAR = 1914
END_YEAR = 2020

# Only terms whose 'type' field equals this value are collected.
TERM_TYPE = 'sen'

# Hex fill colour for each party name found in the 'party' field of a term;
# looked up when the stack plots are drawn.
PARTY_COLORS = {
    'Republican': '#de0100',
    'Democrat': '#0015bc',
    'Independent': '#ffffff',
    'Conservative': '#ffb6c1',
    'Farmer-Labor': '#3285c3',
    'Progressive': '#01ff00',
}


def extract_info(members, output):
    """Collect the party of every matching term, grouped by year and state.

    For each term in each member's 'terms' list whose 'type' equals
    TERM_TYPE, the term's 'party' is appended to ``output[year][state]``
    for every year from the term's start year up to, but not including,
    its end year, restricted to START_YEAR..END_YEAR inclusive.

    Args:
        members: iterable of member records, each with a 'terms' list whose
            entries carry 'type', 'start', 'end', 'state' and 'party'.
        output: dict mutated in place; maps year -> state -> list of parties.
    """
    for member in members:
        for term in member['terms']:
            if term['type'] != TERM_TYPE:
                continue

            # 'start' and 'end' begin with a four-digit year.
            first_year = int(term['start'][:4])
            last_year = int(term['end'][:4])

            # Clamp the half-open [first_year, last_year) span to the
            # inclusive START_YEAR..END_YEAR window.
            lower = max(first_year, START_YEAR)
            upper = min(last_year, END_YEAR + 1)
            for year in range(lower, upper):
                state = term['state']
                by_state = output.setdefault(year, {})
                by_state.setdefault(state, []).append(term['party'])


def fill_data(filename, output):
    """Load a JSON file of member records and merge its terms into ``output``.

    Args:
        filename: path of the JSON file to read; its parsed content is
            passed straight to ``extract_info`` as the members iterable.
        output: dict updated in place by ``extract_info``.
    """
    # Decode explicitly as UTF-8 so that non-ASCII characters in the file
    # are read the same way regardless of the platform's default encoding.
    with open(filename, encoding='utf-8') as data:
        extract_info(json.load(data), output)

In [2]:
# year -> state -> list of parties, accumulated from both data files
# (historical first, then current).
senate_data = {}
for source_file in ('legislators-historical.json', 'legislators-current.json'):
    fill_data(source_file, senate_data)

In [3]:
# Region name -> two-letter state codes belonging to it.
# The key order fixes the top-to-bottom order of the subplots.
# NOTE(review): the grouping looks like the four US Census Bureau regions
# (without DC) -- confirm.
regions = {
    "Northeast": [
        'CT', 'ME', 'MA', 'NH', 'RI', 'VT', 'NJ', 'NY', 'PA',
    ],
    "Midwest": [
        'MN', 'MO', 'IL', 'IN', 'MI', 'OH', 'WI', 'IA', 'KS', 'NE', 'ND', 'SD',
    ],
    "South": [
        'AL', 'MS', 'TN', 'DE', 'FL', 'GA', 'MD', 'NC',
        'SC', 'VA', 'WV', 'KY', 'AR', 'LA', 'OK', 'TX',
    ],
    "West": [
        'AZ', 'UT', 'CO', 'ID', 'MT', 'NV', 'NM',
        'WY', 'AK', 'CA', 'HI', 'OR', 'WA',
    ],
}

def party_ratio(year, region):
    """Return each party's share of the recorded seats in a region for a year.

    Args:
        year: key into the module-level ``senate_data`` dict.
        region: key into the module-level ``regions`` dict.

    Returns:
        A dict mapping party name to its fraction of all party entries
        recorded for the region's states in that year. Empty when no
        entries were recorded.
    """
    tally = {}
    for state in regions[region]:
        # States with no entry (or an empty list) for this year add nothing.
        for party in senate_data[year].get(state) or ():
            tally[party] = tally.get(party, 0) + 1

    seats = sum(tally.values())
    return {party: count / seats for party, count in tally.items()}

In [4]:
%matplotlib notebook
import pandas as pd
import matplotlib.pyplot as plt

# Every year from START_YEAR through END_YEAR inclusive; used as the x values
# of each stack plot.
years = range(START_YEAR, END_YEAR + 1)

In [5]:
# Default (width, height) in inches for figures created from here on.
plt.rcParams['figure.figsize'] = (9, 9)
def add_plot(region, figure):
    """Add one region's party-share stack plot to ``figure`` as a subplot.

    The subplots form a single column with one row per entry in the
    module-level ``regions`` dict; this region's row is its position among
    that dict's keys.

    Args:
        region: key of ``regions`` naming the region to plot.
        figure: matplotlib figure that receives the new subplot.
    """
    region_names = list(regions)
    # Draw on the figure passed in rather than on a global named `fig`, so
    # the function works with whichever figure the caller supplies.
    ax = figure.add_subplot(
        len(region_names), 1, region_names.index(region) + 1
    )

    # One row per year, one column per party; a party absent in a given
    # year comes out as NaN and is plotted as a zero share.
    data = pd.DataFrame(
        [party_ratio(year, region) for year in years]
    ).fillna(0)
    colors = [PARTY_COLORS[party] for party in data.columns]

    ax.stackplot(years, data.values.T, labels=data.columns, colors=colors)
    # Legend sits above the top-right corner of the axes, one column per party.
    ax.legend(
        loc='upper right',
        bbox_to_anchor=(1, 1.25),
        facecolor='black',
        framealpha=0.4,
        ncol=len(colors),
    )
    ax.set_title(region, loc='left')
    ax.margins(0, 0)
    ax.set_yticks([0, 1])
    # Major ticks on years with year % 8 == 2, minor ticks on every even year.
    ax.set_xticks([tick for tick in years if tick % 8 == 2])
    ax.set_xticks([tick for tick in years if tick % 2 == 0], minor=True)
    ax.grid(axis='x', which='both', linestyle=':')

In [6]:
# Build one figure holding a subplot per region, in the order of `regions`.
fig = plt.figure()
for region in regions:
    add_plot(region, fig)
fig.tight_layout()

<IPython.core.display.Javascript object>