In [1]:
import os
import io
import requests

import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline

PROJ_PATH = '/home/reese56/w266_final/'
VOTEVIEW = os.path.join(PROJ_PATH, 'data/voteview/')
VOTEVIEW_RAW = os.path.join(VOTEVIEW, 'data/voteview/raw/')
SAVE_PATH = os.path.join(VOTEVIEW, 'gen/')

In [2]:
# import voteview congressional members dw-nominate score data
try:
    member_ideology = pd.read_csv(os.path.join(VOTEVIEW_RAW, 'raw/member_ideology.csv'))
except FileNotFoundError:
    url= 'https://voteview.com/static/data/out/members/HSall_members.csv'
    r = requests.get(url).content
    member_ideology = pd.read_csv(io.StringIO(r.decode('utf-8')))

In [3]:
# select major political parties
party_codes = member_ideology['party_code'].isin([100,200])

# select desired congressional sessions
period = member_ideology['congress'] >= 75

# select desired attributes
attributes = ['congress','chamber','state_icpsr','last_means',
            'district_code','party_code','nominate_dim1',
            'nominate_number_of_votes','nominate_number_of_errors']

# select chambers
chambers = member_ideology['chamber'].isin(['House','Senate'])

# apply selections
member_ideology = member_ideology[party_codes & period & chambers].filter(attributes)

In [4]:
# aggregate by party and election status
member_count = (member_ideology
                .groupby(['congress','chamber','party_code','last_means'], as_index= False)
                .agg({'district_code':'count'})
               )

In [5]:
# create party specific data frames
dem_count = member_count[member_count['party_code'] == 100].rename({'district_code':'dem_count'}, axis = 1)
gop_count = member_count[member_count['party_code'] == 200].rename({'district_code':'gop_count'}, axis = 1)

In [6]:
# create party data frames for election status

# democrats
dem_count_gen = dem_count[dem_count['last_means'] == 1.0].drop(['last_means','party_code'], axis = 1)         
dem_count_oth = (dem_count[dem_count['last_means'] != 1.0]
                 .groupby(['congress','chamber'], as_index= False)
                 .agg({'dem_count':np.sum})
                )

# republicans
gop_count_gen = gop_count[gop_count['last_means'] == 1.0].drop(['last_means','party_code'], axis = 1)
gop_count_oth = (gop_count[gop_count['last_means'] != 1.0]
                 .groupby(['congress','chamber'], as_index= False)
                 .agg({'gop_count':np.sum})
                )

In [7]:
# merge party data frames for general election status
gen_count = dem_count_gen.merge(gop_count_gen, how = 'inner', on =['congress','chamber'])
gen_count['majority_gen'] = np.where(gen_count['dem_count'] > gen_count['gop_count'], 100, 200)
gen_count['margin'] = np.where(gen_count['majority_gen'] == 100, 
                              gen_count['dem_count'] - gen_count['gop_count'],
                              gen_count['gop_count'] - gen_count['dem_count'])

A slight loss of sessions is anticipated on merge of the other and special elctins data frames. The Democrats have entries for these other elections types in for a subset of congressional sessions where the GOP does not. To get around this, I generate blank GOP entries for the special elections for the prurpose of using the `inner` merge without loss of entries.

In [8]:
# other elections missing on megre
missing_dem = dem_count_oth[~dem_count_oth['congress'].isin(gop_count_oth['congress'].values)]

# construct missing entries
missing_dem_comp = missing_dem.copy()
missing_dem_comp = missing_dem_comp.rename({'dem_count':'gop_count'}, axis = 1)
missing_dem_comp['gop_count'] = 0

# add missing entries to complementry data frame
gop_count_oth = pd.concat([gop_count_oth,missing_dem_comp], axis = 0)

In [9]:
# create other elections df
oth_count = dem_count_oth.merge(gop_count_oth, how = 'inner', on =['congress','chamber'])

oth_count['majority_oth'] = np.where(oth_count['dem_count'] > oth_count['gop_count'], 100, 200)
oth_count['margin'] = np.where(oth_count['majority_oth'] == 100, 
                              oth_count['dem_count'] - oth_count['gop_count'],
                              oth_count['gop_count'] - oth_count['dem_count'])
oth_count = (oth_count
             .rename({'majority_oth':'gainer', 'margin':'gain'}, axis = 1)
             .drop(['dem_count','gop_count'], axis = 1)
            )

In [10]:
# mergeing all elections and determining post general election congressional chamber majorities
majority = gen_count.merge(oth_count, how = 'outer', on = ['congress','chamber'])
majority['majority_sp'] = np.where(majority['margin'] > majority['gain'],
                                majority['majority_gen'], majority['gainer'])
majority['flipped'] = majority['majority_gen'] != majority['majority_sp']

In [11]:
print('gen_count:',gen_count.shape[0])
print('oth_count:',oth_count.shape[0])
print('majority:',majority.shape[0])

gen_count: 77
oth_count: 69
majority: 80


Note that in this subset of congressional sessions neither the House nor the Senate flipped their majority party as a result of special elections and/or appointments. The columns `majority_gen` is therefore the same as `majority_oth`.

In [12]:
majority.to_csv(os.path.join(SAVE_PATH, 'majority.csv'), index = False)

In [13]:
(member_ideology['congress'].max() - 75)*2

82