# A Jupyter Notebook For Analyzing the Relative Unfairness of the Electoral College Over Time

## Import the dependencies for the project.

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

## Generate a list of filenames.

In [2]:
# One CSV per decennial census, named 'Census_Data_By_Year - <year>.csv'.
# The stop value 2020 is exclusive, so the last year generated is 2010.
filenames = [
    f'Census_Data_By_Year - {census_year}.csv'
    for census_year in range(1790, 2020, 10)
]

## Import census and apportionment data for each census

In [4]:
# Per-census accumulators; entry k of every list belongs to filenames[k].
years = []
max_overrep_state = []
max_overrep_value = []
centroid = []

# For every census file: pick the reference state (largest RelativeUnfairness),
# express every other state's electoral votes per person relative to that
# reference, record the summary statistics, and save a histogram next to the CSV.
for filename in filenames:
    # The file stem (name without extension) is reused for the PNG output, so
    # only the real extension is swapped even if "csv" appears elsewhere.
    file_stem = filename.rsplit('.', 1)[0]

    # The year is the token after the last ' - ' in the stem. This avoids
    # hard-coding the prefix length (the old i[22:26] slice).
    year = file_stem.rsplit(' - ', 1)[-1]
    print(year)
    years.append(year)

    # Read the data file. The columns used below are State, Population,
    # ElectoralVotes and RelativeUnfairness.
    census_df = pd.read_csv(filename)

    # Row label of the state with the highest RelativeUnfairness value.
    large_idx = census_df['RelativeUnfairness'].idxmax()

    print('Most unfairly represented state: {0}, relative unfairness: {1}'.format(census_df.at[large_idx,'State'], census_df.at[large_idx,'RelativeUnfairness']))

    # Electoral votes and population of that reference state.
    worst_reps = census_df.at[large_idx, 'ElectoralVotes']
    worst_pop = census_df.at[large_idx, 'Population']

    # Only rows that actually hold electoral votes take part in the comparison.
    voting_df = census_df.loc[census_df['ElectoralVotes'] > 0,
                              ['State', 'ElectoralVotes', 'Population']]

    # (votes_s / pop_s) / (votes_ref / pop_ref) - 1, written as a single
    # division exactly like the original row-by-row formula; 0 means "same
    # votes per person as the reference state".
    overreps = ((voting_df['ElectoralVotes'] * worst_pop)
                / (worst_reps * voting_df['Population']) - 1)

    # Fresh 0..n-1 index, matching the frame the original built from lists.
    overrep_df = pd.DataFrame({
        'State': voting_df['State'],
        'Overrepresentation': overreps,
        # Overrepresentation weighted by the state's electoral votes.
        'Moments': overreps * voting_df['ElectoralVotes'],
    }).reset_index(drop=True)

    overrep_idx = overrep_df['Overrepresentation'].idxmax()
    max_overrep_value.append(overrep_df.at[overrep_idx, 'Overrepresentation'])
    max_overrep_state.append(overrep_df.at[overrep_idx, 'State'])

    # Electoral-vote-weighted mean of the overrepresentation values. The
    # denominator sums ElectoralVotes over every row of the census file.
    centroid.append(overrep_df['Moments'].sum() / census_df['ElectoralVotes'].sum())
    print('Centroid: {0}'.format(centroid[-1]))

    # Histogram with 27 bins of width 0.5 over [0, 13.5] and a fixed y-limit;
    # presumably fixed so the per-census plots are directly comparable.
    fig, ax = plt.subplots(figsize=(16, 9))
    overrep_df['Overrepresentation'].plot.hist(ax=ax, range=[0, 13.5], bins=27, title=filename)
    ax.set_ylim(0, 35)
    fig.savefig(file_stem + '.png')

    # Close (not just clear) the figure: plt.clf() left one open figure per
    # census, which piles up past matplotlib's open-figure warning threshold
    # and leaves an empty figure as cell output.
    plt.close(fig)
    

1790
Most unfairly represented state: Virginia, relative unfairness: 1.069043
Centroid: 0.3147974086889945
1800
Most unfairly represented state: Kentucky, relative unfairness: 2.1965939999999997
Centroid: 0.5938710061154103
1810
Most unfairly represented state: Virginia, relative unfairness: 1.16452
Centroid: 0.30311880746057623
1820
Most unfairly represented state: Ohio, relative unfairness: 2.996165
Centroid: 0.9062439657059841
1830
Most unfairly represented state: SouthCarolina, relative unfairness: 1.174302
Centroid: 0.22457459184244896
1840
Most unfairly represented state: Missouri, relative unfairness: 2.6854259999999996
Centroid: 0.7619730062592163
1850
Most unfairly represented state: Virginia, relative unfairness: 3.094189
Centroid: 0.31457871911683793
1860
Most unfairly represented state: Iowa, relative unfairness: 8.648046
Centroid: 0.8341614006983658
1870
Most unfairly represented state: NewYork, relative unfairness: 7.7484
Centroid: 0.336710960942463
1880
Most unfairly rep

<Figure size 1152x648 with 0 Axes>

## Collect summaries of data by decade.

In [14]:
# Gather the per-census lists built by the loading loop into one table,
# one row per census file, with columns in the order listed here.
summary_columns = ('Year', 'State', 'Overrepresentation Value', 'Centroid')
summary_values = (years, max_overrep_state, max_overrep_value, centroid)
decade_dict = dict(zip(summary_columns, summary_values))
decade_df = pd.DataFrame(data=decade_dict)

print(decade_df)

    Year        State  Overrepresentation Value  Centroid
0   1790  RhodeIsland                  1.069043  0.314797
1   1800  RhodeIsland                  2.196594  0.593871
2   1810     Delaware                  1.164520  0.303119
3   1820     Delaware                  2.996165  0.906244
4   1830  RhodeIsland                  1.174302  0.224575
5   1840     Delaware                  2.685426  0.761973
6   1850   California                  3.094189  0.314579
7   1860       Oregon                  8.648046  0.834161
8   1870       Nevada                  7.748400  0.336711
9   1880       Nevada                  8.598458  0.629457
10  1890       Nevada                  9.564133  0.364474
11  1900       Nevada                 13.402787  0.396865
12  1910       Nevada                  6.420754  0.267159
13  1920       Nevada                  9.216312  0.445123
14  1930       Nevada                  7.823976  0.254793
15  1940       Nevada                  6.804029  0.253659
16  1950      

## Plot trends over time

In [16]:
# Draw each summary column of decade_df against 'Year' on its own 16x9 figure
# and write it to a PNG in the working directory.
# Each figure is created explicitly and closed after saving: the previous
# plt.clf() only emptied the figure, leaving it open and producing an empty
# "<Figure ... with 0 Axes>" cell output.
fig, ax = plt.subplots(figsize=(16, 9))
decade_df.plot.line('Year', 'Overrepresentation Value', ax=ax)
fig.savefig("Overrepresentation Value.png")
plt.close(fig)

fig, ax = plt.subplots(figsize=(16, 9))
decade_df.plot.line('Year', 'Centroid', ax=ax)
fig.savefig('Centroid.png')
plt.close(fig)

<Figure size 1152x648 with 0 Axes>

<Figure size 1152x648 with 0 Axes>

## Percent of Population over 1.0 Representation