In [1]:
### Run this workbook THIRD in sequence.

### The goal of this workbook is to take the combined and cleaned "AidWithTotals" dataset
### and compare each country's votes to the US vote on the same issue.

In [2]:
import pandas as pd
# Notebook-wide display settings: render floats without decimals, show up to 800 rows.
pd.set_option('display.float_format', '{:.0f}'.format)
pd.set_option('display.max_rows', 800)

# Load the combined aid/vote table, keeping only the columns used below (in this order),
# and label the row index 'record'.
kept_columns = ['issue', 'membership', 'vote', 'code', 'name', 'year', 'aid', 'total_aid']
data = pd.read_csv('Modified_Data/AidWithTotals.csv')[kept_columns]
data.index.name = 'record'
data

Unnamed: 0_level_0,issue,membership,vote,code,name,year,aid,total_aid
record,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0,3,1,9,AFG,Afghanistan,1946,0,0
1,4,1,9,AFG,Afghanistan,1946,0,0
2,5,1,9,AFG,Afghanistan,1946,0,0
3,6,1,9,AFG,Afghanistan,1946,0,0
4,7,1,9,AFG,Afghanistan,1946,0,0
5,8,1,9,AFG,Afghanistan,1946,0,0
6,9,1,9,AFG,Afghanistan,1946,0,0
7,10,1,9,AFG,Afghanistan,1946,0,0
8,11,1,9,AFG,Afghanistan,1946,0,0
9,12,1,9,AFG,Afghanistan,1946,0,0


In [3]:
# Test for duplicate votes by the same country on the same issue.
# The nunique() check below only notices *conflicting* duplicates (different values
# recorded for the same country/issue); a row repeated verbatim still has exactly one
# unique value per column. Assert key uniqueness first so exact duplicates are caught too.
assert not data.duplicated(subset=['code', 'issue']).any(), \
    "duplicate (code, issue) rows found in data"
test = data.groupby(['code', 'issue']).nunique()
test[test > 1].any()

issue         False
membership    False
vote          False
code          False
name          False
year          False
aid           False
total_aid     False
dtype: bool

In [4]:
# Recode absences (vote code 8) as abstentions (vote code 2).
data['vote'] = data['vote'].replace({8: 2})
data

Unnamed: 0_level_0,issue,membership,vote,code,name,year,aid,total_aid
record,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0,3,1,9,AFG,Afghanistan,1946,0,0
1,4,1,9,AFG,Afghanistan,1946,0,0
2,5,1,9,AFG,Afghanistan,1946,0,0
3,6,1,9,AFG,Afghanistan,1946,0,0
4,7,1,9,AFG,Afghanistan,1946,0,0
5,8,1,9,AFG,Afghanistan,1946,0,0
6,9,1,9,AFG,Afghanistan,1946,0,0
7,10,1,9,AFG,Afghanistan,1946,0,0
8,11,1,9,AFG,Afghanistan,1946,0,0
9,12,1,9,AFG,Afghanistan,1946,0,0


In [5]:
# Delete all rows where the country was not yet a member (vote code 9).
# `nonmember` is a boolean mask that is True for the rows to discard; filtering with
# its negation avoids the list wrapper and label-based drop, and does not depend on
# the row index being unique.
nonmember = data['vote'] == 9
data = data[~nonmember]

In [6]:
# Count missing values in each column of the dataset.
pd.isnull(data).sum()

issue         0
membership    0
vote          0
code          0
name          0
year          0
aid           0
total_aid     0
dtype: int64

In [7]:
# Build a lookup of the USA's vote on each issue: one selection that keeps only
# the USA rows and only the 'issue' and 'vote' columns.
usa_lookup = data.loc[data['code'] == 'USA', ['issue', 'vote']]
usa_lookup.head(50)

Unnamed: 0_level_0,issue,vote
record,Unnamed: 1_level_1,Unnamed: 2_level_1
1034742,3,1
1034743,4,3
1034744,5,3
1034745,6,3
1034746,7,3
1034747,8,3
1034748,9,1
1034749,10,1
1034750,11,1
1034751,12,1


In [8]:
# Attach the USA's vote on the same issue to every row. The shared 'vote' column is
# split into 'vote_foreign' (the row's own country) and 'vote_usa' (from the lookup).
data = pd.merge(data, usa_lookup, how='left', on='issue', suffixes=('_foreign', '_usa'))
data

Unnamed: 0,issue,membership,vote_foreign,code,name,year,aid,total_aid,vote_usa
0,20,1,1,AFG,Afghanistan,1946,0,0,1
1,21,1,2,AFG,Afghanistan,1946,0,0,1
2,22,1,1,AFG,Afghanistan,1946,0,0,3
3,23,1,2,AFG,Afghanistan,1946,0,0,1
4,24,1,1,AFG,Afghanistan,1946,0,0,1
5,25,1,1,AFG,Afghanistan,1946,0,0,1
6,26,1,2,AFG,Afghanistan,1946,0,0,3
7,27,1,2,AFG,Afghanistan,1946,0,0,1
8,28,1,2,AFG,Afghanistan,1946,0,0,1
9,29,1,2,AFG,Afghanistan,1946,0,0,1


In [9]:
# Test for remaining nulls: the left merge leaves vote_usa empty for any issue
# that has no matching USA row in the lookup.
data.isnull().sum(axis=0)

issue           0
membership      0
vote_foreign    0
code            0
name            0
year            0
aid             0
total_aid       0
vote_usa        0
dtype: int64

In [10]:
# Add a vote_diff column: the squared difference between the row's own vote
# (vote_foreign) and the USA's vote on the same issue (vote_usa); 0 when they match.
data['vote_diff'] = data['vote_foreign'].sub(data['vote_usa']).pow(2)
data

Unnamed: 0,issue,membership,vote_foreign,code,name,year,aid,total_aid,vote_usa,vote_diff
0,20,1,1,AFG,Afghanistan,1946,0,0,1,0
1,21,1,2,AFG,Afghanistan,1946,0,0,1,1
2,22,1,1,AFG,Afghanistan,1946,0,0,3,4
3,23,1,2,AFG,Afghanistan,1946,0,0,1,1
4,24,1,1,AFG,Afghanistan,1946,0,0,1,0
5,25,1,1,AFG,Afghanistan,1946,0,0,1,0
6,26,1,2,AFG,Afghanistan,1946,0,0,3,1
7,27,1,2,AFG,Afghanistan,1946,0,0,1,1
8,28,1,2,AFG,Afghanistan,1946,0,0,1,1
9,29,1,2,AFG,Afghanistan,1946,0,0,1,1


In [11]:
# Test the vote_diff column to make sure all rogue votes have been eliminated.
# Series.max() is vectorised and skips NaN, unlike the builtin max(), which iterates
# element by element in Python and gives order-dependent results when NaN is present.
data['vote_diff'].max()

4L

In [12]:
# Smallest squared vote difference. Series.min() is vectorised and skips NaN, unlike
# the builtin min(), which iterates element by element in Python.
data['vote_diff'].min()

0L

In [13]:
# Average squared vote difference over every row of data.
# NOTE(review): rows for the USA itself are still in `data` at this point, so they are
# part of this average - confirm that is intended.
data.loc[:, 'vote_diff'].mean()

1.9057458446802518

In [14]:
# Export to new file (with default to_csv settings the row index is written as the
# first, unnamed column).
output_path = "Modified_Data/VoteDiffsWithTotalAid.csv"
data.to_csv(output_path)