In [2]:
# import and preview a tab-separated database of all UN General Assembly Resolutions

import pandas as pd
data = pd.read_csv('UNVotes.tab', sep='\t')
data.head()

Unnamed: 0,rcid,ccode,session,member,vote,Country,Countryname,year
0,3.0,2,1.0,1.0,1,USA,United States of America,1946
1,3.0,20,1.0,1.0,3,CAN,Canada,1946
2,3.0,31,1.0,,9,BHS,Bahamas,1946
3,3.0,40,1.0,1.0,1,CUB,Cuba,1946
4,3.0,41,1.0,1.0,1,HTI,Haiti,1946


In [3]:
data.shape

(1099156, 8)

In [4]:
# preview a list of only votes cast by the USA

usa = data[data['Country'] == 'USA']
usa = usa.sort_values(['session', 'rcid'])
usa.head(50)

Unnamed: 0,rcid,ccode,session,member,vote,Country,Countryname,year
0,3.0,2,1.0,1.0,1,USA,United States of America,1946
197,4.0,2,1.0,1.0,3,USA,United States of America,1946
394,5.0,2,1.0,1.0,3,USA,United States of America,1946
591,6.0,2,1.0,1.0,3,USA,United States of America,1946
788,7.0,2,1.0,1.0,3,USA,United States of America,1946
985,8.0,2,1.0,1.0,3,USA,United States of America,1946
1182,9.0,2,1.0,1.0,1,USA,United States of America,1946
1379,10.0,2,1.0,1.0,1,USA,United States of America,1946
1576,11.0,2,1.0,1.0,1,USA,United States of America,1946
1773,12.0,2,1.0,1.0,1,USA,United States of America,1946


In [5]:
# store a short test sample of votes from the USA only

usa_votes = usa[['year', 'rcid', 'vote']].head(50)
usa_votes

Unnamed: 0,year,rcid,vote
0,1946,3.0,1
197,1946,4.0,3
394,1946,5.0,3
591,1946,6.0,3
788,1946,7.0,3
985,1946,8.0,3
1182,1946,9.0,1
1379,1946,10.0,1
1576,1946,11.0,1
1773,1946,12.0,1


In [6]:
# remove records of abstensions from countries who were not members of the UN at the time

member_votes = data[data['member'] == 1]
member_votes.head()

Unnamed: 0,rcid,ccode,session,member,vote,Country,Countryname,year
0,3.0,2,1.0,1.0,1,USA,United States of America,1946
1,3.0,20,1.0,1.0,3,CAN,Canada,1946
3,3.0,40,1.0,1.0,1,CUB,Cuba,1946
4,3.0,41,1.0,1.0,1,HTI,Haiti,1946
5,3.0,42,1.0,1.0,1,DOM,Dominican Republic,1946


In [7]:
# Prepare a small sample of votes cast by all countries in 1946 and part of 1947

test_votes = member_votes[['year', 'rcid', 'Countryname', 'vote']]
test_votes = test_votes[test_votes['rcid'] < 53]
test_votes

Unnamed: 0,year,rcid,Countryname,vote
0,1946,3.0,United States of America,1
1,1946,3.0,Canada,3
3,1946,3.0,Cuba,1
4,1946,3.0,Haiti,1
5,1946,3.0,Dominican Republic,1
15,1946,3.0,Mexico,1
17,1946,3.0,Guatemala,1
18,1946,3.0,Honduras,1
19,1946,3.0,El Salvador,1
20,1946,3.0,Nicaragua,1


In [8]:
def find_usa_vote(target_rcid):
    usa_vote = usa_votes[usa_votes['rcid'] == target_rcid]['vote']
    return usa_vote.iloc[0]
find_usa_vote(51)

3

In [9]:
test_votes['usa_vote'] = None
test_votes.head()

Unnamed: 0,year,rcid,Countryname,vote,usa_vote
0,1946,3.0,United States of America,1,
1,1946,3.0,Canada,3,
3,1946,3.0,Cuba,1,
4,1946,3.0,Haiti,1,
5,1946,3.0,Dominican Republic,1,


In [10]:
test_votes.loc[1, 'usa_vote'] = 4
test_votes.head()

Unnamed: 0,year,rcid,Countryname,vote,usa_vote
0,1946,3.0,United States of America,1,
1,1946,3.0,Canada,3,4.0
3,1946,3.0,Cuba,1,
4,1946,3.0,Haiti,1,
5,1946,3.0,Dominican Republic,1,


In [11]:
find_usa_vote(4)

3

In [12]:
test_votes.at[1, 'usa_vote'] = find_usa_vote(4)
test_votes.head()

Unnamed: 0,year,rcid,Countryname,vote,usa_vote
0,1946,3.0,United States of America,1,
1,1946,3.0,Canada,3,3.0
3,1946,3.0,Cuba,1,
4,1946,3.0,Haiti,1,
5,1946,3.0,Dominican Republic,1,


In [13]:
# Create test database with the usa's vote stacked up next to each country's vote

test_votes['usa_vote'] = test_votes['rcid'].apply(find_usa_vote)
test_votes[test_votes['rcid'] == 51]

Unnamed: 0,year,rcid,Countryname,vote,usa_vote
9456,1947,51.0,United States of America,3,3
9457,1947,51.0,Canada,3,3
9459,1947,51.0,Cuba,2,3
9460,1947,51.0,Haiti,1,3
9461,1947,51.0,Dominican Republic,3,3
9471,1947,51.0,Mexico,2,3
9473,1947,51.0,Guatemala,2,3
9474,1947,51.0,Honduras,8,3
9475,1947,51.0,El Salvador,2,3
9476,1947,51.0,Nicaragua,3,3


In [14]:
test_votes['usa_vote'] = test_votes['rcid'].apply(find_usa_vote, axis=1)
test_votes

TypeError: find_usa_vote() got an unexpected keyword argument 'axis'

In [None]:
test_dyads = pd.merge(test_votes,
                     usa_votes['vote'],
                     left_on=['rcid'],
                     right_on=['rcid'],
                     how='left')
test_dyads