In [16]:
# Import Lookup Table from Notebook #2

import pandas as pd
pd.options.display.float_format = '{:.0f}'.format
data = pd.read_csv('Modified_Data/AidTotalLookup.csv')
data.index.name = 'record'
data = data.fillna(0)
pd.options.display.max_rows = 800
data

Unnamed: 0_level_0,record,code,year,amount
record,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,1087786,0,2016,0
1,1087825,0,2017,0
2,153,AFG,1946,0
3,8624,AFG,1947,0
4,16110,AFG,1948,0
5,27339,AFG,1949,0
6,49206,AFG,1950,8340
7,59056,AFG,1951,791387
8,60435,AFG,1952,2283030
9,74225,AFG,1953,16443254


In [17]:
# Define a function that will total up recently received aid for a country

# Interest rate must be a number between 0 (no interest; all years count the same) 
# and 1 (only the first year counts at all). For example, 0.2 means 20% interest rate, like a credit card.

# Lookback is the number of years to look back through the database, e.g. with year = 1950 and lookback = 3,
# the function will return data for 1947, 1948, 1949, and 1950.

def recent_aid(code, year, int_rate, lookback):    
    discount = 1.0
    total = 0.0
    
    # Start by looking at the year specified by the user, then go back one year at a time
    for annum in range(year, year - lookback - 1, -1):
        
        # Try to find the amount of aid from that year, and then apply the interest rate as a discount factor.
        try:
            x = data[(data['code'] == code) & (data['year'] == annum)]['amount'].values[0]
            total = total + (x * discount)
            discount = discount * (1 - int_rate)
            
        # If there is no aid from a particular year, just apply the discount factor and move on.
        except:
            discount = discount * int_rate
    
    return total

In [18]:
# Show some limited data for France to assist with testing.
data[(data['code'] == 'FRA') & (data['year'] < 1960)]['amount'] / 1000000

record
4257    3117
4258     398
4259    3088
4260   10794
4261    9694
4262   13666
4263   12925
4264    4529
4265    1841
4266    1612
4267    1937
4268     321
4269    1181
4270     640
Name: amount, dtype: float64

In [19]:
# Test the function by calcuating 5 years' worth of aid to France, discounting at 25% per year
recent_aid('FRA', 1950, 0.25, 5) / 1000000

20680.420436648437

In [20]:
# Apply the 'recent aid' function to all rows of the database
data['recent_aid'] = data.apply(lambda x: recent_aid(x['code'], x['year'], 0.25, 5), axis=1)
data

Unnamed: 0_level_0,record,code,year,amount,recent_aid
record,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,1087786,0,2016,0,0
1,1087825,0,2017,0,0
2,153,AFG,1946,0,0
3,8624,AFG,1947,0,0
4,16110,AFG,1948,0,0
5,27339,AFG,1949,0,0
6,49206,AFG,1950,8340,8340
7,59056,AFG,1951,791387,797642
8,60435,AFG,1952,2283030,2881262
9,74225,AFG,1953,16443254,18604200


In [21]:
# Write the recent aid data to a separate file
data.to_csv("Modified_Data/RecentAid.csv")

In [22]:
# Import the general database
data2 = pd.read_csv('Modified_Data/VoteDiffsWithTotalAid.csv')
data2.index.name = 'record'
data2

Unnamed: 0_level_0,Unnamed: 0,issue,membership,vote_foreign,code,name,year,aid,total_aid,vote_usa,vote_diff
record,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
0,0,20,1,1,AFG,Afghanistan,1946,0,0,1,0
1,1,21,1,2,AFG,Afghanistan,1946,0,0,1,1
2,2,22,1,1,AFG,Afghanistan,1946,0,0,3,4
3,3,23,1,2,AFG,Afghanistan,1946,0,0,1,1
4,4,24,1,1,AFG,Afghanistan,1946,0,0,1,0
5,5,25,1,1,AFG,Afghanistan,1946,0,0,1,0
6,6,26,1,2,AFG,Afghanistan,1946,0,0,3,1
7,7,27,1,2,AFG,Afghanistan,1946,0,0,1,1
8,8,28,1,2,AFG,Afghanistan,1946,0,0,1,1
9,9,29,1,2,AFG,Afghanistan,1946,0,0,1,1


In [23]:
# Merge the general database with the recent aid data
data3 = data2.merge(data, on=['code', 'year'], how='left')
data3

Unnamed: 0.1,Unnamed: 0,issue,membership,vote_foreign,code,name,year,aid,total_aid,vote_usa,vote_diff,record,amount,recent_aid
0,0,20,1,1,AFG,Afghanistan,1946,0,0,1,0,153,0,0
1,1,21,1,2,AFG,Afghanistan,1946,0,0,1,1,153,0,0
2,2,22,1,1,AFG,Afghanistan,1946,0,0,3,4,153,0,0
3,3,23,1,2,AFG,Afghanistan,1946,0,0,1,1,153,0,0
4,4,24,1,1,AFG,Afghanistan,1946,0,0,1,0,153,0,0
5,5,25,1,1,AFG,Afghanistan,1946,0,0,1,0,153,0,0
6,6,26,1,2,AFG,Afghanistan,1946,0,0,3,1,153,0,0
7,7,27,1,2,AFG,Afghanistan,1946,0,0,1,1,153,0,0
8,8,28,1,2,AFG,Afghanistan,1946,0,0,1,1,153,0,0
9,9,29,1,2,AFG,Afghanistan,1946,0,0,1,1,153,0,0


In [24]:
# Perform linear, single-variable regression using recent aid instead of total aid

import statsmodels.api as sm
from statsmodels.formula.api import ols

vote_endog = data3[['vote_diff']]
vote_exog = data3[['recent_aid']]
vote_exog = sm.add_constant(vote_exog, has_constant='add')
mod = sm.OLS(vote_endog, vote_exog, missing='drop')
result = mod.fit()
print(result.summary())

                            OLS Regression Results                            
Dep. Variable:              vote_diff   R-squared:                       0.001
Model:                            OLS   Adj. R-squared:                  0.001
Method:                 Least Squares   F-statistic:                     719.4
Date:                Sun, 24 Nov 2019   Prob (F-statistic):          2.07e-158
Time:                        01:04:45   Log-Likelihood:            -1.6745e+06
No. Observations:              851920   AIC:                         3.349e+06
Df Residuals:                  851918   BIC:                         3.349e+06
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          1.9175      0.002    997.565      0.0

In [25]:
#Import the World Bank data

bankdata = pd.read_csv('Modified_Data/AidVotesBank.csv')
data4 = bankdata.merge(data, on=['code', 'year'], how='left')
data4

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,issue,membership,vote_foreign,code,name_x,year,aid,total_aid,...,life_exp,literacy,exports,pop_density,pop,rural,gdp,record,amount,recent_aid
0,0,641,627,1,3,AFG,Afghanistan,1960,69454739,673372854,...,32,,4,,8996973,92,,129779,69454739,364326191
1,1,642,628,1,2,AFG,Afghanistan,1960,69454739,673372854,...,32,,4,,8996973,92,,129779,69454739,364326191
2,2,643,629,1,1,AFG,Afghanistan,1960,69454739,673372854,...,32,,4,,8996973,92,,129779,69454739,364326191
3,3,644,630,1,3,AFG,Afghanistan,1960,69454739,673372854,...,32,,4,,8996973,92,,129779,69454739,364326191
4,4,645,631,1,1,AFG,Afghanistan,1960,69454739,673372854,...,32,,4,,8996973,92,,129779,69454739,364326191
5,5,646,632,1,1,AFG,Afghanistan,1960,69454739,673372854,...,32,,4,,8996973,92,,129779,69454739,364326191
6,6,647,633,1,1,AFG,Afghanistan,1960,69454739,673372854,...,32,,4,,8996973,92,,129779,69454739,364326191
7,7,648,634,1,3,AFG,Afghanistan,1960,69454739,673372854,...,32,,4,,8996973,92,,129779,69454739,364326191
8,8,649,635,1,3,AFG,Afghanistan,1960,69454739,673372854,...,32,,4,,8996973,92,,129779,69454739,364326191
9,9,650,636,1,3,AFG,Afghanistan,1960,69454739,673372854,...,32,,4,,8996973,92,,129779,69454739,364326191


In [26]:
# Perform multi-variable regression using both (recently received) foreign aid and domestic economic variables

vote_endog = data4[['vote_diff']]
vote_exog = data4[['recent_aid', 'GDPpercap', 'debt', 'gini', 'exports']]
vote_exog = sm.add_constant(vote_exog, has_constant='add')
mod = sm.OLS(vote_endog, vote_exog, missing='drop')
result = mod.fit()
print(result.summary())

                            OLS Regression Results                            
Dep. Variable:              vote_diff   R-squared:                       0.044
Model:                            OLS   Adj. R-squared:                  0.044
Method:                 Least Squares   F-statistic:                     266.3
Date:                Sun, 24 Nov 2019   Prob (F-statistic):          2.70e-279
Time:                        01:04:50   Log-Likelihood:                -56964.
No. Observations:               28780   AIC:                         1.139e+05
Df Residuals:                   28774   BIC:                         1.140e+05
Df Model:                           5                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          1.5936      0.064     24.726      0.0

In [27]:
# Test the recent aid depedent variable against just the set of vote alignments available in the World Bank data, i.e.,
# only look at rows where there is not too much missing data from the World Bank.

vote_endog = data4[['vote_diff']]
vote_exog = data4[['recent_aid']]
vote_exog = sm.add_constant(vote_exog, has_constant='add')
mod = sm.OLS(vote_endog, vote_exog, missing='drop')
result = mod.fit()
print(result.summary())

                            OLS Regression Results                            
Dep. Variable:              vote_diff   R-squared:                       0.000
Model:                            OLS   Adj. R-squared:                  0.000
Method:                 Least Squares   F-statistic:                     195.0
Date:                Sun, 24 Nov 2019   Prob (F-statistic):           2.56e-44
Time:                        01:04:51   Log-Likelihood:            -1.5544e+06
No. Observations:              790903   AIC:                         3.109e+06
Df Residuals:                  790901   BIC:                         3.109e+06
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          1.9626      0.002    984.122      0.0

In [28]:
# Divide recent aid by 100 million in case the low R2 is somehow being driven by the relatively extreme aid constants.
data4['recent_aid'] = data4['recent_aid'] / 100000000

In [29]:
# Rerun the single-variable regression using the scaled recent aid variable (no significant difference)
vote_endog = data4[['vote_diff']]
vote_exog = data4[['recent_aid']]
vote_exog = sm.add_constant(vote_exog, has_constant='add')
mod = sm.OLS(vote_endog, vote_exog, missing='drop')
result = mod.fit()
print(result.summary())

                            OLS Regression Results                            
Dep. Variable:              vote_diff   R-squared:                       0.000
Model:                            OLS   Adj. R-squared:                  0.000
Method:                 Least Squares   F-statistic:                     195.0
Date:                Sun, 24 Nov 2019   Prob (F-statistic):           2.56e-44
Time:                        01:04:51   Log-Likelihood:            -1.5544e+06
No. Observations:              790903   AIC:                         3.109e+06
Df Residuals:                  790901   BIC:                         3.109e+06
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          1.9626      0.002    984.122      0.0

In [30]:
# Rerun the multi-variable regression using the scaled recent aid variable (no significant difference)
vote_endog = data4[['vote_diff']]
vote_exog = data4[['GDPpercap', 'debt', 'gini', 'exports']]
vote_exog = sm.add_constant(vote_exog, has_constant='add')
mod = sm.OLS(vote_endog, vote_exog, missing='drop')
result = mod.fit()
print(result.summary())

                            OLS Regression Results                            
Dep. Variable:              vote_diff   R-squared:                       0.044
Model:                            OLS   Adj. R-squared:                  0.044
Method:                 Least Squares   F-statistic:                     330.6
Date:                Sun, 24 Nov 2019   Prob (F-statistic):          1.10e-278
Time:                        01:04:51   Log-Likelihood:                -56968.
No. Observations:               28780   AIC:                         1.139e+05
Df Residuals:                   28775   BIC:                         1.140e+05
Df Model:                           4                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          1.5765      0.064     24.556      0.0