In [37]:
### Use this notebook FIFTH in sequence.

### The goal of this notebook is to combine and compile all of the World Bank data 
### into a single dataframe.

# Initial setup

import pandas as pd
pd.options.display.float_format = '{:.3f}'.format
pd.options.display.max_rows = 800



In [38]:
# List of datasets to read:
# Debt Share
# Energy Usage
# Fertility Rate
# GDP Per Capita
# GINI
# Life Expectancy
# Literacy Rate
# Percent Exports
# Pop Density
# Population
# Rural
# Total GDP

In [39]:
non_countries = [5, 34, 47, 59, 60, 61, 62, 63, 66, 71, 72, 93, 96, 100, 101, 102, 103, 105, 108, 
                 126, 132, 133, 134, 137, 138, 140, 151, 154, 159, 168, 179, 181, 189, 195, 196, 
                 213, 215, 216, 228, 229, 234, 236, 238, 239, 247, 257]

def process(csv, var_name):
    filename = 'Original_Data/WorldBank/' + csv + '.csv'
    output = pd.read_csv(filename, header=2)
    output.index.name = 'record'
    output = output.iloc[:, 0:63]
    output = output.drop(['Indicator Name', 'Indicator Code'], axis=1)
    output = output.drop(non_countries)
    output = output.melt(id_vars=['Country Name', 'Country Code'])
    output.columns = ['name', 'code', 'year', var_name]
    return output

debt = process('Debt Share', 'debt')
debt

Unnamed: 0,name,code,year,debt
0,Aruba,ABW,1960,
1,Afghanistan,AFG,1960,
2,Angola,AGO,1960,
3,Albania,ALB,1960,
4,Andorra,AND,1960,
5,United Arab Emirates,ARE,1960,
6,Argentina,ARG,1960,
7,Armenia,ARM,1960,
8,American Samoa,ASM,1960,
9,Antigua and Barbuda,ATG,1960,


In [40]:
energy = process('Energy Usage', 'energy')
energy

Unnamed: 0,name,code,year,energy
0,Aruba,ABW,1960,
1,Afghanistan,AFG,1960,
2,Angola,AGO,1960,
3,Albania,ALB,1960,
4,Andorra,AND,1960,
5,United Arab Emirates,ARE,1960,
6,Argentina,ARG,1960,
7,Armenia,ARM,1960,
8,American Samoa,ASM,1960,
9,Antigua and Barbuda,ATG,1960,


In [41]:
fertility = process('Fertility Rate', 'fertility')
fertility

Unnamed: 0,name,code,year,fertility
0,Aruba,ABW,1960,4.820
1,Afghanistan,AFG,1960,7.450
2,Angola,AGO,1960,6.708
3,Albania,ALB,1960,6.489
4,Andorra,AND,1960,
5,United Arab Emirates,ARE,1960,6.929
6,Argentina,ARG,1960,3.109
7,Armenia,ARM,1960,4.786
8,American Samoa,ASM,1960,
9,Antigua and Barbuda,ATG,1960,4.425


In [42]:
GDPperCAP = process('GDP Per Capita', 'GDPpercap')
GDPperCAP

Unnamed: 0,name,code,year,GDPpercap
0,Aruba,ABW,1960,
1,Afghanistan,AFG,1960,
2,Angola,AGO,1960,
3,Albania,ALB,1960,
4,Andorra,AND,1960,
5,United Arab Emirates,ARE,1960,
6,Argentina,ARG,1960,5642.765
7,Armenia,ARM,1960,
8,American Samoa,ASM,1960,
9,Antigua and Barbuda,ATG,1960,


In [43]:
gini = process('GINI', 'gini')
gini

Unnamed: 0,name,code,year,gini
0,Aruba,ABW,1960,
1,Afghanistan,AFG,1960,
2,Angola,AGO,1960,
3,Albania,ALB,1960,
4,Andorra,AND,1960,
5,United Arab Emirates,ARE,1960,
6,Argentina,ARG,1960,
7,Armenia,ARM,1960,
8,American Samoa,ASM,1960,
9,Antigua and Barbuda,ATG,1960,


In [44]:
life = process('Life Expectancy', 'life_exp')
life

Unnamed: 0,name,code,year,life_exp
0,Aruba,ABW,1960,65.662
1,Afghanistan,AFG,1960,32.446
2,Angola,AGO,1960,37.524
3,Albania,ALB,1960,62.283
4,Andorra,AND,1960,
5,United Arab Emirates,ARE,1960,51.537
6,Argentina,ARG,1960,65.055
7,Armenia,ARM,1960,65.972
8,American Samoa,ASM,1960,
9,Antigua and Barbuda,ATG,1960,61.968


In [45]:
literacy = process('Literacy Rate', 'literacy')
literacy

Unnamed: 0,name,code,year,literacy
0,Aruba,ABW,1960,
1,Afghanistan,AFG,1960,
2,Angola,AGO,1960,
3,Albania,ALB,1960,
4,Andorra,AND,1960,
5,United Arab Emirates,ARE,1960,
6,Argentina,ARG,1960,
7,Armenia,ARM,1960,
8,American Samoa,ASM,1960,
9,Antigua and Barbuda,ATG,1960,


In [46]:
exports = process('Percent Exports', 'exports')
exports

Unnamed: 0,name,code,year,exports
0,Aruba,ABW,1960,
1,Afghanistan,AFG,1960,4.132
2,Angola,AGO,1960,
3,Albania,ALB,1960,
4,Andorra,AND,1960,
5,United Arab Emirates,ARE,1960,
6,Argentina,ARG,1960,7.604
7,Armenia,ARM,1960,
8,American Samoa,ASM,1960,
9,Antigua and Barbuda,ATG,1960,


In [47]:
popdensity = process('Pop Density', 'pop_density')
popdensity

Unnamed: 0,name,code,year,pop_density
0,Aruba,ABW,1960,
1,Afghanistan,AFG,1960,
2,Angola,AGO,1960,
3,Albania,ALB,1960,
4,Andorra,AND,1960,
5,United Arab Emirates,ARE,1960,
6,Argentina,ARG,1960,
7,Armenia,ARM,1960,
8,American Samoa,ASM,1960,
9,Antigua and Barbuda,ATG,1960,


In [48]:
population = process('Population', 'pop')
population

Unnamed: 0,name,code,year,pop
0,Aruba,ABW,1960,54211.000
1,Afghanistan,AFG,1960,8996973.000
2,Angola,AGO,1960,5454933.000
3,Albania,ALB,1960,1608800.000
4,Andorra,AND,1960,13411.000
5,United Arab Emirates,ARE,1960,92418.000
6,Argentina,ARG,1960,20481779.000
7,Armenia,ARM,1960,1874121.000
8,American Samoa,ASM,1960,20123.000
9,Antigua and Barbuda,ATG,1960,54131.000


In [49]:
rural = process('Rural', 'rural')
rural

Unnamed: 0,name,code,year,rural
0,Aruba,ABW,1960,49.224
1,Afghanistan,AFG,1960,91.599
2,Angola,AGO,1960,89.565
3,Albania,ALB,1960,69.295
4,Andorra,AND,1960,41.550
5,United Arab Emirates,ARE,1960,26.500
6,Argentina,ARG,1960,26.389
7,Armenia,ARM,1960,48.725
8,American Samoa,ASM,1960,33.789
9,Antigua and Barbuda,ATG,1960,60.344


In [50]:
totalGDP = process('Total GDP', 'gdp')
totalGDP

Unnamed: 0,name,code,year,gdp
0,Aruba,ABW,1960,
1,Afghanistan,AFG,1960,
2,Angola,AGO,1960,
3,Albania,ALB,1960,
4,Andorra,AND,1960,
5,United Arab Emirates,ARE,1960,
6,Argentina,ARG,1960,115573868508.150
7,Armenia,ARM,1960,
8,American Samoa,ASM,1960,
9,Antigua and Barbuda,ATG,1960,


In [51]:
totalGDP.merge(fertility, on=['name', 'code', 'year'])

Unnamed: 0,name,code,year,gdp,fertility
0,Aruba,ABW,1960,,4.820
1,Afghanistan,AFG,1960,,7.450
2,Angola,AGO,1960,,6.708
3,Albania,ALB,1960,,6.489
4,Andorra,AND,1960,,
5,United Arab Emirates,ARE,1960,,6.929
6,Argentina,ARG,1960,115573868508.150,3.109
7,Armenia,ARM,1960,,4.786
8,American Samoa,ASM,1960,,
9,Antigua and Barbuda,ATG,1960,,4.425


In [52]:
# Debt Share
# Energy Usage
# Fertility Rate
# GDP Per Capita
# GINI
# Life Expectancy
# Literacy Rate
# Percent Exports
# Pop Density
# Population
# Rural
# Total GDP

worldbank = debt.merge(energy, on=['name', 'code', 'year'])
worldbank = worldbank.merge(fertility, on=['name', 'code', 'year'])
worldbank = worldbank.merge(GDPperCAP, on=['name', 'code', 'year'])
worldbank = worldbank.merge(gini, on=['name', 'code', 'year'])
worldbank = worldbank.merge(life, on=['name', 'code', 'year'])
worldbank = worldbank.merge(literacy, on=['name', 'code', 'year'])
worldbank = worldbank.merge(exports, on=['name', 'code', 'year'])
worldbank = worldbank.merge(popdensity, on=['name', 'code', 'year'])
worldbank = worldbank.merge(population, on=['name', 'code', 'year'])
worldbank = worldbank.merge(rural, on=['name', 'code', 'year'])
worldbank = worldbank.merge(totalGDP, on=['name', 'code', 'year'])
worldbank

Unnamed: 0,name,code,year,debt,energy,fertility,GDPpercap,gini,life_exp,literacy,exports,pop_density,pop,rural,gdp
0,Aruba,ABW,1960,,,4.820,,,65.662,,,,54211.000,49.224,
1,Afghanistan,AFG,1960,,,7.450,,,32.446,,4.132,,8996973.000,91.599,
2,Angola,AGO,1960,,,6.708,,,37.524,,,,5454933.000,89.565,
3,Albania,ALB,1960,,,6.489,,,62.283,,,,1608800.000,69.295,
4,Andorra,AND,1960,,,,,,,,,,13411.000,41.550,
5,United Arab Emirates,ARE,1960,,,6.929,,,51.537,,,,92418.000,26.500,
6,Argentina,ARG,1960,,,3.109,5642.765,,65.055,,7.604,,20481779.000,26.389,115573868508.150
7,Armenia,ARM,1960,,,4.786,,,65.972,,,,1874121.000,48.725,
8,American Samoa,ASM,1960,,,,,,,,,,20123.000,33.789,
9,Antigua and Barbuda,ATG,1960,,,4.425,,,61.968,,,,54131.000,60.344,


In [53]:
worldbank_codes = worldbank['code'].unique()
worldbank_codes

array(['ABW', 'AFG', 'AGO', 'ALB', 'AND', 'ARE', 'ARG', 'ARM', 'ASM',
       'ATG', 'AUS', 'AUT', 'AZE', 'BDI', 'BEL', 'BEN', 'BFA', 'BGD',
       'BGR', 'BHR', 'BHS', 'BIH', 'BLR', 'BLZ', 'BMU', 'BOL', 'BRA',
       'BRB', 'BRN', 'BTN', 'BWA', 'CAF', 'CAN', 'CHE', 'CHI', 'CHL',
       'CHN', 'CIV', 'CMR', 'COD', 'COG', 'COL', 'COM', 'CPV', 'CRI',
       'CUB', 'CUW', 'CYM', 'CYP', 'CZE', 'DEU', 'DJI', 'DMA', 'DNK',
       'DOM', 'DZA', 'ECU', 'EGY', 'ERI', 'ESP', 'EST', 'ETH', 'FIN',
       'FJI', 'FRA', 'FRO', 'FSM', 'GAB', 'GBR', 'GEO', 'GHA', 'GIB',
       'GIN', 'GMB', 'GNB', 'GNQ', 'GRC', 'GRD', 'GRL', 'GTM', 'GUM',
       'GUY', 'HKG', 'HND', 'HRV', 'HTI', 'HUN', 'IDN', 'IMN', 'IND',
       'IRL', 'IRN', 'IRQ', 'ISL', 'ISR', 'ITA', 'JAM', 'JOR', 'JPN',
       'KAZ', 'KEN', 'KGZ', 'KHM', 'KIR', 'KNA', 'KOR', 'KWT', 'LAO',
       'LBN', 'LBR', 'LBY', 'LCA', 'LIE', 'LKA', 'LSO', 'LTU', 'LUX',
       'LVA', 'MAC', 'MAF', 'MAR', 'MCO', 'MDA', 'MDG', 'MDV', 'MEX',
       'MHL', 'MKD',

In [54]:
data = pd.read_csv('Modified_Data/VoteDiffsWithTotalAid.csv')
data.index.name = 'record'
data_codes = data['code'].unique()
data_codes

array(['AFG', 'AGO', 'ALB', 'AND', 'ARE', 'ARG', 'ARM', 'ATG', 'AUS',
       'AUT', 'AZE', 'BDI', 'BEL', 'BEN', 'BFA', 'BGD', 'BGR', 'BHR',
       'BHS', 'BIH', 'BLR', 'BLZ', 'BOL', 'BRA', 'BRB', 'BRN', 'BTN',
       'BWA', 'CAF', 'CAN', 'CHE', 'CHL', 'CHN', 'CIV', 'CMR', 'COD',
       'COG', 'COL', 'COM', 'CPV', 'CRI', 'CSK', 'CUB', 'CYP', 'CZE',
       'DDR', 'DEU', 'DJI', 'DMA', 'DNK', 'DOM', 'DZA', 'EAZ', 'ECU',
       'EGY', 'ERI', 'ESP', 'EST', 'ETH', 'FIN', 'FJI', 'FRA', 'FSM',
       'GAB', 'GBR', 'GEO', 'GHA', 'GIN', 'GMB', 'GNB', 'GNQ', 'GRC',
       'GRD', 'GTM', 'GUY', 'HND', 'HRV', 'HTI', 'HUN', 'IDN', 'IND',
       'IRL', 'IRN', 'IRQ', 'ISL', 'ISR', 'ITA', 'JAM', 'JOR', 'JPN',
       'KAZ', 'KEN', 'KGZ', 'KHM', 'KIR', 'KNA', 'KOR', 'KWT', 'LAO',
       'LBN', 'LBR', 'LBY', 'LCA', 'LIE', 'LKA', 'LSO', 'LTU', 'LUX',
       'LVA', 'MAR', 'MCO', 'MDA', 'MDG', 'MDV', 'MEX', 'MHL', 'MKD',
       'MLI', 'MLT', 'MMR', 'MNE', 'MNG', 'MOZ', 'MRT', 'MUS', 'MWI',
       'MYS', 'NAM',

In [55]:
missing_from_data = set(worldbank_codes) - set(data_codes)
missing_from_data

{'ABW',
 'ASM',
 'BMU',
 'CHI',
 'CUW',
 'CYM',
 'FRO',
 'GIB',
 'GRL',
 'GUM',
 'HKG',
 'IMN',
 'MAC',
 'MAF',
 'MNP',
 'NCL',
 'PRI',
 'PSE',
 'PYF',
 'SAS',
 'SRB',
 'SXM',
 'TCA',
 'VGB',
 'VIR',
 'XKX'}

In [56]:
missing_from_worldbank = set(data_codes) - set(worldbank_codes)
missing_from_worldbank

{'CSK', 'DDR', 'EAZ', 'NYEMEN', 'TWN', 'WGERMANY', 'YMD', 'YUG'}

In [57]:
foo = data[data['code'].isin(missing_from_worldbank)]
uniqueDATAnames = foo['name'].unique()
uniqueDATAnames

array(['Czechoslovakia', 'German Democratic Republic', 'Zanzibar',
       'Yemen Arab Republic', 'Taiwan, Province of China',
       'Federal Republic of Germany', "Yemen People's Republic",
       'Yugoslavia'], dtype=object)

In [58]:
bar = worldbank[worldbank['code'].isin(missing_from_data)]
uniqueWBnames = bar['name'].unique()
uniqueWBnames

array(['Aruba', 'American Samoa', 'Bermuda', 'Channel Islands', 'Curacao',
       'Cayman Islands', 'Faroe Islands', 'Gibraltar', 'Greenland',
       'Guam', 'Hong Kong SAR, China', 'Isle of Man', 'Macao SAR, China',
       'St. Martin (French part)', 'Northern Mariana Islands',
       'New Caledonia', 'Puerto Rico', 'West Bank and Gaza',
       'French Polynesia', 'South Asia', 'Serbia',
       'Sint Maarten (Dutch part)', 'Turks and Caicos Islands',
       'British Virgin Islands', 'Virgin Islands (U.S.)', 'Kosovo'],
      dtype=object)

In [59]:
# build informal spreadsheet of all codes and names to help identify possible substitutions or variations

datacodes = pd.Series(data['code'].unique())
datanames = pd.Series(data['name'].unique())
bankcodes = pd.Series(worldbank['code'].unique())
banknames = pd.Series(worldbank['name'].unique())

In [60]:
a = {'datacodes': datacodes, 'datanames': datanames}
a = pd.DataFrame(data=a)
a.fillna(0)
a

Unnamed: 0,datacodes,datanames
0,AFG,Afghanistan
1,AGO,Angola
2,ALB,Albania
3,AND,Andorra
4,ARE,United Arab Emirates
5,ARG,Argentina
6,ARM,Armenia
7,ATG,Antigua and Barbuda
8,AUS,Australia
9,AUT,Austria


In [61]:
b = {'bankcodes': bankcodes, 'banknames': banknames}
b = pd.DataFrame(data=b)
b.fillna(0)
b

Unnamed: 0,bankcodes,banknames
0,ABW,Aruba
1,AFG,Afghanistan
2,AGO,Angola
3,ALB,Albania
4,AND,Andorra
5,ARE,United Arab Emirates
6,ARG,Argentina
7,ARM,Armenia
8,ASM,American Samoa
9,ATG,Antigua and Barbuda


In [62]:
# There do not appear to be any duplicates or variations. Kosovo, Bermuda, and the other items on the 
# World Bank database that do not appear in the UN database are not recognized as UN member countries.
# Conversely, the World Bank did not recognize the split in the governments of Germany or Yemen.

a.merge(b, left_on='datacodes', right_on='bankcodes', how='outer')

Unnamed: 0,datacodes,datanames,bankcodes,banknames
0,AFG,Afghanistan,AFG,Afghanistan
1,AGO,Angola,AGO,Angola
2,ALB,Albania,ALB,Albania
3,AND,Andorra,AND,Andorra
4,ARE,United Arab Emirates,ARE,United Arab Emirates
5,ARG,Argentina,ARG,Argentina
6,ARM,Armenia,ARM,Armenia
7,ATG,Antigua and Barbuda,ATG,Antigua and Barbuda
8,AUS,Australia,AUS,Australia
9,AUT,Austria,AUT,Austria


In [63]:
# Drop rows from the main worldbank table where there is no corresponding UN country

nomatch = missing_from_worldbank | missing_from_data
nomatch

{'ABW',
 'ASM',
 'BMU',
 'CHI',
 'CSK',
 'CUW',
 'CYM',
 'DDR',
 'EAZ',
 'FRO',
 'GIB',
 'GRL',
 'GUM',
 'HKG',
 'IMN',
 'MAC',
 'MAF',
 'MNP',
 'NCL',
 'NYEMEN',
 'PRI',
 'PSE',
 'PYF',
 'SAS',
 'SRB',
 'SXM',
 'TCA',
 'TWN',
 'VGB',
 'VIR',
 'WGERMANY',
 'XKX',
 'YMD',
 'YUG'}

In [64]:
disposables = worldbank['code'].isin(nomatch)
disposables = disposables[disposables].index
disposables

Int64Index([    0,     8,    24,    34,    46,    47,    65,    71,    78,
               80,
            ...
            12801, 12805, 12806, 12811, 12821, 12829, 12832, 12852, 12853,
            12857],
           dtype='int64', length=1534)

In [65]:
worldbank = worldbank.drop(disposables)
worldbank

Unnamed: 0,name,code,year,debt,energy,fertility,GDPpercap,gini,life_exp,literacy,exports,pop_density,pop,rural,gdp
1,Afghanistan,AFG,1960,,,7.450,,,32.446,,4.132,,8996973.000,91.599,
2,Angola,AGO,1960,,,6.708,,,37.524,,,,5454933.000,89.565,
3,Albania,ALB,1960,,,6.489,,,62.283,,,,1608800.000,69.295,
4,Andorra,AND,1960,,,,,,,,,,13411.000,41.550,
5,United Arab Emirates,ARE,1960,,,6.929,,,51.537,,,,92418.000,26.500,
6,Argentina,ARG,1960,,,3.109,5642.765,,65.055,,7.604,,20481779.000,26.389,115573868508.150
7,Armenia,ARM,1960,,,4.786,,,65.972,,,,1874121.000,48.725,
9,Antigua and Barbuda,ATG,1960,,,4.425,,,61.968,,,,54131.000,60.344,
10,Australia,AUS,1960,,3063.554,3.453,19378.144,,70.817,,12.991,,10276477.000,18.471,199139047718.850
11,Austria,AUT,1960,,1546.261,2.690,13031.032,,68.586,,23.248,,7047539.000,35.280,91836703339.523


In [66]:
worldbank['year'] = worldbank['year'].astype(int)

In [67]:
# Merge World Bank data into general voting / aid database
combined_data = data.merge(worldbank, on=['code', 'year'])
combined_data

Unnamed: 0.1,Unnamed: 0,issue,membership,vote_foreign,code,name_x,year,aid,total_aid,vote_usa,...,fertility,GDPpercap,gini,life_exp,literacy,exports,pop_density,pop,rural,gdp
0,641,627.000,1.000,3,AFG,Afghanistan,1960,69454739.000,673372854.000,1,...,7.450,,,32.446,,4.132,,8996973.000,91.599,
1,642,628.000,1.000,2,AFG,Afghanistan,1960,69454739.000,673372854.000,1,...,7.450,,,32.446,,4.132,,8996973.000,91.599,
2,643,629.000,1.000,1,AFG,Afghanistan,1960,69454739.000,673372854.000,3,...,7.450,,,32.446,,4.132,,8996973.000,91.599,
3,644,630.000,1.000,3,AFG,Afghanistan,1960,69454739.000,673372854.000,1,...,7.450,,,32.446,,4.132,,8996973.000,91.599,
4,645,631.000,1.000,1,AFG,Afghanistan,1960,69454739.000,673372854.000,3,...,7.450,,,32.446,,4.132,,8996973.000,91.599,
5,646,632.000,1.000,1,AFG,Afghanistan,1960,69454739.000,673372854.000,3,...,7.450,,,32.446,,4.132,,8996973.000,91.599,
6,647,633.000,1.000,1,AFG,Afghanistan,1960,69454739.000,673372854.000,3,...,7.450,,,32.446,,4.132,,8996973.000,91.599,
7,648,634.000,1.000,3,AFG,Afghanistan,1960,69454739.000,673372854.000,1,...,7.450,,,32.446,,4.132,,8996973.000,91.599,
8,649,635.000,1.000,3,AFG,Afghanistan,1960,69454739.000,673372854.000,1,...,7.450,,,32.446,,4.132,,8996973.000,91.599,
9,650,636.000,1.000,3,AFG,Afghanistan,1960,69454739.000,673372854.000,1,...,7.450,,,32.446,,4.132,,8996973.000,91.599,


In [68]:
# Export combined data to new file
combined_data.to_csv("Modified_Data/AidVotesBank.csv")

In [69]:
combined_data.describe()

Unnamed: 0.1,Unnamed: 0,issue,membership,vote_foreign,year,aid,total_aid,vote_usa,vote_diff,debt,...,fertility,GDPpercap,gini,life_exp,literacy,exports,pop_density,pop,rural,gdp
count,790903.0,790903.0,790903.0,790903.0,790903.0,790903.0,790903.0,790903.0,790903.0,91287.0,...,771601.0,693105.0,109955.0,772575.0,61942.0,653501.0,772524.0,790238.0,790446.0,693313.0
mean,421832.579,3339.162,1.0,1.32,1990.755,148387622.282,6418645108.371,2.31,1.956,58.596,...,4.105,10401.672,39.533,63.809,77.766,34.259,176.926,30552226.378,50.737,252759947967.113
std,243855.008,1446.835,0.0,0.576,14.91,667522295.043,17147946489.653,0.825,1.727,90.283,...,2.019,16865.791,9.589,11.245,22.644,24.873,921.989,113625100.005,24.518,1006875883935.275
min,641.0,627.0,1.0,1.0,1960.0,-16211959.0,0.0,1.0,0.0,1.89,...,1.052,133.967,21.0,18.907,5.405,0.005,0.632,9394.0,0.0,29010251.779
25%,212772.5,2121.0,1.0,1.0,1980.0,98893.0,101368922.0,2.0,0.0,29.998,...,2.14,1108.586,32.0,55.139,65.145,17.81,17.697,1823216.0,30.846,3988772862.249
50%,422440.0,3422.0,1.0,1.0,1989.0,11467648.0,834375199.0,3.0,1.0,48.353,...,3.843,3358.096,37.7,66.911,87.804,28.552,51.552,6443751.0,52.085,16181972308.499
75%,634207.5,4538.0,1.0,2.0,2004.0,69910006.0,3910736331.0,3.0,4.0,69.734,...,5.979,12328.156,46.8,72.605,94.9,44.346,122.21,19413000.0,71.465,124233662847.219
max,851919.0,9056.0,1.0,3.0,2017.0,14712672876.0,227634027836.0,3.0,4.0,2007.964,...,8.462,193745.575,65.8,85.417,99.998,228.994,19196.0,1386395000.0,97.846,17348626599500.0


In [70]:
# Data quality check -- confirm that a random sample of GDP per capita is actually equal to GDP divided by population
test = combined_data.loc[10500:15600, ['gdp', 'pop', 'GDPpercap']]
test = test['gdp'] / test['pop'] / test['GDPpercap']
test.describe()

count   5101.000
mean       1.000
std        0.000
min        1.000
25%        1.000
50%        1.000
75%        1.000
max        1.000
dtype: float64

In [71]:
# Display 'pandas profiling' overview of data to attempt to confirm that all variables are working as intended.
# This tool does not esem particularly useful for assessing the data; there are either too many variables, or
# they are not quantitative enough. For some reason the tool is claiming that 100.0% of variables ar emissing, even
# though it then goes on to provide non-zero summary statistics for the supposedly missing variables.

import pandas_profiling
# don't use combined_data.profile_report()
pandas_profiling.ProfileReport(combined_data)

0,1
Number of variables,24
Number of observations,790903
Total Missing (%),0.0%
Total size in memory,170.9 MiB
Average record size in memory,226.0 B

0,1
Numeric,19
Categorical,3
Boolean,0
Date,0
Text (Unique),0
Rejected,2
Unsupported,0

0,1
Distinct count,8007
Unique (%),0.0%
Missing (%),100.0%
Missing (n),97798
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,10402
Minimum,133.97
Maximum,193750
Zeros (%),0.0%

0,1
Minimum,133.97
5-th percentile,367.74
Q1,1108.6
Median,3358.1
Q3,12328.0
95-th percentile,42744.0
Maximum,193750.0
Range,193610.0
Interquartile range,11220.0

0,1
Standard deviation,16866
Coef of variation,1.6215
Kurtosis,19.398
Mean,10402
MAD,11150
Skewness,3.5095
Sum,7209500000
Variance,284450000
Memory size,12.1 MiB

Value,Count,Frequency (%),Unnamed: 3
1580.22589398,178,0.0%,
6280.14824383,178,0.0%,
343.948157013,178,0.0%,
471.487504767,178,0.0%,
374.919194484,178,0.0%,
1701.09258633,178,0.0%,
1209.24779764,178,0.0%,
596.114674302,178,0.0%,
453.376551872,178,0.0%,
16955.2629228,178,0.0%,

Value,Count,Frequency (%),Unnamed: 3
133.967431356,155,0.0%,
137.595720053,155,0.0%,
137.616315023,151,0.0%,
144.986311733,55,0.0%,
149.506421201,149,0.0%,

Value,Count,Frequency (%),Unnamed: 3
172177.174976,60,0.0%,
182457.233647,80,0.0%,
185396.378109,94,0.0%,
189464.583635,78,0.0%,
193745.574986,75,0.0%,

0,1
Distinct count,790903
Unique (%),100.0%
Missing (%),0.0%
Missing (n),0
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,421830
Minimum,641
Maximum,851919
Zeros (%),0.0%

0,1
Minimum,641
5-th percentile,41926
Q1,212770
Median,422440
Q3,634210
95-th percentile,801420
Maximum,851919
Range,851278
Interquartile range,421440

0,1
Standard deviation,243860
Coef of variation,0.57808
Kurtosis,-1.1918
Mean,421830
MAD,210760
Skewness,-0.0014931
Sum,333628652432
Variance,59465000000
Memory size,32.1 MiB

Value,Count,Frequency (%),Unnamed: 3
2047,1,0.0%,
798511,1,0.0%,
194292,1,0.0%,
182002,1,0.0%,
188145,1,0.0%,
143087,1,0.0%,
141038,1,0.0%,
147181,1,0.0%,
145132,1,0.0%,
134891,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
641,1,0.0%,
642,1,0.0%,
643,1,0.0%,
644,1,0.0%,
645,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
851915,1,0.0%,
851916,1,0.0%,
851917,1,0.0%,
851918,1,0.0%,
851919,1,0.0%,

0,1
Distinct count,7267
Unique (%),0.0%
Missing (%),0.0%
Missing (n),0
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,148390000
Minimum,-16212000
Maximum,14713000000
Zeros (%),0.0%

0,1
Minimum,-16212000
5-th percentile,0
Q1,98893
Median,11468000
Q3,69910000
95-th percentile,539560000
Maximum,14713000000
Range,14729000000
Interquartile range,69811000

0,1
Standard deviation,667520000
Coef of variation,4.4985
Kurtosis,155.12
Mean,148390000
MAD,212810000
Skewness,10.907
Sum,1.1736e+14
Variance,4.4559e+17
Memory size,12.1 MiB

Value,Count,Frequency (%),Unnamed: 3
0.0,163230,0.0%,
100529.0,426,0.0%,
2892748.0,378,0.0%,
17717.0,320,0.0%,
89250.0,310,0.0%,
98893.0,310,0.0%,
5349941.0,310,0.0%,
87266.0,310,0.0%,
1654041.0,310,0.0%,
828108.0,304,0.0%,

Value,Count,Frequency (%),Unnamed: 3
-16211959.0,66,0.0%,
-2336039.0,75,0.0%,
-2329987.0,60,0.0%,
-1952463.0,66,0.0%,
-1768721.0,68,0.0%,

Value,Count,Frequency (%),Unnamed: 3
11596966791.0,87,0.0%,
12180603528.0,66,0.0%,
13529847564.0,147,0.0%,
14120554665.0,70,0.0%,
14712672876.0,68,0.0%,

0,1
Distinct count,192
Unique (%),0.0%
Missing (%),0.0%
Missing (n),0

0,1
TUN,5029
ESP,5029
PHL,5029
Other values (189),775816

Value,Count,Frequency (%),Unnamed: 3
TUN,5029,0.0%,
ESP,5029,0.0%,
PHL,5029,0.0%,
NPL,5029,0.0%,
HTI,5029,0.0%,
SEN,5029,0.0%,
GIN,5029,0.0%,
GTM,5029,0.0%,
SLV,5029,0.0%,
GAB,5029,0.0%,

0,1
Distinct count,1266
Unique (%),0.0%
Missing (%),100.0%
Missing (n),699616
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,58.596
Minimum,1.89
Maximum,2008
Zeros (%),0.0%

0,1
Minimum,1.89
5-th percentile,11.616
Q1,29.998
Median,48.353
Q3,69.734
95-th percentile,123.9
Maximum,2008.0
Range,2006.1
Interquartile range,39.736

0,1
Standard deviation,90.283
Coef of variation,1.5408
Kurtosis,335.88
Mean,58.596
MAD,30.821
Skewness,16.919
Sum,5349000
Variance,8151.1
Memory size,12.1 MiB

Value,Count,Frequency (%),Unnamed: 3
1772.19904345,126,0.0%,
39.1284001999,117,0.0%,
48.6595642383,94,0.0%,
42.9255368597,87,0.0%,
9.89107286043,87,0.0%,
43.5560524508,87,0.0%,
50.4944776193,87,0.0%,
82.8374805428,87,0.0%,
40.5884641486,87,0.0%,
69.0268963477,87,0.0%,

Value,Count,Frequency (%),Unnamed: 3
1.88995376231,74,0.0%,
1.89352238938,60,0.0%,
2.43434946546,86,0.0%,
2.53287319182,75,0.0%,
2.80255434133,65,0.0%,

Value,Count,Frequency (%),Unnamed: 3
244.380536439,86,0.0%,
277.530307987,74,0.0%,
289.844727084,86,0.0%,
1772.19904345,126,0.0%,
2007.96418969,67,0.0%,

0,1
Distinct count,5623
Unique (%),0.0%
Missing (%),100.0%
Missing (n),285753
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,2154.3
Minimum,0
Maximum,22120
Zeros (%),0.0%

0,1
Minimum,0.0
5-th percentile,274.62
Q1,480.09
Median,997.53
Q3,2881.5
95-th percentile,7743.7
Maximum,22120.0
Range,22120.0
Interquartile range,2401.4

0,1
Standard deviation,2631.7
Coef of variation,1.2216
Kurtosis,9.0887
Mean,2154.3
MAD,1835.8
Skewness,2.6086
Sum,1088200000
Variance,6925800
Memory size,12.1 MiB

Value,Count,Frequency (%),Unnamed: 3
0.0,819,0.0%,
434.2134617,178,0.0%,
603.489348474,178,0.0%,
289.316544034,178,0.0%,
360.235637656,178,0.0%,
449.724795855,178,0.0%,
472.094743356,178,0.0%,
299.104911698,178,0.0%,
350.058709862,178,0.0%,
547.642151085,178,0.0%,

Value,Count,Frequency (%),Unnamed: 3
0.0,819,0.0%,
9.54803137182,87,0.0%,
9.56252937961,77,0.0%,
13.9585431269,72,0.0%,
16.5321219129,74,0.0%,

Value,Count,Frequency (%),Unnamed: 3
19621.8724187,67,0.0%,
19838.9066222,68,0.0%,
20727.9579576,74,0.0%,
21300.2069069,73,0.0%,
22120.3715749,72,0.0%,

0,1
Distinct count,7558
Unique (%),0.0%
Missing (%),100.0%
Missing (n),137402
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,34.259
Minimum,0.0053768
Maximum,228.99
Zeros (%),0.0%

0,1
Minimum,0.0053768
5-th percentile,8.2461
Q1,17.81
Median,28.552
Q3,44.346
95-th percentile,76.335
Maximum,228.99
Range,228.99
Interquartile range,26.536

0,1
Standard deviation,24.873
Coef of variation,0.72603
Kurtosis,10.999
Mean,34.259
MAD,17.41
Skewness,2.5008
Sum,22388000
Variance,618.68
Memory size,12.1 MiB

Value,Count,Frequency (%),Unnamed: 3
16.077170418,243,0.0%,
37.2050049387,178,0.0%,
22.174090142,178,0.0%,
62.3317878548,178,0.0%,
14.5789534136,178,0.0%,
29.7245825438,178,0.0%,
17.9127725857,178,0.0%,
32.3814560197,178,0.0%,
17.9150533573,178,0.0%,
11.0057080159,178,0.0%,

Value,Count,Frequency (%),Unnamed: 3
0.00537675930738,79,0.0%,
0.00916177369601,76,0.0%,
0.0355659907118,68,0.0%,
0.0755486839761,65,0.0%,
0.0994649125402,68,0.0%,

Value,Count,Frequency (%),Unnamed: 3
223.616412561,75,0.0%,
224.840709934,78,0.0%,
225.159667737,74,0.0%,
228.037617061,87,0.0%,
228.993770935,76,0.0%,

0,1
Distinct count,4366
Unique (%),0.0%
Missing (%),100.0%
Missing (n),19302
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,4.1047
Minimum,1.052
Maximum,8.462
Zeros (%),0.0%

0,1
Minimum,1.052
5-th percentile,1.45
Q1,2.14
Median,3.843
Q3,5.979
95-th percentile,7.279
Maximum,8.462
Range,7.41
Interquartile range,3.839

0,1
Standard deviation,2.0187
Coef of variation,0.4918
Kurtosis,-1.349
Mean,4.1047
MAD,1.806
Skewness,0.22054
Sum,3167200
Variance,4.0752
Memory size,12.1 MiB

Value,Count,Frequency (%),Unnamed: 3
1.75,2432,0.0%,
2.01,2409,0.0%,
1.66,2306,0.0%,
1.74,2212,0.0%,
1.93,2057,0.0%,
1.45,2006,0.0%,
1.9,1937,0.0%,
1.8,1886,0.0%,
1.71,1880,0.0%,
1.6,1873,0.0%,

Value,Count,Frequency (%),Unnamed: 3
1.052,94,0.0%,
1.076,74,0.0%,
1.085,67,0.0%,
1.09,132,0.0%,
1.11,199,0.0%,

Value,Count,Frequency (%),Unnamed: 3
8.444,134,0.0%,
8.451,108,0.0%,
8.459,74,0.0%,
8.461,103,0.0%,
8.462,147,0.0%,

0,1
Distinct count,8001
Unique (%),0.0%
Missing (%),100.0%
Missing (n),97590
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,2.5276e+11
Minimum,29010000
Maximum,1.7349e+13
Zeros (%),0.0%

0,1
Minimum,29010000.0
5-th percentile,433580000.0
Q1,3988800000.0
Median,16182000000.0
Q3,124230000000.0
95-th percentile,1148600000000.0
Maximum,17349000000000.0
Range,17349000000000.0
Interquartile range,120240000000.0

0,1
Standard deviation,1.0069e+12
Coef of variation,3.9835
Kurtosis,113.43
Mean,2.5276e+11
MAD,3.6223e+11
Skewness,9.4068
Sum,1.7524e+17
Variance,1.0138e+24
Memory size,12.1 MiB

Value,Count,Frequency (%),Unnamed: 3
7803889933.5,315,0.0%,
25354505077.5,294,0.0%,
1311353247.76,258,0.0%,
1.97653949751e+11,203,0.0%,
1249712652.43,193,0.0%,
56223777003.8,178,0.0%,
2865823701.88,178,0.0%,
24644704390.5,178,0.0%,
23109494479.6,178,0.0%,
12938733999.3,178,0.0%,

Value,Count,Frequency (%),Unnamed: 3
29010251.7794,66,0.0%,
29186811.8705,74,0.0%,
29484310.5211,67,0.0%,
29685560.4745,77,0.0%,
29811099.7764,87,0.0%,

Value,Count,Frequency (%),Unnamed: 3
15853795607800.0,60,0.0%,
16242526401300.0,80,0.0%,
16710459044300.0,78,0.0%,
16972347893400.0,75,0.0%,
17348626599500.0,94,0.0%,

0,1
Distinct count,362
Unique (%),0.0%
Missing (%),100.0%
Missing (n),680948
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,39.533
Minimum,21
Maximum,65.8
Zeros (%),0.0%

0,1
Minimum,21.0
5-th percentile,26.6
Q1,32.0
Median,37.7
Q3,46.8
95-th percentile,57.4
Maximum,65.8
Range,44.8
Interquartile range,14.8

0,1
Standard deviation,9.589
Coef of variation,0.24256
Kurtosis,-0.73096
Mean,39.533
MAD,8.0775
Skewness,0.47863
Sum,4346800
Variance,91.948
Memory size,12.1 MiB

Value,Count,Frequency (%),Unnamed: 3
33.2,974,0.0%,
32.5,863,0.0%,
26.8,833,0.0%,
35.7,822,0.0%,
29.0,811,0.0%,
35.4,802,0.0%,
32.4,801,0.0%,
32.6,799,0.0%,
33.6,796,0.0%,
35.8,743,0.0%,

Value,Count,Frequency (%),Unnamed: 3
21.0,142,0.0%,
23.3,117,0.0%,
23.7,76,0.0%,
24.0,80,0.0%,
24.4,164,0.0%,

Value,Count,Frequency (%),Unnamed: 3
63.3,191,0.0%,
63.4,66,0.0%,
64.7,73,0.0%,
64.8,74,0.0%,
65.8,70,0.0%,

0,1
Distinct count,4940
Unique (%),0.0%
Missing (%),0.0%
Missing (n),0
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,3339.2
Minimum,627
Maximum,9056
Zeros (%),0.0%

0,1
Minimum,627
5-th percentile,1001
Q1,2121
Median,3422
Q3,4538
95-th percentile,5441
Maximum,9056
Range,8429
Interquartile range,2417

0,1
Standard deviation,1446.8
Coef of variation,0.43329
Kurtosis,-0.5598
Mean,3339.2
MAD,1236.3
Skewness,0.033767
Sum,2641000000
Variance,2093300
Memory size,32.1 MiB

Value,Count,Frequency (%),Unnamed: 3
1736.0,225,0.0%,
1696.0,225,0.0%,
1698.0,225,0.0%,
1699.0,225,0.0%,
1700.0,225,0.0%,
1701.0,225,0.0%,
1704.0,225,0.0%,
1705.0,225,0.0%,
1707.0,225,0.0%,
1708.0,225,0.0%,

Value,Count,Frequency (%),Unnamed: 3
627.0,96,0.0%,
628.0,96,0.0%,
629.0,96,0.0%,
630.0,96,0.0%,
631.0,96,0.0%,

Value,Count,Frequency (%),Unnamed: 3
9052.0,118,0.0%,
9053.0,118,0.0%,
9054.0,118,0.0%,
9055.0,118,0.0%,
9056.0,118,0.0%,

0,1
Distinct count,8137
Unique (%),0.0%
Missing (%),100.0%
Missing (n),18328
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,63.809
Minimum,18.907
Maximum,85.417
Zeros (%),0.0%

0,1
Minimum,18.907
5-th percentile,43.712
Q1,55.139
Median,66.911
Q3,72.605
95-th percentile,78.632
Maximum,85.417
Range,66.51
Interquartile range,17.466

0,1
Standard deviation,11.245
Coef of variation,0.17624
Kurtosis,-0.53201
Mean,63.809
MAD,9.5127
Skewness,-0.5755
Sum,49297000
Variance,126.46
Memory size,12.1 MiB

Value,Count,Frequency (%),Unnamed: 3
62.869,406,0.0%,
67.723,400,0.0%,
51.888,387,0.0%,
72.3,385,0.0%,
45.63,369,0.0%,
58.671,345,0.0%,
68.283,333,0.0%,
62.079,327,0.0%,
74.5,317,0.0%,
55.772,310,0.0%,

Value,Count,Frequency (%),Unnamed: 3
18.907,98,0.0%,
19.725,108,0.0%,
20.317,89,0.0%,
22.744,147,0.0%,
23.595,70,0.0%,

Value,Count,Frequency (%),Unnamed: 3
83.6024390244,169,0.0%,
83.793902439,78,0.0%,
83.9848780488,75,0.0%,
84.0997560976,94,0.0%,
85.4170731707,70,0.0%,

0,1
Distinct count,764
Unique (%),0.0%
Missing (%),100.0%
Missing (n),728961
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,77.766
Minimum,5.4047
Maximum,99.998
Zeros (%),0.0%

0,1
Minimum,5.4047
5-th percentile,29.227
Q1,65.145
Median,87.804
Q3,94.9
95-th percentile,99.513
Maximum,99.998
Range,94.594
Interquartile range,29.755

0,1
Standard deviation,22.644
Coef of variation,0.29118
Kurtosis,0.33442
Mean,77.766
MAD,18.566
Skewness,-1.1503
Sum,4817000
Variance,512.74
Memory size,12.1 MiB

Value,Count,Frequency (%),Unnamed: 3
98.7427368164,309,0.0%,
99.7,232,0.0%,
98.5999984741,219,0.0%,
99.5999984741,219,0.0%,
98.95,213,0.0%,
36.5183982849,178,0.0%,
55.3250389099,178,0.0%,
38.1967582703,178,0.0%,
41.2162094116,178,0.0%,
63.2081489563,178,0.0%,

Value,Count,Frequency (%),Unnamed: 3
5.40465021133,109,0.0%,
8.68515014648,70,0.0%,
9.43381023407,178,0.0%,
10.8946504593,65,0.0%,
12.8481702805,76,0.0%,

Value,Count,Frequency (%),Unnamed: 3
99.98055,80,0.0%,
99.98383,78,0.0%,
99.98657,75,0.0%,
99.99486,60,0.0%,
99.99819,76,0.0%,

0,1
Constant value,1

0,1
Distinct count,192
Unique (%),0.0%
Missing (%),0.0%
Missing (n),0

0,1
Colombia,5029
Guinea,5029
Austria,5029
Other values (189),775816

Value,Count,Frequency (%),Unnamed: 3
Colombia,5029,0.0%,
Guinea,5029,0.0%,
Austria,5029,0.0%,
India,5029,0.0%,
Cyprus,5029,0.0%,
Tunisia,5029,0.0%,
Paraguay,5029,0.0%,
Democratic Republic of the Congo,5029,0.0%,
Afghanistan,5029,0.0%,
Turkey,5029,0.0%,

0,1
Distinct count,192
Unique (%),0.0%
Missing (%),0.0%
Missing (n),0

0,1
Colombia,5029
Mexico,5029
Ecuador,5029
Other values (189),775816

Value,Count,Frequency (%),Unnamed: 3
Colombia,5029,0.0%,
Mexico,5029,0.0%,
Ecuador,5029,0.0%,
Brazil,5029,0.0%,
Honduras,5029,0.0%,
Nepal,5029,0.0%,
Morocco,5029,0.0%,
Greece,5029,0.0%,
"Congo, Dem. Rep.",5029,0.0%,
Sri Lanka,5029,0.0%,

0,1
Distinct count,9078
Unique (%),0.0%
Missing (%),100.0%
Missing (n),665
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,30552000
Minimum,9394
Maximum,1386400000
Zeros (%),0.0%

0,1
Minimum,9394
5-th percentile,108040
Q1,1823200
Median,6443800
Q3,19413000
95-th percentile,104510000
Maximum,1386400000
Range,1386400000
Interquartile range,17590000

0,1
Standard deviation,113630000
Coef of variation,3.719
Kurtosis,80.153
Mean,30552000
MAD,38893000
Skewness,8.5241
Sum,2.4144e+13
Variance,1.2911e+16
Memory size,12.1 MiB

Value,Count,Frequency (%),Unnamed: 3
61786.0,404,0.0%,
104506.0,225,0.0%,
18198844.0,178,0.0%,
305774.0,178,0.0%,
557805.0,178,0.0%,
6713950.0,178,0.0%,
7187787.0,178,0.0%,
2055365.0,178,0.0%,
3770871.0,178,0.0%,
2894972.0,178,0.0%,

Value,Count,Frequency (%),Unnamed: 3
9394.0,66,0.0%,
9484.0,67,0.0%,
9596.0,73,0.0%,
9726.0,74,0.0%,
9828.0,87,0.0%,

Value,Count,Frequency (%),Unnamed: 3
1357380000.0,60,0.0%,
1364270000.0,80,0.0%,
1371220000.0,78,0.0%,
1378665000.0,75,0.0%,
1386395000.0,94,0.0%,

0,1
Distinct count,8849
Unique (%),0.0%
Missing (%),100.0%
Missing (n),18379
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,176.93
Minimum,0.63221
Maximum,19196
Zeros (%),0.0%

0,1
Minimum,0.63221
5-th percentile,3.1071
Q1,17.697
Median,51.552
Q3,122.21
95-th percentile,457.91
Maximum,19196.0
Range,19195.0
Interquartile range,104.51

0,1
Standard deviation,921.99
Coef of variation,5.2111
Kurtosis,275.73
Mean,176.93
MAD,206.8
Skewness,15.671
Sum,136680000
Variance,850060
Memory size,12.1 MiB

Value,Count,Frequency (%),Unnamed: 3
140.422727273,306,0.0%,
39.602923301,178,0.0%,
22.1794619198,178,0.0%,
33.128149,178,0.0%,
18.5032275909,178,0.0%,
204.99527027,178,0.0%,
37.950658956,178,0.0%,
146.840771028,178,0.0%,
71.1609307359,178,0.0%,
21.1130503145,178,0.0%,

Value,Count,Frequency (%),Unnamed: 3
0.632211179485,53,0.0%,
0.650971961173,66,0.0%,
0.670964108242,32,0.0%,
0.712636782615,42,0.0%,
0.733773397873,54,0.0%,

Value,Count,Frequency (%),Unnamed: 3
18450.5,60,0.0%,
18661.0,80,0.0%,
18859.0,78,0.0%,
19035.0,75,0.0%,
19196.0,94,0.0%,

0,1
Distinct count,8480
Unique (%),0.0%
Missing (%),100.0%
Missing (n),457
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,50.737
Minimum,0
Maximum,97.846
Zeros (%),0.0%

0,1
Minimum,0.0
5-th percentile,10.865
Q1,30.846
Median,52.085
Q3,71.465
95-th percentile,87.612
Maximum,97.846
Range,97.846
Interquartile range,40.619

0,1
Standard deviation,24.518
Coef of variation,0.48323
Kurtosis,-1.0305
Mean,50.737
MAD,20.984
Skewness,-0.129
Sum,40105000
Variance,601.12
Memory size,12.1 MiB

Value,Count,Frequency (%),Unnamed: 3
0.0,9070,0.0%,
16.9,1388,0.0%,
78.8,1328,0.0%,
20.2,445,0.0%,
77.67,427,0.0%,
57.215,341,0.0%,
59.961,325,0.0%,
25.356,321,0.0%,
74.586,315,0.0%,
10.172,310,0.0%,

Value,Count,Frequency (%),Unnamed: 3
0.0,9070,0.0%,
0.092,67,0.0%,
0.922,94,0.0%,
0.985,75,0.0%,
1.0,67,0.0%,

Value,Count,Frequency (%),Unnamed: 3
97.499,55,0.0%,
97.605,54,0.0%,
97.706,42,0.0%,
97.807,32,0.0%,
97.846,45,0.0%,

0,1
Distinct count,7372
Unique (%),0.0%
Missing (%),0.0%
Missing (n),0
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,6418600000
Minimum,0
Maximum,2.2763e+11
Zeros (%),0.0%

0,1
Minimum,0.0
5-th percentile,0.0
Q1,101370000.0
Median,834380000.0
Q3,3910700000.0
95-th percentile,38627000000.0
Maximum,227630000000.0
Range,227630000000.0
Interquartile range,3809400000.0

0,1
Standard deviation,17148000000
Coef of variation,2.6716
Kurtosis,41.002
Mean,6418600000
MAD,8627100000
Skewness,5.4122
Sum,5.0765e+15
Variance,2.9405e+2
Memory size,12.1 MiB

Value,Count,Frequency (%),Unnamed: 3
0.0,65131,0.0%,
904400464.0,4284,0.0%,
9052308041.0,3616,0.0%,
62853319.0,3559,0.0%,
219785377.0,3495,0.0%,
6909157029.0,3307,0.0%,
1510662604.0,3200,0.0%,
67015426711.0,3004,0.0%,
30046670152.0,2986,0.0%,
202695049.0,2980,0.0%,

Value,Count,Frequency (%),Unnamed: 3
0.0,65131,0.0%,
2215.0,160,0.0%,
4411.0,94,0.0%,
4733.0,134,0.0%,
6403.0,106,0.0%,

Value,Count,Frequency (%),Unnamed: 3
214631475424.0,60,0.0%,
217887419244.0,80,0.0%,
221276121409.0,78,0.0%,
224442954774.0,75,0.0%,
227634027836.0,94,0.0%,

0,1
Distinct count,3
Unique (%),0.0%
Missing (%),0.0%
Missing (n),0
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,1.9563
Minimum,0
Maximum,4
Zeros (%),0.0%

0,1
Minimum,0
5-th percentile,0
Q1,0
Median,1
Q3,4
95-th percentile,4
Maximum,4
Range,4
Interquartile range,4

0,1
Standard deviation,1.7273
Coef of variation,0.88294
Kurtosis,-1.7521
Mean,1.9563
MAD,1.6535
Skewness,0.23888
Sum,1547239
Variance,2.9835
Memory size,12.1 MiB

Value,Count,Frequency (%),Unnamed: 3
4,319941,0.0%,
1,267475,0.0%,
0,203487,0.0%,

Value,Count,Frequency (%),Unnamed: 3
0,203487,0.0%,
1,267475,0.0%,
4,319941,0.0%,

Value,Count,Frequency (%),Unnamed: 3
0,203487,0.0%,
1,267475,0.0%,
4,319941,0.0%,

0,1
Distinct count,3
Unique (%),0.0%
Missing (%),0.0%
Missing (n),0
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,1.32
Minimum,1
Maximum,3
Zeros (%),0.0%

0,1
Minimum,1
5-th percentile,1
Q1,1
Median,1
Q3,2
95-th percentile,3
Maximum,3
Range,2
Interquartile range,1

0,1
Standard deviation,0.57551
Coef of variation,0.436
Kurtosis,1.5983
Mean,1.32
MAD,0.47154
Skewness,1.6271
Sum,1043970
Variance,0.33121
Memory size,32.1 MiB

Value,Count,Frequency (%),Unnamed: 3
1,582769,0.0%,
2,163201,0.0%,
3,44933,0.0%,

Value,Count,Frequency (%),Unnamed: 3
1,582769,0.0%,
2,163201,0.0%,
3,44933,0.0%,

Value,Count,Frequency (%),Unnamed: 3
1,582769,0.0%,
2,163201,0.0%,
3,44933,0.0%,

0,1
Distinct count,3
Unique (%),0.0%
Missing (%),0.0%
Missing (n),0
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,2.3097
Minimum,1
Maximum,3
Zeros (%),0.0%

0,1
Minimum,1
5-th percentile,1
Q1,2
Median,3
Q3,3
95-th percentile,3
Maximum,3
Range,2
Interquartile range,1

0,1
Standard deviation,0.82477
Coef of variation,0.35709
Kurtosis,-1.2462
Mean,2.3097
MAD,0.74957
Skewness,-0.62741
Sum,1826741
Variance,0.68024
Memory size,12.1 MiB

Value,Count,Frequency (%),Unnamed: 3
3,429397,0.0%,
1,184462,0.0%,
2,177044,0.0%,

Value,Count,Frequency (%),Unnamed: 3
1,184462,0.0%,
2,177044,0.0%,
3,429397,0.0%,

Value,Count,Frequency (%),Unnamed: 3
1,184462,0.0%,
2,177044,0.0%,
3,429397,0.0%,

0,1
Correlation,0.94264

Unnamed: 0.1,Unnamed: 0,issue,membership,vote_foreign,code,name_x,year,aid,total_aid,vote_usa,vote_diff,name_y,debt,energy,fertility,GDPpercap,gini,life_exp,literacy,exports,pop_density,pop,rural,gdp
0,641,627.0,1.0,3,AFG,Afghanistan,1960,69454739.0,673372854.0,1,4,Afghanistan,,,7.45,,,32.446,,4.132,,8996973.0,91.599,
1,642,628.0,1.0,2,AFG,Afghanistan,1960,69454739.0,673372854.0,1,1,Afghanistan,,,7.45,,,32.446,,4.132,,8996973.0,91.599,
2,643,629.0,1.0,1,AFG,Afghanistan,1960,69454739.0,673372854.0,3,4,Afghanistan,,,7.45,,,32.446,,4.132,,8996973.0,91.599,
3,644,630.0,1.0,3,AFG,Afghanistan,1960,69454739.0,673372854.0,1,4,Afghanistan,,,7.45,,,32.446,,4.132,,8996973.0,91.599,
4,645,631.0,1.0,1,AFG,Afghanistan,1960,69454739.0,673372854.0,3,4,Afghanistan,,,7.45,,,32.446,,4.132,,8996973.0,91.599,
