In [49]:
import numpy as np
import pandas as pd
import wbdata
from sklearn.preprocessing import Imputer

In [50]:
columns = ['Country', 'Year', 'Status', 'Life_Expectancy', 
           'Adult_Mortality', 'Infant_Deaths', 'Alcohol', 
           'Percentage_Expenditure', 'Measles', 'BMI', 
           'Under-Five_Deaths ', 'Polio', 'Total_Expenditure', 
           'Diphtheria', 'HIV/AIDS', 'GDP', 'Population',
           'Thinness_1-19_years', 'Thinness_5-9_years',
           'Income_Composition_Of_Resources', 'Schooling']
main_data = pd.DataFrame(columns=columns)

In [51]:
main_data

Unnamed: 0,Country,Year,Status,Life_Expectancy,Adult_Mortality,Infant_Deaths,Alcohol,Percentage_Expenditure,Measles,BMI,...,Polio,Total_Expenditure,Diphtheria,HIV/AIDS,GDP,Population,Thinness_1-19_years,Thinness_5-9_years,Income_Composition_Of_Resources,Schooling


## Population Data

In [52]:
#wbdata.search_indicators('Population')

In [53]:
#set up the indicator I want (just build up the dict if you want more than one)
indicators = {'SP.POP.TOTL':'Population'}

In [54]:
#grab indicators above for all countries and load into data frame
pop_df_in = wbdata.get_dataframe(indicators, convert_date=False)

In [55]:
pop_df_in.to_csv("Data/New/Population/Population_Internet.csv", encoding='utf-8')

In [56]:
pop_df_get = pd.read_csv("Data/New/Population/Population_Internet.csv")
pop_df = pop_df_get.rename(index = str, columns={'date':'Year', 'country':'Country'})
len(pop_df['Country'].unique())

264

In [57]:
# Removing continents and other trivial rows
pop_df.drop(pop_df.index[:2773], inplace=True)
len(pop_df['Country'].unique())

217

In [58]:
# Year: 2000 - 2014
pop_df = pop_df[(lambda x: x['Year'].isin(range(2000,2015)))]

In [59]:
pop_df.reset_index(drop=True, inplace=True)

In [60]:
pop_df.head()

Unnamed: 0,Country,Year,Population
0,Afghanistan,2014,32758020.0
1,Afghanistan,2013,31731688.0
2,Afghanistan,2012,30696958.0
3,Afghanistan,2011,29708599.0
4,Afghanistan,2010,28803167.0


In [61]:
# Fixing Missing Values
pop_df.at[915, 'Population'] = 5054634
pop_df.at[916, 'Population'] = 4945529
pop_df.at[917, 'Population'] = 4840901

In [62]:
main_data[['Country', 'Year', 'Population']] = pop_df[['Country', 'Year', 'Population']]

In [63]:
main_data['Population'].isnull().sum()
#main_data.info()

0

## GDP

In [64]:
#set up the indicator I want (just build up the dict if you want more than one)
indicators = {'NY.GDP.PCAP.CD':'GDP'}

In [65]:
#grab indicators above for countires above and load into data frame
gdp_df = wbdata.get_dataframe(indicators, convert_date=False)

In [66]:
gdp_df.to_csv("Data/New/GDP/GDP_Internet.csv", encoding='utf-8')
gdp_data_get = pd.read_csv("Data/New/GDP/GDP_Internet.csv")
gdp_data = gdp_data_get.rename(index = str, columns={'date':'Year', 'country':'Country'})
#gdp_data[gdp_data['Country'] == "South Sudan"]

In [67]:
for val in main_data['Country'].index:
    print(val,' ',end='')
    if len(gdp_data[gdp_data['Country'] == main_data['Country'][val]].values) != 0:
        try:
            gdp_other_data = gdp_data[gdp_data['Country'] == main_data['Country'][val]][gdp_data['Year'] == main_data['Year'][val]]['GDP'].values[0]
            print(gdp_other_data, "pass: 1")
            main_data.at[val, 'GDP'] = gdp_other_data
        except IndexError:
            pass

0  625.3395388284999 pass: 1
1  647.966460473683 pass: 1
2  648.511069587633 pass: 1
3  599.29762975711 pass: 1
4  550.514973976336 pass: 1
5  444.18440407545 pass: 1
6  370.382293977374 pass: 1
7  366.230443242864 pass: 1
8  269.22969301812304 pass: 1
9  247.664139959638 pass: 1
10  216.708128851229 pass: 1
11  195.77663034155103 pass: 1
12  184.49471212204898 pass: 1
13  nan pass: 1
14  nan pass: 1
15  4578.6679344615895 pass: 1
16  4413.08288688408 pass: 1
17  4247.614342362921 pass: 1
18  4437.177794486521 pass: 1
19  4094.36020359235 pass: 1
20  4114.13489916342 pass: 1
21  4370.5399247769 pass: 1
22  3595.0380568289293 pass: 1
23  2972.74292399799 pass: 1
24  2673.78658429559 pass: 1
25  2373.58129170055 pass: 1
26  1846.12012081207 pass: 1
27  1425.12421860142 pass: 1
28  1281.6598256178 pass: 1
29  1126.68334010717 pass: 1
30  5466.42577841535 pass: 1
31  5471.123388787089 pass: 1
32  5565.134521048481 pass: 1
33  5432.413319781161 pass: 1
34  4463.39467488951 pass: 1
35  3868.

  """


8980.53641434813 pass: 1
48  8456.94799658994 pass: 1
49  8508.40691498359 pass: 1
50  8639.30885529158 pass: 1
51  8914.52543261668 pass: 1
52  8751.76652874972 pass: 1
53  nan pass: 1
54  nan pass: 1
55  11598.7517362051 pass: 1
56  11589.8530023324 pass: 1
57  11660.329531051999 pass: 1
58  10375.9942154736 pass: 1
59  10352.8227618312 pass: 1
60  42294.9947269717 pass: 1
61  40619.711297779504 pass: 1
62  38391.080866978504 pass: 1
63  41098.766941722795 pass: 1
64  39736.3540626699 pass: 1
65  43339.3798746543 pass: 1
66  47785.6590856793 pass: 1
67  48582.808455086604 pass: 1
68  43748.772158899905 pass: 1
69  41282.0201219785 pass: 1
70  38503.479614485695 pass: 1
71  32776.4422698769 pass: 1
72  24741.4935704562 pass: 1
73  22228.8464928922 pass: 1
74  21936.530101470802 pass: 1
75  5412.6923476178 pass: 1
76  5258.40737644433 pass: 1
77  5102.4899693158895 pass: 1
78  4615.86747457004 pass: 1
79  3585.90555256868 pass: 1
80  3117.89694392484 pass: 1
81  4068.97845646361 pass: 

342  885.638224530948 pass: 1
343  807.938767268667 pass: 1
344  765.8632357656951 pass: 1
345  3124.00030982516 pass: 1
346  2947.9385262805104 pass: 1
347  2645.2277526014605 pass: 1
348  2377.68877123935 pass: 1
349  1981.1701161847602 pass: 1
350  1776.86647562189 pass: 1
351  1736.93008403587 pass: 1
352  1389.62934991949 pass: 1
353  1233.59186917076 pass: 1
354  1046.4273841431698 pass: 1
355  978.334648481852 pass: 1
356  917.3643104861699 pass: 1
357  913.575642272268 pass: 1
358  958.236652408901 pass: 1
359  1007.0028688845099 pass: 1
360  5204.24371841169 pass: 1
361  5042.58221304612 pass: 1
362  4722.013403067271 pass: 1
363  5054.32534423757 pass: 1
364  4614.8290411405 pass: 1
365  4701.33442641347 pass: 1
366  5078.31457720914 pass: 1
367  4180.913531981741 pass: 1
368  3403.8152971587297 pass: 1
369  2967.8340564335604 pass: 1
370  2802.27491705489 pass: 1
371  2214.73224591872 pass: 1
372  1761.5376471886102 pass: 1
373  1524.4120215233402 pass: 1
374  1461.750520470

628  1053.1082430045199 pass: 1
629  959.372483639691 pass: 1
630  7974.406373025029 pass: 1
631  8065.960376234019 pass: 1
632  7904.495788647009 pass: 1
633  7207.0289047302795 pass: 1
634  6230.73836562837 pass: 1
635  5119.19164052311 pass: 1
636  5383.13269367517 pass: 1
637  4635.69227663314 pass: 1
638  3677.1340415731897 pass: 1
639  3354.0218037305895 pass: 1
640  2740.24944202769 pass: 1
641  2246.2574724306 pass: 1
642  2355.72585669892 pass: 1
643  2395.8565512517102 pass: 1
644  2472.19783098297 pass: 1
645  1511.82603748603 pass: 1
646  1504.93100711702 pass: 1
647  1403.4343352935598 pass: 1
648  1446.06801802192 pass: 1
649  1315.21480648394 pass: 1
650  1339.33719065958 pass: 1
651  1387.59462639938 pass: 1
652  1238.46151380518 pass: 1
653  1113.9577571960601 pass: 1
654  1068.60032134875 pass: 1
655  1059.1768842180802 pass: 1
656  935.6115180347881 pass: 1
657  744.475172433634 pass: 1
658  680.318541425182 pass: 1
659  645.8186908020559 pass: 1
660  487.08148688117

927  201.768798115624 pass: 1
928  215.13921025180198 pass: 1
929  208.19694865228297 pass: 1
930  19949.5813766971 pass: 1
931  19072.2385175669 pass: 1
932  17421.890222737802 pass: 1
933  17454.843424643503 pass: 1
934  14638.6048173457 pass: 1
935  14726.318278058701 pass: 1
936  18094.548052783102 pass: 1
937  16586.4052048847 pass: 1
938  12595.4106486304 pass: 1
939  10338.3132235799 pass: 1
940  8850.46511484791 pass: 1
941  7174.2374147336905 pass: 1
942  5308.34778059328 pass: 1
943  4498.95702743146 pass: 1
944  4070.0328269871397 pass: 1
945  3379.8964990926097 pass: 1
946  3587.0003161096297 pass: 1
947  3864.76043619193 pass: 1
948  3934.2733728356798 pass: 1
949  3690.23922847955 pass: 1
950  3032.5170490818605 pass: 1
951  2842.43853009194 pass: 1
952  3047.48803739736 pass: 1
953  2937.3607433106604 pass: 1
954  2873.86209077307 pass: 1
955  2529.63353509293 pass: 1
956  2020.99399397689 pass: 1
957  1324.99618408854 pass: 1
958  1437.6349079611903 pass: 1
959  1637.45

1237  481.335009991043 pass: 1
1238  418.951269310121 pass: 1
1239  424.956204432526 pass: 1
1240  393.313824367448 pass: 1
1241  360.571858534299 pass: 1
1242  321.481320392982 pass: 1
1243  309.486749001111 pass: 1
1244  297.75193359522 pass: 1
1245  4031.5319388481703 pass: 1
1246  3941.1146611860295 pass: 1
1247  3799.75336437721 pass: 1
1248  3438.8252777825396 pass: 1
1249  3044.94808313969 pass: 1
1250  2764.30668602753 pass: 1
1251  2568.61890785298 pass: 1
1252  2326.85195436274 pass: 1
1253  1945.6400342675897 pass: 1
1254  1098.45521561333 pass: 1
1255  1048.11053410866 pass: 1
1256  988.3050590665069 pass: 1
1257  965.749284085495 pass: 1
1258  946.7002572561411 pass: 1
1259  946.0599371665669 pass: 1
1260  830.113881624145 pass: 1
1261  810.265607301884 pass: 1
1262  766.8437623188879 pass: 1
1263  740.935845216079 pass: 1
1264  662.279518162433 pass: 1
1265  668.297604584601 pass: 1
1266  674.756414674589 pass: 1
1267  615.8202308278791 pass: 1
1268  505.470620633981 pass

1505  7165.2231748370305 pass: 1
1506  8513.564645068142 pass: 1
1507  6771.41479681885 pass: 1
1508  5291.575650186521 pass: 1
1509  3771.27895733845 pass: 1
1510  2874.28829127649 pass: 1
1511  2068.12411808473 pass: 1
1512  1658.03078544636 pass: 1
1513  1490.9270898716597 pass: 1
1514  1229.00095844501 pass: 1
1515  1335.1231484276 pass: 1
1516  1229.10106949388 pass: 1
1517  1155.0205827125599 pass: 1
1518  987.445396712381 pass: 1
1519  967.350454885973 pass: 1
1520  920.0816251749451 pass: 1
1521  916.8992515009979 pass: 1
1522  839.108111670398 pass: 1
1523  697.006638520849 pass: 1
1524  519.799934598691 pass: 1
1525  458.884355149169 pass: 1
1526  436.687535659748 pass: 1
1527  395.849351119404 pass: 1
1528  401.77636117917 pass: 1
1529  403.97971322111505 pass: 1
1530  1626.8938719032199 pass: 1
1531  1705.5689096281199 pass: 1
1532  1784.4289307933802 pass: 1
1533  1736.2119952859698 pass: 1
1534  1520.87533850854 pass: 1
1535  1316.7216139639302 pass: 1
1536  1432.77743092

1866  2925.9776322341 pass: 1
1867  2891.9289564033197 pass: 1
1868  2763.7390068743 pass: 1
1869  2649.67054077418 pass: 1
1870  2522.0762760686703 pass: 1
1871  2434.71486683552 pass: 1
1872  2391.4854864066897 pass: 1
1873  2206.6975835042103 pass: 1
1874  2126.91385954485 pass: 1
1875  1326.66865421644 pass: 1
1876  1450.57794904371 pass: 1
1877  1365.77782179907 pass: 1
1878  1389.67100999178 pass: 1
1879  1201.7562907111699 pass: 1
1880  1044.48982319468 pass: 1
1881  1182.97865482261 pass: 1
1882  1013.3101489158901 pass: 1
1883  944.1304850527811 pass: 1
1884  697.745286443756 pass: 1
1885  602.547285983233 pass: 1
1886  528.580661340667 pass: 1
1887  460.954232181985 pass: 1
1888  464.229758018367 pass: 1
1889  477.476102358603 pass: 1
1890  10153.9382184867 pass: 1
1891  9637.002650009581 pass: 1
1892  9291.22761861899 pass: 1
1893  9197.026971520609 pass: 1
1894  8000.376431821541 pass: 1
1895  7318.12640972422 pass: 1
1896  8030.063005373041 pass: 1
1897  6574.6543381149295

2175  5469.21789842179 pass: 1
2176  5211.49920049925 pass: 1
2177  4698.6894759834995 pass: 1
2178  5064.04110600499 pass: 1
2179  4542.9041044918295 pass: 1
2180  4543.89024355919 pass: 1
2181  4793.29295892064 pass: 1
2182  4036.1402372203706 pass: 1
2183  3325.61324190164 pass: 1
2184  3037.7545847471997 pass: 1
2185  2762.5617013497 pass: 1
2186  2408.8001100553197 pass: 1
2187  1961.20373553606 pass: 1
2188  1815.92009071121 pass: 1
2189  1854.14595610107 pass: 1
2190  15513.6961151502 pass: 1
2191  14471.8335924199 pass: 1
2192  13980.4162478127 pass: 1
2193  13628.0816569367 pass: 1
2194  14681.023078053799 pass: 1
2195  14279.555986636498 pass: 1
2196  16350.0548484268 pass: 1
2197  15761.262245223701 pass: 1
2198  16048.502139800301 pass: 1
2199  16644.703815261 pass: 1
2200  18427.4248815924 pass: 1
2201  18376.790958440903 pass: 1
2202  18672.8327734392 pass: 1
2203  nan pass: 1
2204  nan pass: 1
2205  97199.9190963458 pass: 1
2206  103059.24822759 pass: 1
2207  101668.1706

2497  16472.166046674898 pass: 1
2498  15334.6699396085 pass: 1
2499  13739.8294463775 pass: 1
2500  11138.807464899699 pass: 1
2501  9567.45828531486 pass: 1
2502  8655.31153127218 pass: 1
2503  8643.49400482948 pass: 1
2504  9126.95427680596 pass: 1
2505  1359.18935082349 pass: 1
2506  1342.75868900053 pass: 1
2507  1300.7918443547699 pass: 1
2508  1344.17721681353 pass: 1
2509  1255.40273833704 pass: 1
2510  1294.58372262483 pass: 1
2511  1388.8765311553 pass: 1
2512  1203.17526459328 pass: 1
2513  1025.1874356941198 pass: 1
2514  979.6961188072 pass: 1
2515  928.0297393332249 pass: 1
2516  813.7229318104311 pass: 1
2517  649.475864201459 pass: 1
2518  609.295002025278 pass: 1
2519  599.373586947528 pass: 1
2520  6200.17322102474 pass: 1
2521  6353.826382793321 pass: 1
2522  5659.38020403691 pass: 1
2523  6423.2917833458305 pass: 1
2524  5411.87740391911 pass: 1
2525  5821.30539705684 pass: 1
2526  6701.77391278013 pass: 1
2527  5458.12171842169 pass: 1
2528  4129.7587410282895 pass

2828  57579.5019538092 pass: 1
2829  54952.6737181498 pass: 1
2830  53340.147655778 pass: 1
2831  48087.5831393144 pass: 1
2832  41376.3939853959 pass: 1
2833  38538.9900530944 pass: 1
2834  37868.3230595798 pass: 1
2835  nan pass: 1
2836  nan pass: 1
2837  nan pass: 1
2838  nan pass: 1
2839  nan pass: 1
2840  nan pass: 1
2841  nan pass: 1
2842  2058.0352093943497 pass: 1
2843  1762.24610660537 pass: 1
2844  1577.4571821855802 pass: 1
2845  1408.85273756165 pass: 1
2846  1253.39140303912 pass: 1
2847  1263.01345505567 pass: 1
2848  1258.42195289081 pass: 1
2849  1177.62926773348 pass: 1
2850  1104.45901890381 pass: 1
2851  1040.13741165493 pass: 1
2852  954.720525255865 pass: 1
2853  834.541295306236 pass: 1
2854  738.347522660006 pass: 1
2855  666.345856945147 pass: 1
2856  706.087584631519 pass: 1
2857  520.037186594333 pass: 1
2858  404.289405943386 pass: 1
2859  337.360396992708 pass: 1
2860  309.28019842849 pass: 1
2861  236.49018001025902 pass: 1
2862  189.3945514973 pass: 1
2863

3121  1907.55123335561 pass: 1
3122  1740.4682979775 pass: 1
3123  1564.96694510875 pass: 1
3124  1377.0821404693802 pass: 1
3125  1213.26532816388 pass: 1
3126  1082.2860250169902 pass: 1
3127  830.4076942043221 pass: 1
3128  654.283837283285 pass: 1
3129  546.776850185552 pass: 1
3130  465.119886944025 pass: 1
3131  396.377979026633 pass: 1
3132  383.34306799074204 pass: 1
3133  456.706289511583 pass: 1
3134  558.226802377 pass: 1
3135  3148.3651032298 pass: 1
3136  3167.34305591771 pass: 1
3137  3158.58688044147 pass: 1
3138  3275.0916839105 pass: 1
3139  2965.80243434839 pass: 1
3140  2643.44142250605 pass: 1
3141  2697.96137456029 pass: 1
3142  2393.3672645751103 pass: 1
3143  2047.09782277757 pass: 1
3144  1886.4333588198301 pass: 1
3145  1787.9470230652898 pass: 1
3146  1580.5027243229401 pass: 1
3147  1353.9348192325 pass: 1
3148  1362.6017313140198 pass: 1
3149  1469.8491489428197 pass: 1
3150  15692.412877728999 pass: 1
3151  12237.1937410124 pass: 1
3152  12755.0000818807 pa

In [68]:
main_data[main_data['GDP'].isnull()]['Country'].unique()

array(['Afghanistan', 'American Samoa', 'Bermuda',
       'British Virgin Islands', 'Cayman Islands', 'Channel Islands',
       'Curacao', 'Eritrea', 'French Polynesia', 'Gibraltar', 'Guam',
       'Iraq', 'Korea, Dem. People���s Rep.', 'Nauru', 'New Caledonia',
       'Northern Mariana Islands', 'Sao Tome and Principe',
       'Sint Maarten (Dutch part)', 'Somalia', 'South Sudan',
       'St. Martin (French part)', 'Syrian Arab Republic',
       'Turks and Caicos Islands', 'Virgin Islands (U.S.)'], dtype=object)

In [69]:
#main_data[main_data['GDP'].isnull()][main_data['Country'] == "Afghanistan"][['Country','Year','GDP']]

In [70]:
main_data[main_data['Country'] == "South Sudan"][['Country', 'Year', 'GDP']]
len(main_data['Country'].unique())

217

In [71]:
# Dropping "South Sudan" - Many missing values
# main_data.drop(main_data[main_data['Country'] == "South Sudan"].index, inplace=True)

In [72]:
#af_test = main_data[main_data['Country'] == "Afghanistan"]
# len(main_data['Country'].unique())

In [73]:
# DEALING WITH MISSING VALUES
# imp=Imputer(missing_values="NaN", strategy="most_frequent" )
# imp.fit(af_test[["GDP"]])
# af_test["GDP"]=imp.transform(af_test[["GDP"]]).ravel()

In [74]:
# af_test["GDP"]

## Total Expenditure

In [75]:
totalex = pd.read_csv("Data/New/Total_Expenditure/data_internet.csv")

In [76]:
count = 0
totalex_data = pd.DataFrame(columns=['Country', 'Year', 'Total_Expenditure'])
for i in range(0, len(totalex['Country'].unique())):
    for j in range(1, len(totalex.columns)):
        #print(totalex.loc[i,'Country'], end='')
        totalex_data.at[count, 'Country'] = totalex.loc[i,'Country']
        totalex_data.at[count, 'Year'] = totalex.columns[j]
       # print(totalex.columns[j])
        totalex_data.at[count, 'Total_Expenditure'] = totalex[totalex['Country'] == totalex['Country'][i]].T[1:].loc[totalex.columns[j]].values[0]
        count+=1

In [77]:
# Dropping "South Sudan" - Many missing values
# totalex_data.drop(totalex_data[totalex_data['Country'] == "South Sudan"].index, inplace=True)

In [78]:
totalex_data['Year'] = totalex_data['Year'].astype(np.int64)
#totalex_data.info()

In [79]:
totalex_data.to_pickle("Data/New/Total_Expenditure/totalex_pickle_data.pkl")

In [80]:
for val in main_data['Country'].index:
    print(val,' ',end='')
    if len(totalex_data[totalex_data['Country'] == main_data['Country'][val]].values) != 0:
        try:
            totalex_other_data = totalex_data[totalex_data['Country'] == main_data['Country'][val]][totalex_data['Year'] == main_data['Year'][val]]['Total_Expenditure'].values[0]
            print(totalex_other_data, "pass: 1")
            main_data.at[val, 'Total_Expenditure'] = totalex_other_data
        except IndexError:
            pass
    else:
        try:
            totalex_other_data = totalex_data[totalex_data['Country'].str.startswith(main_data['Country'][val])][totalex_data['Year'] == main_data['Year'][val]]['Total_Expenditure'].values[0]
            print(totalex_other_data, "pass: 2")
            main_data.at[val, 'Total_Expenditure'] = totalex_other_data
        except IndexError:
            try:
                totalex_other_data = totalex_data[totalex_data['Country'].str.startswith(main_data['Country'][val].split(',')[0])][totalex_data['Year'] == main_data['Year'][val]]['Total_Expenditure'].values[0]
                print(totalex_other_data, "pass: 3")
                main_data.at[val, 'Total_Expenditure'] = totalex_other_data
            except IndexError:
                pass
        

0  12.0 pass: 1
1  10.59 pass: 1
2  11.67 pass: 1
3  10.17 pass: 1
4  14.4 pass: 1
5  12.73 pass: 1
6  6.93 pass: 1
7  2.95 pass: 1
8  6.3 pass: 1
9  5.49 pass: 1
10  6.98 pass: 1
11  5.58 pass: 1
12  8.99 pass: 1
13  nan pass: 1
14  nan pass: 1
15  9.37 pass: 1
16  9.76 pass: 1
17  9.71 pass: 1
18  9.85 pass: 1
19  8.46 pass: 1
20  8.46 pass: 1
21  8.37 pass: 1
22  8.88 pass: 1
23  9.04 pass: 1
24  9.67 pass: 1
25  9.18 pass: 1
26  7.76 pass: 1
27  7.19 pass: 1
28  7.25 pass: 1
29  7.08 pass: 1
30  9.9 pass: 1
31  9.95 pass: 1
32  9.99 pass: 1
33  9.37 pass: 1
34  9.65 pass: 1
35  9.01 pass: 1
36  8.07 pass: 1
37  8.14 pass: 1
38  7.8 pass: 1
39  8.19 pass: 1
40  8.48 pass: 1
41  8.5 pass: 1
42  8.37 pass: 1
43  8.54 pass: 1
44  8.79 pass: 1
45  46  47  48  49  50  51  52  53  54  55  56  

  """
  if sys.path[0] == '':


57  58  59  60  27.92 pass: 1
61  22.32 pass: 1
62  18.88 pass: 1
63  19.31 pass: 1
64  24.04 pass: 1
65  25.1 pass: 1
66  24.04 pass: 1
67  22.85 pass: 1
68  19.38 pass: 1
69  19.5 pass: 1
70  22.7 pass: 1
71  21.98 pass: 1
72  19.99 pass: 1
73  19.24 pass: 1
74  19.08 pass: 1
75  5.0 pass: 1
76  7.42 pass: 1
77  5.57 pass: 1
78  5.58 pass: 1
79  5.43 pass: 1
80  6.36 pass: 1
81  6.46 pass: 1
82  6.42 pass: 1
83  9.56 pass: 1
84  6.08 pass: 1
85  6.25 pass: 1
86  4.61 pass: 1
87  3.38 pass: 1
88  5.77 pass: 1
89  2.86 pass: 1
90  18.08 pass: 1
91  16.29 pass: 1
92  17.16 pass: 1
93  15.18 pass: 1
94  16.36 pass: 1
95  11.38 pass: 1
96  12.27 pass: 1
97  10.81 pass: 1
98  10.07 pass: 1
99  12.08 pass: 1
100  10.69 pass: 1
101  11.89 pass: 1
102  9.88 pass: 1
103  10.75 pass: 1
104  11.42 pass: 1
105  6.92 pass: 1
106  7.72 pass: 1
107  8.74 pass: 1
108  11.82 pass: 1
109  14.07 pass: 1
110  17.13 pass: 1
111  15.79 pass: 1
112  15.37 pass: 1
113  16.35 pass: 1
114  16.67 pass: 1
115  1

540  541  542  543  544  545  546  547  548  549  550  551  552  553  554  555  16.11 pass: 1
556  12.45 pass: 1
557  13.84 pass: 1
558  12.64 pass: 1
559  14.17 pass: 1
560  15.52 pass: 1
561  10.51 pass: 1
562  11.84 pass: 1
563  9.74 pass: 1
564  8.99 pass: 1
565  14.74 pass: 1
566  15.12 pass: 1
567  13.49 pass: 1
568  12.51 pass: 1
569  13.25 pass: 1
570  8.97 pass: 1
571  7.76 pass: 1
572  5.94 pass: 1
573  6.85 pass: 1
574  4.86 pass: 1
575  5.09 pass: 1
576  5.28 pass: 1
577  7.3 pass: 1
578  8.08 pass: 1
579  13.49 pass: 1
580  15.92 pass: 1
581  10.36 pass: 1
582  13.3 pass: 1
583  13.62 pass: 1
584  12.95 pass: 1
585  586  587  588  589  590  591  592  593  594  595  596  597  598  599  600  15.88 pass: 1
601  15.32 pass: 1
602  14.65 pass: 1
603  14.34 pass: 1
604  13.8 pass: 1
605  14.06 pass: 1
606  13.56 pass: 1
607  13.72 pass: 1
608  13.45 pass: 1
609  12.54 pass: 1
610  12.67 pass: 1
611  12.02 pass: 1
612  15.14 pass: 1
613  14.96 pass: 1
614  14.5 pass: 1
615  10.43

1088  5.88 pass: 1
1089  6.15 pass: 1
1090  5.6 pass: 1
1091  7.06 pass: 1
1092  7.77 pass: 1
1093  7.78 pass: 1
1094  6.91 pass: 1
1095  19.65 pass: 1
1096  19.25 pass: 1
1097  18.81 pass: 1
1098  18.58 pass: 1
1099  18.15 pass: 1
1100  18.31 pass: 1
1101  18.13 pass: 1
1102  18.06 pass: 1
1103  17.58 pass: 1
1104  17.33 pass: 1
1105  17.1 pass: 1
1106  17.34 pass: 1
1107  17.31 pass: 1
1108  17.13 pass: 1
1109  17.87 pass: 1
1110  6.82 pass: 1
1111  10.58 pass: 1
1112  9.32 pass: 1
1113  14.03 pass: 1
1114  14.93 pass: 1
1115  16.45 pass: 1
1116  13.15 pass: 1
1117  16.16 pass: 1
1118  13.98 pass: 1
1119  15.08 pass: 1
1120  12.58 pass: 1
1121  8.33 pass: 1
1122  8.45 pass: 1
1123  9.34 pass: 1
1124  7.85 pass: 1
1125  1126  1127  1128  1129  1130  1131  1132  1133  1134  1135  1136  1137  1138  1139  1140  9.98 pass: 1
1141  9.98 pass: 1
1142  11.37 pass: 1
1143  12.32 pass: 1
1144  11.85 pass: 1
1145  12.54 pass: 1
1146  11.51 pass: 1
1147  12.06 pass: 1
1148  12.84 pass: 1
1149  1

1592  5.77 pass: 1
1593  5.77 pass: 1
1594  5.22 pass: 1
1595  7.99 pass: 1
1596  3.74 pass: 1
1597  5.58 pass: 1
1598  5.72 pass: 1
1599  6.75 pass: 1
1600  6.43 pass: 1
1601  7.0 pass: 1
1602  6.8 pass: 1
1603  6.84 pass: 1
1604  5.22 pass: 1
1605  1606  1607  1608  1609  1610  1611  1612  1613  1614  1615  1616  1617  1618  1619  1620  1621  1622  1623  1624  1625  1626  1627  1628  1629  1630  1631  1632  1633  1634  1635  9.81 pass: 1
1636  9.81 pass: 1
1637  9.81 pass: 1
1638  10.08 pass: 1
1639  9.08 pass: 1
1640  9.31 pass: 1
1641  10.55 pass: 1
1642  11.84 pass: 1
1643  11.37 pass: 1
1644  10.15 pass: 1
1645  10.26 pass: 1
1646  9.31 pass: 1
1647  9.11 pass: 1
1648  9.03 pass: 1
1649  8.68 pass: 1
1650  10.72 pass: 1
1651  10.72 pass: 1
1652  10.72 pass: 1
1653  9.56 pass: 1
1654  9.19 pass: 1
1655  9.66 pass: 1
1656  9.07 pass: 1
1657  10.45 pass: 1
1658  9.95 pass: 1
1659  11.8 pass: 1
1660  11.04 pass: 1
1661  9.73 pass: 1
1662  10.29 pass: 1
1663  9.72 pass: 1
1664  7.58 p

2099  11.36 pass: 1
2100  2101  2102  2103  2104  2105  2106  2107  2108  2109  2110  2111  2112  2113  2114  2115  23.36 pass: 1
2116  23.36 pass: 1
2117  23.13 pass: 1
2118  21.71 pass: 1
2119  19.78 pass: 1
2120  22.47 pass: 1
2121  21.61 pass: 1
2122  18.06 pass: 1
2123  18.06 pass: 1
2124  17.69 pass: 1
2125  17.4 pass: 1
2126  16.71 pass: 1
2127  17.14 pass: 1
2128  15.89 pass: 1
2129  15.71 pass: 1
2130  23.95 pass: 1
2131  19.84 pass: 1
2132  18.68 pass: 1
2133  19.11 pass: 1
2134  19.83 pass: 1
2135  19.99 pass: 1
2136  18.11 pass: 1
2137  18.82 pass: 1
2138  17.13 pass: 1
2139  18.6 pass: 1
2140  19.22 pass: 1
2141  20.2 pass: 1
2142  18.77 pass: 1
2143  15.56 pass: 1
2144  13.12 pass: 1
2145  7.57 pass: 1
2146  7.57 pass: 1
2147  7.6 pass: 1
2148  8.58 pass: 1
2149  10.69 pass: 1
2150  10.66 pass: 1
2151  11.38 pass: 1
2152  12.24 pass: 1
2153  16.08 pass: 1
2154  14.78 pass: 1
2155  8.97 pass: 1
2156  9.39 pass: 1
2157  9.89 pass: 1
2158  10.1 pass: 1
2159  8.43 pass: 1
216

2569  9.75 pass: 1
2570  10.2 pass: 1
2571  8.57 pass: 1
2572  7.86 pass: 1
2573  7.86 pass: 1
2574  7.89 pass: 1
2575  7.44 pass: 1
2576  9.06 pass: 1
2577  6.41 pass: 1
2578  4.65 pass: 1
2579  7.08 pass: 1
2580  2581  2582  2583  2584  2585  2586  2587  2588  2589  2590  2591  2592  2593  2594  2595  2596  2597  2598  2599  2600  2601  2602  2603  2604  2605  2606  2607  2608  2609  2610  12.83 pass: 1
2611  11.22 pass: 1
2612  14.06 pass: 1
2613  13.38 pass: 1
2614  13.6 pass: 1
2615  14.23 pass: 1
2616  14.28 pass: 1
2617  13.66 pass: 1
2618  13.81 pass: 1
2619  13.78 pass: 1
2620  13.63 pass: 1
2621  13.66 pass: 1
2622  13.69 pass: 1
2623  13.27 pass: 1
2624  13.15 pass: 1
2625  12.54 pass: 1
2626  13.12 pass: 1
2627  13.34 pass: 1
2628  13.98 pass: 1
2629  20.32 pass: 1
2630  21.1 pass: 1
2631  15.07 pass: 1
2632  19.67 pass: 1
2633  25.35 pass: 1
2634  28.48 pass: 1
2635  23.89 pass: 1
2636  20.26 pass: 1
2637  19.91 pass: 1
2638  34.41 pass: 1
2639  20.75 pass: 1
2640  2641  2

3123  8.99 pass: 1
3124  8.67 pass: 1
3125  7.65 pass: 1
3126  7.62 pass: 1
3127  11.13 pass: 1
3128  11.81 pass: 1
3129  7.34 pass: 1
3130  7.04 pass: 1
3131  7.02 pass: 1
3132  6.52 pass: 1
3133  9.56 pass: 1
3134  8.69 pass: 1
3135  17.94 pass: 1
3136  14.13 pass: 1
3137  13.55 pass: 1
3138  13.98 pass: 1
3139  15.91 pass: 1
3140  13.66 pass: 1
3141  12.8 pass: 1
3142  14.31 pass: 1
3143  17.42 pass: 1
3144  17.88 pass: 1
3145  18.69 pass: 1
3146  17.22 pass: 1
3147  12.64 pass: 1
3148  11.97 pass: 1
3149  10.49 pass: 1
3150  5.8 pass: 3
3151  6.38 pass: 3
3152  5.91 pass: 3
3153  8.83 pass: 3
3154  9.11 pass: 3
3155  8.96 pass: 3
3156  4.33 pass: 3
3157  6.57 pass: 3
3158  8.49 pass: 3
3159  8.31 pass: 3
3160  8.33 pass: 3
3161  6.71 pass: 3
3162  7.32 pass: 3
3163  7.41 pass: 3
3164  7.32 pass: 3
3165  3166  3167  3168  3169  3170  3171  3172  3173  3174  3175  3176  3177  3178  3179  3180  3181  3182  3183  3184  3185  3186  3187  3188  3189  3190  3191  3192  3193  3194  3195  3

In [81]:
main_data[main_data['Total_Expenditure'].isnull()]['Country'].unique()

array(['Afghanistan', 'American Samoa', 'Aruba', 'Bermuda',
       'British Virgin Islands', 'Cayman Islands', 'Channel Islands',
       "Cote d'Ivoire", 'Curacao', 'Czech Republic', 'Faroe Islands',
       'French Polynesia', 'Gibraltar', 'Greenland', 'Guam',
       'Hong Kong SAR, China', 'Iraq', 'Isle of Man',
       'Korea, Dem. People���s Rep.', 'Korea, Rep.', 'Kosovo',
       'Kyrgyz Republic', 'Lao PDR', 'Liechtenstein', 'Macao SAR, China',
       'Moldova', 'New Caledonia', 'North Macedonia',
       'Northern Mariana Islands', 'Puerto Rico',
       'Sint Maarten (Dutch part)', 'Slovak Republic', 'Somalia',
       'South Sudan', 'St. Kitts and Nevis', 'St. Lucia',
       'St. Martin (French part)', 'St. Vincent and the Grenadines',
       'Tanzania', 'Turks and Caicos Islands', 'Vietnam',
       'Virgin Islands (U.S.)', 'West Bank and Gaza', 'Zimbabwe'],
      dtype=object)

In [82]:
totalex_data[totalex_data['Country'].str.startswith("Curacao")]

Unnamed: 0,Country,Year,Total_Expenditure


In [83]:
len(main_data[main_data['Total_Expenditure'].isnull()])

622

## MEASLES

In [84]:
measles = pd.read_csv("Data/New/Measles/Measles_Internet.csv")

In [85]:
count = 0
measles_data = pd.DataFrame(columns=['Country', 'Year', 'Measles'])
for i in range(0, len(measles['Country'].unique())):
    for j in range(1, len(measles.columns)):
        #print(measles.loc[i,'Country'], end='')
        measles_data.at[count, 'Country'] = measles.loc[i,'Country']
        measles_data.at[count, 'Year'] = measles.columns[j]
       # print(measles.columns[j])
        measles_data.at[count, 'Measles'] = measles[measles['Country'] == measles['Country'][i]].T[1:].loc[measles.columns[j]].values[0]
        count+=1

In [86]:
measles_data['Year'] = measles_data['Year'].astype(np.int64)

In [87]:
measles_data.to_pickle("Data/New/Measles/measles_pickle_data.pkl")

In [111]:
#measles_data

In [89]:
for val in main_data['Country'].index:
    print(val,' ',end='')
    if len(measles_data[measles_data['Country'] == main_data['Country'][val]].values) != 0:
        try:
            measles_other_data = measles_data[measles_data['Country'] == main_data['Country'][val]][measles_data['Year'] == main_data['Year'][val]]['Measles'].values[0]
            print(measles_other_data, "pass: 1")
            main_data.at[val, 'Measles'] = measles_other_data
        except IndexError:
            pass
    else:
        try:
            measles_other_data = measles_data[measles_data['Country'].str.startswith(main_data['Country'][val])][measles_data['Year'] == main_data['Year'][val]]['Measles'].values[0]
            print(measles_other_data, "pass: 2")
            main_data.at[val, 'Measles'] = measles_other_data
        except IndexError:
            try:
                measles_other_data = measles_data[measles_data['Country'].str.startswith(main_data['Country'][val].split(',')[0])][measles_data['Year'] == main_data['Year'][val]]['Measles'].values[0]
                print(measles_other_data, "pass: 3")
                main_data.at[val, 'Measles'] = measles_other_data
            except IndexError:
                pass
        

0  492.0 pass: 1
1  430.0 pass: 1
2  2787.0 pass: 1
3  3013.0 pass: 1
4  1989.0 pass: 1
5  2861.0 pass: 1
6  1599.0 pass: 1
7  1141.0 pass: 1
8  1990.0 pass: 1
9  1296.0 pass: 1
10  466.0 pass: 1
11  798.0 pass: 1
12  2486.0 pass: 1
13  8762.0 pass: 1
14  6532.0 pass: 1
15  nan pass: 1
16  0.0 pass: 1
17  9.0 pass: 1
18  28.0 pass: 1
19  10.0 pass: 1
20  0.0 pass: 1
21  nan pass: 1
22  22.0 pass: 1
23  68.0 pass: 1
24  6.0 pass: 1
25  7.0 pass: 1
26  8.0 pass: 1
27  16.0 pass: 1
28  18.0 pass: 1
29  662.0 pass: 1
30  0.0 pass: 1
31  25.0 pass: 1
32  18.0 pass: 1
33  112.0 pass: 1
34  103.0 pass: 1
35  107.0 pass: 1
36  217.0 pass: 1
37  0.0 pass: 1
38  944.0 pass: 1
39  2302.0 pass: 1
40  3289.0 pass: 1
41  15374.0 pass: 1
42  5862.0 pass: 1
43  2686.0 pass: 1
44  nan pass: 1
45  46  47  48  49  50  51  52  53  54  55  56  

  """
  if sys.path[0] == '':


57  58  59  60  nan pass: 1
61  0.0 pass: 1
62  0.0 pass: 1
63  0.0 pass: 1
64  0.0 pass: 1
65  0.0 pass: 1
66  0.0 pass: 1
67  0.0 pass: 1
68  0.0 pass: 1
69  0.0 pass: 1
70  0.0 pass: 1
71  0.0 pass: 1
72  1.0 pass: 1
73  5.0 pass: 1
74  2.0 pass: 1
75  11699.0 pass: 1
76  8523.0 pass: 1
77  4458.0 pass: 1
78  1449.0 pass: 1
79  1190.0 pass: 1
80  2807.0 pass: 1
81  265.0 pass: 1
82  1014.0 pass: 1
83  765.0 pass: 1
84  258.0 pass: 1
85  29.0 pass: 1
86  1196.0 pass: 1
87  11945.0 pass: 1
88  9046.0 pass: 1
89  2219.0 pass: 1
90  0.0 pass: 1
91  0.0 pass: 1
92  0.0 pass: 1
93  0.0 pass: 1
94  0.0 pass: 1
95  0.0 pass: 1
96  0.0 pass: 1
97  0.0 pass: 1
98  0.0 pass: 1
99  0.0 pass: 1
100  0.0 pass: 1
101  0.0 pass: 1
102  0.0 pass: 1
103  0.0 pass: 1
104  0.0 pass: 1
105  1.0 pass: 1
106  0.0 pass: 1
107  2.0 pass: 1
108  3.0 pass: 1
109  17.0 pass: 1
110  3.0 pass: 1
111  0.0 pass: 1
112  0.0 pass: 1
113  0.0 pass: 1
114  0.0 pass: 1
115  0.0 pass: 1
116  0.0 pass: 1
117  0.0 pass: 1

571  226.0 pass: 1
572  120.0 pass: 1
573  8650.0 pass: 1
574  194.0 pass: 1
575  165.0 pass: 1
576  63.0 pass: 1
577  441.0 pass: 1
578  1594.0 pass: 1
579  2.0 pass: 1
580  10324.0 pass: 1
581  15801.0 pass: 1
582  7277.0 pass: 1
583  24908.0 pass: 1
584  3546.0 pass: 1
585  586  587  588  589  590  591  592  593  594  595  596  597  598  599  600  0.0 pass: 1
601  0.0 pass: 1
602  0.0 pass: 1
603  6.0 pass: 1
604  0.0 pass: 1
605  1.0 pass: 1
606  0.0 pass: 1
607  0.0 pass: 1
608  0.0 pass: 1
609  0.0 pass: 1
610  0.0 pass: 1
611  1.0 pass: 1
612  0.0 pass: 1
613  0.0 pass: 1
614  0.0 pass: 1
615  52628.0 pass: 1
616  26883.0 pass: 1
617  6183.0 pass: 1
618  9943.0 pass: 1
619  38159.0 pass: 1
620  52461.0 pass: 1
621  131441.0 pass: 1
622  109023.0 pass: 1
623  99602.0 pass: 1
624  124219.0 pass: 1
625  70549.0 pass: 1
626  71879.0 pass: 1
627  58341.0 pass: 1
628  88962.0 pass: 1
629  71093.0 pass: 1
630  0.0 pass: 1
631  1.0 pass: 1
632  1.0 pass: 1
633  6.0 pass: 1
634  0.0 pass

1099  780.0 pass: 1
1100  574.0 pass: 1
1101  917.0 pass: 1
1102  567.0 pass: 1
1103  2307.0 pass: 1
1104  778.0 pass: 1
1105  121.0 pass: 1
1106  779.0 pass: 1
1107  4657.0 pass: 1
1108  6024.0 pass: 1
1109  nan pass: 1
1110  124.0 pass: 1
1111  319.0 pass: 1
1112  1613.0 pass: 1
1113  120.0 pass: 1
1114  641.0 pass: 1
1115  101.0 pass: 1
1116  82.0 pass: 1
1117  6.0 pass: 1
1118  420.0 pass: 1
1119  435.0 pass: 1
1120  60.0 pass: 1
1121  1939.0 pass: 1
1122  12289.0 pass: 1
1123  13476.0 pass: 1
1124  23068.0 pass: 1
1125  1126  1127  1128  1129  1130  1131  1132  1133  1134  1135  1136  1137  1138  1139  1140  1.0 pass: 1
1141  3.0 pass: 1
1142  3.0 pass: 1
1143  40.0 pass: 1
1144  149.0 pass: 1
1145  2.0 pass: 1
1146  1.0 pass: 1
1147  2.0 pass: 1
1148  nan pass: 1
1149  122.0 pass: 1
1150  1.0 pass: 1
1151  nan pass: 1
1152  5.0 pass: 1
1153  12.0 pass: 1
1154  56.0 pass: 1
1155  1156  1157  1158  1159  1160  1161  1162  1163  1164  1165  1166  1167  1168  1169  1170  0.0 pass: 1


1639  0.0 pass: 1
1640  0.0 pass: 1
1641  3.0 pass: 1
1642  0.0 pass: 1
1643  7.0 pass: 1
1644  2.0 pass: 1
1645  0.0 pass: 1
1646  nan pass: 1
1647  nan pass: 1
1648  1.0 pass: 1
1649  0.0 pass: 1
1650  112.0 pass: 1
1651  1761.0 pass: 1
1652  9.0 pass: 1
1653  9.0 pass: 1
1654  12.0 pass: 1
1655  22.0 pass: 1
1656  24.0 pass: 1
1657  373.0 pass: 1
1658  956.0 pass: 1
1659  618.0 pass: 1
1660  213.0 pass: 1
1661  526.0 pass: 1
1662  36.0 pass: 1
1663  8.0 pass: 1
1664  5.0 pass: 1
1665  0.0 pass: 1
1666  516.0 pass: 1
1667  179.0 pass: 1
1668  172.0 pass: 1
1669  2488.0 pass: 1
1670  0.0 pass: 1
1671  0.0 pass: 1
1672  2.0 pass: 1
1673  1.0 pass: 1
1674  nan pass: 1
1675  31.0 pass: 1
1676  1.0 pass: 1
1677  0.0 pass: 1
1678  217.0 pass: 1
1679  660.0 pass: 1
1680  34.0 pass: 1
1681  0.0 pass: 1
1682  43.0 pass: 1
1683  279.0 pass: 1
1684  2200.0 pass: 1
1685  6.0 pass: 1
1686  1.0 pass: 1
1687  1.0 pass: 1
1688  20.0 pass: 1
1689  8.0 pass: 1
1690  4.0 pass: 1
1691  142.0 pass: 1
169

2140  0.0 pass: 1
2141  0.0 pass: 1
2142  0.0 pass: 1
2143  0.0 pass: 1
2144  0.0 pass: 1
2145  1142.0 pass: 1
2146  1224.0 pass: 1
2147  272.0 pass: 1
2148  771.0 pass: 1
2149  372.0 pass: 1
2150  801.0 pass: 1
2151  1317.0 pass: 1
2152  282.0 pass: 1
2153  59.0 pass: 1
2154  2183.0 pass: 1
2155  63057.0 pass: 1
2156  54190.0 pass: 1
2157  31584.0 pass: 1
2158  61208.0 pass: 1
2159  21319.0 pass: 1
2160  6855.0 pass: 1
2161  52852.0 pass: 1
2162  6447.0 pass: 1
2163  18843.0 pass: 1
2164  8491.0 pass: 1
2165  1272.0 pass: 1
2166  9960.0 pass: 1
2167  2613.0 pass: 1
2168  704.0 pass: 1
2169  110927.0 pass: 1
2170  31521.0 pass: 1
2171  141258.0 pass: 1
2172  42007.0 pass: 1
2173  168107.0 pass: 1
2174  212183.0 pass: 1
2175  2176  2177  2178  2179  2180  2181  2182  2183  2184  2185  2186  2187  2188  2189  2190  2191  2192  2193  2194  2195  2196  2197  2198  2199  2200  2201  2202  2203  2204  2205  3.0 pass: 1
2206  8.0 pass: 1
2207  4.0 pass: 1
2208  39.0 pass: 1
2209  3.0 pass: 1


2691  297.0 pass: 1
2692  267.0 pass: 1
2693  362.0 pass: 1
2694  22.0 pass: 1
2695  26.0 pass: 1
2696  256.0 pass: 1
2697  67.0 pass: 1
2698  nan pass: 1
2699  152.0 pass: 1
2700  1686.0 pass: 1
2701  2107.0 pass: 1
2702  51.0 pass: 1
2703  60.0 pass: 1
2704  79.0 pass: 1
2705  21.0 pass: 1
2706  33.0 pass: 1
2707  44.0 pass: 1
2708  0.0 pass: 1
2709  3.0 pass: 1
2710  35.0 pass: 1
2711  65.0 pass: 1
2712  139.0 pass: 1
2713  309.0 pass: 1
2714  16527.0 pass: 1
2715  2716  2717  2718  2719  2720  2721  2722  2723  2724  2725  2726  2727  2728  2729  2730  2731  2732  2733  2734  2735  2736  2737  2738  2739  2740  2741  2742  2743  2744  2745  2746  2747  2748  2749  2750  2751  2752  2753  2754  2755  2756  2757  2758  2759  2760  2761  2762  2763  2764  2765  2766  2767  2768  2769  2770  2771  2772  2773  2774  2775  676.0 pass: 1
2776  2813.0 pass: 1
2777  8523.0 pass: 1
2778  5616.0 pass: 1
2779  680.0 pass: 1
2780  68.0 pass: 1
2781  129.0 pass: 1
2782  327.0 pass: 1
2783  228.0

3231  140.0 pass: 1
3232  535.0 pass: 1
3233  459.0 pass: 1
3234  45.0 pass: 1
3235  35.0 pass: 1
3236  881.0 pass: 1
3237  25036.0 pass: 1
3238  16997.0 pass: 1
3239  30930.0 pass: 1
3240  0.0 pass: 1
3241  0.0 pass: 1
3242  0.0 pass: 1
3243  0.0 pass: 1
3244  9696.0 pass: 1
3245  853.0 pass: 1
3246  0.0 pass: 1
3247  242.0 pass: 1
3248  212.0 pass: 1
3249  420.0 pass: 1
3250  31.0 pass: 1
3251  998.0 pass: 1
3252  304.0 pass: 1
3253  529.0 pass: 1
3254  1483.0 pass: 1


## Income composition of resources

In [135]:
icor = pd.read_csv("Data/New/Income_Composition_Of_Resources/ICOR_Internet.csv")

In [136]:
count = 0
icor_data = pd.DataFrame(columns=['Country', 'Year', 'Income_Composition_Of_Resources'])
for i in range(0, len(icor['Country'].unique())):
    for j in range(1, len(icor.columns)):
        #print(icor.loc[i,'Country'], end='')
        icor_data.at[count, 'Country'] = icor.loc[i,'Country']
        icor_data.at[count, 'Year'] = icor.columns[j]
        #print(icor.columns[j])
        icor_data.at[count, 'Income_Composition_Of_Resources'] = icor[icor['Country'] == icor['Country'][i]].T[1:].loc[icor.columns[j]].values[0]
        count+=1

In [144]:
icor_data['Year'] = icor_data['Year'].astype(np.int64)
icor_data['Income_Composition_Of_Resources'] = icor_data['Income_Composition_Of_Resources'].astype(np.float64)

In [145]:
icor_data.to_pickle("Data/New/Income_Composition_Of_Resources/ICOR_Internet.pkl")

In [131]:
for val in main_data['Country'].index:
    print(val,' ',end='')
    if len(icor_data[icor_data['Country'] == main_data['Country'][val]].values) != 0:
        try:
            icor_other_data = icor_data[icor_data['Country'] == main_data['Country'][val]][icor_data['Year'] == main_data['Year'][val]]['Income_Composition_Of_Resources'].values[0]
            print(icor_other_data, "pass: 1")
            main_data.at[val, 'Income_Composition_Of_Resources'] = icor_other_data
        except IndexError:
            pass
    else:
        try:
            icor_other_data = icor_data[icor_data['Country'].str.startswith(main_data['Country'][val])][icor_data['Year'] == main_data['Year'][val]]['Income_Composition_Of_Resources'].values[0]
            print(icor_other_data, "pass: 2")
            main_data.at[val, 'Income_Composition_Of_Resources'] = icor_other_data
        except IndexError:
            try:
                icor_other_data = icor_data[icor_data['Country'].str.startswith(main_data['Country'][val].split(',')[0])][icor_data['Year'] == main_data['Year'][val]]['Income_Composition_Of_Resources'].values[0]
                print(icor_other_data, "pass: 3")
                main_data.at[val, 'Income_Composition_Of_Resources'] = icor_other_data
            except IndexError:
                pass
            

0  1  2  3  4  5  6  7  8  9  10  11  12  13  14  15  16  17  18  19  20  21  22  23  24  25  26  27  28  

  if sys.path[0] == '':


29  30  31  32  33  34  35  36  37  38  39  40  41  42  43  44  45  46  47  48  49  50  51  52  53  54  55  56  57  58  59  60  61  62  63  64  65  66  67  68  69  70  71  72  73  74  75  76  77  78  79  80  81  82  83  84  85  86  87  88  89  90  91  92  93  94  95  96  97  98  99  100  101  102  103  104  105  106  107  108  109  110  111  112  113  114  115  116  117  118  119  120  121  122  123  124  125  126  127  128  129  130  131  132  133  134  135  136  137  138  139  140  141  142  143  144  145  146  147  148  149  150  151  152  153  154  155  156  157  158  159  160  161  162  163  164  165  166  167  168  169  170  171  172  173  174  175  176  177  178  179  180  181  182  183  184  185  186  187  188  189  190  191  192  193  194  195  196  197  198  199  200  201  202  203  204  205  206  207  208  209  210  211  212  213  214  215  216  217  218  219  220  221  222  223  224  225  226  227  228  229  230  231  232  233  234  235  236  237  238  239  240  241  242  2

KeyboardInterrupt: 

In [146]:
icor_data[icor_data['Country'] == main_data['Country'][10]].values

array([], shape=(0, 3), dtype=object)

In [143]:
icor_data.dtypes

Country                             object
Year                                 int64
Income_Composition_Of_Resources    float64
dtype: object

## Diptheria

In [147]:
diptheria = pd.read_csv("Data/New/Diptheria/Dip_Internet.csv")

In [148]:
count = 0
diptheria_data = pd.DataFrame(columns=['Country', 'Year', 'Diphtheria'])
for i in range(0, len(diptheria['Country'].unique())):
    for j in range(1, len(diptheria.columns)):
        #print(diptheria.loc[i,'Country'], end='')
        diptheria_data.at[count, 'Country'] = diptheria.loc[i,'Country']
        diptheria_data.at[count, 'Year'] = diptheria.columns[j]
        #print(diptheria.columns[j])
        diptheria_data.at[count, 'Diphtheria'] = diptheria[diptheria['Country'] == diptheria['Country'][i]].T[1:].loc[diptheria.columns[j]].values[0]
        count+=1

In [152]:
diptheria_data['Year'] = diptheria_data['Year'].astype(np.int64)
diptheria_data['Diphtheria'] = diptheria_data['Diphtheria'].astype(np.float64)

In [153]:
diptheria_data.to_pickle("Data/New/Diptheria/Dip_Internet.pkl")

In [154]:
for val in main_data['Country'].index:
    print(val,' ',end='')
    if len(diptheria_data[diptheria_data['Country'] == main_data['Country'][val]].values) != 0:
        try:
            diptheria_other_data = diptheria_data[diptheria_data['Country'] == main_data['Country'][val]][diptheria_data['Year'] == main_data['Year'][val]]['Diphtheria'].values[0]
            print(diptheria_other_data, "pass: 1")
            main_data.at[val, 'Diphtheria'] = diptheria_other_data
        except IndexError:
            pass
    else:
        try:
            diptheria_other_data = diptheria_data[diptheria_data['Country'].str.startswith(main_data['Country'][val])][diptheria_data['Year'] == main_data['Year'][val]]['Diphtheria'].values[0]
            print(diptheria_other_data, "pass: 2")
            main_data.at[val, 'Diphtheria'] = diptheria_other_data
        except IndexError:
            try:
                diptheria_other_data = diptheria_data[diptheria_data['Country'].str.startswith(main_data['Country'][val].split(',')[0])][diptheria_data['Year'] == main_data['Year'][val]]['Diphtheria'].values[0]
                print(diptheria_other_data, "pass: 3")
                main_data.at[val, 'Diphtheria'] = diptheria_other_data
            except IndexError:
                pass
            

0  62.0 pass: 1
1  64.0 pass: 1
2  67.0 pass: 1
3  68.0 pass: 1
4  66.0 pass: 1
5  63.0 pass: 1
6  64.0 pass: 1
7  63.0 pass: 1
8  58.0 pass: 1
9  58.0 pass: 1
10  50.0 pass: 1
11  41.0 pass: 1
12  36.0 pass: 1
13  33.0 pass: 1
14  24.0 pass: 1
15  98.0 pass: 1
16  99.0 pass: 1
17  99.0 pass: 1
18  99.0 pass: 1
19  99.0 pass: 1
20  98.0 pass: 1
21  99.0 pass: 1
22  98.0 pass: 1
23  97.0 pass: 1
24  98.0 pass: 1
25  97.0 pass: 1
26  97.0 pass: 1
27  98.0 pass: 1
28  97.0 pass: 1
29  97.0 pass: 1
30  95.0 pass: 1
31  95.0 pass: 1
32  95.0 pass: 1
33  95.0 pass: 1
34  95.0 pass: 1
35  95.0 pass: 1
36  93.0 pass: 1
37  95.0 pass: 1
38  95.0 pass: 1
39  88.0 pass: 1
40  86.0 pass: 1
41  87.0 pass: 1
42  86.0 pass: 1
43  89.0 pass: 1
44  86.0 pass: 1
45  46  47  48  49  50  51  52  53  

  """
  if sys.path[0] == '':


54  55  56  57  58  59  60  97.0 pass: 1
61  96.0 pass: 1
62  99.0 pass: 1
63  99.0 pass: 1
64  99.0 pass: 1
65  99.0 pass: 1
66  99.0 pass: 1
67  96.0 pass: 1
68  93.0 pass: 1
69  98.0 pass: 1
70  99.0 pass: 1
71  99.0 pass: 1
72  97.0 pass: 1
73  96.0 pass: 1
74  98.0 pass: 1
75  55.0 pass: 1
76  54.0 pass: 1
77  54.0 pass: 1
78  51.0 pass: 1
79  60.0 pass: 1
80  44.0 pass: 1
81  54.0 pass: 1
82  58.0 pass: 1
83  22.0 pass: 1
84  28.0 pass: 1
85  30.0 pass: 1
86  31.0 pass: 1
87  35.0 pass: 1
88  32.0 pass: 1
89  23.0 pass: 1
90  99.0 pass: 1
91  99.0 pass: 1
92  98.0 pass: 1
93  99.0 pass: 1
94  98.0 pass: 1
95  99.0 pass: 1
96  99.0 pass: 1
97  99.0 pass: 1
98  99.0 pass: 1
99  99.0 pass: 1
100  97.0 pass: 1
101  99.0 pass: 1
102  98.0 pass: 1
103  97.0 pass: 1
104  95.0 pass: 1
105  94.0 pass: 1
106  94.0 pass: 1
107  91.0 pass: 1
108  91.0 pass: 1
109  94.0 pass: 1
110  94.0 pass: 1
111  93.0 pass: 1
112  91.0 pass: 1
113  91.0 pass: 1
114  98.0 pass: 1
115  98.0 pass: 1
116  96.

585  586  587  588  589  590  591  592  593  594  595  596  597  598  599  600  95.0 pass: 1
601  91.0 pass: 1
602  90.0 pass: 1
603  94.0 pass: 1
604  92.0 pass: 1
605  94.0 pass: 1
606  95.0 pass: 1
607  96.0 pass: 1
608  94.0 pass: 1
609  91.0 pass: 1
610  93.0 pass: 1
611  96.0 pass: 1
612  97.0 pass: 1
613  97.0 pass: 1
614  91.0 pass: 1
615  99.0 pass: 1
616  99.0 pass: 1
617  99.0 pass: 1
618  99.0 pass: 1
619  99.0 pass: 1
620  99.0 pass: 1
621  97.0 pass: 1
622  93.0 pass: 1
623  93.0 pass: 1
624  87.0 pass: 1
625  87.0 pass: 1
626  86.0 pass: 1
627  86.0 pass: 1
628  86.0 pass: 1
629  85.0 pass: 1
630  90.0 pass: 1
631  91.0 pass: 1
632  91.0 pass: 1
633  85.0 pass: 1
634  88.0 pass: 1
635  92.0 pass: 1
636  92.0 pass: 1
637  93.0 pass: 1
638  93.0 pass: 1
639  93.0 pass: 1
640  89.0 pass: 1
641  92.0 pass: 1
642  81.0 pass: 1
643  80.0 pass: 1
644  79.0 pass: 1
645  87.0 pass: 1
646  87.0 pass: 1
647  86.0 pass: 1
648  83.0 pass: 1
649  74.0 pass: 1
650  83.0 pass: 1
651  81

1149  96.0 pass: 1
1150  95.0 pass: 1
1151  94.0 pass: 1
1152  92.0 pass: 1
1153  91.0 pass: 1
1154  89.0 pass: 1
1155  1156  1157  1158  1159  1160  1161  1162  1163  1164  1165  1166  1167  1168  1169  1170  97.0 pass: 1
1171  97.0 pass: 1
1172  97.0 pass: 1
1173  95.0 pass: 1
1174  97.0 pass: 1
1175  99.0 pass: 1
1176  99.0 pass: 1
1177  96.0 pass: 1
1178  91.0 pass: 1
1179  99.0 pass: 1
1180  83.0 pass: 1
1181  97.0 pass: 1
1182  98.0 pass: 1
1183  96.0 pass: 1
1184  97.0 pass: 1
1185  1186  1187  1188  1189  1190  1191  1192  1193  1194  1195  1196  1197  1198  1199  1200  73.0 pass: 1
1201  85.0 pass: 1
1202  96.0 pass: 1
1203  88.0 pass: 1
1204  94.0 pass: 1
1205  92.0 pass: 1
1206  95.0 pass: 1
1207  85.0 pass: 1
1208  89.0 pass: 1
1209  87.0 pass: 1
1210  87.0 pass: 1
1211  84.0 pass: 1
1212  82.0 pass: 1
1213  77.0 pass: 1
1214  81.0 pass: 1
1215  34.0 pass: 1
1216  44.0 pass: 1
1217  53.0 pass: 1
1218  63.0 pass: 1
1219  64.0 pass: 1
1220  57.0 pass: 1
1221  60.0 pass: 1
122

1674  89.0 pass: 1
1675  90.0 pass: 1
1676  90.0 pass: 1
1677  84.0 pass: 1
1678  78.0 pass: 1
1679  83.0 pass: 1
1680  50.0 pass: 1
1681  76.0 pass: 1
1682  80.0 pass: 1
1683  77.0 pass: 1
1684  70.0 pass: 1
1685  81.0 pass: 1
1686  75.0 pass: 1
1687  65.0 pass: 1
1688  60.0 pass: 1
1689  60.0 pass: 1
1690  31.0 pass: 1
1691  35.0 pass: 1
1692  39.0 pass: 1
1693  42.0 pass: 1
1694  46.0 pass: 1
1695  94.0 pass: 1
1696  96.0 pass: 1
1697  98.0 pass: 1
1698  98.0 pass: 1
1699  98.0 pass: 1
1700  98.0 pass: 1
1701  98.0 pass: 1
1702  98.0 pass: 1
1703  98.0 pass: 1
1704  98.0 pass: 1
1705  97.0 pass: 1
1706  95.0 pass: 1
1707  93.0 pass: 1
1708  94.0 pass: 1
1709  94.0 pass: 1
1710  1711  1712  1713  1714  1715  1716  1717  1718  1719  1720  1721  1722  1723  1724  1725  93.0 pass: 1
1726  93.0 pass: 1
1727  93.0 pass: 1
1728  92.0 pass: 1
1729  95.0 pass: 1
1730  98.0 pass: 1
1731  96.0 pass: 1
1732  95.0 pass: 1
1733  94.0 pass: 1
1734  94.0 pass: 1
1735  94.0 pass: 1
1736  94.0 pass: 

2175  2176  2177  2178  2179  2180  2181  2182  2183  2184  2185  2186  2187  2188  2189  2190  2191  2192  2193  2194  2195  2196  2197  2198  2199  2200  2201  2202  2203  2204  2205  93.0 pass: 1
2206  94.0 pass: 1
2207  95.0 pass: 1
2208  94.0 pass: 1
2209  93.0 pass: 1
2210  94.0 pass: 1
2211  94.0 pass: 1
2212  93.0 pass: 1
2213  94.0 pass: 1
2214  91.0 pass: 1
2215  92.0 pass: 1
2216  92.0 pass: 1
2217  93.0 pass: 1
2218  91.0 pass: 1
2219  90.0 pass: 1
2220  99.0 pass: 1
2221  98.0 pass: 1
2222  98.0 pass: 1
2223  99.0 pass: 1
2224  99.0 pass: 1
2225  98.0 pass: 1
2226  99.0 pass: 1
2227  99.0 pass: 1
2228  98.0 pass: 1
2229  99.0 pass: 1
2230  99.0 pass: 1
2231  99.0 pass: 1
2232  99.0 pass: 1
2233  99.0 pass: 1
2234  99.0 pass: 1
2235  69.0 pass: 1
2236  65.0 pass: 1
2237  64.0 pass: 1
2238  63.0 pass: 1
2239  52.0 pass: 1
2240  52.0 pass: 1
2241  53.0 pass: 1
2242  54.0 pass: 1
2243  59.0 pass: 1
2244  63.0 pass: 1
2245  68.0 pass: 1
2246  66.0 pass: 1
2247  63.0 pass: 1
224

2693  98.0 pass: 1
2694  96.0 pass: 1
2695  97.0 pass: 1
2696  98.0 pass: 1
2697  98.0 pass: 1
2698  96.0 pass: 1
2699  95.0 pass: 1
2700  99.0 pass: 1
2701  99.0 pass: 1
2702  99.0 pass: 1
2703  99.0 pass: 1
2704  99.0 pass: 1
2705  97.0 pass: 1
2706  98.0 pass: 1
2707  98.0 pass: 1
2708  98.0 pass: 1
2709  99.0 pass: 1
2710  97.0 pass: 1
2711  99.0 pass: 1
2712  98.0 pass: 1
2713  98.0 pass: 1
2714  99.0 pass: 1
2715  2716  2717  2718  2719  2720  2721  2722  2723  2724  2725  2726  2727  2728  2729  2730  2731  2732  2733  2734  2735  2736  2737  2738  2739  2740  2741  2742  2743  2744  2745  2746  2747  2748  2749  2750  2751  2752  2753  2754  2755  2756  2757  2758  2759  2760  2761  2762  2763  2764  2765  2766  2767  2768  2769  2770  2771  2772  2773  2774  2775  94.0 pass: 1
2776  93.0 pass: 1
2777  92.0 pass: 1
2778  93.0 pass: 1
2779  90.0 pass: 1
2780  81.0 pass: 1
2781  86.0 pass: 1
2782  84.0 pass: 1
2783  78.0 pass: 1
2784  78.0 pass: 1
2785  74.0 pass: 1
2786  69.0 pa

3217  79.0 pass: 3
3218  78.0 pass: 3
3219  79.0 pass: 3
3220  72.0 pass: 3
3221  61.0 pass: 3
3222  65.0 pass: 3
3223  73.0 pass: 3
3224  74.0 pass: 3
3225  86.0 pass: 1
3226  79.0 pass: 1
3227  78.0 pass: 1
3228  81.0 pass: 1
3229  83.0 pass: 1
3230  94.0 pass: 1
3231  87.0 pass: 1
3232  80.0 pass: 1
3233  81.0 pass: 1
3234  82.0 pass: 1
3235  83.0 pass: 1
3236  83.0 pass: 1
3237  84.0 pass: 1
3238  85.0 pass: 1
3239  85.0 pass: 1
3240  91.0 pass: 1
3241  95.0 pass: 1
3242  95.0 pass: 1
3243  93.0 pass: 1
3244  89.0 pass: 1
3245  73.0 pass: 1
3246  75.0 pass: 1
3247  73.0 pass: 1
3248  70.0 pass: 1
3249  68.0 pass: 1
3250  65.0 pass: 1
3251  68.0 pass: 1
3252  71.0 pass: 1
3253  75.0 pass: 1
3254  78.0 pass: 1


In [157]:
main_data[['Country','Year','Diphtheria']]
main_data.iloc[[3151]]

Unnamed: 0,Country,Year,Status,Life_Expectancy,Adult_Mortality,Infant_Deaths,Alcohol,Percentage_Expenditure,Measles,BMI,...,Polio,Total_Expenditure,Diphtheria,HIV/AIDS,GDP,Population,Thinness_1-19_years,Thinness_5-9_years,Income_Composition_Of_Resources,Schooling
3151,"Venezuela, RB",2013,,,,,,,0,,...,,6.38,82,,12237.2,30317848.0,,,,
