In [2]:
import numpy as np
import pandas as pd
import wbdata
from sklearn.preprocessing import Imputer

In [3]:
columns = ['Country', 'Year', 'Status', 'Life_Expectancy', 
           'Adult_Mortality', 'Infant_Deaths', 'Alcohol', 
           'Percentage_Expenditure', 'Measles', 'BMI', 
           'Under-Five_Deaths ', 'Polio', 'Total_Expenditure', 
           'Diphtheria', 'HIV/AIDS', 'GDP', 'Population',
           'Thinness_10-19_years', 'Thinness_5-9_years',
           'Income_Composition_Of_Resources', 'Schooling']
main_data = pd.DataFrame(columns=columns)

In [4]:
main_data

Unnamed: 0,Country,Year,Status,Life_Expectancy,Adult_Mortality,Infant_Deaths,Alcohol,Percentage_Expenditure,Measles,BMI,...,Polio,Total_Expenditure,Diphtheria,HIV/AIDS,GDP,Population,Thinness_10-19_years,Thinness_5-9_years,Income_Composition_Of_Resources,Schooling


## Population Data

In [5]:
#wbdata.search_indicators('Population')

In [6]:
#set up the indicator I want (just build up the dict if you want more than one)
indicators = {'SP.POP.TOTL':'Population'}

In [7]:
#grab indicators above for all countries and load into data frame
pop_df_in = wbdata.get_dataframe(indicators, convert_date=False)

In [8]:
pop_df_in.to_csv("Data/New/Population/Population_Internet.csv", encoding='utf-8')

In [9]:
pop_df_get = pd.read_csv("Data/New/Population/Population_Internet.csv")
pop_df = pop_df_get.rename(index = str, columns={'date':'Year', 'country':'Country'})
len(pop_df['Country'].unique())

264

In [10]:
# Removing continents and other trivial rows
pop_df.drop(pop_df.index[:2773], inplace=True)
len(pop_df['Country'].unique())

217

In [11]:
# Year: 2000 - 2014
pop_df = pop_df[(lambda x: x['Year'].isin(range(2000,2015)))]

In [12]:
pop_df.reset_index(drop=True, inplace=True)

In [13]:
pop_df.head()

Unnamed: 0,Country,Year,Population
0,Afghanistan,2014,32758020.0
1,Afghanistan,2013,31731688.0
2,Afghanistan,2012,30696958.0
3,Afghanistan,2011,29708599.0
4,Afghanistan,2010,28803167.0


In [14]:
# Fixing Missing Values
pop_df.at[915, 'Population'] = 5054634
pop_df.at[916, 'Population'] = 4945529
pop_df.at[917, 'Population'] = 4840901

In [15]:
main_data[['Country', 'Year', 'Population']] = pop_df[['Country', 'Year', 'Population']]

In [16]:
main_data['Population'].isnull().sum()
#main_data.info()

0

## GDP

In [17]:
#set up the indicator I want (just build up the dict if you want more than one)
indicators = {'NY.GDP.PCAP.CD':'GDP'}

In [18]:
#grab indicators above for countires above and load into data frame
gdp_df = wbdata.get_dataframe(indicators, convert_date=False)

In [19]:
gdp_df.to_csv("Data/New/GDP/GDP_Internet.csv", encoding='utf-8')
gdp_data_get = pd.read_csv("Data/New/GDP/GDP_Internet.csv")
gdp_data = gdp_data_get.rename(index = str, columns={'date':'Year', 'country':'Country'})
#gdp_data[gdp_data['Country'] == "South Sudan"]

In [20]:
for val in main_data['Country'].index:
    print(val,' ',end='')
    if len(gdp_data[gdp_data['Country'] == main_data['Country'][val]].values) != 0:
        try:
            gdp_other_data = gdp_data[gdp_data['Country'] == main_data['Country'][val]][gdp_data['Year'] == main_data['Year'][val]]['GDP'].values[0]
            print(gdp_other_data, "pass: 1")
            main_data.at[val, 'GDP'] = gdp_other_data
        except IndexError:
            pass

0  625.3395388284999 pass: 1
1  647.966460473683 pass: 1
2  648.511069587633 pass: 1
3  599.29762975711 pass: 1
4  550.514973976336 pass: 1
5  444.18440407545 pass: 1
6  370.382293977374 pass: 1
7  366.230443242864 pass: 1
8  269.22969301812304 pass: 1
9  247.664139959638 pass: 1
10  216.708128851229 pass: 1
11  195.77663034155103 pass: 1
12  184.49471212204898 pass: 1
13  nan pass: 1
14  nan pass: 1
15  4578.6679344615895 pass: 1
16  4413.08288688408 pass: 1
17  4247.614342362921 pass: 1
18  4437.177794486521 pass: 1
19  4094.36020359235 pass: 1
20  4114.13489916342 pass: 1
21  4370.5399247769 pass: 1
22  3595.0380568289293 pass: 1
23  2972.74292399799 pass: 1
24  2673.78658429559 pass: 1
25  2373.58129170055 pass: 1
26  1846.12012081207 pass: 1
27  1425.12421860142 pass: 1
28  1281.6598256178 pass: 1
29  1126.68334010717 pass: 1
30  5466.42577841535 pass: 1
31  5471.123388787089 pass: 1
32  5565.134521048481 pass: 1
33  5432.413319781161 pass: 1
34  4463.39467488951 pass: 1
35  3868.

  """


11660.329531051999 pass: 1
58  10375.9942154736 pass: 1
59  10352.8227618312 pass: 1
60  42294.9947269717 pass: 1
61  40619.711297779504 pass: 1
62  38391.080866978504 pass: 1
63  41098.766941722795 pass: 1
64  39736.3540626699 pass: 1
65  43339.3798746543 pass: 1
66  47785.6590856793 pass: 1
67  48582.808455086604 pass: 1
68  43748.772158899905 pass: 1
69  41282.0201219785 pass: 1
70  38503.479614485695 pass: 1
71  32776.4422698769 pass: 1
72  24741.4935704562 pass: 1
73  22228.8464928922 pass: 1
74  21936.530101470802 pass: 1
75  5412.6923476178 pass: 1
76  5258.40737644433 pass: 1
77  5102.4899693158895 pass: 1
78  4615.86747457004 pass: 1
79  3585.90555256868 pass: 1
80  3117.89694392484 pass: 1
81  4068.97845646361 pass: 1
82  3108.26864317894 pass: 1
83  2585.1335220492897 pass: 1
84  1890.8497268156898 pass: 1
85  1248.40490589113 pass: 1
86  978.5389338872909 pass: 1
87  869.851485021864 pass: 1
88  526.168742996854 pass: 1
89  555.2969418633339 pass: 1
90  12900.9030032308 pas

340  1097.45708762103 pass: 1
341  997.7417140733031 pass: 1
342  885.638224530948 pass: 1
343  807.938767268667 pass: 1
344  765.8632357656951 pass: 1
345  3124.00030982516 pass: 1
346  2947.9385262805104 pass: 1
347  2645.2277526014605 pass: 1
348  2377.68877123935 pass: 1
349  1981.1701161847602 pass: 1
350  1776.86647562189 pass: 1
351  1736.93008403587 pass: 1
352  1389.62934991949 pass: 1
353  1233.59186917076 pass: 1
354  1046.4273841431698 pass: 1
355  978.334648481852 pass: 1
356  917.3643104861699 pass: 1
357  913.575642272268 pass: 1
358  958.236652408901 pass: 1
359  1007.0028688845099 pass: 1
360  5204.24371841169 pass: 1
361  5042.58221304612 pass: 1
362  4722.013403067271 pass: 1
363  5054.32534423757 pass: 1
364  4614.8290411405 pass: 1
365  4701.33442641347 pass: 1
366  5078.31457720914 pass: 1
367  4180.913531981741 pass: 1
368  3403.8152971587297 pass: 1
369  2967.8340564335604 pass: 1
370  2802.27491705489 pass: 1
371  2214.73224591872 pass: 1
372  1761.537647188610

666  327.767654268638 pass: 1
667  286.507537174865 pass: 1
668  255.591313804132 pass: 1
669  218.52350917279801 pass: 1
670  194.166786345182 pass: 1
671  173.916351831834 pass: 1
672  175.13607148129398 pass: 1
673  153.69957329168003 pass: 1
674  405.469653093749 pass: 1
675  2910.52022576864 pass: 1
676  2964.5729890305797 pass: 1
677  2952.05234806594 pass: 1
678  3196.6475237082395 pass: 1
679  2737.34233022991 pass: 1
680  2255.33311863864 pass: 1
681  2881.59411605216 pass: 1
682  2111.20948856339 pass: 1
683  2012.1100704730602 pass: 1
684  1637.06438124572 pass: 1
685  1289.33783690965 pass: 1
686  998.101311670601 pass: 1
687  886.361665299432 pass: 1
688  842.7090677443041 pass: 1
689  998.196856210498 pass: 1
690  10630.9979007564 pass: 1
691  10569.6663143709 pass: 1
692  9985.36958968047 pass: 1
693  9186.59638994181 pass: 1
694  8199.41462069479 pass: 1
695  6809.396223663871 pass: 1
696  6911.1361524468 pass: 1
697  6120.62339974311 pass: 1
698  5245.18737330548 pass:

944  4070.0328269871397 pass: 1
945  3379.8964990926097 pass: 1
946  3587.0003161096297 pass: 1
947  3864.76043619193 pass: 1
948  3934.2733728356798 pass: 1
949  3690.23922847955 pass: 1
950  3032.5170490818605 pass: 1
951  2842.43853009194 pass: 1
952  3047.48803739736 pass: 1
953  2937.3607433106604 pass: 1
954  2873.86209077307 pass: 1
955  2529.63353509293 pass: 1
956  2020.99399397689 pass: 1
957  1324.99618408854 pass: 1
958  1437.6349079611903 pass: 1
959  1637.4500720233898 pass: 1
960  571.162275886001 pass: 1
961  502.153588732071 pass: 1
962  468.506724907536 pass: 1
963  354.846354368724 pass: 1
964  341.309909200504 pass: 1
965  379.756638540888 pass: 1
966  325.382554265056 pass: 1
967  243.30268224694598 pass: 1
968  193.79490564251597 pass: 1
969  161.626624773078 pass: 1
970  135.762385796471 pass: 1
971  118.87344658163201 pass: 1
972  111.363435555958 pass: 1
973  120.17892791115 pass: 1
974  123.876205729993 pass: 1
975  57543.6690591516 pass: 1
976  53612.82500387

1264  662.279518162433 pass: 1
1265  668.297604584601 pass: 1
1266  674.756414674589 pass: 1
1267  615.8202308278791 pass: 1
1268  505.470620633981 pass: 1
1269  465.310386508996 pass: 1
1270  387.942891069774 pass: 1
1271  329.782094609998 pass: 1
1272  393.016060861473 pass: 1
1273  413.737737605203 pass: 1
1274  462.481438106584 pass: 1
1275  2242.70745940423 pass: 1
1276  2136.7717179207502 pass: 1
1277  2178.38855524013 pass: 1
1278  2120.5894686048 pass: 1
1279  1932.8582899968899 pass: 1
1280  1815.48949644078 pass: 1
1281  1751.59585650768 pass: 1
1282  1592.5721816707598 pass: 1
1283  1437.62878537462 pass: 1
1284  1311.74171442571 pass: 1
1285  1217.6579606610499 pass: 1
1286  1157.30427040444 pass: 1
1287  1132.87200087771 pass: 1
1288  1130.4050460226802 pass: 1
1289  1088.7798688454302 pass: 1
1290  40315.2855640552 pass: 1
1291  38403.7777145477 pass: 1
1292  36730.8767001128 pass: 1
1293  35142.4879344543 pass: 1
1294  32549.998231121 pass: 1
1295  30697.340383517003 pas

1588  1490.3610325022598 pass: 1
1589  1087.76240120887 pass: 1
1590  42996.3152212942 pass: 1
1591  48399.8078177184 pass: 1
1592  51264.0713358672 pass: 1
1593  48268.591204554294 pass: 1
1594  38497.6169579293 pass: 1
1595  37567.301210596204 pass: 1
1596  55571.9980595751 pass: 1
1597  45793.976143914806 pass: 1
1598  42717.557253307 pass: 1
1599  35490.2613236708 pass: 1
1600  26921.0827767441 pass: 1
1601  22071.5696085573 pass: 1
1602  17789.419812619 pass: 1
1603  16540.9676143149 pass: 1
1604  18389.384328288997 pass: 1
1605  1279.7697826598599 pass: 1
1606  1282.43716202467 pass: 1
1607  1177.9747348784801 pass: 1
1608  1123.8831680627 pass: 1
1609  880.037775119109 pass: 1
1610  871.224389337854 pass: 1
1611  966.393627185372 pass: 1
1612  721.768690838857 pass: 1
1613  543.110702403072 pass: 1
1614  476.552129968975 pass: 1
1615  433.234976590867 pass: 1
1616  380.50733975156 pass: 1
1617  321.726606346204 pass: 1
1618  308.40877673732797 pass: 1
1619  279.620424920086 pass

1933  2240.02146223114 pass: 1
1934  2170.9202099933 pass: 1
1935  2244.75530830835 pass: 1
1936  2243.99017795909 pass: 1
1937  2046.5367866510899 pass: 1
1938  1970.5713725333603 pass: 1
1939  1631.5358317953198 pass: 1
1940  1525.52611882763 pass: 1
1941  1695.97281114375 pass: 1
1942  1230.4346847203199 pass: 1
1943  950.648204362747 pass: 1
1944  831.2052862105741 pass: 1
1945  720.940932072135 pass: 1
1946  548.2896866905011 pass: 1
1947  458.67781683631205 pass: 1
1948  407.73484454911596 pass: 1
1949  354.003798172847 pass: 1
1950  185152.527227439 pass: 1
1951  172588.877782501 pass: 1
1952  152000.362069501 pass: 1
1953  162155.498619113 pass: 1
1954  144569.175786448 pass: 1
1955  149221.361937107 pass: 1
1956  180640.12511482698 pass: 1
1957  167124.740985109 pass: 1
1958  133195.429339283 pass: 1
1959  124374.268480984 pass: 1
1960  123382.01583526301 pass: 1
1961  108978.48885968301 pass: 1
1962  89061.0513003609 pass: 1
1963  82552.5667888765 pass: 1
1964  82534.87362320

2228  14408.050496276499 pass: 1
2229  12376.9793587916 pass: 1
2230  10129.2888628425 pass: 1
2231  9069.810206054659 pass: 1
2232  8629.10177414089 pass: 1
2233  8476.610510989209 pass: 1
2234  8601.18590579376 pass: 1
2235  1316.9809658343402 pass: 1
2236  1272.44106100063 pass: 1
2237  1261.2089674342199 pass: 1
2238  1226.21531390333 pass: 1
2239  1040.14226787636 pass: 1
2240  1006.60399914224 pass: 1
2241  1039.31208844269 pass: 1
2242  950.432793144075 pass: 1
2243  873.77027271847 pass: 1
2244  711.469946269738 pass: 1
2245  649.804823293709 pass: 1
2246  563.5943419658261 pass: 1
2247  499.860003292353 pass: 1
2248  510.656815730852 pass: 1
2249  533.862411433568 pass: 1
2250  11646.681520811599 pass: 1
2251  10771.782982791601 pass: 1
2252  10340.5193178534 pass: 1
2253  9380.8243118598 pass: 1
2254  8933.219345383492 pass: 1
2255  8981.62914167732 pass: 1
2256  9732.0199723156 pass: 1
2257  9600.00497116723 pass: 1
2258  9396.247251649009 pass: 1
2259  9277.46910479252 pass

2543  12014.3998729502 pass: 1
2544  11092.5107357824 pass: 1
2545  10176.6587120064 pass: 1
2546  8524.96123557779 pass: 1
2547  8331.2619970042 pass: 1
2548  7663.13708026446 pass: 1
2549  7578.85105298846 pass: 1
2550  708.439475708292 pass: 1
2551  710.8186998434129 pass: 1
2552  561.898423858481 pass: 1
2553  445.052005000457 pass: 1
2554  399.15436760830704 pass: 1
2555  388.87460213734 pass: 1
2556  406.375917792695 pass: 1
2557  358.82747162132995 pass: 1
2558  322.313468011784 pass: 1
2559  291.690317491767 pass: 1
2560  266.29004583555303 pass: 1
2561  266.525052882889 pass: 1
2562  252.83153276619598 pass: 1
2563  230.097887300759 pass: 1
2564  139.314773381037 pass: 1
2565  56957.0785737742 pass: 1
2566  56389.181784025095 pass: 1
2567  54715.6948277418 pass: 1
2568  53237.5649111969 pass: 1
2569  46569.6795060716 pass: 1
2570  38577.5582156126 pass: 1
2571  39721.0481819946 pass: 1
2572  39223.5818748702 pass: 1
2573  33579.8594757016 pass: 1
2574  29869.853980544598 pass:

2872  528.063562048951 pass: 1
2873  471.527376124859 pass: 1
2874  442.25285430836 pass: 1
2875  345.18923695172305 pass: 1
2876  323.072106590997 pass: 1
2877  308.056552814055 pass: 1
2878  304.339284043007 pass: 1
2879  306.72382730792197 pass: 1
2880  5953.793927833621 pass: 1
2881  6168.39488117145 pass: 1
2882  5859.887644117641 pass: 1
2883  5491.15998143099 pass: 1
2884  5075.30217639529 pass: 1
2885  4212.05490158881 pass: 1
2886  4378.68740595675 pass: 1
2887  3972.2064753650398 pass: 1
2888  3368.9525761438103 pass: 1
2889  2893.6513555653996 pass: 1
2890  2659.8391084852396 pass: 1
2891  2358.93062926799 pass: 1
2892  2096.05461742145 pass: 1
2893  1893.1453765768 pass: 1
2894  2007.56482637453 pass: 1
2895  3335.54856721641 pass: 1
2896  4770.315932743761 pass: 1
2897  5766.96981223417 pass: 1
2898  5021.462223922979 pass: 1
2899  3603.9405510679208 pass: 1
2900  2929.98028426193 pass: 1
2901  4073.2392798508504 pass: 1
2902  2705.2329026181897 pass: 1
2903  2534.66218967

3208  1335.55319529359 pass: 1
3209  1476.17185000238 pass: 1
3210  1647.03365674887 pass: 1
3211  1580.1816006295498 pass: 1
3212  1421.1715746938698 pass: 1
3213  1349.42024978639 pass: 1
3214  1309.23195973585 pass: 1
3215  1093.81291773535 pass: 1
3216  1203.72073300004 pass: 1
3217  995.3533205587449 pass: 1
3218  900.826916094261 pass: 1
3219  813.603661238484 pass: 1
3220  693.0481356484739 pass: 1
3221  605.174937228709 pass: 1
3222  565.279713864571 pass: 1
3223  536.241854382259 pass: 1
3224  540.004737395739 pass: 1
3225  1738.08820161939 pass: 1
3226  1850.7933594391898 pass: 1
3227  1734.93061223334 pass: 1
3228  1644.61967241117 pass: 1
3229  1463.21357310715 pass: 1
3230  1139.11023298085 pass: 1
3231  1369.0682487096901 pass: 1
3232  1104.58798487761 pass: 1
3233  1030.15419934654 pass: 1
3234  691.317816426353 pass: 1
3235  530.2772217177829 pass: 1
3236  429.15834335486 pass: 1
3237  377.13052444117204 pass: 1
3238  378.27362379123497 pass: 1
3239  341.905562491998 pa

In [21]:
main_data[main_data['GDP'].isnull()]['Country'].unique()

array(['Afghanistan', 'American Samoa', 'Bermuda',
       'British Virgin Islands', 'Cayman Islands', 'Channel Islands',
       'Curacao', 'Eritrea', 'French Polynesia', 'Gibraltar', 'Guam',
       'Iraq', 'Korea, Dem. People���s Rep.', 'Nauru', 'New Caledonia',
       'Northern Mariana Islands', 'Sao Tome and Principe',
       'Sint Maarten (Dutch part)', 'Somalia', 'South Sudan',
       'St. Martin (French part)', 'Syrian Arab Republic',
       'Turks and Caicos Islands', 'Virgin Islands (U.S.)'], dtype=object)

In [22]:
#main_data[main_data['GDP'].isnull()][main_data['Country'] == "Afghanistan"][['Country','Year','GDP']]

In [23]:
main_data[main_data['Country'] == "South Sudan"][['Country', 'Year', 'GDP']]
len(main_data['Country'].unique())

217

In [24]:
# Dropping "South Sudan" - Many missing values
# main_data.drop(main_data[main_data['Country'] == "South Sudan"].index, inplace=True)

In [25]:
#af_test = main_data[main_data['Country'] == "Afghanistan"]
# len(main_data['Country'].unique())

In [26]:
# DEALING WITH MISSING VALUES
# imp=Imputer(missing_values="NaN", strategy="most_frequent" )
# imp.fit(af_test[["GDP"]])
# af_test["GDP"]=imp.transform(af_test[["GDP"]]).ravel()

In [27]:
# af_test["GDP"]

## Total Expenditure

In [28]:
totalex = pd.read_csv("Data/New/Total_Expenditure/data_internet.csv")

In [29]:
count = 0
totalex_data = pd.DataFrame(columns=['Country', 'Year', 'Total_Expenditure'])
for i in range(0, len(totalex['Country'].unique())):
    for j in range(1, len(totalex.columns)):
        #print(totalex.loc[i,'Country'], end='')
        totalex_data.at[count, 'Country'] = totalex.loc[i,'Country']
        totalex_data.at[count, 'Year'] = totalex.columns[j]
       # print(totalex.columns[j])
        totalex_data.at[count, 'Total_Expenditure'] = totalex[totalex['Country'] == totalex['Country'][i]].T[1:].loc[totalex.columns[j]].values[0]
        count+=1

In [30]:
# Dropping "South Sudan" - Many missing values
# totalex_data.drop(totalex_data[totalex_data['Country'] == "South Sudan"].index, inplace=True)

In [31]:
totalex_data['Year'] = totalex_data['Year'].astype(np.int64)
#totalex_data.info()

In [32]:
totalex_data.to_pickle("Data/New/Total_Expenditure/totalex_pickle_data.pkl")

In [33]:
for val in main_data['Country'].index:
    print(val,' ',end='')
    if len(totalex_data[totalex_data['Country'] == main_data['Country'][val]].values) != 0:
        try:
            totalex_other_data = totalex_data[totalex_data['Country'] == main_data['Country'][val]][totalex_data['Year'] == main_data['Year'][val]]['Total_Expenditure'].values[0]
            print(totalex_other_data, "pass: 1")
            main_data.at[val, 'Total_Expenditure'] = totalex_other_data
        except IndexError:
            pass
    else:
        try:
            totalex_other_data = totalex_data[totalex_data['Country'].str.startswith(main_data['Country'][val])][totalex_data['Year'] == main_data['Year'][val]]['Total_Expenditure'].values[0]
            print(totalex_other_data, "pass: 2")
            main_data.at[val, 'Total_Expenditure'] = totalex_other_data
        except IndexError:
            try:
                totalex_other_data = totalex_data[totalex_data['Country'].str.startswith(main_data['Country'][val].split(',')[0])][totalex_data['Year'] == main_data['Year'][val]]['Total_Expenditure'].values[0]
                print(totalex_other_data, "pass: 3")
                main_data.at[val, 'Total_Expenditure'] = totalex_other_data
            except IndexError:
                pass
        

0  12.0 pass: 1
1  10.59 pass: 1
2  11.67 pass: 1
3  10.17 pass: 1
4  14.4 pass: 1
5  12.73 pass: 1
6  6.93 pass: 1
7  2.95 pass: 1
8  6.3 pass: 1
9  5.49 pass: 1
10  6.98 pass: 1
11  5.58 pass: 1
12  8.99 pass: 1
13  nan pass: 1
14  nan pass: 1
15  9.37 pass: 1
16  9.76 pass: 1
17  9.71 pass: 1
18  9.85 pass: 1
19  8.46 pass: 1
20  8.46 pass: 1
21  8.37 pass: 1
22  8.88 pass: 1
23  9.04 pass: 1
24  9.67 pass: 1
25  9.18 pass: 1
26  7.76 pass: 1
27  7.19 pass: 1
28  7.25 pass: 1
29  7.08 pass: 1
30  9.9 pass: 1
31  9.95 pass: 1
32  9.99 pass: 1
33  9.37 pass: 1
34  9.65 pass: 1
35  9.01 pass: 1
36  8.07 pass: 1
37  8.14 pass: 1
38  7.8 pass: 1
39  8.19 pass: 1
40  8.48 pass: 1
41  8.5 pass: 1
42  8.37 pass: 1
43  8.54 pass: 1
44  8.79 pass: 1
45  46  47  48  49  50  51  52  53  54  55  56  57  58  59  60  27.92 pass: 1
61  22.32 pass: 1
62  18.88 pass: 1
63  19.31

  """
  if sys.path[0] == '':


 pass: 1
64  24.04 pass: 1
65  25.1 pass: 1
66  24.04 pass: 1
67  22.85 pass: 1
68  19.38 pass: 1
69  19.5 pass: 1
70  22.7 pass: 1
71  21.98 pass: 1
72  19.99 pass: 1
73  19.24 pass: 1
74  19.08 pass: 1
75  5.0 pass: 1
76  7.42 pass: 1
77  5.57 pass: 1
78  5.58 pass: 1
79  5.43 pass: 1
80  6.36 pass: 1
81  6.46 pass: 1
82  6.42 pass: 1
83  9.56 pass: 1
84  6.08 pass: 1
85  6.25 pass: 1
86  4.61 pass: 1
87  3.38 pass: 1
88  5.77 pass: 1
89  2.86 pass: 1
90  18.08 pass: 1
91  16.29 pass: 1
92  17.16 pass: 1
93  15.18 pass: 1
94  16.36 pass: 1
95  11.38 pass: 1
96  12.27 pass: 1
97  10.81 pass: 1
98  10.07 pass: 1
99  12.08 pass: 1
100  10.69 pass: 1
101  11.89 pass: 1
102  9.88 pass: 1
103  10.75 pass: 1
104  11.42 pass: 1
105  6.92 pass: 1
106  7.72 pass: 1
107  8.74 pass: 1
108  11.82 pass: 1
109  14.07 pass: 1
110  17.13 pass: 1
111  15.79 pass: 1
112  15.37 pass: 1
113  16.35 pass: 1
114  16.67 pass: 1
115  16.92 pass: 1
116  17.27 pass: 1
117  18.23 pass: 1
118  17.14 pass: 1
119  

585  586  587  588  589  590  591  592  593  594  595  596  597  598  599  600  15.88 pass: 1
601  15.32 pass: 1
602  14.65 pass: 1
603  14.34 pass: 1
604  13.8 pass: 1
605  14.06 pass: 1
606  13.56 pass: 1
607  13.72 pass: 1
608  13.45 pass: 1
609  12.54 pass: 1
610  12.67 pass: 1
611  12.02 pass: 1
612  15.14 pass: 1
613  14.96 pass: 1
614  14.5 pass: 1
615  10.43 pass: 1
616  10.28 pass: 1
617  10.49 pass: 1
618  10.62 pass: 1
619  10.24 pass: 1
620  10.31 pass: 1
621  10.13 pass: 1
622  11.06 pass: 1
623  9.94 pass: 1
624  9.79 pass: 1
625  10.0 pass: 1
626  9.54 pass: 1
627  9.26 pass: 1
628  9.29 pass: 1
629  10.82 pass: 1
630  18.14 pass: 1
631  18.14 pass: 1
632  18.87 pass: 1
633  20.55 pass: 1
634  18.1 pass: 1
635  18.29 pass: 1
636  18.0 pass: 1
637  16.7 pass: 1
638  16.44 pass: 1
639  15.71 pass: 1
640  15.38 pass: 1
641  18.6 pass: 1
642  16.72 pass: 1
643  16.88 pass: 1
644  19.3 pass: 1
645  8.66 pass: 1
646  7.38 pass: 1
647  9.94 pass: 1
648  7.19 pass: 1
649  6.15 p

1125  1126  1127  1128  1129  1130  1131  1132  1133  1134  1135  1136  1137  1138  1139  1140  9.98 pass: 1
1141  9.98 pass: 1
1142  11.37 pass: 1
1143  12.32 pass: 1
1144  11.85 pass: 1
1145  12.54 pass: 1
1146  11.51 pass: 1
1147  12.06 pass: 1
1148  12.84 pass: 1
1149  12.35 pass: 1
1150  10.31 pass: 1
1151  11.05 pass: 1
1152  11.04 pass: 1
1153  11.19 pass: 1
1154  9.83 pass: 1
1155  1156  1157  1158  1159  1160  1161  1162  1163  1164  1165  1166  1167  1168  1169  1170  9.21 pass: 1
1171  9.81 pass: 1
1172  11.42 pass: 1
1173  10.56 pass: 1
1174  10.98 pass: 1
1175  10.98 pass: 1
1176  9.5 pass: 1
1177  10.3 pass: 1
1178  9.79 pass: 1
1179  10.77 pass: 1
1180  10.73 pass: 1
1181  10.35 pass: 1
1182  9.7 pass: 1
1183  15.06 pass: 1
1184  13.22 pass: 1
1185  1186  1187  1188  1189  1190  1191  1192  1193  1194  1195  1196  1197  1198  1199  1200  17.83 pass: 1
1201  16.86 pass: 1
1202  14.69 pass: 1
1203  14.36 pass: 1
1204  15.69 pass: 1
1205  16.9 pass: 1
1206  16.09 pass: 1
12

1710  1711  1712  1713  1714  1715  1716  1717  1718  1719  1720  1721  1722  1723  1724  1725  13.36 pass: 1
1726  12.53 pass: 1
1727  12.06 pass: 1
1728  12.25 pass: 1
1729  11.88 pass: 1
1730  12.22 pass: 1
1731  12.62 pass: 1
1732  12.86 pass: 1
1733  12.6 pass: 1
1734  11.62 pass: 1
1735  11.28 pass: 1
1736  14.58 pass: 1
1737  13.55 pass: 1
1738  12.14 pass: 1
1739  11.32 pass: 1
1740  13.64 pass: 1
1741  13.64 pass: 1
1742  13.64 pass: 1
1743  14.71 pass: 1
1744  15.15 pass: 1
1745  15.55 pass: 1
1746  16.59 pass: 1
1747  16.05 pass: 1
1748  17.1 pass: 1
1749  16.24 pass: 1
1750  16.34 pass: 1
1751  15.45 pass: 1
1752  17.02 pass: 1
1753  16.35 pass: 1
1754  16.92 pass: 1
1755  1756  1757  1758  1759  1760  1761  1762  1763  1764  1765  1766  1767  1768  1769  1770  10.18 pass: 1
1771  17.22 pass: 1
1772  13.51 pass: 1
1773  17.32 pass: 1
1774  18.56 pass: 1
1775  17.12 pass: 1
1776  13.51 pass: 1
1777  14.67 pass: 1
1778  13.15 pass: 1
1779  12.28 pass: 1
1780  9.68 pass: 1
178

2214  17.66 pass: 1
2215  17.54 pass: 1
2216  17.23 pass: 1
2217  17.19 pass: 1
2218  16.49 pass: 1
2219  16.23 pass: 1
2220  6.76 pass: 1
2221  5.43 pass: 1
2222  4.77 pass: 1
2223  5.3 pass: 1
2224  6.7 pass: 1
2225  6.02 pass: 1
2226  5.41 pass: 1
2227  5.58 pass: 1
2228  5.42 pass: 1
2229  6.09 pass: 1
2230  6.24 pass: 1
2231  6.77 pass: 1
2232  6.79 pass: 1
2233  6.66 pass: 1
2234  7.03 pass: 1
2235  4.73 pass: 1
2236  4.73 pass: 1
2237  4.73 pass: 1
2238  4.72 pass: 1
2239  4.72 pass: 1
2240  4.19 pass: 1
2241  3.91 pass: 1
2242  4.1 pass: 1
2243  4.62 pass: 1
2244  4.3 pass: 1
2245  4.46 pass: 1
2246  4.03 pass: 1
2247  4.48 pass: 1
2248  3.42 pass: 1
2249  3.53 pass: 1
2250  18.11 pass: 1
2251  17.84 pass: 1
2252  16.46 pass: 1
2253  16.52 pass: 1
2254  16.91 pass: 1
2255  16.58 pass: 1
2256  15.48 pass: 1
2257  14.19 pass: 1
2258  15.06 pass: 1
2259  15.98 pass: 1
2260  15.28 pass: 1
2261  14.33 pass: 1
2262  12.57 pass: 1
2263  12.99 pass: 1
2264  11.99 pass: 1
2265  14.63 pa

2690  15.61 pass: 1
2691  15.73 pass: 1
2692  15.58 pass: 1
2693  15.66 pass: 1
2694  15.29 pass: 1
2695  14.91 pass: 1
2696  14.91 pass: 1
2697  13.28 pass: 1
2698  13.34 pass: 1
2699  13.18 pass: 1
2700  11.17 pass: 1
2701  11.52 pass: 1
2702  6.19 pass: 1
2703  6.45 pass: 1
2704  6.8 pass: 1
2705  6.18 pass: 1
2706  7.0 pass: 1
2707  7.76 pass: 1
2708  8.03 pass: 1
2709  7.76 pass: 1
2710  8.79 pass: 1
2711  7.32 pass: 1
2712  6.83 pass: 1
2713  6.4 pass: 1
2714  6.91 pass: 1
2715  2716  2717  2718  2719  2720  2721  2722  2723  2724  2725  2726  2727  2728  2729  2730  2731  2732  2733  2734  2735  2736  2737  2738  2739  2740  2741  2742  2743  2744  2745  2746  2747  2748  2749  2750  2751  2752  2753  2754  2755  2756  2757  2758  2759  2760  2761  2762  2763  2764  2765  2766  2767  2768  2769  2770  2771  2772  2773  2774  2775  11.65 pass: 1
2776  11.65 pass: 1
2777  11.65 pass: 1
2778  11.8 pass: 1
2779  11.65 pass: 1
2780  10.39 pass: 1
2781  9.96 pass: 1
2782  7.72 pass: 1

3218  4.87 pass: 3
3219  4.79 pass: 3
3220  6.11 pass: 3
3221  7.11 pass: 3
3222  7.97 pass: 3
3223  8.58 pass: 3
3224  7.95 pass: 3
3225  11.31 pass: 1
3226  11.01 pass: 1
3227  11.35 pass: 1
3228  11.13 pass: 1
3229  12.6 pass: 1
3230  12.63 pass: 1
3231  11.22 pass: 1
3232  9.47 pass: 1
3233  14.14 pass: 1
3234  17.63 pass: 1
3235  15.71 pass: 1
3236  15.33 pass: 1
3237  14.33 pass: 1
3238  12.38 pass: 1
3239  13.34 pass: 1
3240  8.49 pass: 1
3241  9.59 pass: 1
3242  9.69 pass: 1
3243  7.59 pass: 1
3244  7.47 pass: 1
3245  10.58 pass: 1
3246  6.4 pass: 1
3247  7.51 pass: 1
3248  8.26 pass: 1
3249  10.96 pass: 1
3250  nan pass: 1
3251  nan pass: 1
3252  nan pass: 1
3253  nan pass: 1
3254  nan pass: 1


In [34]:
main_data[main_data['Total_Expenditure'].isnull()]['Country'].unique()

array(['Afghanistan', 'American Samoa', 'Aruba', 'Bermuda',
       'British Virgin Islands', 'Cayman Islands', 'Channel Islands',
       "Cote d'Ivoire", 'Curacao', 'Czech Republic', 'Faroe Islands',
       'French Polynesia', 'Gibraltar', 'Greenland', 'Guam',
       'Hong Kong SAR, China', 'Iraq', 'Isle of Man',
       'Korea, Dem. People���s Rep.', 'Korea, Rep.', 'Kosovo',
       'Kyrgyz Republic', 'Lao PDR', 'Liechtenstein', 'Macao SAR, China',
       'Moldova', 'New Caledonia', 'North Macedonia',
       'Northern Mariana Islands', 'Puerto Rico',
       'Sint Maarten (Dutch part)', 'Slovak Republic', 'Somalia',
       'South Sudan', 'St. Kitts and Nevis', 'St. Lucia',
       'St. Martin (French part)', 'St. Vincent and the Grenadines',
       'Tanzania', 'Turks and Caicos Islands', 'Vietnam',
       'Virgin Islands (U.S.)', 'West Bank and Gaza', 'Zimbabwe'],
      dtype=object)

In [35]:
totalex_data[totalex_data['Country'].str.startswith("Curacao")]

Unnamed: 0,Country,Year,Total_Expenditure


In [36]:
len(main_data[main_data['Total_Expenditure'].isnull()])

622

## MEASLES

In [37]:
measles = pd.read_csv("Data/New/Measles/Measles_Internet.csv")

In [38]:
count = 0
measles_data = pd.DataFrame(columns=['Country', 'Year', 'Measles'])
for i in range(0, len(measles['Country'].unique())):
    for j in range(1, len(measles.columns)):
        #print(measles.loc[i,'Country'], end='')
        measles_data.at[count, 'Country'] = measles.loc[i,'Country']
        measles_data.at[count, 'Year'] = measles.columns[j]
       # print(measles.columns[j])
        measles_data.at[count, 'Measles'] = measles[measles['Country'] == measles['Country'][i]].T[1:].loc[measles.columns[j]].values[0]
        count+=1

In [39]:
measles_data['Year'] = measles_data['Year'].astype(np.int64)

In [40]:
measles_data.to_pickle("Data/New/Measles/measles_pickle_data.pkl")

In [41]:
#measles_data

In [42]:
for val in main_data['Country'].index:
    print(val,' ',end='')
    if len(measles_data[measles_data['Country'] == main_data['Country'][val]].values) != 0:
        try:
            measles_other_data = measles_data[measles_data['Country'] == main_data['Country'][val]][measles_data['Year'] == main_data['Year'][val]]['Measles'].values[0]
            print(measles_other_data, "pass: 1")
            main_data.at[val, 'Measles'] = measles_other_data
        except IndexError:
            pass
    else:
        try:
            measles_other_data = measles_data[measles_data['Country'].str.startswith(main_data['Country'][val])][measles_data['Year'] == main_data['Year'][val]]['Measles'].values[0]
            print(measles_other_data, "pass: 2")
            main_data.at[val, 'Measles'] = measles_other_data
        except IndexError:
            try:
                measles_other_data = measles_data[measles_data['Country'].str.startswith(main_data['Country'][val].split(',')[0])][measles_data['Year'] == main_data['Year'][val]]['Measles'].values[0]
                print(measles_other_data, "pass: 3")
                main_data.at[val, 'Measles'] = measles_other_data
            except IndexError:
                pass
        

0  492.0 pass: 1
1  430.0 pass: 1
2  2787.0 pass: 1
3  3013.0 pass: 1
4  1989.0 pass: 1
5  2861.0 pass: 1
6  1599.0 pass: 1
7  1141.0 pass: 1
8  1990.0 pass: 1
9  1296.0 pass: 1
10  466.0 pass: 1
11  798.0 pass: 1
12  2486.0 pass: 1
13  8762.0 pass: 1
14  6532.0 pass: 1
15  nan pass: 1
16  0.0 pass: 1
17  9.0 pass: 1
18  28.0 pass: 1
19  10.0 pass: 1
20  0.0 pass: 1
21  nan pass: 1
22  22.0 pass: 1
23  68.0 pass: 1
24  6.0 pass: 1
25  7.0 pass: 1
26  8.0 pass: 1
27  16.0 pass: 1
28  18.0 pass: 1
29  662.0 pass: 1
30  0.0 pass: 1
31  25.0 pass: 1
32  18.0 pass: 1
33  112.0 pass: 1
34  103.0 pass: 1
35  107.0 pass: 1
36  217.0 pass: 1
37  0.0 pass: 1
38  944.0 pass: 1
39  2302.0 pass: 1
40  3289.0 pass: 1
41  15374.0 pass: 1
42  5862.0 pass: 1
43  2686.0 pass: 1
44  nan pass: 1
45  46  47  48  49  50  51

  """
  if sys.path[0] == '':


  52  53  54  55  56  57  58  59  60  nan pass: 1
61  0.0 pass: 1
62  0.0 pass: 1
63  0.0 pass: 1
64  0.0 pass: 1
65  0.0 pass: 1
66  0.0 pass: 1
67  0.0 pass: 1
68  0.0 pass: 1
69  0.0 pass: 1
70  0.0 pass: 1
71  0.0 pass: 1
72  1.0 pass: 1
73  5.0 pass: 1
74  2.0 pass: 1
75  11699.0 pass: 1
76  8523.0 pass: 1
77  4458.0 pass: 1
78  1449.0 pass: 1
79  1190.0 pass: 1
80  2807.0 pass: 1
81  265.0 pass: 1
82  1014.0 pass: 1
83  765.0 pass: 1
84  258.0 pass: 1
85  29.0 pass: 1
86  1196.0 pass: 1
87  11945.0 pass: 1
88  9046.0 pass: 1
89  2219.0 pass: 1
90  0.0 pass: 1
91  0.0 pass: 1
92  0.0 pass: 1
93  0.0 pass: 1
94  0.0 pass: 1
95  0.0 pass: 1
96  0.0 pass: 1
97  0.0 pass: 1
98  0.0 pass: 1
99  0.0 pass: 1
100  0.0 pass: 1
101  0.0 pass: 1
102  0.0 pass: 1
103  0.0 pass: 1
104  0.0 pass: 1
105  1.0 pass: 1
106  0.0 pass: 1
107  2.0 pass: 1
108  3.0 pass: 1
109  17.0 pass: 1
110  3.0 pass: 1
111  0.0 pass: 1
112  0.0 pass: 1
113  0.0 pass: 1
114  0.0 pass: 1
115  0.0 pass: 1
116  0.0 pa

611  1.0 pass: 1
612  0.0 pass: 1
613  0.0 pass: 1
614  0.0 pass: 1
615  52628.0 pass: 1
616  26883.0 pass: 1
617  6183.0 pass: 1
618  9943.0 pass: 1
619  38159.0 pass: 1
620  52461.0 pass: 1
621  131441.0 pass: 1
622  109023.0 pass: 1
623  99602.0 pass: 1
624  124219.0 pass: 1
625  70549.0 pass: 1
626  71879.0 pass: 1
627  58341.0 pass: 1
628  88962.0 pass: 1
629  71093.0 pass: 1
630  0.0 pass: 1
631  1.0 pass: 1
632  1.0 pass: 1
633  6.0 pass: 1
634  0.0 pass: 1
635  0.0 pass: 1
636  0.0 pass: 1
637  0.0 pass: 1
638  0.0 pass: 1
639  0.0 pass: 1
640  0.0 pass: 1
641  0.0 pass: 1
642  139.0 pass: 1
643  3.0 pass: 1
644  1.0 pass: 1
645  0.0 pass: 1
646  0.0 pass: 1
647  1.0 pass: 1
648  3.0 pass: 1
649  0.0 pass: 1
650  1.0 pass: 1
651  0.0 pass: 1
652  0.0 pass: 1
653  85.0 pass: 1
654  912.0 pass: 1
655  0.0 pass: 1
656  0.0 pass: 1
657  0.0 pass: 1
658  nan pass: 1
659  nan pass: 1
660  71.0 pass: 3
661  124.0 pass: 3
662  260.0 pass: 3
663  315.0 pass: 3
664  4.0 pass: 3
665  1.0 

1151  nan pass: 1
1152  5.0 pass: 1
1153  12.0 pass: 1
1154  56.0 pass: 1
1155  1156  1157  1158  1159  1160  1161  1162  1163  1164  1165  1166  1167  1168  1169  1170  0.0 pass: 1
1171  0.0 pass: 1
1172  0.0 pass: 1
1173  0.0 pass: 1
1174  0.0 pass: 1
1175  0.0 pass: 1
1176  0.0 pass: 1
1177  0.0 pass: 1
1178  0.0 pass: 1
1179  0.0 pass: 1
1180  0.0 pass: 1
1181  0.0 pass: 1
1182  0.0 pass: 1
1183  0.0 pass: 1
1184  0.0 pass: 1
1185  1186  1187  1188  1189  1190  1191  1192  1193  1194  1195  1196  1197  1198  1199  1200  0.0 pass: 1
1201  0.0 pass: 1
1202  0.0 pass: 1
1203  0.0 pass: 1
1204  0.0 pass: 1
1205  0.0 pass: 1
1206  0.0 pass: 1
1207  0.0 pass: 1
1208  0.0 pass: 1
1209  0.0 pass: 1
1210  0.0 pass: 1
1211  0.0 pass: 1
1212  0.0 pass: 1
1213  0.0 pass: 1
1214  0.0 pass: 1
1215  175.0 pass: 1
1216  53.0 pass: 1
1217  6.0 pass: 1
1218  11.0 pass: 1
1219  45.0 pass: 1
1220  264.0 pass: 1
1221  89.0 pass: 1
1222  3.0 pass: 1
1223  4.0 pass: 1
1224  99.0 pass: 1
1225  10.0 pass: 

1710  1711  1712  1713  1714  1715  1716  1717  1718  1719  1720  1721  1722  1723  1724  1725  11.0 pass: 1
1726  35.0 pass: 1
1727  0.0 pass: 1
1728  7.0 pass: 1
1729  2.0 pass: 1
1730  0.0 pass: 1
1731  1.0 pass: 1
1732  0.0 pass: 1
1733  1.0 pass: 1
1734  1.0 pass: 1
1735  1.0 pass: 1
1736  1.0 pass: 1
1737  103.0 pass: 1
1738  7.0 pass: 1
1739  19.0 pass: 1
1740  nan pass: 1
1741  0.0 pass: 1
1742  2.0 pass: 1
1743  6.0 pass: 1
1744  0.0 pass: 1
1745  0.0 pass: 1
1746  1.0 pass: 1
1747  0.0 pass: 1
1748  8.0 pass: 1
1749  nan pass: 1
1750  0.0 pass: 1
1751  1.0 pass: 1
1752  nan pass: 1
1753  nan pass: 1
1754  0.0 pass: 1
1755  1756  1757  1758  1759  1760  1761  1762  1763  1764  1765  1766  1767  1768  1769  1770  3.0 pass: 1
1771  6.0 pass: 1
1772  2.0 pass: 1
1773  0.0 pass: 1
1774  1.0 pass: 1
1775  0.0 pass: 1
1776  3.0 pass: 1
1777  0.0 pass: 1
1778  2.0 pass: 1
1779  nan pass: 1
1780  35558.0 pass: 1
1781  62233.0 pass: 1
1782  10795.0 pass: 1
1783  9357.0 pass: 1
1784  35

2212  nan pass: 1
2213  0.0 pass: 1
2214  0.0 pass: 1
2215  7.0 pass: 1
2216  8.0 pass: 1
2217  5.0 pass: 1
2218  4.0 pass: 1
2219  0.0 pass: 1
2220  nan pass: 1
2221  0.0 pass: 1
2222  13.0 pass: 1
2223  5.0 pass: 1
2224  3.0 pass: 1
2225  19.0 pass: 1
2226  18.0 pass: 1
2227  24.0 pass: 1
2228  13.0 pass: 1
2229  25.0 pass: 1
2230  18.0 pass: 1
2231  1.0 pass: 1
2232  5.0 pass: 1
2233  15.0 pass: 1
2234  15.0 pass: 1
2235  1370.0 pass: 1
2236  8749.0 pass: 1
2237  8046.0 pass: 1
2238  4386.0 pass: 1
2239  4321.0 pass: 1
2240  863.0 pass: 1
2241  1129.0 pass: 1
2242  2801.0 pass: 1
2243  7641.0 pass: 1
2244  2981.0 pass: 1
2245  4248.0 pass: 1
2246  4740.0 pass: 1
2247  3903.0 pass: 1
2248  3849.0 pass: 1
2249  2064.0 pass: 1
2250  0.0 pass: 1
2251  0.0 pass: 1
2252  0.0 pass: 1
2253  0.0 pass: 1
2254  0.0 pass: 1
2255  0.0 pass: 1
2256  0.0 pass: 1
2257  0.0 pass: 1
2258  0.0 pass: 1
2259  0.0 pass: 1
2260  0.0 pass: 1
2261  nan pass: 1
2262  0.0 pass: 1
2263  nan pass: 1
2264  0.0 p

2686  131.0 pass: 1
2687  1204.0 pass: 1
2688  3802.0 pass: 1
2689  302.0 pass: 1
2690  41.0 pass: 1
2691  297.0 pass: 1
2692  267.0 pass: 1
2693  362.0 pass: 1
2694  22.0 pass: 1
2695  26.0 pass: 1
2696  256.0 pass: 1
2697  67.0 pass: 1
2698  nan pass: 1
2699  152.0 pass: 1
2700  1686.0 pass: 1
2701  2107.0 pass: 1
2702  51.0 pass: 1
2703  60.0 pass: 1
2704  79.0 pass: 1
2705  21.0 pass: 1
2706  33.0 pass: 1
2707  44.0 pass: 1
2708  0.0 pass: 1
2709  3.0 pass: 1
2710  35.0 pass: 1
2711  65.0 pass: 1
2712  139.0 pass: 1
2713  309.0 pass: 1
2714  16527.0 pass: 1
2715  2716  2717  2718  2719  2720  2721  2722  2723  2724  2725  2726  2727  2728  2729  2730  2731  2732  2733  2734  2735  2736  2737  2738  2739  2740  2741  2742  2743  2744  2745  2746  2747  2748  2749  2750  2751  2752  2753  2754  2755  2756  2757  2758  2759  2760  2761  2762  2763  2764  2765  2766  2767  2768  2769  2770  2771  2772  2773  2774  2775  676.0 pass: 1
2776  2813.0 pass: 1
2777  8523.0 pass: 1
2778  5616

3242  0.0 pass: 1
3243  0.0 pass: 1
3244  9696.0 pass: 1
3245  853.0 pass: 1
3246  0.0 pass: 1
3247  242.0 pass: 1
3248  212.0 pass: 1
3249  420.0 pass: 1
3250  31.0 pass: 1
3251  998.0 pass: 1
3252  304.0 pass: 1
3253  529.0 pass: 1
3254  1483.0 pass: 1


## Income composition of resources

In [43]:
icor = pd.read_csv("Data/New/Income_Composition_Of_Resources/ICOR_Internet.csv")

In [44]:
count = 0
icor_data = pd.DataFrame(columns=['Country', 'Year', 'Income_Composition_Of_Resources'])
for i in range(0, len(icor['Country'].unique())):
    for j in range(1, len(icor.columns)):
        #print(icor.loc[i,'Country'], end='')
        icor_data.at[count, 'Country'] = icor.loc[i,'Country']
        icor_data.at[count, 'Year'] = icor.columns[j]
        #print(icor.columns[j])
        icor_data.at[count, 'Income_Composition_Of_Resources'] = icor[icor['Country'] == icor['Country'][i]].T[1:].loc[icor.columns[j]].values[0]
        count+=1

In [45]:
icor_data['Year'] = icor_data['Year'].astype(np.int64)
icor_data['Income_Composition_Of_Resources'] = icor_data['Income_Composition_Of_Resources'].astype(np.float64)

In [46]:
icor_data.to_pickle("Data/New/Income_Composition_Of_Resources/ICOR_Internet.pkl")

In [47]:
for val in main_data['Country'].index:
    print(val,' ',end='')
    if len(icor_data[icor_data['Country'] == main_data['Country'][val]].values) != 0:
        try:
            icor_other_data = icor_data[icor_data['Country'] == main_data['Country'][val]][icor_data['Year'] == main_data['Year'][val]]['Income_Composition_Of_Resources'].values[0]
            print(icor_other_data, "pass: 1")
            main_data.at[val, 'Income_Composition_Of_Resources'] = icor_other_data
        except IndexError:
            pass
    else:
        try:
            icor_other_data = icor_data[icor_data['Country'].str.startswith(main_data['Country'][val])][icor_data['Year'] == main_data['Year'][val]]['Income_Composition_Of_Resources'].values[0]
            print(icor_other_data, "pass: 2")
            main_data.at[val, 'Income_Composition_Of_Resources'] = icor_other_data
        except IndexError:
            try:
                icor_other_data = icor_data[icor_data['Country'].str.startswith(main_data['Country'][val].split(',')[0])][icor_data['Year'] == main_data['Year'][val]]['Income_Composition_Of_Resources'].values[0]
                print(icor_other_data, "pass: 3")
                main_data.at[val, 'Income_Composition_Of_Resources'] = icor_other_data
            except IndexError:
                pass
            

0  1  2  3  4  5  6  7  8  9  10  11  12  13  14  15  16  17  18  19  20  21  22  23  24  25  26  27  28  29  30  31  32  

  if sys.path[0] == '':


33  34  35  36  37  38  39  40  41  42  43  44  45  46  47  48  49  50  51  52  53  54  55  56  57  58  59  60  61  62  63  64  65  66  67  68  69  70  71  72  73  74  75  76  77  78  79  80  81  82  83  84  85  86  87  88  89  90  91  92  93  94  95  96  97  98  99  100  101  102  103  104  105  106  107  108  109  110  111  112  113  114  115  116  117  118  119  120  121  122  123  124  125  126  127  128  129  130  131  132  133  134  135  136  137  138  139  140  141  142  143  144  145  146  147  148  149  150  151  152  153  154  155  156  157  158  159  160  161  162  163  164  165  166  167  168  169  170  171  172  173  174  175  176  177  178  179  180  181  182  183  184  185  186  187  188  189  190  191  192  193  194  195  196  197  198  199  200  201  202  203  204  205  206  207  208  209  210  211  212  213  214  215  216  217  218  219  220  221  222  223  224  225  226  227  228  229  230  231  232  233  234  235  236  237  238  239  240  241  242  243  244  245  24

1585  1586  1587  1588  1589  1590  1591  1592  1593  1594  1595  1596  1597  1598  1599  1600  1601  1602  1603  1604  1605  1606  1607  1608  1609  1610  1611  1612  1613  1614  1615  1616  1617  1618  1619  1620  1621  1622  1623  1624  1625  1626  1627  1628  1629  1630  1631  1632  1633  1634  1635  1636  1637  1638  1639  1640  1641  1642  1643  1644  1645  1646  1647  1648  1649  1650  1651  1652  1653  1654  1655  1656  1657  1658  1659  1660  1661  1662  1663  1664  1665  1666  1667  1668  1669  1670  1671  1672  1673  1674  1675  1676  1677  1678  1679  1680  1681  1682  1683  1684  1685  1686  1687  1688  1689  1690  1691  1692  1693  1694  1695  1696  1697  1698  1699  1700  1701  1702  1703  1704  1705  1706  1707  1708  1709  1710  1711  1712  1713  1714  1715  1716  1717  1718  1719  1720  1721  1722  1723  1724  1725  1726  1727  1728  1729  1730  1731  1732  1733  1734  1735  1736  1737  1738  1739  1740  1741  1742  1743  1744  1745  1746  1747  1748  1749  1750  1751

2959  2960  2961  2962  2963  2964  2965  2966  2967  2968  2969  2970  2971  2972  2973  2974  2975  2976  2977  2978  2979  2980  2981  2982  2983  2984  2985  2986  2987  2988  2989  2990  2991  2992  2993  2994  2995  2996  2997  2998  2999  3000  3001  3002  3003  3004  3005  3006  3007  3008  3009  3010  3011  3012  3013  3014  3015  3016  3017  3018  3019  3020  3021  3022  3023  3024  3025  3026  3027  3028  3029  3030  3031  3032  3033  3034  3035  3036  3037  3038  3039  3040  3041  3042  3043  3044  3045  3046  3047  3048  3049  3050  3051  3052  3053  3054  3055  3056  3057  3058  3059  3060  3061  3062  3063  3064  3065  3066  3067  3068  3069  3070  3071  3072  3073  3074  3075  3076  3077  3078  3079  3080  3081  3082  3083  3084  3085  3086  3087  3088  3089  3090  3091  3092  3093  3094  3095  3096  3097  3098  3099  3100  3101  3102  3103  3104  3105  3106  3107  3108  3109  3110  3111  3112  3113  3114  3115  3116  3117  3118  3119  3120  3121  3122  3123  3124  3125

In [48]:
icor_data[icor_data['Country'] == main_data['Country'][10]].values

array([], shape=(0, 3), dtype=object)

In [49]:
icor_data.dtypes

Country                             object
Year                                 int64
Income_Composition_Of_Resources    float64
dtype: object

## Diptheria

In [50]:
diptheria = pd.read_csv("Data/New/Diptheria/Dip_Internet.csv")

In [51]:
count = 0
diptheria_data = pd.DataFrame(columns=['Country', 'Year', 'Diphtheria'])
for i in range(0, len(diptheria['Country'].unique())):
    for j in range(1, len(diptheria.columns)):
        #print(diptheria.loc[i,'Country'], end='')
        diptheria_data.at[count, 'Country'] = diptheria.loc[i,'Country']
        diptheria_data.at[count, 'Year'] = diptheria.columns[j]
        #print(diptheria.columns[j])
        diptheria_data.at[count, 'Diphtheria'] = diptheria[diptheria['Country'] == diptheria['Country'][i]].T[1:].loc[diptheria.columns[j]].values[0]
        count+=1

In [52]:
diptheria_data['Year'] = diptheria_data['Year'].astype(np.int64)
diptheria_data['Diphtheria'] = diptheria_data['Diphtheria'].astype(np.float64)

In [53]:
diptheria_data.to_pickle("Data/New/Diptheria/Dip_Internet.pkl")

In [54]:
for val in main_data['Country'].index:
    print(val,' ',end='')
    if len(diptheria_data[diptheria_data['Country'] == main_data['Country'][val]].values) != 0:
        try:
            diptheria_other_data = diptheria_data[diptheria_data['Country'] == main_data['Country'][val]][diptheria_data['Year'] == main_data['Year'][val]]['Diphtheria'].values[0]
            print(diptheria_other_data, "pass: 1")
            main_data.at[val, 'Diphtheria'] = diptheria_other_data
        except IndexError:
            pass
    else:
        try:
            diptheria_other_data = diptheria_data[diptheria_data['Country'].str.startswith(main_data['Country'][val])][diptheria_data['Year'] == main_data['Year'][val]]['Diphtheria'].values[0]
            print(diptheria_other_data, "pass: 2")
            main_data.at[val, 'Diphtheria'] = diptheria_other_data
        except IndexError:
            try:
                diptheria_other_data = diptheria_data[diptheria_data['Country'].str.startswith(main_data['Country'][val].split(',')[0])][diptheria_data['Year'] == main_data['Year'][val]]['Diphtheria'].values[0]
                print(diptheria_other_data, "pass: 3")
                main_data.at[val, 'Diphtheria'] = diptheria_other_data
            except IndexError:
                pass
            

0  62.0 pass: 1
1  64.0 pass: 1
2  67.0 pass: 1
3  68.0 pass: 1
4  66.0 pass: 1
5  63.0 pass: 1
6  64.0 pass: 1
7  63.0 pass: 1
8  58.0 pass: 1
9  58.0 pass: 1
10  50.0 pass: 1
11  41.0 pass: 1
12  36.0 pass: 1
13  33.0 pass: 1
14  24.0 pass: 1
15  98.0 pass: 1
16  99.0 pass: 1
17  99.0 pass: 1
18  99.0 pass: 1
19  99.0 pass: 1
20  98.0 pass: 1
21  99.0 pass: 1
22  98.0 pass: 1
23  97.0 pass: 1
24  98.0 pass: 1
25  97.0 pass: 1
26  97.0 pass: 1
27  98.0 pass: 1
28  97.0 pass: 1
29  97.0 pass: 1
30  95.0 pass: 1
31  95.0 pass: 1
32  95.0 pass: 1
33  95.0 pass: 1
34  95.0 pass: 1
35  95.0 pass: 1
36  93.0 pass: 1
37  95.0 pass: 1
38  95.0 pass: 1
39  88.0 pass: 1
40  86.0 pass: 1
41  87.0 pass: 1
42  86.0 pass: 1
43  89.0 pass: 1
44  86.0 pass: 1
45  46  47  48  49  50  

  """
  if sys.path[0] == '':


51  52  53  54  55  56  57  58  59  60  97.0 pass: 1
61  96.0 pass: 1
62  99.0 pass: 1
63  99.0 pass: 1
64  99.0 pass: 1
65  99.0 pass: 1
66  99.0 pass: 1
67  96.0 pass: 1
68  93.0 pass: 1
69  98.0 pass: 1
70  99.0 pass: 1
71  99.0 pass: 1
72  97.0 pass: 1
73  96.0 pass: 1
74  98.0 pass: 1
75  55.0 pass: 1
76  54.0 pass: 1
77  54.0 pass: 1
78  51.0 pass: 1
79  60.0 pass: 1
80  44.0 pass: 1
81  54.0 pass: 1
82  58.0 pass: 1
83  22.0 pass: 1
84  28.0 pass: 1
85  30.0 pass: 1
86  31.0 pass: 1
87  35.0 pass: 1
88  32.0 pass: 1
89  23.0 pass: 1
90  99.0 pass: 1
91  99.0 pass: 1
92  98.0 pass: 1
93  99.0 pass: 1
94  98.0 pass: 1
95  99.0 pass: 1
96  99.0 pass: 1
97  99.0 pass: 1
98  99.0 pass: 1
99  99.0 pass: 1
100  97.0 pass: 1
101  99.0 pass: 1
102  98.0 pass: 1
103  97.0 pass: 1
104  95.0 pass: 1
105  94.0 pass: 1
106  94.0 pass: 1
107  91.0 pass: 1
108  91.0 pass: 1
109  94.0 pass: 1
110  94.0 pass: 1
111  93.0 pass: 1
112  91.0 pass: 1
113  91.0 pass: 1
114  98.0 pass: 1
115  98.0 pass

607  96.0 pass: 1
608  94.0 pass: 1
609  91.0 pass: 1
610  93.0 pass: 1
611  96.0 pass: 1
612  97.0 pass: 1
613  97.0 pass: 1
614  91.0 pass: 1
615  99.0 pass: 1
616  99.0 pass: 1
617  99.0 pass: 1
618  99.0 pass: 1
619  99.0 pass: 1
620  99.0 pass: 1
621  97.0 pass: 1
622  93.0 pass: 1
623  93.0 pass: 1
624  87.0 pass: 1
625  87.0 pass: 1
626  86.0 pass: 1
627  86.0 pass: 1
628  86.0 pass: 1
629  85.0 pass: 1
630  90.0 pass: 1
631  91.0 pass: 1
632  91.0 pass: 1
633  85.0 pass: 1
634  88.0 pass: 1
635  92.0 pass: 1
636  92.0 pass: 1
637  93.0 pass: 1
638  93.0 pass: 1
639  93.0 pass: 1
640  89.0 pass: 1
641  92.0 pass: 1
642  81.0 pass: 1
643  80.0 pass: 1
644  79.0 pass: 1
645  87.0 pass: 1
646  87.0 pass: 1
647  86.0 pass: 1
648  83.0 pass: 1
649  74.0 pass: 1
650  83.0 pass: 1
651  81.0 pass: 1
652  75.0 pass: 1
653  69.0 pass: 1
654  68.0 pass: 1
655  76.0 pass: 1
656  80.0 pass: 1
657  89.0 pass: 1
658  70.0 pass: 1
659  70.0 pass: 1
660  90.0 pass: 3
661  85.0 pass: 3
662  85.0 

1155  1156  1157  1158  1159  1160  1161  1162  1163  1164  1165  1166  1167  1168  1169  1170  97.0 pass: 1
1171  97.0 pass: 1
1172  97.0 pass: 1
1173  95.0 pass: 1
1174  97.0 pass: 1
1175  99.0 pass: 1
1176  99.0 pass: 1
1177  96.0 pass: 1
1178  91.0 pass: 1
1179  99.0 pass: 1
1180  83.0 pass: 1
1181  97.0 pass: 1
1182  98.0 pass: 1
1183  96.0 pass: 1
1184  97.0 pass: 1
1185  1186  1187  1188  1189  1190  1191  1192  1193  1194  1195  1196  1197  1198  1199  1200  73.0 pass: 1
1201  85.0 pass: 1
1202  96.0 pass: 1
1203  88.0 pass: 1
1204  94.0 pass: 1
1205  92.0 pass: 1
1206  95.0 pass: 1
1207  85.0 pass: 1
1208  89.0 pass: 1
1209  87.0 pass: 1
1210  87.0 pass: 1
1211  84.0 pass: 1
1212  82.0 pass: 1
1213  77.0 pass: 1
1214  81.0 pass: 1
1215  34.0 pass: 1
1216  44.0 pass: 1
1217  53.0 pass: 1
1218  63.0 pass: 1
1219  64.0 pass: 1
1220  57.0 pass: 1
1221  60.0 pass: 1
1222  63.0 pass: 1
1223  57.0 pass: 1
1224  59.0 pass: 1
1225  60.0 pass: 1
1226  57.0 pass: 1
1227  53.0 pass: 1
122

1684  70.0 pass: 1
1685  81.0 pass: 1
1686  75.0 pass: 1
1687  65.0 pass: 1
1688  60.0 pass: 1
1689  60.0 pass: 1
1690  31.0 pass: 1
1691  35.0 pass: 1
1692  39.0 pass: 1
1693  42.0 pass: 1
1694  46.0 pass: 1
1695  94.0 pass: 1
1696  96.0 pass: 1
1697  98.0 pass: 1
1698  98.0 pass: 1
1699  98.0 pass: 1
1700  98.0 pass: 1
1701  98.0 pass: 1
1702  98.0 pass: 1
1703  98.0 pass: 1
1704  98.0 pass: 1
1705  97.0 pass: 1
1706  95.0 pass: 1
1707  93.0 pass: 1
1708  94.0 pass: 1
1709  94.0 pass: 1
1710  1711  1712  1713  1714  1715  1716  1717  1718  1719  1720  1721  1722  1723  1724  1725  93.0 pass: 1
1726  93.0 pass: 1
1727  93.0 pass: 1
1728  92.0 pass: 1
1729  95.0 pass: 1
1730  98.0 pass: 1
1731  96.0 pass: 1
1732  95.0 pass: 1
1733  94.0 pass: 1
1734  94.0 pass: 1
1735  94.0 pass: 1
1736  94.0 pass: 1
1737  95.0 pass: 1
1738  95.0 pass: 1
1739  94.0 pass: 1
1740  99.0 pass: 1
1741  99.0 pass: 1
1742  99.0 pass: 1
1743  99.0 pass: 1
1744  99.0 pass: 1
1745  99.0 pass: 1
1746  99.0 pass: 

2175  2176  2177  2178  2179  2180  2181  2182  2183  2184  2185  2186  2187  2188  2189  2190  2191  2192  2193  2194  2195  2196  2197  2198  2199  2200  2201  2202  2203  2204  2205  93.0 pass: 1
2206  94.0 pass: 1
2207  95.0 pass: 1
2208  94.0 pass: 1
2209  93.0 pass: 1
2210  94.0 pass: 1
2211  94.0 pass: 1
2212  93.0 pass: 1
2213  94.0 pass: 1
2214  91.0 pass: 1
2215  92.0 pass: 1
2216  92.0 pass: 1
2217  93.0 pass: 1
2218  91.0 pass: 1
2219  90.0 pass: 1
2220  99.0 pass: 1
2221  98.0 pass: 1
2222  98.0 pass: 1
2223  99.0 pass: 1
2224  99.0 pass: 1
2225  98.0 pass: 1
2226  99.0 pass: 1
2227  99.0 pass: 1
2228  98.0 pass: 1
2229  99.0 pass: 1
2230  99.0 pass: 1
2231  99.0 pass: 1
2232  99.0 pass: 1
2233  99.0 pass: 1
2234  99.0 pass: 1
2235  69.0 pass: 1
2236  65.0 pass: 1
2237  64.0 pass: 1
2238  63.0 pass: 1
2239  52.0 pass: 1
2240  52.0 pass: 1
2241  53.0 pass: 1
2242  54.0 pass: 1
2243  59.0 pass: 1
2244  63.0 pass: 1
2245  68.0 pass: 1
2246  66.0 pass: 1
2247  63.0 pass: 1
224

2677  nan pass: 1
2678  nan pass: 1
2679  nan pass: 1
2680  nan pass: 1
2681  nan pass: 1
2682  nan pass: 1
2683  nan pass: 1
2684  nan pass: 1
2685  97.0 pass: 1
2686  96.0 pass: 1
2687  97.0 pass: 1
2688  97.0 pass: 1
2689  97.0 pass: 1
2690  96.0 pass: 1
2691  97.0 pass: 1
2692  96.0 pass: 1
2693  98.0 pass: 1
2694  96.0 pass: 1
2695  97.0 pass: 1
2696  98.0 pass: 1
2697  98.0 pass: 1
2698  96.0 pass: 1
2699  95.0 pass: 1
2700  99.0 pass: 1
2701  99.0 pass: 1
2702  99.0 pass: 1
2703  99.0 pass: 1
2704  99.0 pass: 1
2705  97.0 pass: 1
2706  98.0 pass: 1
2707  98.0 pass: 1
2708  98.0 pass: 1
2709  99.0 pass: 1
2710  97.0 pass: 1
2711  99.0 pass: 1
2712  98.0 pass: 1
2713  98.0 pass: 1
2714  99.0 pass: 1
2715  2716  2717  2718  2719  2720  2721  2722  2723  2724  2725  2726  2727  2728  2729  2730  2731  2732  2733  2734  2735  2736  2737  2738  2739  2740  2741  2742  2743  2744  2745  2746  2747  2748  2749  2750  2751  2752  2753  2754  2755  2756  2757  2758  2759  2760  2761  2762

In [55]:
#main_data[['Country','Year','Diphtheria']]
#main_data.iloc[[3151]]

## Thinness (10-19 Years)

In [56]:
thin_10_19 = pd.read_csv("Data/New/Thinness_10_19/thin_9_10_internet.csv")

In [57]:
# Rename empty column name
thin_10_19.rename(columns = {thin_10_19.columns[0]: "Country"}, inplace=True)

In [58]:
count = 0
thin_10_19_data = pd.DataFrame(columns=['Country', 'Year', 'Thinness_10-19_years'])
for i in range(1, len(thin_10_19['Country'].unique())):
    j = 1
    while j < len(thin_10_19.columns):
        #print(thin_10_19.loc[i,'Country'], end=' ')
        thin_10_19_data.at[count, 'Country'] = thin_10_19.loc[i,'Country']
        thin_10_19_data.at[count, 'Year'] = thin_10_19.columns[j]
        #print(thin_10_19.columns[j], j)
        thin_10_19_data.at[count, 'Thinness_10-19_years'] = thin_10_19[thin_10_19['Country'] == thin_10_19['Country'][i]].T[1:].loc[thin_10_19.columns[j]].values[0].split(' [')[0]
        count+=1
        j += 3
        

In [59]:
#thin_10_19_data[thin_10_19_data['Country'] == "Uganda"]

In [60]:
thin_10_19_data['Year'] = thin_10_19_data['Year'].astype(np.int64)
#thin_10_19_data['Thinness_10-19_years'] = thin_10_19_data['Thinness_10-19_years'].astype(np.float64)

In [61]:
thin_10_19_data.to_pickle("Data/New/Thinness_10_19/Thinness_10_19.pkl")

In [62]:
for val in main_data['Country'].index:
    print(val,' ',end='')
    if len(thin_10_19_data[thin_10_19_data['Country'] == main_data['Country'][val]].values) != 0:
        try:
            thin_10_19_other_data = thin_10_19_data[thin_10_19_data['Country'] == main_data['Country'][val]][thin_10_19_data['Year'] == main_data['Year'][val]]['Thinness_10-19_years'].values[0]
            print(thin_10_19_other_data, "pass: 1")
            main_data.at[val, 'Thinness_10-19_years'] = thin_10_19_other_data
        except IndexError:
            pass
    else:
        try:
            thin_10_19_other_data = thin_10_19_data[thin_10_19_data['Country'].str.startswith(main_data['Country'][val])][thin_10_19_data['Year'] == main_data['Year'][val]]['Thinness_10-19_years'].values[0]
            print(thin_10_19_other_data, "pass: 2")
            main_data.at[val, 'Thinness_10-19_years'] = thin_10_19_other_data
        except IndexError:
            try:
                thin_10_19_other_data = thin_10_19_data[thin_10_19_data['Country'].str.startswith(main_data['Country'][val].split(',')[0])][thin_10_19_data['Year'] == main_data['Year'][val]]['Thinness_10-19_years'].values[0]
                print(thin_10_19_other_data, "pass: 3")
                main_data.at[val, 'Thinness_10-19_years'] = thin_10_19_other_data
            except IndexError:
                pass

0  17.7 pass: 1
1  17.9 pass: 1
2  18.2 pass: 1
3  18.4 pass: 1
4  18.6 pass: 1
5  18.8 pass: 1
6  19.0 pass: 1
7  19.2 pass: 1
8  19.3 pass: 1
9  19.5 pass: 1
10  19.7 pass: 1
11  19.9 pass: 1
12  20.1 pass: 1
13  20.3 pass: 1
14  20.5 pass: 1
15  1.3 pass: 1
16  1.3 pass: 1
17  1.4 pass: 1
18  1.4 pass: 1
19  1.5 pass: 1
20  1.6 pass: 1
21  1.6 pass: 1
22  1.7 pass: 1
23  1.8 pass: 1
24  1.8 pass: 1
25  1.9 pass: 1
26  2.0 pass: 1
27  2.1 pass: 1
28  2.1 pass: 1
29  2.2 pass: 1
30  5.9 pass: 1
31  5.9 pass: 1
32  5.9 pass: 1
33  5.9 pass: 1
34  6.0 pass: 1
35  6.0 pass: 1
36  6.0 pass: 1
37  6.1 pass: 1
38  6.1 pass: 1
39  6.2 pass: 1
40  6.3 pass: 1
41  6.3 pass: 1
42  6.4 pass: 1
43  6.5 pass: 1
44  6.6 pass: 1
45  46  47  48  49  50  51  52  53  54  55  56  57  58  59  60  0.8 pass: 1
61  

  """
  if sys.path[0] == '':


0.8 pass: 1
62  0.8 pass: 1
63  0.7 pass: 1
64  0.7 pass: 1
65  0.7 pass: 1
66  0.7 pass: 1
67  0.7 pass: 1
68  0.7 pass: 1
69  0.7 pass: 1
70  0.7 pass: 1
71  0.7 pass: 1
72  0.7 pass: 1
73  0.7 pass: 1
74  0.7 pass: 1
75  8.6 pass: 1
76  8.8 pass: 1
77  8.9 pass: 1
78  9.1 pass: 1
79  9.3 pass: 1
80  9.5 pass: 1
81  9.6 pass: 1
82  9.8 pass: 1
83  10.0 pass: 1
84  10.2 pass: 1
85  10.4 pass: 1
86  10.5 pass: 1
87  10.7 pass: 1
88  10.9 pass: 1
89  11.1 pass: 1
90  3.3 pass: 1
91  3.3 pass: 1
92  3.3 pass: 1
93  3.3 pass: 1
94  3.4 pass: 1
95  3.4 pass: 1
96  3.4 pass: 1
97  3.4 pass: 1
98  3.5 pass: 1
99  3.5 pass: 1
100  3.5 pass: 1
101  3.6 pass: 1
102  3.6 pass: 1
103  3.7 pass: 1
104  3.7 pass: 1
105  1.0 pass: 1
106  1.0 pass: 1
107  1.0 pass: 1
108  1.0 pass: 1
109  1.0 pass: 1
110  1.0 pass: 1
111  1.1 pass: 1
112  1.1 pass: 1
113  1.1 pass: 1
114  1.1 pass: 1
115  1.2 pass: 1
116  1.2 pass: 1
117  1.2 pass: 1
118  1.2 pass: 1
119  1.3 pass: 1
120  2.1 pass: 1
121  2.0 pass: 1

609  0.9 pass: 1
610  1.0 pass: 1
611  1.0 pass: 1
612  1.0 pass: 1
613  1.1 pass: 1
614  1.1 pass: 1
615  3.8 pass: 1
616  3.9 pass: 1
617  4.1 pass: 1
618  4.2 pass: 1
619  4.4 pass: 1
620  4.5 pass: 1
621  4.7 pass: 1
622  4.8 pass: 1
623  5.0 pass: 1
624  5.1 pass: 1
625  5.3 pass: 1
626  5.5 pass: 1
627  5.7 pass: 1
628  5.9 pass: 1
629  6.0 pass: 1
630  2.1 pass: 1
631  2.1 pass: 1
632  2.2 pass: 1
633  2.2 pass: 1
634  2.2 pass: 1
635  2.3 pass: 1
636  2.3 pass: 1
637  2.3 pass: 1
638  2.4 pass: 1
639  2.4 pass: 1
640  2.5 pass: 1
641  2.5 pass: 1
642  2.5 pass: 1
643  2.6 pass: 1
644  2.6 pass: 1
645  6.8 pass: 1
646  6.9 pass: 1
647  7.0 pass: 1
648  7.1 pass: 1
649  7.2 pass: 1
650  7.2 pass: 1
651  7.3 pass: 1
652  7.4 pass: 1
653  7.5 pass: 1
654  7.5 pass: 1
655  7.6 pass: 1
656  7.7 pass: 1
657  7.8 pass: 1
658  7.9 pass: 1
659  8.0 pass: 1
660  7.7 pass: 3
661  7.9 pass: 3
662  8.0 pass: 3
663  8.1 pass: 3
664  8.3 pass: 3
665  8.4 pass: 3
666  8.5 pass: 3
667  8.7 pass:

1174  3.8 pass: 1
1175  3.8 pass: 1
1176  3.9 pass: 1
1177  3.9 pass: 1
1178  3.9 pass: 1
1179  4.0 pass: 1
1180  4.0 pass: 1
1181  4.1 pass: 1
1182  4.2 pass: 1
1183  4.3 pass: 1
1184  4.4 pass: 1
1185  1186  1187  1188  1189  1190  1191  1192  1193  1194  1195  1196  1197  1198  1199  1200  1.2 pass: 1
1201  1.2 pass: 1
1202  1.2 pass: 1
1203  1.3 pass: 1
1204  1.3 pass: 1
1205  1.3 pass: 1
1206  1.3 pass: 1
1207  1.4 pass: 1
1208  1.4 pass: 1
1209  1.4 pass: 1
1210  1.4 pass: 1
1211  1.5 pass: 1
1212  1.5 pass: 1
1213  1.6 pass: 1
1214  1.6 pass: 1
1215  7.7 pass: 1
1216  7.8 pass: 1
1217  8.0 pass: 1
1218  8.2 pass: 1
1219  8.4 pass: 1
1220  8.6 pass: 1
1221  8.9 pass: 1
1222  9.1 pass: 1
1223  9.3 pass: 1
1224  9.5 pass: 1
1225  9.7 pass: 1
1226  9.9 pass: 1
1227  10.1 pass: 1
1228  10.3 pass: 1
1229  10.5 pass: 1
1230  7.5 pass: 1
1231  7.7 pass: 1
1232  7.8 pass: 1
1233  8.0 pass: 1
1234  8.3 pass: 1
1235  8.5 pass: 1
1236  8.7 pass: 1
1237  8.9 pass: 1
1238  9.2 pass: 1
1239  9

1752  1.0 pass: 1
1753  1.0 pass: 1
1754  1.0 pass: 1
1755  1756  1757  1758  1759  1760  1761  1762  1763  1764  1765  1766  1767  1768  1769  1770  7.3 pass: 1
1771  7.4 pass: 1
1772  7.5 pass: 1
1773  7.6 pass: 1
1774  7.7 pass: 1
1775  7.8 pass: 1
1776  7.9 pass: 1
1777  8.0 pass: 1
1778  8.1 pass: 1
1779  8.2 pass: 1
1780  8.3 pass: 1
1781  8.5 pass: 1
1782  8.6 pass: 1
1783  8.7 pass: 1
1784  8.8 pass: 1
1785  6.5 pass: 1
1786  6.6 pass: 1
1787  6.7 pass: 1
1788  6.8 pass: 1
1789  6.9 pass: 1
1790  7.0 pass: 1
1791  7.1 pass: 1
1792  7.3 pass: 1
1793  7.4 pass: 1
1794  7.5 pass: 1
1795  7.6 pass: 1
1796  7.7 pass: 1
1797  7.9 pass: 1
1798  8.0 pass: 1
1799  8.1 pass: 1
1800  7.8 pass: 1
1801  7.9 pass: 1
1802  8.0 pass: 1
1803  8.2 pass: 1
1804  8.3 pass: 1
1805  8.5 pass: 1
1806  8.7 pass: 1
1807  8.8 pass: 1
1808  9.0 pass: 1
1809  9.2 pass: 1
1810  9.3 pass: 1
1811  9.5 pass: 1
1812  9.7 pass: 1
1813  9.8 pass: 1
1814  9.9 pass: 1
1815  13.7 pass: 1
1816  13.8 pass: 1
1817  13

2293  1.6 pass: 1
2294  1.6 pass: 1
2295  2.0 pass: 1
2296  2.0 pass: 1
2297  2.1 pass: 1
2298  2.1 pass: 1
2299  2.1 pass: 1
2300  2.1 pass: 1
2301  2.2 pass: 1
2302  2.2 pass: 1
2303  2.3 pass: 1
2304  2.3 pass: 1
2305  2.4 pass: 1
2306  2.4 pass: 1
2307  2.5 pass: 1
2308  2.5 pass: 1
2309  2.6 pass: 1
2310  1.1 pass: 1
2311  1.1 pass: 1
2312  1.1 pass: 1
2313  1.1 pass: 1
2314  1.2 pass: 1
2315  1.2 pass: 1
2316  1.2 pass: 1
2317  1.2 pass: 1
2318  1.3 pass: 1
2319  1.3 pass: 1
2320  1.3 pass: 1
2321  1.4 pass: 1
2322  1.4 pass: 1
2323  1.4 pass: 1
2324  1.5 pass: 1
2325  10.0 pass: 1
2326  10.0 pass: 1
2327  10.0 pass: 1
2328  10.0 pass: 1
2329  10.0 pass: 1
2330  10.0 pass: 1
2331  10.0 pass: 1
2332  10.0 pass: 1
2333  10.0 pass: 1
2334  10.0 pass: 1
2335  10.0 pass: 1
2336  10.0 pass: 1
2337  10.0 pass: 1
2338  10.0 pass: 1
2339  10.0 pass: 1
2340  2.0 pass: 1
2341  2.0 pass: 1
2342  2.0 pass: 1
2343  2.1 pass: 1
2344  2.1 pass: 1
2345  2.2 pass: 1
2346  2.2 pass: 1
2347  2.3 pas

2884  8.3 pass: 1
2885  8.5 pass: 1
2886  8.6 pass: 1
2887  8.7 pass: 1
2888  8.9 pass: 1
2889  9.0 pass: 1
2890  9.1 pass: 1
2891  9.2 pass: 1
2892  9.3 pass: 1
2893  9.4 pass: 1
2894  9.4 pass: 1
2895  11.1 pass: 1
2896  11.2 pass: 1
2897  11.3 pass: 1
2898  11.5 pass: 1
2899  11.6 pass: 1
2900  11.7 pass: 1
2901  11.8 pass: 1
2902  11.9 pass: 1
2903  12.0 pass: 1
2904  12.0 pass: 1
2905  12.1 pass: 1
2906  12.1 pass: 1
2907  12.1 pass: 1
2908  12.2 pass: 1
2909  12.2 pass: 1
2910  6.8 pass: 1
2911  7.0 pass: 1
2912  7.1 pass: 1
2913  7.3 pass: 1
2914  7.5 pass: 1
2915  7.7 pass: 1
2916  7.9 pass: 1
2917  8.1 pass: 1
2918  8.4 pass: 1
2919  8.6 pass: 1
2920  8.8 pass: 1
2921  9.0 pass: 1
2922  9.2 pass: 1
2923  9.4 pass: 1
2924  9.6 pass: 1
2925  0.1 pass: 1
2926  0.1 pass: 1
2927  0.1 pass: 1
2928  0.1 pass: 1
2929  0.1 pass: 1
2930  0.1 pass: 1
2931  0.1 pass: 1
2932  0.1 pass: 1
2933  0.1 pass: 1
2934  0.1 pass: 1
2935  0.1 pass: 1
2936  0.1 pass: 1
2937  0.1 pass: 1
2938  0.1 pas

In [63]:
main_data[['Country','Year','Thinness_10-19_years']].isnull().sum()

Country                   0
Year                      0
Thinness_10-19_years    585
dtype: int64

In [64]:
thin_5_9 = pd.read_csv("Data/New/Thinness_5_9/thin_5_9_internet.csv")

In [65]:
# Rename empty column name
thin_5_9.rename(columns = {thin_5_9.columns[0]: "Country"}, inplace=True)

In [66]:
count = 0
thin_5_9_data = pd.DataFrame(columns=['Country', 'Year', 'Thinness_5-9_years'])
for i in range(1, len(thin_5_9['Country'].unique())):
    j = 1
    while j < len(thin_5_9.columns):
        #print(thin_5_9.loc[i,'Country'], end=' ')
        thin_5_9_data.at[count, 'Country'] = thin_5_9.loc[i,'Country']
        thin_5_9_data.at[count, 'Year'] = thin_5_9.columns[j]
        #print(thin_5_9.columns[j], j)
        thin_5_9_data.at[count, 'Thinness_5-9_years'] = thin_5_9[thin_5_9['Country'] == thin_5_9['Country'][i]].T[1:].loc[thin_5_9.columns[j]].values[0].split(' [')[0]
        count+=1
        j += 3
        

In [67]:
thin_5_9_data['Year'] = thin_5_9_data['Year'].astype(np.int64)
#thin_5_9_data['Thinness_5-9_years'] = thin_5_9_data['Thinness_5-9_years'].astype(np.float64)

In [68]:
thin_5_9_data.to_pickle("Data/New/Thinness_5_9/thin_5_9_internet.pkl")

In [69]:
for val in main_data['Country'].index:
    print(val,' ',end='')
    if len(thin_5_9_data[thin_5_9_data['Country'] == main_data['Country'][val]].values) != 0:
        try:
            thin_5_9_other_data = thin_5_9_data[thin_5_9_data['Country'] == main_data['Country'][val]][thin_5_9_data['Year'] == main_data['Year'][val]]['Thinness_5-9_years'].values[0]
            print(thin_5_9_other_data, "pass: 1")
            main_data.at[val, 'Thinness_5-9_years'] = thin_5_9_other_data
        except IndexError:
            pass
    else:
        try:
            thin_5_9_other_data = thin_5_9_data[thin_5_9_data['Country'].str.startswith(main_data['Country'][val])][thin_5_9_data['Year'] == main_data['Year'][val]]['Thinness_5-9_years'].values[0]
            print(thin_5_9_other_data, "pass: 2")
            main_data.at[val, 'Thinness_5-9_years'] = thin_5_9_other_data
        except IndexError:
            try:
                thin_5_9_other_data = thin_5_9_data[thin_5_9_data['Country'].str.startswith(main_data['Country'][val].split(',')[0])][thin_5_9_data['Year'] == main_data['Year'][val]]['Thinness_5-9_years'].values[0]
                print(thin_5_9_other_data, "pass: 3")
                main_data.at[val, 'Thinness_5-9_years'] = thin_5_9_other_data
            except IndexError:
                pass

0  17.7 pass: 1
1  18.0 pass: 1
2  18.2 pass: 1
3  18.4 pass: 1
4  18.7 pass: 1
5  18.9 pass: 1
6  19.1 pass: 1
7  19.3 pass: 1
8  19.5 pass: 1
9  19.7 pass: 1
10  19.9 pass: 1
11  20.2 pass: 1
12  20.4 pass: 1
13  20.5 pass: 1
14  20.7 pass: 1
15  1.4 pass: 1
16  1.4 pass: 1
17  1.5 pass: 1
18  1.5 pass: 1
19  1.6 pass: 1
20  1.6 pass: 1
21  1.7 pass: 1
22  1.8 pass: 1
23  1.8 pass: 1
24  1.9 pass: 1
25  2.0 pass: 1
26  2.1 pass: 1
27  2.1 pass: 1
28  2.2 pass: 1
29  2.3 pass: 1
30  5.8 pass: 1
31  5.8 pass: 1
32  5.8 pass: 1
33  5.8 pass: 1
34  5.9 pass: 1
35  5.9 pass: 1
36  5.9 pass: 1
37  6.0 pass: 1
38  6.0 pass: 1
39  6.1 pass: 1
40  6.1 pass: 1
41  6.2 pass: 1
42  6.3 pass: 1
43  

  """


6.4 pass: 1
44  6.5 pass: 1
45  46  47  48  49  50  51  52  53  54  55  56  57  58  59  60  0.7 pass: 1
61  0.7 pass: 1
62  0.7 pass: 1
63  0.7 pass: 1
64  0.7 pass: 1
65  0.7 pass: 1
66  0.7 pass: 1
67  0.7 pass: 1
68  0.7 pass: 1
69  0.7 pass: 1
70  0.7 pass: 1
71  0.7 pass: 1
72  0.7 pass: 1
73  0.7 pass: 1
74  0.7 pass: 1
75  8.5 pass: 1
76  8.6 pass: 1
77  

  if sys.path[0] == '':


8.8 pass: 1
78  9.0 pass: 1
79  9.2 pass: 1
80  9.4 pass: 1
81  9.6 pass: 1
82  9.7 pass: 1
83  9.9 pass: 1
84  10.1 pass: 1
85  10.3 pass: 1
86  10.5 pass: 1
87  10.7 pass: 1
88  10.9 pass: 1
89  11.0 pass: 1
90  3.3 pass: 1
91  3.3 pass: 1
92  3.3 pass: 1
93  3.3 pass: 1
94  3.3 pass: 1
95  3.3 pass: 1
96  3.3 pass: 1
97  3.4 pass: 1
98  3.4 pass: 1
99  3.4 pass: 1
100  3.5 pass: 1
101  3.5 pass: 1
102  3.5 pass: 1
103  3.6 pass: 1
104  3.6 pass: 1
105  0.9 pass: 1
106  0.9 pass: 1
107  0.9 pass: 1
108  0.9 pass: 1
109  0.9 pass: 1
110  0.9 pass: 1
111  0.9 pass: 1
112  0.9 pass: 1
113  1.0 pass: 1
114  1.0 pass: 1
115  1.0 pass: 1
116  1.0 pass: 1
117  1.1 pass: 1
118  1.1 pass: 1
119  1.1 pass: 1
120  2.1 pass: 1
121  2.1 pass: 1
122  2.1 pass: 1
123  2.1 pass: 1
124  2.1 pass: 1
125  2.1 pass: 1
126  2.1 pass: 1
127  2.1 pass: 1
128  2.1 pass: 1
129  2.1 pass: 1
130  2.1 pass: 1
131  2.1 pass: 1
132  2.1 pass: 1
133  2.2 pass: 1
134  2.2 pass: 1
135  136  137  138  139  140  141  

666  8.2 pass: 3
667  8.3 pass: 3
668  8.5 pass: 3
669  8.6 pass: 3
670  8.8 pass: 3
671  8.9 pass: 3
672  9.0 pass: 3
673  9.2 pass: 3
674  9.3 pass: 3
675  7.4 pass: 3
676  7.5 pass: 3
677  7.6 pass: 3
678  7.7 pass: 3
679  7.9 pass: 3
680  8.0 pass: 3
681  8.2 pass: 3
682  8.3 pass: 3
683  8.5 pass: 3
684  8.6 pass: 3
685  8.8 pass: 3
686  8.9 pass: 3
687  9.0 pass: 3
688  9.2 pass: 3
689  9.3 pass: 3
690  1.7 pass: 1
691  1.7 pass: 1
692  1.7 pass: 1
693  1.8 pass: 1
694  1.8 pass: 1
695  1.8 pass: 1
696  1.9 pass: 1
697  1.9 pass: 1
698  2.0 pass: 1
699  2.0 pass: 1
700  2.1 pass: 1
701  2.2 pass: 1
702  2.2 pass: 1
703  2.3 pass: 1
704  2.4 pass: 1
705  706  707  708  709  710  711  712  713  714  715  716  717  718  719  720  1.5 pass: 1
721  1.5 pass: 1
722  1.5 pass: 1
723  1.6 pass: 1
724  1.6 pass: 1
725  1.7 pass: 1
726  1.7 pass: 1
727  1.8 pass: 1
728  1.8 pass: 1
729  1.9 pass: 1
730  1.9 pass: 1
731  2.0 pass: 1
732  2.0 pass: 1
733  2.1 pass: 1
734  2.1 pass: 1
735  3.

1288  2.7 pass: 1
1289  2.8 pass: 1
1290  1291  1292  1293  1294  1295  1296  1297  1298  1299  1300  1301  1302  1303  1304  1305  1.7 pass: 1
1306  1.7 pass: 1
1307  1.7 pass: 1
1308  1.8 pass: 1
1309  1.8 pass: 1
1310  1.9 pass: 1
1311  1.9 pass: 1
1312  2.0 pass: 1
1313  2.0 pass: 1
1314  2.1 pass: 1
1315  2.1 pass: 1
1316  2.2 pass: 1
1317  2.2 pass: 1
1318  2.3 pass: 1
1319  2.3 pass: 1
1320  0.9 pass: 1
1321  0.9 pass: 1
1322  0.9 pass: 1
1323  0.9 pass: 1
1324  0.9 pass: 1
1325  0.9 pass: 1
1326  0.9 pass: 1
1327  0.9 pass: 1
1328  0.9 pass: 1
1329  0.9 pass: 1
1330  0.9 pass: 1
1331  0.9 pass: 1
1332  0.9 pass: 1
1333  0.9 pass: 1
1334  0.9 pass: 1
1335  27.5 pass: 1
1336  27.6 pass: 1
1337  27.7 pass: 1
1338  27.8 pass: 1
1339  27.8 pass: 1
1340  27.9 pass: 1
1341  28.0 pass: 1
1342  28.0 pass: 1
1343  28.1 pass: 1
1344  28.2 pass: 1
1345  28.3 pass: 1
1346  28.4 pass: 1
1347  28.5 pass: 1
1348  28.6 pass: 1
1349  28.8 pass: 1
1350  10.3 pass: 1
1351  10.4 pass: 1
1352  10.5 

1846  0.7 pass: 1
1847  0.7 pass: 1
1848  0.7 pass: 1
1849  0.7 pass: 1
1850  0.7 pass: 1
1851  0.7 pass: 1
1852  0.7 pass: 1
1853  0.7 pass: 1
1854  0.7 pass: 1
1855  0.7 pass: 1
1856  0.7 pass: 1
1857  0.7 pass: 1
1858  0.7 pass: 1
1859  0.7 pass: 1
1860  0.1 pass: 1
1861  0.1 pass: 1
1862  0.1 pass: 1
1863  0.1 pass: 1
1864  0.1 pass: 1
1865  0.1 pass: 1
1866  0.1 pass: 1
1867  0.1 pass: 1
1868  0.1 pass: 1
1869  0.1 pass: 1
1870  0.1 pass: 1
1871  0.1 pass: 1
1872  0.2 pass: 1
1873  0.2 pass: 1
1874  0.2 pass: 1
1875  7.9 pass: 1
1876  8.1 pass: 1
1877  8.3 pass: 1
1878  8.5 pass: 1
1879  8.7 pass: 1
1880  8.9 pass: 1
1881  9.1 pass: 1
1882  9.4 pass: 1
1883  9.6 pass: 1
1884  9.8 pass: 1
1885  10.1 pass: 1
1886  10.3 pass: 1
1887  10.5 pass: 1
1888  10.7 pass: 1
1889  11.0 pass: 1
1890  7.0 pass: 1
1891  7.0 pass: 1
1892  7.1 pass: 1
1893  7.2 pass: 1
1894  7.3 pass: 1
1895  7.3 pass: 1
1896  7.4 pass: 1
1897  7.5 pass: 1
1898  7.6 pass: 1
1899  7.7 pass: 1
1900  7.8 pass: 1
1901 

2354  2.8 pass: 1
2355  0.5 pass: 1
2356  0.5 pass: 1
2357  0.5 pass: 1
2358  0.5 pass: 1
2359  0.5 pass: 1
2360  0.5 pass: 1
2361  0.5 pass: 1
2362  0.5 pass: 1
2363  0.6 pass: 1
2364  0.6 pass: 1
2365  0.6 pass: 1
2366  0.6 pass: 1
2367  0.6 pass: 1
2368  0.6 pass: 1
2369  0.6 pass: 1
2370  2371  2372  2373  2374  2375  2376  2377  2378  2379  2380  2381  2382  2383  2384  2385  4.7 pass: 1
2386  4.7 pass: 1
2387  4.6 pass: 1
2388  4.6 pass: 1
2389  4.6 pass: 1
2390  4.5 pass: 1
2391  4.5 pass: 1
2392  4.5 pass: 1
2393  4.5 pass: 1
2394  4.5 pass: 1
2395  4.5 pass: 1
2396  4.5 pass: 1
2397  4.5 pass: 1
2398  4.5 pass: 1
2399  4.5 pass: 1
2400  2.9 pass: 1
2401  3.0 pass: 1
2402  3.1 pass: 1
2403  3.2 pass: 1
2404  3.3 pass: 1
2405  3.5 pass: 1
2406  3.6 pass: 1
2407  3.7 pass: 1
2408  3.8 pass: 1
2409  3.9 pass: 1
2410  4.0 pass: 1
2411  4.1 pass: 1
2412  4.2 pass: 1
2413  4.3 pass: 1
2414  4.3 pass: 1
2415  2.3 pass: 1
2416  2.4 pass: 1
2417  2.4 pass: 1
2418  2.5 pass: 1
2419  2.5 

2909  12.3 pass: 1
2910  6.6 pass: 1
2911  6.8 pass: 1
2912  7.0 pass: 1
2913  7.2 pass: 1
2914  7.4 pass: 1
2915  7.6 pass: 1
2916  7.8 pass: 1
2917  8.0 pass: 1
2918  8.3 pass: 1
2919  8.5 pass: 1
2920  8.7 pass: 1
2921  8.9 pass: 1
2922  9.1 pass: 1
2923  9.3 pass: 1
2924  9.5 pass: 1
2925  0.1 pass: 1
2926  0.1 pass: 1
2927  0.1 pass: 1
2928  0.1 pass: 1
2929  0.1 pass: 1
2930  0.1 pass: 1
2931  0.1 pass: 1
2932  0.1 pass: 1
2933  0.1 pass: 1
2934  0.1 pass: 1
2935  0.1 pass: 1
2936  0.1 pass: 1
2937  0.1 pass: 1
2938  0.1 pass: 1
2939  0.1 pass: 1
2940  6.0 pass: 1
2941  6.1 pass: 1
2942  6.2 pass: 1
2943  6.3 pass: 1
2944  6.4 pass: 1
2945  6.4 pass: 1
2946  6.5 pass: 1
2947  6.6 pass: 1
2948  6.7 pass: 1
2949  6.8 pass: 1
2950  6.9 pass: 1
2951  7.0 pass: 1
2952  7.1 pass: 1
2953  7.2 pass: 1
2954  7.3 pass: 1
2955  6.3 pass: 1
2956  6.3 pass: 1
2957  6.3 pass: 1
2958  6.3 pass: 1
2959  6.3 pass: 1
2960  6.3 pass: 1
2961  6.3 pass: 1
2962  6.3 pass: 1
2963  6.3 pass: 1
2964  6.3

In [70]:
main_data[['Country','Year','Thinness_5-9_years']].isnull().sum()

Country                 0
Year                    0
Thinness_5-9_years    585
dtype: int64

## STATUS OF EACH COUNTRY

In [83]:
status_data = pd.read_csv("Data/New/Status/Status_Of_Country.csv")

In [84]:
status_data.rename(columns = {status.columns[0]: "ID"}, inplace=True)
status_data.columns

Index(['x', 'Economy', 'Code', 'Region', 'Income group', 'Lending category',
       'Other'],
      dtype='object')

In [85]:
status_data.head()

Unnamed: 0,x,Economy,Code,Region,Income group,Lending category,Other
0,1,Afghanistan,AFG,South Asia,Low income,IDA,HIPC
1,2,Albania,ALB,Europe & Central Asia,Upper middle income,IBRD,
2,3,Algeria,DZA,Middle East & North Africa,Upper middle income,IBRD,
3,4,American Samoa,ASM,East Asia & Pacific,Upper middle income,..,
4,5,Andorra,AND,Europe & Central Asia,High income,..,


In [100]:
for val in main_data['Country'].index:
    print(val,' ',end='')
    if len(status_data[status_data['Economy'] == main_data['Country'][val]].values) != 0:
        try:
            status_other_data = status_data[status_data['Economy'] == main_data['Country'][val]]["Income group"].values[0]
            print(status_other_data, "pass: 1")
            main_data.at[val, 'Status'] = status_other_data
        except IndexError:
            pass
    else:
        try:
            status_other_data = status_data[status_data['Economy'].str.startswith(main_data['Country'][val])]["Income group"].values[0]
            print(status_other_data, "pass: 2")
            main_data.at[val, 'Status'] = status_other_data
        except IndexError:
            try:
                status_other_data = status_data[status_data['Economy'].str.startswith(main_data['Country'][val].split(',')[0])]["Income group"].values[0]
                print(status_other_data, "pass: 3")
                main_data.at[val, 'Status'] = status_other_data
            except IndexError:
                pass

0  Low income pass: 1
1  Low income pass: 1
2  Low income pass: 1
3  Low income pass: 1
4  Low income pass: 1
5  Low income pass: 1
6  Low income pass: 1
7  Low income pass: 1
8  Low income pass: 1
9  Low income pass: 1
10  Low income pass: 1
11  Low income pass: 1
12  Low income pass: 1
13  Low income pass: 1
14  Low income pass: 1
15  Upper middle income pass: 1
16  Upper middle income pass: 1
17  Upper middle income pass: 1
18  Upper middle income pass: 1
19  Upper middle income pass: 1
20  Upper middle income pass: 1
21  Upper middle income pass: 1
22  Upper middle income pass: 1
23  Upper middle income pass: 1
24  Upper middle income pass: 1
25  Upper middle income pass: 1
26  Upper middle income pass: 1
27  Upper middle income pass: 1
28  Upper middle income pass: 1
29  Upper middle income pass: 1
30  Upper middle income pass: 1
31  Upper middle income pass: 1
32  Upper middle income pass: 1
33  Upper middle income pass: 1
34  Upper middle income pass: 1
35  Upper middle income p

425  High income pass: 1
426  High income pass: 1
427  High income pass: 1
428  High income pass: 1
429  High income pass: 1
430  High income pass: 1
431  High income pass: 1
432  High income pass: 1
433  High income pass: 1
434  High income pass: 1
435  Upper middle income pass: 1
436  Upper middle income pass: 1
437  Upper middle income pass: 1
438  Upper middle income pass: 1
439  Upper middle income pass: 1
440  Upper middle income pass: 1
441  Upper middle income pass: 1
442  Upper middle income pass: 1
443  Upper middle income pass: 1
444  Upper middle income pass: 1
445  Upper middle income pass: 1
446  Upper middle income pass: 1
447  Upper middle income pass: 1
448  Upper middle income pass: 1
449  Upper middle income pass: 1
450  Low income pass: 1
451  Low income pass: 1
452  Low income pass: 1
453  Low income pass: 1
454  Low income pass: 1
455  Low income pass: 1
456  Low income pass: 1
457  Low income pass: 1
458  Low income pass: 1
459  Low income pass: 1
460  Low income

859  Upper middle income pass: 1
860  Upper middle income pass: 1
861  Upper middle income pass: 1
862  Upper middle income pass: 1
863  Upper middle income pass: 1
864  Upper middle income pass: 1
865  Upper middle income pass: 1
866  Upper middle income pass: 1
867  Upper middle income pass: 1
868  Upper middle income pass: 1
869  Upper middle income pass: 1
870  Lower middle income pass: 1
871  Lower middle income pass: 1
872  Lower middle income pass: 1
873  Lower middle income pass: 1
874  Lower middle income pass: 1
875  Lower middle income pass: 1
876  Lower middle income pass: 1
877  Lower middle income pass: 1
878  Lower middle income pass: 1
879  Lower middle income pass: 1
880  Lower middle income pass: 1
881  Lower middle income pass: 1
882  Lower middle income pass: 1
883  Lower middle income pass: 1
884  Lower middle income pass: 1
885  Lower middle income pass: 1
886  Lower middle income pass: 1
887  Lower middle income pass: 1
888  Lower middle income pass: 1
889  Lower

1315  High income pass: 1
1316  High income pass: 1
1317  High income pass: 1
1318  High income pass: 1
1319  High income pass: 1
1320  High income pass: 1
1321  High income pass: 1
1322  High income pass: 1
1323  High income pass: 1
1324  High income pass: 1
1325  High income pass: 1
1326  High income pass: 1
1327  High income pass: 1
1328  High income pass: 1
1329  High income pass: 1
1330  High income pass: 1
1331  High income pass: 1
1332  High income pass: 1
1333  High income pass: 1
1334  High income pass: 1
1335  Lower middle income pass: 1
1336  Lower middle income pass: 1
1337  Lower middle income pass: 1
1338  Lower middle income pass: 1
1339  Lower middle income pass: 1
1340  Lower middle income pass: 1
1341  Lower middle income pass: 1
1342  Lower middle income pass: 1
1343  Lower middle income pass: 1
1344  Lower middle income pass: 1
1345  Lower middle income pass: 1
1346  Lower middle income pass: 1
1347  Lower middle income pass: 1
1348  Lower middle income pass: 1
1349

1590  High income pass: 1
1591  High income pass: 1
1592  High income pass: 1
1593  High income pass: 1
1594  High income pass: 1
1595  High income pass: 1
1596  High income pass: 1
1597  High income pass: 1
1598  High income pass: 1
1599  High income pass: 1
1600  High income pass: 1
1601  High income pass: 1
1602  High income pass: 1
1603  High income pass: 1
1604  High income pass: 1
1605  Lower middle income pass: 1
1606  Lower middle income pass: 1
1607  Lower middle income pass: 1
1608  Lower middle income pass: 1
1609  Lower middle income pass: 1
1610  Lower middle income pass: 1
1611  Lower middle income pass: 1
1612  Lower middle income pass: 1
1613  Lower middle income pass: 1
1614  Lower middle income pass: 1
1615  Lower middle income pass: 1
1616  Lower middle income pass: 1
1617  Lower middle income pass: 1
1618  Lower middle income pass: 1
1619  Lower middle income pass: 1
1620  Lower middle income pass: 1
1621  Lower middle income pass: 1
1622  Lower middle income pass: 

1907  Upper middle income pass: 1
1908  Upper middle income pass: 1
1909  Upper middle income pass: 1
1910  Upper middle income pass: 1
1911  Upper middle income pass: 1
1912  Upper middle income pass: 1
1913  Upper middle income pass: 1
1914  Upper middle income pass: 1
1915  Upper middle income pass: 1
1916  Upper middle income pass: 1
1917  Upper middle income pass: 1
1918  Upper middle income pass: 1
1919  Upper middle income pass: 1
1920  Lower middle income pass: 1
1921  Lower middle income pass: 1
1922  Lower middle income pass: 1
1923  Lower middle income pass: 1
1924  Lower middle income pass: 1
1925  Lower middle income pass: 1
1926  Lower middle income pass: 1
1927  Lower middle income pass: 1
1928  Lower middle income pass: 1
1929  Lower middle income pass: 1
1930  Lower middle income pass: 1
1931  Lower middle income pass: 1
1932  Lower middle income pass: 1
1933  Lower middle income pass: 1
1934  Lower middle income pass: 1
1935  Lower middle income pass: 1
1936  Lower mi

2248  Lower middle income pass: 1
2249  Lower middle income pass: 1
2250  High income pass: 1
2251  High income pass: 1
2252  High income pass: 1
2253  High income pass: 1
2254  High income pass: 1
2255  High income pass: 1
2256  High income pass: 1
2257  High income pass: 1
2258  High income pass: 1
2259  High income pass: 1
2260  High income pass: 1
2261  High income pass: 1
2262  High income pass: 1
2263  High income pass: 1
2264  High income pass: 1
2265  High income pass: 1
2266  High income pass: 1
2267  High income pass: 1
2268  High income pass: 1
2269  High income pass: 1
2270  High income pass: 1
2271  High income pass: 1
2272  High income pass: 1
2273  High income pass: 1
2274  High income pass: 1
2275  High income pass: 1
2276  High income pass: 1
2277  High income pass: 1
2278  High income pass: 1
2279  High income pass: 1
2280  Lower middle income pass: 1
2281  Lower middle income pass: 1
2282  Lower middle income pass: 1
2283  Lower middle income pass: 1
2284  Lower midd

2630  Lower middle income pass: 1
2631  Lower middle income pass: 1
2632  Lower middle income pass: 1
2633  Lower middle income pass: 1
2634  Lower middle income pass: 1
2635  Lower middle income pass: 1
2636  Lower middle income pass: 1
2637  Lower middle income pass: 1
2638  Lower middle income pass: 1
2639  Lower middle income pass: 1
2640  Low income pass: 1
2641  Low income pass: 1
2642  Low income pass: 1
2643  Low income pass: 1
2644  Low income pass: 1
2645  Low income pass: 1
2646  Low income pass: 1
2647  Low income pass: 1
2648  Low income pass: 1
2649  Low income pass: 1
2650  Low income pass: 1
2651  Low income pass: 1
2652  Low income pass: 1
2653  Low income pass: 1
2654  Low income pass: 1
2655  Upper middle income pass: 1
2656  Upper middle income pass: 1
2657  Upper middle income pass: 1
2658  Upper middle income pass: 1
2659  Upper middle income pass: 1
2660  Upper middle income pass: 1
2661  Upper middle income pass: 1
2662  Upper middle income pass: 1
2663  Upper m

2938  Upper middle income pass: 1
2939  Upper middle income pass: 1
2940  High income pass: 1
2941  High income pass: 1
2942  High income pass: 1
2943  High income pass: 1
2944  High income pass: 1
2945  High income pass: 1
2946  High income pass: 1
2947  High income pass: 1
2948  High income pass: 1
2949  High income pass: 1
2950  High income pass: 1
2951  High income pass: 1
2952  High income pass: 1
2953  High income pass: 1
2954  High income pass: 1
2955  Lower middle income pass: 1
2956  Lower middle income pass: 1
2957  Lower middle income pass: 1
2958  Lower middle income pass: 1
2959  Lower middle income pass: 1
2960  Lower middle income pass: 1
2961  Lower middle income pass: 1
2962  Lower middle income pass: 1
2963  Lower middle income pass: 1
2964  Lower middle income pass: 1
2965  Lower middle income pass: 1
2966  Lower middle income pass: 1
2967  Lower middle income pass: 1
2968  Lower middle income pass: 1
2969  Lower middle income pass: 1
2970  Upper middle income pass: 

In [105]:
main_data.sample(10)

Unnamed: 0,Country,Year,Status,Life_Expectancy,Adult_Mortality,Infant_Deaths,Alcohol,Percentage_Expenditure,Measles,BMI,...,Polio,Total_Expenditure,Diphtheria,HIV/AIDS,GDP,Population,Thinness_10-19_years,Thinness_5-9_years,Income_Composition_Of_Resources,Schooling
1825,Maldives,2004,Upper middle income,,,,,,37.0,,...,,15.88,96.0,,3952.12,310423.0,14.6,14.7,,
2650,Somalia,2004,Low income,,,,,,12008.0,,...,,,30.0,,,10116228.0,7.7,7.5,,
2577,Singapore,2002,High income,,,,,,211.0,,...,,6.41,94.0,,22016.8,4175950.0,2.1,2.0,,
2156,Niger,2003,Low income,,,,,,54190.0,,...,,9.39,41.0,,215.805,12656870.0,12.5,12.5,,
1746,Luxembourg,2008,High income,,,,,,1.0,,...,,16.59,99.0,,114294.0,488650.0,0.9,0.9,,
252,Barbados,2002,High income,,,,,,0.0,,...,,10.14,87.0,,11675.3,271478.0,4.2,4.2,,
1388,Iraq,2006,Upper middle income,,,,,,474.0,,...,,3.2,59.0,,2351.81,27697912.0,5.4,5.2,,
242,Barbados,2012,High income,,,,,,0.0,,...,,10.69,87.0,,16536.2,281585.0,3.8,3.8,,
757,Curacao,2007,,,,,,,,,...,,,,,,144056.0,,,,
1750,Luxembourg,2004,High income,,,,,,0.0,,...,,16.34,99.0,,75716.4,458095.0,1.0,0.9,,


75