In [1]:
import pandas as pd
import numpy as np
import matplotlib as plt
import seaborn as sns

%matplotlib inline

In [2]:
# import gdp dataset using pandas read_csv() method
gdp_df = pd.read_csv('../Data/gdp_percapita.csv', nrows = 6868)

In [3]:
gdp_df.head(6)

Unnamed: 0,Country or Area,Year,Value,Value Footnotes
0,Afghanistan,2019,2065.036398,
1,Afghanistan,2018,2033.779002,
2,Afghanistan,2017,2058.383832,
3,Afghanistan,2016,2057.062164,
4,Afghanistan,2015,2068.265904,
5,Afghanistan,2014,2102.385234,


In [4]:
# import internet usage dataset using pandas read_csv() method
internet_df = pd.read_csv('../Data/internet_use.csv', nrows = 4495)

In [5]:
internet_df.head(6)

Unnamed: 0,Country or Area,Year,Value,Value Footnotes
0,Afghanistan,2014,6.39,
1,Afghanistan,2013,5.9,
2,Afghanistan,2012,5.454545,
3,Afghanistan,2011,5.0,
4,Afghanistan,2010,4.0,
5,Afghanistan,2009,3.55,


Look at shape of each df. how many rows & columns?

In [6]:
#rows - 6870
#columns - 4
gdp_df.shape

(6868, 4)

In [7]:
#rows - 4675
#columns - 4
internet_df.shape

(4495, 4)

In [8]:
gdp_df.dtypes

Country or Area     object
Year                 int64
Value              float64
Value Footnotes    float64
dtype: object

In [9]:
internet_df.dtypes

Country or Area     object
Year                 int64
Value              float64
Value Footnotes    float64
dtype: object

In [10]:
gdp_df.tail(10)

Unnamed: 0,Country or Area,Year,Value,Value Footnotes
6858,Zimbabwe,1999,3600.849291,
6859,Zimbabwe,1998,3653.920016,
6860,Zimbabwe,1997,3580.048793,
6861,Zimbabwe,1996,3520.430146,
6862,Zimbabwe,1995,3226.41393,
6863,Zimbabwe,1994,3263.934978,
6864,Zimbabwe,1993,3033.504852,
6865,Zimbabwe,1992,3054.889178,
6866,Zimbabwe,1991,3426.598094,
6867,Zimbabwe,1990,3324.348171,


In [11]:
internet_df.tail(10)

Unnamed: 0,Country or Area,Year,Value,Value Footnotes
4485,Zimbabwe,2002,3.994356,
4486,Zimbabwe,2001,0.799846,
4487,Zimbabwe,2000,0.401434,
4488,Zimbabwe,1999,0.161676,
4489,Zimbabwe,1998,0.081648,
4490,Zimbabwe,1997,0.03308,
4491,Zimbabwe,1996,0.01679,
4492,Zimbabwe,1995,0.007684,
4493,Zimbabwe,1994,0.001739,
4494,Zimbabwe,1990,0.0,


In [12]:
gdp_df.columns

Index(['Country or Area', 'Year', 'Value', 'Value Footnotes'], dtype='object')

In [13]:
# Drop value footnotes column from both df's. verify output
gdp_df = gdp_df.drop(columns = 'Value Footnotes')
gdp_df

Unnamed: 0,Country or Area,Year,Value
0,Afghanistan,2019,2065.036398
1,Afghanistan,2018,2033.779002
2,Afghanistan,2017,2058.383832
3,Afghanistan,2016,2057.062164
4,Afghanistan,2015,2068.265904
...,...,...,...
6863,Zimbabwe,1994,3263.934978
6864,Zimbabwe,1993,3033.504852
6865,Zimbabwe,1992,3054.889178
6866,Zimbabwe,1991,3426.598094


In [14]:
internet_df = internet_df.drop(columns = 'Value Footnotes')
internet_df

Unnamed: 0,Country or Area,Year,Value
0,Afghanistan,2014,6.390000
1,Afghanistan,2013,5.900000
2,Afghanistan,2012,5.454545
3,Afghanistan,2011,5.000000
4,Afghanistan,2010,4.000000
...,...,...,...
4490,Zimbabwe,1997,0.033080
4491,Zimbabwe,1996,0.016790
4492,Zimbabwe,1995,0.007684
4493,Zimbabwe,1994,0.001739


In [15]:
# Change the columns for gdp_df to 'Country' 'Year' and 'GDP_Per_Capita' & verify change

gdp_df = gdp_df.rename(columns = {'Country or Area': 'Country', 'Value': 'GDP_Per_Capita'})
gdp_df.head()

Unnamed: 0,Country,Year,GDP_Per_Capita
0,Afghanistan,2019,2065.036398
1,Afghanistan,2018,2033.779002
2,Afghanistan,2017,2058.383832
3,Afghanistan,2016,2057.062164
4,Afghanistan,2015,2068.265904


In [16]:
# Change the columns for internet_df to 'Country' 'Year' and 'GDP_Per_Capita' & verify change

internet_df = internet_df.rename(columns = {'Country or Area': 'Country', 'Value': 'Internet_Users_Pct'})
internet_df.head()

Unnamed: 0,Country,Year,Internet_Users_Pct
0,Afghanistan,2014,6.39
1,Afghanistan,2013,5.9
2,Afghanistan,2012,5.454545
3,Afghanistan,2011,5.0
4,Afghanistan,2010,4.0


In [17]:
# merge the two df's into one.  Merge **all rows**. call new df gdp_and_internet_use.

gdp_and_internet_use = gdp_df.merge(internet_df, on = ['Country', 'Year'])
gdp_and_internet_use.head()

Unnamed: 0,Country,Year,GDP_Per_Capita,Internet_Users_Pct
0,Afghanistan,2014,2102.385234,6.39
1,Afghanistan,2013,2116.465454,5.9
2,Afghanistan,2012,2075.491747,5.454545
3,Afghanistan,2011,1904.559799,5.0
4,Afghanistan,2010,1957.029338,4.0


In [18]:
gdp_and_internet_use.tail()

Unnamed: 0,Country,Year,GDP_Per_Capita,Internet_Users_Pct
3453,Zimbabwe,1997,3580.048793,0.03308
3454,Zimbabwe,1996,3520.430146,0.01679
3455,Zimbabwe,1995,3226.41393,0.007684
3456,Zimbabwe,1994,3263.934978,0.001739
3457,Zimbabwe,1990,3324.348171,0.0


In [19]:
# subset the combined df to keep only data from 2004, 2009, and 2014; create three df's for these results; 

two004_df = gdp_and_internet_use[gdp_and_internet_use['Year'] == 2004]
two004_df

Unnamed: 0,Country,Year,GDP_Per_Capita,Internet_Users_Pct
10,Afghanistan,2004,1200.278256,0.105809
23,Albania,2004,7604.838002,2.420388
44,Algeria,2004,10058.435939,4.634475
66,Angola,2004,5593.241537,0.464815
86,Antigua and Barbuda,2004,18745.681534,24.266544
...,...,...,...,...
3361,Uruguay,2004,12479.966871,17.063098
3383,Uzbekistan,2004,3221.189333,2.593725
3404,Vanuatu,2004,2792.356252,4.746603
3424,Zambia,2004,2261.766537,2.013550


In [20]:
two009_df = gdp_and_internet_use[gdp_and_internet_use['Year'] == 2009]
two009_df

Unnamed: 0,Country,Year,GDP_Per_Capita,Internet_Users_Pct
5,Afghanistan,2009,1758.904043,3.550000
18,Albania,2009,10346.864199,41.200000
39,Algeria,2009,10782.361398,11.230000
61,Angola,2009,7643.493749,6.000000
81,Antigua and Barbuda,2009,20046.839017,42.000000
...,...,...,...,...
3356,Uruguay,2009,16477.823860,41.800000
3378,Uzbekistan,2009,4446.863606,17.058216
3399,Vanuatu,2009,3270.529800,7.500000
3419,Zambia,2009,2917.516220,6.310000


In [21]:
two014_df = gdp_and_internet_use[gdp_and_internet_use['Year'] == 2014]
two014_df

Unnamed: 0,Country,Year,GDP_Per_Capita,Internet_Users_Pct
0,Afghanistan,2014,2102.385234,6.39
13,Albania,2014,11623.866679,60.10
34,Algeria,2014,11512.705405,18.09
56,Angola,2014,8239.828837,21.26
76,Antigua and Barbuda,2014,18104.241470,64.00
...,...,...,...,...
3351,Uruguay,2014,20582.168953,61.46
3373,Uzbekistan,2014,5764.492705,43.55
3394,Vanuatu,2014,3116.077025,18.80
3414,Zambia,2014,3450.046561,17.34


In [28]:
#which country had highest internet users in 2014? What was the percentage? Lowest? Percentage? max id'd the high at 98.16 and
#  nothing else; idxmax identified line 1381 as the location of the max

two014_df.Internet_Users_Pct.idxmax()

1381

In [32]:
#  print list(df.loc[df['xrb'][::-1].idxmax()]) from stack overflow

print list(two014_df.loc[two014_df['GDP_Per_Capita'][::-1].idmax()])


NameError: name 'printlist' is not defined

Highest percentage of Internet users in 2014 is Iceland at 98.16%
Lowest percentage of Internet users in 2014 is Timor-Leste at 1.14%

In [23]:
#which country had highest internet users in 2004? What was the percentage? Lowest? Percentage?

two004_df.sort_values(by = 'Internet_Users_Pct', ascending = False)

Unnamed: 0,Country,Year,GDP_Per_Capita,Internet_Users_Pct
2985,Sweden,2004,44496.023401,83.890000
1391,Iceland,2004,45145.545658,83.880000
864,Denmark,2004,50607.628971,80.930000
2306,Norway,2004,60893.410993,77.690000
1063,Finland,2004,43760.279146,72.390000
...,...,...,...,...
1019,Ethiopia,2004,793.093786,0.155335
10,Afghanistan,2004,1200.278256,0.105809
3035,Tajikistan,2004,1737.743630,0.077480
1759,Liberia,2004,1181.546158,0.031011


Highest percentage of Internet users in 2004 is Sweden at 83.89%
Lowest percentage of Internet users in 2004 is Myanmar at 0.24%

In [24]:
#which country had highest internet users in 2009? What was the percentage? Lowest? Percentage?
two009_df.sort_values(by = 'Internet_Users_Pct', ascending = False)

Unnamed: 0,Country,Year,GDP_Per_Capita,Internet_Users_Pct
1386,Iceland,2009,48177.084123,93.000000
2301,Norway,2009,61886.564498,92.080000
2980,Sweden,2009,45638.035577,91.000000
2190,Netherlands,2009,51772.424928,89.630000
1803,Luxembourg,2009,104932.811953,87.310000
...,...,...,...,...
1014,Ethiopia,2009,1150.206179,0.540000
570,Cambodia,2009,2603.583778,0.530000
2733,Sierra Leone,2009,1372.392285,0.260000
2127,Myanmar,2009,2874.780227,0.220000


Highest percentage of Internet users in 2019 is Iceland at 93%
Lowest percentage of Internet users in 2003 is Timor-Leste at 0.18%

In [25]:
# Which Country had the highest gdp per capita in 2014? What was the %'age? Lowest? Percentage?

two014_df.sort_values(by = 'GDP_Per_Capita', ascending = False)

Unnamed: 0,Country,Year,GDP_Per_Capita,Internet_Users_Pct
1798,Luxembourg,2014,108760.916030,94.67
2542,Qatar,2014,95578.416973,91.49
2751,Singapore,2014,87808.663157,82.00
372,Bermuda,2014,77361.104881,96.80
627,Cayman Islands,2014,66207.447230,74.10
...,...,...,...,...
3098,Togo,2014,1404.132938,5.70
2102,Mozambique,2014,1217.090210,5.94
2256,Niger,2014,1127.615541,1.95
1842,Malawi,2014,1027.208496,5.83


Highest GDP per capita in 2014 is Luxembourg at 108,760
Lowest GDP per capita in 2014 is Burundi at 886