In [1]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import scipy.stats as st
import numpy as np
import random
from scipy.stats import linregress
import sqlalchemy
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session
from sqlalchemy import create_engine

In [2]:
#set all columns to be displayed
pd.set_option('display.max_columns', None)

In [3]:
# Study data file 1
worldinternet_path = "Resources/data.csv"



In [4]:
worldinternet_results = pd.read_csv(worldinternet_path)

In [5]:
worldinternet_results.head()

Unnamed: 0,Series Name,Series Code,Country Name,Country Code,1990 [YR1990],2000 [YR2000],2011 [YR2011],2012 [YR2012],2013 [YR2013],2014 [YR2014],2015 [YR2015],2016 [YR2016],2017 [YR2017],2018 [YR2018],2019 [YR2019],2020 [YR2020]
0,Individuals using the Internet (% of population),IT.NET.USER.ZS,Afghanistan,AFG,0,..,5,5.454545455,5.9,7,8.26,..,..,..,..,..
1,Individuals using the Internet (% of population),IT.NET.USER.ZS,Albania,ALB,0,0.114097347,47,49.4,51.8,54.3,56.9,59.6,62.4,65.4,68.55039112,72.23767711
2,Individuals using the Internet (% of population),IT.NET.USER.ZS,Algeria,DZA,0,0.491705679,14.9,18.2,22.5,29.5,38.2,42.94552688,47.69105515,49.03846808,57.5,..
3,Individuals using the Internet (% of population),IT.NET.USER.ZS,American Samoa,ASM,0,..,..,..,..,..,..,..,..,..,..,..
4,Individuals using the Internet (% of population),IT.NET.USER.ZS,Andorra,AND,0,10.53883561,81,..,..,..,..,..,91.56746703,..,..,..


In [6]:
worldinternet_results.shape

(271, 16)

In [7]:
# Study data file 2
worldincome_path = "Resources/countries.csv"

In [8]:
worldincome_results = pd.read_csv(worldincome_path)

In [9]:
worldincome_results.head(30)

Unnamed: 0,Country Code,Region,IncomeGroup,SpecialNotes,TableName,Unnamed: 5
0,ABW,Latin America & Caribbean,High income,,Aruba,
1,AFE,,,"26 countries, stretching from the Red Sea in t...",Africa Eastern and Southern,
2,AFG,South Asia,Low income,The reporting period for national accounts dat...,Afghanistan,
3,AFW,,,"22 countries, stretching from the westernmost ...",Africa Western and Central,
4,AGO,Sub-Saharan Africa,Lower middle income,,Angola,
5,ALB,Europe & Central Asia,Upper middle income,,Albania,
6,AND,Europe & Central Asia,High income,,Andorra,
7,ARB,,,Arab World aggregate. Arab World is composed o...,Arab World,
8,ARE,Middle East & North Africa,High income,,United Arab Emirates,
9,ARG,Latin America & Caribbean,Upper middle income,The World Bank systematically assesses the app...,Argentina,


In [10]:
worldincome_results.shape

(265, 6)

In [11]:
#merging the data sets 
world_overview = pd.merge(worldinternet_results, worldincome_results,how='left', left_on='Country Name', right_on='TableName')

In [12]:
world_overview.shape

(271, 22)

In [13]:
world_overview.head()

Unnamed: 0,Series Name,Series Code,Country Name,Country Code_x,1990 [YR1990],2000 [YR2000],2011 [YR2011],2012 [YR2012],2013 [YR2013],2014 [YR2014],2015 [YR2015],2016 [YR2016],2017 [YR2017],2018 [YR2018],2019 [YR2019],2020 [YR2020],Country Code_y,Region,IncomeGroup,SpecialNotes,TableName,Unnamed: 5
0,Individuals using the Internet (% of population),IT.NET.USER.ZS,Afghanistan,AFG,0,..,5,5.454545455,5.9,7,8.26,..,..,..,..,..,AFG,South Asia,Low income,The reporting period for national accounts dat...,Afghanistan,
1,Individuals using the Internet (% of population),IT.NET.USER.ZS,Albania,ALB,0,0.114097347,47,49.4,51.8,54.3,56.9,59.6,62.4,65.4,68.55039112,72.23767711,ALB,Europe & Central Asia,Upper middle income,,Albania,
2,Individuals using the Internet (% of population),IT.NET.USER.ZS,Algeria,DZA,0,0.491705679,14.9,18.2,22.5,29.5,38.2,42.94552688,47.69105515,49.03846808,57.5,..,DZA,Middle East & North Africa,Lower middle income,,Algeria,
3,Individuals using the Internet (% of population),IT.NET.USER.ZS,American Samoa,ASM,0,..,..,..,..,..,..,..,..,..,..,..,ASM,East Asia & Pacific,Upper middle income,,American Samoa,
4,Individuals using the Internet (% of population),IT.NET.USER.ZS,Andorra,AND,0,10.53883561,81,..,..,..,..,..,91.56746703,..,..,..,AND,Europe & Central Asia,High income,,Andorra,


In [14]:
#filtering the dataset to remove unwanted columns 
internet_df = world_overview.filter(['Country Name','Country Code_x','2011 [YR2011]','2012 [YR2012]','2013 [YR2013]','2014 [YR2014]','2015 [YR2015]','2016 [YR2016]','2017 [YR2017]','2018 [YR2018]','2019 [YR2019]','2020 [YR2020]','IncomeGroup'])

In [15]:
internet_df.head(30)

Unnamed: 0,Country Name,Country Code_x,2011 [YR2011],2012 [YR2012],2013 [YR2013],2014 [YR2014],2015 [YR2015],2016 [YR2016],2017 [YR2017],2018 [YR2018],2019 [YR2019],2020 [YR2020],IncomeGroup
0,Afghanistan,AFG,5,5.454545455,5.9,7,8.26,..,..,..,..,..,Low income
1,Albania,ALB,47,49.4,51.8,54.3,56.9,59.6,62.4,65.4,68.55039112,72.23767711,Upper middle income
2,Algeria,DZA,14.9,18.2,22.5,29.5,38.2,42.94552688,47.69105515,49.03846808,57.5,..,Lower middle income
3,American Samoa,ASM,..,..,..,..,..,..,..,..,..,..,Upper middle income
4,Andorra,AND,81,..,..,..,..,..,91.56746703,..,..,..,High income
5,Angola,AGO,3.1,6.5,8.9,21.4,29,29,32,35,36,..,Lower middle income
6,Antigua and Barbuda,ATG,52,58,63.4,67.78,70,73,..,..,..,..,High income
7,Argentina,ARG,51,55.8,59.9,64.7,68.04306411,70.96898082,74.29490687,..,..,..,Upper middle income
8,Armenia,ARM,32,37.5,41.9,54.62280586,59.10083377,64.34602977,64.74488433,68.24505226,66.54394969,..,Upper middle income
9,Aruba,ABW,69,74,78.9,83.78,88.66122693,93.54245387,97.17,..,..,..,High income


In [16]:
#reading in the population data and filtering
worldbank_pop = "Resources/pop.csv"
worldbank_population = pd.read_csv(worldbank_pop)
worldbank_population = worldbank_population.filter(['Country Name','2011','2012','2013','2014','2015','2016','2017','2018','2019','2020'])
worldbank_population.head()

Unnamed: 0,Country Name,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020
0,Aruba,102050.0,102565.0,103165.0,103776.0,104339.0,104865.0,105361.0,105846.0,106310.0,106766.0
1,Africa Eastern and Southern,532760424.0,547482863.0,562601578.0,578075373.0,593871847.0,609978946.0,626392880.0,643090131.0,660046272.0,677243299.0
2,Afghanistan,30117411.0,31161378.0,32269592.0,33370804.0,34413603.0,35383028.0,36296111.0,37171922.0,38041757.0,38928341.0
3,Africa Western and Central,360285439.0,370243017.0,380437896.0,390882979.0,401586651.0,412551299.0,423769930.0,435229381.0,446911598.0,458803476.0
4,Angola,24220660.0,25107925.0,26015786.0,26941773.0,27884380.0,28842482.0,29816769.0,30809787.0,31825299.0,32866268.0


In [17]:
#merging the datasets
world_overview1 = pd.merge(internet_df, worldbank_population,how='left', left_on='Country Name', right_on='Country Name')

In [18]:

world_overview1.head()

Unnamed: 0,Country Name,Country Code_x,2011 [YR2011],2012 [YR2012],2013 [YR2013],2014 [YR2014],2015 [YR2015],2016 [YR2016],2017 [YR2017],2018 [YR2018],2019 [YR2019],2020 [YR2020],IncomeGroup,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020
0,Afghanistan,AFG,5,5.454545455,5.9,7,8.26,..,..,..,..,..,Low income,30117411.0,31161378.0,32269592.0,33370804.0,34413603.0,35383028.0,36296111.0,37171922.0,38041757.0,38928341.0
1,Albania,ALB,47,49.4,51.8,54.3,56.9,59.6,62.4,65.4,68.55039112,72.23767711,Upper middle income,2905195.0,2900401.0,2895092.0,2889104.0,2880703.0,2876101.0,2873457.0,2866376.0,2854191.0,2837743.0
2,Algeria,DZA,14.9,18.2,22.5,29.5,38.2,42.94552688,47.69105515,49.03846808,57.5,..,Lower middle income,36661438.0,37383899.0,38140135.0,38923688.0,39728020.0,40551398.0,41389174.0,42228415.0,43053054.0,43851043.0
3,American Samoa,ASM,..,..,..,..,..,..,..,..,..,..,Upper middle income,55755.0,55669.0,55717.0,55791.0,55806.0,55739.0,55617.0,55461.0,55312.0,55197.0
4,Andorra,AND,81,..,..,..,..,..,91.56746703,..,..,..,High income,83748.0,82427.0,80770.0,79213.0,77993.0,77295.0,76997.0,77008.0,77146.0,77265.0


In [19]:
#reading in the GDP CSV and filtering 
worldbank_GDP = "Resources/GDP.csv"
worldbank_gdp = pd.read_csv(worldbank_GDP)
worldbank_gdp.shape
worldbank_gdp.head()
worldbank_gdp = worldbank_gdp.filter(['Country Name','2011','2012','2013','2014','2015','2016','2017','2018','2019','2020'])
worldbank_gdp.head()

Unnamed: 0,Country Name,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020
0,Aruba,3.446055,-1.369863,4.198232,0.3,5.700001,2.1,1.999999,,,
1,Africa Eastern and Southern,4.014183,1.972652,4.30837,3.986754,2.925591,2.019391,2.542298,2.475272,2.077898,-2.939186
2,Afghanistan,0.426355,12.752287,5.600745,2.724543,1.451315,2.260314,2.647003,1.189228,3.911603,-2.351101
3,Africa Western and Central,4.848351,5.142964,6.104241,5.92735,2.745937,0.127595,2.318042,2.95223,3.190336,-0.884981
4,Angola,3.471976,8.542188,4.954545,4.822628,0.943572,-2.58005,-0.147213,-2.00363,-0.624644,-5.399987


In [20]:
#final merge 
world_final = pd.merge(world_overview1, worldbank_gdp,how='left', left_on='Country Name', right_on='Country Name')
world_final.head()

Unnamed: 0,Country Name,Country Code_x,2011 [YR2011],2012 [YR2012],2013 [YR2013],2014 [YR2014],2015 [YR2015],2016 [YR2016],2017 [YR2017],2018 [YR2018],2019 [YR2019],2020 [YR2020],IncomeGroup,2011_x,2012_x,2013_x,2014_x,2015_x,2016_x,2017_x,2018_x,2019_x,2020_x,2011_y,2012_y,2013_y,2014_y,2015_y,2016_y,2017_y,2018_y,2019_y,2020_y
0,Afghanistan,AFG,5,5.454545455,5.9,7,8.26,..,..,..,..,..,Low income,30117411.0,31161378.0,32269592.0,33370804.0,34413603.0,35383028.0,36296111.0,37171922.0,38041757.0,38928341.0,0.426355,12.752287,5.600745,2.724543,1.451315,2.260314,2.647003,1.189228,3.911603,-2.351101
1,Albania,ALB,47,49.4,51.8,54.3,56.9,59.6,62.4,65.4,68.55039112,72.23767711,Upper middle income,2905195.0,2900401.0,2895092.0,2889104.0,2880703.0,2876101.0,2873457.0,2866376.0,2854191.0,2837743.0,2.545406,1.417243,1.002018,1.774449,2.218726,3.314981,3.802227,4.01936,2.11342,-3.955398
2,Algeria,DZA,14.9,18.2,22.5,29.5,38.2,42.94552688,47.69105515,49.03846808,57.5,..,Lower middle income,36661438.0,37383899.0,38140135.0,38923688.0,39728020.0,40551398.0,41389174.0,42228415.0,43053054.0,43851043.0,2.9,3.4,2.8,3.8,3.7,3.2,1.3,1.1,1.0,-5.1
3,American Samoa,ASM,..,..,..,..,..,..,..,..,..,..,Upper middle income,55755.0,55669.0,55717.0,55791.0,55806.0,55739.0,55617.0,55461.0,55312.0,55197.0,0.0,-4.334828,-2.5,1.762821,3.149606,-1.679389,-6.987578,2.671119,-0.487805,3.921569
4,Andorra,AND,81,..,..,..,..,..,91.56746703,..,..,..,High income,83748.0,82427.0,80770.0,79213.0,77993.0,77295.0,76997.0,77008.0,77146.0,77265.0,-0.00807,-4.974444,-3.547597,2.504466,1.43414,3.709678,0.346072,1.588765,2.015548,-11.952693


In [21]:
#renaming the columns to distinguish the data 
world_final.rename(columns={'Country Name': 'Country','Country Code_x': 'Abbr','2011 [YR2011]': 'Internet_Use_Perc_2011','2012 [YR2012]': 'Internet_Use_Perc_2012','2013 [YR2013]': 'Internet_Use_Perc_2013','2014 [YR2014]': 'Internet_Use_Perc_2014','2015 [YR2015]': 'Internet_Use_Perc_2015','2016 [YR2016]': 'Internet_Use_Perc_2016','2017 [YR2017]': 'Internet_Use_Perc_2017','2018 [YR2018]': 'Internet_Use_Perc_2018','2019 [YR2019]': 'Internet_Use_Perc_2019','2020 [YR2020]': 'Internet_Use_Perc_2020','2011_y': 'GDP_2011','2012_y': 'GDP_2012','2013_y': 'GDP_2013','2014_y': 'GDP_2014','2015_y': 'GDP_2015','2016_y': 'GDP_2016','2017_y': 'GDP_2017','2018_y': 'GDP_2018','2019_y': 'GDP_2019','2020_y': 'GDP_2020','2011_x': 'population_2011','2012_x': 'population_2012','2013_x': 'population_2013','2014_x': 'population_2014','2015_x': 'population_2015','2016_x': 'population_2016','2017_x': 'population_2017','2018_x': 'population_2018','2019_x': 'population_2019','2020_x': 'population_2020'}, inplace=True)



In [22]:
world_final.head()

Unnamed: 0,Country,Abbr,Internet_Use_Perc_2011,Internet_Use_Perc_2012,Internet_Use_Perc_2013,Internet_Use_Perc_2014,Internet_Use_Perc_2015,Internet_Use_Perc_2016,Internet_Use_Perc_2017,Internet_Use_Perc_2018,Internet_Use_Perc_2019,Internet_Use_Perc_2020,IncomeGroup,population_2011,population_2012,population_2013,population_2014,population_2015,population_2016,population_2017,population_2018,population_2019,population_2020,GDP_2011,GDP_2012,GDP_2013,GDP_2014,GDP_2015,GDP_2016,GDP_2017,GDP_2018,GDP_2019,GDP_2020
0,Afghanistan,AFG,5,5.454545455,5.9,7,8.26,..,..,..,..,..,Low income,30117411.0,31161378.0,32269592.0,33370804.0,34413603.0,35383028.0,36296111.0,37171922.0,38041757.0,38928341.0,0.426355,12.752287,5.600745,2.724543,1.451315,2.260314,2.647003,1.189228,3.911603,-2.351101
1,Albania,ALB,47,49.4,51.8,54.3,56.9,59.6,62.4,65.4,68.55039112,72.23767711,Upper middle income,2905195.0,2900401.0,2895092.0,2889104.0,2880703.0,2876101.0,2873457.0,2866376.0,2854191.0,2837743.0,2.545406,1.417243,1.002018,1.774449,2.218726,3.314981,3.802227,4.01936,2.11342,-3.955398
2,Algeria,DZA,14.9,18.2,22.5,29.5,38.2,42.94552688,47.69105515,49.03846808,57.5,..,Lower middle income,36661438.0,37383899.0,38140135.0,38923688.0,39728020.0,40551398.0,41389174.0,42228415.0,43053054.0,43851043.0,2.9,3.4,2.8,3.8,3.7,3.2,1.3,1.1,1.0,-5.1
3,American Samoa,ASM,..,..,..,..,..,..,..,..,..,..,Upper middle income,55755.0,55669.0,55717.0,55791.0,55806.0,55739.0,55617.0,55461.0,55312.0,55197.0,0.0,-4.334828,-2.5,1.762821,3.149606,-1.679389,-6.987578,2.671119,-0.487805,3.921569
4,Andorra,AND,81,..,..,..,..,..,91.56746703,..,..,..,High income,83748.0,82427.0,80770.0,79213.0,77993.0,77295.0,76997.0,77008.0,77146.0,77265.0,-0.00807,-4.974444,-3.547597,2.504466,1.43414,3.709678,0.346072,1.588765,2.015548,-11.952693


In [23]:
#remove the 2020 data for internet %use, population and GDP as a lot of them were incomplete
world_final = world_final.drop(['Internet_Use_Perc_2020', 'population_2020', 'GDP_2020'], axis = 1)
world_final.head()

Unnamed: 0,Country,Abbr,Internet_Use_Perc_2011,Internet_Use_Perc_2012,Internet_Use_Perc_2013,Internet_Use_Perc_2014,Internet_Use_Perc_2015,Internet_Use_Perc_2016,Internet_Use_Perc_2017,Internet_Use_Perc_2018,Internet_Use_Perc_2019,IncomeGroup,population_2011,population_2012,population_2013,population_2014,population_2015,population_2016,population_2017,population_2018,population_2019,GDP_2011,GDP_2012,GDP_2013,GDP_2014,GDP_2015,GDP_2016,GDP_2017,GDP_2018,GDP_2019
0,Afghanistan,AFG,5,5.454545455,5.9,7,8.26,..,..,..,..,Low income,30117411.0,31161378.0,32269592.0,33370804.0,34413603.0,35383028.0,36296111.0,37171922.0,38041757.0,0.426355,12.752287,5.600745,2.724543,1.451315,2.260314,2.647003,1.189228,3.911603
1,Albania,ALB,47,49.4,51.8,54.3,56.9,59.6,62.4,65.4,68.55039112,Upper middle income,2905195.0,2900401.0,2895092.0,2889104.0,2880703.0,2876101.0,2873457.0,2866376.0,2854191.0,2.545406,1.417243,1.002018,1.774449,2.218726,3.314981,3.802227,4.01936,2.11342
2,Algeria,DZA,14.9,18.2,22.5,29.5,38.2,42.94552688,47.69105515,49.03846808,57.5,Lower middle income,36661438.0,37383899.0,38140135.0,38923688.0,39728020.0,40551398.0,41389174.0,42228415.0,43053054.0,2.9,3.4,2.8,3.8,3.7,3.2,1.3,1.1,1.0
3,American Samoa,ASM,..,..,..,..,..,..,..,..,..,Upper middle income,55755.0,55669.0,55717.0,55791.0,55806.0,55739.0,55617.0,55461.0,55312.0,0.0,-4.334828,-2.5,1.762821,3.149606,-1.679389,-6.987578,2.671119,-0.487805
4,Andorra,AND,81,..,..,..,..,..,91.56746703,..,..,High income,83748.0,82427.0,80770.0,79213.0,77993.0,77295.0,76997.0,77008.0,77146.0,-0.00807,-4.974444,-3.547597,2.504466,1.43414,3.709678,0.346072,1.588765,2.015548


In [24]:
world_final.shape

(271, 30)

In [25]:
world_final.head()

Unnamed: 0,Country,Abbr,Internet_Use_Perc_2011,Internet_Use_Perc_2012,Internet_Use_Perc_2013,Internet_Use_Perc_2014,Internet_Use_Perc_2015,Internet_Use_Perc_2016,Internet_Use_Perc_2017,Internet_Use_Perc_2018,Internet_Use_Perc_2019,IncomeGroup,population_2011,population_2012,population_2013,population_2014,population_2015,population_2016,population_2017,population_2018,population_2019,GDP_2011,GDP_2012,GDP_2013,GDP_2014,GDP_2015,GDP_2016,GDP_2017,GDP_2018,GDP_2019
0,Afghanistan,AFG,5,5.454545455,5.9,7,8.26,..,..,..,..,Low income,30117411.0,31161378.0,32269592.0,33370804.0,34413603.0,35383028.0,36296111.0,37171922.0,38041757.0,0.426355,12.752287,5.600745,2.724543,1.451315,2.260314,2.647003,1.189228,3.911603
1,Albania,ALB,47,49.4,51.8,54.3,56.9,59.6,62.4,65.4,68.55039112,Upper middle income,2905195.0,2900401.0,2895092.0,2889104.0,2880703.0,2876101.0,2873457.0,2866376.0,2854191.0,2.545406,1.417243,1.002018,1.774449,2.218726,3.314981,3.802227,4.01936,2.11342
2,Algeria,DZA,14.9,18.2,22.5,29.5,38.2,42.94552688,47.69105515,49.03846808,57.5,Lower middle income,36661438.0,37383899.0,38140135.0,38923688.0,39728020.0,40551398.0,41389174.0,42228415.0,43053054.0,2.9,3.4,2.8,3.8,3.7,3.2,1.3,1.1,1.0
3,American Samoa,ASM,..,..,..,..,..,..,..,..,..,Upper middle income,55755.0,55669.0,55717.0,55791.0,55806.0,55739.0,55617.0,55461.0,55312.0,0.0,-4.334828,-2.5,1.762821,3.149606,-1.679389,-6.987578,2.671119,-0.487805
4,Andorra,AND,81,..,..,..,..,..,91.56746703,..,..,High income,83748.0,82427.0,80770.0,79213.0,77993.0,77295.0,76997.0,77008.0,77146.0,-0.00807,-4.974444,-3.547597,2.504466,1.43414,3.709678,0.346072,1.588765,2.015548


In [26]:
#remove all rows containing ".." as those will be unusable for further work
world_final = world_final[~world_final.isin(['..']).any(axis=1)]

In [27]:
world_final.head()

Unnamed: 0,Country,Abbr,Internet_Use_Perc_2011,Internet_Use_Perc_2012,Internet_Use_Perc_2013,Internet_Use_Perc_2014,Internet_Use_Perc_2015,Internet_Use_Perc_2016,Internet_Use_Perc_2017,Internet_Use_Perc_2018,Internet_Use_Perc_2019,IncomeGroup,population_2011,population_2012,population_2013,population_2014,population_2015,population_2016,population_2017,population_2018,population_2019,GDP_2011,GDP_2012,GDP_2013,GDP_2014,GDP_2015,GDP_2016,GDP_2017,GDP_2018,GDP_2019
1,Albania,ALB,47.0,49.4,51.8,54.3,56.9,59.6,62.4,65.4,68.55039112,Upper middle income,2905195.0,2900401.0,2895092.0,2889104.0,2880703.0,2876101.0,2873457.0,2866376.0,2854191.0,2.545406,1.417243,1.002018,1.774449,2.218726,3.314981,3.802227,4.01936,2.11342
2,Algeria,DZA,14.9,18.2,22.5,29.5,38.2,42.94552688,47.69105515,49.03846808,57.5,Lower middle income,36661438.0,37383899.0,38140135.0,38923688.0,39728020.0,40551398.0,41389174.0,42228415.0,43053054.0,2.9,3.4,2.8,3.8,3.7,3.2,1.3,1.1,1.0
5,Angola,AGO,3.1,6.5,8.9,21.4,29.0,29.0,32.0,35.0,36.0,Lower middle income,24220660.0,25107925.0,26015786.0,26941773.0,27884380.0,28842482.0,29816769.0,30809787.0,31825299.0,3.471976,8.542188,4.954545,4.822628,0.943572,-2.58005,-0.147213,-2.00363,-0.624644
8,Armenia,ARM,32.0,37.5,41.9,54.62280586,59.10083377,64.34602977,64.74488433,68.24505226,66.54394969,Upper middle income,2876536.0,2884239.0,2897593.0,2912403.0,2925559.0,2936147.0,2944789.0,2951741.0,2957728.0,4.7,7.2,3.3,3.6,3.2,0.2,7.5,5.2,7.6
11,Austria,AUT,78.7399931,80.02999392,80.6188,80.99582496,83.94014193,84.32374257,87.93558659,87.47913723,87.75220479,High income,8391643.0,8429991.0,8479823.0,8546356.0,8642699.0,8736668.0,8797566.0,8840521.0,8879920.0,2.922797,0.680446,0.025505,0.661273,1.014502,1.989437,2.258572,2.501595,1.491211


In [28]:
world_final = world_final.dropna(axis=0, how='any', thresh=None, subset=None, inplace=False)

In [29]:
world_final.shape

(120, 30)

In [30]:
world_final.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 120 entries, 1 to 216
Data columns (total 30 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   Country                 120 non-null    object 
 1   Abbr                    120 non-null    object 
 2   Internet_Use_Perc_2011  120 non-null    object 
 3   Internet_Use_Perc_2012  120 non-null    object 
 4   Internet_Use_Perc_2013  120 non-null    object 
 5   Internet_Use_Perc_2014  120 non-null    object 
 6   Internet_Use_Perc_2015  120 non-null    object 
 7   Internet_Use_Perc_2016  120 non-null    object 
 8   Internet_Use_Perc_2017  120 non-null    object 
 9   Internet_Use_Perc_2018  120 non-null    object 
 10  Internet_Use_Perc_2019  120 non-null    object 
 11  IncomeGroup             120 non-null    object 
 12  population_2011         120 non-null    float64
 13  population_2012         120 non-null    float64
 14  population_2013         120 non-null    fl

In [31]:
world_final = world_final.astype({
    'Internet_Use_Perc_2011': 'float64',
    'Internet_Use_Perc_2012': 'float64',
    'Internet_Use_Perc_2013': 'float64',
    'Internet_Use_Perc_2014': 'float64',
    'Internet_Use_Perc_2015': 'float64',
    'Internet_Use_Perc_2016': 'float64',
    'Internet_Use_Perc_2017': 'float64',
    'Internet_Use_Perc_2018': 'float64',
    'Internet_Use_Perc_2019': 'float64'
})

In [32]:
world_final = world_final.round(decimals = 1)

In [33]:
world_final.head()

Unnamed: 0,Country,Abbr,Internet_Use_Perc_2011,Internet_Use_Perc_2012,Internet_Use_Perc_2013,Internet_Use_Perc_2014,Internet_Use_Perc_2015,Internet_Use_Perc_2016,Internet_Use_Perc_2017,Internet_Use_Perc_2018,Internet_Use_Perc_2019,IncomeGroup,population_2011,population_2012,population_2013,population_2014,population_2015,population_2016,population_2017,population_2018,population_2019,GDP_2011,GDP_2012,GDP_2013,GDP_2014,GDP_2015,GDP_2016,GDP_2017,GDP_2018,GDP_2019
1,Albania,ALB,47.0,49.4,51.8,54.3,56.9,59.6,62.4,65.4,68.6,Upper middle income,2905195.0,2900401.0,2895092.0,2889104.0,2880703.0,2876101.0,2873457.0,2866376.0,2854191.0,2.5,1.4,1.0,1.8,2.2,3.3,3.8,4.0,2.1
2,Algeria,DZA,14.9,18.2,22.5,29.5,38.2,42.9,47.7,49.0,57.5,Lower middle income,36661438.0,37383899.0,38140135.0,38923688.0,39728020.0,40551398.0,41389174.0,42228415.0,43053054.0,2.9,3.4,2.8,3.8,3.7,3.2,1.3,1.1,1.0
5,Angola,AGO,3.1,6.5,8.9,21.4,29.0,29.0,32.0,35.0,36.0,Lower middle income,24220660.0,25107925.0,26015786.0,26941773.0,27884380.0,28842482.0,29816769.0,30809787.0,31825299.0,3.5,8.5,5.0,4.8,0.9,-2.6,-0.1,-2.0,-0.6
8,Armenia,ARM,32.0,37.5,41.9,54.6,59.1,64.3,64.7,68.2,66.5,Upper middle income,2876536.0,2884239.0,2897593.0,2912403.0,2925559.0,2936147.0,2944789.0,2951741.0,2957728.0,4.7,7.2,3.3,3.6,3.2,0.2,7.5,5.2,7.6
11,Austria,AUT,78.7,80.0,80.6,81.0,83.9,84.3,87.9,87.5,87.8,High income,8391643.0,8429991.0,8479823.0,8546356.0,8642699.0,8736668.0,8797566.0,8840521.0,8879920.0,2.9,0.7,0.0,0.7,1.0,2.0,2.3,2.5,1.5


In [34]:
internet_use = world_final[['Country', 'Internet_Use_Perc_2011', 'Internet_Use_Perc_2012', 'Internet_Use_Perc_2013', 'Internet_Use_Perc_2014', 'Internet_Use_Perc_2015', 'Internet_Use_Perc_2016', 'Internet_Use_Perc_2017', 'Internet_Use_Perc_2018', 'Internet_Use_Perc_2019']]
internet_use.head()
                    
                    
                    
                    

Unnamed: 0,Country,Internet_Use_Perc_2011,Internet_Use_Perc_2012,Internet_Use_Perc_2013,Internet_Use_Perc_2014,Internet_Use_Perc_2015,Internet_Use_Perc_2016,Internet_Use_Perc_2017,Internet_Use_Perc_2018,Internet_Use_Perc_2019
1,Albania,47.0,49.4,51.8,54.3,56.9,59.6,62.4,65.4,68.6
2,Algeria,14.9,18.2,22.5,29.5,38.2,42.9,47.7,49.0,57.5
5,Angola,3.1,6.5,8.9,21.4,29.0,29.0,32.0,35.0,36.0
8,Armenia,32.0,37.5,41.9,54.6,59.1,64.3,64.7,68.2,66.5
11,Austria,78.7,80.0,80.6,81.0,83.9,84.3,87.9,87.5,87.8


In [39]:
internet_use.rename(columns={'Internet_Use_Perc_2011': '2011','Internet_Use_Perc_2012': '2012', 'Internet_Use_Perc_2013': '2013', 'Internet_Use_Perc_2014': '2014', 'Internet_Use_Perc_2015': '2015', 'Internet_Use_Perc_2016': '2016', 'Internet_Use_Perc_2017': '2017', 'Internet_Use_Perc_2018': '2018', 'Internet_Use_Perc_2019': '2019'}, inplace=True)
internet_use.head()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(


Unnamed: 0,Country,2011,2012,2013,2014,2015,2016,2017,2018,2019
1,Albania,47.0,49.4,51.8,54.3,56.9,59.6,62.4,65.4,68.6
2,Algeria,14.9,18.2,22.5,29.5,38.2,42.9,47.7,49.0,57.5
5,Angola,3.1,6.5,8.9,21.4,29.0,29.0,32.0,35.0,36.0
8,Armenia,32.0,37.5,41.9,54.6,59.1,64.3,64.7,68.2,66.5
11,Austria,78.7,80.0,80.6,81.0,83.9,84.3,87.9,87.5,87.8


In [None]:
gdp = world_final[['Country', 'Internet_Use_Perc_2011', 'Internet_Use_Perc_2012', 'Internet_Use_Perc_2013', 'Internet_Use_Perc_2014', 'Internet_Use_Perc_2015', 'Internet_Use_Perc_2016', 'Internet_Use_Perc_2017', 'Internet_Use_Perc_2018', 'Internet_Use_Perc_2019']]
internet_use.head()

In [None]:
world_final.to_csv('Internet_data_hpfl.csv')

In [40]:
internet_use.to_csv('Internet.csv')