# Comparisons: Our World In Data (source)

## Data

The data was pulled from the [urbanization dataset](https://ourworldindata.org/grapher/urbanization-last-500-years?time=1950..2016&country=PLW~GUM~FSM~MNP~MHL~ASM~NRU~OWID_WRL~Northern+America~South+America~Europe~Asia~Africa~Oceania~Micronesia+%28subregion%29) published by Our World In Data. The original data source is cited there as 'the UN World Urbanization Prospects database covering the period from 1950 to 2050'.


In [113]:
import pandas as pd
from pprint import pprint

# Inclusive year window kept for the analysis; the plotting cells reuse
# these two values as their x-axis range.
start_year = 1950
end_year = 2016

# Region-level entity names of interest. Not referenced again in this cell.
regions = [
    'Micronesia (subregion)',
    'Northern America',
    'South America',
    'Europe',
    'Oceania',
    'Africa',
    'Asia',
    'World',
]

# Load the raw export and preview its layout before reshaping.
df = pd.read_csv('urbanization_1950_2016.csv')
print(df.head())
print(df.shape)

# Reshape to one row per year and one column per entity, keep only the rows
# from start_year through end_year (label slicing includes both ends), and
# turn percentages into fractions for plotting later. Columns are not
# filtered here: every entity is retained.
df = (
    df.pivot(index='Year',
             columns='Entity',
             values='Urban population (%) long-run to 2016 (OWID)')
      .loc[start_year:end_year]
      .div(100)
)

print(df.head())
print(df.shape)

        Entity Code  Year  Urban population (%) long-run to 2016 (OWID)
0  Afghanistan  AFG  1950                                         6.000
1  Afghanistan  AFG  1951                                         6.208
2  Afghanistan  AFG  1952                                         6.422
3  Afghanistan  AFG  1953                                         6.643
4  Afghanistan  AFG  1954                                         6.872
(18695, 4)
Entity  Afghanistan    Africa  Albania  Algeria  American Samoa  Andorra  \
Year                                                                       
1950        0.06000  0.142821  0.20528  0.22213         0.61769  0.38800   
1951        0.06208  0.146648  0.21503  0.22965         0.62223  0.40708   
1952        0.06422  0.150587  0.22768  0.23736         0.62676  0.42648   
1953        0.06643  0.154636  0.24082  0.24522         0.63126  0.44605   
1954        0.06872  0.158800  0.25448  0.25327         0.63574  0.46581   

Entity   Angola  Anguill

In [40]:
from bokeh.io import show, output_notebook
# Configure Bokeh so that subsequent show() calls render plots inline in the
# notebook's output cells.
output_notebook()

In [116]:
from bokeh.palettes import Category10 as Palette
from bokeh.plotting import figure, show
# NumeralTickFormatter is imported here so this cell runs on a fresh kernel;
# previously it was only imported by a later cell, so running the notebook
# top to bottom raised NameError at the formatter assignment below.
from bokeh.models import ColumnDataSource, NumeralTickFormatter

# Entities to draw, one line each. Every name must match a column of `df`.
countries = ['Guam',
             'Northern Mariana Islands',
             'American Samoa',
             'Palau',
             'Marshall Islands',
             'Micronesia (country)']

# Wrap the wide year-by-entity frame so glyphs can refer to columns by name.
source = ColumnDataSource(df)

# Pick the palette variant that has exactly one colour per plotted entity.
palette = Palette[len(countries)]

p = figure(width=800,
           height=500,
           x_range=(start_year, end_year),
           y_range=(0, 1),
           title="Urbanized Population Percentage of Select Pacific Islands")

for i, c in enumerate(countries):
    p.line(x='Year',
           y=c,
           line_color=palette[i],
           line_width=2,
           legend_label=c,
           source=source)

p.xaxis.axis_label = "Year"
p.yaxis.axis_label = "Percent Urbanized"
# Values in `df` are fractions in [0, 1]; "0%" shows them as whole percentages.
p.yaxis.formatter = NumeralTickFormatter(format="0%")

# Compact horizontal legend tucked into the bottom-right corner.
p.legend.location = "bottom_right"
p.legend.label_text_font_size = "0.7em"
p.legend.margin = 5
p.legend.spacing = 12
p.legend.padding = 1
p.legend.orientation = 'horizontal'

show(p)

In [115]:
from bokeh.palettes import Category10 as Palette
from bokeh.plotting import figure, show
from bokeh.models import ColumnDataSource, NumeralTickFormatter

# Entities to compare, one line each: Guam, its subregion, the remaining
# geographic regions, and the world total. Every name must match a column
# of `df`.
regions = [
    'Guam',
    'Micronesia (subregion)',
    'Oceania',
    'Northern America',
    'South America',
    'Europe',
    'Africa',
    'Asia',
    'World',
]

# Wrap the wide year-by-entity frame so glyphs can refer to columns by name.
source = ColumnDataSource(df)

# Palette variant with exactly one colour per plotted entity.
palette = Palette[len(regions)]

# Axis labels are supplied up front rather than assigned after construction.
p = figure(
    width=800,
    height=500,
    x_range=(start_year, end_year),
    y_range=(0, 1),
    title="Urbanized Population Percentage of Guam, Micronesia, Geographic Regions, and the World.",
    x_axis_label="Year",
    y_axis_label="Percent Urbanized",
)

# `palette` has the same length as `regions`, so zip pairs each entity with
# the colour at its own position.
for region, color in zip(regions, palette):
    p.line(x='Year',
           y=region,
           source=source,
           legend_label=region,
           line_color=color,
           line_width=2)

# Values in `df` are fractions in [0, 1]; "0%" shows them as whole percentages.
p.yaxis.formatter = NumeralTickFormatter(format="0%")

# Compact horizontal legend tucked into the bottom-right corner.
p.legend.orientation = 'horizontal'
p.legend.location = "bottom_right"
p.legend.label_text_font_size = "0.7em"
p.legend.margin = 5
p.legend.padding = 1
p.legend.spacing = 12

show(p)