## Looking for world population?

 - The latest World Bank data used here gives each country's population for 2016.
 - You can find the most populous countries.
 - You can also find them on the world map.

---

In [None]:
#pip install datapackage

In [None]:
# Some rows in the world data are summaries rather than single countries;
# we are only looking for the population of individual countries.

In [None]:
# Import pandas 
import pandas as pd 
import datapackage
import matplotlib.pyplot as plt

# interact:  Draw a bar by top numbers
from ipywidgets import interact


In [None]:
data_url = 'https://datahub.io/JohnSnowLabs/population-figures-by-country/datapackage.json'

# Load the Data Package descriptor into storage.
package = datapackage.Package(data_url)

# Only tabular resources can be read as CSV; collect their paths.
resources = package.resources
tabular_paths = [res.descriptor['path'] for res in resources if res.tabular]
if not tabular_paths:
    # Fail here with a clear message instead of a NameError on `df` below.
    raise ValueError("No tabular resource found in data package: " + data_url)

# When several tabular resources exist the last one is the one that is kept,
# so read only that one instead of downloading and discarding the others.
df = pd.read_csv(tabular_paths[-1])

# Index the population table by country code so it can be joined by code later.
df.set_index("Country_Code", inplace=True)

In [None]:
# Quick look at the loaded table (uncomment to inspect):
# df.head()
# print(df.shape)

# The last column holds the most recent year (2016).
df16 = df[df.columns[-1]]
# df16.sort_values().tail(20)

In [None]:
# Look up each country's 3-letter code and coordinates, then merge them with the population data

In [None]:
url = "https://gist.githubusercontent.com/tadast/8827699/raw/3cd639fa34eec5067080a61c69e3ae25e3076abb/countries_codes_and_coordinates.csv"
df_code3 = pd.read_csv(url)

# Sample raw value, kept for inspection: the raw code is longer than
# 3 characters, and the longitude/latitude columns have the same problem.
code3 = df_code3["Alpha-3 code"][0]
# for i, c in enumerate(code3):
#     print(i, c)

# Clean each raw cell: strip surrounding whitespace, then drop the first and
# last character that wrap the actual value.
df_code3["Code3"] = df_code3["Alpha-3 code"].str.strip().str[1:-1]

# Coordinates are converted to numbers instead of being left as text.
# Values that cannot be parsed become NaN so they show up in an isna() check.
df_code3["Lat"] = pd.to_numeric(
    df_code3["Latitude (average)"].str.strip().str[1:-1], errors="coerce"
)
df_code3["Long"] = pd.to_numeric(
    df_code3["Longitude (average)"].str.strip().str[1:-1], errors="coerce"
)

# Index by the cleaned 3-letter code so it lines up with the population table.
df_code3.set_index("Code3", inplace=True)

# Keep only the latitude and longitude columns (selected by name, not position).
df_code3 = df_code3[["Lat", "Long"]]
# df_code3.shape # 256 rows
# df_code3.shape # 256 rows


In [None]:
# There are no missing values, but some rows are duplicated.
df_code3.isna().sum()  # none

# du = df_code3[df_code3.duplicated()]  # size 24: 12 rows are extra
# print(du.size)

# Remove repeated rows. The country code lives in the index, which
# drop_duplicates() ignores, so compare code + coordinates together; otherwise
# two different codes that share the same coordinates would lose one entry.
is_repeat = df_code3.reset_index().duplicated().to_numpy()
df_code3 = df_code3[~is_repeat]
# df_code3

In [None]:
# Merge

In [None]:
# Left join on the index: every code/coordinate row is kept and the
# population columns are attached where the country code matches.
df_m = pd.merge(df_code3, df, how="left", left_index=True, right_index=True)
# df_m

In [None]:
# Some rows came out of the join without a country name: 32 countries.
df_m["Country"].isna().sum()

# Keep only the rows that do have a country name.
df_good = df_m.dropna(subset=["Country"])

# df_good

In [None]:
# Duplicated rows to drop: none
# df_good.duplicated().sum()


In [None]:
# Re-index the table by country name.
df_good = df_good.set_index(["Country"])

# No duplicated rows were found:
# df_good[df_good.duplicated()].sum().sum()

# Top 10 countries in population (2016)

In [None]:
# Population top 10 in 2016.
# Single-column frame of 2016 populations, without the countries lacking a value.
df_2016 = df_good[["Year_2016"]].dropna()

# Show the ten largest as whole numbers. astype() replaces the deprecated
# DataFrame.applymap() and converts the column in one vectorized step.
df_2016.sort_values(by="Year_2016", ascending=False).head(10).astype("int64")

In [None]:
# In year 2016, top n
def f_bar(Top_n_2016):
    """Draw a bar chart of the most populous countries in 2016.

    Top_n_2016 -- how many of the largest countries to show.
    """
    df_top = df_2016.sort_values(by="Year_2016", ascending=False).head(Top_n_2016)
    df_top.plot(kind='bar', subplots=True, figsize=(20, 5), legend=None)
    # Title previously read "counties"; the chart shows countries.
    plt.title("Top countries in year 2016")
    plt.show()


# Interactive slider (1 to 20) choosing how many countries are drawn.
if __name__ == "__main__":
    interact(f_bar, Top_n_2016=(1, 20))

# Global population changes over time

In [None]:
# Total population of each year, summed over all countries.
# The first two columns are Lat/Long; every remaining column is a year.
# Slice through to the end so the most recent year (2016) is included too.
Total_all_year = df_good.iloc[:, 2:].sum()

Total_all_year.plot(figsize=(10, 5), title="Global Population vs Years")
plt.show()

# Population Changes Country by Country

In [None]:
# interact: draw the year-by-year population for one country
def f_bar(Country):
    """Draw a bar chart of one country's population from 1960 to 2016.

    Country -- a country name from the index of df_good.
    """
    yearly = df_good.loc[Country, "Year_1960":"Year_2016"]
    yearly.plot(kind="bar", color=['r', 'b', 'g', 'k'], figsize=(15, 5))
    plt.title(" Population change of each country ")
    # Show the figure explicitly, like the other interact callback does.
    plt.show()


# Drop-down box listing every country.
if __name__ == "__main__":
    interact(f_bar, Country=df_good.index)

In [None]:
# Remove the longitude and latitude columns, then transpose so that each row
# is one year and each column is one country.
df_year = df_good.iloc[:, 2:].T


def f_slide(Year):
    """Draw a bar chart of every country's population for one year.

    Year -- the calendar year to show, e.g. 1960.
    """
    # Look the row up by its "Year_<yyyy>" label instead of a position
    # computed from a hard-coded 1960 offset.
    df_xy = df_year.loc["Year_" + str(Year)]
    df_xy.plot(kind="bar", figsize=(30, 10))
    plt.title(" Population - year")
    # Show the figure explicitly so interact renders it on each change.
    plt.show()


# Slider over the years 1960 to 2016.
if __name__ == "__main__":
    interact(f_slide, Year=(1960, 2016))
#     interact(f_line, year=(df_year.index))   #n=(1,20)

In [None]:
# One country has no 2016 value.
df_good.loc[df_good["Year_2016"].isna()]

# Keep only the countries that do have a 2016 value.
df_good = df_good.dropna(subset=["Year_2016"])


# 2016 Global Population Map

In [None]:
import folium

# Base map with the zoom level limited to the range 2..10.
world_map = folium.Map(location=[11, 0], zoom_start=1, max_zoom=10, min_zoom=2)

# One circle per country. The last column (the 2016 population) sets both the
# circle radius and the number shown in the tooltip.
country_rows = zip(df_good.index, df_good['Lat'], df_good['Long'], df_good.iloc[:, -1])
for name, lat, lon, pop_2016 in country_rows:
    tooltip_html = "".join([
        "<div style='margin: 0; background-color: black; color: white;'>",
        "<h4 style='text-align:center;font-weight: bold'>", name, "</h4>",
        "<hr style='margin:10px;color: white;'>",
        "<ul style='color: white;;list-style-type:circle;align-item:left;padding-left:20px;padding-right:20px'>",
        "<li>2016 Population: ", str(int(pop_2016)), "</li>",
        "</ul></div>",
    ])
    folium.Circle(
        location=[lat, lon],
        radius=pop_2016 / 1000,
        color='blue',
        fill=True,
        fill_color='indigo',
        tooltip=tooltip_html,
    ).add_to(world_map)

world_map

# Voila

In [None]:

# coming soon

In [None]:
# if __name__ == '__main__':
#     !voila  Global_population.ipynb --theme=dark --template=material