# **China Plotly Dash**

---

# `01` Fetch Data

### `i` Import Necessary Libraries

In [1]:

import requests # to make HTTP requests
import json # to handle JSON files
import pandas as pd # to handle dataframes
import matplotlib.pyplot as plt # to make plots
# fetch from http page
import requests
# plotly
import plotly.graph_objects as go
import plotly.express as px
import plotly.io as pio
# jupyter-dash
from jupyter_dash import JupyterDash
# html
from dash import html





### `ii` Load CSV Data

In [2]:

# Read each demographic series from its CSV file; the paths are listed in the
# same order as the names they are unpacked into.
popualtion, birth_rate, death_rate, life_expectancy, density = (
    pd.read_csv(csv_path)
    for csv_path in (
        "china-population-2023-04-02.csv",
        "birth_rate.csv",
        "death_rate.csv",
        "life_exp.csv",
        "dentsity.csv",
    )
)


### `iii` EDA

In [3]:
# Collect the five frames in one list so the checks below can loop over them.
df_list = [popualtion, birth_rate, death_rate, life_expectancy, density]

In [4]:
# check that every frame covers the same date range
def check_date_range(list_of_df):
    """Report whether every frame spans the same dates in its first column.

    The first column of each frame is parsed with ``pd.to_datetime``; its
    name, earliest date and latest date are then compared with those of the
    first frame in the list.

    Args:
        list_of_df: DataFrames whose first column holds the dates.

    Returns:
        True when all frames agree (also for an empty list), False as soon as
        one frame differs. The verdict is printed in both cases.
    """
    reference = None
    for df in list_of_df:
        dates = pd.to_datetime(df.iloc[:, 0])
        # Comparing the column name alone says nothing about the dates inside,
        # so the actual bounds of the column are part of the comparison.
        span = (df.columns[0], dates.min(), dates.max())
        if reference is None:
            reference = span
        elif span != reference:
            print("Date range not match")
            return False
    print("Date range match")
    return True
# Reuse the list built above instead of spelling the five frames out again.
check_date_range(df_list)

Date range match


True

In [5]:
# check whether all frames have the same number of rows
def check_length(df_list):
    """Print each frame's row count and report whether they all agree.

    Args:
        df_list: DataFrames to compare. Each one is labelled in the printed
            line by the name of its first column.

    Returns:
        True when every frame has the same number of rows (also for an empty
        list), otherwise False.
    """
    lengths = []
    for df in df_list:
        print("length of " + df.columns[0] + " is " + str(len(df)))
        lengths.append(len(df))
    # A set collapses equal counts, so more than one member means a mismatch.
    return len(set(lengths)) <= 1

check_length(df_list)        

length of date is 151
length of date is 151
length of date is 151
length of date is 151
length of date is 151


In [6]:
# show the column index of every dataframe, separated by a blank line
def print_columns(df_list):
    """Print each frame's ``columns`` attribute followed by an empty line."""
    for frame in df_list:
        print(frame.columns, "", sep="\n")
        
print_columns(df_list)   

Index(['date', ' Population', ' Annual Growth Rate'], dtype='object')

Index(['date', ' Births per 1000 People', ' Annual % Change'], dtype='object')

Index(['date', ' Deaths per 1000 People', ' Annual % Change'], dtype='object')

Index(['date', ' Life Expectancy from Birth (Years)', ' Annual % Change'], dtype='object')

Index(['date', ' Population per Square KM', ' Annual % Change'], dtype='object')



In [7]:
# Several frames share a generic ' Annual % Change' column; prefix it with the
# frame's own measure name so the columns stay distinguishable once merged.
def change_column_name(df_list):
    """Rename each frame's ' Annual % Change' column after the frame's measure.

    The name of the second column is taken as the measure name, and the
    ' Annual % Change' column (when present) becomes
    ``<measure> + " Annual Change"``. One line is printed per frame saying
    whether a rename happened.

    Args:
        df_list: DataFrames with at least two columns each.

    Returns:
        A new list holding the renamed frames in the same order. The input
        frames are left untouched, so re-running the caller does not depend
        on what an earlier run already changed.
    """
    renamed = []
    for df in df_list:
        measure = df.columns[1]
        if ' Annual % Change' in df.columns:
            new_name = measure + " Annual Change"
            # rename() without inplace returns a copy; rebinding the loop
            # variable keeps the caller's frame as it was.
            df = df.rename(columns={' Annual % Change': new_name})
            print("Column name changed to " + new_name + " in " + measure)
        else:
            print("Column name not found in " + measure)
        renamed.append(df)
    return renamed

df_list = change_column_name(df_list)
        


Column name not found in  Population
Column name changed to  Births per 1000 PeopleAnnual Change in  Births per 1000 People
Column name changed to  Deaths per 1000 PeopleAnnual Change in  Deaths per 1000 People
Column name changed to  Life Expectancy from Birth (Years)Annual Change in  Life Expectancy from Birth (Years)
Column name changed to  Population per Square KMAnnual Change in  Population per Square KM


In [8]:
# join every frame on the shared 'date' column
def merge_df(df_list):
    """Fold the frames into one by merging them left to right on 'date'.

    Args:
        df_list: non-empty sequence of DataFrames that all have a 'date' column.

    Returns:
        The first frame merged with each following frame in turn.
    """
    combined = df_list[0]
    for frame in df_list[1:]:
        combined = combined.merge(frame, on='date')
    return combined

merged = merge_df(df_list)

In [9]:
merged.head()

Unnamed: 0,date,Population,Annual Growth Rate,Births per 1000 People,Births per 1000 People Annual Change,Deaths per 1000 People,Deaths per 1000 People Annual Change,Life Expectancy from Birth (Years),Life Expectancy from Birth (Years) Annual Change,Population per Square KM,Population per Square KM Annual Change
0,12/31/1950,543979233,,46.133,,23.366,,43.446,,56.66444,
1,12/31/1951,553613988,1.77,44.921,-2.63,23.106,-1.11,43.574,0.29,57.668059,1.77
2,12/31/1952,564954522,2.05,43.71,-2.7,22.845,-1.13,43.702,0.29,58.849363,2.05
3,12/31/1953,577378682,2.2,42.498,-2.77,22.585,-1.14,43.83,0.29,60.143545,2.2
4,12/31/1954,589936004,2.17,41.286,-2.85,22.325,-1.15,43.958,0.29,61.451598,2.17


In [10]:
# The series start in 1950, so the first row has no previous year and its
# change-rate columns are NaN; replace every NaN in the frame with 0, in place.
merged.fillna(0, inplace=True)

In [11]:
# Save the merged frame to merged.csv, leaving the row index out of the file.
merged.to_csv("merged.csv", index=False)

In [12]:
merged

Unnamed: 0,date,Population,Annual Growth Rate,Births per 1000 People,Births per 1000 People Annual Change,Deaths per 1000 People,Deaths per 1000 People Annual Change,Life Expectancy from Birth (Years),Life Expectancy from Birth (Years) Annual Change,Population per Square KM,Population per Square KM Annual Change
0,12/31/1950,543979233,0.00,46.133,0.00,23.366,0.00,43.446,0.00,56.664440,0.00
1,12/31/1951,553613988,1.77,44.921,-2.63,23.106,-1.11,43.574,0.29,57.668059,1.77
2,12/31/1952,564954522,2.05,43.710,-2.70,22.845,-1.13,43.702,0.29,58.849363,2.05
3,12/31/1953,577378682,2.20,42.498,-2.77,22.585,-1.14,43.830,0.29,60.143545,2.20
4,12/31/1954,589936004,2.17,41.286,-2.85,22.325,-1.15,43.958,0.29,61.451598,2.17
...,...,...,...,...,...,...,...,...,...,...,...
146,12/31/2096,804153594,-1.18,8.864,-0.06,13.646,-0.20,87.398,0.12,83.765905,-1.18
147,12/31/2097,794673479,-1.18,8.858,-0.07,13.620,-0.19,87.504,0.12,82.778394,-1.18
148,12/31/2098,785270314,-1.18,8.853,-0.06,13.594,-0.19,87.610,0.12,81.798899,-1.18
149,12/31/2099,775944429,-1.19,8.848,-0.06,13.568,-0.19,87.716,0.12,80.827454,-1.19


In [13]:
# Reduce the 'date' column from full dates such as 12/31/2100 to the year only.
# The guard keeps this cell safe to re-run: once the column already holds
# integer years, pd.to_datetime would read them as nanosecond offsets from the
# epoch and every row would silently collapse to the year 1970.
if not pd.api.types.is_integer_dtype(merged['date']):
    merged['date'] = pd.to_datetime(merged['date']).dt.year
merged.head()

Unnamed: 0,date,Population,Annual Growth Rate,Births per 1000 People,Births per 1000 People Annual Change,Deaths per 1000 People,Deaths per 1000 People Annual Change,Life Expectancy from Birth (Years),Life Expectancy from Birth (Years) Annual Change,Population per Square KM,Population per Square KM Annual Change
0,1950,543979233,0.0,46.133,0.0,23.366,0.0,43.446,0.0,56.66444,0.0
1,1951,553613988,1.77,44.921,-2.63,23.106,-1.11,43.574,0.29,57.668059,1.77
2,1952,564954522,2.05,43.71,-2.7,22.845,-1.13,43.702,0.29,58.849363,2.05
3,1953,577378682,2.2,42.498,-2.77,22.585,-1.14,43.83,0.29,60.143545,2.2
4,1954,589936004,2.17,41.286,-2.85,22.325,-1.15,43.958,0.29,61.451598,2.17


### Amgad Data

In [14]:
# Load the second dataset (one row per year, many indicator columns) and preview it.
# NOTE(review): the headers carry World Bank-style indicator codes such as
# SP.POP.TOTL — confirm the source and record it here.
df2 = pd.read_csv("China demographics 1965-2050 (1).csv")
df2.head()

Unnamed: 0,Year,Urban population (% of total population) [SP.URB.TOTL.IN.ZS],Sex ratio at birth (male births per female births) [SP.POP.BRTH.MF],Rural population (% of total population) [SP.RUR.TOTL.ZS],"Population, total [SP.POP.TOTL]","Population, male (% of total population) [SP.POP.TOTL.MA.ZS]","Population, female (% of total population) [SP.POP.TOTL.FE.ZS]",Population growth (annual %) [SP.POP.GROW],Population ages 65 and above (% of total population) [SP.POP.65UP.TO.ZS],Net migration [SM.POP.NETM],"Mortality rate, infant (per 1,000 live births) [SP.DYN.IMRT.IN]","Fertility rate, total (births per woman) [SP.DYN.TFRT.IN]","Death rate, crude (per 1,000 people) [SP.DYN.CDRT.IN]","Birth rate, crude (per 1,000 people) [SP.DYN.CBRT.IN]",Age dependency ratio (% of working-age population) [SP.POP.DPND]
0,1969,17.528,1.063,82.472,796025000,50.959032,49.040968,2.74000210569558,3.68749,-72835,83.4,6.175,8.03,34.11,80.961979
1,1970,17.4,1.064,82.6,818315000,50.954552,49.045448,2.76167556645304,3.712521,-126514,79.7,6.085,7.6,33.43,80.524027
2,1971,17.292,1.064,82.708,841105000,50.952559,49.047441,2.74691554961857,3.743416,-180202,76.0,5.523,7.32,30.65,80.188208
3,1972,17.184,1.064,82.816,862030000,50.951571,49.048429,2.45735692952286,3.777798,-215730,72.5,5.112,7.61,29.77,79.481177
4,1973,17.184,1.064,82.816,881940000,50.950011,49.049989,2.28339536396383,3.821944,-215090,68.9,4.726,7.04,27.93,78.831239


In [15]:
df2.columns # print columns

Index(['Year', 'Urban population (% of total population) [SP.URB.TOTL.IN.ZS]',
       'Sex ratio at birth (male births per female births) [SP.POP.BRTH.MF]',
       'Rural population (% of total population) [SP.RUR.TOTL.ZS]',
       'Population, total [SP.POP.TOTL]',
       'Population, male (% of total population) [SP.POP.TOTL.MA.ZS]',
       'Population, female (% of total population) [SP.POP.TOTL.FE.ZS]',
       'Population growth (annual %) [SP.POP.GROW]',
       'Population ages 65 and above (% of total population) [SP.POP.65UP.TO.ZS]',
       'Net migration [SM.POP.NETM]',
       'Mortality rate, infant (per 1,000 live births) [SP.DYN.IMRT.IN]',
       'Fertility rate, total (births per woman) [SP.DYN.TFRT.IN]',
       'Death rate, crude (per 1,000 people) [SP.DYN.CDRT.IN]',
       'Birth rate, crude (per 1,000 people) [SP.DYN.CBRT.IN]',
       'Age dependency ratio (% of working-age population) [SP.POP.DPND]'],
      dtype='object')

In [16]:
# first and last year covered by df2
print(f"min year: {df2['Year'].min()}", f"max year: {df2['Year'].max()}")

min year: 1969 max year: 2050


### 

### `iv` Implement Plotly  

`Figure of Population`

In [37]:
# Years that delimit the two shaded periods of the chart.
HISTORY_START = 1950       # first year of the series
PROJECTION_START = 2023    # values after this year are projections
PROJECTION_END = 2100      # last year of the series

fig = px.line(
    merged,
    x="date",
    y=" Population",
    title="Total Population of China",
    labels={"date": "Year", " Population": "Population"},
    width=800,
    height=500,
    template="plotly_dark",
    # also show the annual growth rate in the hover box
    hover_data={" Annual Growth Rate": True},
)

# Red: the historical period, from the first year up to the projection start.
fig.add_vline(x=HISTORY_START, line_width=3, line_dash="dash", line_color="red",
              annotation_text=str(HISTORY_START), annotation_position="top right")
fig.add_vrect(x0=HISTORY_START, x1=PROJECTION_START, fillcolor="red", opacity=0.05)

# Green: the projected period. A single marker sits on the boundary year;
# stacking a second line on the same x would only hide the first one.
fig.add_vline(x=PROJECTION_START, line_width=3, line_dash="dash", line_color="green",
              annotation_text=str(PROJECTION_START), annotation_position="top right")
fig.add_vrect(x0=PROJECTION_START, x1=PROJECTION_END, fillcolor="green", opacity=0.05)

fig.show()







            
            

`Birth Rate, Death Rate and Life Expectancy of China` 

In [18]:
# Birth rate, death rate and life expectancy on one chart.
fig2 = px.line(
    merged,
    x="date",
    y=" Births per 1000 People",
    title="Birth Rate, Death Rate and Life Expectancy of China",
    template="plotly_dark",
)
# px.line leaves its single trace unnamed and out of the legend. Name it and
# opt it in here instead of plotting the same series a second time.
fig2.update_traces(name="Birth Rate", showlegend=True)

# add death rate and life expectancy as further traces
fig2.add_scatter(x=merged['date'], y=merged[' Deaths per 1000 People'], name="Death Rate")
fig2.add_scatter(x=merged['date'], y=merged[' Life Expectancy from Birth (Years)'], name="Life Expectancy")

# Three different measures share the y axis, so it gets a neutral title rather
# than the name of the birth-rate column.
fig2.update_layout(xaxis_title="Year", yaxis_title="Value")
fig2.show()
                   
                  




`Figure of Dependency Ratio`

In [19]:
# built from df2 (the "Amgad Data" frame): dependency ratio per year as bars
dependency_bars = go.Bar(
    x=df2["Year"],
    y=df2["Age dependency ratio (% of working-age population) [SP.POP.DPND]"],
    name="Age dependency ratio (% of working-age population)",
    opacity=0.8,
)
fig3 = go.Figure(data=[dependency_bars])
fig3.update_layout(
    template='plotly_dark',
    title="Working-Age Dependency Ratio",
    xaxis_title="Year",
)
fig3.show()



`Figure of Density` 

In [43]:
# population density per year as a scatter plot
fig4 = px.scatter(
    data_frame=merged,
    x="date",
    y=" Population per Square KM",
    title="Density Per KM of China",
    template="plotly_dark",
)
fig4.show()

`Figure of Urban vs Rural` 

In [27]:
# Urban vs rural share of the population, as stacked bars built from df2.
URBAN_COL = 'Urban population (% of total population) [SP.URB.TOTL.IN.ZS]'
RURAL_COL = 'Rural population (% of total population) [SP.RUR.TOTL.ZS]'

# `labels` only relabels column names (axis and legend titles). With a list of
# y columns the legend entries are the column names themselves, so the two
# columns are renamed on a copy to get short legend entries; df2 is unchanged.
urban_rural = df2.rename(columns={URBAN_COL: "Urban Population",
                                  RURAL_COL: "Rural Population"})

fig10 = px.bar(
    urban_rural,
    x="Year",
    y=["Urban Population", "Rural Population"],
    title="Urban vs Rural Population of China",
    template="plotly_dark",
    # both columns are percentages of the total population
    labels={"value": "Population (% of total)", "variable": "Population Type"},
)
fig10.update_layout(
    barmode='stack',
    xaxis_title="Year",
    yaxis_title="Population (% of total)",
    legend_title="Population Type",
)
fig10.show()

`Banes`

# `Implement Dash`

In [23]:
# dash app
# dcc supplies the Graph component; it is imported here so this cell runs
# without needing a change to the import cell.
from dash import dcc

dash_app = JupyterDash(__name__)
# One Graph per figure built in the plotting cells above. An empty Div would
# serve a blank page.
dash_app.layout = html.Div([
    html.H1("China Plotly Dash"),
    dcc.Graph(figure=fig),
    dcc.Graph(figure=fig2),
    dcc.Graph(figure=fig3),
    dcc.Graph(figure=fig4),
    dcc.Graph(figure=fig10),
])
dash_app.run_server()


Dash app running on http://127.0.0.1:8050/
