# **China Plotly Dash**

---

# `01` Fetch Data

### `i` Import Necessary Libraries

In [1]:

import requests # to make HTTP requests
import json # to handle JSON files
import pandas as pd # to handle dataframes
import matplotlib.pyplot as plt # to make plots
# fetch from http page
import requests
# plotly
import plotly.graph_objects as go
import plotly.express as px
import plotly.io as pio
# jupyter-dash
from jupyter_dash import JupyterDash
# html
from dash import html

### `ii` Load Data from CSV Files

In [2]:

# Load the five China demographic series from local CSV files.
# NOTE: `popualtion` is a misspelling of "population"; the name is kept as-is
# because later cells refer to it.
popualtion = pd.read_csv("Data-2/china-population-2023-04-02.csv")
birth_rate = pd.read_csv("Data-2/birth_rate.csv")
death_rate = pd.read_csv("Data-2/death_rate.csv")
life_expectancy = pd.read_csv("Data-2/life_exp.csv")
# NOTE(review): "dentsity.csv" looks like a typo of "density.csv" — confirm against the file on disk.
density = pd.read_csv("Data-2/dentsity.csv") 


### `iii` EDA

In [3]:
df_list = [popualtion, birth_rate, death_rate, life_expectancy, density]

In [4]:
# check if data have same date range
def check_date_range(list_of_df):
    """Check that every frame covers the same dates.

    The first column of each frame is treated as its date column. Frames
    "match" when that column has the same name and the same set of values as
    the first frame's. Row order and duplicates are ignored.

    Parameters
    ----------
    list_of_df : list of pandas.DataFrame
        Frames to compare. An empty list is reported as a match.

    Returns
    -------
    bool
        True when all frames match, False otherwise. The verdict is also
        printed.
    """
    if list_of_df:
        reference = list_of_df[0]
        reference_name = reference.columns[0]
        reference_dates = set(reference.iloc[:, 0])
        # Comparing each frame with the first one is enough; comparing every
        # pair would repeat the same work.
        for frame in list_of_df[1:]:
            same_name = frame.columns[0] == reference_name
            # Compare the actual dates, not only the header: two frames can
            # both have a "date" column yet span different years.
            same_dates = set(frame.iloc[:, 0]) == reference_dates
            if not (same_name and same_dates):
                print("Date range not match")
                return False
    print("Date range match")
    return True
check_date_range([popualtion, birth_rate, death_rate, life_expectancy, density])

Date range match


True

In [5]:
#check  of if all data has same length
def check_length(df_list):
    """Print each frame's row count and report whether they all agree.

    Parameters
    ----------
    df_list : list of pandas.DataFrame
        Frames to inspect.

    Returns
    -------
    bool
        True when every frame has the same number of rows (also True for an
        empty list), False otherwise.
    """
    row_counts = [len(df) for df in df_list]
    for df, n_rows in zip(df_list, row_counts):
        # Each line is labelled with the frame's first column name.
        print("length of " + df.columns[0] + " is " + str(n_rows))
    # All counts are equal exactly when there is at most one distinct value.
    return len(set(row_counts)) <= 1

check_length(df_list)        

length of date is 151
length of date is 151
length of date is 151
length of date is 151
length of date is 151


In [6]:
#print columns for all dataframes
def print_columns(df_list):
    """Print the column index of every frame, each followed by a blank line."""
    for frame in df_list:
        # The empty second argument joined with "\n" produces the blank separator line.
        print(frame.columns, "", sep="\n")
        
print_columns(df_list)   

Index(['date', ' Population', ' Annual Growth Rate'], dtype='object')

Index(['date', ' Births per 1000 People', ' Annual % Change'], dtype='object')

Index(['date', ' Deaths per 1000 People', ' Annual % Change'], dtype='object')

Index(['date', ' Life Expectancy from Birth (Years)', ' Annual % Change'], dtype='object')

Index(['date', ' Population per Square KM', ' Annual % Change'], dtype='object')



In [7]:
# we found every data has column named Annual % Change and we want to change the name of this column to the name of the dataframe
def change_column_name(df_list):
    """Give each frame's generic ' Annual % Change' column a unique name.

    The new name is the frame's second column name followed by
    " Annual Change", so the columns no longer clash when the frames are
    merged. Frames are renamed in place.

    Parameters
    ----------
    df_list : list of pandas.DataFrame
        Frames whose second column holds the measure the change refers to.

    Returns
    -------
    list of pandas.DataFrame
        The same list object, holding the same (now renamed) frames.
    """
    generic_name = ' Annual % Change'
    for df in df_list:
        measure = df.columns[1]
        if generic_name in df.columns:
            new_name = measure + " Annual Change"
            df.rename(columns={generic_name: new_name}, inplace=True)
            # Report the exact name that was assigned.
            print("Column name changed to " + new_name + " in " + measure)
        else:
            print("Column name not found in " + measure)
    return df_list

df_list = change_column_name(df_list)
        


Column name not found in  Population
Column name changed to  Births per 1000 PeopleAnnual Change in  Births per 1000 People
Column name changed to  Deaths per 1000 PeopleAnnual Change in  Deaths per 1000 People
Column name changed to  Life Expectancy from Birth (Years)Annual Change in  Life Expectancy from Birth (Years)
Column name changed to  Population per Square KMAnnual Change in  Population per Square KM


In [8]:
# merge dataframes
def merge_df(df_list):
    """Merge all frames on their shared 'date' column, left to right.

    Starts from the first frame and merges each following frame into the
    running result using pandas' default join type.
    """
    combined = df_list[0]
    for frame in df_list[1:]:
        combined = combined.merge(frame, on='date')
    return combined

merged = merge_df(df_list)

In [9]:
merged.head()

Unnamed: 0,date,Population,Annual Growth Rate,Births per 1000 People,Births per 1000 People Annual Change,Deaths per 1000 People,Deaths per 1000 People Annual Change,Life Expectancy from Birth (Years),Life Expectancy from Birth (Years) Annual Change,Population per Square KM,Population per Square KM Annual Change
0,12/31/1950,543979233,,46.133,,23.366,,43.446,,56.66444,
1,12/31/1951,553613988,1.77,44.921,-2.63,23.106,-1.11,43.574,0.29,57.668059,1.77
2,12/31/1952,564954522,2.05,43.71,-2.7,22.845,-1.13,43.702,0.29,58.849363,2.05
3,12/31/1953,577378682,2.2,42.498,-2.77,22.585,-1.14,43.83,0.29,60.143545,2.2
4,12/31/1954,589936004,2.17,41.286,-2.85,22.325,-1.15,43.958,0.29,61.451598,2.17


In [10]:
# The first year of each series has no previous year to compare against, so its
# change-rate columns are NaN; replace those gaps with 0.
merged = merged.fillna(0)

In [11]:
# Save the merged data to CSV, without the index column.
# Note: this snapshot is written before the later cells convert `date` to a
# year and restrict the rows to 1969-2050.
merged.to_csv("merged.csv", index=False)

In [12]:
merged

Unnamed: 0,date,Population,Annual Growth Rate,Births per 1000 People,Births per 1000 People Annual Change,Deaths per 1000 People,Deaths per 1000 People Annual Change,Life Expectancy from Birth (Years),Life Expectancy from Birth (Years) Annual Change,Population per Square KM,Population per Square KM Annual Change
0,12/31/1950,543979233,0.00,46.133,0.00,23.366,0.00,43.446,0.00,56.664440,0.00
1,12/31/1951,553613988,1.77,44.921,-2.63,23.106,-1.11,43.574,0.29,57.668059,1.77
2,12/31/1952,564954522,2.05,43.710,-2.70,22.845,-1.13,43.702,0.29,58.849363,2.05
3,12/31/1953,577378682,2.20,42.498,-2.77,22.585,-1.14,43.830,0.29,60.143545,2.20
4,12/31/1954,589936004,2.17,41.286,-2.85,22.325,-1.15,43.958,0.29,61.451598,2.17
...,...,...,...,...,...,...,...,...,...,...,...
146,12/31/2096,804153594,-1.18,8.864,-0.06,13.646,-0.20,87.398,0.12,83.765905,-1.18
147,12/31/2097,794673479,-1.18,8.858,-0.07,13.620,-0.19,87.504,0.12,82.778394,-1.18
148,12/31/2098,785270314,-1.18,8.853,-0.06,13.594,-0.19,87.610,0.12,81.798899,-1.18
149,12/31/2099,775944429,-1.19,8.848,-0.06,13.568,-0.19,87.716,0.12,80.827454,-1.19


In [13]:
# Convert the date column from "12/31/2100"-style strings to an integer year.
# The conversion is skipped when the column already holds integers: re-running
# this cell would otherwise parse the years as nanosecond timestamps and
# silently turn every value into 1970.
if not pd.api.types.is_integer_dtype(merged['date']):
    # An explicit month/day/year format avoids any day-first guessing.
    merged['date'] = pd.to_datetime(merged['date'], format="%m/%d/%Y").dt.year
merged.head()

Unnamed: 0,date,Population,Annual Growth Rate,Births per 1000 People,Births per 1000 People Annual Change,Deaths per 1000 People,Deaths per 1000 People Annual Change,Life Expectancy from Birth (Years),Life Expectancy from Birth (Years) Annual Change,Population per Square KM,Population per Square KM Annual Change
0,1950,543979233,0.0,46.133,0.0,23.366,0.0,43.446,0.0,56.66444,0.0
1,1951,553613988,1.77,44.921,-2.63,23.106,-1.11,43.574,0.29,57.668059,1.77
2,1952,564954522,2.05,43.71,-2.7,22.845,-1.13,43.702,0.29,58.849363,2.05
3,1953,577378682,2.2,42.498,-2.77,22.585,-1.14,43.83,0.29,60.143545,2.2
4,1954,589936004,2.17,41.286,-2.85,22.325,-1.15,43.958,0.29,61.451598,2.17


In [14]:
# Keep only the years 1969-2050 (both ends included) so this frame spans the
# same range as the other datasets.
merged = merged[merged['date'].between(1969, 2050)]

In [15]:
merged = merged.reset_index(drop=True)  # renumber rows from 0 and discard the old index

### Amgad Data

In [16]:
df2 = pd.read_csv("Data-2/China demographics 1965-2050 (1).csv")
df2.head()

Unnamed: 0,Year,Urban population (% of total population) [SP.URB.TOTL.IN.ZS],Sex ratio at birth (male births per female births) [SP.POP.BRTH.MF],Rural population (% of total population) [SP.RUR.TOTL.ZS],"Population, total [SP.POP.TOTL]","Population, male (% of total population) [SP.POP.TOTL.MA.ZS]","Population, female (% of total population) [SP.POP.TOTL.FE.ZS]",Population growth (annual %) [SP.POP.GROW],Population ages 65 and above (% of total population) [SP.POP.65UP.TO.ZS],Net migration [SM.POP.NETM],"Mortality rate, infant (per 1,000 live births) [SP.DYN.IMRT.IN]","Fertility rate, total (births per woman) [SP.DYN.TFRT.IN]","Death rate, crude (per 1,000 people) [SP.DYN.CDRT.IN]","Birth rate, crude (per 1,000 people) [SP.DYN.CBRT.IN]",Age dependency ratio (% of working-age population) [SP.POP.DPND]
0,1969,17.528,1.063,82.472,796025000,50.959032,49.040968,2.74000210569558,3.68749,-72835,83.4,6.175,8.03,34.11,80.961979
1,1970,17.4,1.064,82.6,818315000,50.954552,49.045448,2.76167556645304,3.712521,-126514,79.7,6.085,7.6,33.43,80.524027
2,1971,17.292,1.064,82.708,841105000,50.952559,49.047441,2.74691554961857,3.743416,-180202,76.0,5.523,7.32,30.65,80.188208
3,1972,17.184,1.064,82.816,862030000,50.951571,49.048429,2.45735692952286,3.777798,-215730,72.5,5.112,7.61,29.77,79.481177
4,1973,17.184,1.064,82.816,881940000,50.950011,49.049989,2.28339536396383,3.821944,-215090,68.9,4.726,7.04,27.93,78.831239


In [17]:
df2.columns # print columns

Index(['Year', 'Urban population (% of total population) [SP.URB.TOTL.IN.ZS]',
       'Sex ratio at birth (male births per female births) [SP.POP.BRTH.MF]',
       'Rural population (% of total population) [SP.RUR.TOTL.ZS]',
       'Population, total [SP.POP.TOTL]',
       'Population, male (% of total population) [SP.POP.TOTL.MA.ZS]',
       'Population, female (% of total population) [SP.POP.TOTL.FE.ZS]',
       'Population growth (annual %) [SP.POP.GROW]',
       'Population ages 65 and above (% of total population) [SP.POP.65UP.TO.ZS]',
       'Net migration [SM.POP.NETM]',
       'Mortality rate, infant (per 1,000 live births) [SP.DYN.IMRT.IN]',
       'Fertility rate, total (births per woman) [SP.DYN.TFRT.IN]',
       'Death rate, crude (per 1,000 people) [SP.DYN.CDRT.IN]',
       'Birth rate, crude (per 1,000 people) [SP.DYN.CBRT.IN]',
       'Age dependency ratio (% of working-age population) [SP.POP.DPND]'],
      dtype='object')

In [18]:
# range of years
print("min year: " + str(df2['Year'].min()), "max year: " + str(df2['Year'].max()))

min year: 1969 max year: 2050


### `Pyramid Data`

In [19]:
# Earlier Excel-based load, kept for reference:
#pyramid_data =pd.read_excel("Data/china-population-pyramid-1960-2050.xls ", sheet_name="Data") # sheet_name="Data" to read only data
# Load the population-pyramid table from CSV.
# NOTE(review): this file is read from "Data/" while the other CSVs come from "Data-2/" — confirm the path.
pyramid_data = pd.read_csv("Data/china-population-pyramid-1960-2050.csv",)

In [20]:
# Explore Pyramid data
pyramid_data.head()

Unnamed: 0,Year,0-4Female,5-9Female,10-14Female,15-19Female,20-24Female,25-29Female,30-34Female,35-39Female,40-44Female,...,55-59Male,60-64Male,65-69Male,70-74Male,75-79Male,80-84Male,85-89Male,90-94Male,95-99Male,100+Male
0,1960,47920315.0,46829753.0,33432080.0,25471658.0,24122343.0,23327941.0,20533620.0,18630935.5,16497774.0,...,10724846.0,7732748.5,5380578.0,3229643.0,1571773.0,557486.0,166261.5,30039.0,2309.0,29.5
1,1961,43854185.0,47677726.0,35825728.0,26076781.5,24016057.5,23698797.5,20836776.5,18794400.0,16628372.0,...,10761714.0,7718281.5,5340784.0,3111826.5,1493829.5,539886.0,150155.0,26290.5,1912.5,16.5
2,1962,42618084.0,48227487.0,38568543.0,27060995.5,24056249.0,23960210.5,21219384.0,19007207.5,16794522.0,...,10812142.0,7851263.0,5353814.0,3084425.0,1474477.0,540474.0,138849.5,23929.5,1612.0,10.0
3,1963,45892983.5,48233568.0,41316192.0,28554232.5,24141310.0,23937322.5,21672178.0,19363372.0,17074803.5,...,10874094.5,8075194.0,5383838.5,3117905.0,1491396.5,547760.0,130685.5,22378.0,1406.0,5.0
4,1964,51299213.5,46735446.0,43595994.0,30430756.0,24424064.0,23695036.5,22181661.5,19632192.5,17492035.5,...,10986288.5,8343232.0,5396712.5,3160661.5,1518130.0,548604.0,125479.5,20836.5,1243.0,3.5


In [21]:
pyramid_data.columns # print columns


Index(['Year', '0-4Female', '5-9Female', '10-14Female', '15-19Female',
       '20-24Female', '25-29Female', '30-34Female', '35-39Female',
       '40-44Female', '45-49Female', '50-54Female', '55-59Female',
       '60-64Female', '65-69Female', '70-74Female', '75-79Female',
       '80-84Female', '85-89Female', '90-94Female', '95-99Female',
       '100+Female', '0-4Male', '5-9Male', '10-14Male', '15-19Male',
       '20-24Male', '25-29Male', '30-34Male', '35-39Male', '40-44Male',
       '45-49Male', '50-54Male', '55-59Male', '60-64Male', '65-69Male',
       '70-74Male', '75-79Male', '80-84Male', '85-89Male', '90-94Male',
       '95-99Male', '100+Male'],
      dtype='object')

In [22]:
pyramid_data.columns[1:22] # first group of columns: the female age bands

Index(['0-4Female', '5-9Female', '10-14Female', '15-19Female', '20-24Female',
       '25-29Female', '30-34Female', '35-39Female', '40-44Female',
       '45-49Female', '50-54Female', '55-59Female', '60-64Female',
       '65-69Female', '70-74Female', '75-79Female', '80-84Female',
       '85-89Female', '90-94Female', '95-99Female', '100+Female'],
      dtype='object')

In [23]:
pyramid_data.columns[22:44] # second group of columns: the male age bands

Index(['0-4Male', '5-9Male', '10-14Male', '15-19Male', '20-24Male',
       '25-29Male', '30-34Male', '35-39Male', '40-44Male', '45-49Male',
       '50-54Male', '55-59Male', '60-64Male', '65-69Male', '70-74Male',
       '75-79Male', '80-84Male', '85-89Male', '90-94Male', '95-99Male',
       '100+Male'],
      dtype='object')

In [24]:
# Split the pyramid into one frame per sex. Columns are picked by name suffix
# rather than by position: the positional slices used before were swapped
# (columns 1-21 are the "...Female" columns, columns 22 onward the "...Male" ones).
men_data = pyramid_data.filter(regex='Male$') # men data; case-sensitive, so "...Female" (lowercase "male") is not matched
women_data = pyramid_data.filter(regex='Female$') # women data


In [25]:
pyramid_data

Unnamed: 0,Year,0-4Female,5-9Female,10-14Female,15-19Female,20-24Female,25-29Female,30-34Female,35-39Female,40-44Female,...,55-59Male,60-64Male,65-69Male,70-74Male,75-79Male,80-84Male,85-89Male,90-94Male,95-99Male,100+Male
0,1960,47920315.00,46829753.00,33432080.00,25471658.00,24122343.00,23327941.00,20533620.00,18630935.50,16497774.00,...,10724846.00,7732748.50,5380578.00,3229643.00,1571773.00,557486.00,166261.50,30039.00,2309.00,29.50
1,1961,43854185.00,47677726.00,35825728.00,26076781.50,24016057.50,23698797.50,20836776.50,18794400.00,16628372.00,...,10761714.00,7718281.50,5340784.00,3111826.50,1493829.50,539886.00,150155.00,26290.50,1912.50,16.50
2,1962,42618084.00,48227487.00,38568543.00,27060995.50,24056249.00,23960210.50,21219384.00,19007207.50,16794522.00,...,10812142.00,7851263.00,5353814.00,3084425.00,1474477.00,540474.00,138849.50,23929.50,1612.00,10.00
3,1963,45892983.50,48233568.00,41316192.00,28554232.50,24141310.00,23937322.50,21672178.00,19363372.00,17074803.50,...,10874094.50,8075194.00,5383838.50,3117905.00,1491396.50,547760.00,130685.50,22378.00,1406.00,5.00
4,1964,51299213.50,46735446.00,43595994.00,30430756.00,24424064.00,23695036.50,22181661.50,19632192.50,17492035.50,...,10986288.50,8343232.00,5396712.50,3160661.50,1518130.00,548604.00,125479.50,20836.50,1243.00,3.50
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
86,2046,24886285.50,24672713.00,23906747.00,24009361.50,24688067.50,34491899.50,41216552.00,39028621.50,35678662.00,...,60609076.00,50611300.50,41575488.50,44470440.00,41820927.50,28599909.00,11141836.50,5215883.50,824204.50,41810.00
87,2047,24670788.50,24834136.00,23981892.00,23912899.50,24499517.50,31096746.00,41267756.00,39773244.00,36086272.50,...,58591508.50,52300110.00,43074212.00,42248528.50,42687484.00,30223013.50,11405663.50,5479257.00,931267.50,47610.50
88,2048,24362773.50,24936250.00,24106410.00,23851113.00,24319076.50,28278187.00,40583569.50,40428475.50,36744836.50,...,55929049.00,54246881.00,44658169.00,40049971.50,42926577.50,30754932.00,13075476.50,5674290.50,1039990.00,54261.50
89,2049,23947531.50,24979265.00,24275496.00,23811101.50,24142448.50,26284203.50,39140215.00,40792478.00,37547722.50,...,52988064.50,56437396.00,45673892.00,38517002.00,42290607.00,31466294.50,15222165.50,5596963.50,1135300.00,61656.50


In [26]:
# Values may be written with thousands separators (e.g. 47,920,315.00); strip the
# commas so the columns can be converted to float. The decimal point is kept.
pyramid_data = pyramid_data.replace(',','', regex=True) # regex=True replaces commas inside a value, not only cells equal to ','

    

In [27]:
#convert all values to float
pyramid_data = pyramid_data.astype(float)

In [28]:
# Keep only 1969-2050 (both ends included) so the pyramid covers the same years
# as the other datasets. The upper bound is applied explicitly instead of
# relying on the file happening to end in 2050.
pyramid_data = pyramid_data[pyramid_data['Year'].between(1969, 2050)]

In [29]:
# Renumber the rows from 0 now that the earlier years have been dropped.
pyramid_data = pyramid_data.reset_index(drop=True)

In [30]:
# final we want to check that no data is missing and distribution of data
pyramid_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 82 entries, 0 to 81
Data columns (total 43 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   Year         82 non-null     float64
 1   0-4Female    82 non-null     float64
 2   5-9Female    82 non-null     float64
 3   10-14Female  82 non-null     float64
 4   15-19Female  82 non-null     float64
 5   20-24Female  82 non-null     float64
 6   25-29Female  82 non-null     float64
 7   30-34Female  82 non-null     float64
 8   35-39Female  82 non-null     float64
 9   40-44Female  82 non-null     float64
 10  45-49Female  82 non-null     float64
 11  50-54Female  82 non-null     float64
 12  55-59Female  82 non-null     float64
 13  60-64Female  82 non-null     float64
 14  65-69Female  82 non-null     float64
 15  70-74Female  82 non-null     float64
 16  75-79Female  82 non-null     float64
 17  80-84Female  82 non-null     float64
 18  85-89Female  82 non-null     float64
 19  90-94Femal

### 

### `iv` Implement Plotly  

`Figure of Population` 

In [31]:
df2.columns

Index(['Year', 'Urban population (% of total population) [SP.URB.TOTL.IN.ZS]',
       'Sex ratio at birth (male births per female births) [SP.POP.BRTH.MF]',
       'Rural population (% of total population) [SP.RUR.TOTL.ZS]',
       'Population, total [SP.POP.TOTL]',
       'Population, male (% of total population) [SP.POP.TOTL.MA.ZS]',
       'Population, female (% of total population) [SP.POP.TOTL.FE.ZS]',
       'Population growth (annual %) [SP.POP.GROW]',
       'Population ages 65 and above (% of total population) [SP.POP.65UP.TO.ZS]',
       'Net migration [SM.POP.NETM]',
       'Mortality rate, infant (per 1,000 live births) [SP.DYN.IMRT.IN]',
       'Fertility rate, total (births per woman) [SP.DYN.TFRT.IN]',
       'Death rate, crude (per 1,000 people) [SP.DYN.CDRT.IN]',
       'Birth rate, crude (per 1,000 people) [SP.DYN.CBRT.IN]',
       'Age dependency ratio (% of working-age population) [SP.POP.DPND]'],
      dtype='object')

In [32]:
merged["date"]

0     1969
1     1970
2     1971
3     1972
4     1973
      ... 
77    2046
78    2047
79    2048
80    2049
81    2050
Name: date, Length: 82, dtype: int64

In [33]:
merged

Unnamed: 0,date,Population,Annual Growth Rate,Births per 1000 People,Births per 1000 People Annual Change,Deaths per 1000 People,Deaths per 1000 People Annual Change,Life Expectancy from Birth (Years),Life Expectancy from Birth (Years) Annual Change,Population per Square KM,Population per Square KM Annual Change
0,1969,801430976,2.70,37.976,-3.83,12.075,-5.66,56.710,2.24,83.482299,2.70
1,1970,822534450,2.63,36.463,-3.98,11.351,-6.00,57.950,2.19,85.680576,2.63
2,1971,843285424,2.52,34.951,-4.15,10.626,-6.39,59.190,2.14,87.842133,2.52
3,1972,862840403,2.32,33.438,-4.33,9.902,-6.81,60.430,2.09,89.879108,2.32
4,1973,881652080,2.18,31.926,-4.52,9.178,-7.31,61.670,2.05,91.838655,2.18
...,...,...,...,...,...,...,...,...,...,...,...
77,2046,1343210240,-0.49,9.231,-0.29,12.390,1.52,81.204,0.19,139.917576,-0.49
78,2047,1336262908,-0.52,9.205,-0.28,12.576,1.50,81.362,0.19,139.193896,-0.52
79,2048,1328873607,-0.55,9.179,-0.28,12.761,1.47,81.520,0.19,138.424178,-0.55
80,2049,1321004205,-0.59,9.150,-0.32,12.917,1.22,81.672,0.19,137.604450,-0.59


In [34]:
# Total population (left axis, log scale) with the share of people aged 65 and
# above (right axis, fixed to 0-50) overlaid.
fig = px.line(
    merged, x="date", y=" Population", title="Total Population of China",
    labels={"date": "Year", " Population": "Population"},
    width=800, height=500, template="plotly_dark",
    hover_data={" Annual Growth Rate": True},  # show the annual growth rate on hover
)

# Share of the population aged 65+, taken from df2 and drawn against the secondary axis.
fig.add_scatter(
    x=df2['Year'],
    y=df2['Population ages 65 and above (% of total population) [SP.POP.65UP.TO.ZS]'],
    yaxis="y2", name="Population ages 65 and above", mode='lines',
    line=dict(color='red', width=2),
)

fig.update_layout(
    yaxis_type="log",
    yaxis2=dict(overlaying='y', side='right', range=[0, 50], title='above 65'),
    legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
)

# Historical period (1969-2023), shaded red. The start marker is labelled with
# the year it is actually drawn at.
fig.add_vline(x=1969, line_width=3, line_dash="dash", line_color="red",  annotation_text="1969", annotation_position="top right")
fig.add_vrect(x0=1969, x1=2023, fillcolor="red", opacity=0.05, annotation_text="", annotation_position="top left", annotation_font_size=12)

# Projection period (2023-2050), shaded green. A single marker at 2023 separates
# the two periods; drawing it twice only stacked two lines and two labels.
fig.add_vline(x=2023, line_width=3, line_dash="dash", line_color="green",  annotation_text="2023", annotation_position="top right")
fig.add_vrect(x0=2023, x1=2050, fillcolor="green", opacity=0.05, annotation_text="", annotation_position="top left", annotation_font_size=12)







            
            

`Birth Rate, Death Rate and Life Expectancy of China` 

In [35]:
# Birth rate, death rate and life expectancy on one chart.
fig2 = px.line(
    merged, x="date", y=" Births per 1000 People", template="plotly_dark",
    title="Birth Rate, Death Rate and Life Expectancy of China",
    labels={"date": "Year"},
)
# px.line hides the legend entry of a single unnamed trace. Name it and show it,
# instead of drawing the birth-rate series a second time just to get a legend entry.
fig2.update_traces(name="Birth Rate", showlegend=True)

# Add the death rate and life expectancy as further lines.
fig2.add_scatter(x=merged['date'], y=merged[' Deaths per 1000 People'], name="Death Rate")
fig2.add_scatter(x=merged['date'], y=merged[' Life Expectancy from Birth (Years)'], name="Life Expectancy")

# The y axis is shared by all three series, so its title must not mention births only.
fig2.update_layout(yaxis_title="Births / deaths per 1000 people, life expectancy (years)")
                   
                  




`Figure of Dependency Ratio` 

In [36]:
# Age dependency ratio per year as a bar chart, built from df2 (the "Amgad" dataset).
fig3 = go.Figure(
    data=[
        go.Bar(
            x=df2["Year"],
            y=df2["Age dependency ratio (% of working-age population) [SP.POP.DPND]"],
            name="Age dependency ratio (% of working-age population)",
            opacity=0.8,
        )
    ]
)
fig3.update_layout(
    title="Working-Age Dependency Ratio",
    xaxis_title="Year",
    template='plotly_dark',
)
fig3.show()



`Figure of Density` 

In [37]:
# Population density (people per square km) by year, one marker per row of `merged`.
fig4 = px.scatter(
    data_frame=merged,
    x="date",
    y=" Population per Square KM",
    template="plotly_dark",
    title="Density Per KM of China",
)
fig4.show()

`Figure of Urban vs Rural` 

In [38]:
# Urban vs rural share of the population, as stacked bars per year (from df2).
# In wide-form mode px.bar uses the column names themselves as legend entries and
# `labels` does not rename them, so the two columns get short names first.
urban_rural = df2.rename(columns={
    'Urban population (% of total population) [SP.URB.TOTL.IN.ZS]': "Urban Population",
    'Rural population (% of total population) [SP.RUR.TOTL.ZS]': "Rural Population",
})
fig10 = px.bar(
    urban_rural, x="Year", y=["Urban Population", "Rural Population"],
    title="Urban vs Rural Population of China", template="plotly_dark",
    labels={"value": "% of total population", "variable": "Population Type", "Year": "Year"},
)
# The plotted values are percentages of the total population, not head counts,
# so the y axis is titled accordingly.
fig10.update_layout(barmode='stack', xaxis_title="Year", yaxis_title="% of total population",
                    legend_title="Population Type")

fig10.show()

`Pyramid Figure`

In [39]:
# Male columns (position 22 onward) of the row where Year == 2024
pyramid_data.iloc[:,22:][pyramid_data['Year'] == 2024]

Unnamed: 0,0-4Male,5-9Male,10-14Male,15-19Male,20-24Male,25-29Male,30-34Male,35-39Male,40-44Male,45-49Male,...,55-59Male,60-64Male,65-69Male,70-74Male,75-79Male,80-84Male,85-89Male,90-94Male,95-99Male,100+Male
55,29918891.0,45517498.5,48463752.0,45115818.0,42666238.5,44740790.5,55973054.5,61256089.5,52154649.0,47859658.0,...,57122468.0,41312244.5,35334064.5,28454002.5,16153373.0,8703738.5,4101749.5,1188671.0,145131.0,5073.0


In [40]:
pyramid_data.columns[1:22].str.replace('Female','')


Index(['0-4', '5-9', '10-14', '15-19', '20-24', '25-29', '30-34', '35-39',
       '40-44', '45-49', '50-54', '55-59', '60-64', '65-69', '70-74', '75-79',
       '80-84', '85-89', '90-94', '95-99', '100+'],
      dtype='object')

In [41]:
# Population pyramid for one row of pyramid_data: male bars extend to the left
# (negative x), female bars to the right.
initial_row = 50  # row position of the year shown when the figure is first drawn
age_groups = pyramid_data.columns[1:22].str.replace('Female','')  # '0-4', '5-9', ... shared by both sexes
male_counts = pyramid_data.iloc[:,22:].iloc[initial_row]  # the "...Male" columns
female_counts = pyramid_data.iloc[:,1:22].iloc[initial_row]  # the "...Female" columns

# Male trace. It must read the male columns; it previously mirrored the female data.
trace_male = go.Bar(
    x=-male_counts,  # negated so the bars grow leftwards
    y=age_groups,
    orientation='h',
    name='Male',
    marker=dict(),
    # Hover shows the real (positive) head count instead of the negated x value.
    customdata=male_counts,
    hovertemplate='%{y} %{customdata:,.0f} <extra></extra>',
)

# Female trace.
trace_female = go.Bar(
    x=female_counts,
    y=age_groups,
    orientation='h',
    name='Female',
    marker=dict(),
    hovertemplate='%{y} %{x:,.0f} <extra></extra>',
)

# Combine the traces into a single figure.
fig_py = go.Figure(data=[trace_male, trace_female])

# Titles, size, and symmetric tick labels so both sides read as positive counts.
fig_py.update_layout(
    title=f'Age Distribution in China in {int(pyramid_data["Year"].iloc[initial_row])} ',
    xaxis_title='Population',
    yaxis_title='Age Group',
    barmode='relative',
    bargap=.1,
    width=800,
    height=500,
    margin=dict(l=100, r=100, t=50, b=50),
    template="plotly_dark",
    xaxis=go.layout.XAxis(
        tickvals=[-60000000, -40000000, -20000000, 0, 20000000, 40000000, 60000000],
        ticktext=["60M", "40M", "20M", "0", "20M", "40M", '60M'],
    ),
)

fig_py.show()


`Gender Pie Figure`

In [42]:
df2.columns

Index(['Year', 'Urban population (% of total population) [SP.URB.TOTL.IN.ZS]',
       'Sex ratio at birth (male births per female births) [SP.POP.BRTH.MF]',
       'Rural population (% of total population) [SP.RUR.TOTL.ZS]',
       'Population, total [SP.POP.TOTL]',
       'Population, male (% of total population) [SP.POP.TOTL.MA.ZS]',
       'Population, female (% of total population) [SP.POP.TOTL.FE.ZS]',
       'Population growth (annual %) [SP.POP.GROW]',
       'Population ages 65 and above (% of total population) [SP.POP.65UP.TO.ZS]',
       'Net migration [SM.POP.NETM]',
       'Mortality rate, infant (per 1,000 live births) [SP.DYN.IMRT.IN]',
       'Fertility rate, total (births per woman) [SP.DYN.TFRT.IN]',
       'Death rate, crude (per 1,000 people) [SP.DYN.CDRT.IN]',
       'Birth rate, crude (per 1,000 people) [SP.DYN.CBRT.IN]',
       'Age dependency ratio (% of working-age population) [SP.POP.DPND]'],
      dtype='object')

In [43]:
# Give the male and female share-of-population columns short names
# ('Male Percentage' and 'Female Percentage') in a single rename call.
df2 = df2.rename(columns={
    'Population, male (% of total population) [SP.POP.TOTL.MA.ZS]': 'Male Percentage',
    'Population, female (% of total population) [SP.POP.TOTL.FE.ZS]': 'Female Percentage',
})


In [44]:
# Pie chart of the male / female share of the population for a single year.
# Columns are selected by name rather than by position, so the chart does not
# depend on the column order of df2.
gender_columns = ['Male Percentage', 'Female Percentage']
# Same row position as the initial population pyramid (50), so both figures
# start on the same year.
pie_row = 50

fig_pie = go.Figure()
fig_pie.add_trace(go.Pie(labels=gender_columns, values=df2[gender_columns].iloc[pie_row]))
fig_pie.update_traces(hoverinfo='label+value', textfont_size=10,
                      marker=dict(line=dict(color='#000000', width=1)))

fig_pie.update_layout(title=f'Gender Distribution in China in {df2["Year"].iloc[pie_row]}',
    template="plotly_dark", legend_title="Gender", legend=dict(x=0.80, y=0.90,),height=500, width=800)


    


In [45]:
# index of pyramids data
pyramid_data["Year"].max()

2050.0

In [46]:
#index of df2
df2.index

RangeIndex(start=0, stop=82, step=1)

In [47]:
from dash.dependencies import Input, Output
from dash.exceptions import PreventUpdate
# dcc
from dash import dcc
# jupyter dash
from jupyter_dash import JupyterDash


app = JupyterDash(__name__)

# Page layout: a heading, the pyramid and gender-pie graphs, and a year selector
# whose value drives both graphs through the callback defined below.
app.layout = html.Div([
    html.H1("China Population Pyramid", style={'textAlign': 'center'}),
    html.Div([
        html.Div([
            dcc.Graph(id='fig1', figure=fig_py),
            dcc.Graph(id='fig2', figure=fig_pie),

            # Year selector. Each option's value is the row *position* of that
            # year in pyramid_data (the callback uses it with .iloc); the label
            # is the calendar year as a whole number, not a float like 1969.0.
            dcc.Dropdown(
                id='year',
                options=[{'label': str(int(pyramid_data["Year"].iloc[i])), 'value': i}
                         for i in range(len(pyramid_data))],
                # Start on the row the initial pyramid figure is built from, so
                # the selector is not blank while a chart is already shown.
                value=50,
                style={'width': '100%'},
            )
        ])])])

@app.callback(
    Output(component_id='fig1', component_property='figure'),
    Output(component_id='fig2', component_property='figure'),
    Input(component_id='year', component_property='value')
)
def update_figure(year):
    """Rebuild the population pyramid and the gender pie for the selected row.

    Parameters
    ----------
    year : int or None
        Value of the 'year' dropdown: the row position in ``pyramid_data``
        (not the calendar year), or None when nothing is selected.

    Returns
    -------
    tuple
        ``(pyramid_figure, pie_figure)`` for the 'fig1' and 'fig2' graphs.

    Raises
    ------
    PreventUpdate
        When no year is selected, leaving the current figures untouched.
    """
    if year is None:
        raise PreventUpdate

    selected_year = int(pyramid_data["Year"].iloc[year])
    age_groups = pyramid_data.columns[1:22].str.replace('Female', '')
    male_counts = pyramid_data.iloc[:, 22:].iloc[year]
    female_counts = pyramid_data.iloc[:, 1:22].iloc[year]

    # Pyramid: male bars grow to the left (negative x), female bars to the right.
    pyramid = go.Figure(data=[
        go.Bar(
            x=-male_counts,
            y=age_groups,
            orientation='h',
            name='Male',
            marker=dict(),
            # Hover shows the real (positive) head count instead of the negated x value.
            customdata=male_counts,
            hovertemplate='%{y} %{customdata:,.0f} <extra></extra>',
        ),
        go.Bar(
            x=female_counts,
            y=age_groups,
            orientation='h',
            name='Female',
            marker=dict(),
            hovertemplate='%{y} %{x:,.0f} <extra></extra>',
        ),
    ])
    # The layout is applied once, including symmetric tick labels so both sides
    # read as positive counts.
    pyramid.update_layout(
        title=f'Age Distribution in China in {selected_year} ',
        xaxis_title='Population',
        yaxis_title='Age Group',
        barmode='relative',
        bargap=.1,
        width=800,
        height=500,
        margin=dict(l=100, r=100, t=50, b=50),
        template="plotly_dark",
        xaxis=go.layout.XAxis(
            tickvals=[-60000000, -40000000, -20000000, 0, 20000000, 40000000, 60000000],
            ticktext=["60M", "40M", "20M", "0", "20M", "40M", '60M'],
        ),
    )

    # Gender pie. Columns are selected by name; the row is taken by position,
    # which assumes df2 rows line up with pyramid_data rows (same years, same order).
    gender_columns = ['Male Percentage', 'Female Percentage']
    pie = go.Figure()
    pie.add_trace(go.Pie(labels=gender_columns,
                         values=df2[gender_columns].iloc[year]))
    pie.update_traces(hoverinfo='label+value', textfont_size=10,
                      marker=dict(line=dict(color='#000000', width=1)))
    pie.update_layout(title=f'Gender Distribution in China in {selected_year}',
                      template="plotly_dark", legend_title="Gender",
                      legend=dict(x=0.80, y=0.90,), height=500, width=800)

    return pyramid, pie






    

# Start the Dash server (the recorded output shows it on http://127.0.0.1:8050/).
# NOTE(review): recent Dash releases prefer `app.run()` over `run_server()` — confirm against the installed version.
app.run_server()





Dash app running on http://127.0.0.1:8050/


`BANs`

In [48]:
#BAN 1 : Total pop
#Ratio from world pop

#*********************

#BAN 2 : GDP


#*********************



# `Implement Dash`