# Import

## Libraries 

In [386]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from functools import reduce
import folium
import folium.plugins
import requests
import plotly.figure_factory as ff
from urllib.request import urlopen
import json
from branca.colormap import linear, LinearColormap
from dash import Dash, dcc, html, Input, Output, callback
import dash_bootstrap_components as dbc

In [387]:
pd.set_option('display.max_columns', None)

## Data

In [388]:
df_CD_all = pd.read_csv('Datasets/Disease/cardiovascular_disease_all.csv')
df_CD_U75 = pd.read_csv('Datasets/Disease/cardiovascular_disease_U75.csv')
df_LC_all = pd.read_csv('Datasets/Disease/lung_cancer_all.csv')
df_LC_U75 = pd.read_csv('Datasets/Disease/lung_cancer_U75.csv')
df_RD_all = pd.read_csv('Datasets/Disease/respiratory_disease_all.csv')
df_RD_U75 = pd.read_csv('Datasets/Disease/respiratory_disease_U75.csv')
df_CRD_all = pd.read_csv('Datasets/Disease/respiratory_disease_cronic_all.csv')
df_CRD_U75 = pd.read_csv('Datasets/Disease/respiratory_disease_cronic_U75.csv')

In [389]:
df_IS=pd.read_excel('Datasets/Disease/Index_scores.xlsx')
df_O3=pd.read_excel('Datasets/Pollution/O3.xlsx')
df_NO2=pd.read_excel('Datasets/Pollution/NO2.xlsx')
df_PM10=pd.read_excel('Datasets/Pollution/PM10.xlsx')
df_A2R=pd.read_excel('Datasets/Pollution/Area_to_Region.xlsx')

# EDA

## Data cleaning

### Health datasets
All of the health datasets contain additional collumns that is not relevant for the dashboard. Additionally, the data collected is a collection of different groups, why the irrelevant rows are sorted out. 

In [390]:
df_CD_all[(df_CD_all['Area Name'] == 'England') & (df_CD_all['Time period range'] == '1y')].head()


Unnamed: 0,Indicator ID,Indicator Name,Parent Code,Parent Name,Area Code,Area Name,Area Type,Sex,Age,Category Type,Category,Time period,Value,Lower CI 95.0 limit,Upper CI 95.0 limit,Lower CI 99.8 limit,Upper CI 99.8 limit,Count,Denominator,Value note,Recent Trend,Compared to England value or percentiles,Compared to percentiles,Time period Sortable,New data,Compared to goal,Time period range
0,93956,Mortality rate from cardiovascular disease,,,E92000001,England,England,Male,All ages,,,2001,570.8773,566.89546,574.87896,564.60847,577.19198,88545.87049,24165587.0,,,Not compared,Not compared,20010000.0,,,1y
1,93956,Mortality rate from cardiovascular disease,,,E92000001,England,England,Female,All ages,,,2001,374.56014,372.16964,376.9621,370.79645,378.35034,95451.97463,25284159.0,,,Not compared,Not compared,20010000.0,,,1y
2,93956,Mortality rate from cardiovascular disease,,,E92000001,England,England,Persons,All ages,,,2001,457.79384,455.68964,459.90531,454.47958,461.1249,183997.84512,49449746.0,,,Not compared,Not compared,20010000.0,,,1y
3,93956,Mortality rate from cardiovascular disease,E92000001,England,E92000001,England,England,Male,All ages,,,2001,570.8773,566.89546,574.87896,564.60847,577.19198,88545.87049,24165587.0,,,Similar,Not compared,20010000.0,,,1y
4,93956,Mortality rate from cardiovascular disease,E92000001,England,E92000001,England,England,Female,All ages,,,2001,374.56014,372.16964,376.9621,370.79645,378.35034,95451.97463,25284159.0,,,Similar,Not compared,20010000.0,,,1y


In [391]:
df_CD_all['Area Name'].unique()

array(['England', 'North East region', 'North West region',
       'Yorkshire and the Humber region', 'East Midlands region',
       'West Midlands region', 'East of England region', 'London region',
       'South East region', 'South West region', nan], dtype=object)

In [392]:
# https://www.geeksforgeeks.org/how-to-select-rows-from-a-dataframe-based-on-column-values/

df_CD_all = df_CD_all[(df_CD_all['Time period range'] == '1y') & ((df_CD_all['Area Type'] == 'Regions') | (df_CD_all['Area Name'] == 'England'))]
df_CD_U75 = df_CD_U75[(df_CD_U75['Time period range'] == '1y') & ((df_CD_U75['Area Type'] == 'Regions') | (df_CD_U75['Area Name'] == 'England'))]
df_LC_all = df_LC_all[(df_LC_all['Time period range'] == '1y') & ((df_LC_all['Area Type'] == 'Regions') | (df_LC_all['Area Name'] == 'England'))]
df_LC_U75 = df_LC_U75[(df_LC_U75['Time period range'] == '1y') & ((df_LC_U75['Area Type'] == 'Regions') | (df_LC_U75['Area Name'] == 'England'))]
df_RD_all = df_RD_all[(df_RD_all['Time period range'] == '1y') & ((df_RD_all['Area Type'] == 'Regions') | (df_RD_all['Area Name'] == 'England'))]
df_RD_U75 = df_RD_U75[(df_RD_U75['Time period range'] == '1y') & ((df_RD_U75['Area Type'] == 'Regions') | (df_RD_U75['Area Name'] == 'England'))]
df_CRD_all = df_CRD_all[(df_CRD_all['Time period range'] == '1y') & ((df_CRD_all['Area Type'] == 'Regions') | (df_CRD_all['Area Name'] == 'England'))]
df_CRD_U75 = df_CRD_U75[(df_CRD_U75['Time period range'] == '1y') & ((df_CRD_U75['Area Type'] == 'Regions') | (df_CRD_U75['Area Name'] == 'England'))]

In [393]:
df_CD_all['Area Name'].unique()

array(['England', 'North East region', 'North West region',
       'Yorkshire and the Humber region', 'East Midlands region',
       'West Midlands region', 'East of England region', 'London region',
       'South East region', 'South West region'], dtype=object)

In [394]:
df_CD_all.head(1)

Unnamed: 0,Indicator ID,Indicator Name,Parent Code,Parent Name,Area Code,Area Name,Area Type,Sex,Age,Category Type,Category,Time period,Value,Lower CI 95.0 limit,Upper CI 95.0 limit,Lower CI 99.8 limit,Upper CI 99.8 limit,Count,Denominator,Value note,Recent Trend,Compared to England value or percentiles,Compared to percentiles,Time period Sortable,New data,Compared to goal,Time period range
0,93956,Mortality rate from cardiovascular disease,,,E92000001,England,England,Male,All ages,,,2001,570.8773,566.89546,574.87896,564.60847,577.19198,88545.87049,24165587.0,,,Not compared,Not compared,20010000.0,,,1y


In [395]:
# https://stackoverflow.com/questions/20110170/turn-pandas-multi-index-into-column

df_CD_all_value = df_CD_all.groupby(['Time period', 'Area Code', 'Area Name', 'Age', 'Sex'])['Value'].sum().reset_index()
df_CD_U75_value = df_CD_U75.groupby(['Time period', 'Area Code', 'Area Name', 'Age', 'Sex'])['Value'].sum().reset_index()
df_LC_all_value = df_LC_all.groupby(['Time period', 'Area Code', 'Area Name', 'Age', 'Sex'])['Value'].sum().reset_index()
df_LC_U75_value = df_LC_U75.groupby(['Time period', 'Area Code', 'Area Name', 'Age', 'Sex'])['Value'].sum().reset_index()
df_RD_all_value = df_RD_all.groupby(['Time period', 'Area Code', 'Area Name', 'Age', 'Sex'])['Value'].sum().reset_index()
df_RD_U75_value = df_RD_U75.groupby(['Time period', 'Area Code', 'Area Name', 'Age', 'Sex'])['Value'].sum().reset_index()
df_CRD_all_value = df_CRD_all.groupby(['Time period', 'Area Code', 'Area Name', 'Age', 'Sex'])['Value'].sum().reset_index()
df_CRD_U75_value = df_CRD_U75.groupby(['Time period', 'Area Code', 'Area Name', 'Age', 'Sex'])['Value'].sum().reset_index()

In [396]:
df_CD_all_value

Unnamed: 0,Time period,Area Code,Area Name,Age,Sex,Value
0,2001,E12000001,North East region,All ages,Female,412.73573
1,2001,E12000001,North East region,All ages,Male,642.33102
2,2001,E12000001,North East region,All ages,Persons,507.92696
3,2001,E12000002,North West region,All ages,Female,426.25791
4,2001,E12000002,North West region,All ages,Male,645.77167
...,...,...,...,...,...,...
685,2023,E12000009,South West region,All ages,Male,274.25684
686,2023,E12000009,South West region,All ages,Persons,218.27538
687,2023,E92000001,England,All ages,Female,362.40294
688,2023,E92000001,England,All ages,Male,588.33416


In [397]:
df_CD_all_value = df_CD_all_value.rename(columns={'Time period':'Year', 'Area Name':'Area', 'Value': 'Cardiovascular Disease'})
df_CD_U75_value = df_CD_U75_value.rename(columns={'Time period':'Year', 'Area Name':'Area', 'Value': 'Cardiovascular Disease'})
df_LC_all_value = df_LC_all_value.rename(columns={'Time period':'Year', 'Area Name':'Area', 'Value': 'Lung Cancer'})
df_LC_U75_value = df_LC_U75_value.rename(columns={'Time period':'Year', 'Area Name':'Area', 'Value': 'Lung Cancer'})
df_RD_all_value = df_RD_all_value.rename(columns={'Time period':'Year', 'Area Name':'Area', 'Value': 'Respiratory Disease'})
df_RD_U75_value = df_RD_U75_value.rename(columns={'Time period':'Year', 'Area Name':'Area', 'Value': 'Respiratory Disease'})
df_CRD_all_value = df_CRD_all_value.rename(columns={'Time period':'Year', 'Area Name':'Area', 'Value': 'Chronic Respiratory Disease'})
df_CRD_U75_value = df_CRD_U75_value.rename(columns={'Time period':'Year', 'Area Name':'Area', 'Value': 'Chronic Respiratory Disease'})

In [398]:
df_CD_all_value

Unnamed: 0,Year,Area Code,Area,Age,Sex,Cardiovascular Disease
0,2001,E12000001,North East region,All ages,Female,412.73573
1,2001,E12000001,North East region,All ages,Male,642.33102
2,2001,E12000001,North East region,All ages,Persons,507.92696
3,2001,E12000002,North West region,All ages,Female,426.25791
4,2001,E12000002,North West region,All ages,Male,645.77167
...,...,...,...,...,...,...
685,2023,E12000009,South West region,All ages,Male,274.25684
686,2023,E12000009,South West region,All ages,Persons,218.27538
687,2023,E92000001,England,All ages,Female,362.40294
688,2023,E92000001,England,All ages,Male,588.33416


In [399]:
# https://stackoverflow.com/questions/64385747/valueerror-you-are-trying-to-merge-on-object-and-int64-columns-when-use-pandas

df_CD_all_value['Year']=df_CD_all_value['Year'].astype(str)
df_CD_U75_value['Year']=df_CD_U75_value['Year'].astype(str)
df_LC_all_value['Year']=df_LC_all_value['Year'].astype(str)
df_LC_U75_value['Year']=df_LC_U75_value['Year'].astype(str)
df_RD_all_value['Year']=df_RD_all_value['Year'].astype(str)
df_RD_U75_value['Year']=df_RD_U75_value['Year'].astype(str)
df_CRD_all_value['Year']=df_CRD_all_value['Year'].astype(str)
df_CRD_U75_value['Year']=df_CRD_U75_value['Year'].astype(str)

In [400]:
df_CRD_U75_value

Unnamed: 0,Year,Area Code,Area,Age,Sex,Chronic Respiratory Disease
0,2001,E12000001,North East region,<75 yrs,Female,29.29710
1,2001,E12000001,North East region,<75 yrs,Male,35.16460
2,2001,E12000001,North East region,<75 yrs,Persons,32.02125
3,2001,E12000002,North West region,<75 yrs,Female,28.01327
4,2001,E12000002,North West region,<75 yrs,Male,31.43078
...,...,...,...,...,...,...
685,2023,E12000009,South West region,<75 yrs,Male,16.87367
686,2023,E12000009,South West region,<75 yrs,Persons,15.66280
687,2023,E92000001,England,<75 yrs,Female,36.35150
688,2023,E92000001,England,<75 yrs,Male,43.25906


In [401]:
df_CD_value = pd.merge(left=df_CD_all_value, right=df_CD_U75_value, left_on=['Year', 'Area Code','Area', 'Age', 'Sex', 'Cardiovascular Disease'], right_on=['Year', 'Area Code','Area', 'Age', 'Sex', 'Cardiovascular Disease'], how='outer')
df_LC_value = pd.merge(left=df_LC_all_value, right=df_LC_U75_value, left_on=['Year', 'Area Code','Area', 'Age', 'Sex', 'Lung Cancer'], right_on=['Year', 'Area Code','Area', 'Age', 'Sex', 'Lung Cancer'], how='outer')
df_RD_value = pd.merge(left=df_RD_all_value, right=df_RD_U75_value, left_on=['Year', 'Area Code','Area', 'Age', 'Sex', 'Respiratory Disease'], right_on=['Year', 'Area Code','Area', 'Age', 'Sex', 'Respiratory Disease'], how='outer')
df_CRD_value = pd.merge(left=df_CRD_all_value, right=df_CRD_U75_value, left_on=['Year', 'Area Code','Area', 'Age', 'Sex', 'Chronic Respiratory Disease'], right_on=['Year', 'Area Code','Area', 'Age', 'Sex', 'Chronic Respiratory Disease'], how='outer')

In [402]:
data_frames = [df_CD_value, df_LC_value, df_RD_value, df_CRD_value]
diseases = reduce(lambda left,right: pd.merge(left, right, on=['Year', 'Area Code','Area', 'Age', 'Sex'], how='outer'), data_frames)

In [403]:
geojson_codes = ['E15000001', 'E15000002', 'E15000003', 'E15000004', 'E15000005', 'E15000006', 'E15000007', 'E15000008', 'E15000009']

def areaCode(ac_name):
    for i in geojson_codes:
        if ac_name == 'E92000001':
            return ac_name
        elif ac_name[-1] == i[-1]:
            return i

In [404]:
diseases['Area Code'] = diseases['Area Code'].apply(areaCode)

In [405]:
diseases

Unnamed: 0,Year,Area Code,Area,Age,Sex,Cardiovascular Disease,Lung Cancer,Respiratory Disease,Chronic Respiratory Disease
0,2001,E15000001,North East region,All ages,Female,412.73573,69.35044,156.44507,65.34273
1,2001,E15000001,North East region,All ages,Male,642.33102,137.22104,242.72228,109.63734
2,2001,E15000001,North East region,All ages,Persons,507.92696,96.63929,185.05776,80.09560
3,2001,E15000002,North West region,All ages,Female,426.25791,57.86182,159.03130,63.78286
4,2001,E15000002,North West region,All ages,Male,645.77167,114.24087,238.29274,106.93175
...,...,...,...,...,...,...,...,...,...
1375,2023,E15000009,South West region,<75 yrs,Male,98.36301,23.74500,31.42868,16.87367
1376,2023,E15000009,South West region,<75 yrs,Persons,68.58065,21.25784,26.75364,15.66280
1377,2023,E92000001,England,<75 yrs,Female,94.56300,44.99686,57.40516,36.35150
1378,2023,E92000001,England,<75 yrs,Male,218.84884,53.24198,78.18246,43.25906


In [406]:
under75 = diseases[(diseases['Age'] == '<75 yrs')].sort_values(by=['Year', 'Area Code', 'Area', 'Sex']).reset_index()
allAges = diseases[(diseases['Age'] == 'All ages')].sort_values(by=['Year', 'Area Code', 'Area', 'Sex']).reset_index()

In [407]:
ages = pd.DataFrame()

In [408]:
ages['Cardiovascular Disease'] = allAges['Cardiovascular Disease']-under75['Cardiovascular Disease']
ages['Lung Cancer'] = allAges['Lung Cancer']-under75['Lung Cancer']
ages['Respiratory Disease'] = allAges['Respiratory Disease']-under75['Respiratory Disease']
ages['Chronic Respiratory Disease'] = allAges['Chronic Respiratory Disease']-under75['Chronic Respiratory Disease']

In [409]:
ages[['Year', 'Area Code', 'Area', 'Sex']] = under75[['Year', 'Area Code', 'Area', 'Sex']]

In [410]:
ages['Age'] = '>75 yrs'

In [412]:
diseases = pd.merge(left=diseases, 
                right=ages, 
                left_on=['Year', 'Area Code','Area', 'Age', 'Sex', 'Cardiovascular Disease', 'Lung Cancer', 'Respiratory Disease', 'Chronic Respiratory Disease'], 
                right_on=['Year', 'Area Code','Area', 'Age', 'Sex', 'Cardiovascular Disease', 'Lung Cancer', 'Respiratory Disease', 'Chronic Respiratory Disease'], 
                how='outer'
)

In [413]:
sex_CD = diseases.groupby(['Year', 'Area Code','Area', 'Age'])['Cardiovascular Disease'].sum().reset_index()
sex_LC = diseases.groupby(['Year', 'Area Code','Area', 'Age'])['Lung Cancer'].sum().reset_index()
sex_RD = diseases.groupby(['Year', 'Area Code','Area', 'Age'])['Respiratory Disease'].sum().reset_index()
sex_CRD = diseases.groupby(['Year', 'Area Code','Area', 'Age'])['Chronic Respiratory Disease'].sum().reset_index()

In [414]:
sex_CD['Sex'] = 'All'
sex_LC['Sex'] = 'All'
sex_RD['Sex'] = 'All'
sex_CRD['Sex'] = 'All'

In [415]:
tmp_sex = pd.merge(
    left=sex_CD,
    right=sex_LC,
    left_on=['Year', 'Area Code','Area', 'Age', 'Sex'],
    right_on=['Year', 'Area Code','Area', 'Age', 'Sex'],
    how='outer'
)

tmp_sex = pd.merge(
    left=tmp_sex,
    right=sex_RD,
    left_on=['Year', 'Area Code','Area', 'Age', 'Sex'],
    right_on=['Year', 'Area Code','Area', 'Age', 'Sex'],
    how='outer'
)

tmp_sex = pd.merge(
    left=tmp_sex,
    right=sex_CRD,
    left_on=['Year', 'Area Code','Area', 'Age', 'Sex'],
    right_on=['Year', 'Area Code','Area', 'Age', 'Sex'],
    how='outer'
)

In [417]:
diseases = pd.merge(
    left=diseases,
    right=tmp_sex,
    left_on=['Year', 'Area Code','Area', 'Age', 'Sex', 'Cardiovascular Disease', 'Lung Cancer', 'Respiratory Disease', 'Chronic Respiratory Disease'],
    right_on=['Year', 'Area Code','Area', 'Age', 'Sex', 'Cardiovascular Disease', 'Lung Cancer', 'Respiratory Disease', 'Chronic Respiratory Disease'],
    how='outer'
)


### Pollution datasets
All of the pollution datasets have been collected from areas across the UK, and does therefore not have the correct regional names. The df_A2R has the conversion of the areas to regions.

In [419]:
def areaToRegion(area_name):
    # https://stackoverflow.com/questions/16476924/how-can-i-iterate-over-rows-in-a-pandas-dataframe 
    for index, row in df_A2R.iterrows():
        if area_name == row['Area name']:
            return row['Region']

In [420]:
df_O3['Site'] = df_O3['Site'].apply(areaToRegion)
df_NO2['Site'] = df_NO2['Site'].apply(areaToRegion)
df_PM10['Site'] = df_PM10['Site'].apply(areaToRegion)

In [421]:
df_O3 = df_O3[(df_O3['Site'] != 'Not England') & (df_O3['Year'] > 2000)]
df_NO2 = df_NO2[(df_NO2['Site'] != 'Not England') & (df_NO2['Year'] > 2000)]
df_PM10 = df_PM10[(df_PM10['Site'] != 'Not England') & (df_PM10['Year'] > 2000)]

In [422]:
O3_mean = df_O3.groupby(['Year', 'Site'])['Annual average maximum daily 8-hour mean O3 concentration (µg/m3)'].mean().reset_index()
NO2_mean = df_NO2.groupby(['Year', 'Site'])['Annual Mean NO2 concentration (µg/m3)'].mean().reset_index()
PM10_mean = df_PM10.groupby(['Year', 'Site'])['Annual Mean PM10 concentration (µg/m3)'].mean().reset_index()

df_O3_E = df_O3.groupby(['Year'])['Annual average maximum daily 8-hour mean O3 concentration (µg/m3)'].mean().reset_index()
df_NO2_E = df_NO2.groupby(['Year'])['Annual Mean NO2 concentration (µg/m3)'].mean().reset_index()
df_PM10_E = df_PM10.groupby(['Year'])['Annual Mean PM10 concentration (µg/m3)'].mean().reset_index()

In [423]:
df_O3_E['Site']='England'
df_NO2_E['Site']='England'
df_PM10_E['Site']='England'

In [424]:
O3_mean=O3_mean.rename(columns={'Site':'Area', 'Annual average maximum daily 8-hour mean O3 concentration (µg/m3)':'O3 concentration'})
NO2_mean=NO2_mean.rename(columns={'Site':'Area', 'Annual Mean NO2 concentration (µg/m3)':'NO2 concentration'})
PM10_mean=PM10_mean.rename(columns={'Site':'Area', 'Annual Mean PM10 concentration (µg/m3)':'PM10 concentration'})

df_O3_E=df_O3_E.rename(columns={'Site':'Area', 'Annual average maximum daily 8-hour mean O3 concentration (µg/m3)':'O3 concentration'})
df_NO2_E=df_NO2_E.rename(columns={'Site':'Area', 'Annual Mean NO2 concentration (µg/m3)':'NO2 concentration'})
df_PM10_E=df_PM10_E.rename(columns={'Site':'Area', 'Annual Mean PM10 concentration (µg/m3)':'PM10 concentration'})

In [425]:
O3_mean['Year']=O3_mean['Year'].astype(str)
NO2_mean['Year']=NO2_mean['Year'].astype(str)
PM10_mean['Year']=PM10_mean['Year'].astype(str)

df_O3_E['Year']=df_O3_E['Year'].astype(str)
df_NO2_E['Year']=df_NO2_E['Year'].astype(str)
df_PM10_E['Year']=df_PM10_E['Year'].astype(str)


In [426]:
O3_full = pd.merge(left=O3_mean, right=df_O3_E, left_on=['Year', 'Area', 'O3 concentration'], right_on=['Year', 'Area', 'O3 concentration'], how='outer')
NO2_full = pd.merge(left=NO2_mean, right=df_NO2_E, left_on=['Year', 'Area', 'NO2 concentration'], right_on=['Year', 'Area', 'NO2 concentration'], how='outer')
PM10_full = pd.merge(left=PM10_mean, right=df_PM10_E, left_on=['Year', 'Area', 'PM10 concentration'], right_on=['Year', 'Area', 'PM10 concentration'], how='outer')

In [427]:
# https://stackoverflow.com/questions/44327999/how-to-merge-multiple-dataframes

data_frames = [O3_full, NO2_full, PM10_full]
pollution = reduce(lambda left,right: pd.merge(left, right, on=['Year', 'Area'], how='outer'), data_frames)

In [428]:
pollution['O3 %'] = (pollution['O3 concentration']/(pollution['O3 concentration']+pollution['NO2 concentration']+pollution['PM10 concentration']))*100
pollution['NO2 %'] = (pollution['NO2 concentration']/(pollution['O3 concentration']+pollution['NO2 concentration']+pollution['PM10 concentration']))*100
pollution['PM10 %'] = (pollution['PM10 concentration']/(pollution['O3 concentration']+pollution['NO2 concentration']+pollution['PM10 concentration']))*100


In [429]:
pollution

Unnamed: 0,Year,Area,O3 concentration,NO2 concentration,PM10 concentration,O3 %,NO2 %,PM10 %
0,2001,East Midlands region,46.496841,30.978146,22.775677,46.380581,30.900689,22.718729
1,2001,East of England region,59.509682,30.141896,21.270263,53.650103,27.173995,19.175902
2,2001,London region,47.240321,45.506742,25.830108,39.839305,38.377321,21.783374
3,2001,North East region,51.901862,31.246800,17.048860,51.799546,31.185203,17.015251
4,2001,North West region,52.960391,35.470394,24.437665,46.922228,31.426314,21.651458
...,...,...,...,...,...,...,...,...
215,2018,England,62.562130,20.542080,15.352150,63.543005,20.864147,15.592847
216,2019,England,60.527400,20.584080,14.287679,63.446471,21.576794,14.976735
217,2020,England,64.419072,15.436177,13.711120,68.848533,16.497570,14.653897
218,2021,England,62.576544,16.239624,13.211689,67.997393,17.646422,14.356185


# Graf generation 

In [430]:
disease_pollution = pd.merge(left=diseases, right=pollution, left_on=['Area','Year'], right_on=['Area','Year'], how='outer')

In [431]:
disease_pollution = disease_pollution[(disease_pollution['Year']!='2023')]

## Line graphs

In [432]:
colors = {
    'O3 concentration': '#cae0e8',
    'NO2 concentration': '#d7dce8',
    'PM10 concentration': '#e3d9e5',
    'O3 %': '#cae0e8',
    'NO2 %': '#d7dce8',
    'PM10 %': '#e3d9e5',
    'All': '#3C3D37',
    'Female': '#cf67b7',
    'Male': '#5c9ee1',
    'Persons': '#83b148'
}


In [434]:
def create_linegraph(area_name, disease_name, pollution_name, sex, age):
    fig = make_subplots(specs=[[{"secondary_y": True}]])
    fig.add_trace(
        go.Bar(
            name=pollution_name, 
            x=disease_pollution['Year'][(disease_pollution['Area'] == area_name) & (disease_pollution['Sex'] == sex[0]) & (disease_pollution['Age'] == age)],
            y=disease_pollution[pollution_name][(disease_pollution['Area'] == area_name) & (disease_pollution['Sex'] == sex[0]) & (disease_pollution['Age'] == age)],
            marker=dict(color=colors[pollution_name])
        ),
        secondary_y=False,
    )

    if len(sex) != 0: 
        for i in range(len(sex)):
            fig.add_trace(
                go.Scatter(
                    name='Mortality rate for {}'.format(sex[i]), 
                    x=disease_pollution['Year'][(disease_pollution['Area'] == area_name) & (disease_pollution['Sex'] == sex[i]) & (disease_pollution['Age'] == age)], 
                    y=disease_pollution[disease_name][(disease_pollution['Area'] == area_name) & (disease_pollution['Sex'] == sex[i]) & (disease_pollution['Age'] == age)],
                    mode='lines',
                    marker=dict(color=colors[sex[i]]),
                ),
                secondary_y=True,
            )

    fig.update_layout(
        barmode='stack',
        margin={'l': 40, 'b': 40, 't': 10, 'r': 0},
        legend=dict(
            # https://plotly.com/python/legend/
            yanchor="bottom",
            y=0.01,
            xanchor="left",
            x=0.01
        ),
        plot_bgcolor='rgba(0,0,0,0)'
    )

    fig.update_yaxes(
        title=pollution_name,
        secondary_y=False
    )

    fig.update_yaxes(
        title='{} cases pr. 100,000'.format(disease_name),
        secondary_y=True
    )

    return fig

## Map

### Declaring base elements for the map

In [443]:
# https://www.youtube.com/watch?v=TDlo7s4SZA8 

topojson_url = 'https://martinjc.github.io/UK-GeoJSON/json/eng/topo_eer.json'

response = requests.get(topojson_url)
topojson = response.json()

In [444]:
UK_map = folium.Map(
    location=[53,-2],
    zoom_start=6.5
)

In [446]:
regeon_lat_lon = {
    'E15000001':[54.9513175692519, -1.9054853090068706],
    'E15000002':[54.60744218692464, -2.9333830265113643],
    'E15000003':[54.074132682016106, -1.108344830717731],
    'E15000004':[52.96633574786156, -0.6905312712550848],
    'E15000005':[52.50408695756206, -2.5911090557254255],
    'E15000006':[52.26355962180764, 0.4113732410272191],
    'E15000007':[51.49964309495347, -0.09515721448286894],
    'E15000008':[51.42177877749887, -1.0492100487647125],
    'E15000009':[51.166290657840676, -2.3825972164526505]
}

In [447]:
min_radius = 30000
max_radius = 60000

### Creating functions

In [450]:
color_lst = {
    'O3 concentration': ['#E8F9FF', '#7797A8', '#003F5B'],
    'NO2 concentration': ['#F2F6FF', '#8B92A4', '#313951'],
    'PM10 concentration': ['#FEF3FF', '#978F99', '#3C373E'],
    'O3 %': ['#E8F9FF', '#7797A8', '#003F5B'],
    'NO2 %': ['#F2F6FF', '#8B92A4', '#313951'],
    'PM10 %': ['#FEF3FF', '#978F99', '#3C373E'],
    'All': '#3C3D37',
    'Female': '#cf67b7',
    'Male': '#5c9ee1',
    'Persons': '#83b148',
    'Cardiovascular Disease': '#86527a', 
    'Lung Cancer': '#6e608e', 
    'Respiratory Disease': '#2a7177', 
    'Chronic Respiratory Disease': '#5a6740'
}

In [451]:
# https://python-visualization.github.io/folium/latest/user_guide/geojson/geojson.html#Styling

def regionMap(json, pollution, year, r_map):
    if disease_pollution[pollution][(disease_pollution['Year']==year) & (disease_pollution['Sex']=='All') & (disease_pollution['Age']=='All ages')].max()-disease_pollution[pollution].min() > 15:
        tmp_max = disease_pollution[pollution][(disease_pollution['Year']==year) & (disease_pollution['Sex']=='All') & (disease_pollution['Age']=='All ages')].min()+10
        tmp_min = disease_pollution[pollution][(disease_pollution['Year']==year) & (disease_pollution['Sex']=='All') & (disease_pollution['Age']=='All ages')].min()
    else: 
        tmp_max = disease_pollution[pollution][(disease_pollution['Year']==year) & (disease_pollution['Sex']=='All') & (disease_pollution['Age']=='All ages')].max()
        tmp_min = disease_pollution[pollution][(disease_pollution['Year']==year) & (disease_pollution['Sex']=='All') & (disease_pollution['Age']=='All ages')].min()

    colormap = LinearColormap(color_lst[pollution], vmin=tmp_min, vmax=tmp_max)

    folium.TopoJson(
        data=json,
        object_path='objects.eer',
        style_function=lambda feature: {
            "fillColor": colormap(
                disease_pollution[pollution][(disease_pollution['Area Code']==feature["id"]) & (disease_pollution['Year']==year) & (disease_pollution['Sex']=='All') & (disease_pollution['Age']=='All ages')].values[0]
            ),
            "color": "black",
            "weight": 1,
            "dashArray": "5, 5",
            "fillOpacity": 0.9,
        }
    ).add_to(r_map)

    colormap.caption = '{} (µg/m3)'.format(pollution)
    colormap.add_to(r_map)

In [452]:
def circleCalculator(disease, year, r_map):
    tmp_max = disease_pollution[disease][(disease_pollution['Year']==year) & (disease_pollution['Sex']=='All') & (disease_pollution['Age']=='All ages')].max()
    tmp_min = disease_pollution[disease][(disease_pollution['Year']==year) & (disease_pollution['Sex']=='All') & (disease_pollution['Age']=='All ages')].min()

    for index, row in disease_pollution[(disease_pollution['Area Code']!='E92000001')&(disease_pollution['Year']==year) & (disease_pollution['Sex']=='All') & (disease_pollution['Age']=='All ages')].iterrows():
        tmp_lat_lon = regeon_lat_lon[row['Area Code']]
        tmp_pct = (row[disease]-tmp_min)/(tmp_max-tmp_min)*100
        tmp_size = tmp_pct*(max_radius-min_radius)/(100)+min_radius

        folium.Circle(
            location=tmp_lat_lon,
            radius=tmp_size,
            color=color_lst[disease],
            stroke=True,
            fill=True,
            fill_opacity=0.6,
            opacity=1,
            tooltip="{} cases pr. 100,000".format(round(row[disease],2))
        ).add_to(r_map)

        folium.Marker(
            location=tmp_lat_lon,
            icon=folium.DivIcon(
                html=f"""<div style="font-size: 15px; color: white; display: flex; justify-content: center; align-items:center">
                        {int(row[disease])}
                        </div>"""
            )
        ).add_to(r_map)

In [453]:
map_location = {
    'North East region': [[55,-1.8],8.3],
    'North West region': [[54.1,-2.8],8.3],
    'Yorkshire and the Humber region': [[54.1,-1],8.3],
    'East Midlands region': [[53,-0.7],8],
    'West Midlands region': [[52.4,-2.3],8],
    'East of England region': [[52.3,0.4],8],
    'London region': [[51.5,-0.1],9],
    'South East region': [[51.2,-0.3],8],
    'South West region': [[50.8,-2.9],7.2],
    'England': [[53,-2],6.1]
}

In [454]:
def create_map(area_name, pollution, disease, year):
    region_map = folium.Map(
        location=map_location[area_name][0], 
        zoom_start=map_location[area_name][1]
    )

    regionMap(topojson, pollution, year, region_map)

    circleCalculator(disease, year, region_map,)

    region_map.save('tmp_map.html')

    return open('tmp_map.html', 'r').read()
    

# Dashboard creation

In [464]:
app = Dash()

In [465]:
app.layout = html.Div([
    html.Div([
        html.H1('Pollutions effect on the English peoples health'),

    ], style={
        'background-color': '#fdfdfe',
        'border-radius': '5px',
        'padding-top': '10px',
        'padding-right': '30px',
        'padding-bottom': '10px',
        'padding-left': '30px',
        'margin-bottom': '20px',
    }),

    html.Div([
        html.Div([ 
            
            html.H3(id='region_card_head', children='Region selected:'),
            html.H2(
                id='region_card', 
                children='England', 
                style={
                    'font-family': 'Arial',
                    'font-size': '20px'
                }
            )
        ], id='card_region'),

        html.Div([
            html.H3(id='age_card_head', children='Age range selected: '),
            html.H2(
                id='age_card', 
                children='All ages', 
                style={
                    'font-family': 'Arial',
                    'font-size': '20px',
                    'text-align': 'center'
                }
            )
        ], id='card_age'),
        
        html.Div([
            html.H3(id='avg_dis_head', children='Average for disease'),
            html.H2(
                id='avg_dis', 
                children='Average for disease', 
                style={
                    'font-family': 'Arial',
                    'font-size': '40px',
                    'text-align': 'center'
                }
            )
        ], id='card_disease'),

        html.Div([
            html.H3(id='avg_pol_head', children='Average for pollution'),
            html.H2(
                id='avg_pol', 
                children='Average for pollution',
                style={
                    'font-family': 'Arial',
                    'font-size': '40px',
                    'text-align': 'center'
                }
            )
        ], id='card_pollution')
    ], style={
        'display': 'flex',
        'justify-content': 'center',
        'align-items': 'flex-start',
        'gap': '20px',
        'margin-bottom': '20px'
    }),

    html.Div([
        html.Div([
            html.Div([ 
                html.H2('Disease mortality between 2001-2022', id='linegraph_title'),
                dcc.Graph(id='linegraph'),    
            ], style={
                'background-color': '#fdfdfe',
                'border-radius': '5px',
                'padding-top': '10px',
                'padding-right': '40px',
                'padding-bottom': '30px',
                'padding-left': '40px',  
            }),

            html.Div([
                html.Div([
                    html.Div([
                        html.H3('Variables'),
                        dcc.Dropdown(
                            options=disease_pollution['Area'].unique(),
                            value='England',
                            id='area',
                            style={
                                'margin-bottom': '5px',
                                'borderRadius': '5px' 
                            }
                        ),
                        
                        dcc.Dropdown(
                            options=['Cardiovascular Disease', 'Lung Cancer', 'Respiratory Disease', 'Chronic Respiratory Disease'],
                            value='Cardiovascular Disease',
                            id='disease',
                            style={
                                'margin-bottom': '5px',
                                'borderRadius': '5px' 
                            }
                        ),

                        dcc.Dropdown(
                            options=['O3 concentration', 'NO2 concentration', 'PM10 concentration', 'O3 %', 'NO2 %', 'PM10 %'], 
                            value='O3 concentration',
                            id='pollution',
                            style={
                                'margin-bottom': '5px',
                                'borderRadius': '5px' 
                            }
                        ),

                        dcc.Dropdown(
                            options=['All ages', '<75 yrs', '>75 yrs'],
                            value='All ages',
                            id='age_type',
                            style={
                                'margin-bottom': '5px',
                                'borderRadius': '5px' 
                            }
                        )
                    ], style={
                        'width': '60%',
                        'background-color': '#fdfdfe',
                        'border-radius': '5px',
                        'padding-top': '10px',
                        'padding-right': '40px',
                        'padding-bottom': '30px',
                        'padding-left': '40px', 

                    }),

                    html.Div([
                        html.H3('Gender'),

                        html.Div([
                            dcc.Checklist(
                                options=['All', 'Female', 'Male', 'Persons'],
                                value=['All'],
                                id='sex_type',
                                labelStyle={
                                    'margin-bottom': '5px',
                                }
                            )
                        ])
                        ], style={
                            'width': '40%',
                            'background-color': '#fdfdfe',
                            'border-radius': '5px',
                            'padding-top': '10px',
                            'padding-right': '30px',
                            'padding-bottom': '30px',
                            'padding-left': '30px',
                    })
                ], style={
                    'display': 'flex',
                    'justify-content': 'center',
                    'align-items': 'flex-start',
                    'gap': '20px',
                    'margin-top': '20px'
                })
            ])
        ], style={
            'width':'60%'
        }),

        html.Div([
            html.H2('Disease and pollution across England regions'),

            html.Div([
                dcc.Slider(
                    int(float(disease_pollution['Year'].min())), 
                    int(float(disease_pollution['Year'].max())),
                    step=None, 
                    id='year_slider',
                    value=int(float(disease_pollution['Year'].max())),
                    marks={year: str(year) if i % 2 == 0 else "" for i, year in enumerate(disease_pollution['Year'].unique())},
                    tooltip={
                        'placement': 'bottom', 
                        'always_visible': True
                    },
                    included=False,
                ),
            ]), 

            html.Iframe(
                id='mapfigure', 
                height='700',
                style={
                    'border-radius': '5px',
                    'border': 'none',
                    'width': '100%'
                }
            )
        ], style={
            'width': '40%',
            'background-color': '#fdfdfe',
            'border-radius': '5px',
            'padding-top': '10px',
            'padding-right': '40px',
            'padding-bottom': '30px',
            'padding-left': '40px',  
        })
    ], style={
        'display': 'flex',
        'justify-content': 'center',
        'align-items': 'flex-start',
        'gap': '20px'
    })
], style={
    'background-color': '#e0e0e2',
    'padding-top': '40px',
    'padding-right': '10%',
    'padding-bottom': '20%',
    'padding-left': '10%'
})

In [467]:
@callback(
    Output('linegraph', 'figure'),
    Input('area', 'value'),
    Input('disease', 'value'),
    Input('pollution', 'value'),
    Input('sex_type', 'value'),
    Input('age_type', 'value')
) 
def update_linegraph(area_name, disease_name, pollution_name, sex_type, age_type):
    return create_linegraph(area_name, disease_name, pollution_name, sex_type, age_type)

In [468]:
@callback(
    Output('mapfigure', 'srcDoc'),
    Input('area', 'value'),
    Input('disease', 'value'),
    Input('pollution', 'value'),
    Input('year_slider', 'value'),
) 
def update_map(area_name, disease_name, pollution_name, year):
    return create_map(area_name, pollution_name, disease_name, str(year))

In [475]:
def averageCalc(input, area, age):
    avg = disease_pollution[input][(disease_pollution['Area']==area) & (disease_pollution['Age']==age) & (disease_pollution['Sex']=='All')].mean()

    return round(avg)

In [470]:
color_cards= {
    'O3 concentration': '#7797A8',
    'NO2 concentration': '#8B92A4',
    'PM10 concentration': '#978F99',
    'O3 %': '#7797A8',
    'NO2 %': '#8B92A4',
    'PM10 %': '#978F99',
    'Cardiovascular Disease': '#86527a', 
    'Lung Cancer': '#6e608e', 
    'Respiratory Disease': '#2a7177', 
    'Chronic Respiratory Disease': '#5a6740'
}

In [471]:
@callback(
    Output('avg_dis_head', 'children'),
    Output('avg_dis', 'children'),
    Output('card_disease', 'style'),
    Input('disease', 'value'),
    Input('area', 'value'),
    Input('age_type', 'value')
)
def update_disease(disease, area, age):
    average = averageCalc(disease, area, age)
    style={
            'width': '30%',
            'height': '155px',
            'text-align': 'center',
            'background-color': color_cards[disease],
            'border-radius': '5px',
            'padding-top': '20px',
            'padding-right': '20px',
            'padding-bottom': '20px',
            'padding-left': '20px',
        }
    return 'Average {dis} cases pr. 100.000'.format(dis=disease), '{}'.format(average), style

In [472]:
@callback(
    Output('avg_pol_head', 'children'),
    Output('avg_pol', 'children'),
    Output('card_pollution', 'style'),
    Input('pollution', 'value'),
    Input('area', 'value'),
    Input('age_type', 'value')
)
def update_pollution(pollution, area, age):
    average = averageCalc(pollution, area, age)
    style={
            'width': '30%',
            'height': '155px',
            'text-align': 'center',
            'background-color': color_cards[pollution],
            'border-radius': '5px',
            'padding-top': '20px',
            'padding-right': '20px',
            'padding-bottom': '20px',
            'padding-left': '20px',  
        }
    
    return 'Average {dis}'.format(dis=pollution), '{}'.format(average), style

In [473]:
@callback(
    Output('region_card', 'children'),
    Output('age_card', 'children'),
    Output('card_region', 'style'),
    Output('card_age', 'style'),
    Input('area', 'value'),
    Input('age_type', 'value')
)
def update_cards(area, age):
    style={
            'width': '20%',
            'height': '155px',
            'text-align': 'center',
            'background-color': '#fdfdfe',
            'border-radius': '5px',
            'padding-top': '20px',
            'padding-right': '20px',
            'padding-bottom': '20px',
            'padding-left': '20px',  
        }
    
    return '{}'.format(area), '{}'.format(age), style, style

In [474]:
if __name__ == '__main__':
    app.run(
        jupyter_mode="external", 
        debug=True
    )

Dash app running on http://127.0.0.1:8050/
