In [22]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
import plotly
from lxml import etree as ET
from lxml.html import builder as E
import lxml.html

In [299]:
city_dfs = {}
city_dfs['by-borough'] = pd.read_csv('data/boro.csv')
city_dfs['by-age'] = pd.read_csv('data/by-age.csv')
city_dfs['by-sex'] = pd.read_csv('data/by-sex.csv')
city_dfs['hosp-trends'] = pd.read_csv('data/case-hosp-death.csv')
city_dfs['summary'] = pd.read_csv('data/summary.csv', names = ['Metric', 'Number'])
city_dfs['zipcode'] = pd.read_csv('data/US Census/cleaned/TOTAL POPULATION.csv', index_col = 'MODZCTA')

In [300]:
city_dfs['by-borough'].columns = ['Borough', 'Total Cases', 'Cases Rate (Per 100k)']
city_dfs['by-age'].columns = ['Age Group', 'Cases Rate', 'Hospitalization Rate', 'Death Rate']
city_dfs['by-sex'].columns = ['Sex', 'Cases Rate', 'Hospitalization Rate', 'Death Rate']
city_dfs['hosp-trends'].columns = ['Date', 'New Cases', 'Hospitalized Cases', 'Deaths']

city_dfs['by-age']['Age Group'].loc[4] = '75 and older'



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [313]:
layout = go.Layout(
    paper_bgcolor='rgba(0,0,0,0)',
    plot_bgcolor='rgba(0,0,0,0)')

cases_fig = px.bar(city_dfs['by-borough'].iloc[0:5], x='Borough', y='Total Cases', color = 'Cases Rate (Per 100k)', title = 'Total Cases by Borough', color_continuous_scale = 'Reds')
cases_fig.update_layout(title = {'text': 'Total Cases by Borough',
                                 'x': 0.5,
                                 'xanchor': 'center'},
                        font = dict(family = 'Montserrat', size = 14, color = '#FFFFFF'))
cases_fig.update_layout({'paper_bgcolor':'rgba(0,0,0,0)',
    'plot_bgcolor':'rgba(0,0,0,0)'})
cases_fig.show()

In [314]:
age_fig = go.Figure(data = [go.Bar(name = 'Cases', x = city_dfs['by-age'].iloc[0:5]['Age Group'], y = city_dfs['by-age'].iloc[0:5]['Cases Rate']),
                            go.Bar(name = 'Hospitalizations', x = city_dfs['by-age'].iloc[0:5]['Age Group'], y = city_dfs['by-age'].iloc[0:5]['Hospitalization Rate']),
                            go.Bar(name = 'Deaths', x = city_dfs['by-age'].iloc[0:5]['Age Group'], y = city_dfs['by-age'].iloc[0:5]['Death Rate'], marker_color = 'black')],layout = layout)
age_fig.update_layout(title = {'text': 'Cases by Age Group (Per 100k)',
                                 'x': 0.5,
                                 'xanchor': 'center'},
                        font = dict(family = 'Montserrat', size = 14, color = '#FFFFFF'))
age_fig.show()

In [315]:
hosp_fig = go.Figure(data = [go.Line(name = 'New Cases', x = city_dfs['hosp-trends']['Date'], y = city_dfs['hosp-trends']['New Cases']),
                             go.Line(name = 'New Hospitalizations', x = city_dfs['hosp-trends']['Date'], y = city_dfs['hosp-trends']['Hospitalized Cases']),
                             go.Line(name = 'Deaths', x = city_dfs['hosp-trends']['Date'], y = city_dfs['hosp-trends']['Deaths'], marker_color = 'black')], layout = layout)
hosp_fig.update_layout(title = {'text': 'Hospital Trends',
                                 'x': 0.5,
                                 'xanchor': 'center'},
                        font = dict(family = 'Montserrat', size = 14, color = '#FFFFFF'))

In [316]:
cases_fig.write_html('htmls/cases_fig.html')
age_fig.write_html('htmls/age_fig.html')
hosp_fig.write_html('htmls/hosp_fig.html')

In [317]:
age_div = plotly.offline.plot(age_fig, include_plotlyjs=False, output_type='div')
cases_div = plotly.offline.plot(cases_fig, include_plotlyjs=False, output_type='div')
hosp_div = plotly.offline.plot(hosp_fig, include_plotlyjs=False, output_type='div')

In [201]:
def generate_html(metrics_dict):
    html = E.HTML(
    E.HEAD(
        E.META(name = 'viewport', content = 'width=device-width, initial-scale=1'),
        E.LINK(href="https://www.w3schools.com/w3css/4/w3.css", rel="stylesheet"),
        E.LINK(href="https://fonts.googleapis.com/css2?family=Montserrat:wght@300&amp;display=swap", rel="stylesheet"),
        E.LINK(rel="stylesheet", href="https://maxcdn.bootstrapcdn.com/bootstrap/4.0.0/css/bootstrap.min.css", integrity="sha384-Gn5384xqQ1aoWXA+058RXPxPg6fy4IWvTNh0E263XmFcJlSAwiGgFAW/dAiS6JXm", crossorigin="anonymous"),
        E.SCRIPT(src="https://cdn.plot.ly/plotly-latest.min.js"),
        E.STYLE(r".w3-montserrat {font-family: 'Montserrat', sans-serif;}")),
    E.BODY(
        E.DIV(E.CLASS('page-header w3-montserrat text-center'),
                    E.H2('NYC COVID-19 Cases by Zip Code'), style = "width:100%"),
        E.DIV(E.CLASS("container-fluid w3-montserrat"),
              E.DIV(E.CLASS('row no-gutters'), 
                    E.DIV(E.CLASS('col-sm-2 flex-column d-flex justify-content-between'),
                            E.DIV(E.CLASS('card'),
                               E.DIV(E.CLASS('card-body'), 
                                     E.H6(E.CLASS('card-title'), 'Total Confirmed Cases'),
                                     E.H4(E.CLASS('card-text'), str(metrics_dict['Cases']))
                                    )
                                 ),
                            E.DIV(E.CLASS('card'),
                               E.DIV(E.CLASS('card-body'), 
                                     E.H6(E.CLASS('card-title'), 'Total Deaths'),
                                     E.H4(E.CLASS('card-text'), str(metrics_dict['Deaths']))
                                    )
                                 ),
                            E.DIV(E.CLASS('card'),
                               E.DIV(E.CLASS('card-body'), 
                                     E.H6(E.CLASS('card-title'), 'Total Hospitalized'),
                                     E.H4(E.CLASS('card-text'), str(metrics_dict['Hosp']))
                                    )
                                 )
                         ),
                    E.DIV(E.CLASS('col-sm-8'),
                         E.IFRAME(name ="main-map", style="border:0;", width ="100%", height="500", frameborder="0", src=r"https://sleepingtuna.github.io/NYCCovid-19Map.github.io/docs/master.html")),
                    E.DIV(E.CLASS('col-sm-2'), 
                          E.TABLE(E.CLASS('table table-bordered table-sm'),
                                 E.TBODY(E.CLASS('thead-dark'), 
                                        E.TH('ZIP CODE'),
                                        E.TH('CONFIRMED CASES'))
                                 ),
                          style = 'background-color:yellow;')
                    
                   ),
              align = 'center'
             ),
        E.DIV(E.CLASS('container-fluid'), 
              E.DIV(E.CLASS('row no-gutters'),
                    E.DIV(E.CLASS('col-sm-6'), lxml.html.fragment_fromstring(age_div, parser = ET.HTMLParser())),
                    E.DIV(E.CLASS('col-sm-6'), lxml.html.fragment_fromstring(cases_div, parser = ET.HTMLParser()))
                   ),
              E.DIV(E.CLASS('row no-gutters'), 
                    E.DIV(E.CLASS('col-sm-12'), lxml.html.fragment_fromstring(hosp_div, parser = ET.HTMLParser()))
                   )
             ),
        E.SCRIPT(src="https://code.jquery.com/jquery-3.2.1.slim.min.js", integrity="sha384-KJ3o2DKtIkvYIK3UENzmM7KCkRr/rE9/Qpg6aAZGJwFDMVNA/GpGFF93hXpG5KkN", crossorigin="anonymous"),
        E.SCRIPT(src="https://cdnjs.cloudflare.com/ajax/libs/popper.js/1.12.9/umd/popper.min.js", integrity="sha384-ApNbgh9B+Y1QKtv3Rn7W3mgPxhU9K/ScQsAP7hUibX39j7fakFPskvXusvfa0b4Q", crossorigin="anonymous"),
        E.SCRIPT(src="https://maxcdn.bootstrapcdn.com/bootstrap/4.0.0/js/bootstrap.min.js", integrity="sha384-JZR6Spejh4U02d8jOt6vLEHfe/JQGiRRSQQxSfFWpi1MquVdAyjUar5+76PVCmYl", crossorigin="anonymous")
    )
    )
    
    return html

In [202]:
metrics = {}

In [203]:
metrics['Cases'] = city_dfs['summary']['Number'].iloc[0]
metrics['Deaths'] = city_dfs['summary']['Number'].iloc[1]
metrics['Hosp'] = city_dfs['summary']['Number'].iloc[2]

In [222]:
lxml.html.open_in_browser(generate_html(metrics))

file://C:/Users/DANIEL~1/AppData/Local/Temp/tmpddoygkpl.html


ParserError: Multiple elements found (div, script)

In [223]:
top_5 = city_dfs['zipcode'].sort_values(by='Positive', ascending = False).iloc[0:5]

In [224]:
city_dfs['zipcode']['rate'] = city_dfs['zipcode']['Positive'] / city_dfs['zipcode']['Total_TOTAL POPULATION'] * 1000

In [225]:
top_5_rate = city_dfs['zipcode'].sort_values(by='rate', ascending = False).iloc[0:5]

In [226]:
top_5

Unnamed: 0_level_0,Unnamed: 0,Positive,Total,zcta_cum.perc_pos,Zip,City,State,Latitude,Longitude,geopoint,0,id,Geographic Area Name,Total_TOTAL POPULATION,rate
MODZCTA,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
11368.0,137,1446,1861,77.7,11368,Corona,NY,40.747106,-73.85838,"40.747106,-73.85838",1096,8710000US3611368,"ZCTA5 11368, New York",109931,13.153706
11219.0,104,1136,1644,69.1,11219,Brooklyn,NY,40.632449,-73.99629,"40.632449,-73.99629",1395,8710000US3611219,"ZCTA5 11219, New York",92221,12.318236
11373.0,141,983,1425,68.98,11373,Elmhurst,NY,40.736076,-73.87804,"40.736076,-73.87804",534,8710000US3611373,"ZCTA5 11373, New York",100820,9.75005
11230.0,114,946,1487,63.62,11230,Brooklyn,NY,40.622994,-73.96427,"40.622994,-73.96427",1125,8710000US3611230,"ZCTA5 11230, New York",86408,10.94806
10467.0,69,941,1625,57.91,10467,Bronx,NY,40.872265,-73.86937,"40.872265,-73.86937",709,8710000US3610467,"ZCTA5 10467, New York",97060,9.695034


In [227]:
top_5_rate

Unnamed: 0_level_0,Unnamed: 0,Positive,Total,zcta_cum.perc_pos,Zip,City,State,Latitude,Longitude,geopoint,0,id,Geographic Area Name,Total_TOTAL POPULATION,rate
MODZCTA,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
10018.0,14,106,232,45.69,10018,New York,NY,40.755101,-73.99337,"40.755101,-73.99337",832,8710000US3610018,"ZCTA5 10018, New York",5229,20.271562
11411.0,148,271,368,73.64,11411,Cambria Heights,NY,40.693538,-73.73574,"40.693538,-73.73574",1594,8710000US3611411,"ZCTA5 11411, New York",18556,14.604441
11370.0,139,573,1007,56.9,11370,East Elmhurst,NY,40.763015,-73.89052,"40.763015,-73.89052",650,8710000US3611370,"ZCTA5 11370, New York",39688,14.437613
11004.0,79,200,316,63.29,11004,Glen Oaks,NY,40.742944,-73.70956,"40.742944,-73.70956",1783,8710000US3611004,"ZCTA5 11004, New York",14016,14.269406
11369.0,138,547,728,75.14,11369,East Elmhurst,NY,40.762854,-73.87051,"40.762854,-73.87051",1066,8710000US3611369,"ZCTA5 11369, New York",38615,14.16548


In [228]:
top5_metrics = {}
top5_metrics['total'] = top_5
top5_metrics['rate'] = top_5_rate

In [318]:
def generate_html(metrics_dict, top5):
    html = E.HTML(
    E.HEAD(
        E.META(name = 'viewport', content = 'width=device-width, initial-scale=1'),
        E.LINK(href="https://www.w3schools.com/w3css/4/w3.css", rel="stylesheet"),
        E.LINK(href="https://fonts.googleapis.com/css2?family=Montserrat:wght@300&amp;display=swap", rel="stylesheet"),
        E.LINK(rel="stylesheet", href="https://maxcdn.bootstrapcdn.com/bootstrap/4.0.0/css/bootstrap.min.css", integrity="sha384-Gn5384xqQ1aoWXA+058RXPxPg6fy4IWvTNh0E263XmFcJlSAwiGgFAW/dAiS6JXm", crossorigin="anonymous"),
        E.SCRIPT(src="https://cdn.plot.ly/plotly-latest.min.js"),
        E.STYLE(r".w3-montserrat {font-family: 'Montserrat', sans-serif;}")),
    E.BODY(E.CLASS('bg-dark'),
        E.DIV(E.CLASS('page-header w3-montserrat text-center bg-dark text-white'),
                    E.H2('NYC COVID-19 Cases by Zip Code'), style = "width:100%"),
        E.DIV(E.CLASS("container-fluid w3-montserrat bg-dark text-white"),
              E.DIV(E.CLASS('row no-gutters'), 
                    E.DIV(E.CLASS('col-sm-2 flex-column d-flex justify-content-between'),
                            E.DIV(E.CLASS('card bg-dark text-white'),
                               E.DIV(E.CLASS('card-body'), 
                                     E.H6(E.CLASS('card-title'), 'Total Confirmed Cases'),
                                     E.H4(E.CLASS('card-text'), str('{:,.0f}'.format(int(metrics_dict['Cases']))))
                                    )
                                 ),
                            E.DIV(E.CLASS('card bg-dark text-white'),
                               E.DIV(E.CLASS('card-body'), 
                                     E.H6(E.CLASS('card-title'), 'Total Deaths'),
                                     E.H4(E.CLASS('card-text'), str('{:,.0f}'.format(int(metrics_dict['Deaths']))))
                                    )
                                 ),
                            E.DIV(E.CLASS('card bg-dark text-white'),
                               E.DIV(E.CLASS('card-body'), 
                                     E.H6(E.CLASS('card-title'), 'Total Hospitalized'),
                                     E.H4(E.CLASS('card-text'), str('{:,.0f}'.format(int(metrics_dict['Hosp']))))
                                    )
                                 )
                         ),
                    E.DIV(E.CLASS('col-sm-8'),
                         E.IFRAME(name ="main-map", style="border:0;", width ="100%", height="500", frameborder="0", src=r"https://sleepingtuna.github.io/NYCCovid-19Map.github.io/docs/master.html")),
                    E.DIV(E.CLASS('col-sm-2 my-auto'), 
                          E.H5('Zip Codes with Highest Case Counts'),
                          E.TABLE(E.CLASS('table table-bordered table-dark w-auto'),
                                  E.THEAD(E.CLASS('text-small'), 
                                        E.TH('ZIP CODE'),
                                        E.TH('CONFIRMED CASES')),
                                  E.TBODY(
                                      E.TR(E.TD(str(top5_metrics['total']['Zip'].iloc[0])),
                                           E.TD(str(top5_metrics['total']['Positive'].iloc[0]))
                                          ),
                                      E.TR(E.TD(str(top5_metrics['total']['Zip'].iloc[1])),
                                           E.TD(str(top5_metrics['total']['Positive'].iloc[1]))
                                          ),
                                      E.TR(E.TD(str(top5_metrics['total']['Zip'].iloc[2])),
                                           E.TD(str(top5_metrics['total']['Positive'].iloc[2]))
                                          ),
                                      E.TR(E.TD(str(top5_metrics['total']['Zip'].iloc[3])),
                                           E.TD(str(top5_metrics['total']['Positive'].iloc[3]))
                                          ),
                                      E.TR(E.TD(str(top5_metrics['total']['Zip'].iloc[4])),
                                           E.TD(str(top5_metrics['total']['Positive'].iloc[4]))
                                          )
                                         )),
                          style = 'background-color:bg-dark;', align = 'center')
                    
                   ),
              align = 'center'
             ),
        E.DIV(E.CLASS('container-fluid'), 
              E.DIV(E.CLASS('row no-gutters'),
                    E.DIV(E.CLASS('col-sm-6'), lxml.html.fragment_fromstring(age_div, parser = ET.HTMLParser())),
                    E.DIV(E.CLASS('col-sm-6'), lxml.html.fragment_fromstring(cases_div, parser = ET.HTMLParser()))
                   ),
              E.DIV(E.CLASS('row no-gutters'), 
                    E.DIV(E.CLASS('col-sm-12'), lxml.html.fragment_fromstring(hosp_div, parser = ET.HTMLParser()))
                   )
             ),
        E.SCRIPT(src="https://code.jquery.com/jquery-3.2.1.slim.min.js", integrity="sha384-KJ3o2DKtIkvYIK3UENzmM7KCkRr/rE9/Qpg6aAZGJwFDMVNA/GpGFF93hXpG5KkN", crossorigin="anonymous"),
        E.SCRIPT(src="https://cdnjs.cloudflare.com/ajax/libs/popper.js/1.12.9/umd/popper.min.js", integrity="sha384-ApNbgh9B+Y1QKtv3Rn7W3mgPxhU9K/ScQsAP7hUibX39j7fakFPskvXusvfa0b4Q", crossorigin="anonymous"),
        E.SCRIPT(src="https://maxcdn.bootstrapcdn.com/bootstrap/4.0.0/js/bootstrap.min.js", integrity="sha384-JZR6Spejh4U02d8jOt6vLEHfe/JQGiRRSQQxSfFWpi1MquVdAyjUar5+76PVCmYl", crossorigin="anonymous")
    )
    )
    
    return html

In [327]:
lxml.html.open_in_browser(generate_html(metrics, top5_metrics))

file://C:/Users/DANIEL~1/AppData/Local/Temp/tmpmvq9bp0t.html


In [324]:
page = generate_html(metrics, top5_metrics)

AttributeError: module 'lxml.html' has no attribute 'ElementTree'