# Covid data study

This notebook includes figures tracking cases, deaths, and trends of the coronavirus in the U.S. It is intended only as a quick overview to better understand the trends.

## U.S. coronavirus map

Import data from the CSSE COVID-19 GitHub repository.

In [1]:
import json, requests
import pandas as pd
from pyecharts.charts import Map, Line
import pyecharts.options as opts

In [2]:
# Daily US report from the CSSE COVID-19 repository; the file name pins the snapshot to 08-19-2021.
url = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports_us/08-19-2021.csv'

Check data format.

In [3]:
# Download the CSV at `url` into a DataFrame, then preview the first rows to check the columns.
data = pd.read_csv(url)
data.head()

Unnamed: 0,Province_State,Country_Region,Last_Update,Lat,Long_,Confirmed,Deaths,Recovered,Active,FIPS,Incident_Rate,Total_Test_Results,People_Hospitalized,Case_Fatality_Ratio,UID,ISO3,Testing_Rate,Hospitalization_Rate
0,Alabama,US,2021-08-20 04:31:00,32.3182,-86.9023,649741,11914,,,1.0,13251.406994,2756534.0,,1.833654,84000001.0,USA,56219.253404,
1,Alaska,US,2021-08-20 04:31:00,61.3707,-152.4044,82687,412,,,2.0,11303.064063,2619092.0,,0.498265,84000002.0,USA,358021.994546,
2,American Samoa,US,2021-08-20 04:31:00,-14.271,-170.132,0,0,,,60.0,0.0,2140.0,,,16.0,ASM,3846.084722,
3,Arizona,US,2021-08-20 04:31:00,33.7298,-111.4312,976471,18508,,,4.0,13415.427472,11261953.0,,1.895397,84000004.0,USA,154724.424648,
4,Arkansas,US,2021-08-20 04:31:00,34.9697,-92.3731,429100,6581,,,5.0,14218.948613,3492013.0,,1.533675,84000005.0,USA,115713.71103,


Select the confirmed cases and deaths for each state or area.

In [4]:
# Pair every Province_State name with its count; these (name, value) tuples
# are what the map cells below pass to Map.add().
state_names = data['Province_State']
Confirmed_data = list(zip(state_names, data['Confirmed']))
Deaths_data = list(zip(state_names, data['Deaths']))

In [5]:
# Colour buckets for the confirmed-case counts, highest first.
# Only the open-ended top bucket (no 'max') is given an explicit colour.
confirmed_pieces = [
    {'min': 2000000, 'color': '#7f1818'},
    {'min': 1000000, 'max': 1999999},
    {'min': 500000, 'max': 999999},
    {'min': 100000, 'max': 499999},
    {'min': 50001, 'max': 99999},
    {'min': 10000, 'max': 50000},
    {'min': 0, 'max': 9999},
]

# Piecewise legend, vertically centred.
confirmed_visualmap = opts.VisualMapOpts(
    is_show=True,
    split_number=7,
    is_piecewise=True,
    pos_top='center',
    pieces=confirmed_pieces,
)

# NOTE: `map` shadows the Python builtin; the name is kept because the
# following cell renders the chart through it.
# '美国' ("United States") is the map type passed to Map.add().
map = (
    Map()
    .add('Confirmed', Confirmed_data, '美国')
    .set_series_opts(label_opts=opts.LabelOpts(is_show=True))
    .set_global_opts(
        title_opts=opts.TitleOpts(title='US Covid Map（Confirmed）'),
        visualmap_opts=confirmed_visualmap,
    )
)

Use pyecharts to visualize the confirmed cases as of the report date (2021-08-19), grouped into piecewise ranges. Hover over a range to highlight the states in it, or hover over a state to show its number.

In [6]:
# Display the confirmed-cases map built in the previous cell as this cell's output.
map.render_notebook()

In [7]:
# Colour buckets for the death counts, highest first.
# The buckets are kept disjoint (the 5000 boundary) so every value falls into
# exactly one range; a 4000-9999 bucket would overlap the 1000-4999 one.
deaths_pieces = [
    {'min': 50001, 'max': 99999},
    {'min': 10000, 'max': 50000},
    {'min': 5000, 'max': 9999},
    {'min': 1000, 'max': 4999},
    {'min': 0, 'max': 999},
]

# NOTE: `map` shadows the Python builtin; the name is kept because the
# following cell renders the chart through it.
# '美国' ("United States") is the map type passed to Map.add().
map = (
    Map()
    .add('Deaths', Deaths_data, '美国')
    .set_series_opts(label_opts=opts.LabelOpts(is_show=True))
    .set_global_opts(
        title_opts=opts.TitleOpts(title='US Covid Map（Deaths）'),
        visualmap_opts=opts.VisualMapOpts(
            is_show=True,
            split_number=5,
            is_piecewise=True,
            pos_top='center',
            pieces=deaths_pieces,
        ),
    )
)

Use pyecharts to visualize the deaths as of the report date (2021-08-19), grouped into piecewise ranges. Hover over a range to highlight the states in it, or hover over a state to show its number.

In [8]:
# Display the deaths map built in the previous cell as this cell's output.
map.render_notebook()

In [9]:
# One set of colour buckets shared by both series, highest first.
# The buckets are kept disjoint (the 5000 boundary) so every value falls into
# exactly one range; a 4000-9999 bucket would overlap the 1000-4999 one.
# Only the open-ended top bucket (no 'max') is given an explicit colour.
combined_pieces = [
    {'min': 2000000, 'color': '#7f1818'},
    {'min': 1000000, 'max': 1999999},
    {'min': 500000, 'max': 999999},
    {'min': 100000, 'max': 499999},
    {'min': 50001, 'max': 99999},
    {'min': 10000, 'max': 50000},
    {'min': 5000, 'max': 9999},
    {'min': 1000, 'max': 4999},
    {'min': 0, 'max': 999},
]

# NOTE: `map` shadows the Python builtin; the name is kept because the
# following cell renders the chart through it.
# '美国' ("United States") is the map type passed to Map.add().
map = (
    Map()
    .add('Confirmed', Confirmed_data, '美国')
    .add('Deaths', Deaths_data, '美国')
    .set_series_opts(label_opts=opts.LabelOpts(is_show=True))
    .set_global_opts(
        title_opts=opts.TitleOpts(title='US Covid Map till today'),
        # 'single' lets the legend show only one of the two series at a time.
        legend_opts=opts.LegendOpts(selected_mode='single'),
        visualmap_opts=opts.VisualMapOpts(
            is_show=True,
            split_number=9,
            is_piecewise=True,
            pos_top='center',
            pieces=combined_pieces,
        ),
    )
)



Select a single series in the legend to show either the confirmed cases or the deaths on the map.

In [10]:
# Display the combined confirmed/deaths map built in the previous cell as this cell's output.
map.render_notebook()

## Trend for each state

Import the time-series data from CSSE and check the start date.

In [11]:
# Confirmed-case time series for the US from the CSSE COVID-19 repository.
url = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_US.csv'
data = pd.read_csv(url)

# Sum all rows belonging to the same state. numeric_only=True restricts the
# sum to numeric columns: recent pandas versions no longer drop text columns
# silently, and any that survive would become string rows after the transpose.
data = data.groupby(['Province_State']).sum(numeric_only=True)

# Transpose so that rows are dates and columns are states, then remove the
# identifier/coordinate rows, which are not part of the time series.
data = data.T
data = data.drop(['UID', 'code3', 'FIPS', 'Lat', 'Long_'])
data.head()

Province_State,Alabama,Alaska,American Samoa,Arizona,Arkansas,California,Colorado,Connecticut,Delaware,Diamond Princess,...,Tennessee,Texas,Utah,Vermont,Virgin Islands,Virginia,Washington,West Virginia,Wisconsin,Wyoming
1/22/20,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
1/23/20,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
1/24/20,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
1/25/20,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
1/26/20,0.0,0.0,0.0,1.0,0.0,2.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0





Check the end date.

In [12]:
# Preview the last rows, i.e. the most recent dates in the series.
data.tail()

Province_State,Alabama,Alaska,American Samoa,Arizona,Arkansas,California,Colorado,Connecticut,Delaware,Diamond Princess,...,Tennessee,Texas,Utah,Vermont,Virgin Islands,Virginia,Washington,West Virginia,Wisconsin,Wyoming
8/15/21,634897.0,79823.0,0.0,965462.0,419807.0,4132332.0,589526.0,361836.0,114486.0,49.0,...,941261.0,3339500.0,444385.0,26040.0,5295.0,717826.0,507294.0,171997.0,704857.0,68272.0
8/16/21,637363.0,80885.0,0.0,967862.0,420663.0,4148367.0,592372.0,363417.0,114770.0,49.0,...,954610.0,3356260.0,446808.0,26040.0,5326.0,723727.0,514884.0,173479.0,707327.0,68944.0
8/17/21,641386.0,81337.0,0.0,970523.0,422866.0,4163848.0,593562.0,364298.0,114912.0,49.0,...,958169.0,3384466.0,447771.0,26397.0,5326.0,725971.0,517214.0,174818.0,709290.0,69356.0
8/18/21,645851.0,82002.0,0.0,972925.0,425551.0,4176356.0,595585.0,364891.0,115236.0,49.0,...,963647.0,3402025.0,449259.0,26494.0,5413.0,728523.0,520733.0,175638.0,710975.0,69844.0
8/19/21,649741.0,82687.0,0.0,976471.0,429100.0,4188640.0,597281.0,365425.0,115665.0,49.0,...,969998.0,3419098.0,450539.0,26663.0,5432.0,731287.0,524670.0,176608.0,712751.0,70150.0


Build a list of the dates and a dictionary that maps each state to its list of values.

In [13]:
# x-axis labels: the row index of `data` (one entry per date).
date_list = data.index.tolist()

# y-axis series: state name -> list of that state's values, in date order.
stateDict = {state: list(data[state]) for state in data.columns}

Select Alaska, Tennessee, and California for a test line plot, with a zoom slider on the time axis and a legend toggle for each state. Hover over the figure to show the values for each state on a given date.

In [14]:
# States shown in this first test plot.
test_states = ['Alaska', 'Tennessee', 'California']

line = Line()
line.add_xaxis(date_list)
for state in test_states:
    line.add_yaxis(state, stateDict[state], is_smooth=True)

# Filled, semi-transparent areas; per-point labels are hidden.
line.set_series_opts(
    areastyle_opts=opts.AreaStyleOpts(opacity=0.5),
    label_opts=opts.LabelOpts(is_show=False),
)

line.set_global_opts(
    xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-30)),
    # The plotted values are confirmed-case counts, not population figures.
    yaxis_opts=opts.AxisOpts(name='Confirmed cases', min_=0),
    tooltip_opts=opts.TooltipOpts(trigger="axis", axis_pointer_type="cross"),
    datazoom_opts=opts.DataZoomOpts(),
    title_opts=opts.TitleOpts(title='Covid Trend'),
)

line.render_notebook()

Sort the states by their case count on the latest date.

In [15]:
# Reorder the state columns in ascending order of the value in the last valid row.
latest_date = data.last_valid_index()
sorted_df = data.sort_values(by=latest_date, axis=1)
sorted_df.tail()

Province_State,American Samoa,Diamond Princess,Grand Princess,Northern Mariana Islands,Virgin Islands,Guam,Vermont,District of Columbia,Hawaii,Wyoming,...,New Jersey,North Carolina,Ohio,Pennsylvania,Georgia,Illinois,New York,Florida,Texas,California
8/15/21,0.0,49.0,103.0,183.0,5295.0,8911.0,26040.0,52201.0,51200.0,68272.0,...,1060934.0,1107414.0,1159759.0,1254613.0,1252615.0,1457687.0,2205869.0,2920749.0,3339500.0,4132332.0
8/16/21,0.0,49.0,103.0,183.0,5326.0,9018.0,26040.0,52777.0,51739.0,68944.0,...,1062238.0,1122412.0,1161573.0,1256689.0,1272014.0,1466813.0,2209767.0,2920749.0,3356260.0,4148367.0
8/17/21,0.0,49.0,103.0,183.0,5326.0,9081.0,26397.0,52843.0,52199.0,69356.0,...,1064059.0,1125987.0,1164808.0,1258774.0,1279653.0,1470452.0,2213176.0,2920749.0,3384466.0,4163848.0
8/18/21,0.0,49.0,103.0,183.0,5413.0,9118.0,26494.0,53024.0,52846.0,69844.0,...,1065736.0,1131243.0,1168111.0,1261160.0,1287667.0,1474285.0,2217969.0,2920749.0,3402025.0,4176356.0
8/19/21,0.0,49.0,103.0,183.0,5432.0,9200.0,26663.0,53162.0,53598.0,70150.0,...,1067758.0,1138263.0,1171557.0,1265068.0,1297503.0,1477465.0,2223052.0,2920749.0,3419098.0,4188640.0


Remove the cruise ships and island territories listed below.

In [16]:
# Columns excluded from the per-state ranking.
excluded_areas = [
    'American Samoa',
    'Diamond Princess',
    'Grand Princess',
    'Northern Mariana Islands',
    'Virgin Islands',
    'Guam',
]

# errors='ignore' keeps this cell re-runnable: `sorted_df` is overwritten
# here, so a second run would otherwise raise KeyError for columns that
# were already dropped.
sorted_df = sorted_df.drop(columns=excluded_areas, errors='ignore')
sorted_df.tail()

Province_State,Vermont,District of Columbia,Hawaii,Wyoming,Maine,Alaska,New Hampshire,North Dakota,Delaware,Montana,...,New Jersey,North Carolina,Ohio,Pennsylvania,Georgia,Illinois,New York,Florida,Texas,California
8/15/21,26040.0,52201.0,51200.0,68272.0,72522.0,79823.0,102901.0,113330.0,114486.0,119864.0,...,1060934.0,1107414.0,1159759.0,1254613.0,1252615.0,1457687.0,2205869.0,2920749.0,3339500.0,4132332.0
8/16/21,26040.0,52777.0,51739.0,68944.0,72522.0,80885.0,103462.0,113404.0,114770.0,120325.0,...,1062238.0,1122412.0,1161573.0,1256689.0,1272014.0,1466813.0,2209767.0,2920749.0,3356260.0,4148367.0
8/17/21,26397.0,52843.0,52199.0,69356.0,72897.0,81337.0,103733.0,113673.0,114912.0,120946.0,...,1064059.0,1125987.0,1164808.0,1258774.0,1279653.0,1470452.0,2213176.0,2920749.0,3384466.0,4163848.0
8/18/21,26494.0,53024.0,52846.0,69844.0,73088.0,82002.0,103989.0,113925.0,115236.0,121382.0,...,1065736.0,1131243.0,1168111.0,1261160.0,1287667.0,1474285.0,2217969.0,2920749.0,3402025.0,4176356.0
8/19/21,26663.0,53162.0,53598.0,70150.0,73270.0,82687.0,104263.0,114179.0,115665.0,121877.0,...,1067758.0,1138263.0,1171557.0,1265068.0,1297503.0,1477465.0,2223052.0,2920749.0,3419098.0,4188640.0


Show the 5 states with the fewest cases.

In [17]:
# First five columns of the ascending-sorted frame, i.e. the lowest latest counts.
sorted_df.columns[:5]

Index(['Vermont', 'District of Columbia', 'Hawaii', 'Wyoming', 'Maine'], dtype='object', name='Province_State')

Show the 5 states with the most cases.

In [18]:
# Last five columns of the ascending-sorted frame, i.e. the highest latest counts.
sorted_df.columns[-5:]

Index(['Illinois', 'New York', 'Florida', 'Texas', 'California'], dtype='object', name='Province_State')

Plot the trend for the 5 states with the fewest cases, with a zoom slider on the time axis and a legend toggle for each state. Hover over the figure to show the values for each state on a given date.

In [19]:
line = Line()
line.add_xaxis(date_list)

# One series per state, taken from the five left-most columns of `sorted_df`.
for col in sorted_df.columns[:5]:
    line.add_yaxis(col, stateDict[col], is_smooth=True)

# Filled, semi-transparent areas; per-point labels are hidden.
line.set_series_opts(
    areastyle_opts=opts.AreaStyleOpts(opacity=0.5),
    label_opts=opts.LabelOpts(is_show=False),
)

line.set_global_opts(
    xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-30)),
    # The plotted values are confirmed-case counts, not population figures.
    yaxis_opts=opts.AxisOpts(name='Confirmed cases', min_=0),
    tooltip_opts=opts.TooltipOpts(trigger="axis", axis_pointer_type="cross"),
    datazoom_opts=opts.DataZoomOpts(),
    legend_opts=opts.LegendOpts(pos_top=20),
    title_opts=opts.TitleOpts(title='Covid Trend sorted by State (least 5)'),
)

line.render_notebook()

Plot the trend for the 5 states with the most cases, with a zoom slider on the time axis and a legend toggle for each state. Hover over the figure to show the values for each state on a given date.

In [20]:
line = Line()
line.add_xaxis(date_list)

# One series per state, taken from the five right-most columns of `sorted_df`.
for col in sorted_df.columns[-5:]:
    line.add_yaxis(col, stateDict[col], is_smooth=True)

# Filled, semi-transparent areas; per-point labels are hidden.
line.set_series_opts(
    areastyle_opts=opts.AreaStyleOpts(opacity=0.5),
    label_opts=opts.LabelOpts(is_show=False),
)

line.set_global_opts(
    xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-30)),
    # The plotted values are confirmed-case counts, not population figures.
    yaxis_opts=opts.AxisOpts(name='Confirmed cases', min_=0),
    tooltip_opts=opts.TooltipOpts(trigger="axis", axis_pointer_type="cross"),
    datazoom_opts=opts.DataZoomOpts(),
    legend_opts=opts.LegendOpts(pos_top=20),
    title_opts=opts.TitleOpts(title='Covid Trend sorted by State (most 5)'),
)

line.render_notebook()

Plot every state in one chart; the legend lets you select a single state at a time to view its trend.

In [21]:
# Taller canvas than the default so the legend with every column fits above the plot.
line = Line(init_opts=opts.InitOpts(height="600px"))
line.add_xaxis(date_list)

# One series per column of `data`.
for col in data.columns:
    line.add_yaxis(col, stateDict[col], is_smooth=True)

# Filled, semi-transparent areas; per-point labels are hidden.
line.set_series_opts(
    areastyle_opts=opts.AreaStyleOpts(opacity=0.5),
    label_opts=opts.LabelOpts(is_show=False),
)

line.set_global_opts(
    xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-30)),
    # The plotted values are confirmed-case counts, not population figures.
    yaxis_opts=opts.AxisOpts(name='Confirmed cases', min_=0),
    tooltip_opts=opts.TooltipOpts(trigger="axis", axis_pointer_type="cross"),
    datazoom_opts=opts.DataZoomOpts(),
    # 'single' shows exactly one series at a time; the left/right margins
    # keep the legend clear of the chart edges.
    legend_opts=opts.LegendOpts(
        selected_mode='single',
        pos_top=20,
        pos_left=120,
        pos_right=120,
    ),
    title_opts=opts.TitleOpts(title='Covid Trend for Each State'),
)

line.render_notebook()