![image.png](attachment:97577d7e-75a6-4705-b47e-4f1e7119bd2e.png)

In [1]:
#pip install lets_plot -U

In [2]:
# Third-party libraries imported under their conventional aliases.
import pandas as pd
import geopandas as gpd
import numpy as np


# The star import brings the Lets-Plot names used throughout this notebook
# (ggplot, geom_*, scale_*, layer_tooltips, ...) into the namespace.
from lets_plot import *
# Set up Lets-Plot for HTML output in the notebook.
LetsPlot.setup_html()

# Display the installed Lets-Plot version as this cell's output.
import lets_plot
lets_plot.__version__

'4.5.1'

## Data: India statewise COVID-19 testing details 

In [3]:
# Read the statewise testing table from the local CSV and preview its first rows.
dat = pd.read_csv(filepath_or_buffer="Data/StatewiseTestingDetails.csv")
dat.head(n=5)

Unnamed: 0,Date,State,TotalSamples,Negative,Positive
0,2020-04-17,Andaman and Nicobar Islands,1403.0,1210.0,12.0
1,2020-04-24,Andaman and Nicobar Islands,2679.0,,27.0
2,2020-04-27,Andaman and Nicobar Islands,2848.0,,33.0
3,2020-05-01,Andaman and Nicobar Islands,3754.0,,33.0
4,2020-05-16,Andaman and Nicobar Islands,6677.0,,33.0


In [4]:
# Parse the date strings into timestamps, keep only the columns used below,
# and drop rows with a missing value in any of them.
dat = (
    dat
    .assign(Date=pd.to_datetime(dat["Date"]))
    .loc[:, ["Date", "State", "TotalSamples", "Positive"]]
    .dropna()
)

### Cumulative number of total samples tested till the given date (by State)

In [5]:
# One line per state (colour mapped to "State"). The legend is hidden, so the
# state is identified through the tooltip instead.
(ggplot(dat) 
 + geom_line(aes(x="Date", y="TotalSamples", color="State"),
             size=1.5,
             show_legend=False,
             # Tooltip: a single "<State>: <TotalSamples>" line, with
             # TotalSamples rendered using the ".1s" format.
             tooltips=layer_tooltips()
                .line("@State: @TotalSamples")
                .format("TotalSamples", ".1s")) 
 # Untitled date axis; tick labels use the "%b'%y" pattern.
 + scale_x_datetime(name="", format="%b\'%y") 
 + ggsize(785, 350)
)

### Cumulative number of positive samples till the given date (by State)

In [6]:
# Small multiples: one panel per state (facet_wrap on "State"), laid out in
# three columns, each showing the "Positive" column over time.
(ggplot(dat) + ggsize(785, 1500) 
 + geom_line(aes(x="Date", y="Positive"), size=1.5, color='brush') 
 # Untitled date axis; tick labels use the "%b'%y" pattern.
 + scale_x_datetime(name="", format="%b\'%y")
 + facet_wrap(facets="State", ncol=3)
 + theme_grey())

## Taking snapshots from the statewise COVID-19 testing data

#### Unfortunately, the data for the majority of Indian states has not been updated since the end of August 2020. 
#### Therefore, we will focus on the earlier period: from April 2020 through August 2020.
#### We will take three snapshots from the April, June and August data. 
To take a snapshot we will consider a 2-week period in each of the months. 
Then we will compute how much testing was done for the "snapshot" period of time (2 weeks).
And finally, we will compute the number of positive test results relative to the total number of samples (as a percentage).

In [7]:
# Snapshot windows: each one ends on the 20th of its month and starts two weeks earlier.

from datetime import timedelta

# End dates of the three snapshot windows (April, June and August 2020).
snapshot_end = [
    pd.to_datetime(day)
    for day in ("2020-04-20", "2020-06-20", "2020-08-20")
]

# Matching start dates, plus the axis labels derived from the end dates.
snapshot_start = [end - timedelta(weeks=2) for end in snapshot_end]
snapshot_lab = [end.strftime('%d %b\'%y') for end in snapshot_end]


In [8]:
# Positive counts per state over time, with the snapshot windows marked.
(ggplot(dat) 
 + geom_line(aes(x="Date", y="Positive", color="State"), show_legend=False,
             tooltips=layer_tooltips(["State", "Positive"]).format("Positive", ".1s"),
             size=1.5) 
 # Dashed gray vertical lines at the end and at the start of every snapshot window.
 + geom_vline(aes(xintercept=snapshot_end), color="gray", linetype="dashed")
 + geom_vline(aes(xintercept=snapshot_start), color="gray", linetype="dashed")
 # "2 weeks" caption anchored at each window start, at a fixed y of 1400000.
 + geom_text(aes(x=snapshot_start), y=1400000, label="2 weeks", hjust=1)
 # Axis breaks only at the snapshot end dates, labelled with snapshot_lab;
 # the upper axis limit is set to 2020-09-20 and the lower one is left unset.
 + scale_x_datetime(name="", breaks=snapshot_end, labels=snapshot_lab, limits=[None, pd.to_datetime("2020-09-20")]) 
 + theme(panel_grid_major_x="blank")
 + ggsize(785, 350)
)

In [9]:
def take_snapshot(t_start, t_end, data=None):
    """Summarise per-state testing activity inside the window [t_start, t_end].

    Parameters
    ----------
    t_start, t_end : timestamp-like
        Inclusive bounds compared against the ``Date`` column.
    data : pandas.DataFrame, optional
        Frame with ``Date``, ``State``, ``TotalSamples`` and ``Positive``
        columns. When omitted, the notebook-level ``dat`` frame is used, so
        existing two-argument calls keep working unchanged.

    Returns
    -------
    pandas.DataFrame
        One row per state that has at least one record in the window, with
        columns ``State``, ``Date`` (always ``t_end``), ``Samples`` and
        ``Positive`` (per-state max minus min of ``TotalSamples`` /
        ``Positive`` within the window) and ``PositivePCT``
        (``Positive / Samples * 100``). ``PositivePCT`` is NaN whenever
        ``Samples`` is not greater than zero, so a state that reports new
        positives without new samples no longer produces ``inf``.
    """
    source = dat if data is None else data

    in_window = (source["Date"] >= t_start) & (source["Date"] <= t_end)
    per_state = source.loc[in_window, ["State", "TotalSamples", "Positive"]].groupby("State")

    # The notebook treats both counters as cumulative, so the spread
    # (max - min) inside the window is the amount added during the window.
    growth = per_state.max() - per_state.min()

    snapshot = pd.DataFrame(
        {
            "Date": [t_end] * len(growth),
            "Samples": growth["TotalSamples"],
            "Positive": growth["Positive"],
        },
        index=growth.index,
    )

    # Percent of "positive" tests; masked to NaN where the ratio is undefined.
    has_samples = snapshot["Samples"] > 0
    snapshot["PositivePCT"] = (snapshot["Positive"] / snapshot["Samples"] * 100).where(has_samples)

    return snapshot.reset_index()



In [10]:
# Build one snapshot frame per window; start and end dates are paired positionally.
dat_Apr20, dat_Jun20, dat_Aug20 = [
    take_snapshot(window_start, window_end)
    for window_start, window_end in zip(snapshot_start, snapshot_end)
]
dat_Apr20.head()

Unnamed: 0,State,Date,Samples,Positive,PositivePCT
0,Andaman and Nicobar Islands,2020-04-20,0.0,0.0,
1,Andhra Pradesh,2020-04-20,24359.0,357.0,1.465577
2,Arunachal Pradesh,2020-04-20,233.0,0.0,0.0
3,Assam,2020-04-20,2249.0,5.0,0.222321
4,Bihar,2020-04-20,6723.0,58.0,0.86271


### Positive test results relative to the total number of samples (%) - snapshots

In [11]:
# Stack the three snapshot frames into one long frame with a fresh integer index.
dat_AprJunAug20 = pd.concat([dat_Apr20, dat_Jun20, dat_Aug20], ignore_index=True)

# One line plus large points per state; only the points carry tooltips.
(ggplot(data=dat_AprJunAug20) 
 + geom_line(aes(x="Date", y="PositivePCT", color="State"), 
             show_legend=False,
             tooltips="none") 
 + geom_point(aes(x="Date", y="PositivePCT", color="State"), 
              show_legend=False, size=7,
              # NOTE(review): "@Y" looks like a leftover — the snapshot frames have no
              # "Y" column; confirm before removing. The "PositivePCT" entry below is
              # the one matching the tooltip variable.
              tooltips=layer_tooltips(["State", "PositivePCT"])
                          .format("@Y", "{.1f}%")
                          .format("PositivePCT", "{.1f}%")) 
 # Axis breaks only at the snapshot end dates, labelled with snapshot_lab.
 + scale_x_datetime(breaks=snapshot_end, labels=snapshot_lab) 
 + scale_y_continuous(format="{.0f} %")
 + ggtitle("% of Positive Test Results")
 + ggsize(785, 350) 
 # margin() is deprecated in Lets-Plot; the list form is [top, right, bottom, left],
 # with None leaving a side at its default.
 + flavor_darcula() + theme(axis_title_x="blank", title=element_text(margin=[16, None, None, None]))
)


WARN: The margin() is deprecated and will be removed in future releases.
      Please, use a number or list of numbers to specify margins (see description of the parameter used).


## Spatial Visualization

### Boundaries of states and union territories of India.
We will obtain boundaries of states and union territories of India using Lets-Plot's built-in geocoding package. 

In [12]:
from lets_plot.geo_data import *

The geodata is provided by © OpenStreetMap contributors and is made available here under the Open Database License (ODbL).


In [13]:
# Geocode every distinct state name found in the data, scoped to India,
# then request the boundary geometries and preview the first rows.
state_geocoder = geocode_states(dat["State"].unique()).scope("India")
state_boundaries = state_geocoder.get_boundaries(resolution=5)
state_boundaries.head(n=5)

Unnamed: 0,state,found name,geometry
0,Andaman and Nicobar Islands,Andaman and Nicobar Islands,"MULTIPOLYGON (((92.26476 11.5139, 92.22218 11...."
1,Andhra Pradesh,Andhra Pradesh,"MULTIPOLYGON (((80.38263 16.86518, 80.40866 16..."
2,Arunachal Pradesh,Arunachal Pradesh,"MULTIPOLYGON (((91.56231 27.63221, 91.64479 27..."
3,Assam,Assam,"MULTIPOLYGON (((89.71931 26.16638, 89.71824 26..."
4,Bihar,Bihar,"MULTIPOLYGON (((83.32126 25.02036, 83.36748 25..."


### The dynamics of COVID-19 positive test results (%): April through August 2020 

In [14]:
def COVID19_map(data, title):
    """Build a choropleth of ``PositivePCT`` on top of a ``geom_livemap()`` layer.

    ``data`` is joined to the notebook-level ``state_boundaries`` by matching
    its "State" column against the boundaries' "state" column. ``title`` is
    used as the plot title. Returns the composed plot object.
    """
    # Tooltip: state name as the title, then the percentage with one decimal.
    state_tooltip = (
        layer_tooltips()
        .title("@State")
        .line("@PositivePCT")
        .format("PositivePCT", "{.1f}%")
        .min_width(200)
        .anchor('middle_right')
    )

    # State polygons filled by the percentage, outlined with a thin white border.
    states_layer = geom_polygon(
        aes(fill="PositivePCT"),
        data=data,
        map=state_boundaries,
        map_join=[["State"], ["state"]],
        tooltips=state_tooltip,
        show_legend=False,
        color="white",
        size=.1,
    )

    # Fixed limits, so every map built by this function uses the same fill scale.
    fill_scale = scale_fill_brewer(palette="PuRd", limits=[0, 52.5], trans="sqrt")

    plot = ggplot() + geom_livemap() + states_layer
    plot = plot + fill_scale + ggtitle(title) + ggsize(785, 770)
    return plot


In [15]:
# Build one live map per snapshot, then display them one after another.
map_Apr20, map_Jun20, map_Aug20 = [
    COVID19_map(snapshot, month)
    for snapshot, month in [
        (dat_Apr20, "April 2020"),
        (dat_Jun20, "June 2020"),
        (dat_Aug20, "August 2020"),
    ]
]
map_Apr20.show()
map_Jun20.show()
map_Aug20.show()

In [16]:
# Combine the three live maps into one grid figure (hspace=0), sized 1200x450.
gggrid([
   map_Apr20, map_Jun20, map_Aug20  
], hspace=0) + ggsize(1200, 450)

In [17]:
def COVID19_choropleth(data, title):
    """Build a choropleth of ``PositivePCT`` without a live-map layer.

    ``data`` is joined to the notebook-level ``state_boundaries`` by matching
    its "State" column against the boundaries' "state" column. ``title`` is
    used as the plot title. Axes and major grid lines are blanked out and
    ``coord_map()`` is applied. Returns the composed plot object.
    """
    # Tooltip: state name as the title, then the percentage with one decimal.
    state_tooltip = (
        layer_tooltips()
        .title("@State")
        .line("@PositivePCT")
        .format("PositivePCT", "{.1f}%")
        .min_width(200)
        .anchor('middle_right')
    )

    # State polygons filled by the percentage, outlined with a thin white border.
    states_layer = geom_polygon(
        aes(fill="PositivePCT"),
        data=data,
        map=state_boundaries,
        map_join=[["State"], ["state"]],
        tooltips=state_tooltip,
        show_legend=False,
        color="white",
        size=.1,
    )

    # Fixed limits, so every map built by this function uses the same fill scale.
    fill_scale = scale_fill_brewer(palette="PuRd", limits=[0, 52.5], trans="sqrt")
    bare_theme = theme(axis="blank", panel_grid_major="blank")

    plot = ggplot() + states_layer + fill_scale
    plot = plot + ggtitle(title) + coord_map() + bare_theme
    return plot


In [18]:
# Build one static choropleth per snapshot and show them together in one grid.
map_Apr20_1, map_Jun20_1, map_Aug20_1 = [
    COVID19_choropleth(snapshot, month)
    for snapshot, month in [
        (dat_Apr20, "April 2020"),
        (dat_Jun20, "June 2020"),
        (dat_Aug20, "August 2020"),
    ]
]

gggrid([map_Apr20_1, map_Jun20_1, map_Aug20_1], hspace=0) + ggsize(1200, 450)