### Reading CSV

In [33]:
import pandas as pd

#there are many options in read_csv, study the documentation
url = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv"
data = pd.read_csv(url) # coronavirus data

In [34]:
data.head(10) # if we have large data, watch only few rows with .head()

Unnamed: 0,Province/State,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,3/28/21,3/29/21,3/30/21,3/31/21,4/1/21,4/2/21,4/3/21,4/4/21,4/5/21,4/6/21
0,,Afghanistan,33.93911,67.709953,0,0,0,0,0,0,...,56294,56322,56384,56454,56517,56572,56595,56676,56717,56779
1,,Albania,41.1533,20.1683,0,0,0,0,0,0,...,124134,124419,124723,125157,125506,125842,126183,126531,126795,126936
2,,Algeria,28.0339,1.6596,0,0,0,0,0,0,...,116836,116946,117061,117192,117304,117429,117524,117622,117739,117879
3,,Andorra,42.5063,1.5218,0,0,0,0,0,0,...,11850,11888,11944,12010,12053,12115,12174,12231,12286,12328
4,,Angola,-11.2027,17.8739,0,0,0,0,0,0,...,22063,22132,22182,22311,22399,22467,22579,22631,22717,22885
5,,Antigua and Barbuda,17.0608,-61.7964,0,0,0,0,0,0,...,1128,1136,1136,1136,1147,1152,1170,1170,1173,1173
6,,Argentina,-38.4161,-63.6167,0,0,0,0,0,0,...,2308597,2322611,2332765,2348821,2363251,2373153,2383537,2393492,2407159,2428029
7,,Armenia,40.0691,45.0382,0,0,0,0,0,0,...,190317,190741,191491,192639,193736,194852,196044,196634,197113,197873
8,Australian Capital Territory,Australia,-35.4735,149.0124,0,0,0,0,0,0,...,123,123,123,123,123,123,123,123,123,123
9,New South Wales,Australia,-33.8688,151.2093,0,0,0,0,3,4,...,5283,5288,5291,5296,5296,5299,5300,5303,5310,5316


In [35]:
mask = data["3/27/21"] == data["3/27/21"].max() #who has the biggest number on 27th of March?
data[mask]

Unnamed: 0,Province/State,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,3/28/21,3/29/21,3/30/21,3/31/21,4/1/21,4/2/21,4/3/21,4/4/21,4/5/21,4/6/21
249,,US,40.0,-100.0,1,1,2,2,5,5,...,30263145,30332565,30393805,30460834,30539868,30609690,30672664,30707596,30785390,30847348


## Interactive visualization using plotly

In [36]:
import plotly
import plotly.express as px

In [37]:
# world map visualization
fig = px.scatter_geo(data,                  # pandas dataframe
                     lat="Lat", lon="Long", # latitude and longitude 
                     size="3/27/21",        # size of markers are proportional to the number from 27th of March
                     projection="natural earth") # type of a world map projection - study other options

plotly.offline.plot(fig, filename='covid_worldmap.html') #save the interactive plot to html.
                                                         #you can open it in a web browser

'covid_worldmap.html'

In [38]:
# time series of USA (index 244 in dataframe)
data_time_series = data.iloc[244,4:-1]
data_time_series

1/22/20      0
1/23/20      0
1/24/20      0
1/25/20      0
1/26/20      0
          ... 
4/1/21     643
4/2/21     677
4/3/21     714
4/4/21     714
4/5/21     766
Name: 244, Length: 440, dtype: object

In [39]:
# interactive line visualization
fig2 = px.line(x=data_time_series.index, y=data_time_series.values)
plotly.offline.plot(fig2, filename='covid_time_series.html') #save the interactive plot to html.
                                                             #you can open it in a web browser

'covid_time_series.html'

### Subplots

In [40]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go

In [41]:
# make instance of subplots
fig3 = make_subplots(rows=2, cols=1)

# add trace to row 1, col 1
fig3.add_trace(go.Scatter(x=data_time_series.index, y=data_time_series.values,
                          name="Line infections", showlegend=True),
               row=1, col=1)

# add trace to row 2, col 1
fig3.add_trace(go.Bar(x=data_time_series.index, y=data_time_series.values,
                     name="Bar infections", showlegend=True),
               row=2, col=1)

# you can manage figure parameters here
fig3.update_layout(width=800, title_text="Subplots")

# export to html
plotly.offline.plot(fig3, filename='subplots.html')

'subplots.html'

# Melt and pivot
can be useful for further data processing

In [42]:
data

Unnamed: 0,Province/State,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,3/28/21,3/29/21,3/30/21,3/31/21,4/1/21,4/2/21,4/3/21,4/4/21,4/5/21,4/6/21
0,,Afghanistan,33.939110,67.709953,0,0,0,0,0,0,...,56294,56322,56384,56454,56517,56572,56595,56676,56717,56779
1,,Albania,41.153300,20.168300,0,0,0,0,0,0,...,124134,124419,124723,125157,125506,125842,126183,126531,126795,126936
2,,Algeria,28.033900,1.659600,0,0,0,0,0,0,...,116836,116946,117061,117192,117304,117429,117524,117622,117739,117879
3,,Andorra,42.506300,1.521800,0,0,0,0,0,0,...,11850,11888,11944,12010,12053,12115,12174,12231,12286,12328
4,,Angola,-11.202700,17.873900,0,0,0,0,0,0,...,22063,22132,22182,22311,22399,22467,22579,22631,22717,22885
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
269,,Vietnam,14.058324,108.277199,0,2,2,2,2,2,...,2591,2594,2594,2603,2617,2620,2626,2631,2637,2648
270,,West Bank and Gaza,31.952200,35.233200,0,0,0,0,0,0,...,236462,238248,240065,242353,244645,246893,248482,251288,253922,256461
271,,Yemen,15.552727,48.516388,0,0,0,0,0,0,...,4033,4115,4247,4357,4531,4620,4697,4798,4881,4975
272,,Zambia,-13.133897,27.849332,0,0,0,0,0,0,...,87872,88012,88199,88418,88549,88730,88800,88930,89009,89071


In [43]:
#Choose a Country
df = data[data["Country/Region"]=="Czechia"]
df

Unnamed: 0,Province/State,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,3/28/21,3/29/21,3/30/21,3/31/21,4/1/21,4/2/21,4/3/21,4/4/21,4/5/21,4/6/21
100,,Czechia,49.8175,15.473,0,0,0,0,0,0,...,1515029,1516772,1523668,1532332,1539617,1545865,1549734,1551896,1553820,1555245


In [44]:
#deleting Province/State column, because it is not useful in this case
df.drop(columns="Province/State", inplace=True)
df



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Unnamed: 0,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,1/28/20,...,3/28/21,3/29/21,3/30/21,3/31/21,4/1/21,4/2/21,4/3/21,4/4/21,4/5/21,4/6/21
100,Czechia,49.8175,15.473,0,0,0,0,0,0,0,...,1515029,1516772,1523668,1532332,1539617,1545865,1549734,1551896,1553820,1555245


In [45]:
# convert each date column into a separate row item, using the melt() method
melt = df.melt(id_vars=['Country/Region', 'Lat', 'Long'], var_name="Date" ,value_name='TotalCases')       
melt.head()

Unnamed: 0,Country/Region,Lat,Long,Date,TotalCases
0,Czechia,49.8175,15.473,1/22/20,0
1,Czechia,49.8175,15.473,1/23/20,0
2,Czechia,49.8175,15.473,1/24/20,0
3,Czechia,49.8175,15.473,1/25/20,0
4,Czechia,49.8175,15.473,1/26/20,0


In [46]:
#do it backwards using pivot method
melt.pivot(index=['Country/Region', 'Lat', 'Long'], columns='Date', values='TotalCases')

Unnamed: 0_level_0,Unnamed: 1_level_0,Date,1/1/21,1/10/21,1/11/21,1/12/21,1/13/21,1/14/21,1/15/21,1/16/21,1/17/21,1/18/21,...,9/28/20,9/29/20,9/3/20,9/30/20,9/4/20,9/5/20,9/6/20,9/7/20,9/8/20,9/9/20
Country/Region,Lat,Long,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
Czechia,49.8175,15.473,732022,831165,835454,844799,855600,866522,874605,883906,889159,891852,...,65883,67843,26452,70763,27249,27752,28156,28716,29877,31036
