# COVID-19 Data Visualization


## By Candida Noronha

### Importing modules

In [33]:
import pandas as pd
import numpy as np
import plotly.express as px
import matplotlib.pyplot as plt 
import datetime
print('Modules are imported')

Modules are imported



#### Loading the Dataset

In [34]:
dataset_url = 'https://raw.githubusercontent.com/datasets/covid-19/master/data/countries-aggregated.csv'
df = pd.read_csv(dataset_url)


#### Checking the dataframe 

In [35]:
df.head()

Unnamed: 0,Date,Country,Confirmed,Recovered,Deaths
0,2020-01-22,Afghanistan,0,0,0
1,2020-01-23,Afghanistan,0,0,0
2,2020-01-24,Afghanistan,0,0,0
3,2020-01-25,Afghanistan,0,0,0
4,2020-01-26,Afghanistan,0,0,0


In [36]:
df.tail()

Unnamed: 0,Date,Country,Confirmed,Recovered,Deaths
115240,2021-08-30,Zimbabwe,124581,0,4416
115241,2021-08-31,Zimbabwe,124773,0,4419
115242,2021-09-01,Zimbabwe,124960,0,4438
115243,2021-09-02,Zimbabwe,125118,0,4449
115244,2021-09-03,Zimbabwe,125331,0,4457


#### Check the shape of the dataframe 

In [37]:
df.shape

(115245, 5)


#### Some preprocessing 

In [38]:
df = df[df.Confirmed > 0]

In [39]:
df.head()

Unnamed: 0,Date,Country,Confirmed,Recovered,Deaths
33,2020-02-24,Afghanistan,1,0,0
34,2020-02-25,Afghanistan,1,0,0
35,2020-02-26,Afghanistan,1,0,0
36,2020-02-27,Afghanistan,1,0,0
37,2020-02-28,Afghanistan,1,0,0


#### Displaying data related to a country for example India

In [40]:
df[df.Country == 'India']

Unnamed: 0,Date,Country,Confirmed,Recovered,Deaths
46697,2020-01-30,India,1,0,0
46698,2020-01-31,India,1,0,0
46699,2020-02-01,India,1,0,0
46700,2020-02-02,India,2,0,0
46701,2020-02-03,India,3,0,0
...,...,...,...,...,...
47275,2021-08-30,India,32768880,0,438560
47276,2021-08-31,India,32810845,0,439020
47277,2021-09-01,India,32857937,0,439529
47278,2021-09-02,India,32903289,0,439895


## Visualizing the Global spread of COVID-19

In [41]:
fig  = px.choropleth(df, locations = 'Country', locationmode = 'country names', color = 'Confirmed', animation_frame = 'Date')
fig.update_layout(title_text= 'Global Spread of COVID-19 ')
fig.show()

Unsupported


## Visualizing the Global Deaths caused due to COVID-19

In [42]:
fig  = px.choropleth(df, locations = 'Country', locationmode = 'country names', color = 'Deaths', animation_frame = 'Date')
fig.update_layout(title_text= 'Global Deaths due to COVID-19 ')
fig.show()

Unsupported


#### Visualizing how intensive the COVID-19 Transmission has been in each of the country

In [43]:
df_china = df[df.Country == 'China']
df_china.head()

Unnamed: 0,Date,Country,Confirmed,Recovered,Deaths
21276,2020-01-22,China,548,28,17
21277,2020-01-23,China,643,30,18
21278,2020-01-24,China,920,36,26
21279,2020-01-25,China,1406,39,42
21280,2020-01-26,China,2075,49,56


In [44]:
df_china = df_china[['Date','Confirmed']]

In [45]:
df_china

Unnamed: 0,Date,Confirmed
21276,2020-01-22,548
21277,2020-01-23,643
21278,2020-01-24,920
21279,2020-01-25,1406
21280,2020-01-26,2075
...,...,...
21862,2021-08-30,107052
21863,2021-08-31,107073
21864,2021-09-01,107102
21865,2021-09-02,107130


#### Calculating the first derivative of the Confrimed Column

In [46]:
df_china['Infection Rate'] = df_china['Confirmed'].diff()

In [47]:
df_china

Unnamed: 0,Date,Confirmed,Infection Rate
21276,2020-01-22,548,
21277,2020-01-23,643,95.0
21278,2020-01-24,920,277.0
21279,2020-01-25,1406,486.0
21280,2020-01-26,2075,669.0
...,...,...,...
21862,2021-08-30,107052,40.0
21863,2021-08-31,107073,21.0
21864,2021-09-01,107102,29.0
21865,2021-09-02,107130,28.0


In [48]:
px.line(df_china, 'Date', y=['Confirmed','Infection Rate'])

Unsupported

In [49]:
df_china['Infection Rate'].max()

15136.0


### Calculating the Maximum Infection Rate for all countries

In [50]:
df.head()

Unnamed: 0,Date,Country,Confirmed,Recovered,Deaths
33,2020-02-24,Afghanistan,1,0,0
34,2020-02-25,Afghanistan,1,0,0
35,2020-02-26,Afghanistan,1,0,0
36,2020-02-27,Afghanistan,1,0,0
37,2020-02-28,Afghanistan,1,0,0


In [51]:
countries = list(df['Country'].unique())
max_infection_rates = []

for c in countries:
    max_infec = df[df.Country == c].Confirmed.diff().max()
    max_infection_rates.append(max_infec)
    


#### Creating a new Dataframe 

In [52]:
df_max_infec = pd.DataFrame()
df_max_infec['Country'] = countries
df_max_infec['Maximum Infection Rate'] = max_infection_rates
df_max_infec

#df_max_infec[df_max_infec.Country == 'India']

Unnamed: 0,Country,Maximum Infection Rate
0,Afghanistan,5721.0
1,Albania,1239.0
2,Algeria,1927.0
3,Andorra,299.0
4,Angola,405.0
...,...,...
190,Vietnam,17428.0
191,West Bank and Gaza,2884.0
192,Yemen,174.0
193,Zambia,3594.0


#### Plot a barchart : Maximum Infection Rate of each country

In [53]:
px.bar(df_max_infec, x='Country', y='Maximum Infection Rate', color='Country', title = 'Global Maximum Infection Rate',log_y=True)

Unsupported

## Visualizing the Infection Rate of COVID-19 from when it began till today in India

### COVID-19 pandemic in India 
**The first cases of COVID-19 in India were reported on 30 January 2020 in three towns of Kerala, among three Indian medical students who had returned from Wuhan, the epicenter of the pandemic. - <a href="https://en.wikipedia.org/wiki/COVID-19_pandemic_in_India">Source</a>**

In [54]:
india_covid_start_date = '2020-01-30'
india_covid_today = datetime.datetime.now()

In [55]:
df.head()

Unnamed: 0,Date,Country,Confirmed,Recovered,Deaths
33,2020-02-24,Afghanistan,1,0,0
34,2020-02-25,Afghanistan,1,0,0
35,2020-02-26,Afghanistan,1,0,0
36,2020-02-27,Afghanistan,1,0,0
37,2020-02-28,Afghanistan,1,0,0


### Getting data related to India

In [56]:
df_india = df[df.Country == 'India']

In [57]:
df_india

Unnamed: 0,Date,Country,Confirmed,Recovered,Deaths
46697,2020-01-30,India,1,0,0
46698,2020-01-31,India,1,0,0
46699,2020-02-01,India,1,0,0
46700,2020-02-02,India,2,0,0
46701,2020-02-03,India,3,0,0
...,...,...,...,...,...
47275,2021-08-30,India,32768880,0,438560
47276,2021-08-31,India,32810845,0,439020
47277,2021-09-01,India,32857937,0,439529
47278,2021-09-02,India,32903289,0,439895


#### Calculating the infection rate in India

In [58]:
df_india['Infection Rate'] = df_india.Confirmed.diff()
df_india



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Unnamed: 0,Date,Country,Confirmed,Recovered,Deaths,Infection Rate
46697,2020-01-30,India,1,0,0,
46698,2020-01-31,India,1,0,0,0.0
46699,2020-02-01,India,1,0,0,0.0
46700,2020-02-02,India,2,0,0,1.0
46701,2020-02-03,India,3,0,0,1.0
...,...,...,...,...,...,...
47275,2021-08-30,India,32768880,0,438560,30941.0
47276,2021-08-31,India,32810845,0,439020,41965.0
47277,2021-09-01,India,32857937,0,439529,47092.0
47278,2021-09-02,India,32903289,0,439895,45352.0


#### Visualization

In [59]:

fig = px.line(df_india, x='Date',y='Infection Rate', title='Infection Rate of COVID-19 in India')
fig.add_shape(
              
    dict(
         type='line',
         x0=india_covid_start_date,
         y0=0,
         x1=india_covid_start_date,
         y1=df_india['Infection Rate'].max(),
         line = dict(color='red',width=2)
    )
)
fig.add_annotation(
    dict(
        x = india_covid_start_date,
        y = df_india['Infection Rate'].max(),
        text = 'First Case of COVID-19 in India'
    )
)



fig.add_shape(
              
    dict(
         type='line',
         x0=india_covid_today,
         y0=0,
         x1=datetime.datetime.now(),
         y1=df_india['Infection Rate'].max(),
         line = dict(color='orange',width=2)
    )
)
fig.add_annotation(
    dict(
        x = datetime.datetime.now(),
        y = df_india['Infection Rate'].max(),
        text = 'Present Infection Rate'
    )
)

Unsupported

### Visualizing the Death Rate

In [60]:
df_india['Death Rate'] = df_india.Deaths.diff()



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



#### Check the dataframe 

In [61]:
df_india.head()

Unnamed: 0,Date,Country,Confirmed,Recovered,Deaths,Infection Rate,Death Rate
46697,2020-01-30,India,1,0,0,,
46698,2020-01-31,India,1,0,0,0.0,0.0
46699,2020-02-01,India,1,0,0,0.0,0.0
46700,2020-02-02,India,2,0,0,1.0,0.0
46701,2020-02-03,India,3,0,0,1.0,0.0


In [62]:
fig = px.line(df_india,x='Date',y=['Infection Rate','Death Rate'])
fig.show()

Unsupported

#### Normalizing the columns

In [63]:
df_india['Infection Rate'] = df_india['Infection Rate']/df_india['Infection Rate'].max()
df_india['Death Rate'] = df_india['Death Rate']/df_india['Death Rate'].max()



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



### Plotting the Chart of Infection Rate and Death Rate

In [64]:
fig = px.line(df_india,x='Date',y=['Infection Rate','Death Rate'])

fig.add_shape(
              
    dict(
         type='line',
         x0=india_covid_start_date,
         y0=0,
         x1=india_covid_start_date,
         y1=df_india['Infection Rate'].max(),
         line = dict(color='red',width=2)
    )
)
fig.add_annotation(
    dict(
        x = india_covid_start_date,
        y = df_india['Infection Rate'].max(),
        text = 'First Case of COVID-19'
    )
)



fig.add_shape(
              
    dict(
         type='line',
         x0=india_covid_today,
         y0=0,
         x1=india_covid_today,
         y1=df_india['Infection Rate'].max(),
         line = dict(color='orange',width=2)
    )
)
fig.add_annotation(
    dict(
        x = india_covid_today,
        y = 0,
        text = 'Situation Today'
    )
)
fig.show()

Unsupported