In [2]:
# Data analysis and manipulation
import pandas as pd
import plotly.graph_objs as go
import plotly.io as pio
import plotly.express as px

# Data visualization
import matplotlib.pyplot as plt

# Import plotly
import plotly.offline as py

py.init_notebook_mode(connected=True)
pio.renderers.default = "colab"

In [3]:
# importing dataset - 'covid.csv'
dataset1 = pd.read_csv("covid.csv")
dataset1.head()

Unnamed: 0,Country/Region,Continent,Population,TotalCases,NewCases,TotalDeaths,NewDeaths,TotalRecovered,NewRecovered,ActiveCases,"Serious,Critical",Tot Cases/1M pop,Deaths/1M pop,TotalTests,Tests/1M pop,WHO Region,iso_alpha
0,USA,North America,331198100.0,5032179,,162804.0,,2576668.0,,2292707.0,18296.0,15194.0,492.0,63139605.0,190640.0,Americas,USA
1,Brazil,South America,212710700.0,2917562,,98644.0,,2047660.0,,771258.0,8318.0,13716.0,464.0,13206188.0,62085.0,Americas,BRA
2,India,Asia,1381345000.0,2025409,,41638.0,,1377384.0,,606387.0,8944.0,1466.0,30.0,22149351.0,16035.0,South-EastAsia,IND
3,Russia,Europe,145940900.0,871894,,14606.0,,676357.0,,180931.0,2300.0,5974.0,100.0,29716907.0,203623.0,Europe,RUS
4,South Africa,Africa,59381570.0,538184,,9604.0,,387316.0,,141264.0,539.0,9063.0,162.0,3149807.0,53044.0,Africa,ZAF


In [4]:
# Returns a tuple of shape (rows, columns)
print(dataset1.shape)

# Returns size of dataframe
print(dataset1.size)

(209, 17)
3553


In [5]:
# Returns information about the dataset.
# Return consise summary about dataframe.
dataset1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 209 entries, 0 to 208
Data columns (total 17 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   Country/Region    209 non-null    object 
 1   Continent         208 non-null    object 
 2   Population        208 non-null    float64
 3   TotalCases        209 non-null    int64  
 4   NewCases          4 non-null      float64
 5   TotalDeaths       188 non-null    float64
 6   NewDeaths         3 non-null      float64
 7   TotalRecovered    205 non-null    float64
 8   NewRecovered      3 non-null      float64
 9   ActiveCases       205 non-null    float64
 10  Serious,Critical  122 non-null    float64
 11  Tot Cases/1M pop  208 non-null    float64
 12  Deaths/1M pop     187 non-null    float64
 13  TotalTests        191 non-null    float64
 14  Tests/1M pop      191 non-null    float64
 15  WHO Region        184 non-null    object 
 16  iso_alpha         209 non-null    object 
dt

In [6]:
# importing the second dataset - "covid_grouped.csv"
dataset2 = pd.read_csv("covid_grouped.csv")

dataset2.head()

# print last 5 rows of dataset.
print(dataset2.tail())

# return consize inforamtion about the dataset
print(dataset2.info())

             Date      Country/Region  Confirmed  Deaths  Recovered  Active  \
35151  2020-07-27  West Bank and Gaza      10621      78       3752    6791   
35152  2020-07-27      Western Sahara         10       1          8       1   
35153  2020-07-27               Yemen       1691     483        833     375   
35154  2020-07-27              Zambia       4552     140       2815    1597   
35155  2020-07-27            Zimbabwe       2704      36        542    2126   

       New cases  New deaths  New recovered             WHO Region iso_alpha  
35151        152           2              0  Eastern Mediterranean            
35152          0           0              0                 Africa       ESH  
35153         10           4             36  Eastern Mediterranean       YEM  
35154         71           1            465                 Africa       ZMB  
35155        192           2             24                 Africa       ZWE  
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3

In [7]:
dataset2.columns

Index(['Date', 'Country/Region', 'Confirmed', 'Deaths', 'Recovered', 'Active',
       'New cases', 'New deaths', 'New recovered', 'WHO Region', 'iso_alpha'],
      dtype='object')

In [8]:
dataset1.columns

# If we want to delete a column, we can remove it by the drop() function.

Index(['Country/Region', 'Continent', 'Population', 'TotalCases', 'NewCases',
       'TotalDeaths', 'NewDeaths', 'TotalRecovered', 'NewRecovered',
       'ActiveCases', 'Serious,Critical', 'Tot Cases/1M pop', 'Deaths/1M pop',
       'TotalTests', 'Tests/1M pop', 'WHO Region', 'iso_alpha'],
      dtype='object')

In [9]:
# Import create_table Figure Factory
from plotly.offline import init_notebook_mode, iplot

init_notebook_mode(connected=True)
from plotly.figure_factory import create_table

colorscale = [[0, "#4d004c"], [0.5, "#f2e5ff"], [1, "#ffffff"]]
table = create_table(dataset1.head(15), colorscale=colorscale)
py.iplot(table)

In [10]:
py.iplot(table)

In [11]:
# Bar graph: Comparison between COVID infected country in terms of
# total cases, total deaths, total tests, and total recovers.

px.bar(
    dataset1.head(15),
    x="Country/Region",
    y="TotalCases",
    color="TotalCases",
    height=500,
    hover_data=["Country/Region", "Continent"],
)

### As the following plot clearly shows the data for the top 15 countries, now again each country is shown with
###respect to the total number of cases from the top 15 countries, color the total deaths, hover data is set as 'Country/Resgion'.'Contninent and analyze the visualization

In [12]:
# Let's now visualize the chart bar and analyze it by coloring the number of death cases
px.bar(
    dataset1.head(15),
    x="Country/Region",
    y="TotalCases",
    color="TotalDeaths",
    height=500,
    hover_data=["Country/Region", "Continent"],
)

In [13]:
# Let's now visualize the chart bar and analyze it by coloring the number of recovered cases
px.bar(
    dataset1.head(15),
    x="Country/Region",
    y="TotalCases",
    color="TotalRecovered",
    height=500,
    hover_data=["Country/Region", "Continent"],
)

In [14]:
# Let's now visualize the chart bar and analyze it by coloring the number of test cases
px.bar(
    dataset1.head(15),
    x="Country/Region",
    y="TotalCases",
    color="TotalTests",
    height=500,
    hover_data=["Country/Region", "Continent", "Population"],
)

In [15]:
px.bar(
    dataset1.head(15),
    x="Country/Region",
    y="TotalCases",
    color="TotalRecovered",
    height=500,
    hover_data=["Country/Region", "Continent", "Population"],
)

In [26]:
"""Now, we will create a horizontal oriernation plot
wit Y-azis as 'TotalTests' and Y-axis as 'Country/Region'
with passing parameter orientation='h' and color the
plot by 'TotalTests'"""

px.bar(
    data_frame=dataset1.head(15),
    x="TotalTests",
    y="Country/Region",
    color="TotalDeaths",
    orientation="h",
    hover_data=["Country/Region", "Population", "Continent"],
)