In [None]:
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import numpy as np # linear algebra
import seaborn as sns
import matplotlib.pyplot as plt
import math

from google.colab import drive
# Mount Google Drive so files stored there are readable from this Colab session.
drive.mount("/content/drive")

# Read the COVID-19 observations CSV from the mounted drive and preview the first rows.
covid_csv_path = '/content/drive/My Drive/COVID-data/covid_19_data.csv'
covid_data = pd.read_csv(covid_csv_path)
covid_data.head()

In [None]:
# Count the missing (NaN) entries in every column of the loaded data.
covid_data.isna().sum()

In [None]:
# Total number of countries affected by Covid-19
# Missing Country/Region entries are dropped first so that NaN is not counted
# (or listed) as if it were a country.
uniqueValues = covid_data['Country/Region'].dropna().unique()
# NOTE(review): "effected" in the message below is a misspelling of "affected";
# the text is left as-is so the printed output stays unchanged.
print('Total number of countries effected by Covid-19: %s' % len(uniqueValues))
print(uniqueValues)

In [None]:
# VISUALIZATION
# Parse the month/day/year strings in ObservationDate into UTC-aware timestamps
# and store them back in the same column.
observation_dates = pd.to_datetime(
    covid_data['ObservationDate'],
    format='%m/%d/%Y',
    utc=True,
)
covid_data['ObservationDate'] = observation_dates

In [None]:
# Sum the case counts of all rows that share the same observation date.
case_columns = ["Confirmed", "Recovered", "Deaths"]
date_grouped = covid_data.groupby(["ObservationDate"]).agg(
    {case_column: 'sum' for case_column in case_columns}
)

# Active cases around the world: confirmed minus recovered minus deaths.
date_grouped["Active Cases"] = (
    date_grouped["Confirmed"]
    .sub(date_grouped["Recovered"])
    .sub(date_grouped["Deaths"])
)

# Report the value in the last (most recent date) row for each case type.
world_total_messages = (
    ('Total number of Confirmed cases around the world: %s', "Confirmed"),
    ('Total number of Recovered cases around the world: %s', "Recovered"),
    ('Total number of Death cases around the world: %s', "Deaths"),
)
for message, column in world_total_messages:
    print(message % date_grouped[column].iloc[-1])

date_grouped.tail()

In [None]:
import plotly.graph_objects as go

# One line per case type, drawn in this order: (source column, legend label, colour).
trace_specs = [
    ("Confirmed", 'Confirmed', 'purple'),
    ("Recovered", 'Recovered', 'green'),
    ("Deaths", 'Death', 'red'),
]

fig = go.Figure()
for source_column, legend_label, line_color in trace_specs:
    fig.add_trace(
        go.Scatter(
            x=date_grouped.index,
            y=date_grouped[source_column],
            mode='lines+markers',
            name=legend_label,
            marker_color=line_color,
        )
    )

# Title, axis labels, and the legend placement used for this chart.
fig.update_layout(
    title="Confirmed, Recovered, Death case counts",
    xaxis_title="Date",
    yaxis_title="Number of Cases",
    legend={"x": 0, "y": 1, "traceorder": "normal"},
)
fig.show()

In [None]:
# Group data by country and observation date, summing the case counts.
country_grouped = covid_data.groupby(['Country/Region', 'ObservationDate']).agg(
    {"Confirmed": 'sum', "Recovered": 'sum', "Deaths": 'sum'}
)

# A percentage of confirmed cases is undefined when there are no confirmed cases.
# Masking zero denominators makes those rows NaN instead of inf from a division by zero.
confirmed_nonzero = country_grouped["Confirmed"].where(country_grouped["Confirmed"] != 0)
country_grouped["Death_Percent"] = country_grouped["Deaths"] / confirmed_nonzero * 100
country_grouped["Recovered_Percent"] = country_grouped["Recovered"] / confirmed_nonzero * 100

# Keep the last row of each country. groupby sorts its keys by default, so this is
# the row with the greatest ObservationDate for that country.
# NOTE(review): treating that row as the country's total presumes the counts are
# cumulative -- confirm against the dataset description.
total_sum_country = country_grouped.groupby(['Country/Region']).tail(1)
total_sum_country.tail(20)

In [None]:
# Top 10 countries with confirmed cases
# Move the index levels into regular columns, but only once: without the guard,
# re-running this cell would reset the index again, adding stray 'index'/'level_0'
# columns and eventually raising.
if 'Country/Region' not in total_sum_country.columns:
    total_sum_country = total_sum_country.reset_index()

# Sort by confirmed count, largest first, and keep the first ten rows.
top_10_confirmed_country = total_sum_country.sort_values(by=['Confirmed'], ascending=False).head(10)

In [None]:
# Pie chart of confirmed cases for the top-10 countries; each slice is labelled
# with its country and its percentage of the plotted total.
fig1, ax1 = plt.subplots()
slice_sizes = top_10_confirmed_country['Confirmed']
slice_labels = top_10_confirmed_country['Country/Region']
pie_options = {
    "labels": slice_labels,
    "autopct": '%1.1f%%',
    "shadow": True,
    "startangle": 90,
}
ax1.pie(slice_sizes, **pie_options)
# Equal aspect ratio so the pie is drawn as a circle.
ax1.axis('equal')
plt.show()