In [51]:
# Importing the necessary libraries:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import requests
import plotly.express as px

In [2]:
# covid_df = requests.get("https://api.covid19api.com/all").json()

In [3]:
# Endpoint of the COVID-19 API that is requested in the next cell.
url = "https://api.covid19api.com/all"

In [4]:
# Request the data from the API and decode the JSON payload.
# - `timeout` keeps the cell from hanging forever if the server stops responding
#   (requests applies no timeout by default).
# - `raise_for_status()` turns HTTP error responses (4xx/5xx) into an exception
#   here, instead of letting an error body be parsed as if it were data.
response = requests.get(url, timeout=60)
response.raise_for_status()
df = response.json()

In [5]:
# Convert the decoded JSON payload into a pandas DataFrame.
covid_19_df = pd.DataFrame(data=df)

## Reading the Structure of the Dataframe

In [17]:
# Preview the first rows to check the columns and the kind of values they hold.
covid_19_df.head()

Unnamed: 0,Country,CountryCode,Province,City,CityCode,Lat,Lon,Confirmed,Deaths,Recovered,Active,Date
0,Afghanistan,AF,,,,33.94,67.71,0,0,0,0,2020-01-22T00:00:00Z
1,Afghanistan,AF,,,,33.94,67.71,0,0,0,0,2020-01-23T00:00:00Z
2,Afghanistan,AF,,,,33.94,67.71,0,0,0,0,2020-01-24T00:00:00Z
3,Afghanistan,AF,,,,33.94,67.71,0,0,0,0,2020-01-25T00:00:00Z
4,Afghanistan,AF,,,,33.94,67.71,0,0,0,0,2020-01-26T00:00:00Z


In [7]:
# Print each column's dtype and non-null count, plus the frame's memory usage.
covid_19_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 435984 entries, 0 to 435983
Data columns (total 12 columns):
Country        435984 non-null object
CountryCode    435984 non-null object
Province       435984 non-null object
City           435984 non-null object
CityCode       435984 non-null object
Lat            435984 non-null object
Lon            435984 non-null object
Confirmed      435984 non-null int64
Deaths         435984 non-null int64
Recovered      435984 non-null int64
Active         435984 non-null int64
Date           435984 non-null object
dtypes: int64(4), object(8)
memory usage: 39.9+ MB


In [8]:
# Dimensions of the dataframe as (rows, columns).
covid_19_df.shape

(435984, 12)

In [9]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
covid_19_df.describe()

Unnamed: 0,Confirmed,Deaths,Recovered,Active
count,435984.0,435984.0,435984.0,435984.0
mean,532.963,34.123385,129.465373,369.4144
std,13270.11,920.83264,3307.065523,9972.054
min,0.0,0.0,0.0,0.0
25%,0.0,0.0,0.0,0.0
50%,0.0,0.0,0.0,0.0
75%,13.0,0.0,0.0,12.0
max,1643246.0,97720.0,366736.0,1184027.0


In [19]:
# List the column labels of the dataframe.
covid_19_df.columns

Index(['Country', 'CountryCode', 'Province', 'City', 'CityCode', 'Lat', 'Lon',
       'Confirmed', 'Deaths', 'Recovered', 'Active', 'Date'],
      dtype='object')

## Statistical calculations

#### How many cases have there been in total?

In [43]:
# Largest "Confirmed" value found in any single row, per country.
# NOTE(review): this assumes every country has a row carrying its country-wide
# cumulative count — TODO confirm; if a country is only reported by
# province/city, the max is just its largest sub-region.
sum_case = covid_19_df[["Country", "Confirmed"]].groupby("Country").max()

# Add the per-country maxima together to get the overall number of confirmed cases.
sum_case["Confirmed"].sum()

5382575

#### What is the trend in the number of cases per country?

In [42]:
# Trend of cases per country: total the four case counters for every
# (Date, Country) pair so each country's figures can be followed over time.
case_columns = ["Confirmed", "Deaths", "Recovered", "Active"]
No_Ccases = covid_19_df.groupby(["Date", "Country"])[case_columns].sum()
No_Ccases

Unnamed: 0_level_0,Unnamed: 1_level_0,Confirmed,Deaths,Recovered,Active
Date,Country,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020-01-22T00:00:00Z,Afghanistan,0,0,0,0
2020-01-22T00:00:00Z,Albania,0,0,0,0
2020-01-22T00:00:00Z,Algeria,0,0,0,0
2020-01-22T00:00:00Z,Andorra,0,0,0,0
2020-01-22T00:00:00Z,Angola,0,0,0,0
...,...,...,...,...,...
2020-05-24T00:00:00Z,Viet Nam,325,0,267,58
2020-05-24T00:00:00Z,Western Sahara,9,0,6,3
2020-05-24T00:00:00Z,Yemen,222,42,10,170
2020-05-24T00:00:00Z,Zambia,920,7,336,577


#### What is the % of people infected, per country?

In [45]:
# Per-country case snapshot, read from the CSSEGISandData/COVID-19 GitHub
# repository (web-data branch).
cases_country_url = (
    "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/"
    "web-data/data/cases_country.csv"
)
cases_country_df = pd.read_csv(cases_country_url)
cases_country_df

Unnamed: 0,Country_Region,Last_Update,Lat,Long_,Confirmed,Deaths,Recovered,Active,Incident_Rate,People_Tested,People_Hospitalized,Mortality_Rate,UID,ISO3
0,Australia,2020-05-26 02:32:35,-25.000000,133.000000,7126.0,102.0,6552.0,472.0,27.989332,,,1.431378,36,AUS
1,Austria,2020-05-26 02:32:35,47.516200,14.550100,16539.0,641.0,15138.0,760.0,183.636081,,,3.875688,40,AUT
2,Canada,2020-05-26 02:32:35,60.001000,-95.001000,87119.0,6655.0,44651.0,35813.0,230.134419,,,7.638977,124,CAN
3,China,2020-05-26 02:32:35,30.592800,114.305500,84102.0,4638.0,79352.0,112.0,5.987287,,,5.514732,156,CHN
4,Denmark,2020-05-26 02:32:35,56.263900,9.501800,11586.0,563.0,10162.0,861.0,200.027520,,,4.859313,208,DNK
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
183,West Bank and Gaza,2020-05-26 02:32:35,31.952200,35.233200,423.0,3.0,357.0,63.0,8.291815,,,0.709220,275,PSE
184,Western Sahara,2020-05-26 02:32:35,24.215500,-12.885800,9.0,0.0,6.0,3.0,1.506705,,,0.000000,732,ESH
185,Yemen,2020-05-26 02:32:35,15.552727,48.516388,233.0,44.0,10.0,179.0,0.781198,,,18.884120,887,YEM
186,Zambia,2020-05-26 02:32:35,-13.133897,27.849332,920.0,7.0,336.0,577.0,5.004364,,,0.760870,894,ZMB


In [49]:
# Keep only the four case-count columns.
# They are selected explicitly instead of dropping every other column by name:
# the CSV is fetched from a remote repository whose schema is not under our
# control, and `drop` raises KeyError as soon as one of the listed columns is
# missing, while any newly added numeric column would silently end up in the
# totals. Selecting by name yields the same four columns either way.
case_count_columns = ['Confirmed', 'Deaths', 'Recovered', 'Active']
global_cases = cases_country_df[case_count_columns].copy()

# Sum every column over all countries and turn the result into a one-row table.
gcase_summary = pd.DataFrame(global_cases.sum()).transpose()

# Display with thousands separators and no decimals.
gcase_summary.style.format("{:,.0f}")

Unnamed: 0,Confirmed,Deaths,Recovered,Active
0,5495061,346232,2231738,2950638
