Import relevant libraries

In [49]:
import numpy as np
import pandas as pd
from scipy import stats

Load data into dataframe

In [None]:
# Load the raw CSV into a DataFrame. The path is relative, so the file must sit
# in the kernel's current working directory.
covid_data = pd.read_csv("covid-data.csv")

Inspect the dataframe

In [51]:
# Preview the top of the frame; head() returns the first 5 rows by default.
covid_data.head()

Unnamed: 0,iso_code,continent,location,date,total_cases,new_cases,new_cases_smoothed,total_deaths,new_deaths,new_deaths_smoothed,...,female_smokers,male_smokers,handwashing_facilities,hospital_beds_per_thousand,life_expectancy,human_development_index,excess_mortality_cumulative_absolute,excess_mortality_cumulative,excess_mortality,excess_mortality_cumulative_per_million
0,AFG,Asia,Afghanistan,24/02/2020,5,5,,,,,...,,,37.746,0.5,64.83,0.511,,,,
1,AFG,Asia,Afghanistan,25/02/2020,5,0,,,,,...,,,37.746,0.5,64.83,0.511,,,,
2,AFG,Asia,Afghanistan,26/02/2020,5,0,,,,,...,,,37.746,0.5,64.83,0.511,,,,
3,AFG,Asia,Afghanistan,27/02/2020,5,0,,,,,...,,,37.746,0.5,64.83,0.511,,,,
4,AFG,Asia,Afghanistan,28/02/2020,5,0,,,,,...,,,37.746,0.5,64.83,0.511,,,,


In [52]:
# Keep only the six columns listed below. The name covid_data is rebound, so the
# original wide frame is no longer reachable after this cell runs.
covid_data = covid_data[
    [
        "iso_code",
        "continent",
        "location",
        "date",
        "total_cases",
        "new_cases",
    ]
]

In [53]:
# Confirm the column selection: first 5 rows (head's default) of the trimmed frame.
covid_data.head()

Unnamed: 0,iso_code,continent,location,date,total_cases,new_cases
0,AFG,Asia,Afghanistan,24/02/2020,5,5
1,AFG,Asia,Afghanistan,25/02/2020,5,0
2,AFG,Asia,Afghanistan,26/02/2020,5,0
3,AFG,Asia,Afghanistan,27/02/2020,5,0
4,AFG,Asia,Afghanistan,28/02/2020,5,0


In [54]:
# Copy the 5-row preview straight to the system clipboard so it can be pasted
# elsewhere. This is a side effect outside the notebook; the cell shows no output.
covid_data.head().to_clipboard()

In [55]:
# Data type of every column. Note that "date" is reported as object, i.e. it
# has not been parsed into a datetime dtype.
covid_data.dtypes # for datatypes

iso_code       object
continent      object
location       object
date           object
total_cases     int64
new_cases       int64
dtype: object

In [56]:
# Copy the per-column dtype listing to the system clipboard (no cell output).
covid_data.dtypes.to_clipboard()

In [57]:
covid_data.shape # (rows, columns) of covid_data after the column selection: all rows, but only 6 columns

(5818, 6)

Analysing the mean of a dataset

In [58]:
# Arithmetic mean of the new_cases column.
data_mean = np.mean(covid_data["new_cases"])
data_mean

np.float64(8814.365761430045)

Checking the median of a dataset

In [59]:
# Median (middle value) of the new_cases column. Unlike the mean, it is not
# pulled upward by a small number of very large values.
data_median = np.median(covid_data["new_cases"])
data_median

np.float64(261.0)

Identifying the standard deviation of a dataset

In [60]:
# Population standard deviation: NumPy's default ddof=0 divides by n.
data_sd = np.std(covid_data["new_cases"], ddof=0)
data_sd

np.float64(21244.338444114834)

In [61]:
# Sample standard deviation: pandas' default ddof=1 divides by n - 1 instead of n,
# which is why this value is slightly larger than the NumPy result.
covid_data["new_cases"].std(ddof=1)

np.float64(21246.164421895)

Checking the variance of a dataset

In [62]:
# Variance is the square of the standard deviation and measures the overall
# spread of the data. NumPy's default ddof=0 gives the population variance.
data_variance = np.var(covid_data["new_cases"], ddof=0)
data_variance

np.float64(451321915.9280954)

In [63]:
# Sample variance: pandas' default ddof=1 divides by n - 1, so the result is
# slightly larger than NumPy's population variance (which divides by n).
covid_data["new_cases"].var(ddof=1)

np.float64(451399502.6421969)

Identifying the percentile of a dataset

In [64]:
# 60th percentile: the value below which 60% of the new_cases observations fall.
# np.percentile takes q on a 0-100 scale.
data_percentile = np.percentile(covid_data["new_cases"], q=60)
data_percentile

np.float64(591.3999999999996)

Checking the quartile of a dataset

In [65]:
# Third quartile (Q3): np.quantile takes q on a 0-1 scale, so 0.75 is the
# 75th percentile.
data_quartile = np.quantile(covid_data["new_cases"], q=0.75)
data_quartile

np.float64(3666.0)

Generating the range of a dataset

In [66]:
# Largest and smallest new_cases values, computed with NumPy.
data_max, data_min = (
    np.max(covid_data["new_cases"]),
    np.min(covid_data["new_cases"]),
)

In [67]:
# Show the two extremes side by side: maximum first, then minimum.
print(f"{data_max} {data_min}")

287149 0


In [68]:
# Range = largest value minus smallest value, using the data_max / data_min
# computed in the preceding cell.
data_range = data_max - data_min
data_range

np.int64(287149)

In [69]:
# Same extremes as before, this time via the pandas Series methods.
data_max, data_min = (
    covid_data["new_cases"].max(),
    covid_data["new_cases"].min(),
)

In [70]:
# Recompute the range from the most recently assigned data_max / data_min
# (here the pandas-derived values).
data_range = data_max - data_min
data_range

np.int64(287149)

Analysing the mode of a dataset

In [71]:
# Most frequent value in new_cases. stats.mode returns a ModeResult holding the
# modal value (mode) and how many times it occurs (count).
data_mode = stats.mode(covid_data["new_cases"])
data_mode
# Reading the result: the value 0 occurs 805 times, so it is the most common value.

ModeResult(mode=np.int64(0), count=np.int64(805))

In [72]:
# Extract just the modal value; .mode is the first field of ModeResult
# (the second field, .count, holds its frequency).
data_mode.mode

np.int64(0)

In [None]:
# scipy.stats.mode only accepts numeric input and raises on a string column, so
# use pandas' Series.mode() for the categorical "continent" column instead.
# Series.mode() returns a Series because several values can tie for most frequent.
# A separate name is used so data_mode keeps holding the new_cases result.
continent_mode = covid_data["continent"].mode()
continent_mode

Analysing the interquartile range (IQR) of a dataset

In [74]:
# Interquartile range (75th minus 25th percentile). With "midpoint", a percentile
# that falls between two data points is taken as the average of those two points.
data_IQR = stats.iqr(
    covid_data["new_cases"],
    rng=(25, 75),
    interpolation="midpoint",
)
data_IQR

np.float64(3641.0)

In [75]:
# Interquartile range with SciPy's defaults spelled out: the 25th-75th percentile
# span, using linear interpolation between neighbouring data points.
data_IQR = stats.iqr(
    covid_data["new_cases"],
    rng=(25, 75),
    interpolation="linear",
)
data_IQR

np.float64(3642.0)