# Covid19 Data Analysis Notebook
------------------------------------------

### Let's Import the modules 

In [2]:
import pandas as pd 
import numpy as np 
import seaborn as sns
import matplotlib.pyplot as plt 

### Importing covid19 dataset


In [4]:
corona_deaths_csv = pd.read_csv("covid19_deaths_dataset.csv")
corona_deaths_csv.head(10)

Unnamed: 0,Province/State,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,4/21/20,4/22/20,4/23/20,4/24/20,4/25/20,4/26/20,4/27/20,4/28/20,4/29/20,4/30/20
0,,Afghanistan,33.0,65.0,0,0,0,0,0,0,...,36,40,42,43,47,50,57,58,60,64
1,,Albania,41.1533,20.1683,0,0,0,0,0,0,...,26,27,27,27,27,28,28,30,30,31
2,,Algeria,28.0339,1.6596,0,0,0,0,0,0,...,392,402,407,415,419,425,432,437,444,450
3,,Andorra,42.5063,1.5218,0,0,0,0,0,0,...,37,37,37,40,40,40,40,41,42,42
4,,Angola,-11.2027,17.8739,0,0,0,0,0,0,...,2,2,2,2,2,2,2,2,2,2
5,,Antigua and Barbuda,17.0608,-61.7964,0,0,0,0,0,0,...,3,3,3,3,3,3,3,3,3,3
6,,Argentina,-38.4161,-63.6167,0,0,0,0,0,0,...,147,152,165,176,185,192,197,207,214,218
7,,Armenia,40.0691,45.0382,0,0,0,0,0,0,...,24,24,24,27,28,28,29,30,30,32
8,Australian Capital Territory,Australia,-35.4735,149.0124,0,0,0,0,0,0,...,3,3,3,3,3,3,3,3,3,3
9,New South Wales,Australia,-33.8688,151.2093,0,0,0,0,0,0,...,26,26,31,33,33,34,34,39,40,41


#### Let's check the shape of the dataframe

In [5]:
corona_deaths_csv.shape

(266, 104)

### Delete the useless columns

In [6]:
corona_deaths_csv.drop(["Lat","Long"],axis=1,inplace=True)

In [7]:
corona_deaths_csv.head(10)

Unnamed: 0,Province/State,Country/Region,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,1/28/20,1/29/20,...,4/21/20,4/22/20,4/23/20,4/24/20,4/25/20,4/26/20,4/27/20,4/28/20,4/29/20,4/30/20
0,,Afghanistan,0,0,0,0,0,0,0,0,...,36,40,42,43,47,50,57,58,60,64
1,,Albania,0,0,0,0,0,0,0,0,...,26,27,27,27,27,28,28,30,30,31
2,,Algeria,0,0,0,0,0,0,0,0,...,392,402,407,415,419,425,432,437,444,450
3,,Andorra,0,0,0,0,0,0,0,0,...,37,37,37,40,40,40,40,41,42,42
4,,Angola,0,0,0,0,0,0,0,0,...,2,2,2,2,2,2,2,2,2,2
5,,Antigua and Barbuda,0,0,0,0,0,0,0,0,...,3,3,3,3,3,3,3,3,3,3
6,,Argentina,0,0,0,0,0,0,0,0,...,147,152,165,176,185,192,197,207,214,218
7,,Armenia,0,0,0,0,0,0,0,0,...,24,24,24,27,28,28,29,30,30,32
8,Australian Capital Territory,Australia,0,0,0,0,0,0,0,0,...,3,3,3,3,3,3,3,3,3,3
9,New South Wales,Australia,0,0,0,0,0,0,0,0,...,26,26,31,33,33,34,34,39,40,41


### Aggregating the rows by the country

In [8]:
corona_deaths_aggregated = corona_deaths_csv.groupby("Country/Region").sum()

In [9]:
corona_deaths_aggregated.head(10)

Unnamed: 0_level_0,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,1/28/20,1/29/20,1/30/20,1/31/20,...,4/21/20,4/22/20,4/23/20,4/24/20,4/25/20,4/26/20,4/27/20,4/28/20,4/29/20,4/30/20
Country/Region,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Afghanistan,0,0,0,0,0,0,0,0,0,0,...,36,40,42,43,47,50,57,58,60,64
Albania,0,0,0,0,0,0,0,0,0,0,...,26,27,27,27,27,28,28,30,30,31
Algeria,0,0,0,0,0,0,0,0,0,0,...,392,402,407,415,419,425,432,437,444,450
Andorra,0,0,0,0,0,0,0,0,0,0,...,37,37,37,40,40,40,40,41,42,42
Angola,0,0,0,0,0,0,0,0,0,0,...,2,2,2,2,2,2,2,2,2,2
Antigua and Barbuda,0,0,0,0,0,0,0,0,0,0,...,3,3,3,3,3,3,3,3,3,3
Argentina,0,0,0,0,0,0,0,0,0,0,...,147,152,165,176,185,192,197,207,214,218
Armenia,0,0,0,0,0,0,0,0,0,0,...,24,24,24,27,28,28,29,30,30,32
Australia,0,0,0,0,0,0,0,0,0,0,...,67,67,75,79,80,83,83,89,91,93
Austria,0,0,0,0,0,0,0,0,0,0,...,491,510,522,530,536,542,549,569,580,584


In [10]:
corona_deaths_aggregated.shape

(187, 100)

### Visualizing data related to a country for example Kazakhstan

In [15]:
corona_deaths_aggregated.loc["Kazakhstan"].plot()


<matplotlib.axes._subplots.AxesSubplot at 0x7f457615c190>

### Calculating a good measure  

In [26]:
corona_deaths_aggregated.loc['Kazakhstan'][:3].plot()

<matplotlib.axes._subplots.AxesSubplot at 0x7f457615c190>

### Calculating the first derivative of the curve

In [20]:
corona_deaths_aggregated.loc["Kazakhstan"].diff().plot()

<matplotlib.axes._subplots.AxesSubplot at 0x7f457615c190>

### Find maxmimum death rate for Kazakhstan

In [30]:
corona_deaths_aggregated.loc["Kazakhstan"].diff().max()

5.0

### Find maximum infection rate for all of the countries. 

In [32]:
countries=list(corona_deaths_aggregated.index)
max_death_rates = []
for c in countries :
    max_death_rates.append(corona_deaths_aggregated.loc[c].diff().max())
corona_deaths_aggregated["max_death_rate"]=max_infection_rates

In [33]:
corona_deaths_aggregated.head()

Unnamed: 0_level_0,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,1/28/20,1/29/20,1/30/20,1/31/20,...,4/23/20,4/24/20,4/25/20,4/26/20,4/27/20,4/28/20,4/29/20,4/30/20,max_infection_rate,max_death_rate
Country/Region,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Afghanistan,0,0,0,0,0,0,0,0,0,0,...,42,43,47,50,57,58,60,64,7.0,7.0
Albania,0,0,0,0,0,0,0,0,0,0,...,27,27,27,28,28,30,30,31,4.0,4.0
Algeria,0,0,0,0,0,0,0,0,0,0,...,407,415,419,425,432,437,444,450,30.0,30.0
Andorra,0,0,0,0,0,0,0,0,0,0,...,37,40,40,40,40,41,42,42,4.0,4.0
Angola,0,0,0,0,0,0,0,0,0,0,...,2,2,2,2,2,2,2,2,2.0,2.0


### Create a new dataframe with only needed column 

In [34]:
corona_data=pd.DataFrame(corona_deaths_aggregated["max_death_rate"])

In [35]:
corona_data.head()

Unnamed: 0_level_0,max_death_rate
Country/Region,Unnamed: 1_level_1
Afghanistan,7.0
Albania,4.0
Algeria,30.0
Andorra,4.0
Angola,2.0


### Importing the happiness dataset

In [36]:
happines_report_csv=pd.read_csv("worldwide_happiness_report.csv")

In [37]:
happines_report_csv.head()

Unnamed: 0,Overall rank,Country or region,Score,GDP per capita,Social support,Healthy life expectancy,Freedom to make life choices,Generosity,Perceptions of corruption
0,1,Finland,7.769,1.34,1.587,0.986,0.596,0.153,0.393
1,2,Denmark,7.6,1.383,1.573,0.996,0.592,0.252,0.41
2,3,Norway,7.554,1.488,1.582,1.028,0.603,0.271,0.341
3,4,Iceland,7.494,1.38,1.624,1.026,0.591,0.354,0.118
4,5,Netherlands,7.488,1.396,1.522,0.999,0.557,0.322,0.298


### Let's drop the useless columns 

In [38]:
useless_cols=["Overall rank","Score","Generosity","Perceptions of corruption"]

In [39]:
happines_report_csv.drop(useless_cols,axis=1,inplace=True)
happines_report_csv.head()

Unnamed: 0,Country or region,GDP per capita,Social support,Healthy life expectancy,Freedom to make life choices
0,Finland,1.34,1.587,0.986,0.596
1,Denmark,1.383,1.573,0.996,0.592
2,Norway,1.488,1.582,1.028,0.603
3,Iceland,1.38,1.624,1.026,0.591
4,Netherlands,1.396,1.522,0.999,0.557


### Changing the indices of the dataframe

In [40]:
happiness_data=happines_report_csv.set_index("Country or region")
happiness_data.head()

Unnamed: 0_level_0,GDP per capita,Social support,Healthy life expectancy,Freedom to make life choices
Country or region,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Finland,1.34,1.587,0.986,0.596
Denmark,1.383,1.573,0.996,0.592
Norway,1.488,1.582,1.028,0.603
Iceland,1.38,1.624,1.026,0.591
Netherlands,1.396,1.522,0.999,0.557


### Now let's join two dataset we have prepared  

#### Corona Dataset :

In [41]:
corona_data.head()

Unnamed: 0_level_0,max_death_rate
Country/Region,Unnamed: 1_level_1
Afghanistan,7.0
Albania,4.0
Algeria,30.0
Andorra,4.0
Angola,2.0


#### wolrd happiness report Dataset :

In [42]:
happiness_data.head()

Unnamed: 0_level_0,GDP per capita,Social support,Healthy life expectancy,Freedom to make life choices
Country or region,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Finland,1.34,1.587,0.986,0.596
Denmark,1.383,1.573,0.996,0.592
Norway,1.488,1.582,1.028,0.603
Iceland,1.38,1.624,1.026,0.591
Netherlands,1.396,1.522,0.999,0.557


In [43]:
data = corona_data.join(happiness_data,how="inner")
data.head()

Unnamed: 0,max_death_rate,GDP per capita,Social support,Healthy life expectancy,Freedom to make life choices
Afghanistan,7.0,0.35,0.517,0.361,0.0
Albania,4.0,0.947,0.848,0.874,0.383
Algeria,30.0,1.002,1.16,0.785,0.086
Argentina,13.0,1.092,1.432,0.881,0.471
Armenia,3.0,0.85,1.055,0.815,0.283


### Correlation matrix 

In [44]:
data.corr()

Unnamed: 0,max_death_rate,GDP per capita,Social support,Healthy life expectancy,Freedom to make life choices
max_death_rate,1.0,0.259893,0.204148,0.309666,0.080166
GDP per capita,0.259893,1.0,0.759468,0.863062,0.394603
Social support,0.204148,0.759468,1.0,0.765286,0.456246
Healthy life expectancy,0.309666,0.863062,0.765286,1.0,0.427892
Freedom to make life choices,0.080166,0.394603,0.456246,0.427892,1.0


### Visualization of the results
our Analysis is not finished unless we visualize the results in terms figures and graphs so that everyone can understand what you get out of our analysis

In [45]:
data.head()

Unnamed: 0,max_death_rate,GDP per capita,Social support,Healthy life expectancy,Freedom to make life choices
Afghanistan,7.0,0.35,0.517,0.361,0.0
Albania,4.0,0.947,0.848,0.874,0.383
Algeria,30.0,1.002,1.16,0.785,0.086
Argentina,13.0,1.092,1.432,0.881,0.471
Armenia,3.0,0.85,1.055,0.815,0.283


### Plotting GDP vs maximum Death rate

In [48]:
x = data["GDP per capita"]
y = data["max_death_rate"]
sns.scatterplot(x,np.log(y))

  result = getattr(ufunc, method)(*inputs, **kwargs)


<matplotlib.axes._subplots.AxesSubplot at 0x7f457615c190>

In [49]:
sns.regplot(x,np.log(y))

  result = getattr(ufunc, method)(*inputs, **kwargs)
  overwrite_input, interpolation)


<matplotlib.axes._subplots.AxesSubplot at 0x7f457615c190>

### Plotting Social support vs maximum Death rate

In [50]:
x = data["Social support"]
y = data["max_death_rate"]
sns.scatterplot(x,np.log(y))

  result = getattr(ufunc, method)(*inputs, **kwargs)


<matplotlib.axes._subplots.AxesSubplot at 0x7f457615c190>

In [51]:
sns.regplot(x,np.log(y))

  result = getattr(ufunc, method)(*inputs, **kwargs)
  overwrite_input, interpolation)


<matplotlib.axes._subplots.AxesSubplot at 0x7f457615c190>

### Plotting Healthy life expectancy vs maximum Death rate

In [54]:
x = data["Healthy life expectancy"]
y = data["max_death_rate"]
sns.scatterplot(x,np.log(y))

  result = getattr(ufunc, method)(*inputs, **kwargs)


<matplotlib.axes._subplots.AxesSubplot at 0x7f457615c190>

In [52]:
sns.regplot(x,np.log(y))

  result = getattr(ufunc, method)(*inputs, **kwargs)
  overwrite_input, interpolation)


<matplotlib.axes._subplots.AxesSubplot at 0x7f457615c190>

### Task 5.4: Plotting Freedom to make life choices vs maximum death rate

In [55]:
x = data["Freedom to make life choices"]
y = data["max_death_rate"]
sns.scatterplot(x,np.log(y))

  result = getattr(ufunc, method)(*inputs, **kwargs)


<matplotlib.axes._subplots.AxesSubplot at 0x7f457615c190>

In [53]:
sns.regplot(x,np.log(y))

  result = getattr(ufunc, method)(*inputs, **kwargs)
  overwrite_input, interpolation)


<matplotlib.axes._subplots.AxesSubplot at 0x7f457615c190>