# **Analysis of Covid-19 dataset**

In [1]:
# importing required libraries

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [8]:
# Read the Covid-19 CSV into a DataFrame and display it.
csv_path = r'/content/Covid data analysis.csv'
data = pd.read_csv(csv_path)
data

Unnamed: 0,Date,State,Region,Confirmed,Deaths,Recovered
0,4/29/2020,,Afghanistan,1939,60,252
1,4/29/2020,,Albania,766,30,455
2,4/29/2020,,Algeria,3848,444,1702
3,4/29/2020,,Andorra,743,42,423
4,4/29/2020,,Angola,27,2,7
...,...,...,...,...,...,...
316,4/29/2020,Wyoming,US,545,7,0
317,4/29/2020,Xinjiang,Mainland China,76,3,73
318,4/29/2020,Yukon,Canada,11,0,0
319,4/29/2020,Yunnan,Mainland China,185,2,181


In [16]:
# Number of non-missing values in each column
data.notna().sum()

Date         321
State        140
Region       321
Confirmed    321
Deaths       321
Recovered    321
dtype: int64

In [17]:
# Number of missing values in each column
data.isna().sum()

Date           0
State        181
Region         0
Confirmed      0
Deaths         0
Recovered      0
dtype: int64

# **Show the number of confirmed, death, and recovered cases in each region**

In [18]:
# Peek at the first four rows
data.head(n=4)

Unnamed: 0,Date,State,Region,Confirmed,Deaths,Recovered
0,4/29/2020,,Afghanistan,1939,60,252
1,4/29/2020,,Albania,766,30,455
2,4/29/2020,,Algeria,3848,444,1702
3,4/29/2020,,Andorra,743,42,423


In [19]:
# Total confirmed, death, and recovered counts per region.
# The numeric columns are selected explicitly: summing every column is
# deprecated for non-numeric data and, on pandas >= 2.0, attempts to sum the
# string columns (Date, State) as well.
data.groupby('Region')[['Confirmed', 'Deaths', 'Recovered']].sum()

  data.groupby(['Region']).sum()


Unnamed: 0_level_0,Confirmed,Deaths,Recovered
Region,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Afghanistan,1939,60,252
Albania,766,30,455
Algeria,3848,444,1702
Andorra,743,42,423
Angola,27,2,7
...,...,...,...
West Bank and Gaza,344,2,71
Western Sahara,6,0,5
Yemen,6,0,1
Zambia,97,3,54


In [27]:
# Total confirmed cases for each region
data.groupby('Region')['Confirmed'].agg('sum')

Region
Afghanistan           1939
Albania                766
Algeria               3848
Andorra                743
Angola                  27
                      ... 
West Bank and Gaza     344
Western Sahara           6
Yemen                    6
Zambia                  97
Zimbabwe                32
Name: Confirmed, Length: 187, dtype: int64

In [32]:
# Ten regions with the most confirmed cases, largest first
confirmed_totals = data.groupby('Region')['Confirmed'].sum()
confirmed_totals.sort_values(ascending=False).head(10)

Region
US                1039909
Spain              236899
Italy              203591
France             166543
UK                 166441
Germany            161539
Turkey             117589
Russia              99399
Iran                93657
Mainland China      82862
Name: Confirmed, dtype: int64

In [35]:
# Confirmed and death totals per region, ten largest by confirmed cases.
# Multiple columns must be selected with a list (double brackets); the
# tuple-style ['Confirmed', 'Deaths'] form is deprecated and raises on pandas >= 2.0.
(
    data.groupby('Region')[['Confirmed', 'Deaths']]
    .sum()
    .sort_values(by='Confirmed', ascending=False)
    .head(10)
)

  data.groupby(['Region'])['Confirmed','Deaths'].sum().sort_values(by='Confirmed', ascending=False).head(10)


Unnamed: 0_level_0,Confirmed,Deaths
Region,Unnamed: 1_level_1,Unnamed: 2_level_1
US,1039909,60967
Spain,236899,24275
Italy,203591,27682
France,166543,24121
UK,166441,26166
Germany,161539,6467
Turkey,117589,3081
Russia,99399,972
Iran,93657,5957
Mainland China,82862,4633


# **Remove all the records where confirmed cases are less than 10**

In [37]:
# Peek at the first five rows before filtering
data.head(n=5)

Unnamed: 0,Date,State,Region,Confirmed,Deaths,Recovered
0,4/29/2020,,Afghanistan,1939,60,252
1,4/29/2020,,Albania,766,30,455
2,4/29/2020,,Algeria,3848,444,1702
3,4/29/2020,,Andorra,743,42,423
4,4/29/2020,,Angola,27,2,7


In [44]:
# Keep only rows with at least MIN_CONFIRMED confirmed cases.
# .copy() makes the result an independent frame, so later column assignments
# (e.g. converting Date) do not write into a slice of the original and
# trigger SettingWithCopyWarning.
MIN_CONFIRMED = 10
data = data[data['Confirmed'] >= MIN_CONFIRMED].copy()
data

Unnamed: 0,Date,State,Region,Confirmed,Deaths,Recovered
0,4/29/2020,,Afghanistan,1939,60,252
1,4/29/2020,,Albania,766,30,455
2,4/29/2020,,Algeria,3848,444,1702
3,4/29/2020,,Andorra,743,42,423
4,4/29/2020,,Angola,27,2,7
...,...,...,...,...,...,...
316,4/29/2020,Wyoming,US,545,7,0
317,4/29/2020,Xinjiang,Mainland China,76,3,73
318,4/29/2020,Yukon,Canada,11,0,0
319,4/29/2020,Yunnan,Mainland China,185,2,181


# **In which region was the maximum number of confirmed cases recorded?**

In [46]:
# Peek at the first five rows
data.head(n=5)

Unnamed: 0,Date,State,Region,Confirmed,Deaths,Recovered
0,4/29/2020,,Afghanistan,1939,60,252
1,4/29/2020,,Albania,766,30,455
2,4/29/2020,,Algeria,3848,444,1702
3,4/29/2020,,Andorra,743,42,423
4,4/29/2020,,Angola,27,2,7


In [52]:
# Regions with the highest confirmed totals (top ten, largest first)
confirmed_per_region = data.groupby('Region')['Confirmed'].sum()
confirmed_per_region.sort_values(ascending=False).head(10)

Region
US                1039909
Spain              236899
Italy              203591
France             166536
UK                 166432
Germany            161539
Turkey             117589
Russia              99399
Iran                93657
Mainland China      82861
Name: Confirmed, dtype: int64

In [53]:
# Regions with the lowest confirmed totals (bottom ten, smallest first)
confirmed_low_to_high = data.groupby('Region')['Confirmed'].sum().sort_values()
confirmed_low_to_high.head(10)

Region
Suriname                            10
Gambia                              10
Holy See                            10
Burundi                             11
Seychelles                          11
Nicaragua                           13
Saint Kitts and Nevis               15
Dominica                            16
Namibia                             16
Saint Vincent and the Grenadines    16
Name: Confirmed, dtype: int64

# **In which region was the minimum number of deaths recorded?**

In [54]:
# Ten regions with the fewest total deaths, smallest first
deaths_per_region = data.groupby('Region')['Deaths'].sum()
deaths_per_region.sort_values().head(10)

Region
Cambodia                    0
Seychelles                  0
Saint Lucia                 0
Central African Republic    0
Saint Kitts and Nevis       0
South Sudan                 0
Rwanda                      0
Grenada                     0
Macau                       0
Madagascar                  0
Name: Deaths, dtype: int64

# **How many confirmed, death, and recovered cases were reported in India as of 29 April 2020?**

In [55]:
# Peek at the first five rows
data.head(n=5)

Unnamed: 0,Date,State,Region,Confirmed,Deaths,Recovered
0,4/29/2020,,Afghanistan,1939,60,252
1,4/29/2020,,Albania,766,30,455
2,4/29/2020,,Algeria,3848,444,1702
3,4/29/2020,,Andorra,743,42,423
4,4/29/2020,,Angola,27,2,7


In [61]:
# Show the dtype of every column
data.dtypes

Date         object
State        object
Region       object
Confirmed     int64
Deaths        int64
Recovered     int64
dtype: object

In [62]:
# Convert the Date column from strings to datetime64.
# The format is given explicitly (month/day/year, e.g. 4/29/2020) so parsing
# never depends on inference, and `assign` returns a new frame instead of
# writing a column into what may be a filtered slice.
data = data.assign(Date=pd.to_datetime(data['Date'], format='%m/%d/%Y'))

In [63]:
# Show the dtype of every column again to check the Date conversion
data.dtypes

Date         datetime64[ns]
State                object
Region               object
Confirmed             int64
Deaths                int64
Recovered             int64
dtype: object

In [64]:
# Peek at the first two rows
data.head(n=2)

Unnamed: 0,Date,State,Region,Confirmed,Deaths,Recovered
0,2020-04-29,,Afghanistan,1939,60,252
1,2020-04-29,,Albania,766,30,455


In [65]:
# Rows reported for India
is_india = data['Region'] == 'India'
data.loc[is_india]

Unnamed: 0,Date,State,Region,Confirmed,Deaths,Recovered
74,2020-04-29,,India,33062,1079,8437


# **Sort the entire data by confirmed cases in ascending order**

In [66]:
# Peek at the first two rows
data.head(n=2)

Unnamed: 0,Date,State,Region,Confirmed,Deaths,Recovered
0,2020-04-29,,Afghanistan,1939,60,252
1,2020-04-29,,Albania,766,30,455


In [71]:
# Rows ordered by confirmed cases, smallest first (first 20 shown)
data.sort_values('Confirmed').head(n=20)

Unnamed: 0,Date,State,Region,Confirmed,Deaths,Recovered
156,2020-04-29,,Suriname,10,1,8
70,2020-04-29,,Holy See,10,0,2
59,2020-04-29,,Gambia,10,1,8
318,2020-04-29,Yukon,Canada,11,0,0
217,2020-04-29,Greenland,Denmark,11,0,11
256,2020-04-29,Montserrat,UK,11,1,2
144,2020-04-29,,Seychelles,11,0,6
27,2020-04-29,,Burundi,11,1,4
306,2020-04-29,Turks and Caicos Islands,UK,12,1,5
118,2020-04-29,,Nicaragua,13,3,7


# **Sort the entire data by recovered cases in descending order**

In [76]:
# Rows ordered by recovered cases, largest first (first 20 shown)
data.sort_values('Recovered', ascending=False).head(n=20)

Unnamed: 0,Date,State,Region,Confirmed,Deaths,Recovered
153,2020-04-29,,Spain,236899,24275,132929
61,2020-04-29,,Germany,161539,6467,120400
76,2020-04-29,,Iran,93657,5957,73791
80,2020-04-29,,Italy,203591,27682,71252
229,2020-04-29,Hubei,Mainland China,68128,4512,63616
57,2020-04-29,,France,165093,24087,48228
167,2020-04-29,,Turkey,117589,3081,44040
22,2020-04-29,,Brazil,79685,5513,34132
158,2020-04-29,,Switzerland,29407,1716,22600
78,2020-04-29,,Ireland,20253,1190,13386
