# Import Libraries 

In [8]:
import pandas as pd 
import plotly.express as px 
import numpy as np

# Read Data

In [9]:
df = pd.read_csv('covid_19_data.csv')
df.head()

Unnamed: 0,Date,State,Region,Confirmed,Deaths,Recovered
0,4/29/2020,,Afghanistan,1939,60,252
1,4/29/2020,,Albania,766,30,455
2,4/29/2020,,Algeria,3848,444,1702
3,4/29/2020,,Andorra,743,42,423
4,4/29/2020,,Angola,27,2,7


# Data Info


In [10]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 321 entries, 0 to 320
Data columns (total 6 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   Date       321 non-null    object
 1   State      140 non-null    object
 2   Region     321 non-null    object
 3   Confirmed  321 non-null    int64 
 4   Deaths     321 non-null    int64 
 5   Recovered  321 non-null    int64 
dtypes: int64(3), object(3)
memory usage: 15.2+ KB


In [11]:
df.duplicated().sum()

0

In [12]:
df.isnull().sum()

Date           0
State        181
Region         0
Confirmed      0
Deaths         0
Recovered      0
dtype: int64

In [14]:
df.isnull().mean()*100

Date          0.000000
State        56.386293
Region        0.000000
Confirmed     0.000000
Deaths        0.000000
Recovered     0.000000
dtype: float64

In [15]:
df.drop(['State'] , axis = 1 , inplace = True )

In [17]:
df['Date'] = pd.to_datetime(df['Date'])

In [19]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 321 entries, 0 to 320
Data columns (total 5 columns):
 #   Column     Non-Null Count  Dtype         
---  ------     --------------  -----         
 0   Date       321 non-null    datetime64[ns]
 1   Region     321 non-null    object        
 2   Confirmed  321 non-null    int64         
 3   Deaths     321 non-null    int64         
 4   Recovered  321 non-null    int64         
dtypes: datetime64[ns](1), int64(3), object(1)
memory usage: 12.7+ KB


### Q. 1) Show the number of Confirmed, Deaths and Recovered cases in each Region.

In [20]:
df.groupby(['Region'])[['Confirmed','Deaths','Recovered']].sum()

Unnamed: 0_level_0,Confirmed,Deaths,Recovered
Region,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Afghanistan,1939,60,252
Albania,766,30,455
Algeria,3848,444,1702
Andorra,743,42,423
Angola,27,2,7
...,...,...,...
West Bank and Gaza,344,2,71
Western Sahara,6,0,5
Yemen,6,0,1
Zambia,97,3,54


### Q. 2) Remove all the records where the Confirmed Cases is Less Than 10.

In [23]:
idx = df[df['Confirmed'] < 10].index
idx

Int64Index([ 18,  98, 105, 126, 140, 177, 178, 184, 192, 194, 203, 272, 284,
            285, 288, 289, 305],
           dtype='int64')

In [24]:
df.drop(idx , axis = 0 , inplace = True )

In [26]:
df.reset_index(inplace = True , drop = True )

In [27]:
df

Unnamed: 0,Date,Region,Confirmed,Deaths,Recovered
0,2020-04-29,Afghanistan,1939,60,252
1,2020-04-29,Albania,766,30,455
2,2020-04-29,Algeria,3848,444,1702
3,2020-04-29,Andorra,743,42,423
4,2020-04-29,Angola,27,2,7
...,...,...,...,...,...
299,2020-04-29,US,545,7,0
300,2020-04-29,Mainland China,76,3,73
301,2020-04-29,Canada,11,0,0
302,2020-04-29,Mainland China,185,2,181


### Q. 3) In which Region, maximum number of Confirmed cases were recorded ?

In [30]:
df.groupby(['Region'])['Confirmed'].sum().sort_values(ascending = False ).head(1)

Region
US    1039909
Name: Confirmed, dtype: int64

### Q. 4) In which Region, minimum number of Deaths cases were recorded ?

In [31]:
df.groupby(['Region'])['Deaths'].sum().sort_values( ).head(1)

Region
Cambodia    0
Name: Deaths, dtype: int64

### Q. 5) How many Confirmed, Deaths & Recovered cases were reported from India till 29 April 2020 ?

In [33]:
df['Date'].unique()

array(['2020-04-29T00:00:00.000000000'], dtype='datetime64[ns]')

In [36]:
sorted(df['Region'].unique().tolist())

['Afghanistan',
 'Albania',
 'Algeria',
 'Andorra',
 'Angola',
 'Antigua and Barbuda',
 'Argentina',
 'Armenia',
 'Australia',
 'Austria',
 'Azerbaijan',
 'Bahamas',
 'Bahrain',
 'Bangladesh',
 'Barbados',
 'Belarus',
 'Belgium',
 'Belize',
 'Benin',
 'Bolivia',
 'Bosnia and Herzegovina',
 'Botswana',
 'Brazil',
 'Brunei',
 'Bulgaria',
 'Burkina Faso',
 'Burma',
 'Burundi',
 'Cabo Verde',
 'Cambodia',
 'Cameroon',
 'Canada',
 'Central African Republic',
 'Chad',
 'Chile',
 'Colombia',
 'Congo (Brazzaville)',
 'Congo (Kinshasa)',
 'Costa Rica',
 'Croatia',
 'Cuba',
 'Cyprus',
 'Czech Republic',
 'Denmark',
 'Diamond Princess',
 'Djibouti',
 'Dominica',
 'Dominican Republic',
 'Ecuador',
 'Egypt',
 'El Salvador',
 'Equatorial Guinea',
 'Eritrea',
 'Estonia',
 'Eswatini',
 'Ethiopia',
 'Fiji',
 'Finland',
 'France',
 'Gabon',
 'Gambia',
 'Georgia',
 'Germany',
 'Ghana',
 'Greece',
 'Grenada',
 'Guatemala',
 'Guinea',
 'Guinea-Bissau',
 'Guyana',
 'Haiti',
 'Holy See',
 'Honduras',
 'Hong 

In [37]:
df[df['Region'] == 'India']

Unnamed: 0,Date,Region,Confirmed,Deaths,Recovered
73,2020-04-29,India,33062,1079,8437


### Q. 6-A ) Sort the entire data wrt No. of Confirmed cases in ascending order.

In [38]:
df.sort_values(by = 'Confirmed')

Unnamed: 0,Date,Region,Confirmed,Deaths,Recovered
151,2020-04-29,Suriname,10,1,8
69,2020-04-29,Holy See,10,0,2
58,2020-04-29,Gambia,10,1,8
301,2020-04-29,Canada,11,0,0
206,2020-04-29,Denmark,11,0,11
...,...,...,...,...,...
56,2020-04-29,France,165093,24087,48228
163,2020-04-29,UK,165221,26097,0
79,2020-04-29,Italy,203591,27682,71252
148,2020-04-29,Spain,236899,24275,132929


### Q. 6-B ) Sort the entire data wrt No. of Recovered cases in descending order.

In [39]:
df.sort_values(by = 'Recovered' , ascending = False )

Unnamed: 0,Date,Region,Confirmed,Deaths,Recovered
148,2020-04-29,Spain,236899,24275,132929
60,2020-04-29,Germany,161539,6467,120400
75,2020-04-29,Iran,93657,5957,73791
79,2020-04-29,Italy,203591,27682,71252
218,2020-04-29,Mainland China,68128,4512,63616
...,...,...,...,...,...
247,2020-04-29,US,4934,230,0
246,2020-04-29,US,3851,56,0
244,2020-04-29,US,451,16,0
243,2020-04-29,US,7660,338,0
