#### Importing python libraries

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Problem Statement

- We have taken data from the website given below.
- Try to create the dashboard as shown on website using power bi.

#### Reading datasets

`datasets_link` :- https://data.humdata.org/dataset/novel-coronavirus-2019-ncov-cases

In [2]:
confirmed = pd.read_csv('time_series_covid19_confirmed_global.csv')
deaths = pd.read_csv('time_series_covid19_deaths_global.csv')
recovered = pd.read_csv('time_series_covid19_recovered_global.csv')

In [3]:
recovered

Unnamed: 0,Province/State,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,2/28/23,3/1/23,3/2/23,3/3/23,3/4/23,3/5/23,3/6/23,3/7/23,3/8/23,3/9/23
0,,Afghanistan,33.939110,67.709953,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,,Albania,41.153300,20.168300,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,,Algeria,28.033900,1.659600,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,,Andorra,42.506300,1.521800,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,,Angola,-11.202700,17.873900,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
269,,West Bank and Gaza,31.952200,35.233200,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
270,,Winter Olympics 2022,39.904200,116.407400,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
271,,Yemen,15.552727,48.516388,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
272,,Zambia,-13.133897,27.849332,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


## Data Cleaning and Preprocessing

- After looking at all dataframes.
- We have to create a seperate dataframe for countries.
- And the data in remaining dataframe are given in wide format that is difficult to access and work upon it.
- Convert this wide data format to long data format.

In [4]:
confirmed

Unnamed: 0,Province/State,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,2/28/23,3/1/23,3/2/23,3/3/23,3/4/23,3/5/23,3/6/23,3/7/23,3/8/23,3/9/23
0,,Afghanistan,33.939110,67.709953,0,0,0,0,0,0,...,209322,209340,209358,209362,209369,209390,209406,209436,209451,209451
1,,Albania,41.153300,20.168300,0,0,0,0,0,0,...,334391,334408,334408,334427,334427,334427,334427,334427,334443,334457
2,,Algeria,28.033900,1.659600,0,0,0,0,0,0,...,271441,271448,271463,271469,271469,271477,271477,271490,271494,271496
3,,Andorra,42.506300,1.521800,0,0,0,0,0,0,...,47866,47875,47875,47875,47875,47875,47875,47875,47890,47890
4,,Angola,-11.202700,17.873900,0,0,0,0,0,0,...,105255,105277,105277,105277,105277,105277,105277,105277,105288,105288
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
284,,West Bank and Gaza,31.952200,35.233200,0,0,0,0,0,0,...,703228,703228,703228,703228,703228,703228,703228,703228,703228,703228
285,,Winter Olympics 2022,39.904200,116.407400,0,0,0,0,0,0,...,535,535,535,535,535,535,535,535,535,535
286,,Yemen,15.552727,48.516388,0,0,0,0,0,0,...,11945,11945,11945,11945,11945,11945,11945,11945,11945,11945
287,,Zambia,-13.133897,27.849332,0,0,0,0,0,0,...,343012,343012,343079,343079,343079,343135,343135,343135,343135,343135


#### Location dataframe 
holding countries "Names" and "coordinates"

In [5]:
# Create seperate df for countries

location = confirmed[['Country/Region', 'Lat', 'Long']]
location

Unnamed: 0,Country/Region,Lat,Long
0,Afghanistan,33.939110,67.709953
1,Albania,41.153300,20.168300
2,Algeria,28.033900,1.659600
3,Andorra,42.506300,1.521800
4,Angola,-11.202700,17.873900
...,...,...,...
284,West Bank and Gaza,31.952200,35.233200
285,Winter Olympics 2022,39.904200,116.407400
286,Yemen,15.552727,48.516388
287,Zambia,-13.133897,27.849332


In [6]:
location.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 289 entries, 0 to 288
Data columns (total 3 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   Country/Region  289 non-null    object 
 1   Lat             287 non-null    float64
 2   Long            287 non-null    float64
dtypes: float64(2), object(1)
memory usage: 6.9+ KB


In [7]:
# there are certain countries names are repeated because they are shown using provinces
# now if we remove provinces we have to group by data according to country
# when we group data that value is been filled by mean of its another coordinates

location[location.Lat.isna()]

Unnamed: 0,Country/Region,Lat,Long
53,Canada,,
89,China,,


In [8]:
# rename 'country/region' column to just 'country'
location.rename(columns={'Country/Region':'Country'}, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  location.rename(columns={'Country/Region':'Country'}, inplace=True)


In [9]:
location.nunique()

Country    201
Lat        283
Long       284
dtype: int64

In [10]:
location = location.groupby('Country').agg('mean').reset_index()

In [11]:
location.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 201 entries, 0 to 200
Data columns (total 3 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   Country  201 non-null    object 
 1   Lat      201 non-null    float64
 2   Long     201 non-null    float64
dtypes: float64(2), object(1)
memory usage: 4.8+ KB


#### Confirmed cases dataframe

In [12]:
confirmed

Unnamed: 0,Province/State,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,2/28/23,3/1/23,3/2/23,3/3/23,3/4/23,3/5/23,3/6/23,3/7/23,3/8/23,3/9/23
0,,Afghanistan,33.939110,67.709953,0,0,0,0,0,0,...,209322,209340,209358,209362,209369,209390,209406,209436,209451,209451
1,,Albania,41.153300,20.168300,0,0,0,0,0,0,...,334391,334408,334408,334427,334427,334427,334427,334427,334443,334457
2,,Algeria,28.033900,1.659600,0,0,0,0,0,0,...,271441,271448,271463,271469,271469,271477,271477,271490,271494,271496
3,,Andorra,42.506300,1.521800,0,0,0,0,0,0,...,47866,47875,47875,47875,47875,47875,47875,47875,47890,47890
4,,Angola,-11.202700,17.873900,0,0,0,0,0,0,...,105255,105277,105277,105277,105277,105277,105277,105277,105288,105288
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
284,,West Bank and Gaza,31.952200,35.233200,0,0,0,0,0,0,...,703228,703228,703228,703228,703228,703228,703228,703228,703228,703228
285,,Winter Olympics 2022,39.904200,116.407400,0,0,0,0,0,0,...,535,535,535,535,535,535,535,535,535,535
286,,Yemen,15.552727,48.516388,0,0,0,0,0,0,...,11945,11945,11945,11945,11945,11945,11945,11945,11945,11945
287,,Zambia,-13.133897,27.849332,0,0,0,0,0,0,...,343012,343012,343079,343079,343079,343135,343135,343135,343135,343135


In [13]:
confirmed.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 289 entries, 0 to 288
Columns: 1147 entries, Province/State to 3/9/23
dtypes: float64(2), int64(1143), object(2)
memory usage: 2.5+ MB


In [14]:
confirmed.isna().sum()/ len(confirmed) * 100

Province/State    68.512111
Country/Region     0.000000
Lat                0.692042
Long               0.692042
1/22/20            0.000000
                    ...    
3/5/23             0.000000
3/6/23             0.000000
3/7/23             0.000000
3/8/23             0.000000
3/9/23             0.000000
Length: 1147, dtype: float64

In [15]:
# the "Province/State" column contains information about the states of countries which has larger area and population
# We just want cases according to country not by its state and province.
# so, no need of that column.

confirmed.drop(columns=['Province/State', 'Lat', 'Long'], axis='columns', inplace=True)
confirmed

Unnamed: 0,Country/Region,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,1/28/20,1/29/20,1/30/20,...,2/28/23,3/1/23,3/2/23,3/3/23,3/4/23,3/5/23,3/6/23,3/7/23,3/8/23,3/9/23
0,Afghanistan,0,0,0,0,0,0,0,0,0,...,209322,209340,209358,209362,209369,209390,209406,209436,209451,209451
1,Albania,0,0,0,0,0,0,0,0,0,...,334391,334408,334408,334427,334427,334427,334427,334427,334443,334457
2,Algeria,0,0,0,0,0,0,0,0,0,...,271441,271448,271463,271469,271469,271477,271477,271490,271494,271496
3,Andorra,0,0,0,0,0,0,0,0,0,...,47866,47875,47875,47875,47875,47875,47875,47875,47890,47890
4,Angola,0,0,0,0,0,0,0,0,0,...,105255,105277,105277,105277,105277,105277,105277,105277,105288,105288
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
284,West Bank and Gaza,0,0,0,0,0,0,0,0,0,...,703228,703228,703228,703228,703228,703228,703228,703228,703228,703228
285,Winter Olympics 2022,0,0,0,0,0,0,0,0,0,...,535,535,535,535,535,535,535,535,535,535
286,Yemen,0,0,0,0,0,0,0,0,0,...,11945,11945,11945,11945,11945,11945,11945,11945,11945,11945
287,Zambia,0,0,0,0,0,0,0,0,0,...,343012,343012,343079,343079,343079,343135,343135,343135,343135,343135


In [16]:
# create a dict for below group by aggregation function
cols = ['Lat', 'Long']
agg_fun = {col:('mean' if col in cols else 'sum') for col in confirmed.columns if col !='Country/Region'}

confirmed_df = confirmed.groupby('Country/Region').agg(agg_fun)

In [17]:
confirmed_df.reset_index(inplace=True)

In [18]:
confirmed_df = confirmed_df.melt(id_vars=['Country/Region'], var_name='Date', value_name='Confirmed_case')

In [19]:
confirmed_df.rename(columns = {'Country/Region': 'Country'}, inplace=True)
confirmed_df

Unnamed: 0,Country,Date,Confirmed_case
0,Afghanistan,1/22/20,0
1,Albania,1/22/20,0
2,Algeria,1/22/20,0
3,Andorra,1/22/20,0
4,Angola,1/22/20,0
...,...,...,...
229738,West Bank and Gaza,3/9/23,703228
229739,Winter Olympics 2022,3/9/23,535
229740,Yemen,3/9/23,11945
229741,Zambia,3/9/23,343135


In [20]:
confirmed_df.shape

(229743, 3)

In [21]:
confirmed_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 229743 entries, 0 to 229742
Data columns (total 3 columns):
 #   Column          Non-Null Count   Dtype 
---  ------          --------------   ----- 
 0   Country         229743 non-null  object
 1   Date            229743 non-null  object
 2   Confirmed_case  229743 non-null  int64 
dtypes: int64(1), object(2)
memory usage: 5.3+ MB


In [22]:
# calculate unique countries in dataframe.
confirmed_df.Country.nunique()

201

In [23]:
# check for duplicates
confirmed_df[confirmed_df.duplicated()]

Unnamed: 0,Country,Date,Confirmed_case


In [24]:
confirmed_df.Date = pd.to_datetime(confirmed_df.Date)

  confirmed_df.Date = pd.to_datetime(confirmed_df.Date)


In [25]:
confirmed_df.Confirmed_case.sum()

# the value of total cases is too high.
# in confirmed cases they adds cummulative sum for each day.
# calculate the total cases equals to website
# we also wants daily confirmed cases

316910296319

In [26]:
confirmed_df[confirmed_df.Country == 'India']

Unnamed: 0,Country,Date,Confirmed_case
80,India,2020-01-22,0
281,India,2020-01-23,0
482,India,2020-01-24,0
683,India,2020-01-25,0
884,India,2020-01-26,0
...,...,...,...
228818,India,2023-03-05,44689327
229019,India,2023-03-06,44689593
229220,India,2023-03-07,44689919
229421,India,2023-03-08,44690298


In [27]:
confirmed_df.sort_values(['Country','Date'], inplace=True, ignore_index=True)

In [28]:
confirmed_df[confirmed_df.Country == 'India']

Unnamed: 0,Country,Date,Confirmed_case
91440,India,2020-01-22,0
91441,India,2020-01-23,0
91442,India,2020-01-24,0
91443,India,2020-01-25,0
91444,India,2020-01-26,0
...,...,...,...
92578,India,2023-03-05,44689327
92579,India,2023-03-06,44689593
92580,India,2023-03-07,44689919
92581,India,2023-03-08,44690298


In [29]:
confirmed_df.Confirmed_case.diff()  # Calculates the difference of a Series element compared with another element in the Series

0           NaN
1           0.0
2           0.0
3           0.0
4           0.0
          ...  
229738      0.0
229739      0.0
229740      0.0
229741    149.0
229742      0.0
Name: Confirmed_case, Length: 229743, dtype: float64

In [30]:
# grouped by countries and calculates the difference.
country = confirmed_df.groupby('Country') 
country.Confirmed_case.diff().sum()
# this difference is approximately equals to that is shown on website

676569592.0

In [31]:
confirmed_df['Daily_confirmed'] = country.Confirmed_case.diff()
confirmed_df

Unnamed: 0,Country,Date,Confirmed_case,Daily_confirmed
0,Afghanistan,2020-01-22,0,
1,Afghanistan,2020-01-23,0,0.0
2,Afghanistan,2020-01-24,0,0.0
3,Afghanistan,2020-01-25,0,0.0
4,Afghanistan,2020-01-26,0,0.0
...,...,...,...,...
229738,Zimbabwe,2023-03-05,264127,0.0
229739,Zimbabwe,2023-03-06,264127,0.0
229740,Zimbabwe,2023-03-07,264127,0.0
229741,Zimbabwe,2023-03-08,264276,149.0


In [32]:
confirmed_df[confirmed_df.Country == 'India']

Unnamed: 0,Country,Date,Confirmed_case,Daily_confirmed
91440,India,2020-01-22,0,
91441,India,2020-01-23,0,0.0
91442,India,2020-01-24,0,0.0
91443,India,2020-01-25,0,0.0
91444,India,2020-01-26,0,0.0
...,...,...,...,...
92578,India,2023-03-05,44689327,281.0
92579,India,2023-03-06,44689593,266.0
92580,India,2023-03-07,44689919,326.0
92581,India,2023-03-08,44690298,379.0


In [33]:
confirmed_df.Daily_confirmed.sum() 

676569592.0

In [34]:
confirmed_df.isna().sum()

Country              0
Date                 0
Confirmed_case       0
Daily_confirmed    201
dtype: int64

In [35]:
confirmed_df[confirmed_df.Daily_confirmed.isna()]

# the first value of each country is null
# basically it's because of diff() operation that we used above.
# generally for most of the countries the first value at 2020-01-22 is 0 except some countries where covid starts early.
# filling those nan values with null won't affect our results

Unnamed: 0,Country,Date,Confirmed_case,Daily_confirmed
0,Afghanistan,2020-01-22,0,
1143,Albania,2020-01-22,0,
2286,Algeria,2020-01-22,0,
3429,Andorra,2020-01-22,0,
4572,Angola,2020-01-22,0,
...,...,...,...,...
224028,West Bank and Gaza,2020-01-22,0,
225171,Winter Olympics 2022,2020-01-22,0,
226314,Yemen,2020-01-22,0,
227457,Zambia,2020-01-22,0,


In [36]:
confirmed_df.fillna(0, inplace=True)

In [37]:
confirmed_df.isna().sum()

Country            0
Date               0
Confirmed_case     0
Daily_confirmed    0
dtype: int64

#### Death cases dataframe
perform the same operation on deaths dataframe

In [38]:
deaths

Unnamed: 0,Province/State,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,2/28/23,3/1/23,3/2/23,3/3/23,3/4/23,3/5/23,3/6/23,3/7/23,3/8/23,3/9/23
0,,Afghanistan,33.939110,67.709953,0,0,0,0,0,0,...,7896,7896,7896,7896,7896,7896,7896,7896,7896,7896
1,,Albania,41.153300,20.168300,0,0,0,0,0,0,...,3598,3598,3598,3598,3598,3598,3598,3598,3598,3598
2,,Algeria,28.033900,1.659600,0,0,0,0,0,0,...,6881,6881,6881,6881,6881,6881,6881,6881,6881,6881
3,,Andorra,42.506300,1.521800,0,0,0,0,0,0,...,165,165,165,165,165,165,165,165,165,165
4,,Angola,-11.202700,17.873900,0,0,0,0,0,0,...,1933,1933,1933,1933,1933,1933,1933,1933,1933,1933
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
284,,West Bank and Gaza,31.952200,35.233200,0,0,0,0,0,0,...,5708,5708,5708,5708,5708,5708,5708,5708,5708,5708
285,,Winter Olympics 2022,39.904200,116.407400,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
286,,Yemen,15.552727,48.516388,0,0,0,0,0,0,...,2159,2159,2159,2159,2159,2159,2159,2159,2159,2159
287,,Zambia,-13.133897,27.849332,0,0,0,0,0,0,...,4057,4057,4057,4057,4057,4057,4057,4057,4057,4057


In [39]:
# drop "Province/State" column
deaths.drop(columns=['Province/State', 'Lat', 'Long'], inplace=True)

# rename country column
deaths.rename(columns={'Country/Region':'Country'}, inplace=True)

In [40]:
deaths_df = deaths.groupby('Country').agg(agg_fun).reset_index()

In [41]:
# convert wide data to long data
deaths_df = deaths_df.melt(id_vars=['Country'], var_name='Date', value_name='Confirmed_deaths')

In [42]:
deaths_df.info() # here my dataset contains very much nan values so i'm  replacing those two columns with confirmed df columns

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 229743 entries, 0 to 229742
Data columns (total 3 columns):
 #   Column            Non-Null Count   Dtype 
---  ------            --------------   ----- 
 0   Country           229743 non-null  object
 1   Date              229743 non-null  object
 2   Confirmed_deaths  229743 non-null  int64 
dtypes: int64(1), object(2)
memory usage: 5.3+ MB


In [43]:
# change datatype of "Date" column
deaths_df['Date'] = pd.to_datetime(deaths_df['Date']) 

# sort values according country and date
deaths_df.sort_values(['Country', 'Date'], inplace=True, ignore_index=True)

  deaths_df['Date'] = pd.to_datetime(deaths_df['Date'])


In [44]:
death_gr = deaths_df.groupby('Country')
death_gr.Confirmed_deaths.diff().sum()

# deaths are approximately equals to shown on website

6881785.0

In [45]:
deaths_df['Daily_deaths'] = death_gr.Confirmed_deaths.diff()

# the first value for every country is set to null because of diff() operation.
# fill it with 0.
deaths_df.fillna(0, inplace=True) 

In [46]:
deaths_df

Unnamed: 0,Country,Date,Confirmed_deaths,Daily_deaths
0,Afghanistan,2020-01-22,0,0.0
1,Afghanistan,2020-01-23,0,0.0
2,Afghanistan,2020-01-24,0,0.0
3,Afghanistan,2020-01-25,0,0.0
4,Afghanistan,2020-01-26,0,0.0
...,...,...,...,...
229738,Zimbabwe,2023-03-05,5668,0.0
229739,Zimbabwe,2023-03-06,5668,0.0
229740,Zimbabwe,2023-03-07,5668,0.0
229741,Zimbabwe,2023-03-08,5671,3.0


#### recovererd cases dataframe

In [47]:
recovered

Unnamed: 0,Province/State,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,2/28/23,3/1/23,3/2/23,3/3/23,3/4/23,3/5/23,3/6/23,3/7/23,3/8/23,3/9/23
0,,Afghanistan,33.939110,67.709953,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,,Albania,41.153300,20.168300,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,,Algeria,28.033900,1.659600,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,,Andorra,42.506300,1.521800,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,,Angola,-11.202700,17.873900,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
269,,West Bank and Gaza,31.952200,35.233200,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
270,,Winter Olympics 2022,39.904200,116.407400,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
271,,Yemen,15.552727,48.516388,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
272,,Zambia,-13.133897,27.849332,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [48]:
recovered.drop(['Province/State', 'Lat', 'Long'], axis=1, inplace=True)

recovered.rename({'Country/Region': 'Country'}, axis=1, inplace=True)

In [49]:
recovered_df = recovered.groupby('Country').agg(agg_fun).reset_index()

In [50]:
recovered_df = recovered_df.melt(id_vars=['Country'], var_name='Date', value_name='Recovered_cases')

recovered_df['Date'] = pd.to_datetime(recovered_df['Date'])

recovered_df.sort_values(['Country', 'Date'], inplace=True)

  recovered_df['Date'] = pd.to_datetime(recovered_df['Date'])


In [51]:
 recovered_df = recovered_df[recovered_df['Date'] <= pd.to_datetime('2021-08-04')]

In [52]:
recovered_df.Recovered_cases.sum()

23491455947

In [53]:
country_gr = recovered_df.groupby('Country')
country_gr.Recovered_cases.diff().sum()

130899031.0

In [54]:
recovered_df[recovered_df['Country'] == 'India']

# in the recovered dataset the data is filled upto 04 August 2021
# so, we just calculate upto that date

Unnamed: 0,Country,Date,Recovered_cases
80,India,2020-01-22,0
281,India,2020-01-23,0
482,India,2020-01-24,0
683,India,2020-01-25,0
884,India,2020-01-26,0
...,...,...,...
111836,India,2021-07-31,30820521
112037,India,2021-08-01,30857467
112238,India,2021-08-02,30896354
112439,India,2021-08-03,30933022


In [55]:
recovered_df['Daily_recovered'] = country_gr.Recovered_cases.diff()
recovered_df.fillna(0, inplace=True)

In [56]:
recovered_df

Unnamed: 0,Country,Date,Recovered_cases,Daily_recovered
0,Afghanistan,2020-01-22,0,0.0
201,Afghanistan,2020-01-23,0,0.0
402,Afghanistan,2020-01-24,0,0.0
603,Afghanistan,2020-01-25,0,0.0
804,Afghanistan,2020-01-26,0,0.0
...,...,...,...,...
111956,Zimbabwe,2021-07-31,75856,1294.0
112157,Zimbabwe,2021-08-01,76665,809.0
112358,Zimbabwe,2021-08-02,79420,2755.0
112559,Zimbabwe,2021-08-03,81570,2150.0


###  exporting data to csv file

In [57]:
confirmed_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 229743 entries, 0 to 229742
Data columns (total 4 columns):
 #   Column           Non-Null Count   Dtype         
---  ------           --------------   -----         
 0   Country          229743 non-null  object        
 1   Date             229743 non-null  datetime64[ns]
 2   Confirmed_case   229743 non-null  int64         
 3   Daily_confirmed  229743 non-null  float64       
dtypes: datetime64[ns](1), float64(1), int64(1), object(1)
memory usage: 7.0+ MB


In [58]:
deaths_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 229743 entries, 0 to 229742
Data columns (total 4 columns):
 #   Column            Non-Null Count   Dtype         
---  ------            --------------   -----         
 0   Country           229743 non-null  object        
 1   Date              229743 non-null  datetime64[ns]
 2   Confirmed_deaths  229743 non-null  int64         
 3   Daily_deaths      229743 non-null  float64       
dtypes: datetime64[ns](1), float64(1), int64(1), object(1)
memory usage: 7.0+ MB


In [59]:
recovered_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 112761 entries, 0 to 112760
Data columns (total 4 columns):
 #   Column           Non-Null Count   Dtype         
---  ------           --------------   -----         
 0   Country          112761 non-null  object        
 1   Date             112761 non-null  datetime64[ns]
 2   Recovered_cases  112761 non-null  int64         
 3   Daily_recovered  112761 non-null  float64       
dtypes: datetime64[ns](1), float64(1), int64(1), object(1)
memory usage: 4.3+ MB


In [60]:
# confirmed_df.to_csv('covid_confirmed_cases.csv', index=False)

with pd.ExcelWriter('Covid_19.xlsx') as file:
    confirmed_df.to_excel(file, 'Confirmed Cases', index=False)
    deaths_df.to_excel(file, 'Death Cases', index=False),
    recovered_df.to_excel(file, 'Recovered Cases', index=False),
    location.to_excel(file, 'Locations', index=False)
