## Reshaping and cleaning the datasets

In [45]:
import pandas as pd

In [46]:
url_confirmed = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv'
url_deaths = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv'
url_recovered = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_recovered_global.csv'

In [47]:
confirmed = pd.read_csv(url_confirmed) #creating new dataframe "confirmed"
confirmed.head()

Unnamed: 0,Province/State,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,12/13/21,12/14/21,12/15/21,12/16/21,12/17/21,12/18/21,12/19/21,12/20/21,12/21/21,12/22/21
0,,Afghanistan,33.93911,67.709953,0,0,0,0,0,0,...,157648,157660,157665,157725,157734,157745,157787,157797,157816,157841
1,,Albania,41.1533,20.1683,0,0,0,0,0,0,...,203925,204301,204627,204928,205224,205549,205777,205897,206273,206616
2,,Algeria,28.0339,1.6596,0,0,0,0,0,0,...,213058,213288,213533,213745,214044,214330,214592,214835,215145,215430
3,,Andorra,42.5063,1.5218,0,0,0,0,0,0,...,19440,20136,20136,20549,20549,20549,20549,21062,21062,21372
4,,Angola,-11.2027,17.8739,0,0,0,0,0,0,...,65404,65431,65565,65648,65760,65868,65938,66086,66566,67199


In [48]:
date1 = confirmed.columns[4:]
# reshaping 'confirmed' dataframe as 'total dataframe'
total_confirmed = confirmed.melt(id_vars=['Province/State', 'Country/Region', 'Lat', 'Long'], value_vars=date1, var_name='date', value_name='confirmed')
total_confirmed.tail()

Unnamed: 0,Province/State,Country/Region,Lat,Long,date,confirmed
196275,,Vietnam,14.058324,108.277199,12/22/21,1588335
196276,,West Bank and Gaza,31.9522,35.2332,12/22/21,467391
196277,,Yemen,15.552727,48.516388,12/22/21,10103
196278,,Zambia,-13.133897,27.849332,12/22/21,221880
196279,,Zimbabwe,-19.015438,29.154857,12/22/21,199404


In [49]:
deaths = pd.read_csv(url_deaths)#creating new dataframe "deaths"

In [50]:
date2 = deaths.columns[4:]
# reshaping 'deaths' dataframe as 'total_deaths'
total_deaths = deaths.melt(id_vars=['Province/State', 'Country/Region', 'Lat', 'Long'], value_vars=date2, var_name='date', value_name='death')
total_deaths.tail()

Unnamed: 0,Province/State,Country/Region,Lat,Long,date,death
196275,,Vietnam,14.058324,108.277199,12/22/21,30251
196276,,West Bank and Gaza,31.9522,35.2332,12/22/21,4882
196277,,Yemen,15.552727,48.516388,12/22/21,1981
196278,,Zambia,-13.133897,27.849332,12/22/21,3685
196279,,Zimbabwe,-19.015438,29.154857,12/22/21,4828


In [51]:
recovered = pd.read_csv(url_recovered) #creating new dataframe "recovered"

In [52]:
date3 = recovered.columns[4:]
# reshaping 'recovered' dataframe as 'total_recovered'
total_recovered = recovered.melt(id_vars=['Province/State', 'Country/Region', 'Lat', 'Long'], value_vars=date3, var_name='date', value_name='recovered')
total_recovered.tail()

Unnamed: 0,Province/State,Country/Region,Lat,Long,date,recovered
185760,,Vietnam,14.058324,108.277199,12/22/21,0
185761,,West Bank and Gaza,31.9522,35.2332,12/22/21,0
185762,,Yemen,15.552727,48.516388,12/22/21,0
185763,,Zambia,-13.133897,27.849332,12/22/21,0
185764,,Zimbabwe,-19.015438,29.154857,12/22/21,0


In [9]:
# merging dataframe 'total_deaths' with 'total_confirmed' as new dataframe 'covid_data'
covid_data = total_confirmed.merge(right = total_deaths, how = 'left', on = ['Province/State', 'Country/Region', 'date', 'Lat', 'Long'])
covid_data.head()

Unnamed: 0,Province/State,Country/Region,Lat,Long,date,confirmed,death
0,,Afghanistan,33.93911,67.709953,1/22/20,0,0
1,,Albania,41.1533,20.1683,1/22/20,0,0
2,,Algeria,28.0339,1.6596,1/22/20,0,0
3,,Andorra,42.5063,1.5218,1/22/20,0,0
4,,Angola,-11.2027,17.8739,1/22/20,0,0


In [10]:
# merging dataframe 'total_recovered' with 'covid_data'
covid_data = covid_data.merge(right = total_recovered, how = 'left', on = ['Province/State', 'Country/Region', 'date', 'Lat', 'Long'])
covid_data.head()

Unnamed: 0,Province/State,Country/Region,Lat,Long,date,confirmed,death,recovered
0,,Afghanistan,33.93911,67.709953,1/22/20,0,0,0.0
1,,Albania,41.1533,20.1683,1/22/20,0,0,0.0
2,,Algeria,28.0339,1.6596,1/22/20,0,0,0.0
3,,Andorra,42.5063,1.5218,1/22/20,0,0,0.0
4,,Angola,-11.2027,17.8739,1/22/20,0,0,0.0


In [11]:
covid_data['date'] = pd.to_datetime(covid_data['date']) #changing the formate of date column

In [12]:
covid_data.head()

Unnamed: 0,Province/State,Country/Region,Lat,Long,date,confirmed,death,recovered
0,,Afghanistan,33.93911,67.709953,2020-01-22,0,0,0.0
1,,Albania,41.1533,20.1683,2020-01-22,0,0,0.0
2,,Algeria,28.0339,1.6596,2020-01-22,0,0,0.0
3,,Andorra,42.5063,1.5218,2020-01-22,0,0,0.0
4,,Angola,-11.2027,17.8739,2020-01-22,0,0,0.0


In [13]:
covid_data.isna().sum()

Province/State    135293
Country/Region         0
Lat                 1402
Long                1402
date                   0
confirmed              0
death                  0
recovered          14721
dtype: int64

In [14]:
covid_data['recovered'] = covid_data['recovered'].fillna(0) #filling NA values in 'recovered' column

In [15]:
covid_data.isna().sum()

Province/State    135293
Country/Region         0
Lat                 1402
Long                1402
date                   0
confirmed              0
death                  0
recovered              0
dtype: int64

In [16]:
covid_data['active'] = covid_data['confirmed'] - covid_data['death'] - covid_data['recovered'] # finding number of active cases

In [17]:
covid_data.tail()

Unnamed: 0,Province/State,Country/Region,Lat,Long,date,confirmed,death,recovered,active
196275,,Vietnam,14.058324,108.277199,2021-12-22,1588335,30251,0.0,1558084.0
196276,,West Bank and Gaza,31.9522,35.2332,2021-12-22,467391,4882,0.0,462509.0
196277,,Yemen,15.552727,48.516388,2021-12-22,10103,1981,0.0,8122.0
196278,,Zambia,-13.133897,27.849332,2021-12-22,221880,3685,0.0,218195.0
196279,,Zimbabwe,-19.015438,29.154857,2021-12-22,199404,4828,0.0,194576.0


In [18]:
covid_data['date'].iloc[-1].strftime('%B %d, %Y')

'December 22, 2021'

In [19]:
covid_data_1 = covid_data.groupby(['date'])[['confirmed', 'death', 'recovered', 'active']].sum().reset_index()
covid_data_1

Unnamed: 0,date,confirmed,death,recovered,active
0,2020-01-22,557,17,30.0,510.0
1,2020-01-23,655,18,32.0,605.0
2,2020-01-24,941,26,39.0,876.0
3,2020-01-25,1434,42,42.0,1350.0
4,2020-01-26,2118,56,56.0,2006.0
...,...,...,...,...,...
696,2021-12-18,274259026,5349919,0.0,268909107.0
697,2021-12-19,274730178,5354456,0.0,269375722.0
698,2021-12-20,275495076,5361332,0.0,270133744.0
699,2021-12-21,276277066,5369161,0.0,270907905.0


In [20]:
covid_data_1['confirmed'].iloc[-1]

277153704

In [21]:
covid_data_1['confirmed'].iloc[-2]

276277066

In [22]:
covid_data_1['confirmed'].iloc[-1] - covid_data_1['confirmed'].iloc[-2]

876638

In [23]:
round(((covid_data_1['confirmed'].iloc[-1] - covid_data_1['confirmed'].iloc[-2]) / covid_data_1['confirmed'].iloc[-1]) * 100, 2)

0.32

In [44]:
covid_data_1['recovered'].iloc[-1]


0.0

In [24]:
covid_data['Country/Region'].unique()

array(['Afghanistan', 'Albania', 'Algeria', 'Andorra', 'Angola',
       'Antigua and Barbuda', 'Argentina', 'Armenia', 'Australia',
       'Austria', 'Azerbaijan', 'Bahamas', 'Bahrain', 'Bangladesh',
       'Barbados', 'Belarus', 'Belgium', 'Belize', 'Benin', 'Bhutan',
       'Bolivia', 'Bosnia and Herzegovina', 'Botswana', 'Brazil',
       'Brunei', 'Bulgaria', 'Burkina Faso', 'Burma', 'Burundi',
       'Cabo Verde', 'Cambodia', 'Cameroon', 'Canada',
       'Central African Republic', 'Chad', 'Chile', 'China', 'Colombia',
       'Comoros', 'Congo (Brazzaville)', 'Congo (Kinshasa)', 'Costa Rica',
       "Cote d'Ivoire", 'Croatia', 'Cuba', 'Cyprus', 'Czechia', 'Denmark',
       'Diamond Princess', 'Djibouti', 'Dominica', 'Dominican Republic',
       'Ecuador', 'Egypt', 'El Salvador', 'Equatorial Guinea', 'Eritrea',
       'Estonia', 'Eswatini', 'Ethiopia', 'Fiji', 'Finland', 'France',
       'Gabon', 'Gambia', 'Georgia', 'Germany', 'Ghana', 'Greece',
       'Grenada', 'Guatemala', 'Guin

In [25]:
covid_data_2 = covid_data.groupby(['date', 'Country/Region'])[['confirmed', 'death', 'recovered', 'active']].sum().reset_index()
covid_data_2

Unnamed: 0,date,Country/Region,confirmed,death,recovered,active
0,2020-01-22,Afghanistan,0,0,0.0,0.0
1,2020-01-22,Albania,0,0,0.0,0.0
2,2020-01-22,Algeria,0,0,0.0,0.0
3,2020-01-22,Andorra,0,0,0.0,0.0
4,2020-01-22,Angola,0,0,0.0,0.0
...,...,...,...,...,...,...
137391,2021-12-22,Vietnam,1588335,30251,0.0,1558084.0
137392,2021-12-22,West Bank and Gaza,467391,4882,0.0,462509.0
137393,2021-12-22,Yemen,10103,1981,0.0,8122.0
137394,2021-12-22,Zambia,221880,3685,0.0,218195.0


In [26]:
value_confirmed = covid_data_2[covid_data_2['Country/Region'] == 'Vietnam']['confirmed'].iloc[-1] - covid_data_2[covid_data_2['Country/Region'] == 'Vietnam']['confirmed'].iloc[-2]
value_confirmed

16555

In [27]:
covid_data_2 = covid_data.groupby(['date', 'Country/Region'])[['confirmed', 'death', 'recovered', 'active']].sum().reset_index()
covid_data_2

Unnamed: 0,date,Country/Region,confirmed,death,recovered,active
0,2020-01-22,Afghanistan,0,0,0.0,0.0
1,2020-01-22,Albania,0,0,0.0,0.0
2,2020-01-22,Algeria,0,0,0.0,0.0
3,2020-01-22,Andorra,0,0,0.0,0.0
4,2020-01-22,Angola,0,0,0.0,0.0
...,...,...,...,...,...,...
137391,2021-12-22,Vietnam,1588335,30251,0.0,1558084.0
137392,2021-12-22,West Bank and Gaza,467391,4882,0.0,462509.0
137393,2021-12-22,Yemen,10103,1981,0.0,8122.0
137394,2021-12-22,Zambia,221880,3685,0.0,218195.0


In [28]:
covid_data_3 = covid_data_2[covid_data_2['Country/Region'] == 'Afghanistan'][['Country/Region', 'date', 'confirmed']].reset_index()
covid_data_3

Unnamed: 0,index,Country/Region,date,confirmed
0,0,Afghanistan,2020-01-22,0
1,196,Afghanistan,2020-01-23,0
2,392,Afghanistan,2020-01-24,0
3,588,Afghanistan,2020-01-25,0
4,784,Afghanistan,2020-01-26,0
...,...,...,...,...
696,136416,Afghanistan,2021-12-18,157745
697,136612,Afghanistan,2021-12-19,157787
698,136808,Afghanistan,2021-12-20,157797
699,137004,Afghanistan,2021-12-21,157816


In [29]:
covid_data_3['daily confirmed'] = covid_data_3['confirmed'] - covid_data_3['confirmed'].shift(1)

In [30]:
covid_data_3

Unnamed: 0,index,Country/Region,date,confirmed,daily confirmed
0,0,Afghanistan,2020-01-22,0,
1,196,Afghanistan,2020-01-23,0,0.0
2,392,Afghanistan,2020-01-24,0,0.0
3,588,Afghanistan,2020-01-25,0,0.0
4,784,Afghanistan,2020-01-26,0,0.0
...,...,...,...,...,...
696,136416,Afghanistan,2021-12-18,157745,11.0
697,136612,Afghanistan,2021-12-19,157787,42.0
698,136808,Afghanistan,2021-12-20,157797,10.0
699,137004,Afghanistan,2021-12-21,157816,19.0


In [31]:
covid_data_3['Rolling Ave.'] = covid_data_3['daily confirmed'].rolling(window=7).mean()

In [32]:
covid_data_3.head(20)

Unnamed: 0,index,Country/Region,date,confirmed,daily confirmed,Rolling Ave.
0,0,Afghanistan,2020-01-22,0,,
1,196,Afghanistan,2020-01-23,0,0.0,
2,392,Afghanistan,2020-01-24,0,0.0,
3,588,Afghanistan,2020-01-25,0,0.0,
4,784,Afghanistan,2020-01-26,0,0.0,
5,980,Afghanistan,2020-01-27,0,0.0,
6,1176,Afghanistan,2020-01-28,0,0.0,
7,1372,Afghanistan,2020-01-29,0,0.0,0.0
8,1568,Afghanistan,2020-01-30,0,0.0,0.0
9,1764,Afghanistan,2020-01-31,0,0.0,0.0


In [33]:
covid_data_4 = covid_data.groupby(['Lat', 'Long', 'Country/Region'])[['confirmed', 'death', 'recovered', 'active']].sum().reset_index()
covid_data_4

Unnamed: 0,Lat,Long,Country/Region,confirmed,death,recovered,active
0,-51.7963,-59.5236,United Kingdom,25780,0,14754.0,11026.0
1,-42.8821,147.3272,Australia,145262,7997,101796.0,35469.0
2,-40.9006,174.8860,New Zealand,1943381,16021,964499.0,962861.0
3,-38.4161,-63.6167,Argentina,1538945411,34430368,711610324.0,792904719.0
4,-37.8136,144.9631,Australia,17420350,433356,7099128.0,9887866.0
...,...,...,...,...,...,...,...
272,64.2823,-135.0000,Canada,177132,1973,0.0,175159.0
273,64.8255,-124.8457,Canada,201986,927,0.0,201059.0
274,64.9631,-19.0208,Iceland,4081597,14655,2058185.0,2008757.0
275,70.2998,-83.1076,Canada,206033,1208,0.0,204825.0


In [34]:
covid_data_list = covid_data[['Country/Region', 'Lat', 'Long']]

In [35]:
dict_of_locations = covid_data_list.set_index('Country/Region')[['Lat', 'Long']].T.to_dict('dict')
dict_of_locations

  dict_of_locations = covid_data_list.set_index('Country/Region')[['Lat', 'Long']].T.to_dict('dict')


{'Afghanistan': {'Lat': 33.93911, 'Long': 67.709953},
 'Albania': {'Lat': 41.1533, 'Long': 20.1683},
 'Algeria': {'Lat': 28.0339, 'Long': 1.6596},
 'Andorra': {'Lat': 42.5063, 'Long': 1.5218},
 'Angola': {'Lat': -11.2027, 'Long': 17.8739},
 'Antigua and Barbuda': {'Lat': 17.0608, 'Long': -61.7964},
 'Argentina': {'Lat': -38.4161, 'Long': -63.6167},
 'Armenia': {'Lat': 40.0691, 'Long': 45.0382},
 'Australia': {'Lat': -31.9505, 'Long': 115.8605},
 'Austria': {'Lat': 47.5162, 'Long': 14.5501},
 'Azerbaijan': {'Lat': 40.1431, 'Long': 47.5769},
 'Bahamas': {'Lat': 25.025885, 'Long': -78.035889},
 'Bahrain': {'Lat': 26.0275, 'Long': 50.55},
 'Bangladesh': {'Lat': 23.685, 'Long': 90.3563},
 'Barbados': {'Lat': 13.1939, 'Long': -59.5432},
 'Belarus': {'Lat': 53.7098, 'Long': 27.9534},
 'Belgium': {'Lat': 50.8333, 'Long': 4.469936},
 'Belize': {'Lat': 17.1899, 'Long': -88.4976},
 'Benin': {'Lat': 9.3077, 'Long': 2.3158},
 'Bhutan': {'Lat': 27.5142, 'Long': 90.4336},
 'Bolivia': {'Lat': -16.2902