In [1]:
import json
import numpy as np
import os
import pandas as pd

In [2]:
# Regions that can be selected below.
regions = ['asia', 'africa', 'europe', 'north_america', 'oceania', 'south_america', 'south_and_central_america', 'all_countries']

# Specify the region to train.
region = 'south_and_central_america'

# Folder holding the processed CSVs; defined once so the four paths cannot drift apart.
PROCESSED_DATA_DIR = '/content/gdrive/MyDrive/Covid_Project/Data/Processed_Data/'


def processed_csv_path(region_name, series_name):
    """Build the path of one processed, filtered CSV.

    Args:
        region_name: Region prefix of the file; must be an entry of ``regions``.
        series_name: Series part of the file name, e.g. ``'confirmed_cases'``.

    Returns:
        ``PROCESSED_DATA_DIR`` followed by
        ``<region_name>_<series_name>_processed_filtered.csv``.

    Raises:
        ValueError: If ``region_name`` is not listed in ``regions``. Failing here
            gives a clearer message than a missing-file error in a later cell.
    """
    if region_name not in regions:
        raise ValueError(
            'Unknown region ' + repr(region_name) + '; expected one of ' + repr(regions)
        )
    return PROCESSED_DATA_DIR + region_name + '_' + series_name + '_processed_filtered.csv'


confirmed_cases_processed_csv = processed_csv_path(region, 'confirmed_cases')
confirmed_deaths_processed_csv = processed_csv_path(region, 'confirmed_deaths')
fully_vaccinated_processed_csv = processed_csv_path(region, 'fully_vaccinated')
partially_vaccinated_processed_csv = processed_csv_path(region, 'partially_vaccinated')

In [3]:
# Colab-specific step: mount Google Drive at /content/gdrive, the root of every
# data path used in this notebook.
from google.colab import drive
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [4]:
# List the files currently present in the processed-data folder.
processed_data_listing = os.listdir('/content/gdrive/MyDrive/Covid_Project/Data/Processed_Data')
processed_data_listing

['max_covid_confirmed_by_country.csv',
 'confirmed_cases_processed.csv',
 'confirmed_deaths_processed.csv',
 'partially_vaccinated_processed.csv',
 'fully_vaccinated_processed.csv',
 'confirmed_cases_processed_filtered.csv',
 'asia_partially_vaccinated_processed_filtered.csv',
 'asia_confirmed_cases_processed_filtered.csv',
 'fully_vaccinated_processed_filtered.csv',
 'confirmed_deaths_processed_filtered.csv',
 'asia_fully_vaccinated_processed_filtered.csv',
 'asia_confirmed_deaths_processed_filtered.csv',
 'africa_confirmed_deaths_processed_filtered.csv',
 'africa_confirmed_cases_processed_filtered.csv',
 'partially_vaccinated_processed_filtered.csv',
 'africa_partially_vaccinated_processed_filtered.csv',
 'africa_fully_vaccinated_processed_filtered.csv',
 'europe_confirmed_deaths_processed_filtered.csv',
 'europe_partially_vaccinated_processed_filtered.csv',
 'europe_confirmed_cases_processed_filtered.csv',
 'europe_fully_vaccinated_processed_filtered.csv',
 'north_america_confirmed_

In [5]:
# Inputs
confirmed_cases_df = pd.read_csv(confirmed_cases_processed_csv)
fully_vaccinated_df = pd.read_csv(fully_vaccinated_processed_csv)
partially_vaccinated_df = pd.read_csv(partially_vaccinated_processed_csv)

# Labels
confirmed_deaths_df = pd.read_csv(confirmed_deaths_processed_csv)

In [6]:
# Inputs
confirmed_cases_np = confirmed_cases_df.to_numpy()[:, 1:, np.newaxis]
fully_vaccinated_np = fully_vaccinated_df.to_numpy()[:, 1:, np.newaxis]
partially_vaccinated_np = partially_vaccinated_df.to_numpy()[:, 1:, np.newaxis]

# Labels
confirmed_deaths_np = confirmed_deaths_df.to_numpy()[:, 1:, np.newaxis]

In [7]:
# Show the shape of every series array on one line (inputs first, labels last).
series_shapes = [
    series.shape
    for series in (confirmed_cases_np, fully_vaccinated_np, partially_vaccinated_np, confirmed_deaths_np)
]
print(*series_shapes)

(30, 569, 1) (30, 569, 1) (30, 569, 1) (30, 569, 1)


In [8]:
# Spot-check three consecutive positions (500-502) of the first row in each input series.
for input_series in (confirmed_cases_np, fully_vaccinated_np, partially_vaccinated_np):
    print(input_series[0, 500:503])

[[0.8450941155188446]
 [0.8147992613430881]
 [0.8397117195938203]]
[[0.06521982292651228]
 [0.06602763317212786]
 [0.06685992332658049]]
[[0.2315429296700852]
 [0.2370946304886972]
 [0.24287288085281744]]


In [9]:
# Join the three input series along the last axis; the feature order is
# cases, fully vaccinated, partially vaccinated.
input_feature_arrays = (confirmed_cases_np, fully_vaccinated_np, partially_vaccinated_np)
training_data_inputs_np = np.concatenate(input_feature_arrays, axis=-1)

In [10]:
# Same first-row window as printed per series, now with all features side by side.
training_data_inputs_np[0, 500:503, :]

array([[0.8450941155188446, 0.06521982292651228, 0.2315429296700852],
       [0.8147992613430881, 0.06602763317212786, 0.2370946304886972],
       [0.8397117195938203, 0.06685992332658049, 0.24287288085281744]],
      dtype=object)

In [11]:
# Plain Python lists are needed because the payload is later serialized with json.
countries_name = [country for country in confirmed_cases_df['Country/Region']]
training_data_labels = confirmed_deaths_np.tolist()
training_data_inputs = training_data_inputs_np.tolist()

In [12]:
# Print the number of country names, then the input and label array shapes, one per line.
for size_summary in (len(countries_name), training_data_inputs_np.shape, confirmed_deaths_np.shape):
    print(size_summary)

30
(30, 569, 3)
(30, 569, 1)


In [13]:
# Hard-coded lookup from country/territory name to a population count.
# The table is not filtered by `region`: it is stored in full in every export.
# NOTE(review): the source and reference year of these figures are not recorded
# here - TODO document them. Keys presumably follow the naming used in the
# 'Country/Region' column (e.g. 'US', 'Korea, South', 'Taiwan*') - confirm.
# Several uninhabited territories are listed with a population of 0.
population_data = {'Afghanistan': 37172386, 'Albania': 2866376, 'Algeria': 42228429, 'American Samoa': 55465, 'Andorra': 77006, 'Angola': 30809762, 'Anguilla': 15094, 'Antarctica': 1106, 'Antigua and Barbuda': 96286, 'Argentina': 44494502, 'Armenia': 2951776, 'Aruba': 105845, 'Australia': 24982688, 'Austria': 8840521, 'Azerbaijan': 9939800, 'Bahamas': 385640, 'Bahrain': 1569439, 'Bangladesh': 161356039, 'Barbados': 286641, 'Belarus': 9483499, 'Belgium': 11433256, 'Belize': 383071, 'Benin': 11485048, 'Bermuda': 63973, 'Bhutan': 754394, 'Bolivia': 11353142, 'Bosnia and Herzegovina': 3323929, 'Botswana': 2254126, 'Bouvet Island': 0, 'Brazil': 209469333, 'British Indian Ocean Territory': 0, 'Brunei': 428962, 'Bulgaria': 7025037, 'Burkina Faso': 19751535, 'Burundi': 11175378, 'Cambodia': 16249798, 'Cameroon': 25216237, 'Canada': 37057765, 'Cabo Verde': 543767, 'Cayman Islands': 64174, 'Central African Republic': 4666377, 'Chad': 15477751, 'Chile': 18729160, 'China': 1392730000, 'Christmas Island': 1402, 'Cocos (Keeling) Islands': 596, 'Colombia': 49648685, 'Comoros': 832322, 'Congo (Brazzaville)': 5244363, 'Cook Islands': 17379, 'Costa Rica': 4999441, 'Croatia': 4087843, 'Cuba': 11338138, 'Cyprus': 1189265, 'Czechia': 10629928, 'Denmark': 5793636, 'Djibouti': 958920, 'Dominica': 71625, 'Dominican Republic': 10627165, 'Timor-Leste': 1267972, 'Ecuador': 17084357, 'Egypt': 98423595, 'El Salvador': 6420744, 'England': 55619400, 'Equatorial Guinea': 1308974, 'Eritrea': 6213972, 'Estonia': 1321977, 'Ethiopia': 109224559, 'Falkland Islands': 2840, 'Faroe Islands': 48497, 'Fiji': 883483, 'Finland': 5515525, 'France': 66977107, 'French Guiana': 290691, 'French Polynesia': 277679, 'French Southern territories': 0, 'Gabon': 2119275, 'Gambia': 2280102, 'Georgia': 3726549, 'Germany': 82905782, 'Ghana': 29767108, 'Gibraltar': 33718, 'Greece': 10731726, 'Greenland': 56025, 'Grenada': 111454, 'Guadeloupe': 395700, 'Guam': 165768, 'Guatemala': 17247807, 'Guinea': 12414318, 
'Guinea-Bissau': 1874309, 'Guyana': 779004, 'Haiti': 11123176, 'Heard Island and McDonald Islands': 0, 'Holy See (Vatican City State)': 825, 'Honduras': 9587522, 'Hong Kong': 7451000, 'Hungary': 9775564, 'Iceland': 352721, 'India': 1352617328, 'Indonesia': 267663435, 'Iran': 81800269, 'Iraq': 38433600, 'Ireland': 4867309, 'Israel': 8882800, 'Italy': 60421760, "Cote d'Ivoire": 25069229, 'Jamaica': 2934855, 'Japan': 126529100, 'Jordan': 9956011, 'Kazakhstan': 18272430, 'Kenya': 51393010, 'Kiribati': 115847, 'Kosovo': 1935259,  'Kuwait': 4137309, 'Kyrgyzstan': 6322800, 'Laos': 7061507, 'Latvia': 1927174, 'Lebanon': 6848925, 'Lesotho': 2108132, 'Liberia': 4818977, 'Libya': 6678567, 'Liechtenstein': 37910, 'Lithuania': 2801543, 'Luxembourg': 607950, 'Macao': 631636, 'Madagascar': 26262368, 'Malawi': 18143315, 'Malaysia': 31528585, 'Maldives': 515696, 'Mali': 19077690, 'Malta': 484630, 'Marshall Islands': 58413, 'Martinique': 376480, 'Mauritania': 4403319, 'Mauritius': 1265303, 'Mayotte': 270372, 'Mexico': 126190788, 'Micronesia': 112640, 'Moldova': 2706049, 'Monaco': 38682, 'Mongolia': 3170208, 'Montenegro': 631219, 'Montserrat': 5900, 'Morocco': 36029138, 'Mozambique': 29495962, 'Burma': 53708395, 'Namibia': 2448255, 'Nauru': 12704, 'Nepal': 28087871, 'Netherlands': 17231624, 'Netherlands Antilles': 227049, 'New Caledonia': 284060, 'New Zealand': 4841000, 'Nicaragua': 6465513, 'Niger': 22442948, 'Nigeria': 195874740, 'Niue': 1624, 'Norfolk Island': 2169, 'North Korea': 25549819, 'North Macedonia': 2084367, 'Northern Ireland': 1885400, 'Northern Mariana Islands': 56882, 'Norway': 5311916, 'Oman': 4829483, 'Pakistan': 212215030, 'Palau': 17907, 'Palestine': 4569087, 'Panama': 4176873, 'Papua New Guinea': 8606316, 'Paraguay': 6956071, 'Peru': 31989256, 'Philippines': 106651922, 'Pitcairn': 67, 'Poland': 37974750, 'Portugal': 10283822, 'Puerto Rico': 3195153, 'Qatar': 2781677, 'Reunion': 859959, 'Romania': 19466145, 'Russia': 144478050, 'Rwanda': 12301939, 'Saint Helena': 
6600, 'Saint Kitts and Nevis': 52441, 'Saint Lucia': 181889, 'Saint Pierre and Miquelon': 5888, 'Saint Vincent and the Grenadines': 110210, 'Samoa': 196130, 'San Marino': 33785, 'Sao Tome and Principe': 211028, 'Saudi Arabia': 33699947, 'Scotland': 5424800, 'Senegal': 15854360, 'Serbia': 6963764, 'Seychelles': 96762, 'Sierra Leone': 7650154, 'Singapore': 5638676, 'Slovakia': 5446771, 'Slovenia': 2073894, 'Solomon Islands': 652858, 'Somalia': 15008154, 'South Africa': 57779622, 'South Georgia and the South Sandwich Islands': 30, 'Korea, South': 51606633, 'South Sudan': 10975920, 'Spain': 46796540, 'Sri Lanka': 21670000, 'Sudan': 41801533, 'Suriname': 575991, 'Svalbard and Jan Mayen': 2572, 'Eswatini': 1136191, 'Sweden': 10175214, 'Switzerland': 8513227, 'Syria': 16906283, 'Taiwan*': 23568378, 'Tajikistan': 9100837, 'Tanzania': 56318348, 'Thailand': 69428524, 'Congo (Kinshasa)': 84068091, 'Togo': 7889094, 'Tokelau': 1411, 'Tonga': 103197, 'Trinidad and Tobago': 1389858, 'Tunisia': 11565204, 'Turkey': 82319724, 'Turkmenistan': 5850908, 'Turks and Caicos Islands': 37665, 'Tuvalu': 11508, 'Uganda': 42723139, 'Ukraine': 44622516, 'United Arab Emirates': 9630959, 'United Kingdom': 66460344, 'US': 326687501, 'United States Minor Outlying Islands': 300, 'Uruguay': 3449299, 'Uzbekistan': 32955400, 'Vanuatu': 292680, 'Venezuela': 28870195, 'Vietnam': 95540395, 'Virgin Islands, British': 29802, 'Virgin Islands, U.S.': 106977, 'Wales': 3139000, 'Wallis and Futuna': 15289, 'Western Sahara': 652271, 'Yemen': 28498687, 'Zambia': 17351822, 'Zimbabwe': 14439018}

# Payload written to the training-data JSON: country names, inputs, labels and
# the population table above, bundled in one dict.
data = {'countries_name': countries_name, 'training_data_inputs': training_data_inputs, 'training_data_labels': training_data_labels, 'population_data': population_data}

In [14]:
# Write the assembled payload to a JSON file named after the selected region.
training_data_dir = '/content/gdrive/MyDrive/Covid_Project/Data/Training_Data/'
path_for_training_data = f'{training_data_dir}{region}_training_data.json'
with open(path_for_training_data, 'w') as json_file:
    json.dump(data, json_file)