# Covid 19 - Expansion Prediction

by: Leandro Arruda

## Exploring Distance between Countries and  Confirmed cases

## Importing Libraries

In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
from geopy.distance import great_circle
import os
plt.figure(figsize=(16,6))

<Figure size 1600x600 with 0 Axes>

## Reading the dataset

In [2]:
# Input data files are available in the "../data/" directory.

for dirname, _, filenames in os.walk('.\data'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# Any results you write to the current directory are saved as output.

.\data\2019_nCoV_data.csv
.\data\countries-lat-lon.csv
.\data\countries.csv
.\data\countries_and_continents.csv
.\data\covid_19_data.csv
.\data\time_series_covid_19_confirmed.csv
.\data\time_series_covid_19_deaths.csv
.\data\time_series_covid_19_recovered.csv


In [3]:
covid = pd.read_csv('./data/covid_19_data.csv')
covid.head()

Unnamed: 0,SNo,ObservationDate,Province/State,Country/Region,Last Update,Confirmed,Deaths,Recovered
0,1,01/22/2020,Anhui,Mainland China,1/22/2020 17:00,1.0,0.0,0.0
1,2,01/22/2020,Beijing,Mainland China,1/22/2020 17:00,14.0,0.0,0.0
2,3,01/22/2020,Chongqing,Mainland China,1/22/2020 17:00,6.0,0.0,0.0
3,4,01/22/2020,Fujian,Mainland China,1/22/2020 17:00,1.0,0.0,0.0
4,5,01/22/2020,Gansu,Mainland China,1/22/2020 17:00,0.0,0.0,0.0


In [4]:
countries = pd.read_csv("./data/countries.csv")
countries_loc = pd.read_csv("./data/countries-lat-lon.csv", index_col=0)
countries.head()

Unnamed: 0,name,official_name_en,official_name_fr,ISO3166-1-Alpha-2,ISO3166-1-Alpha-3,M49,ITU,MARC,WMO,DS,...,ISO4217-currency_minor_unit,ISO4217-currency_name,ISO4217-currency_numeric_code,is_independent,Capital,Continent,TLD,Languages,Geoname ID,EDGAR
0,,Channel Islands,Îles Anglo-Normandes,,,830,,,,,...,,,,,,,,,,
1,,Sark,Sercq,,,680,,,,,...,,,,,,,,,,
2,Afghanistan,Afghanistan,Afghanistan,AF,AFG,4,AFG,af,AF,AFG,...,2.0,Afghani,971.0,Yes,Kabul,AS,.af,"fa-AF,ps,uz-AF,tk",1149361.0,B2
3,Albania,Albania,Albanie,AL,ALB,8,ALB,aa,AB,AL,...,2.0,Lek,8.0,Yes,Tirana,EU,.al,"sq,el",783754.0,B3
4,Algeria,Algeria,Algérie,DZ,DZA,12,ALG,ae,AL,DZ,...,2.0,Algerian Dinar,12.0,Yes,Algiers,AF,.dz,ar-DZ,2589581.0,B4


## Data Cleaning

### Preparing COVID dataset

In [5]:
covid['ObservationDate'] = pd.to_datetime(covid['ObservationDate'], format='%m/%d/%Y')
covid.loc[covid['Country/Region'] == 'China', 'Country/Region'] = 'Mainland China'
covid.loc[covid['Country/Region'] == 'Côte d’Ivoire', 'Country/Region'] = 'Ivory Coast'

In [6]:
covid.head()

Unnamed: 0,SNo,ObservationDate,Province/State,Country/Region,Last Update,Confirmed,Deaths,Recovered
0,1,2020-01-22,Anhui,Mainland China,1/22/2020 17:00,1.0,0.0,0.0
1,2,2020-01-22,Beijing,Mainland China,1/22/2020 17:00,14.0,0.0,0.0
2,3,2020-01-22,Chongqing,Mainland China,1/22/2020 17:00,6.0,0.0,0.0
3,4,2020-01-22,Fujian,Mainland China,1/22/2020 17:00,1.0,0.0,0.0
4,5,2020-01-22,Gansu,Mainland China,1/22/2020 17:00,0.0,0.0,0.0


In [7]:
covid2 = covid.drop_duplicates(['ObservationDate', 'Country/Region'])

In [8]:
covid2 = covid2[covid2['Confirmed'] > 0]


**Counting the number of duplicated rows for each country
to predict an expansion of the desease, I just need to know if a country had confirmed cases.**

In [9]:
covid2['Country/Region'].value_counts()

Japan                   35
Taiwan                  35
Thailand                35
US                      35
South Korea             35
Macau                   35
Mainland China          35
Singapore               34
Hong Kong               34
Vietnam                 34
France                  33
Nepal                   32
Malaysia                32
Australia               32
Canada                  31
Cambodia                30
Sri Lanka               30
Germany                 29
Finland                 28
United Arab Emirates    28
India                   27
Philippines             27
Russia                  26
UK                      26
Sweden                  26
Italy                   26
Spain                   25
Belgium                 22
Others                  19
Egypt                   12
Iran                     7
Lebanon                  5
Israel                   5
Bahrain                  2
Iraq                     2
Afghanistan              2
Kuwait                   2
O

In [10]:
covid3 = covid2.drop_duplicates("Country/Region")
covid3 = covid3[covid3["Country/Region"] != "Others"]
covid3 = covid3.reset_index(drop=True)
covid3.head()

Unnamed: 0,SNo,ObservationDate,Province/State,Country/Region,Last Update,Confirmed,Deaths,Recovered
0,1,2020-01-22,Anhui,Mainland China,1/22/2020 17:00,1.0,0.0,0.0
1,21,2020-01-22,Macau,Macau,1/22/2020 17:00,1.0,0.0,0.0
2,29,2020-01-22,Taiwan,Taiwan,1/22/2020 17:00,1.0,0.0,0.0
3,32,2020-01-22,Washington,US,1/22/2020 17:00,1.0,0.0,0.0
4,36,2020-01-22,,Japan,1/22/2020 17:00,2.0,0.0,0.0


### Preparing World Countries & Location to Merge into COVID dataset

**Renaming Name column**

In [11]:
countries_loc2 = countries_loc.rename(columns={"name": "Country/Region"})
countries_loc2 = countries_loc2.dropna()
countries_loc2.head()

Unnamed: 0,longitude,latitude,Country/Region
2,33.93911,67.709953,Afghanistan
3,41.153332,20.168331,Albania
4,28.033886,1.659626,Algeria
5,-14.270972,-170.132217,American Samoa
6,42.506285,1.521801,Andorra


In [12]:
covid3.head()

Unnamed: 0,SNo,ObservationDate,Province/State,Country/Region,Last Update,Confirmed,Deaths,Recovered
0,1,2020-01-22,Anhui,Mainland China,1/22/2020 17:00,1.0,0.0,0.0
1,21,2020-01-22,Macau,Macau,1/22/2020 17:00,1.0,0.0,0.0
2,29,2020-01-22,Taiwan,Taiwan,1/22/2020 17:00,1.0,0.0,0.0
3,32,2020-01-22,Washington,US,1/22/2020 17:00,1.0,0.0,0.0
4,36,2020-01-22,,Japan,1/22/2020 17:00,2.0,0.0,0.0


**Correcting country names**

In [13]:
countries_loc2.loc[countries_loc2['Country/Region'] == 'China', 'Country/Region'] = 'Mainland China'
countries_loc2.loc[countries_loc2['Country/Region'] == 'Côte d’Ivoire', 'Country/Region'] = 'Ivory Coast'

#### Assigning Longitude and Latitude to listed countries

In [15]:
covid4 = covid3.merge(countries_loc2, how='left')
covid4.head()

Unnamed: 0,SNo,ObservationDate,Province/State,Country/Region,Last Update,Confirmed,Deaths,Recovered,longitude,latitude
0,1,2020-01-22,Anhui,Mainland China,1/22/2020 17:00,1.0,0.0,0.0,35.86166,104.195397
1,21,2020-01-22,Macau,Macau,1/22/2020 17:00,1.0,0.0,0.0,22.198745,113.543873
2,29,2020-01-22,Taiwan,Taiwan,1/22/2020 17:00,1.0,0.0,0.0,23.69781,120.960515
3,32,2020-01-22,Washington,US,1/22/2020 17:00,1.0,0.0,0.0,40.760537,-73.97889
4,36,2020-01-22,,Japan,1/22/2020 17:00,2.0,0.0,0.0,36.204824,138.252924


## Auxiliary Functions

**Using spherical geometry to calculate the surface distance between two
points(Countries).**

In [16]:
# Calculate the distance between confirmed locations and informed lon, Lat
def dist_to_confirmed(lon, lat, confirmed_loc):
    dists = []
    for lon2, lat2, country in confirmed_loc.values:
        d = great_circle((lon, lat), (lon2, lat2))
        dists.append(d.kilometers)
    return np.array(dists)

# Count the number of confirmed locations in a radius of 1k, 2k, and 3k kilometers
def calc_confirmed_radius(lon, lat, country, confirmed_loc):
    res = {"Country/Region": country}
    
    dists = dist_to_confirmed(lon, lat, confirmed_loc)
    
    res['avg_dist_to_confirmed'] = np.mean(dists)
    res['confirmed_1k'] = (dists < 1000).sum()
    res['confirmed_2k'] = (dists < 2000).sum()
    res['confirmed_3k'] = (dists < 3000).sum()
    
    return res

### Country Dataset

In [17]:
dates = pd.date_range("2020-01-22", "2020-02-22")

In [18]:
all_data = dict()
for date in dates:
    confirmed_countries = covid4[covid4['ObservationDate'] <= date]['Country/Region']
    
    confirmed_loc = countries_loc2[countries_loc2['Country/Region'].isin(confirmed_countries)]

    data = countries_loc2[~countries_loc2['Country/Region'].isin(confirmed_countries)]
    next_confirmed =  covid4[covid4['ObservationDate'] == date + pd.Timedelta(1,'D')]['Country/Region']
    
    if next_confirmed.shape[0] == 0:
        continue
    
    data['y'] = 0
    data.loc[data['Country/Region'].isin(next_confirmed), 'y'] = 1
    data['date'] = date
    
    features = []
    for lon, lat, country in data[['longitude', 'latitude','Country/Region']].values:
        features.append(calc_confirmed_radius(lon, lat, country, confirmed_loc))
    
    #print(features)
    features = pd.DataFrame(features)
    #features.rename(columns={"Country": "Country/Region"})
    data = data.merge(features, how='left')

    all_data[date] = data

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  del sys.path[0]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[item] = s
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  from ipykernel import kernelapp as app


#### Calculating the distance between countries & confirmed cases

In [19]:
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline

In [20]:
features = ['avg_dist_to_confirmed', 'confirmed_1k', 'confirmed_2k', 'confirmed_3k']
sorted_date = sorted(all_data.keys())
correct = {'Total': 0,'Baseline': 0, 'LR': 0, 'Rank Avg': 0}

for i in range(len(sorted_date)-1) : 
    date = sorted_date[i]
    next_date = sorted_date[i+1]
    
    Xtrain, ytrain = all_data[date][features], all_data[date]['y']
    Xval, yval = all_data[next_date][features], all_data[next_date]['y']
    
    pipe = make_pipeline(StandardScaler(), LogisticRegression(random_state=0, class_weight='balanced'))
    #pipe = DecisionTreeClassifier(max_depth=None,class_weight='balanced', ccp_alpha=0.01)
    #pipe = ExtraTreesClassifier(n_estimators=100, n_jobs=6, max_depth=4, class_weight='balanced')
    
    pipe.fit(Xtrain, ytrain)
    p = pipe.predict_proba(Xval)[:,1]
    
    Xval['p'] = p
    Xval['y'] = yval
    Xval['country'] = all_data[next_date]['Country/Region']
    
    baseline = Xval.sort_values("avg_dist_to_confirmed").head(20)['y'].sum()# / yval.sum()
    lr_top_20 = Xval.sort_values("p", ascending=False).head(20)['y'].sum()# / yval.sum()
    
    Xval['rank_avg'] = 0.9*Xval['avg_dist_to_confirmed'].rank() + 0.1*Xval['p'].rank(ascending=False)
    avg_in_top_20 = Xval.sort_values("rank_avg", ascending=True).head(20)['y'].sum() #/ yval.sum()
    
    correct['Total'] += yval.sum()
    correct['Baseline'] += baseline
    correct['LR'] += lr_top_20
    correct['Rank Avg'] += avg_in_top_20
    
    str_result = "Prediction date: {}\nPositive in train: {}\nConfirmed next date: {}\nBaseline: {}\nLR: {}\nRank avg top 20: {}\n".format( 
        date, ytrain.sum(), yval.sum(), baseline, lr_top_20, avg_in_top_20)
    print(str_result)

  return self.partial_fit(X, y)
  return self.fit(X, y, **fit_params).transform(X)
  Xt = transform.transform(Xt)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the d

Prediction date: 2020-01-22 00:00:00
Positive in train: 3
Confirmed next date: 1
Baseline: 0
LR: 0
Rank avg top 20: 0

Prediction date: 2020-01-23 00:00:00
Positive in train: 1
Confirmed next date: 3
Baseline: 2
LR: 0
Rank avg top 20: 0



  Xt = transform.transform(Xt)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-vi

Prediction date: 2020-01-24 00:00:00
Positive in train: 3
Confirmed next date: 1
Baseline: 0
LR: 0
Rank avg top 20: 0

Prediction date: 2020-01-25 00:00:00
Positive in train: 1
Confirmed next date: 3
Baseline: 2
LR: 1
Rank avg top 20: 2



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  return self.partial_fit(X, y)
  return self.fit(X, y, **fit_params).transform(X)
  Xt = transform.transform(Xt)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the d

Prediction date: 2020-01-26 00:00:00
Positive in train: 3
Confirmed next date: 1
Baseline: 0
LR: 0
Rank avg top 20: 0

Prediction date: 2020-01-27 00:00:00
Positive in train: 1
Confirmed next date: 2
Baseline: 0
LR: 0
Rank avg top 20: 1



  return self.partial_fit(X, y)
  return self.fit(X, y, **fit_params).transform(X)
  Xt = transform.transform(Xt)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the d

Prediction date: 2020-01-28 00:00:00
Positive in train: 2
Confirmed next date: 2
Baseline: 2
LR: 1
Rank avg top 20: 2

Prediction date: 2020-01-29 00:00:00
Positive in train: 2
Confirmed next date: 4
Baseline: 1
LR: 0
Rank avg top 20: 1

Prediction date: 2020-01-30 00:00:00
Positive in train: 4
Confirmed next date: 1
Baseline: 0
LR: 0
Rank avg top 20: 0



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  return self.partial_fit(X, y)
  return self.fit(X, y, **fit_params).transform(X)
  Xt = transform.transform(Xt)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the d

Prediction date: 2020-01-31 00:00:00
Positive in train: 1
Confirmed next date: 1
Baseline: 0
LR: 1
Rank avg top 20: 0

Prediction date: 2020-02-03 00:00:00
Positive in train: 1
Confirmed next date: 1
Baseline: 0
LR: 0
Rank avg top 20: 0



  Xt = transform.transform(Xt)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-vi

Prediction date: 2020-02-13 00:00:00
Positive in train: 1
Confirmed next date: 1
Baseline: 1
LR: 1
Rank avg top 20: 1

Prediction date: 2020-02-18 00:00:00
Positive in train: 1
Confirmed next date: 2
Baseline: 0
LR: 0
Rank avg top 20: 0



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


In [21]:
correct
#{'Baseline': 8, 'LR': 4, 'Rank Avg': 6} = 50/50

{'Total': 23, 'Baseline': 8, 'LR': 4, 'Rank Avg': 7}

### List the predicted Countries  

#### What countries are the most probable to have a confirmed case of COVID-19. (Ordered by Rank Average)

In [22]:
Xval.sort_values("rank_avg", ascending=True).head(10)

Unnamed: 0,avg_dist_to_confirmed,confirmed_1k,confirmed_2k,confirmed_3k,p,y,country,rank_avg
102,4934.174551,0,1,5,0.928288,0,Kyrgyzstan,1.1
191,4958.310974,0,2,5,0.87092,0,Tajikistan,3.2
208,5026.162088,0,1,4,0.910517,0,Uzbekistan,3.9
22,4951.29032,1,4,10,0.492835,0,Bhutan,4.2
0,5029.039001,0,4,4,0.609131,0,Afghanistan,5.9
98,5038.895405,0,0,5,0.955509,0,Kazakhstan,6.4
147,5058.112703,0,4,5,0.633646,0,Pakistan,8.4
16,5029.894839,1,5,11,0.353286,0,Bangladesh,9.4
127,5143.128327,0,2,7,0.878122,0,Mongolia,10.3
200,5099.356181,1,2,5,0.590988,0,Turkmenistan,10.6


#### What countries are the most probable to have a confirmed case of COVID-19. (Ordered by Distance of a confirmed case)

In [23]:
Xval.sort_values("avg_dist_to_confirmed").head(10)

Unnamed: 0,avg_dist_to_confirmed,confirmed_1k,confirmed_2k,confirmed_3k,p,y,country,rank_avg
102,4934.174551,0,1,5,0.928288,0,Kyrgyzstan,1.1
22,4951.29032,1,4,10,0.492835,0,Bhutan,4.2
191,4958.310974,0,2,5,0.87092,0,Tajikistan,3.2
208,5026.162088,0,1,4,0.910517,0,Uzbekistan,3.9
0,5029.039001,0,4,4,0.609131,0,Afghanistan,5.9
16,5029.894839,1,5,11,0.353286,0,Bangladesh,9.4
98,5038.895405,0,0,5,0.955509,0,Kazakhstan,6.4
147,5058.112703,0,4,5,0.633646,0,Pakistan,8.4
132,5097.485935,1,8,13,0.092804,0,Myanmar,16.8
200,5099.356181,1,2,5,0.590988,0,Turkmenistan,10.6


## How to improve?


* Predict which countries are at risk for the next week
* More training data
  * Less noise
  * Train with all confirmed countries
* Because it is hard?
  * It depends on the discovery methods (Italy measures differently from other European countries, China stopped doing only laboratory tests)
  * Poorer countries

* In theory they will not have the same detection capacity as rich countries
* But there may also be not so many people crossing the border

**Add More features:**

* Economic and demographic data, public health
* Business partnership data (countries that receive many travelers from countries with confirmed cases)
* How many confirmed in countries at 1k Km, 2k Km ...
* See more top N countries

Use SARS data (and other rapidly spreading diseases) https://en.wikipedia.org/wiki/Severe_acute_respiratory_syndrome#Epidemiology