# Travel Restriction over the world

Summer is Comming, so In order to have a better visibility about travel restrictions over the world and also to better measure safety of travel, i chose to create a world map visualisation that provides an insight about how severe the international restrictions in a country and also the number of confirmed cases and active ones

In [1]:
# Import required libraries
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import folium
from folium.plugins import HeatMap, MarkerCluster, HeatMapWithTime
from branca.element import Template, MacroElement

## Data Preparation

In [2]:
# import datasets
covid_timeseries = pd.read_csv('https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv')
travel_ban = pd.read_csv('./data/international-travel-covid.csv')

In [4]:
covid_timeseries.head()

Unnamed: 0,Province/State,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,4/29/21,4/30/21,5/1/21,5/2/21,5/3/21,5/4/21,5/5/21,5/6/21,5/7/21,5/8/21
0,,Afghanistan,33.93911,67.709953,0,0,0,0,0,0,...,59576,59745,59939,60122,60300,60563,60797,61162,61455,61755
1,,Albania,41.1533,20.1683,0,0,0,0,0,0,...,130977,131085,131185,131238,131276,131327,131419,131510,131577,131666
2,,Algeria,28.0339,1.6596,0,0,0,0,0,0,...,121866,122108,122311,122522,122717,122999,123272,123473,123692,123900
3,,Andorra,42.5063,1.5218,0,0,0,0,0,0,...,13198,13232,13232,13282,13295,13316,13340,13363,13390,13406
4,,Angola,-11.2027,17.8739,0,0,0,0,0,0,...,26431,26652,26815,26993,27133,27284,27529,27921,28201,28477


In [5]:
travel_ban.head()

Unnamed: 0,Entity,Code,Day,international_travel_controls
0,Afghanistan,AFG,2020-01-01,0
1,Afghanistan,AFG,2020-01-02,0
2,Afghanistan,AFG,2020-01-03,0
3,Afghanistan,AFG,2020-01-04,0
4,Afghanistan,AFG,2020-01-05,0


Since we want ti visualize the last update about travel restrictions so we want to keep just the last update about international travel restrictions in every country

In [6]:
travel_ban=travel_ban.sort_values('Day').drop_duplicates('Entity',keep='last')

we need to merge both datasets by name of country, so wa have to make sure that there is no difference in the name of the same country between both datasets

In [7]:
set(travel_ban['Entity'].unique())-set(covid_timeseries['Country/Region'].unique())

{'Aruba',
 'Bermuda',
 'Cape Verde',
 'Congo',
 'Democratic Republic of Congo',
 'Faeroe Islands',
 'Greenland',
 'Guam',
 'Hong Kong',
 'Kiribati',
 'Macao',
 'Myanmar',
 'Palestine',
 'Puerto Rico',
 'South Korea',
 'Taiwan',
 'Timor',
 'Tonga',
 'Turkmenistan',
 'United States',
 'United States Virgin Islands'}

We can see that we need to rename some countries to have an equivalent name in both datasets

In [8]:
# renaming countries in travel
travel_ban['Entity']= travel_ban['Entity'].replace({"United States":"US",
                                    "Congo":"Congo (Brazzaville)","Democratic Republic of Congo":"Congo (Kinshasa)",
                                    'South Korea':"Korea, South","Taiwan":"Taiwan*","Timor":"Timor-Leste","Cape Verde":"Cabo Verde"})

Greenland is a state of Danemark, and it's called as Danemark in covid_timeseries, we will change it to Greenland the more popular name of this state.

In [9]:
# find the index of GreenLand in covid_timeseries
covid_timeseries[covid_timeseries['Province/State']=='Greenland']['Country/Region'].index

Int64Index([103], dtype='int64')

In [10]:
# rename the country from Danemark to Greenland in covid_timeseries
covid_timeseries.loc[103,'Country/Region']='Greenland'

In [11]:
"""last_restrictions = []
last_restrictions_date = []
for country in travel_ban['Entity']:
    last_restrictions.append(travel_ban[travel_ban['Entity']==country].iloc[-1]['international_travel_controls'])
    last_restrictions_date.append(travel_ban[travel_ban['Entity']==country].iloc[-1]['Day'])
travel_ban['last_restrictions']=last_restrictions
travel_ban['last_restrictions_date']=last_restrictions_date"""

"last_restrictions = []\nlast_restrictions_date = []\nfor country in travel_ban['Entity']:\n    last_restrictions.append(travel_ban[travel_ban['Entity']==country].iloc[-1]['international_travel_controls'])\n    last_restrictions_date.append(travel_ban[travel_ban['Entity']==country].iloc[-1]['Day'])\ntravel_ban['last_restrictions']=last_restrictions\ntravel_ban['last_restrictions_date']=last_restrictions_date"

#### Merging Travel_ban and covid_timeseries datasets by country name's

In [12]:
travel_ban = travel_ban.rename(columns={"Entity":"country","Day":"restrictions_update_date"})
# changing province/state to state and country/region to country
covid_timeseries = covid_timeseries.rename(columns={'Province/State': 'state', 'Country/Region': 'country'})
# merge covid_timeseries and travel_ban datasets
covid_data = covid_timeseries.merge(travel_ban, on='country', how='left')
covid_data.columns = map(str.lower, covid_data.columns)

In [13]:
covid_data.head()

Unnamed: 0,state,country,lat,long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,5/2/21,5/3/21,5/4/21,5/5/21,5/6/21,5/7/21,5/8/21,code,restrictions_update_date,international_travel_controls
0,,Afghanistan,33.93911,67.709953,0,0,0,0,0,0,...,60122,60300,60563,60797,61162,61455,61755,AFG,2021-05-03,0.0
1,,Albania,41.1533,20.1683,0,0,0,0,0,0,...,131238,131276,131327,131419,131510,131577,131666,ALB,2021-05-02,1.0
2,,Algeria,28.0339,1.6596,0,0,0,0,0,0,...,122522,122717,122999,123272,123473,123692,123900,DZA,2021-04-26,4.0
3,,Andorra,42.5063,1.5218,0,0,0,0,0,0,...,13282,13295,13316,13340,13363,13390,13406,AND,2021-04-27,1.0
4,,Angola,-11.2027,17.8739,0,0,0,0,0,0,...,26993,27133,27284,27529,27921,28201,28477,AGO,2021-04-05,2.0


In [14]:
# dropping duplicate values
covid_data.drop_duplicates(inplace=True)

#### Handling missing values

In [15]:
# Percentage of missing values in each column
round((covid_data.isnull().sum() / covid_data.shape[0] * 100).sort_values(ascending=False),2)

state                            68.73
restrictions_update_date          7.64
international_travel_controls     7.64
code                              7.64
lat                               0.73
                                 ...  
12/5/20                           0.00
12/6/20                           0.00
12/7/20                           0.00
12/8/20                           0.00
9/13/20                           0.00
Length: 480, dtype: float64

Let's explore each column and handle missing values on it

In [16]:
covid_data[covid_data['lat'].isna()]

Unnamed: 0,state,country,lat,long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,5/2/21,5/3/21,5/4/21,5/5/21,5/6/21,5/7/21,5/8/21,code,restrictions_update_date,international_travel_controls
52,Repatriated Travellers,Canada,,,0,0,0,0,0,0,...,13,13,13,13,13,13,13,CAN,2021-05-07,4.0
88,Unknown,China,,,0,0,0,0,0,0,...,0,0,0,0,0,0,0,CHN,2021-05-03,3.0


from state name, we can say that Those are no actual countries so we can delete them

In [17]:
covid_data.dropna(subset = ["lat"], inplace=True)

In [18]:
covid_data['international_travel_controls'].unique()

array([ 0.,  1.,  4.,  2., nan,  3.])

In [19]:
object_columns = ['state','code','restrictions_update_date']
for column in object_columns:
    covid_data[column].fillna('', inplace=True)

# we identify the countries with no data about restrictions with -1 in international_travel_controls column
covid_data['international_travel_controls'].fillna(-1, inplace=True)

# The rest of columns contains the number of confirmed cases per day, we can replace nan values by the last data about confirmed cases that we have
#covid_data.fillna(0,inplace=True)
covid_data['1/22/20'].fillna(0,inplace=True) # replace na values in the first column with 0
covid_data.iloc[:,4:-3].fillna(method='ffill',axis=1,inplace=True)

In [20]:
covid_data

Unnamed: 0,state,country,lat,long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,5/2/21,5/3/21,5/4/21,5/5/21,5/6/21,5/7/21,5/8/21,code,restrictions_update_date,international_travel_controls
0,,Afghanistan,33.939110,67.709953,0,0,0,0,0,0,...,60122,60300,60563,60797,61162,61455,61755,AFG,2021-05-03,0.0
1,,Albania,41.153300,20.168300,0,0,0,0,0,0,...,131238,131276,131327,131419,131510,131577,131666,ALB,2021-05-02,1.0
2,,Algeria,28.033900,1.659600,0,0,0,0,0,0,...,122522,122717,122999,123272,123473,123692,123900,DZA,2021-04-26,4.0
3,,Andorra,42.506300,1.521800,0,0,0,0,0,0,...,13282,13295,13316,13340,13363,13390,13406,AND,2021-04-27,1.0
4,,Angola,-11.202700,17.873900,0,0,0,0,0,0,...,26993,27133,27284,27529,27921,28201,28477,AGO,2021-04-05,2.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
270,,Vietnam,14.058324,108.277199,0,2,2,2,2,2,...,2962,2985,2995,3030,3090,3137,3245,VNM,2021-04-26,4.0
271,,West Bank and Gaza,31.952200,35.233200,0,0,0,0,0,0,...,297638,298203,298921,299736,300387,300946,301437,,,-1.0
272,,Yemen,15.552727,48.516388,0,0,0,0,0,0,...,6341,6363,6390,6414,6426,6446,6462,YEM,2021-05-03,3.0
273,,Zambia,-13.133897,27.849332,0,0,0,0,0,0,...,91693,91722,91804,91849,91946,92004,92057,ZMB,2021-05-02,1.0


In [21]:
# Percentage of missing values in each column
round((covid_data.isnull().sum() / covid_data.shape[0] * 100).sort_values(ascending=False),2)

international_travel_controls    0.0
restrictions_update_date         0.0
6/17/20                          0.0
6/18/20                          0.0
6/19/20                          0.0
                                ... 
12/7/20                          0.0
12/8/20                          0.0
12/9/20                          0.0
12/10/20                         0.0
state                            0.0
Length: 480, dtype: float64

## Travel restrictions around the world

In [22]:
restrictions_data = pd.DataFrame()
restrictions_data['country']=covid_data['country']
restrictions_data['last_restrictions']=covid_data['international_travel_controls']
# we need just countries that have data about restrictions
restrictions_data = restrictions_data[restrictions_data['last_restrictions']!=-1]
#restrictions_data = restrictions_data[restrictions_data['last_restrictions']!=0]

restrictions_data

Unnamed: 0,country,last_restrictions
0,Afghanistan,0.0
1,Albania,1.0
2,Algeria,4.0
3,Andorra,1.0
4,Angola,2.0
...,...,...
269,Venezuela,3.0
270,Vietnam,4.0
272,Yemen,3.0
273,Zambia,1.0


### Mapping restrictions data with world map

In [24]:
import requests, json, re, folium
from bs4 import BeautifulSoup

# dynamically get the world-country boundaries 
res = requests.get("https://raw.githubusercontent.com/python-visualization/folium/master/examples/data/world-countries.json")
countries_shapes = pd.DataFrame(json.loads(res.content.decode()))
countries_shapes = countries_shapes.assign(id=countries_shapes["features"].apply(pd.Series)["id"],
         name=countries_shapes["features"].apply(pd.Series)["properties"].apply(pd.Series)["name"])




# adapt country names to geo data "world-countries.json"
restrictions_data.country = restrictions_data.country.replace({"US": "United States of America","Tanzania":"United Republic of Tanzania",
                                    "Congo (Brazzaville)":"Republic of the Congo","Congo (Kinshasa)":"Democratic Republic of the Congo",
                                    "Cote d'Ivoire":"Ivory Coast", "Guinea-Bissau":"Guinea Bissau","Czechia":"Czech Republic",
                                    "Serbia":"Republic of Serbia",'Bahamas':"The Bahamas","Guinea":"Guinea",
                                    "Korea, South":'South Korea',"Taiwan*":"Taiwan","Timor-Leste":"East Timor"})


# initialize a folium map
corona_map = folium.Map(location=[0, 0], zoom_start=3)


# creating a choropleth map by level of restrictions in differents countries

def style_fn(feature):
    """
       Define the color of a country by its level of travel restrictions {0, 1, 2, 3, 4}
       
    """
    country = feature['properties']['name']
    if country in restrictions_data[restrictions_data['last_restrictions']==0]['country'].values.tolist():
        style = {'fillColor': 'white', 'color': 'white','fillOpacity':0.8}
        return style
    if country in restrictions_data[restrictions_data['last_restrictions']==1]['country'].values.tolist():
        style = {'fillColor': '#ffffcc', 'color': '#00000000','fillOpacity':0.8}
        return style
    if country in restrictions_data[restrictions_data['last_restrictions']==2]['country'].values.tolist():
        style = {'fillColor': '#ffeda0', 'color': '#00000000','fillOpacity':0.8}
        return style
    if country in restrictions_data[restrictions_data['last_restrictions']==3]['country'].values.tolist():
        style = {'fillColor': '#fd8d3c', 'color': '#00000000','fillOpacity':0.7}
        return style
    if country in restrictions_data[restrictions_data['last_restrictions']==4]['country'].values.tolist():
        style = {'fillColor': '#bd0026', 'color': '#00000000','fillOpacity':0.8}
        return style
        

# overlay desired countries over folium map with a specific color

countries =  restrictions_data['country'].values.tolist()
for r in countries_shapes.loc[countries_shapes["name"].isin(countries)].to_dict(orient="records"):
    folium.GeoJson(r["features"], name=r["name"], tooltip=r["name"],style_function=style_fn).add_to(corona_map)




# Add legend to the map
template = """
{% macro html(this, kwargs) %}
<!doctype html>
<html lang="en">
<head>
  <meta charset="utf-8">
  <meta name="viewport" content="width=device-width, initial-scale=1">
</head>
<body>
  <div id='maplegend' class='maplegend' 
    style='position: absolute; z-index:9999; border:2px solid grey; background-color:rgba(255, 255, 255, 0.8);
     border-radius:6px; padding: 10px; font-size:14px; right: 20px; bottom: 20px;'>
     
  <div class='legend-title'>International Travel Control</div>
  <div class='legend-scale'>
    <ul class='legend-labels'>
       <li><span style='background:grey;opacity:0.4;'></span>No Data.</li>
       <li><span style='background:white;opacity:0.8;'></span>No measures.</li>
       <li><span style='background:#ffffcc;opacity:0.9;'></span>Screening.</li>
       <li><span style='background:#ffeda0;opacity:0.9;'></span>Quarantine from high-risk regions.</li>
       <li><span style='background:#fd8d3c;opacity:0.8;'></span>Ban on high-risk regions.</li>
       <li><span style='background:#bd0026;opacity:1;'></span>Total border closure.</li>
    </ul>
  </div>
  </div>
 
</body>
</html>

<style type='text/css'>
  .maplegend .legend-title {
    text-align: left;
    margin-bottom: 5px;
    font-weight: bold;
    font-size: 90%;
    }
  .maplegend .legend-scale ul {
    margin: 0;
    margin-bottom: 5px;
    padding: 0;
    float: left;
    list-style: none;
    }
  .maplegend .legend-scale ul li {
    font-size: 80%;
    list-style: none;
    margin-left: 0;
    line-height: 18px;
    margin-bottom: 2px;
    }
  .maplegend ul.legend-labels li span {
    display: block;
    float: left;
    height: 16px;
    width: 30px;
    margin-right: 5px;
    margin-left: 0;
    border: 1px solid #999;
    }
  .maplegend .legend-source {
    font-size: 80%;
    color: #777;
    clear: both;
    }
  .maplegend a {
    color: #777;
    }
</style>
{% endmacro %}"""

macro = MacroElement()
macro._template = Template(template)

corona_map.get_root().add_child(macro)

corona_map

### Plot number of confirmed cases per country

In [25]:
for i in range(0,len(covid_data)):
    folium.Circle(
        location=[covid_data.iloc[i]['lat'], covid_data.iloc[i]['long']],
        fill=True,
        radius=(int((np.log(covid_data.iloc[i,-4]+0.00001)))+0.2)*20000,
        color='red',
        fill_color='indigo',
        tooltip = "<div style='margin: 0; background-color: black; color: white;'>"+
                    "<h4 style='text-align:center;font-weight: bold'>"+covid_data.iloc[i]['country'] + "</h4>"+
                    "<h4 style='text-align:center;font-weight: bold'>"+str(covid_data.iloc[i]['state'] )+ "</h4>"+
                    "<hr style='margin:10px;color: white;'>"+
                    "<ul style='color: white;;list-style-type:circle;align-item:left;padding-left:20px;padding-right:20px'>"+
                    "<li>Confirmed cases: "+str(covid_data.iloc[i,-4])+"</li>"+   
                    "</ul></div>",
        ).add_to(corona_map)
    
# saving the map as an html file
covid_map=corona_map.save(outfile= "covid_map.html")      

corona_map

### The spread of covid over time

In [26]:
# creating a time_index with all registretad dates
time_index = []
for t in covid_data.columns[4:-3]:
    time_index.append(t)
time_index

['1/22/20',
 '1/23/20',
 '1/24/20',
 '1/25/20',
 '1/26/20',
 '1/27/20',
 '1/28/20',
 '1/29/20',
 '1/30/20',
 '1/31/20',
 '2/1/20',
 '2/2/20',
 '2/3/20',
 '2/4/20',
 '2/5/20',
 '2/6/20',
 '2/7/20',
 '2/8/20',
 '2/9/20',
 '2/10/20',
 '2/11/20',
 '2/12/20',
 '2/13/20',
 '2/14/20',
 '2/15/20',
 '2/16/20',
 '2/17/20',
 '2/18/20',
 '2/19/20',
 '2/20/20',
 '2/21/20',
 '2/22/20',
 '2/23/20',
 '2/24/20',
 '2/25/20',
 '2/26/20',
 '2/27/20',
 '2/28/20',
 '2/29/20',
 '3/1/20',
 '3/2/20',
 '3/3/20',
 '3/4/20',
 '3/5/20',
 '3/6/20',
 '3/7/20',
 '3/8/20',
 '3/9/20',
 '3/10/20',
 '3/11/20',
 '3/12/20',
 '3/13/20',
 '3/14/20',
 '3/15/20',
 '3/16/20',
 '3/17/20',
 '3/18/20',
 '3/19/20',
 '3/20/20',
 '3/21/20',
 '3/22/20',
 '3/23/20',
 '3/24/20',
 '3/25/20',
 '3/26/20',
 '3/27/20',
 '3/28/20',
 '3/29/20',
 '3/30/20',
 '3/31/20',
 '4/1/20',
 '4/2/20',
 '4/3/20',
 '4/4/20',
 '4/5/20',
 '4/6/20',
 '4/7/20',
 '4/8/20',
 '4/9/20',
 '4/10/20',
 '4/11/20',
 '4/12/20',
 '4/13/20',
 '4/14/20',
 '4/15/20',
 '4/16/

In [27]:
covid_data_daily =covid_data.copy()
# normalizing the number of confirmed cases data to be in range [0,1]
for date in time_index:
    
    covid_data_daily[date] = covid_data_daily[date]/covid_data_daily[date].max()


In [28]:
# list of all registrated cases by location and date (lat, long, date)
heat_data = [[[row['lat'],row['long'],row[date]] for index, row in covid_data_daily.iterrows()] for date in time_index]


In [29]:
heat_data[1]

[[33.93911, 67.709953, 0.0],
 [41.1533, 20.1683, 0.0],
 [28.0339, 1.6596, 0.0],
 [42.5063, 1.5218, 0.0],
 [-11.2027, 17.8739, 0.0],
 [17.0608, -61.7964, 0.0],
 [-38.4161, -63.6167, 0.0],
 [40.0691, 45.0382, 0.0],
 [-35.4735, 149.0124, 0.0],
 [-33.8688, 151.2093, 0.0],
 [-12.4634, 130.8456, 0.0],
 [-27.4698, 153.0251, 0.0],
 [-34.9285, 138.6007, 0.0],
 [-42.8821, 147.3272, 0.0],
 [-37.8136, 144.9631, 0.0],
 [-31.9505, 115.8605, 0.0],
 [47.5162, 14.5501, 0.0],
 [40.1431, 47.5769, 0.0],
 [25.025885, -78.035889, 0.0],
 [26.0275, 50.55, 0.0],
 [23.685, 90.3563, 0.0],
 [13.1939, -59.5432, 0.0],
 [53.7098, 27.9534, 0.0],
 [50.8333, 4.469936, 0.0],
 [17.1899, -88.4976, 0.0],
 [9.3077, 2.3158, 0.0],
 [27.5142, 90.4336, 0.0],
 [-16.2902, -63.5887, 0.0],
 [43.9159, 17.6791, 0.0],
 [-22.3285, 24.6849, 0.0],
 [-14.235, -51.9253, 0.0],
 [4.5353, 114.7277, 0.0],
 [42.7339, 25.4858, 0.0],
 [12.2383, -1.5616, 0.0],
 [21.9162, 95.956, 0.0],
 [-3.3731, 29.9189, 0.0],
 [16.5388, -23.0418, 0.0],
 [11.55, 1

In [31]:
#Put Data on Map
covid_heatmap= folium.Map(location=[0,0], zoom_start=3)

hm = HeatMapWithTime(heat_data[::-1],index=time_index,auto_play=True,radius=30,max_opacity=0.4,gradient={0.1: 'blue', 0.25: 'lime', 0.5:'yellow',0.75: 'red'})

hm.add_to(covid_heatmap)
covid_heatmap