#  <span style="text-align:center; color:black; font-family:Georgia; font-size:1.1em;"> Goncalves Quentin - ZOOV Data engineer internship - Python test </span>


__Goal__ : Write a Map-based visualization of a world-scale database

# <span style="color:black; font-family:Georgia; font-size:1em;"> 1. Importing the necessary libraries </span>


In [246]:
import pandas as pd
import numpy as np
import folium
import folium.plugins
import json
import geopandas as gpd


from shapely.geometry import Point
from shapely.geometry.polygon import Polygon

# <span style="color:black; font-family:Georgia; font-size:1em;"> 2. Reading the datasets </span>

# <span style="color:black; font-family:Georgia; font-size:0.8em;"> CSSE COVID-19 Dataset </span>

The dataset contains daily time series summary tables for confirmed cases, deaths and recoveries. All data come from the daily case reports.

Field description:
- Province/State: China - province name; US/Canada/Australia - city name, state/province name; Others - name of the event (e.g., "Diamond Princess" cruise ship); other countries - blank.
- Country/Region: country/region name.
- Lat and Long: a coordinates reference for the user.
- Date fields: M/DD/YYYY (UTC), the same data as MM-DD-YYYY.csv file.


In [247]:
# Load the three CSSE COVID-19 time series, in this order:
# confirmed cases, deaths, recovered cases.
covid_confirmed, covid_death, covid_recovered = map(
    pd.read_csv,
    (
        'time_series_19-covid-Confirmed.csv',
        'time_series_19-covid-Deaths.csv',
        'time_series_19-covid-Recovered.csv',
    ),
)

# Preview the first rows of the confirmed-cases table
covid_confirmed.head()

Unnamed: 0,Province/State,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,3/6/20,3/7/20,3/8/20,3/9/20,3/10/20,3/11/20,3/12/20,3/13/20,3/14/20,3/15/20
0,,Thailand,15.0,101.0,2,3,5,7,8,8,...,48,50,50,50,53,59,70,75,82,114
1,,Japan,36.0,138.0,2,1,2,2,4,4,...,420,461,502,511,581,639,639,701,773,839
2,,Singapore,1.2833,103.8333,0,1,3,3,4,5,...,130,138,150,150,160,178,178,200,212,226
3,,Nepal,28.1667,84.25,0,0,0,1,1,1,...,1,1,1,1,1,1,1,1,1,1
4,,Malaysia,2.5,112.5,0,0,0,3,4,4,...,83,93,99,117,129,149,149,197,238,428


In [248]:
# Read the GeoJSON file describing the countries (one feature per country).
# The context manager guarantees the file handle is closed, and the explicit
# encoding avoids depending on the platform default when decoding the JSON.
with open('countries.geo.json', encoding='utf-8') as geo_file:
    countries_geo = json.load(geo_file)

# <span style="color:black; font-family:Georgia; font-size:1em;"> 3. Cleaning data </span>
    
The goal here is to aggregate all the confirmed COVID-19 cases by country.

In [249]:
# Replace the 'Country' value by the country id used in 'countries.geo.json',
# found from the coordinates of the data point.
# In countries.geo.json each country is described by a Polygon or a
# MultiPolygon geometry.
def get_country(long, lat):
    """Return the id of the country whose borders contain the given point.

    Args:
        long: Longitude of the point.
        lat: Latitude of the point.

    Returns:
        The ``id`` of the first feature of ``countries_geo`` whose geometry
        contains the point, or ``None`` when no feature contains it.
    """
    point = Point(long, lat)

    for country in countries_geo.get('features'):
        geometry = country.get('geometry')
        geometry_type = geometry.get('type')
        coordinates = geometry.get('coordinates')

        # Normalise both geometry types to a list of polygons, each polygon
        # being a list of rings, so a single code path handles them.
        if geometry_type == "Polygon":
            polygons = [coordinates]
        elif geometry_type == "MultiPolygon":
            polygons = coordinates
        else:
            continue

        for rings in polygons:
            if not rings:
                continue
            # The first ring is the outer border, any further ring is a hole.
            # The coordinates are passed as-is: the shared GeoJSON structure
            # is never modified.
            if Polygon(rings[0], rings[1:]).contains(point):
                return country.get('id')

    return None

In [250]:
# Rewrite the country column of the DataFrame so that it matches the ids of
# the countries.geo.json file, which makes it easy to identify places that
# belong to the same country.
def cleaned_data(df):
    """Aggregate a CSSE time-series table by country id.

    Args:
        df: DataFrame with the columns 'Province/State', 'Country/Region',
            'Lat' and 'Long', followed by the per-date count columns.

    Returns:
        A new DataFrame indexed by country id, with a leading 'Country'
        column holding the same id and every other column summed per
        country. Rows for which ``get_country`` returns ``None`` are left
        out of the result (groupby drops missing keys by default).
    """
    # The province/state is useless when comparing the cases by country
    df = df.drop('Province/State', axis=1)
    df = df.rename({'Country/Region': 'Country'}, axis=1)

    # Resolve the country id of every row from its coordinates in one
    # column assignment instead of writing cell by cell.
    df['Country'] = [
        get_country(long, lat) for long, lat in zip(df['Long'], df['Lat'])
    ]

    # Use the country id as index and sum all the rows of the same country.
    # `DataFrame.sum(level=...)` no longer exists in pandas 2.0; groupby is
    # the supported equivalent. sort=False keeps the order of first
    # appearance of each country.
    # NOTE: 'Lat' and 'Long' are summed as well, so they are not meaningful
    # coordinates in the result; they are kept to preserve the column layout.
    df = df.set_index('Country')
    df = df.groupby(level='Country', sort=False).sum()

    df.insert(loc=0, column='Country', value=df.index)

    return df

# Aggregate each of the three tables by country id
covid_confirmed, covid_death, covid_recovered = (
    cleaned_data(table)
    for table in (covid_confirmed, covid_death, covid_recovered)
)

# Display the aggregated confirmed-cases table
covid_confirmed

Unnamed: 0_level_0,Country,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,1/28/20,...,3/6/20,3/7/20,3/8/20,3/9/20,3/10/20,3/11/20,3/12/20,3/13/20,3/14/20,3/15/20
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
THA,THA,15.0000,101.0000,2,3,5,7,8,8,14,...,48,50,50,50,53,59,70,75,82,114
JPN,JPN,142.3311,556.9140,2,1,2,2,4,4,7,...,1161,1202,1243,1252,1323,1381,1381,1443,1515,1581
MYS,MYS,3.7833,216.3333,0,1,3,6,8,9,11,...,213,231,249,267,289,327,327,397,450,654
NPL,NPL,28.1667,84.2500,0,0,0,1,1,1,1,...,1,1,1,1,1,1,1,1,1,1
CAN,CAN,413.8113,-727.9566,0,0,0,0,1,1,2,...,49,54,64,77,79,108,117,191,196,249
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
GUF,GUF,3.9339,-53.1258,0,0,0,0,0,0,0,...,0,5,5,5,5,5,5,5,5,7
CS-KM,CS-KM,42.6026,20.9030,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,2
CAF,CAF,6.6111,20.9394,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
GNQ,GNQ,1.5000,10.0000,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1


In [251]:
# Build the GeoDataFrame backing the information pop-up, indexed by country
# id (set_index with a column label moves the 'id' column into the index).
gpd1 = gpd.read_file('countries.geo.json').set_index('id')

In [252]:
# For each table, take its last column (the most recent count) and join it,
# on the country id index, to the GeoDataFrame used to display the pop-up.
# The joined column is renamed after the kind of cases it holds.
for cases, label in (
    (covid_confirmed, 'Confirmed'),
    (covid_recovered, 'Recovered'),
    (covid_death, 'Deaths'),
):
    last_columns = cases.columns[-1]
    gpd1 = gpd1.merge(cases[last_columns], right_index=True, left_index=True)
    gpd1 = gpd1.rename({last_columns: label}, axis=1)

gpd1

Unnamed: 0,name,geometry,Confirmed,Recovered,Deaths
AFG,Afghanistan,"POLYGON ((61.21082 35.65007, 62.23065 35.27066...",16,0,0
ALB,Albania,"POLYGON ((20.59025 41.85540, 20.46317 41.51509...",42,0,1
ARE,United Arab Emirates,"POLYGON ((51.57952 24.24550, 51.75744 24.29407...",98,23,0
ARG,Argentina,"MULTIPOLYGON (((-65.50000 -55.20000, -66.45000...",45,1,2
ARM,Armenia,"POLYGON ((43.58275 41.09214, 44.97248 41.24813...",26,0,0
...,...,...,...,...,...
UZB,Uzbekistan,"POLYGON ((66.51861 37.36278, 66.54615 37.97469...",1,0,0
VEN,Venezuela,"POLYGON ((-71.33158 11.77628, -71.36001 11.539...",10,0,0
VNM,Vietnam,"POLYGON ((108.05018 21.55238, 106.71507 20.696...",56,16,0
PSE,West Bank,"POLYGON ((35.54566 32.39399, 35.54525 31.78251...",0,0,0


# <span style="color:black; font-family:Georgia; font-size:1em;"> 4. Displaying map </span>

In [253]:
m = folium.Map(location=[46.1313856, -2.4357072], zoom_start=1)

# Most recent date column of the confirmed-cases table. It is read from
# covid_confirmed itself so the choropleth never depends on a column name
# taken from another table.
latest_confirmed = covid_confirmed.columns[-1]

# This list is used to define the colour of the propagation in a country
bins = [0, 100, 1000, 5000, 10000, 50000, 75000, 100000]

# folium rejects data values that fall outside the provided bins, so the
# upper edge is stretched whenever a country exceeds it.
highest_confirmed = covid_confirmed[latest_confirmed].max()
if highest_confirmed > bins[-1]:
    bins[-1] = int(highest_confirmed)

# Every country drawn in green is absent from the dataset, which means that
# either there is no case or the information is unknown.
choropleth = folium.Choropleth(
    geo_data=countries_geo,
    name='choropleth',
    data=covid_confirmed,
    columns=['Country', latest_confirmed],
    key_on='feature.id',
    nan_fill_color='green',
    fill_color='YlOrRd',
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name='Confirmed case ',
    bins=bins
).add_to(m)

# This GeoJson layer displays a tooltip with the information of a country
# (deaths/recovered/confirmed). Its fill opacity is set to 0 so that it does
# not hide the choropleth colours underneath.
folium.GeoJson(
    gpd1[['geometry', 'name', 'Confirmed', 'Recovered', 'Deaths']],
    name="Countries",
    style_function=lambda x: {"weight": 0.25, 'color': 'black', 'fillOpacity': 0},
    highlight_function=lambda x: {'weight': 1, 'color': 'black'},
    tooltip=folium.features.GeoJsonTooltip(
        fields=['name', 'Confirmed', 'Recovered', 'Deaths'],
        aliases=['Country', 'Confirmed', 'Recovered', 'Deaths'],
    ),
).add_to(m)

folium.LayerControl().add_to(m)

m