#  <span style="text-align:center; color:black; font-family:Georgia; font-size:1.1em;"> Goncalves Quentin - ZOOV Data engineer internship - Python test </span>


__Goal__ : Write a Map-based visualization of a world-scale database

# <span style="color:black; font-family:Georgia; font-size:1em;"> 1. Importing the necessary libraries </span>


In [20]:
import pandas as pd
import numpy as np
import folium
import folium.plugins
import json
import geopandas as gpd


from shapely.geometry import Point
from shapely.geometry.polygon import Polygon

# <span style="color:black; font-family:Georgia; font-size:1em;"> 2. Reading the datasets </span>

# <span style="color:black; font-family:Georgia; font-size:0.8em;"> CSSE COVID-19 Dataset </span>

The dataset contains daily time series summary tables, including confirmed, deaths and recovered. All data are from the daily case report.

Field description:
- Province/State: China - province name; US/Canada/Australia - city name, state/province name; Others - name of the event (e.g., "Diamond Princess" cruise ship); other countries - blank.
- Country/Region: country/region name.
- Lat and Long: a coordinates reference for the user.
- Date fields: M/DD/YYYY (UTC), the same data as MM-DD-YYYY.csv file.


In [21]:
# Load the three CSSE COVID-19 global time series: confirmed cases, deaths
# and recovered cases, in that order.
covid_confirmed, covid_death, covid_recovered = (
    pd.read_csv(csv_path)
    for csv_path in (
        'time_series_covid19_confirmed_global.csv',  # COVID-19 confirmed cases
        'time_series_covid19_deaths_global.csv',     # COVID-19 deaths
        'time_series_covid19_recovered_global.csv',  # COVID-19 recovered cases
    )
)

# Preview the first rows of the confirmed-cases table
covid_confirmed.head()

Unnamed: 0,Province/State,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,3/16/20,3/17/20,3/18/20,3/19/20,3/20/20,3/21/20,3/22/20,3/23/20,3/24/20,3/25/20
0,,Afghanistan,33.0,65.0,0,0,0,0,0,0,...,21,22,22,22,24,24,40,40,74,84
1,,Albania,41.1533,20.1683,0,0,0,0,0,0,...,51,55,59,64,70,76,89,104,123,146
2,,Algeria,28.0339,1.6596,0,0,0,0,0,0,...,54,60,74,87,90,139,201,230,264,302
3,,Andorra,42.5063,1.5218,0,0,0,0,0,0,...,2,39,39,53,75,88,113,133,164,188
4,,Angola,-11.2027,17.8739,0,0,0,0,0,0,...,0,0,0,0,1,2,2,3,3,3


In [22]:
# Read the GeoJSON file describing the country borders.
# The context manager closes the file handle as soon as parsing is done, and
# the explicit encoding avoids depending on the platform default
# (GeoJSON is UTF-8 encoded).
with open('countries.geo.json', encoding='utf-8') as geo_file:
    countries_geo = json.load(geo_file)

# <span style="color:black; font-family:Georgia; font-size:1em;"> 3. Cleaning data </span>
    
The goal here is to aggregate the confirmed COVID-19 cases by country.

In [23]:
# The datasets' 'Country' values are replaced by the country ids used in
# 'countries.geo.json'; the lookup is done from each row's coordinates.
# In countries.geo.json every country is a Polygon or a MultiPolygon feature.
def get_country(long, lat):
    """Return the id of the country whose border contains the given point.

    Parameters
    ----------
    long : float
        Longitude of the point (x coordinate).
    lat : float
        Latitude of the point (y coordinate).

    Returns
    -------
    The ``id`` of the first feature of ``countries_geo`` that contains the
    point, or ``None`` when no feature contains it.
    """
    point = Point(long, lat)

    # Each feature of the GeoJSON collection is one country
    for country in countries_geo.get('features'):
        geometry = country.get('geometry')
        geometry_type = geometry.get('type')
        coordinates = geometry.get('coordinates')

        # Normalise both geometry kinds to a list of polygons, each polygon
        # being a list of rings, so that a single loop handles them.
        if geometry_type == "Polygon":
            polygons = [coordinates]
        elif geometry_type == "MultiPolygon":
            polygons = coordinates
        else:
            continue

        for rings in polygons:
            # The first ring is the outer border, any following rings are
            # holes. The coordinate lists are passed to shapely as they are,
            # so the shared GeoJSON data is no longer modified in place.
            border = Polygon(rings[0], rings[1:])
            if border.contains(point):
                return country.get('id')

    # The point lies in no known country (e.g. at sea, like a cruise ship)
    return None

In [24]:
# Rewrite the country column of a dataset so that it matches the ids of
# 'countries.geo.json', which also makes it easy to group the places that
# belong to the same country.
def cleaned_data(df):
    """Aggregate a CSSE time-series table by country id.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with the columns 'Province/State', 'Country/Region', 'Lat',
        'Long' followed by one column per date.

    Returns
    -------
    pandas.DataFrame
        One row per country id (the index, named 'Country'), with a
        'Country' column in first position followed by the summed columns.
        Rows whose coordinates match no country are left out of the result.
    """
    # The province/state level is not needed when comparing countries
    df = df.drop('Province/State', axis=1)
    df = df.rename({'Country/Region': 'Country'}, axis=1)

    # Replace the country name by the id of the country containing the
    # row's coordinates (None when no country contains them).
    df['Country'] = [
        get_country(long, lat) for long, lat in zip(df['Long'], df['Lat'])
    ]

    # Sum all the rows sharing a country id. `DataFrame.sum(level=...)` no
    # longer exists in recent pandas, groupby is its documented replacement;
    # sort=False keeps the countries in their order of first appearance.
    # NOTE(review): 'Lat' and 'Long' are summed as well, which is not a
    # meaningful coordinate for countries spanning several rows.
    df = df.set_index('Country')
    df = df.groupby(level='Country', sort=False).sum()

    # Expose the country id as a regular column too
    df.insert(loc=0, column='Country', value=df.index)

    return df

# Apply the same per-country aggregation to the three tables.
covid_confirmed, covid_death, covid_recovered = map(
    cleaned_data, (covid_confirmed, covid_death, covid_recovered)
)

# Display the aggregated confirmed-cases table
covid_confirmed

Unnamed: 0_level_0,Country,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,1/28/20,...,3/16/20,3/17/20,3/18/20,3/19/20,3/20/20,3/21/20,3/22/20,3/23/20,3/24/20,3/25/20
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
AFG,AFG,33.000000,65.000000,0,0,0,0,0,0,0,...,21,22,22,22,24,24,40,40,74,84
ALB,ALB,41.153300,20.168300,0,0,0,0,0,0,0,...,51,55,59,64,70,76,89,104,123,146
DZA,DZA,28.033900,1.659600,0,0,0,0,0,0,0,...,54,60,74,87,90,139,201,230,264,302
FRA,FRA,132.467200,11.152200,0,0,2,3,3,3,4,...,6642,7698,9089,10931,12698,14381,16154,20012,22491,25452
AGO,AGO,-11.202700,17.873900,0,0,0,0,0,0,0,...,0,0,0,0,1,2,2,3,3,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
LAO,LAO,19.856270,102.495496,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,2,3
LBY,LBY,26.335100,17.228331,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,1
PSE,PSE,31.952200,35.233200,0,0,0,0,0,0,0,...,38,39,41,44,47,48,52,59,59,-1
GNB,GNB,11.803700,-15.180400,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,2


In [25]:
# Load the country shapes as a GeoDataFrame indexed by country id (the 'id'
# column becomes the index); it feeds the pop-up showing the useful figures.
gpd1 = gpd.read_file('countries.geo.json').set_index('id')

In [26]:
# For each table, take its last column (the most recent count) and join it,
# on the country id index, to the GeoDataFrame used for the pop-up. The joined
# column is renamed to 'Confirmed', 'Recovered' and 'Deaths' respectively.
for cases, label in (
    (covid_confirmed, 'Confirmed'),
    (covid_recovered, 'Recovered'),
    (covid_death, 'Deaths'),
):
    last_columns = cases.columns[-1]
    latest_counts = cases[last_columns]
    gpd1 = gpd1.merge(latest_counts, left_index=True, right_index=True)
    gpd1 = gpd1.rename(columns={last_columns: label})

gpd1.head()

Unnamed: 0,name,geometry,Confirmed,Recovered,Deaths
AFG,Afghanistan,"POLYGON ((61.21082 35.65007, 62.23065 35.27066...",84,1,2
AGO,Angola,"MULTIPOLYGON (((16.32653 -5.87747, 16.57318 -6...",3,0,0
ALB,Albania,"POLYGON ((20.59025 41.85540, 20.46317 41.51509...",146,10,5
ARE,United Arab Emirates,"POLYGON ((51.57952 24.24550, 51.75744 24.29407...",333,45,2
ARG,Argentina,"MULTIPOLYGON (((-65.50000 -55.20000, -66.45000...",387,52,8


# <span style="color:black; font-family:Georgia; font-size:1em;"> 4. Displaying map </span>

In [29]:
m = folium.Map(location=[46.1313856, -2.4357072], zoom_start=1)

# Colour the countries by the most recent confirmed count. The column is
# taken from the confirmed table itself rather than from a variable left over
# from another table, so the map stays correct even if the tables do not end
# on the same date.
latest_confirmed = covid_confirmed.columns[-1]

# Countries drawn in green are absent from the dataset: either they have no
# case or their situation is unknown.
choropleth = folium.Choropleth(
    geo_data=countries_geo,
    name='choropleth',
    data=covid_confirmed,
    columns=['Country', latest_confirmed],
    key_on='feature.id',
    nan_fill_color='green',
    fill_color='YlOrRd',
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name='Confirmed case ',
).add_to(m)

# This GeoJson layer displays a tooltip with the figures of a country
# (confirmed / recovered / deaths). Its fill opacity is 0 so that it does not
# hide the choropleth colours underneath.
folium.GeoJson(
    gpd1[['geometry', 'name', 'Confirmed', 'Recovered', 'Deaths']],
    name="Countries",
    style_function=lambda x: {"weight": 0.25, 'color': 'black', 'fillOpacity': 0},
    highlight_function=lambda x: {'weight': 1, 'color': 'black'},
    tooltip=folium.features.GeoJsonTooltip(
        fields=['name', 'Confirmed', 'Recovered', 'Deaths'],
        aliases=['Country', 'Confirmed', 'Recovered', 'Deaths'],
    ),
).add_to(m)

# Let the user toggle the layers on and off
folium.LayerControl().add_to(m)

m