# Task 6.3: Geographical Visualisations

## This script contains:
1. Import libraries and data
2. Data wrangling & cleaning
3. Plotting a choropleth

# 1. Import Data and Libraries

## Libraries

In [None]:
# Import Libraries
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib
import os
import folium
import json

In [None]:
# This command prompts matplotlib visuals to appear in the notebook

%matplotlib inline

## Data

#### Geojson file

In [None]:
# Location of the world-countries GeoJSON file; this path is handed to folium
# as the geometry source (geo_data) for every choropleth below.
global_geo = r'C:/Users/M de Villiers/Desktop/Data Analytics/Achievement 6/countries.geojson'

In [None]:
# Load the geojson into a dictionary so its contents can be inspected.
# The context manager closes the file handle once parsing is done, and the
# encoding is given explicitly because GeoJSON is UTF-8 by specification
# (the platform default encoding would garble non-ASCII country names).
with open(r'C:/Users/M de Villiers/Desktop/Data Analytics/Achievement 6/countries.geojson', encoding='utf-8') as global_file:
    # return JSON object as dictionary
    global_json = json.load(global_file)

In [None]:
# Project root folder; data files are read from and written to its
# sub-folders by joining onto this path with os.path.join.
path = r'C:\Users\M de Villiers\Desktop\Data Analytics\Achievement 6'

In [None]:
# Load the prepared COVID-19 dataset from <path>/02_Data/Prepared_Data.
# index_col=False tells pandas not to use the first column as the row index.
df=pd.read_csv(os.path.join(path, '02_Data', 'Prepared_Data', 'COVID-19_Final1.csv'), index_col=False)

In [None]:
# Preview the first rows to confirm the file loaded as expected
df.head()

In [None]:
# Preview the last rows of the dataset
df.tail()

In [None]:
# Show the data type pandas assigned to each column
df.dtypes

In [None]:
# Count the missing values in each column
df.isnull().sum()

# 2. Data Wrangling & Cleaning

I cleaned and wrangled the data in previous exercises, and I did the final cleaning and wrangling in a separate notebook.

In [None]:
# Lift the row display limit so long outputs (e.g. the full country list)
# are shown without truncation.
pd.set_option('display.max_rows', None)

In [None]:
# Collect the country name (the ADMIN property) of every feature in the
# geojson, so the names can be compared with the dataset's country column.
# Iterating the features directly avoids indexing by position.
denominations_json = [
    feature['properties']['ADMIN'] for feature in global_json['features']
]

denominations_json

In [None]:
# Check that dataset country names match country names in geojson file:
# list every distinct country name alphabetically with its row count.
df['country'].value_counts().sort_index()

In [None]:
# Rename the dataset's spelling to the "... of the Congo" form so it can be
# matched against the geojson country names.
df['country'] = df['country'].replace(
    {'Democratic Republic of Congo': 'Democratic Republic of the Congo'}
)

In [None]:
# Re-list the country names to verify the rename above took effect
df['country'].value_counts().sort_index()

In [None]:
# Rename two more countries in a single pass (dataset name -> new name).
df['country'] = df['country'].replace({
    "Cote d'Ivoire": 'Ivory Coast',
    'Congo': 'Republic of Congo',
})

In [None]:
# Re-list the country names to verify the renames above took effect
df['country'].value_counts().sort_index()

In [None]:
# Remaining renames (dataset name -> new name), applied in one pass.
# Only whole-value matches are replaced, and no new name is itself a key,
# so the outcome is the same as renaming one country at a time.
df['country'] = df['country'].replace({
    'Bahamas': 'The Bahamas',
    'Faeroe Islands': 'Faroe Islands',
    'Micronesia (country)': 'Federated States of Micronesia',
    'Guinea-Bissau': 'Guinea Bissau',
    'Hong Kong': 'Hong Kong S.A.R.',
    'Macao': 'Macao S.A.R.',
    'North Macedonia': 'Macedonia',
    'Pitcairn': 'Pitcairn Islands',
    'Serbia': 'Republic of Serbia',
    'Sint Maarten (Dutch part)': 'Sint Maarten',
    'Timor': 'East Timor',
    'United States': 'United States of America',
})

In [None]:
# Final check of the country names after all renames
df['country'].value_counts().sort_index()

In [None]:
# Total deaths expressed as a percentage of the population
df['%deaths'] = df['total_deaths'].div(df['population']).mul(100)

In [None]:
# Total cases expressed as a percentage of the population
df['%cases'] = df['total_cases'].div(df['population']).mul(100)

In [None]:
# Preview the first ten rows, which now include the new percentage columns
df.head(10)

In [None]:
# Save the dataset with the renamed countries and the new percentage columns;
# index=False keeps the row index out of the written file.
df.to_csv(os.path.join(path, '02_Data', 'Prepared_Data', 'COVID_19_DATA.csv'), index=False)

# 3. Plotting a Choropleth (Question 7)

### People Vaccinated (fully or with at least one shot)

#### People fully vaccinated per hundred

In [None]:
# Data to plot: one row per country holding the largest
# people_fully_vaccinated_per_hundred value recorded for it.
data_to_plot = df.groupby(['country'], as_index=False).agg(
    people_fully_vaccinated_per_hundred=pd.NamedAgg(
        column='people_fully_vaccinated_per_hundred', aggfunc='max'
    )
)
data_to_plot.head()

In [None]:
# World-level base map for the fully-vaccinated choropleth
map = folium.Map(location=[100, 0], zoom_start=1.5)

# Colour each country by its fully-vaccinated value; the 'country' column is
# matched against the ADMIN property of each geojson feature.
fully_vaccinated_layer = folium.Choropleth(
    geo_data=global_geo,
    data=data_to_plot,
    columns=['country', 'people_fully_vaccinated_per_hundred'],
    key_on='feature.properties.ADMIN',
    fill_color='YlGnBu',
    fill_opacity=0.6,
    line_opacity=0.1,
    legend_name="% people fully vaccinated",
)
fully_vaccinated_layer.add_to(map)
folium.LayerControl().add_to(map)

# Display the map in the notebook
map

In [None]:
# Export the map as a standalone HTML file (relative path, so it lands in the
# current working directory)
map.save('people fully vaccinated per hundred.html')

#### People vaccinated per hundred (had at least one shot)

In [None]:
# Data to plot1: one row per country holding the largest
# people_vaccinated_per_hundred value recorded for it.
data_to_plot1 = df.groupby(['country'], as_index=False).agg(
    people_vaccinated_per_hundred=pd.NamedAgg(
        column='people_vaccinated_per_hundred', aggfunc='max'
    )
)
data_to_plot1.head()

In [None]:
# World-level base map for the at-least-one-shot choropleth
map1 = folium.Map(location=[100, 0], zoom_start=1.5)

# Colour each country by its people_vaccinated_per_hundred value; the
# 'country' column is matched against the ADMIN property of each feature.
one_shot_layer = folium.Choropleth(
    geo_data=global_geo,
    data=data_to_plot1,
    columns=['country', 'people_vaccinated_per_hundred'],
    key_on='feature.properties.ADMIN',
    fill_color='YlGnBu',
    fill_opacity=0.6,
    line_opacity=0.1,
    legend_name="people vaccinated per hundred",
)
one_shot_layer.add_to(map1)
folium.LayerControl().add_to(map1)

# Export as standalone HTML, then display in the notebook
map1.save('people vaccinated per hundred.html')
map1

### Total Vaccinations

In [None]:
# Data to plot2: one row per country holding the largest
# total_vaccinations_per_hundred value recorded for it.
data_to_plot2 = df.groupby(['country'], as_index=False).agg(
    total_vaccinations_per_hundred=pd.NamedAgg(
        column='total_vaccinations_per_hundred', aggfunc='max'
    )
)
data_to_plot2.head()

In [None]:
# World-level base map (high-level zoom) for the total-vaccinations choropleth
map2 = folium.Map(location=[100, 0], zoom_start=1.5)

# Colour each country by its total_vaccinations_per_hundred value; the
# 'country' column is matched against the ADMIN property of each feature.
total_vaccinations_layer = folium.Choropleth(
    geo_data=global_geo,
    data=data_to_plot2,
    columns=['country', 'total_vaccinations_per_hundred'],
    key_on='feature.properties.ADMIN',
    fill_color='PuRd',
    fill_opacity=0.6,
    line_opacity=0.1,
    legend_name="total vaccinations per hundred",
)
total_vaccinations_layer.add_to(map2)
folium.LayerControl().add_to(map2)

# Export as standalone HTML, then display in the notebook
map2.save('total vaccinations per hundred.html')
map2

### Deaths

#### Total deaths

In [None]:
# Data to plot3: one row per country holding the largest total_deaths value
# recorded for it.
data_to_plot3 = df.groupby(['country'], as_index=False).agg(
    total_deaths=pd.NamedAgg(column='total_deaths', aggfunc='max')
)
data_to_plot3.head()

In [None]:
# World-level base map for the total-deaths choropleth
map3 = folium.Map(location=[100, 0], zoom_start=1.5)

# Colour each country by its total_deaths value; the 'country' column is
# matched against the ADMIN property of each feature.
total_deaths_layer = folium.Choropleth(
    geo_data=global_geo,
    data=data_to_plot3,
    columns=['country', 'total_deaths'],
    key_on='feature.properties.ADMIN',
    fill_color='YlOrRd',
    fill_opacity=0.6,
    line_opacity=0.1,
    legend_name="total deaths",
)
total_deaths_layer.add_to(map3)
folium.LayerControl().add_to(map3)

# Export as standalone HTML, then display in the notebook
map3.save('total deaths.html')
map3

#### Death rate (percentage)

In [None]:
# Data to plot4: one row per country holding the largest %deaths value
# recorded for it, exposed under the column name death_rate.
data_to_plot4 = df.groupby(['country'], as_index=False).agg(
    death_rate=pd.NamedAgg(column='%deaths', aggfunc='max')
)
data_to_plot4.head()

In [None]:
# World-level base map for the death-rate choropleth
map4 = folium.Map(location = [100, 0], zoom_start = 1.5)

# Colour each country by its death_rate value; the 'country' column is
# matched against the ADMIN property of each geojson feature.
folium.Choropleth(
    geo_data = global_geo, 
    data = data_to_plot4,
    columns = ['country', 'death_rate'],
    key_on = 'feature.properties.ADMIN',
    fill_color = 'YlOrRd', fill_opacity=0.6, line_opacity=0.1,
    legend_name = "Percetage of Deaths").add_to(map4)
folium.LayerControl().add_to(map4)

# Save THIS map (map4). Saving map2 here would write the total-vaccinations
# map under the death-rate file name.
map4.save('Percetage of Deaths.html')
map4

### Question 8: Discuss the results and what they mean in a markdown section.
#### Does the analysis answer any of your existing research questions?
#### Does the analysis lead you to any new research questions?

#### My initial Questions
1.	Which countries are using which vaccines? This question cannot be answered with the above choropleth maps, although I would like to learn how to plot categorical data onto a map, e.g. assign a colour to each vaccine combination, plot each country in the corresponding colour, and add a legend explaining which combination of vaccines each colour represents.
2.	How have the vaccine rollouts been progressing in each country? This question was partly answered in the previous exercise, although plotting 223 countries on a line chart was a bit chaotic.
3.	Which countries are more advanced and why? This question can be answered with the first 3 maps (see below)
4.	Which countries had the first batch of vaccines? This was answered in the previous exercise.
5.	Eventually, I would also want to know what effect the vaccines have had on the COVID-19 situation worldwide. This might be explored when a second dataset about daily covid-19 deaths can be introduced and merged. I would need to do another time series with both deaths and vaccines to see how both variables change over time. I could also do a correlation matrix to see if there is a correlation, specifically a negative relationship (as vaccines increase, deaths decrease).
#### Questions that can be answered by the maps and further analysis:
3. When looking at the first 3 maps, a few countries stand out as being most advanced in the whole vaccination process. 

As far as I can tell the following countries are the forerunners when it comes to people _fully vaccinated_: Brunei, Canada, Cambodia, Chile, China, Denmark, Faroe Islands, Portugal, Spain, and United Arab Emirates.

The following countries are the forerunners when it comes to people _vaccinated with at least one shot_: Australia, Argentina, Brazil, Brunei, Canada, Cambodia, Chile, China, Cuba, Denmark, Faroe Islands, Iceland, Italy, New Zealand, Portugal, Spain, Seychelles, United Arab Emirates, United States of America, and Uruguay.

In terms of _total vaccinations_ administered, the following countries are the forerunners (this list probably differs so much because of booster shots, which fully vaccinated people can get without increasing the number of fully vaccinated people): Chile, Cuba, Denmark, Portugal, South Korea, Sri Lanka, and United Arab Emirates.

In general, Africa as a continent has been progressing very poorly with administering vaccines. I am especially shocked at the low vaccination rates of Algeria, Egypt, Libya, Gabon and South Africa, since they are on the top 10 list of most developed countries in Africa. Even Rwanda, which is not on this top 10 list of most developed countries, is more advanced in the vaccination process.

#### New Questions:
Since my 5th question is whether the vaccines had an effect on the death rate, I decided to map out the deaths. The new questions that were raised here were:
1. How has covid-19 affected each country in terms of total deaths and death rate (proportion of the population)?
2. Which countries were most affected regarding deaths and death rate?

These were answered by plotting the data in a choropleth map:

When looking at total deaths, the USA and Brazil seem to have the most, but when looking at the death rate (proportion of the population that died of covid-19), Peru, Bosnia and Herzegovina, Bulgaria, Hungary, Montenegro, and North Macedonia were hit hardest by covid-19.

These countries will be my center of investigation when looking at how vaccinations have affected deaths over time.

I will also be looking at this on a global level, since my hypothesis is: "The more people have been fully vaccinated, the fewer deaths have occurred" or, more generally, "The more vaccinations have been administered, the fewer daily deaths have been reported."