In [1]:
import pandas as pd
import sys
import numpy as np
import geopandas as gpd
import fiona
fiona.supported_drivers  

# Show every column when a DataFrame is displayed (no column truncation).
pd.set_option('display.max_columns', None)
# Add the sibling ../src directory to the import path so `functions` can be imported.
sys.path.append('../src')
# NOTE(review): the star-import hides where names come from. This notebook calls
# to_proper_country_name and change_to_country_id, presumably provided by
# `functions` — confirm, and consider importing them explicitly.
from functions import *

# Geojson multipolygons

In this notebook we prepare the GeoJSON file of country Polygon and MultiPolygon geometries so that each geometry is keyed by our country ids, which lets us draw a Kepler map in Streamlit.

In [2]:
# Read the raw country boundaries into a GeoDataFrame

raw_geojson_path = '../raw_data/countries.geojson'
countries_geo = gpd.read_file(raw_geojson_path)

In [3]:
# Display the frame as loaded to inspect its columns and geometry types.
countries_geo

Unnamed: 0,ADMIN,ISO_A3,geometry
0,Aruba,ABW,"POLYGON ((-69.99694 12.57758, -69.93639 12.531..."
1,Afghanistan,AFG,"POLYGON ((71.04980 38.40866, 71.05714 38.40903..."
2,Angola,AGO,"MULTIPOLYGON (((11.73752 -16.69258, 11.73851 -..."
3,Anguilla,AIA,"MULTIPOLYGON (((-63.03767 18.21296, -63.09952 ..."
4,Albania,ALB,"POLYGON ((19.74777 42.57890, 19.74601 42.57993..."
...,...,...,...
250,Samoa,WSM,"MULTIPOLYGON (((-171.57002 -13.93816, -171.564..."
251,Yemen,YEM,"MULTIPOLYGON (((53.30824 12.11839, 53.31027 12..."
252,South Africa,ZAF,"MULTIPOLYGON (((37.86378 -46.94085, 37.83644 -..."
253,Zambia,ZMB,"POLYGON ((31.11984 -8.61663, 31.14102 -8.60619..."


In [4]:
# Run our custom name helper over ADMIN and list the ten most frequent results;
# any count above 1 means several source rows collapsed onto the same name.

normalised_names = countries_geo['ADMIN'].apply(to_proper_country_name)
normalised_names.value_counts().head(10)

189 Aruba Cuba max sim:67
157 American Samoa Syrian Arab Republic max sim:56
17 Antarctica Costa Rica max sim:60
176 Ashmore and Cartier Islands Marshall Islands max sim:62
23 French Southern and Antarctic Lands South Africa max sim:51
57 Bajo Nuevo Bank (Petrel Is.) Lao People’s Democratic Republic max sim:43
92 Saint Barthelemy Belize max sim:50
9 Bermuda Germany max sim:57
161 Clipperton Island Solomon Islands max sim:62
157 Cyprus No Mans Area Syrian Arab Republic max sim:53
176 Coral Sea Islands Marshall Islands max sim:67
53 Curaçao Ecuador max sim:57
161 Cayman Islands Solomon Islands max sim:69
157 Dhekelia Sovereign Base Area Syrian Arab Republic max sim:41
176 Falkland Islands Marshall Islands max sim:69
176 Faroe Islands Marshall Islands max sim:69
0 Federated States of Micronesia Micronesia max sim:62
9 Guernsey Germany max sim:67
73 Gibraltar Malta max sim:57
120 Guam Guatemala max sim:62
83 Hong Kong S.A.R. Mongolia max sim:45
161 Heard Island and McDonald Islands Solomon

Thailand            4
Marshall Islands    3
Cyprus              2
Angola              2
Solomon Islands     2
Congo               2
Ireland             2
Somalia             2
New Zealand         1
Nauru               1
Name: ADMIN, dtype: int64

In [5]:
# The duplicated names came from rows that merely resemble one of our countries
# but are not registered in our database. Remove those rows by index label:

lookalike_rows = [5, 216, 232, 154, 178, 58, 3, 49, 48, 92, 207]
countries_geo = countries_geo.drop(index=lookalike_rows)

In [6]:
# Overwrite the ADMIN column with the normalised names for good:

countries_geo['ADMIN'] = countries_geo['ADMIN'].apply(to_proper_country_name)

countries_geo

189 Aruba Cuba max sim:67
157 American Samoa Syrian Arab Republic max sim:56
17 Antarctica Costa Rica max sim:60
176 Ashmore and Cartier Islands Marshall Islands max sim:62
23 French Southern and Antarctic Lands South Africa max sim:51
57 Bajo Nuevo Bank (Petrel Is.) Lao People’s Democratic Republic max sim:43
92 Saint Barthelemy Belize max sim:50
9 Bermuda Germany max sim:57
161 Clipperton Island Solomon Islands max sim:62
157 Cyprus No Mans Area Syrian Arab Republic max sim:53
176 Coral Sea Islands Marshall Islands max sim:67
53 Curaçao Ecuador max sim:57
161 Cayman Islands Solomon Islands max sim:69
157 Dhekelia Sovereign Base Area Syrian Arab Republic max sim:41
176 Falkland Islands Marshall Islands max sim:69
176 Faroe Islands Marshall Islands max sim:69
0 Federated States of Micronesia Micronesia max sim:62
9 Guernsey Germany max sim:67
73 Gibraltar Malta max sim:57
120 Guam Guatemala max sim:62
83 Hong Kong S.A.R. Mongolia max sim:45
161 Heard Island and McDonald Islands Solomon

Unnamed: 0,ADMIN,ISO_A3,geometry
0,Aruba (country not in ref list),ABW,"POLYGON ((-69.99694 12.57758, -69.93639 12.531..."
1,Afghanistan,AFG,"POLYGON ((71.04980 38.40866, 71.05714 38.40903..."
2,Angola,AGO,"MULTIPOLYGON (((11.73752 -16.69258, 11.73851 -..."
4,Albania,ALB,"POLYGON ((19.74777 42.57890, 19.74601 42.57993..."
6,Andorra,AND,"POLYGON ((1.70701 42.50278, 1.69750 42.49446, ..."
...,...,...,...
250,Samoa,WSM,"MULTIPOLYGON (((-171.57002 -13.93816, -171.564..."
251,Yemen,YEM,"MULTIPOLYGON (((53.30824 12.11839, 53.31027 12..."
252,South Africa,ZAF,"MULTIPOLYGON (((37.86378 -46.94085, 37.83644 -..."
253,Zambia,ZMB,"POLYGON ((31.11984 -8.61663, 31.14102 -8.60619..."


In [7]:
# Identify every row whose name carries the "not in ref list" marker and drop it.
# One vectorised mask replaces the per-name lookup, which re-filtered the whole
# frame for each row and, via .index[0], only ever found the first of any rows
# sharing the same name.

# regex=False: the marker contains parentheses and must match literally.
# na=False: rows with a missing name are kept rather than raising.
unmatched = countries_geo.ADMIN.str.contains('(country not in ref list)', regex=False, na=False)

# Index labels of the rows to remove (kept under the original name `to_pop`).
to_pop = countries_geo.index[unmatched].tolist()

countries_geo = countries_geo.drop(index = to_pop).reset_index(drop = True)

In [8]:
# Swap each country name in ADMIN for its country_id

countries_geo['ADMIN'] = countries_geo['ADMIN'].apply(change_to_country_id)

In [9]:
# Display the frame after ADMIN has been passed through change_to_country_id.
countries_geo

Unnamed: 0,ADMIN,ISO_A3,geometry
0,146,AFG,"POLYGON ((71.04980 38.40866, 71.05714 38.40903..."
1,118,AGO,"MULTIPOLYGON (((11.73752 -16.69258, 11.73851 -..."
2,17,ALB,"POLYGON ((19.74777 42.57890, 19.74601 42.57993..."
3,147,AND,"POLYGON ((1.70701 42.50278, 1.69750 42.49446, ..."
4,71,ARE,"MULTIPOLYGON (((53.86305 24.23469, 53.88860 24..."
...,...,...,...
187,177,WSM,"MULTIPOLYGON (((-171.57002 -13.93816, -171.564..."
188,192,YEM,"MULTIPOLYGON (((53.30824 12.11839, 53.31027 12..."
189,20,ZAF,"MULTIPOLYGON (((37.86378 -46.94085, 37.83644 -..."
190,85,ZMB,"POLYGON ((31.11984 -8.61663, 31.14102 -8.60619..."


In [10]:
# Nice! No repeated country_id: every value should show a count of 1.
countries_geo['ADMIN'].value_counts()

146    1
118    1
8      1
135    1
130    1
      ..
93     1
158    1
117    1
160    1
45     1
Name: ADMIN, Length: 192, dtype: int64

In [11]:
# Drop the ISO code column and rename ADMIN to country_id.
# Renaming by name (instead of overwriting .columns positionally) cannot
# mislabel a column if the column order or count ever differs.

countries_geo = (
    countries_geo
    .drop(columns='ISO_A3')
    .rename(columns={'ADMIN': 'country_id'})
)

countries_geo  # Great. This is ready to merge with other tables to draw the kepler map

Unnamed: 0,country_id,geometry
0,146,"POLYGON ((71.04980 38.40866, 71.05714 38.40903..."
1,118,"MULTIPOLYGON (((11.73752 -16.69258, 11.73851 -..."
2,17,"POLYGON ((19.74777 42.57890, 19.74601 42.57993..."
3,147,"POLYGON ((1.70701 42.50278, 1.69750 42.49446, ..."
4,71,"MULTIPOLYGON (((53.86305 24.23469, 53.88860 24..."
...,...,...
187,177,"MULTIPOLYGON (((-171.57002 -13.93816, -171.564..."
188,192,"MULTIPOLYGON (((53.30824 12.11839, 53.31027 12..."
189,20,"MULTIPOLYGON (((37.86378 -46.94085, 37.83644 -..."
190,85,"POLYGON ((31.11984 -8.61663, 31.14102 -8.60619..."


In [12]:
# Persist the cleaned frame as GeoJSON:

clean_geojson_path = '../clean_data/countries_geo.geojson'
countries_geo.to_file(clean_geojson_path, driver='GeoJSON', index=False)