# Prepare harmonized geo entities table

In this notebook we grab the current countries from the MySQL `country_name_tool_countrydata` table, clean and enrich them, and export them to an intermediate Feather file.

In [1]:
from pathlib import Path

import pandas as pd

In [2]:
# Typed placeholder so that editors and type checkers treat `df` as a DataFrame
# in the cells below. The annotation is evaluated at runtime, so `pd` must be
# imported in the imports cell before this cell runs.
# NOTE(review): presumably `df` is filled by the `%%mysql` cell that follows — confirm.
df: pd.DataFrame = None

In [3]:
%%mysql -o df

select * from country_name_tool_countrydata
/* -o df names the output variable explicitly, as the other queries in this notebook do */

In [4]:
# Order the rows by the `owid_name` column. Reassign the sorted copy instead of
# using inplace=True, so the cell reads as an explicit input -> output step.
df = df.sort_values("owid_name")

In [5]:
# Discard the old row labels and renumber rows 0..n-1 (drop=True keeps the old
# index from being added as a column). pandas' Feather writer expects a default
# index. Reassign instead of using inplace=True.
df = df.reset_index(drop=True)

In [6]:
# Make sure the output folder for the Feather files exists; an already
# existing folder is not an error.
Path("intermediate/").mkdir(parents=False, exist_ok=True)

In [7]:
# Write the countries table to a Feather file in the intermediate folder.
countries_feather = "intermediate/01-countries-from-mysql.feather"
df.to_feather(countries_feather)

## Get and save geo entities from entities table

Get and save all entities that have a code assigned. These will be merged into the countries table in a later notebook.

In [8]:
%%mysql -o entities_with_code

SELECT
    e.id AS id,
    e.code AS code,
    e.name AS name
FROM entities AS e
/* keep only entities that have a code assigned */
WHERE e.code IS NOT NULL

In [9]:
# Order the rows by the `name` column. Reassign the sorted copy instead of
# using inplace=True, so the cell reads as an explicit input -> output step.
entities_with_code = entities_with_code.sort_values("name")

In [10]:
# Discard the old row labels and renumber rows 0..n-1 (drop=True keeps the old
# index from being added as a column). pandas' Feather writer expects a default
# index. Reassign instead of using inplace=True.
entities_with_code = entities_with_code.reset_index(drop=True)

In [11]:
# Write the entities that have a code to a Feather file in the intermediate folder.
entities_with_code_feather = "intermediate/entities-with-code.feather"
entities_with_code.to_feather(entities_with_code_feather)

## Get and save geo entities via population table

For sanity checking, get all entities that are referenced in the popular Gapminder population dataset (variable 72). These will be merged into the countries table in a later notebook.

In [12]:
%%mysql -o entities_from_population

SELECT
    d.entityId AS id,
    e.code AS code,
    e.name AS name
FROM data_values AS d
/* left join: entity ids without a matching entities row are kept, with null code and name */
LEFT JOIN entities AS e ON e.id = d.entityId
/* 72 is the population variable named in the section text above */
WHERE d.variableId = 72
/* one output row per entity id. NOTE(review): code and name are selected without
   aggregation, which presumably relies on entities.id being unique; confirm */
GROUP BY d.entityId

In [13]:
# Order the rows by the `name` column. Reassign the sorted copy instead of
# using inplace=True, so the cell reads as an explicit input -> output step.
entities_from_population = entities_from_population.sort_values("name")

In [14]:
# Discard the old row labels and renumber rows 0..n-1 (drop=True keeps the old
# index from being added as a column). pandas' Feather writer expects a default
# index. Reassign instead of using inplace=True.
entities_from_population = entities_from_population.reset_index(drop=True)

In [15]:
# Write the entities referenced by the population variable to a Feather file
# in the intermediate folder.
entities_from_population_feather = "intermediate/entities-from-population.feather"
entities_from_population.to_feather(entities_from_population_feather)