# 01 - First Exploration

First exploration of the data.

## Preliminaires 

### System 

In [None]:
cd ../

In [None]:
pwd

### Imports

In [None]:
import os, sys, logging

from dataclasses import dataclass

In [None]:
from IPython.display import display, HTML

In [None]:
import numpy as np
import pandas as pd

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go

import missingno as msno

In [None]:
from gbs.etl.extract import Extract

### Data

In [None]:
extract = Extract()
extract

In [None]:
data_dir = os.path.join(
    # os.getcwd(),
    extract.base,
    extract.folder,
    extract.subfolder,
)
data_dir

In [None]:
# # get data
# if (not os.path.exists(data_dir)) or  (not os.listdir(data_dir)):
#     extract.get_all(clean=True, include_production=True)

In [None]:
# extract.get_all(clean=True, production=True)

In [None]:
data_dir = "./data/source/"
os.listdir(data_dir)

In [None]:
!rm data/source/production.zip

In [None]:
!tree -L 1 ./

In [None]:
!tree -L 3 data/

In [None]:
crops = pd.read_csv(os.path.join(data_dir, "crops.csv"))
crops

In [None]:
country_specs = pd.read_csv(os.path.join(data_dir, "country_specs.csv"))
country_specs.head()

In [None]:
_path = "./data/source/production/"

fn_list = [
    os.path.join(_path, f) for f in os.listdir(_path) if f.endswith(".csv")
]
fn_list

In [None]:
# for fn in fn_list:
#     # read file
#     with open(fn, "r", encoding="latin-1") as f:
#         txt = f.read()

#     # clean file
#     txt.replace(",'", ",")

#     # write file
#     with open(fn, "w", encoding="utf8") as f:
#         f.write(txt)

In [None]:
_path = "./data/source/production/"

In [None]:
@dataclass
class Production:
    """Production data for a crop in a country in a year"""

    areacodes = pd.read_csv(os.path.join(_path, "areacodes.csv"))
    data_normalized = pd.read_csv(os.path.join(_path, "data_normalized.csv"))
    flags = pd.read_csv(os.path.join(_path, "flags.csv"))
    itemcodes = pd.read_csv(os.path.join(_path, "itemcodes.csv"))

## Exploration 

### Country Specs 

#### Display

In [None]:
country_specs.head(10)

In [None]:
country_specs.tail(10)

In [None]:
country_specs.sample(10)

#### Structure

In [None]:
country_specs.shape

In [None]:
country_specs.columns

In [None]:
country_specs.dtypes

In [None]:
country_specs.info()

In [None]:
crops.dtypes.value_counts()

In [None]:
for dtype in ["object", "float", "int"]:
    selected_dtype = country_specs.select_dtypes(include=[dtype])
    display(selected_dtype.columns)

In [None]:
_num = country_specs.select_dtypes(include=["number"])
_num

In [None]:
_num_cols = _num.columns.tolist()
[i for i in _num_cols if ("code" not in i) and ("id" not in i)]

**Conclusion**

No data in the crop dataset.

#### Nan

In [None]:
crops.isna().sum()

In [None]:
tmp = crops.isna().mean().round(2)
tmp[tmp > 0.00]

In [None]:
len(tmp[tmp > 0.00]) / len(tmp)

In [None]:
tmp = crops.isna().mean(axis=1).round(2)
tmp.value_counts().sort_index()

In [None]:
msno.matrix(country_specs)

**Conclusion**

- delete is_EU27 and is_south_america

In [None]:
# delete is_EU27 and is_south_america

In [None]:
drop_cols = ["is_EU27", "is_south_america"]
country_specs = country_specs.drop(columns=drop_cols)
msno.matrix(country_specs)

In [None]:
tmp = country_specs.isna().mean(axis=1).round(2)
tmp.value_counts().sort_index()

In [None]:
threshold = 0.2
tmp[tmp >= threshold]

In [None]:
drop_idxs = tmp[tmp > threshold].index
country_specs.loc[drop_idxs, :]

**Conclusion**

- countries with Nan rate > 0.3 are Island or -100k pop (Monaco, Antigua)

In [None]:
country_specs = country_specs.drop(
    index=drop_idxs, columns=drop_cols, errors="ignore"
)
msno.matrix(country_specs)

In [None]:
num_cols = country_specs.select_dtypes(
    include=["float", "int"]
).columns.tolist()

num_cols = [i for i in num_cols if "code" not in i]

categ_cols = [
    "alpha_3_code",
    "FAO_country_name",
    "exiobase_region_name",
    "globio_country_code",
    "globio_country_name",
    "USS30_region_name",
]


country_specs = country_specs.loc[:, num_cols + categ_cols]

In [None]:
country_specs

**Conclusion**

- No relevant data in the crop dataset

#### Data Inspection

In [None]:
country_specs.FAO_country_name.value_counts()

In [None]:
country_specs.FAO_country_name.nunique()

In [None]:
country_specs.USS30_region_name.value_counts()

In [None]:
country_specs.FAO_country_name.value_counts().value_counts()

In [None]:
country_specs.groupby("exiobase_region_name").FAO_country_name.count()

#### DataSet Conclusion

- No relevant data in the crop dataset

### Crops 

#### Wheat Selection

In [None]:
feature = "Wheat"
crops = crops.loc[crops.item_name == feature, :]

In [None]:
item_cols = [i for i in crops.columns if "item" in i]
item_cols

In [None]:
crops.drop(columns=item_cols, inplace=True, errors="ignore")
crops

In [None]:
crops

#### Display

In [None]:
crops.head(10)

In [None]:
crops.tail(10)

In [None]:
crops.sample(10)

#### Structure

In [None]:
crops.drop(columns="id", inplace=True, errors="ignore")

In [None]:
crops.shape

In [None]:
crops.columns

In [None]:
crops.dtypes

In [None]:
crops.info()

In [None]:
crops.dtypes.value_counts()

In [None]:
for dtype in ["object", "float", "int"]:
    selected_dtype = crops.select_dtypes(include=[dtype])
    display(selected_dtype.columns)

In [None]:
_num = crops.select_dtypes(include=["number"])
_num

In [None]:
_feat_cols = [i for i in _num.columns if "msa" in i]
_feat_cols

#### Separation Static/Dynamic

In [None]:
categ_cols = [i for i in crops.columns if "msa" not in i]
static_cols = [i for i in crops.columns if "static" in i]
dynamic_cols = [i for i in crops.columns if "dynamic" in i]

display(categ_cols)
display(static_cols)
display(dynamic_cols)

In [None]:
crops_static = crops.loc[:, categ_cols + static_cols]
crops_static.head()

In [None]:
crops_dynamic = crops.loc[:, categ_cols + dynamic_cols]
crops_dynamic

# BE CAREFULL => in the report we do have to distinguish between terestrial static, dynamic and marine static, dynamic

#### Summize

crops_static

In [None]:
crops_static

In [None]:
sum_static = crops_static.iloc[:, 1:].sum(axis=1)
sum_static

In [None]:
crops_static = crops_static.iloc[:, :1]
crops_static["sum_static"] = sum_static.values

#### Data Inspection

In [None]:
crops_static.describe()

In [None]:
crops.globio_country_code.value_counts()

In [None]:
crops.globio_country_code.nunique()

**Conclusion**

????

#### Merge Country Specs and crops

In [None]:
country_specs

In [None]:
# merged = pd.merge(left=_country_specs, right=crops, on="globio_country_code",how="left")
# merged

In [None]:
tmp = country_specs.loc[:, ["globio_country_code", "globio_country_name"]]
tmp.index = tmp.globio_country_code
tmp.drop(columns="globio_country_code", inplace=True)
tmp.index.name = None
tmp = tmp.to_dict().get("globio_country_name")
tmp

In [None]:
crops_static["globio_country_name"] = crops_static.globio_country_code.apply(
    lambda i: tmp.get(i, np.nan)
)
crops_static

#### Nan

In [None]:
crops_static.isna().sum()

**Conclusion**

???

**Conclusion**
???

## Production

#### Table Analysis

In [None]:
Production.areacodes

In [None]:
Production.flags

In [None]:
flags = {
    k: v
    for k, v in zip(
        Production.flags.Flag.values, Production.flags.Description.values
    )
}
flags

In [None]:
Production.itemcodes

In [None]:
data = Production.data_normalized
data

In [None]:
data["Flag_value"] = data.Flag.apply(lambda i: flags[i])
data

In [None]:
data.columns

#### Feature selection

In [None]:
cols = [
    # "Area Code",
    # "Area Code (M49)",
    "Area",
    # "Item Code",
    # "Item Code (CPC)",
    "Item",
    # "Element Code",
    "Element",
    # "Year Code",
    "Year",
    "Unit",
    "Value",
    # "Flag",
    # "Note",
    "Flag_value",
]

In [None]:
data = data.loc[:, cols]
data

In [None]:
data.Element.nunique()

In [None]:
data.Element.value_counts()

In [None]:
data.Item.value_counts()

In [None]:
data_weat = data.loc[data.Item.str.lower().str.contains("wheat"), :]
data_weat

In [None]:
data_weat_2019 = data_weat.loc[data_weat.Year == 2019, :]
data_weat_2019

In [None]:
data_weat_2019_h = data_weat_2019.loc[data_weat_2019.Unit == "ha", :]

In [None]:
data_weat_2019_h

In [None]:
data_weat_2019_h.columns

In [None]:
cols = [
    "Area",
    # "Item",
    # "Element",
    # "Year",
    # "Unit",
    "Value",
    "Flag_value",
]

data_weat_2019_h = data_weat_2019_h.loc[:, cols]
data_weat_2019_h

#### Display 

In [None]:
data_weat_2019_h.head(10)

In [None]:
data_weat_2019_h.tail(10)

In [None]:
data_weat_2019_h.sample(10)

#### Structure

In [None]:
data_weat_2019_h.shape

In [None]:
data_weat_2019_h.info()

#### Nan

In [162]:
data_weat_2019_h.isna().sum()

Area          0
Value         0
Flag_value    0
dtype: int64

In [163]:
data_weat_2019_h

Unnamed: 0,Area,Value,Flag_value
11474,Afghanistan,2334000.0,Official figure
30465,Albania,57330.0,Official figure
51223,Algeria,1974987.0,Official figure
64909,Angola,3357.0,Estimated value
99174,Argentina,6050953.0,Official figure
...,...,...,...
4044324,Small Island Developing States,167.0,Estimated value
4051501,Low Income Food Deficit Countries,32141.0,Estimated value
4082626,Low Income Food Deficit Countries,8732855.0,Official figure
4090319,Net Food Importing Developing Countries,33608.0,Estimated value


data_weat_2019_h.shape

## Final Merge

### Keys Analysis

In [188]:
data_weat_2019_h.sort_values("Area", inplace=True, ascending=True)
data_weat_2019_h.rename(
    columns={"Value": "km2"}, inplace=True, errors="ignore"
)
data_weat_2019_h

Unnamed: 0,Area,km2,Flag_value
11474,Afghanistan,2334000.0,Official figure
3180706,Africa,9647204.0,Estimated value
3149126,Africa,21795.0,Estimated value
30465,Albania,57330.0,Official figure
51223,Algeria,1974987.0,Official figure
...,...,...,...
3141552,World,215748027.0,Official figure
3105324,World,1838123.0,Estimated value
3052765,Yemen,57466.0,Official figure
3077044,Zambia,22706.0,Official figure


####### BE CARREFULLL KM2 is supposed

In [189]:
crops_static.sort_values("globio_country_name", ascending=True, inplace=True)
crops_static

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  crops_static.sort_values("globio_country_name", ascending=True, inplace=True)


Unnamed: 0,globio_country_code,sum_static,globio_country_name
0,40,0.002951,Austria
59,56,0.001293,Belgium
105,100,0.002051,Bulgaria
187,191,0.001951,Croatia
250,196,0.00502,Cyprus
318,203,0.002068,Czech Republic
380,208,0.001343,Denmark
422,233,0.002742,Estonia
455,246,0.006413,Finland
488,250,0.001674,France


In [190]:
country_specs.sort_values("FAO_country_name", ascending=True, inplace=True)
country_specs

Unnamed: 0,exiobase_region_id,alpha_3_code,FAO_country_name,exiobase_region_name,globio_country_code,globio_country_name,USS30_region_name
0,45.0,AFG,Afghanistan,RoW Asia and Pacific,4,Afghanistan,Rest S Asia
1,47.0,ALB,Albania,RoW Europe,8,Albania,C.Europe
2,48.0,DZA,Algeria,RoW Africa,12,Algeria,N.Africa
3,45.0,ASM,American Samoa,RoW Asia and Pacific,16,American Samoa,Oceania
5,48.0,AGO,Angola,RoW Africa,24,Angola,Rest S Africa
...,...,...,...,...,...,...,...
236,45.0,WLF,Wallis and Futuna Islands,RoW Asia and Pacific,876,Wallis and Futuna,Oceania
237,48.0,ESH,Western Sahara,RoW Africa,732,Western Sahara,N.Africa
238,49.0,YEM,Yemen,RoW Middle East,887,Yemen,M.East
239,48.0,ZMB,Zambia,RoW Africa,894,Zambia,Rest S Africa


In [191]:
crops_static.shape

(27, 3)

In [192]:
data_weat_2019_h.shape

(202, 3)

In [193]:
country_specs.shape

(222, 7)

### data vs country ON FAO_country_name

In [194]:
merge1 = pd.merge(
    left=country_specs,
    right=data_weat_2019_h,
    left_on="FAO_country_name",
    right_on="Area",
    how="outer",
    indicator=True,
)

merge1

Unnamed: 0,exiobase_region_id,alpha_3_code,FAO_country_name,exiobase_region_name,globio_country_code,globio_country_name,USS30_region_name,Area,km2,Flag_value,_merge
0,45.0,AFG,Afghanistan,RoW Asia and Pacific,4.0,Afghanistan,Rest S Asia,Afghanistan,2334000.0,Official figure,both
1,47.0,ALB,Albania,RoW Europe,8.0,Albania,C.Europe,Albania,57330.0,Official figure,both
2,48.0,DZA,Algeria,RoW Africa,12.0,Algeria,N.Africa,Algeria,1974987.0,Official figure,both
3,45.0,ASM,American Samoa,RoW Asia and Pacific,16.0,American Samoa,Oceania,,,,left_only
4,48.0,AGO,Angola,RoW Africa,24.0,Angola,Rest S Africa,Angola,3357.0,Estimated value,both
...,...,...,...,...,...,...,...,...,...,...,...
302,,,,,,,,Western Asia,10753795.0,Official figure,right_only
303,,,,,,,,Western Asia,103.0,Estimated value,right_only
304,,,,,,,,Western Europe,9064853.0,Official figure,right_only
305,,,,,,,,World,215748027.0,Official figure,right_only


In [195]:
merge1.rename(columns={"_merge": "_merge_1"}, inplace=True)
merge1

Unnamed: 0,exiobase_region_id,alpha_3_code,FAO_country_name,exiobase_region_name,globio_country_code,globio_country_name,USS30_region_name,Area,km2,Flag_value,_merge_1
0,45.0,AFG,Afghanistan,RoW Asia and Pacific,4.0,Afghanistan,Rest S Asia,Afghanistan,2334000.0,Official figure,both
1,47.0,ALB,Albania,RoW Europe,8.0,Albania,C.Europe,Albania,57330.0,Official figure,both
2,48.0,DZA,Algeria,RoW Africa,12.0,Algeria,N.Africa,Algeria,1974987.0,Official figure,both
3,45.0,ASM,American Samoa,RoW Asia and Pacific,16.0,American Samoa,Oceania,,,,left_only
4,48.0,AGO,Angola,RoW Africa,24.0,Angola,Rest S Africa,Angola,3357.0,Estimated value,both
...,...,...,...,...,...,...,...,...,...,...,...
302,,,,,,,,Western Asia,10753795.0,Official figure,right_only
303,,,,,,,,Western Asia,103.0,Estimated value,right_only
304,,,,,,,,Western Europe,9064853.0,Official figure,right_only
305,,,,,,,,World,215748027.0,Official figure,right_only


data_weat_2019_h.head()

### Merge1 v crops 

In [196]:
merge2 = pd.merge(
    left=merge1,
    right=crops_static,
    left_on="globio_country_name",
    right_on="globio_country_name",
    how="outer",
    indicator=True,
)

merge2

Unnamed: 0,exiobase_region_id,alpha_3_code,FAO_country_name,exiobase_region_name,globio_country_code_x,globio_country_name,USS30_region_name,Area,km2,Flag_value,_merge_1,globio_country_code_y,sum_static,_merge
0,45.0,AFG,Afghanistan,RoW Asia and Pacific,4.0,Afghanistan,Rest S Asia,Afghanistan,2334000.0,Official figure,both,,,left_only
1,47.0,ALB,Albania,RoW Europe,8.0,Albania,C.Europe,Albania,57330.0,Official figure,both,,,left_only
2,48.0,DZA,Algeria,RoW Africa,12.0,Algeria,N.Africa,Algeria,1974987.0,Official figure,both,,,left_only
3,45.0,ASM,American Samoa,RoW Asia and Pacific,16.0,American Samoa,Oceania,,,,left_only,,,left_only
4,48.0,AGO,Angola,RoW Africa,24.0,Angola,Rest S Africa,Angola,3357.0,Estimated value,both,,,left_only
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
302,,,,,,,,Western Asia,10753795.0,Official figure,right_only,,,left_only
303,,,,,,,,Western Asia,103.0,Estimated value,right_only,,,left_only
304,,,,,,,,Western Europe,9064853.0,Official figure,right_only,,,left_only
305,,,,,,,,World,215748027.0,Official figure,right_only,,,left_only


In [197]:
merge2.rename(columns={"_merge": "_merge_2"}, inplace=True)
merge2

Unnamed: 0,exiobase_region_id,alpha_3_code,FAO_country_name,exiobase_region_name,globio_country_code_x,globio_country_name,USS30_region_name,Area,km2,Flag_value,_merge_1,globio_country_code_y,sum_static,_merge_2
0,45.0,AFG,Afghanistan,RoW Asia and Pacific,4.0,Afghanistan,Rest S Asia,Afghanistan,2334000.0,Official figure,both,,,left_only
1,47.0,ALB,Albania,RoW Europe,8.0,Albania,C.Europe,Albania,57330.0,Official figure,both,,,left_only
2,48.0,DZA,Algeria,RoW Africa,12.0,Algeria,N.Africa,Algeria,1974987.0,Official figure,both,,,left_only
3,45.0,ASM,American Samoa,RoW Asia and Pacific,16.0,American Samoa,Oceania,,,,left_only,,,left_only
4,48.0,AGO,Angola,RoW Africa,24.0,Angola,Rest S Africa,Angola,3357.0,Estimated value,both,,,left_only
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
302,,,,,,,,Western Asia,10753795.0,Official figure,right_only,,,left_only
303,,,,,,,,Western Asia,103.0,Estimated value,right_only,,,left_only
304,,,,,,,,Western Europe,9064853.0,Official figure,right_only,,,left_only
305,,,,,,,,World,215748027.0,Official figure,right_only,,,left_only


In [198]:
merge2.loc[merge2.loc[:, "_merge_2"] == "both", :]

Unnamed: 0,exiobase_region_id,alpha_3_code,FAO_country_name,exiobase_region_name,globio_country_code_x,globio_country_name,USS30_region_name,Area,km2,Flag_value,_merge_1,globio_country_code_y,sum_static,_merge_2
11,1.0,AUT,Austria,Austria,40.0,Austria,W.Europe,Austria,278340.0,Official figure,both,40.0,0.002951,both
19,2.0,BEL,Belgium,Belgium,56.0,Belgium,W.Europe,Belgium,203760.0,Official figure,both,56.0,0.001293,both
33,3.0,BGR,Bulgaria,Bulgaria,100.0,Bulgaria,C.Europe,Bulgaria,1198680.0,Official figure,both,100.0,0.002051,both
55,13.0,HRV,Croatia,Croatia,191.0,Croatia,C.Europe,Croatia,143150.0,Official figure,both,191.0,0.001951,both
57,4.0,CYP,Cyprus,Cyprus,196.0,Cyprus,C.Europe,Cyprus,10590.0,Official figure,both,196.0,0.00502,both
58,5.0,CZE,Czech republic,Czech Republic,203.0,Czech Republic,C.Europe,,,,left_only,203.0,0.002068,both
62,7.0,DNK,Denmark,Denmark,208.0,Denmark,W.Europe,Denmark,573400.0,Official figure,both,208.0,0.001343,both
71,8.0,EST,Estonia,Estonia,233.0,Estonia,C.Europe,Estonia,166980.0,Official figure,both,233.0,0.002742,both
76,10.0,FIN,Finland,Finland,246.0,Finland,W.Europe,Finland,197600.0,Official figure,both,246.0,0.006413,both
77,11.0,FRA,France,France,250.0,France,W.Europe,France,5244250.0,Official figure,both,250.0,0.001674,both


In [200]:
merge2.loc[merge2.loc[:, "_merge_2"] == "both", :]

Unnamed: 0,exiobase_region_id,alpha_3_code,FAO_country_name,exiobase_region_name,globio_country_code_x,globio_country_name,USS30_region_name,Area,km2,Flag_value,_merge_1,globio_country_code_y,sum_static,_merge_2
11,1.0,AUT,Austria,Austria,40.0,Austria,W.Europe,Austria,278340.0,Official figure,both,40.0,0.002951,both
19,2.0,BEL,Belgium,Belgium,56.0,Belgium,W.Europe,Belgium,203760.0,Official figure,both,56.0,0.001293,both
33,3.0,BGR,Bulgaria,Bulgaria,100.0,Bulgaria,C.Europe,Bulgaria,1198680.0,Official figure,both,100.0,0.002051,both
55,13.0,HRV,Croatia,Croatia,191.0,Croatia,C.Europe,Croatia,143150.0,Official figure,both,191.0,0.001951,both
57,4.0,CYP,Cyprus,Cyprus,196.0,Cyprus,C.Europe,Cyprus,10590.0,Official figure,both,196.0,0.00502,both
58,5.0,CZE,Czech republic,Czech Republic,203.0,Czech Republic,C.Europe,,,,left_only,203.0,0.002068,both
62,7.0,DNK,Denmark,Denmark,208.0,Denmark,W.Europe,Denmark,573400.0,Official figure,both,208.0,0.001343,both
71,8.0,EST,Estonia,Estonia,233.0,Estonia,C.Europe,Estonia,166980.0,Official figure,both,233.0,0.002742,both
76,10.0,FIN,Finland,Finland,246.0,Finland,W.Europe,Finland,197600.0,Official figure,both,246.0,0.006413,both
77,11.0,FRA,France,France,250.0,France,W.Europe,France,5244250.0,Official figure,both,250.0,0.001674,both


In [201]:
final = merge2.loc[merge2.loc[:, "_merge_2"] == "both", :]

### Select Features 

In [202]:
final

Unnamed: 0,exiobase_region_id,alpha_3_code,FAO_country_name,exiobase_region_name,globio_country_code_x,globio_country_name,USS30_region_name,Area,km2,Flag_value,_merge_1,globio_country_code_y,sum_static,_merge_2
11,1.0,AUT,Austria,Austria,40.0,Austria,W.Europe,Austria,278340.0,Official figure,both,40.0,0.002951,both
19,2.0,BEL,Belgium,Belgium,56.0,Belgium,W.Europe,Belgium,203760.0,Official figure,both,56.0,0.001293,both
33,3.0,BGR,Bulgaria,Bulgaria,100.0,Bulgaria,C.Europe,Bulgaria,1198680.0,Official figure,both,100.0,0.002051,both
55,13.0,HRV,Croatia,Croatia,191.0,Croatia,C.Europe,Croatia,143150.0,Official figure,both,191.0,0.001951,both
57,4.0,CYP,Cyprus,Cyprus,196.0,Cyprus,C.Europe,Cyprus,10590.0,Official figure,both,196.0,0.00502,both
58,5.0,CZE,Czech republic,Czech Republic,203.0,Czech Republic,C.Europe,,,,left_only,203.0,0.002068,both
62,7.0,DNK,Denmark,Denmark,208.0,Denmark,W.Europe,Denmark,573400.0,Official figure,both,208.0,0.001343,both
71,8.0,EST,Estonia,Estonia,233.0,Estonia,C.Europe,Estonia,166980.0,Official figure,both,233.0,0.002742,both
76,10.0,FIN,Finland,Finland,246.0,Finland,W.Europe,Finland,197600.0,Official figure,both,246.0,0.006413,both
77,11.0,FRA,France,France,250.0,France,W.Europe,France,5244250.0,Official figure,both,250.0,0.001674,both


In [203]:
final.columns

Index(['exiobase_region_id', 'alpha_3_code', 'FAO_country_name',
       'exiobase_region_name', 'globio_country_code_x', 'globio_country_name',
       'USS30_region_name', 'Area', 'km2', 'Flag_value', '_merge_1',
       'globio_country_code_y', 'sum_static', '_merge_2'],
      dtype='object')

In [None]:
cols = [
    # "exiobase_region_id",
    "alpha_3_code",
    "FAO_country_name",
    # "exiobase_region_name",
    # "globio_country_code_x",
    "globio_country_name",
    "USS30_region_name",
    "Area",
    "km2",
    "Flag_value",
    # "_merge_1",
    # "globio_country_code_y",
    "sum_static",
    # "_merge_2",
]

In [204]:
final = final.loc[:, cols]
final

KeyError: "['Value'] not in index"

In [205]:
final["ms.km2"] = final.km2 * final.sum_static
final

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  final["ms.km2"] = final.km2 *  final.sum_static


Unnamed: 0,exiobase_region_id,alpha_3_code,FAO_country_name,exiobase_region_name,globio_country_code_x,globio_country_name,USS30_region_name,Area,km2,Flag_value,_merge_1,globio_country_code_y,sum_static,_merge_2,ms.km2
11,1.0,AUT,Austria,Austria,40.0,Austria,W.Europe,Austria,278340.0,Official figure,both,40.0,0.002951,both,821.336665
19,2.0,BEL,Belgium,Belgium,56.0,Belgium,W.Europe,Belgium,203760.0,Official figure,both,56.0,0.001293,both,263.487603
33,3.0,BGR,Bulgaria,Bulgaria,100.0,Bulgaria,C.Europe,Bulgaria,1198680.0,Official figure,both,100.0,0.002051,both,2458.68223
55,13.0,HRV,Croatia,Croatia,191.0,Croatia,C.Europe,Croatia,143150.0,Official figure,both,191.0,0.001951,both,279.286771
57,4.0,CYP,Cyprus,Cyprus,196.0,Cyprus,C.Europe,Cyprus,10590.0,Official figure,both,196.0,0.00502,both,53.158405
58,5.0,CZE,Czech republic,Czech Republic,203.0,Czech Republic,C.Europe,,,,left_only,203.0,0.002068,both,
62,7.0,DNK,Denmark,Denmark,208.0,Denmark,W.Europe,Denmark,573400.0,Official figure,both,208.0,0.001343,both,770.322745
71,8.0,EST,Estonia,Estonia,233.0,Estonia,C.Europe,Estonia,166980.0,Official figure,both,233.0,0.002742,both,457.916824
76,10.0,FIN,Finland,Finland,246.0,Finland,W.Europe,Finland,197600.0,Official figure,both,246.0,0.006413,both,1267.182502
77,11.0,FRA,France,France,250.0,France,W.Europe,France,5244250.0,Official figure,both,250.0,0.001674,both,8778.389602


## Feature Engineering

### population

In [214]:
fn = "./data/source/population/API_SP.POP.TOTL_DS2_en_csv_v2_6298256.csv"


encoding = "latin-1"
encoding = "utf8"
with open(fn, "r", encoding=encoding) as f:
    txt = f.readlines()

In [216]:
txt

['\ufeff"Data Source","World Development Indicators",\n',
 '\n',
 '"Last Updated Date","2023-12-18",\n',
 '\n',
 '"Country Name","Country Code","Indicator Name","Indicator Code","1960","1961","1962","1963","1964","1965","1966","1967","1968","1969","1970","1971","1972","1973","1974","1975","1976","1977","1978","1979","1980","1981","1982","1983","1984","1985","1986","1987","1988","1989","1990","1991","1992","1993","1994","1995","1996","1997","1998","1999","2000","2001","2002","2003","2004","2005","2006","2007","2008","2009","2010","2011","2012","2013","2014","2015","2016","2017","2018","2019","2020","2021","2022",\n',
 '"Aruba","ABW","Population, total","SP.POP.TOTL","54608","55811","56682","57475","58178","58782","59291","59522","59471","59330","59106","58816","58855","59365","60028","60715","61193","61465","61738","62006","62267","62614","63116","63683","64174","64478","64553","64450","64332","64596","65712","67864","70192","72360","74710","77050","79417","81858","84355","86867","89101

In [217]:
txt[4:]

['"Country Name","Country Code","Indicator Name","Indicator Code","1960","1961","1962","1963","1964","1965","1966","1967","1968","1969","1970","1971","1972","1973","1974","1975","1976","1977","1978","1979","1980","1981","1982","1983","1984","1985","1986","1987","1988","1989","1990","1991","1992","1993","1994","1995","1996","1997","1998","1999","2000","2001","2002","2003","2004","2005","2006","2007","2008","2009","2010","2011","2012","2013","2014","2015","2016","2017","2018","2019","2020","2021","2022",\n',
 '"Aruba","ABW","Population, total","SP.POP.TOTL","54608","55811","56682","57475","58178","58782","59291","59522","59471","59330","59106","58816","58855","59365","60028","60715","61193","61465","61738","62006","62267","62614","63116","63683","64174","64478","64553","64450","64332","64596","65712","67864","70192","72360","74710","77050","79417","81858","84355","86867","89101","90691","91781","92701","93540","94483","95606","96787","97996","99212","100341","101288","102112","102880","1

In [218]:
with open(fn, "w", encoding="utf8") as f:
    # txt[4:].write(f)
    f.writelines(txt[4:])

In [219]:
pop = pd.read_csv(fn)

In [220]:
pop

Unnamed: 0,Country Name,Country Code,Indicator Name,Indicator Code,1960,1961,1962,1963,1964,1965,...,2014,2015,2016,2017,2018,2019,2020,2021,2022,Unnamed: 67
0,Aruba,ABW,"Population, total",SP.POP.TOTL,54608.0,55811.0,56682.0,57475.0,58178.0,58782.0,...,103594.0,104257.0,104874.0,105439.0,105962.0,106442.0,106585.0,106537.0,106445.0,
1,Africa Eastern and Southern,AFE,"Population, total",SP.POP.TOTL,130692579.0,134169237.0,137835590.0,141630546.0,145605995.0,149742351.0,...,583651101.0,600008424.0,616377605.0,632746570.0,649757148.0,667242986.0,685112979.0,702977106.0,720859132.0,
2,Afghanistan,AFG,"Population, total",SP.POP.TOTL,8622466.0,8790140.0,8969047.0,9157465.0,9355514.0,9565147.0,...,32716210.0,33753499.0,34636207.0,35643418.0,36686784.0,37769499.0,38972230.0,40099462.0,41128771.0,
3,Africa Western and Central,AFW,"Population, total",SP.POP.TOTL,97256290.0,99314028.0,101445032.0,103667517.0,105959979.0,108336203.0,...,397855507.0,408690375.0,419778384.0,431138704.0,442646825.0,454306063.0,466189102.0,478185907.0,490330870.0,
4,Angola,AGO,"Population, total",SP.POP.TOTL,5357195.0,5441333.0,5521400.0,5599827.0,5673199.0,5736582.0,...,27128337.0,28127721.0,29154746.0,30208628.0,31273533.0,32353588.0,33428486.0,34503774.0,35588987.0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
261,Kosovo,XKX,"Population, total",SP.POP.TOTL,947000.0,966000.0,994000.0,1022000.0,1050000.0,1078000.0,...,1812771.0,1788196.0,1777557.0,1791003.0,1797085.0,1788878.0,1790133.0,1786038.0,1761985.0,
262,"Yemen, Rep.",YEM,"Population, total",SP.POP.TOTL,5542459.0,5646668.0,5753386.0,5860197.0,5973803.0,6097298.0,...,27753304.0,28516545.0,29274002.0,30034389.0,30790513.0,31546691.0,32284046.0,32981641.0,33696614.0,
263,South Africa,ZAF,"Population, total",SP.POP.TOTL,16520441.0,16989464.0,17503133.0,18042215.0,18603097.0,19187194.0,...,54729551.0,55876504.0,56422274.0,56641209.0,57339635.0,58087055.0,58801927.0,59392255.0,59893885.0,
264,Zambia,ZMB,"Population, total",SP.POP.TOTL,3119430.0,3219451.0,3323427.0,3431381.0,3542764.0,3658024.0,...,15737793.0,16248230.0,16767761.0,17298054.0,17835893.0,18380477.0,18927715.0,19473125.0,20017675.0,


In [223]:
pop = pop.loc[:, pop.columns.tolist()[:2] + ["2019"]]
pop

Unnamed: 0,Country Name,Country Code,2019
0,Aruba,ABW,106442.0
1,Africa Eastern and Southern,AFE,667242986.0
2,Afghanistan,AFG,37769499.0
3,Africa Western and Central,AFW,454306063.0
4,Angola,AGO,32353588.0
...,...,...,...
261,Kosovo,XKX,1788878.0
262,"Yemen, Rep.",YEM,31546691.0
263,South Africa,ZAF,58087055.0
264,Zambia,ZMB,18380477.0


In [235]:
pop.rename(columns={"2019": "population"}, inplace=True)

## Gpd

In [224]:
fn = "./data/source/gpd/API_NY.GDP.PCAP.CD_DS2_en_csv_v2_6298251.csv"

In [225]:
with open(fn, "r", encoding="utf8") as f:
    txt = f.readlines()

In [227]:
txt

['\ufeff"Data Source","World Development Indicators",\n',
 '\n',
 '"Last Updated Date","2023-12-18",\n',
 '\n',
 '"Country Name","Country Code","Indicator Name","Indicator Code","1960","1961","1962","1963","1964","1965","1966","1967","1968","1969","1970","1971","1972","1973","1974","1975","1976","1977","1978","1979","1980","1981","1982","1983","1984","1985","1986","1987","1988","1989","1990","1991","1992","1993","1994","1995","1996","1997","1998","1999","2000","2001","2002","2003","2004","2005","2006","2007","2008","2009","2010","2011","2012","2013","2014","2015","2016","2017","2018","2019","2020","2021","2022",\n',
 '"Aruba","ABW","GDP per capita (current US$)","NY.GDP.PCAP.CD","","","","","","","","","","","","","","","","","","","","","","","","","","","6283.00144344602","7567.25364168664","9274.51415613905","10767.3962204623","11638.7337057728","12850.2157123975","13657.6706444765","14970.1523419526","16675.2784883673","17140.4333687405","17375.2253063755","18713.4253880988","19742

In [228]:
txt[4:]

['"Country Name","Country Code","Indicator Name","Indicator Code","1960","1961","1962","1963","1964","1965","1966","1967","1968","1969","1970","1971","1972","1973","1974","1975","1976","1977","1978","1979","1980","1981","1982","1983","1984","1985","1986","1987","1988","1989","1990","1991","1992","1993","1994","1995","1996","1997","1998","1999","2000","2001","2002","2003","2004","2005","2006","2007","2008","2009","2010","2011","2012","2013","2014","2015","2016","2017","2018","2019","2020","2021","2022",\n',
 '"Aruba","ABW","GDP per capita (current US$)","NY.GDP.PCAP.CD","","","","","","","","","","","","","","","","","","","","","","","","","","","6283.00144344602","7567.25364168664","9274.51415613905","10767.3962204623","11638.7337057728","12850.2157123975","13657.6706444765","14970.1523419526","16675.2784883673","17140.4333687405","17375.2253063755","18713.4253880988","19742.3167386832","19833.8267458639","21026.1670909022","20911.1927670907","21375.269123446","22050.589873287","24105

In [229]:
with open(fn, "w", encoding="utf8") as f:
    f.writelines(txt[4:])

In [231]:
gpd = pd.read_csv(fn)
gpd

Unnamed: 0,Country Name,Country Code,Indicator Name,Indicator Code,1960,1961,1962,1963,1964,1965,...,2014,2015,2016,2017,2018,2019,2020,2021,2022,Unnamed: 67
0,Aruba,ABW,GDP per capita (current US$),NY.GDP.PCAP.CD,,,,,,,...,26940.264114,28419.264534,28449.712946,29329.081747,30918.483584,31902.809818,24008.127822,29127.759384,33300.838819,
1,Africa Eastern and Southern,AFE,GDP per capita (current US$),NY.GDP.PCAP.CD,141.385955,144.342434,148.774835,157.047580,166.849791,177.769086,...,1725.332959,1554.167299,1444.003514,1625.286236,1558.307482,1507.982881,1355.805923,1545.613215,1644.062829,
2,Afghanistan,AFG,GDP per capita (current US$),NY.GDP.PCAP.CD,62.369375,62.443703,60.950364,82.021738,85.511073,105.243196,...,626.512930,566.881133,523.053012,526.140801,492.090632,497.741429,512.055098,355.777826,,
3,Africa Western and Central,AFW,GDP per capita (current US$),NY.GDP.PCAP.CD,107.053706,112.128417,117.814663,122.370114,130.700278,137.301801,...,2248.316255,1882.264038,1648.762676,1590.277754,1735.374911,1812.446822,1688.075575,1766.943618,1785.312219,
4,Angola,AGO,GDP per capita (current US$),NY.GDP.PCAP.CD,,,,,,,...,5011.984412,3217.339244,1809.709377,2439.374441,2540.508878,2191.347764,1450.905112,1927.474078,3000.444231,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
261,Kosovo,XKX,GDP per capita (current US$),NY.GDP.PCAP.CD,,,,,,,...,3902.530841,3520.782075,3759.472855,4009.353811,4384.188680,4416.029253,4310.934002,5269.783901,5340.268798,
262,"Yemen, Rep.",YEM,GDP per capita (current US$),NY.GDP.PCAP.CD,,,,,,,...,1557.601406,1488.416267,1069.816997,893.716493,701.714868,693.816503,578.512010,543.637538,650.272218,
263,South Africa,ZAF,GDP per capita (current US$),NY.GDP.PCAP.CD,529.561923,543.042224,560.699394,601.599951,642.688431,681.131111,...,6965.137897,6204.929901,5735.066787,6734.475153,7067.724165,6702.526617,5753.066494,7073.612754,6766.481254,
264,Zambia,ZMB,GDP per capita (current US$),NY.GDP.PCAP.CD,228.567399,216.274674,208.562685,209.453362,236.941713,296.022427,...,1724.576220,1307.909649,1249.923143,1495.752138,1475.199883,1268.120941,956.831729,1134.713454,1456.901570,


In [232]:
gpd = gpd.loc[:, gpd.columns.tolist()[:2] + ["2019"]]
gpd

Unnamed: 0,Country Name,Country Code,2019
0,Aruba,ABW,31902.809818
1,Africa Eastern and Southern,AFE,1507.982881
2,Afghanistan,AFG,497.741429
3,Africa Western and Central,AFW,1812.446822
4,Angola,AGO,2191.347764
...,...,...,...
261,Kosovo,XKX,4416.029253
262,"Yemen, Rep.",YEM,693.816503
263,South Africa,ZAF,6702.526617
264,Zambia,ZMB,1268.120941


In [233]:
gpd.rename(columns={"2019": "gpd_per_capita"}, inplace=True)
gpd

Unnamed: 0,Country Name,Country Code,gpd_per_capita
0,Aruba,ABW,31902.809818
1,Africa Eastern and Southern,AFE,1507.982881
2,Afghanistan,AFG,497.741429
3,Africa Western and Central,AFW,1812.446822
4,Angola,AGO,2191.347764
...,...,...,...
261,Kosovo,XKX,4416.029253
262,"Yemen, Rep.",YEM,693.816503
263,South Africa,ZAF,6702.526617
264,Zambia,ZMB,1268.120941


### merge both

In [238]:
pop_gpd = pd.merge(
    left=pop, right=gpd, on=["Country Name", "Country Code"], how="outer"
)
pop_gpd

Unnamed: 0,Country Name,Country Code,population,gpd_per_capita
0,Aruba,ABW,106442.0,31902.809818
1,Africa Eastern and Southern,AFE,667242986.0,1507.982881
2,Afghanistan,AFG,37769499.0,497.741429
3,Africa Western and Central,AFW,454306063.0,1812.446822
4,Angola,AGO,32353588.0,2191.347764
...,...,...,...,...
261,Kosovo,XKX,1788878.0,4416.029253
262,"Yemen, Rep.",YEM,31546691.0,693.816503
263,South Africa,ZAF,58087055.0,6702.526617
264,Zambia,ZMB,18380477.0,1268.120941


In [241]:
test_3_codes = ["AUT", "BEL, BGR"]

pop_gpd.loc[pop_gpd.loc[:, "Country Name"].str.contains("Bel"), :]

Unnamed: 0,Country Name,Country Code,population,gpd_per_capita
17,Belgium,BEL,11488980.0,46641.721402
25,Belarus,BLR,9419758.0,6837.768321
26,Belize,BLZ,389095.0,6134.215233


In [242]:
pop_gpd.loc[pop_gpd.loc[:, "Country Name"].str.contains("Bul"), :]

Unnamed: 0,Country Name,Country Code,population,gpd_per_capita
21,Bulgaria,BGR,6975761.0,9874.336326


In [243]:
final_gpd_pop = pd.merge(
    left=final,
    right=pop_gpd,
    left_on="alpha_3_code",
    right_on="Country Code",
    how="left",
)
final_gpd_pop

Unnamed: 0,exiobase_region_id,alpha_3_code,FAO_country_name,exiobase_region_name,globio_country_code_x,globio_country_name,USS30_region_name,Area,km2,Flag_value,_merge_1,globio_country_code_y,sum_static,_merge_2,ms.km2,Country Name,Country Code,population,gpd_per_capita
0,1.0,AUT,Austria,Austria,40.0,Austria,W.Europe,Austria,278340.0,Official figure,both,40.0,0.002951,both,821.336665,Austria,AUT,8879920.0,50067.585727
1,2.0,BEL,Belgium,Belgium,56.0,Belgium,W.Europe,Belgium,203760.0,Official figure,both,56.0,0.001293,both,263.487603,Belgium,BEL,11488980.0,46641.721402
2,3.0,BGR,Bulgaria,Bulgaria,100.0,Bulgaria,C.Europe,Bulgaria,1198680.0,Official figure,both,100.0,0.002051,both,2458.68223,Bulgaria,BGR,6975761.0,9874.336326
3,13.0,HRV,Croatia,Croatia,191.0,Croatia,C.Europe,Croatia,143150.0,Official figure,both,191.0,0.001951,both,279.286771,Croatia,HRV,4065253.0,15120.902903
4,4.0,CYP,Cyprus,Cyprus,196.0,Cyprus,C.Europe,Cyprus,10590.0,Official figure,both,196.0,0.00502,both,53.158405,Cyprus,CYP,1228836.0,29420.0
5,5.0,CZE,Czech republic,Czech Republic,203.0,Czech Republic,C.Europe,,,,left_only,203.0,0.002068,both,,Czechia,CZE,10671870.0,23664.847863
6,7.0,DNK,Denmark,Denmark,208.0,Denmark,W.Europe,Denmark,573400.0,Official figure,both,208.0,0.001343,both,770.322745,Denmark,DNK,5814422.0,59592.980689
7,8.0,EST,Estonia,Estonia,233.0,Estonia,C.Europe,Estonia,166980.0,Official figure,both,233.0,0.002742,both,457.916824,Estonia,EST,1326898.0,23424.484707
8,10.0,FIN,Finland,Finland,246.0,Finland,W.Europe,Finland,197600.0,Official figure,both,246.0,0.006413,both,1267.182502,Finland,FIN,5521606.0,48629.858228
9,11.0,FRA,France,France,250.0,France,W.Europe,France,5244250.0,Official figure,both,250.0,0.001674,both,8778.389602,France,FRA,67388001.0,40494.898294


### Final Selection

In [244]:
final_gpd_pop.columns

Index(['exiobase_region_id', 'alpha_3_code', 'FAO_country_name',
       'exiobase_region_name', 'globio_country_code_x', 'globio_country_name',
       'USS30_region_name', 'Area', 'km2', 'Flag_value', '_merge_1',
       'globio_country_code_y', 'sum_static', '_merge_2', 'ms.km2',
       'Country Name', 'Country Code', 'population', 'gpd_per_capita'],
      dtype='object')

In [245]:
cols = [
    # "exiobase_region_id",
    # "alpha_3_code",
    # "FAO_country_name",
    # "exiobase_region_name",
    # "globio_country_code_x",
    # "globio_country_name",
    "USS30_region_name",
    # "Area",
    "km2",
    # "Flag_value",
    # "_merge_1",
    # "globio_country_code_y",
    "sum_static",
    # "_merge_2",
    "ms.km2",
    "Country Name",
    "Country Code",
    "population",
    "gpd_per_capita",
]

In [246]:
final_gpd_pop = final_gpd_pop.loc[:, cols]
final_gpd_pop

Unnamed: 0,USS30_region_name,km2,sum_static,ms.km2,Country Name,Country Code,population,gpd_per_capita
0,W.Europe,278340.0,0.002951,821.336665,Austria,AUT,8879920.0,50067.585727
1,W.Europe,203760.0,0.001293,263.487603,Belgium,BEL,11488980.0,46641.721402
2,C.Europe,1198680.0,0.002051,2458.68223,Bulgaria,BGR,6975761.0,9874.336326
3,C.Europe,143150.0,0.001951,279.286771,Croatia,HRV,4065253.0,15120.902903
4,C.Europe,10590.0,0.00502,53.158405,Cyprus,CYP,1228836.0,29420.0
5,C.Europe,,0.002068,,Czechia,CZE,10671870.0,23664.847863
6,W.Europe,573400.0,0.001343,770.322745,Denmark,DNK,5814422.0,59592.980689
7,C.Europe,166980.0,0.002742,457.916824,Estonia,EST,1326898.0,23424.484707
8,W.Europe,197600.0,0.006413,1267.182502,Finland,FIN,5521606.0,48629.858228
9,W.Europe,5244250.0,0.001674,8778.389602,France,FRA,67388001.0,40494.898294


In [247]:
final_gpd_pop.columns = [i.lower() for i in final_gpd_pop.columns]
final_gpd_pop

Unnamed: 0,uss30_region_name,km2,sum_static,ms.km2,country name,country code,population,gpd_per_capita
0,W.Europe,278340.0,0.002951,821.336665,Austria,AUT,8879920.0,50067.585727
1,W.Europe,203760.0,0.001293,263.487603,Belgium,BEL,11488980.0,46641.721402
2,C.Europe,1198680.0,0.002051,2458.68223,Bulgaria,BGR,6975761.0,9874.336326
3,C.Europe,143150.0,0.001951,279.286771,Croatia,HRV,4065253.0,15120.902903
4,C.Europe,10590.0,0.00502,53.158405,Cyprus,CYP,1228836.0,29420.0
5,C.Europe,,0.002068,,Czechia,CZE,10671870.0,23664.847863
6,W.Europe,573400.0,0.001343,770.322745,Denmark,DNK,5814422.0,59592.980689
7,C.Europe,166980.0,0.002742,457.916824,Estonia,EST,1326898.0,23424.484707
8,W.Europe,197600.0,0.006413,1267.182502,Finland,FIN,5521606.0,48629.858228
9,W.Europe,5244250.0,0.001674,8778.389602,France,FRA,67388001.0,40494.898294


In [248]:
final_gpd_pop.columns

Index(['uss30_region_name', 'km2', 'sum_static', 'ms.km2', 'country name',
       'country code', 'population', 'gpd_per_capita'],
      dtype='object')

In [249]:
cols = [
    "uss30_region_name",
    "country name",
    "country code",
    "km2",
    "sum_static",
    "ms.km2",
    "population",
    "gpd_per_capita",
]



In [250]:
final_gpd_pop = final_gpd_pop.loc[:, cols]
final_gpd_pop.rename(
    columns={
        "uss30_region_name": "region",
        "sum_static": "sum_msa_static",
        "ms.km2": "msa.km2",
    }
)

Unnamed: 0,region,country name,country code,km2,sum_msa_static,msa.km2,population,gpd_per_capita
0,W.Europe,Austria,AUT,278340.0,0.002951,821.336665,8879920.0,50067.585727
1,W.Europe,Belgium,BEL,203760.0,0.001293,263.487603,11488980.0,46641.721402
2,C.Europe,Bulgaria,BGR,1198680.0,0.002051,2458.68223,6975761.0,9874.336326
3,C.Europe,Croatia,HRV,143150.0,0.001951,279.286771,4065253.0,15120.902903
4,C.Europe,Cyprus,CYP,10590.0,0.00502,53.158405,1228836.0,29420.0
5,C.Europe,Czechia,CZE,,0.002068,,10671870.0,23664.847863
6,W.Europe,Denmark,DNK,573400.0,0.001343,770.322745,5814422.0,59592.980689
7,C.Europe,Estonia,EST,166980.0,0.002742,457.916824,1326898.0,23424.484707
8,W.Europe,Finland,FIN,197600.0,0.006413,1267.182502,5521606.0,48629.858228
9,W.Europe,France,FRA,5244250.0,0.001674,8778.389602,67388001.0,40494.898294


In [251]:
final_gpd_pop.to_csv("./data/final.csv", index=False)