# Import packages

In [2]:
import pandas as pd

# Importing datasets

In [3]:
# Nobel dataset: the file is tab-separated, so the separator is given explicitly.
df_nobel = pd.read_csv("nobel_per_capita.csv", sep="\t")

# Cocoa dataset (comma-separated, the read_csv default):
# cocoa bean consumption in kilograms per year per capita.
df_cocoa_raw = pd.read_csv("chocolate_consumption_per_capita.csv")

# Preprocess the cocoa dataset

* Pivot on the `Year` column, so that each country is a row and each year is a column
* Replace missing values (NA) with 0
* Rename countries to match the names used in the laureates dataframe

In [4]:
# Reshape the long cocoa table (Entity, Year, Chocolate consumption) into a
# wide one: one row per Entity, one column per Year. (Entity, Year) pairs that
# are missing from the raw data become 0.0 after the pivot.
#
# Some countries are then renamed so that their names match the ones used in
# the laureates dataframe:
#   Czechia                      => Czech Republic
#   Democratic Republic of Congo => DR Congo
#   Timor                        => East Timor
# NOTE(review): the original notes also listed "PLO => Palestine", but no such
# mapping is applied here - confirm whether it should be added.
df_cocoa = (
    df_cocoa_raw
    .pivot(index="Entity", columns="Year", values="Chocolate consumption")
    .fillna(0.0)
    .rename(
        index={
            "Czechia": "Czech Republic",
            "Democratic Republic of Congo": "DR Congo",
            "Timor": "East Timor",
        }
    )
)

# Intersection of countries

In [5]:
# Distinct country names on each side: the "Entity" column of the Nobel frame
# and the (renamed) index of the pivoted cocoa frame.
nobel_entities = set(df_nobel["Entity"].unique())
cocoa_entities = set(df_cocoa.index.unique())

# Countries that appear in both datasets.
common_entities = nobel_entities & cocoa_entities

# Merge datasets

In [6]:
# Keep only the 2009 cocoa column, restricted to countries present in both
# datasets. A boolean mask over the index is used rather than indexing with
# list(common_entities): the iteration order of a set of strings changes
# between interpreter runs, which made the row order of the merged frame (and
# of any file written from it) differ from one kernel restart to the next.
# The mask keeps df_cocoa's own index order, so the result is reproducible.
df_cocoa_2009 = df_cocoa.loc[df_cocoa.index.isin(common_entities), 2009]

# Nobel rows for the same set of countries.
# NOTE(review): the variable name suggests the Nobel figures refer to 2015,
# while the cocoa figures are taken from 2009 - confirm the years are intended.
df_nobel_2015 = df_nobel[df_nobel["Entity"].isin(common_entities)]

# Inner join on the country name: "Entity" is the index of the cocoa series
# and a column of the Nobel frame. The cocoa values arrive in a column
# labelled 2009, which is then given a descriptive name.
df = pd.merge(df_cocoa_2009, df_nobel_2015, how="inner", on="Entity")
df = df.rename(columns={2009: "Chocolate consumption"})

# Save the merged dataset as CSV

In [7]:
# Write the merged frame to disk; the row index is not written to the file.
df.to_csv(path_or_buf="merged_data.csv", index=False)