In [106]:
# Lookup table: two-letter code -> full country name.
# Used to translate the codes found in the `geo` column into readable labels.
countries = dict(
    BE='Belgium',
    BG='Bulgaria',
    CZ='Czechia',
    DK='Denmark',
    DE='Germany',
    EE='Estonia',
    IE='Ireland',
    EL='Greece',
    ES='Spain',
    FR='France',
    HR='Croatia',
    IT='Italy',
    CY='Cyprus',
    LV='Latvia',
    LT='Lithuania',
    LU='Luxembourg',
    HU='Hungary',
    MT='Malta',
    NL='Netherlands',
    AT='Austria',
    PL='Poland',
    PT='Portugal',
    RO='Romania',
    SI='Slovenia',
    SK='Slovakia',
    FI='Finland',
    SE='Sweden',
    IS='Iceland',
    NO='Norway',
    CH='Switzerland',
)

# Lookup table: `cofog99` code -> human-readable label.
#
# Two-digit codes (GF01..GF10) are the top-level divisions; four-digit codes are
# their sub-groups. The table is kept in code order and covers every division,
# so no code is left untranslated when the labels are substituted into the data.
# Labels for GF04xx, GF05xx and GF0601-GF0605 follow the standard COFOG naming.
categories = {
    "TOTAL": "Total",
    # 01 - General public services
    "GF01": "General public services",
    "GF0101": "Executive and legislative organs, financial and fiscal affairs, external affairs",
    "GF0102": "Foreign economic aid",
    "GF0103": "General services",
    "GF0104": "Basic research",
    "GF0105": "R&D General public services",
    "GF0106": "General public services n.e.c.",
    "GF0107": "Public debt transactions",
    "GF0108": "Transfers of a general character between different levels of government",
    # 02 - Defence
    "GF02": "Defence",
    "GF0201": "Military defence",
    "GF0202": "Civil defence",
    "GF0203": "Foreign military aid",
    "GF0204": "R&D Defence",
    "GF0205": "Defence n.e.c.",
    # 03 - Public order and safety
    "GF03": "Public order and safety",
    "GF0301": "Police services",
    "GF0302": "Fire-protection services",
    "GF0303": "Law courts",
    "GF0304": "Prisons",
    "GF0305": "R&D Public order and safety",
    "GF0306": "Public order and safety n.e.c.",
    # 04 - Economic affairs
    "GF04": "Economic affairs",
    "GF0401": "General economic, commercial and labour affairs",
    "GF0402": "Agriculture, forestry, fishing and hunting",
    "GF0403": "Fuel and energy",
    "GF0404": "Mining, manufacturing and construction",
    "GF0405": "Transport",
    "GF0406": "Communication",
    "GF0407": "Other industries",
    "GF0408": "R&D Economic affairs",
    "GF0409": "Economic affairs n.e.c.",
    # 05 - Environmental protection
    "GF05": "Environmental protection",
    "GF0501": "Waste management",
    "GF0502": "Waste water management",
    "GF0503": "Pollution abatement",
    "GF0504": "Protection of biodiversity and landscape",
    "GF0505": "R&D Environmental protection",
    "GF0506": "Environmental protection n.e.c.",
    # 06 - Housing and community amenities
    "GF06": "Housing and community amenities",
    "GF0601": "Housing development",
    "GF0602": "Community development",
    "GF0603": "Water supply",
    "GF0604": "Street lighting",
    "GF0605": "R&D Housing and community amenities",
    "GF0606": "Housing and community amenities n.e.c.",
    # 07 - Health
    "GF07": "Health",
    "GF0701": "Medical products, appliances and equipment",
    "GF0702": "Outpatient services",
    "GF0703": "Hospital services",
    "GF0704": "Public health services",
    "GF0705": "R&D Health",
    "GF0706": "Health n.e.c.",
    # 08 - Recreation, culture and religion
    "GF08": "Recreation, culture and religion",
    "GF0801": "Recreational and sporting services",
    "GF0802": "Cultural services",
    "GF0803": "Broadcasting and publishing services",
    "GF0804": "Religious and other community services",
    "GF0805": "R&D Recreation, culture and religion",
    "GF0806": "Recreation, culture and religion n.e.c.",
    # 09 - Education
    "GF09": "Education",
    "GF0901": "Pre-primary and primary education",
    "GF0902": "Secondary education",
    "GF0903": "Post-secondary non-tertiary education",
    # NOTE(review): the trailing space in this label is kept as-is because other
    # cells may compare against the exact string; strip it once callers are checked.
    "GF0904": "Tertiary education ",
    "GF0905": "Education not definable by level",
    "GF0906": "Subsidiary services to education",
    "GF0907": "R&D Education",
    "GF0908": "Education n.e.c.",
    # 10 - Social protection
    "GF10": "Social protection",
    "GF1001": "Sickness and disability",
    "GF1002": "Old age",
    "GF1003": "Survivors",
    "GF1004": "Family and children",
    "GF1005": "Unemployment",
    "GF1006": "Housing",
    "GF1007": "Social exclusion n.e.c.",
    "GF1008": "R&D Social protection",
    "GF1009": "Social protection n.e.c.",
}

# Raw column header -> display name, applied when the data frame's columns are renamed.
col_names = dict([
    ("DATAFLOW", "Dataflow"),
    ("LAST UPDATE", "Last Update"),
    ("freq", "Frequency"),
    ("unit", "Unit"),
    ("sector", "Sector"),
    ("cofog99", "Category"),
    ("geo", "Country"),
    ("TIME_PERIOD", "Year"),
    ("OBS_VALUE", "Value"),
    ("na_item", "National Accounts Indicator"),
])

# Unit code -> readable unit label.
units = dict(
    MIO_EUR="Million euro",
    PC_GDP="Percentage GDP",
)

In [124]:
import pandas as pd

# Load, clean and split the dataset.
#
# Every step rebinds `data` to a new frame instead of mutating in place, so the
# cell is safe to re-run and never calls an in-place method on a slice of
# another frame (which is what triggers pandas' SettingWithCopy behaviour).

# read data from file (type: pd.DataFrame)
data = pd.read_csv('data.csv')

# drop unnecessary columns
data = data.drop(columns=['DATAFLOW', 'LAST UPDATE', 'freq', 'OBS_FLAG', 'na_item', 'sector'])

# remove the category 'TOTAL' and the geo entries 'EU27_2020' and 'EA19'
data = data[(data['cofog99'] != 'TOTAL') & ~data['geo'].isin(['EU27_2020', 'EA19'])]

# rename the column names/titles
data = data.rename(columns=col_names)

# replace tag names with full names, one column at a time, so a code can only
# be rewritten inside the column it belongs to
data = data.assign(
    Category=data['Category'].replace(categories),
    Country=data['Country'].replace(countries),
    Unit=data['Unit'].replace(units),
)

# separate dataframes depending on unit type (million euro or percentage gdp),
# use the year column as index and put the columns in a fixed order.
# Each unit is selected explicitly, so rows with any other (or missing) unit
# cannot end up in the percentage frame.
column_order = ['Country', 'Value', 'Category', 'Unit']
data_in_euro = (
    data.loc[data['Unit'] == units['MIO_EUR']]
    .set_index('Year')
    .loc[:, column_order]
)
data_in_percentage = (
    data.loc[data['Unit'] == units['PC_GDP']]
    .set_index('Year')
    .loc[:, column_order]
)

In [109]:
# Task 1
#
# Section A: for every year in the percentage-unit frame, find the row with the
# largest Value and record its country, category and value.
max_percentage = {}
# sorted() makes the iteration order deterministic (a bare set has no defined order)
years = sorted(set(data_in_percentage.index))

for year in years:
    # Filter and rank this year's rows once, then read all three fields from
    # the same top row instead of repeating the filter + sort per field.
    top_row = (
        data_in_percentage.loc[data_in_percentage.index == year]
        .sort_values(by=['Value'], ascending=False)
        .head(1)
    )
    max_percentage[year] = {
        'Country': top_row.Country.values[0],
        'Category': top_row.Category.values[0],
        'Value': top_row.Value.values[0],
    }

# one row per year, ordered chronologically
pd.DataFrame(max_percentage).T.sort_index()

Unnamed: 0,Country,Category,Value
2012,Denmark,Social protection,24.6
2013,Finland,Social protection,24.6
2014,Finland,Social protection,25.2
2015,Finland,Social protection,25.3
2016,Finland,Social protection,25.5
2017,Finland,Social protection,24.5
2018,Finland,Social protection,24.3
2019,Finland,Social protection,24.1
2020,France,Social protection,27.3
2021,Austria,Social protection,21.9


In [125]:
# Task 1
#
# Section B: for every year in the million-euro frame, find the row with the
# largest Value and record its country, category and value.
max_euro = {}
# sorted() makes the iteration order deterministic (a bare set has no defined order)
years = sorted(set(data_in_euro.index))

for year in years:
    # Filter and rank this year's rows once, then read all three fields from
    # the same top row instead of repeating the filter + sort per field.
    top_row = (
        data_in_euro.loc[data_in_euro.index == year]
        .sort_values(by=['Value'], ascending=False)
        .head(1)
    )
    max_euro[year] = {
        'Country': top_row.Country.values[0],
        'Category': top_row.Category.values[0],
        'Value': top_row.Value.values[0],
    }

# one row per year, ordered chronologically
pd.DataFrame(max_euro).T.sort_index()

Unnamed: 0,Country,Category,Value
2012,Germany,Social protection,518918.0
2013,Germany,Social protection,533910.0
2014,Germany,Social protection,550151.0
2015,Germany,Social protection,577822.0
2016,Germany,Social protection,611468.0
2017,Germany,Social protection,634997.0
2018,Germany,Social protection,648812.0
2019,Germany,Social protection,681481.0
2020,Germany,Social protection,734813.0
2021,Austria,Social protection,88969.6
