## Data Processing - Intermediate to Processed (not Model Ready)

## Pre-Requisites

In [1]:
import numpy
import pandas

In [2]:
def t_unemployment_rate(df: pandas.DataFrame) -> pandas.DataFrame:
    # remove unnecessary data
    df = df.drop(columns = ['Flag Codes'], inplace = False)

    # ensure proper future column names
    df['SUBJECT'] = df['SUBJECT'].apply(lambda x: f"Unemployment_Rate_{x}")

    # pivot
    df = df.pivot(index = 'TIME', columns = 'SUBJECT', values = 'Value').reset_index()

    # time-series related fix
    df['TIME'] = pandas.to_datetime(arg = df['TIME'], format = '%Y-%m', exact = True, errors = 'raise')
    df = df.rename(columns = {'TIME': 'Time'}, inplace = False)
    return df

In [3]:
country = 'germany'

source_path = f'../../intermediate/{country}/'
sink_path = '../../processed/'

## Load Intermediate Data

In [4]:
# target
df_unemployment_rate = pandas.read_csv(filepath_or_buffer = f'{source_path}unemployment_rate.csv',)

## Transform

In [5]:
df_unemployment_rate = t_unemployment_rate(df_unemployment_rate)

## Combine

In [6]:
df_all = df_unemployment_rate

In [7]:
df_all = df_all.sort_values(by = 'Time', inplace = False)

## Clean and Test Index

In [8]:
df_all = df_all.replace(to_replace = '...', value = numpy.NaN, inplace = False)

In [9]:
df_all.set_index(keys = 'Time', verify_integrity = True, inplace = False)

SUBJECT,Unemployment_Rate_MEN,Unemployment_Rate_TOT,Unemployment_Rate_WOMEN
Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1991-01-01,4.3,5.2,6.5
1991-02-01,4.3,5.2,6.5
1991-03-01,4.3,5.3,6.6
1991-04-01,4.3,5.3,6.6
1991-05-01,4.4,5.3,6.7
...,...,...,...
2022-10-01,3.2,3.1,2.9
2022-11-01,3.1,3.0,2.9
2022-12-01,3.1,3.0,2.9
2023-01-01,3.0,2.9,2.8


## Save

In [10]:
df_all.to_excel(
    excel_writer = f'{sink_path}{country}.xlsx', index = False
)

## Check Constituent Datasets

In [11]:
df_unemployment_rate.columns

Index(['Time', 'Unemployment_Rate_MEN', 'Unemployment_Rate_TOT',
       'Unemployment_Rate_WOMEN'],
      dtype='object', name='SUBJECT')