In [61]:
from functools import reduce

import numpy
import pandas

In [2]:
# input folder with the per-source intermediate CSVs, and output folder for the merged result
source_path, sink_path = '../../intermediate/france/', '../../processed/'

In [3]:
# Read every intermediate CSV found under source_path into its own frame.

# target
df_unemployment_rate = pandas.read_csv(f'{source_path}unemployment_rate.csv')

# energy
df_electricity = pandas.read_csv(f'{source_path}monthly_electricity_statistics.csv')
df_gas_trade_balance = pandas.read_csv(f'{source_path}gas_trade_balance.csv')

# market activity
df_production = pandas.read_csv(f'{source_path}imf_production.csv')
df_prices = pandas.read_csv(f'{source_path}imf_prices.csv')
df_labour = pandas.read_csv(f'{source_path}imf_labour.csv')

# macro-economics
## figures (current vs constant prices, each with and without seasonal adjustment)
df_gdp_current_unadjusted = pandas.read_csv(f'{source_path}gdp_current_unadjusted.csv')
df_gdp_current_adjusted = pandas.read_csv(f'{source_path}gdp_current_adjusted.csv')
df_gdp_constant_unadjusted = pandas.read_csv(f'{source_path}gdp_constant_unadjusted.csv')
df_gdp_constant_adjusted = pandas.read_csv(f'{source_path}gdp_constant_adjusted.csv')
## indicators
df_consumer_confidence = pandas.read_csv(f'{source_path}consumer_confidence_index.csv')
df_business_confidence = pandas.read_csv(f'{source_path}business_confidence_index.csv')

# central bank and government intervention
df_interest_rates = pandas.read_csv(f'{source_path}central_banking_interest_rates.csv')

In [4]:
# three random rows of the raw unemployment table
df_unemployment_rate.sample(n = 3)

Unnamed: 0,SUBJECT,TIME,Value,Flag Codes
1413,WOMEN,2020-06,7.1,
92,TOT,1990-09,9.3,
861,MEN,2014-08,10.4,


In [5]:
# Turn the long unemployment table (one row per SUBJECT and month) into one row
# per month with one "Unemployment_Rate_<SUBJECT>" column per subject.
df_unemployment_rate = (
    df_unemployment_rate
    # remove unnecessary data
    .drop(columns = ['Flag Codes'])
    # ensure proper future column names
    .assign(SUBJECT = lambda frame: frame['SUBJECT'].map(lambda subject: f"Unemployment_Rate_{subject}"))
    # pivot
    .pivot(index = 'TIME', columns = 'SUBJECT', values = 'Value')
    .reset_index()
    # time-series related fix: consistent column name, 'YYYY-MM' strings parsed to timestamps
    .rename(columns = {'TIME': 'Time'})
    .assign(Time = lambda frame: pandas.to_datetime(frame['Time'], format = '%Y-%m', exact = True, errors = 'raise'))
)

In [6]:
# three random rows of the reshaped unemployment table
df_unemployment_rate.sample(n = 3)

SUBJECT,Time,Unemployment_Rate_MEN,Unemployment_Rate_TOT,Unemployment_Rate_WOMEN
364,2013-05-01,10.5,10.4,10.3
396,2016-01-01,10.7,10.3,9.8
189,1998-10-01,10.4,12.0,13.9


In [7]:
# three random rows of the raw electricity table
df_electricity.sample(n = 3)

Unnamed: 0,Time,Balance,Product,Value,Unit
1184,February 2018,Net Electricity Production,Oil and Petroleum Products,576.572,GWh
1282,September 2017,Net Electricity Production,Total Combustible Fuels,3862.144,GWh
1761,September 2015,Net Electricity Production,Nuclear,30643.271,GWh


In [8]:
# Reshape the electricity statistics into one row per 'Time' value with one
# column per "<Product>_<Balance>" series.

# drop unnecessary data: the unit column and every balance except distribution
# losses and calculated final consumption.
# The trailing .copy() detaches the filtered rows from the frame they were taken
# from, so adding a column below no longer raises pandas' SettingWithCopyWarning.
df_electricity = df_electricity.drop(columns = ['Unit'], inplace = False)
df_electricity = df_electricity[df_electricity['Balance'].isin(['Distribution Losses', 'Final Consumption (Calculated)'])].copy()

# ensure proper future column names: "<Product>_<Balance>" with spaces replaced by underscores
df_electricity['Product_Balance'] = df_electricity['Product'].str.cat(others = df_electricity['Balance'], sep = '_', join = 'right').str.replace(pat = ' ', repl = '_')
df_electricity = df_electricity.drop(columns = ['Balance', 'Product'], inplace = False)

# pivot
df_electricity = df_electricity.pivot(index = 'Time', columns = 'Product_Balance', values = 'Value').reset_index()

# time-series related fix: parse "<Month name> <year>" labels such as "February 2018"
df_electricity['Time'] = pandas.to_datetime(arg = df_electricity['Time'], format = '%B %Y', exact = True, errors = 'raise')


In [9]:
# three random rows of the reshaped electricity table
df_electricity.sample(n = 3)

Product_Balance,Time,Electricity_Distribution_Losses,Electricity_Final_Consumption_(Calculated)
127,2019-11-01,3692.4723,41088.176
12,2022-04-01,2868.1163,34900.1343
94,2012-03-01,3374.201,40747.858


In [10]:
# show the raw gas table as loaded: one row per trade direction, one column per timestamp
df_gas_trade_balance

Unnamed: 0,Trade_Direction,2008-10-01 00:00:00,2008-11-01 00:00:00,2008-12-01 00:00:00,2009-01-01 00:00:00,2009-02-01 00:00:00,2009-03-01 00:00:00,2009-04-01 00:00:00,2009-05-01 00:00:00,2009-06-01 00:00:00,...,2022-05-01 00:00:00,2022-06-01 00:00:00,2022-07-01 00:00:00,2022-08-01 00:00:00,2022-09-01 00:00:00,2022-10-01 00:00:00,2022-11-01 00:00:00,2022-12-01 00:00:00,2023-01-01 00:00:00,2023-02-01 00:00:00
0,Import,4174.463,4842.725,4676.27,4845.881,4111.289,4544.226,3832.155,0.0,3144.742,...,5966.236,4041.276,4910.323,4669.32,4157.898,4634.326,5453.07,5657.513,4404.261,3464.472
1,Export,564.896,677.133,758.52,746.67,666.563,553.577,363.008,159.043,0.0,...,1279.066,616.143,1093.435,1696.415,1512.808,1669.958,1483.71,1424.462,1513.878,1492.388


In [11]:
# Collapse the wide import/export table into a single monthly
# Natural_Gas_Trade_Balance series (import minus export).
df_gas_trade_balance = (
    df_gas_trade_balance
    # melt to vertical
    .melt(id_vars = ['Trade_Direction'], var_name = 'Time', value_name = 'Value')
    # ensure proper future column names
    .assign(Trade_Direction = lambda frame: frame['Trade_Direction'].map(lambda direction: f"Natural_Gas_{direction}"))
    # pivot
    .pivot(index = 'Time', columns = 'Trade_Direction', values = 'Value')
    .reset_index()
    # calculate useful data
    .assign(Natural_Gas_Trade_Balance = lambda frame: frame['Natural_Gas_Import'] - frame['Natural_Gas_Export'])
    # drop unnecessary data
    .drop(columns = ['Natural_Gas_Export', 'Natural_Gas_Import'])
    # time-series related fix: keep only the leading 'YYYY-MM-DD' of each label before parsing
    .assign(Time = lambda frame: pandas.to_datetime(frame['Time'].str[:10], format = '%Y-%m-%d', exact = True, errors = 'raise'))
)

In [12]:
# three random rows of the computed gas trade balance
df_gas_trade_balance.sample(n = 3)

Trade_Direction,Time,Natural_Gas_Trade_Balance
98,2016-12-01,4179.709
58,2013-08-01,3081.475
146,2020-12-01,3186.783


In [13]:
# three random rows of the raw production table
df_production.sample(n = 3)

Unnamed: 0,Indicator,Indicator_Code,Base Year,1990M01,1990M02,1990M03,1990M04,1990M05,1990M06,1990M07,...,2022M06,2022M07,2022M08,2022M09,2022M10,2022M11,2022M12,2023M01,2023M02,2023M03
2,"Industrial Production, Seasonally adjusted, Index",AIP_SA_IX,2010=100,89.992501,91.492376,90.992417,91.392384,91.692359,90.192484,90.392467,...,98.991751,97.291892,99.891676,98.991751,96.391967,98.391801,...,...,...,...
1,"Economic Activity, Industrial Production, Index",AIP_IX,2010=100,91.327081,88.837248,97.900241,90.032368,88.438875,94.613661,87.940908,...,102.481534,91.426674,78.081169,102.182754,98.896174,101.78438,...,...,...,...
0,"Economic Activity, Industrial Production, Manu...",AIPMA_IX,2010=100,94.121599,96.640914,95.532415,94.92778,95.834733,93.819281,94.121599,...,101.276453,99.563319,102.687269,101.981861,99.865637,102.284179,...,...,...,...


In [14]:
# Reshape the wide production table (one column per 'YYYYMmm' period) into one
# row per month with one column per indicator.
# 'Base Year' is part of the pivot index, so it stays as a regular column afterwards.
df_production = (
    df_production
    # drop unnecessary data
    .drop(columns = ['Indicator_Code'])
    # ensure proper future column names: ', ' becomes '|' and ' ' becomes '_'
    .assign(Indicator = lambda frame: frame['Indicator'].str.replace(', ', '|').str.replace(' ', '_'))
    # melt to vertical
    .melt(id_vars = ['Indicator', 'Base Year'], var_name = 'Time')
    # pivot
    .pivot(index = ['Base Year', 'Time'], columns = 'Indicator', values = 'value')
    .reset_index()
    # time-series related fix
    .assign(Time = lambda frame: pandas.to_datetime(frame['Time'], format = '%YM%m', exact = True, errors = 'raise'))
)

In [15]:
# three random rows of the reshaped production table
df_production.sample(n = 3)

Indicator,Base Year,Time,Economic_Activity|Industrial_Production|Index,Economic_Activity|Industrial_Production|Manufacturing|Index,Industrial_Production|Seasonally_adjusted|Index
353,2010=100,2019-06-01,102.38194,105.105811,103.791351
341,2010=100,2018-06-01,108.35754,105.609674,104.191317
245,2010=100,2010-06-01,108.457133,99.865637,99.691692


In [16]:
# three random rows of the raw prices table
df_prices.sample(n = 3)

Unnamed: 0,Indicator,Indicator_Code,Base Year,1990M01,1990M02,1990M03,1990M04,1990M05,1990M06,1990M07,...,2022M06,2022M07,2022M08,2022M09,2022M10,2022M11,2022M12,2023M01,2023M02,2023M03
1,"Prices, Producer Price Index, All Commodities,...",PPPI_IX,2010=100,...,...,...,...,...,...,...,...,139.411764705882,142.42214532872,145.951557093426,146.36678200692,144.602076124568,145.432525951557,147.093425605536,...,...,...
0,"Financial Market Prices, Equities, Index",FPE_IX,,51.8143327968443,49.6797608425582,51.284692130687,55.232582960139,56.0744047746106,54.1431507989703,53.3042640209359,...,...,...,...,...,...,...,...,...,...,...
2,"Prices, Consumer Price Index, All items, Index",PCPI_IX,2010=100,70.1261679776171,70.2739798342396,70.4429076703796,70.8335532914533,70.9391331890408,70.9285751992821,70.9496911787996,...,118.830174734731,119.168030407011,119.706487884707,119.030776540147,120.255503352162,120.635590983477,120.530011085889,120.994562635274,122.240405426807,123.30676239244


In [17]:
# Reshape the wide prices table into one row per month with one column per
# "<indicator>|Base_Year-<base year>" series.
df_prices = (
    df_prices
    # drop unnecessary data
    .drop(columns = ['Indicator_Code'])
    # ensure proper future column names: ', ' becomes '|' and ' ' becomes '_'
    .assign(Indicator = lambda frame: frame['Indicator'].str.replace(', ', '|').str.replace(' ', '_'))
    # melt to vertical
    .melt(id_vars = ['Indicator', 'Base Year'], var_name = 'Time')
    # prepare for pivot: fold the base year into the indicator name ('None' when it is missing)
    .assign(Indicator = lambda frame: frame['Indicator'].str.cat(others = frame['Base Year'], sep = '|Base_Year-', na_rep = 'None'))
    .drop(columns = ['Base Year'])
    # pivot
    .pivot(index = 'Time', columns = 'Indicator', values = 'value')
    .reset_index()
    # time-series related fix
    .assign(Time = lambda frame: pandas.to_datetime(frame['Time'], format = '%YM%m', exact = True, errors = 'raise'))
)

In [18]:
# three random rows of the reshaped prices table
df_prices.sample(n = 3)

Indicator,Time,Financial_Market_Prices|Equities|Index|Base_Year-None,Prices|Consumer_Price_Index|All_items|Index|Base_Year-2010=100,Prices|Producer_Price_Index|All_Commodities|Index|Base_Year-2010=100
254,2011-03-01,105.171161,101.831811,103.910035
143,2001-12-01,120.721251,86.110964,87.197232
327,2017-04-01,137.00083,106.910204,103.598616


In [19]:
# show the raw labour table as loaded: one row per indicator, one column per 'YYYYMmm' period
df_labour

Unnamed: 0,Indicator,Indicator_Code,1990M01,1990M02,1990M03,1990M04,1990M05,1990M06,1990M07,1990M08,...,2022M06,2022M07,2022M08,2022M09,2022M10,2022M11,2022M12,2023M01,2023M02,2023M03
0,"Labor Markets, Wage Rates, Index",LWR_IX,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1,"Unemployment, Persons, Number of",LU_PE_NUM,...,...,...,...,...,...,...,...,...,2132,2186,2361,2213,2209,2176,...,...,...,...
2,"Labor Markets, Unemployment Rate, Percent",LUR_PT,...,...,...,...,...,...,...,...,...,7,7.1,7.6,7.2,7.2,7.1,...,...,...,...


In [20]:
# Reshape the wide labour table into one row per month with one column per indicator.
df_labour = (
    df_labour
    # drop unnecessary data
    .drop(columns = ['Indicator_Code'])
    # ensure proper future column names: ', ' becomes '|' and ' ' becomes '_'
    .assign(Indicator = lambda frame: frame['Indicator'].str.replace(', ', '|').str.replace(' ', '_'))
    # melt to vertical
    .melt(id_vars = 'Indicator', var_name = 'Time')
    # pivot
    .pivot(index = 'Time', columns = 'Indicator', values = 'value')
    .reset_index()
    # time-series related fix
    .assign(Time = lambda frame: pandas.to_datetime(frame['Time'], format = '%YM%m', exact = True, errors = 'raise'))
)

In [21]:
# three random rows of the reshaped labour table
df_labour.sample(n = 3)

Indicator,Time,Labor_Markets|Unemployment_Rate|Percent,Labor_Markets|Wage_Rates|Index,Unemployment|Persons|Number_of
220,2008-05-01,7.1,96.731462,2055.0
286,2013-11-01,10.6,110.034263,3140.0
180,2005-01-01,9.2,87.694978,2589.0


In [22]:
# three random rows of the raw table (current prices, not seasonally adjusted)
df_gdp_current_unadjusted.sample(n = 3)

Unnamed: 0,Indicator,Indicator_Code,Scale,1990Q1,1990Q2,1990Q3,1990Q4,1991Q1,1991Q2,1991Q3,...,2020Q3,2020Q4,2021Q1,2021Q2,2021Q3,2021Q4,2022Q1,2022Q2,2022Q3,2022Q4
1,"Household Consumption Expenditure, incl. NPISH...",NCP_NSA_XDC,Millions,141798.2,143618.6,142812.8,153390.1,147003.9,148303.3,147718.3,...,320722.5,321043.6,314626.9,316739.2,332402.2,353340.9,344590.8,349902.8,354659.0,373231.0
0,"Gross Domestic Product, Nominal, Undjusted, Do...",NGDP_NSA_XDC,Millions,259990.9,265294.3,257171.4,271089.3,268950.9,273481.6,267696.3,...,585886.9,617580.6,604507.4,616341.8,623007.5,657007.2,644145.3,656877.9,651476.4,690213.8
2,"Government Consumption Expenditure, Nominal, U...",NCGG_NSA_XDC,Millions,54756.3,55633.9,55449.3,57184.5,58312.1,58795.5,58704.2,...,142804.2,152796.9,147048.2,152236.9,149871.2,157291.1,151698.6,155363.4,154650.9,163972.3


In [23]:
# Reshape the quarterly GDP table (current prices, unadjusted) into one row per
# month with one column per indicator, interpolating between quarters.
df_gdp_current_unadjusted = (
    df_gdp_current_unadjusted
    # drop unnecessary data
    .drop(columns = ['Indicator_Code', 'Scale'])
    # ensure proper future column names: ', ' becomes '|' and ' ' becomes '_'
    .assign(Indicator = lambda frame: frame['Indicator'].str.replace(', ', '|').str.replace(' ', '_'))
    # melt to vertical
    .melt(id_vars = 'Indicator', var_name = 'Time')
    # pivot
    .pivot(index = 'Time', columns = 'Indicator', values = 'value')
    .reset_index()
    # time-series related fix: 'YYYYQn' labels become quarterly periods used as the index
    .assign(Time = lambda frame: pandas.PeriodIndex(frame['Time'], freq = 'Q'))
    .set_index('Time')
    # data interpolation: upsample to months with each quarterly value placed at the
    # quarter's end, then fill the months in between
    .resample(rule = 'M', convention = 'end')
    .interpolate(method = 'time')
    .reset_index()
    # monthly periods back to timestamps
    .assign(Time = lambda frame: pandas.to_datetime(frame['Time'].astype(str), format = '%Y-%m', exact = True, errors = 'raise'))
)

In [24]:
# three random rows of the monthly, interpolated table
df_gdp_current_unadjusted.sample(n = 3)

Indicator,Time,Change_in_Inventories|Nominal|Undjusted|Domestic_Currency,Exports_of_Goods_and_Services|Nominal|Undjusted|Domestic_Currency,Government_Consumption_Expenditure|Nominal|Undjusted|Domestic_Currency,Gross_Domestic_Product|Nominal|Undjusted|Domestic_Currency,Gross_Fixed_Capital_Formation|Nominal|Undjusted|Domestic_Currency,Household_Consumption_Expenditure|incl._NPISHs|Nominal|Undjusted|Domestic_Currency,Imports_of_Goods_and_Services|Nominal|Undjusted|Domestic_Currency
152,2002-11-01,-1966.566667,109846.733333,91873.566667,402656.933333,85362.766667,218753.033333,101212.666667
255,2011-06-01,7689.4,147338.5,122151.8,516298.5,116738.7,281150.3,158770.3
303,2015-06-01,5887.1,171207.8,131581.2,548983.5,117968.7,295345.2,173006.5


In [25]:
# three random rows of the raw table (current prices, seasonally adjusted)
df_gdp_current_adjusted.sample(n = 3)

Unnamed: 0,Indicator,Indicator_Code,Scale,1990Q1,1990Q2,1990Q3,1990Q4,1991Q1,1991Q2,1991Q3,...,2020Q3,2020Q4,2021Q1,2021Q2,2021Q3,2021Q4,2022Q1,2022Q2,2022Q3,2022Q4
1,"Household Consumption Expenditure, incl. NPISH...",NCP_SA_XDC,Millions,143218.7,144741.9,145829.1,147921.1,148445.0,149452.4,150692.0,...,327986.0,311205.4,314838.0,319643.4,338738.0,343633.2,344438.2,353228.3,360456.7,363168.9
3,"Gross Fixed Capital Formation, Nominal, Season...",NFI_SA_XDC,Millions,60639.1,61271.7,62058.6,62490.4,62533.5,63287.9,63823.4,...,140363.3,144495.9,146660.8,150523.6,153284.7,154818.9,158285.1,161467.3,166824.4,168657.6
0,"Gross Domestic Product, Nominal, Seasonally Ad...",NGDP_SA_XDC,Millions,259857.9,263461.3,264380.7,266443.7,268395.0,272221.8,274385.0,...,598693.1,600410.4,606014.8,615114.9,635644.7,642048.5,646767.9,654785.2,664448.9,675691.0


In [26]:
# Reshape the quarterly GDP table (current prices, seasonally adjusted) into one
# row per month with one column per indicator, interpolating between quarters.
df_gdp_current_adjusted = (
    df_gdp_current_adjusted
    # drop unnecessary data
    .drop(columns = ['Indicator_Code', 'Scale'])
    # ensure proper future column names: ', ' becomes '|' and ' ' becomes '_'
    .assign(Indicator = lambda frame: frame['Indicator'].str.replace(', ', '|').str.replace(' ', '_'))
    # melt to vertical
    .melt(id_vars = 'Indicator', var_name = 'Time')
    # pivot
    .pivot(index = 'Time', columns = 'Indicator', values = 'value')
    .reset_index()
    # time-series related fix: 'YYYYQn' labels become quarterly periods used as the index
    .assign(Time = lambda frame: pandas.PeriodIndex(frame['Time'], freq = 'Q'))
    .set_index('Time')
    # data interpolation: upsample to months with each quarterly value placed at the
    # quarter's end, then fill the months in between
    .resample(rule = 'M', convention = 'end')
    .interpolate(method = 'time')
    .reset_index()
    # monthly periods back to timestamps
    .assign(Time = lambda frame: pandas.to_datetime(frame['Time'].astype(str), format = '%Y-%m', exact = True, errors = 'raise'))
)

In [27]:
# three random rows of the monthly, interpolated table
df_gdp_current_adjusted.sample(n = 3)

Indicator,Time,Change_in_Inventories|Nominal|Seasonally_Adjusted|Domestic_Currency,Exports_of_Goods_and_Services|Nominal|Seasonally_Adjusted|Domestic_Currency,Government_Final_Consumption_Expenditure|Nominal|Seasonally_adjusted|Domestic_Currency,Gross_Domestic_Product|Nominal|Seasonally_Adjusted|Domestic_Currency,Gross_Fixed_Capital_Formation|Nominal|Seasonally_Adjusted|Domestic_Currency,Household_Consumption_Expenditure|incl._NPISHs|Nominal|Seasonally_Adjusted|Domestic_Currency,Imports_of_Goods_and_Services|Nominal|Seasonally_Adjusted|Domestic_Currency
260,2011-11-01,2172.066667,148516.333333,122949.933333,517378.833333,117055.0,282890.266667,156204.766667
99,1998-06-01,3294.2,88802.2,76146.5,337318.2,66703.6,181821.6,79450.0
70,1996-01-01,997.433333,70056.166667,71941.8,308630.333333,61560.966667,169114.733333,65040.733333


In [28]:
# three random rows of the raw table (constant prices, not seasonally adjusted)
df_gdp_constant_unadjusted.sample(n = 3)

Unnamed: 0,Indicator,Indicator_Code,Base Year,Scale,1990Q1,1990Q2,1990Q3,1990Q4,1991Q1,1991Q2,...,2020Q3,2020Q4,2021Q1,2021Q2,2021Q3,2021Q4,2022Q1,2022Q2,2022Q3,2022Q4
3,"Gross Fixed Capital Formation, Real, Undjusted...",NFI_R_NSA_XDC,2010.0,Millions,81411.734278,83484.446704,80899.832056,84730.937682,81239.827528,83363.987893,...,120321.934711,137329.726143,130358.196317,135894.71999,128092.864334,140508.521535,134285.039016,136974.649458,130918.396604,144200.173656
4,"Change in Inventories, Real, Undjusted, Domest...",NINV_R_NSA_XDC,2010.0,Millions,3021.913991,6013.891929,-1522.74028,1936.717,4339.534576,2397.514822,...,3177.92148,-1057.07797,8676.2884,6645.36631,1964.44122,-610.878171,8040.855347,8022.446886,9877.749015,4129.098522
5,"Exports of Goods and Services, Real, Undjusted...",NX_R_NSA_XDC,2010.0,Millions,55805.954252,54485.863726,51174.274826,56825.782447,56144.704315,58060.290956,...,151450.289122,163010.630632,156534.827967,163918.283528,164826.613343,173560.524123,171220.121888,177261.438674,176946.187402,179581.049798


In [29]:
# Reshape the quarterly GDP table (constant prices, unadjusted) into one row per
# month with one column per "<indicator>|Base_Year-<base year>" series,
# interpolating between quarters.
df_gdp_constant_unadjusted = (
    df_gdp_constant_unadjusted
    # drop unnecessary data
    .drop(columns = ['Indicator_Code', 'Scale'])
    # ensure proper future column names: ', ' becomes '|' and ' ' becomes '_'
    .assign(Indicator = lambda frame: frame['Indicator'].str.replace(', ', '|').str.replace(' ', '_'))
    # melt to vertical
    .melt(id_vars = ['Indicator', 'Base Year'], var_name = 'Time')
    # prepare for pivot: fold the base year, cast to text, into the indicator name
    .assign(Indicator = lambda frame: frame['Indicator'].str.cat(others = frame['Base Year'].astype(str), sep = '|Base_Year-', na_rep = 'None'))
    .drop(columns = ['Base Year'])
    # pivot
    .pivot(index = 'Time', columns = 'Indicator', values = 'value')
    .reset_index()
    # time-series related fix: 'YYYYQn' labels become quarterly periods used as the index
    .assign(Time = lambda frame: pandas.PeriodIndex(frame['Time'], freq = 'Q'))
    .set_index('Time')
    # data interpolation: upsample to months with each quarterly value placed at the
    # quarter's end, then fill the months in between
    .resample(rule = 'M', convention = 'end')
    .interpolate(method = 'time')
    .reset_index()
    # monthly periods back to timestamps
    .assign(Time = lambda frame: pandas.to_datetime(frame['Time'].astype(str), format = '%Y-%m', exact = True, errors = 'raise'))
)

In [30]:
# three random rows of the monthly, interpolated table
df_gdp_constant_unadjusted.sample(n = 3)

Indicator,Time,Change_in_Inventories|Real|Undjusted|Domestic_Currency|Base_Year-2010.0,Exports_of_Goods_and_Services|Real|Undjusted|Domestic_Currency|Base_Year-2010.0,Government_Consumption_Expenditure|Real|Undjusted|Domestic_Currency|Base_Year-2010.0,Gross_Domestic_Product|Real|Undjusted|Domestic_Currency|Base_Year-2010.0,Gross_Fixed_Capital_Formation|Real|Undjusted|Domestic_Currency|Base_Year-2010.0,Household_Consumption_Expenditure|incl._NPISHs|Real|Undjusted|Domestic_Currency|Base_Year-2010.0,Imports_of_Goods_and_Services|Real|Undjusted|Domestic_Currency|Base_Year-2010.0
75,1996-06-01,1550.028493,75012.492951,98318.924431,387562.239546,80317.964456,204265.192705,70984.047806
104,1998-11-01,1488.420846,94550.109953,98627.071835,415442.589196,87269.7678,220621.677063,87264.562549
188,2005-11-01,1592.250956,128699.558949,110797.157302,483426.464586,109789.170267,260617.808184,128158.71666


In [31]:
# three random rows of the raw table (constant prices, seasonally adjusted)
df_gdp_constant_adjusted.sample(n = 3)

Unnamed: 0,Indicator,Indicator_Code,Base Year,Scale,1990Q1,1990Q2,1990Q3,1990Q4,1991Q1,1991Q2,...,2020Q3,2020Q4,2021Q1,2021Q2,2021Q3,2021Q4,2022Q1,2022Q2,2022Q3,2022Q4
0,"Gross Domestic Product, Real, Seasonally Adjus...",NGDP_R_SA_XDC,2010.0,Millions,355479.859751,357308.288391,359098.167863,359086.574128,359541.338387,360979.734456,...,544781.497039,539801.794582,540109.898093,545867.830019,564121.876101,567390.826328,566103.052201,568945.352977,569903.575184,570331.673852
4,"Change in Inventories, Real, Seasonally Adjust...",NINV_R_SA_XDC,2010.0,Millions,1576.984661,4108.8482,3753.842119,709.629946,3162.843292,983.166073,...,512.197043,4994.381685,6563.122733,4179.936493,63.19303,5009.891434,5099.045304,6560.575806,8989.963174,9844.352301
6,"Imports of Goods and Services, Real, Seasonall...",NM_R_SA_XDC,2010.0,Millions,57893.982931,58817.873826,58640.187676,58545.659414,60235.21959,60279.737486,...,170847.064547,172716.912568,175059.787331,177689.715805,179210.358507,188601.322095,191023.114948,193169.224463,201282.177523,200420.0528


In [32]:
# Reshape the quarterly GDP table (constant prices, seasonally adjusted) into one
# row per month with one column per "<indicator>|Base_Year-<base year>" series,
# interpolating between quarters.
df_gdp_constant_adjusted = (
    df_gdp_constant_adjusted
    # drop unnecessary data
    .drop(columns = ['Indicator_Code', 'Scale'])
    # ensure proper future column names: ', ' becomes '|' and ' ' becomes '_'
    .assign(Indicator = lambda frame: frame['Indicator'].str.replace(', ', '|').str.replace(' ', '_'))
    # melt to vertical
    .melt(id_vars = ['Indicator', 'Base Year'], var_name = 'Time')
    # prepare for pivot: fold the base year, cast to text, into the indicator name
    .assign(Indicator = lambda frame: frame['Indicator'].str.cat(others = frame['Base Year'].astype(str), sep = '|Base_Year-', na_rep = 'None'))
    .drop(columns = ['Base Year'])
    # pivot
    .pivot(index = 'Time', columns = 'Indicator', values = 'value')
    .reset_index()
    # time-series related fix: 'YYYYQn' labels become quarterly periods used as the index
    .assign(Time = lambda frame: pandas.PeriodIndex(frame['Time'], freq = 'Q'))
    .set_index('Time')
    # data interpolation: upsample to months with each quarterly value placed at the
    # quarter's end, then fill the months in between
    .resample(rule = 'M', convention = 'end')
    .interpolate(method = 'time')
    .reset_index()
    # monthly periods back to timestamps
    .assign(Time = lambda frame: pandas.to_datetime(frame['Time'].astype(str), format = '%Y-%m', exact = True, errors = 'raise'))
)

In [33]:
# three random rows of the monthly, interpolated table
df_gdp_constant_adjusted.sample(n = 3)

Indicator,Time,Change_in_Inventories|Real|Seasonally_Adjusted|Domestic_Currency|Base_Year-2010.0,Exports_of_Goods_and_Services|Real|Seasonally_Adjusted|Domestic_Currency|Base_Year-2010.0,Government_Final_Consumption_Expenditure|Real|Seasonally_adjusted|Domestic_Currency|Base_Year-2010.0,Gross_Domestic_Product|Deflator|Seasonally_Adjusted|Base_Year-2010.0,Gross_Domestic_Product|Real|Seasonally_Adjusted|Domestic_Currency|Base_Year-2010.0,Gross_Fixed_Capital_Formation|Real|Seasonally_Adjusted|Domestic_Currency|Base_Year-2010.0,Household_Consumption_Expenditure|incl._NPISHs|Real|Seasonally_Adjusted|Domestic_Currency|Base_Year-2010.0,Imports_of_Goods_and_Services|Real|Seasonally_Adjusted|Domestic_Currency|Base_Year-2010.0
277,2013-04-01,1603.144134,147921.966334,124194.420266,102.803066,512819.566049,111764.321049,277222.345556,149873.546368
371,2021-02-01,6040.209051,158626.816001,133756.543101,111.8802,540007.196922,130968.292072,285040.883014,174278.829077
301,2015-04-01,7190.717218,160095.875887,127261.064196,104.372477,524186.353791,112532.021732,283991.037387,166667.039612


In [34]:
# three random rows of the raw consumer confidence table
df_consumer_confidence.sample(n = 3)

Unnamed: 0,TIME,Value
516,2016-01,99.779801
384,2005-01,100.261142
16,1974-05,102.044056


In [35]:
# Give the consumer confidence series its final column names and parse its
# 'YYYY-MM' labels into timestamps.
df_consumer_confidence = (
    df_consumer_confidence
    # ensure proper future column names
    .rename(columns = {'Value': 'Consumer_Confidence', 'TIME': 'Time'})
    # time-series fix
    .assign(Time = lambda frame: pandas.to_datetime(frame['Time'], format = '%Y-%m', exact = True, errors = 'raise'))
)

In [36]:
# three random rows of the prepared consumer confidence table
df_consumer_confidence.sample(n = 3)

Unnamed: 0,Time,Consumer_Confidence
27,1975-04-01,101.403283
65,1978-06-01,102.860171
53,1977-06-01,100.658121


In [37]:
# three random rows of the raw business confidence table
df_business_confidence.sample(n = 3)

Unnamed: 0,TIME,Value
321,2002-09,100.014465
123,1986-03,99.361799
179,1990-11,98.689162


In [38]:
# Give the business confidence series its final column names and parse its
# 'YYYY-MM' labels into timestamps.
df_business_confidence = (
    df_business_confidence
    # ensure proper future column names
    .rename(columns = {'Value': 'Business_Confidence', 'TIME': 'Time'})
    # time-series fix
    .assign(Time = lambda frame: pandas.to_datetime(frame['Time'], format = '%Y-%m', exact = True, errors = 'raise'))
)

In [39]:
# three random rows of the prepared business confidence table
df_business_confidence.sample(n = 3)

Unnamed: 0,Time,Business_Confidence
350,2005-02-01,100.107458
31,1978-07-01,100.301001
299,2000-11-01,103.245231


In [40]:
# three random rows of the raw interest rates table
df_interest_rates.sample(n = 3)

Unnamed: 0,Indicator,Indicator_Code,1990M01,1990M02,1990M03,1990M04,1990M05,1990M06,1990M07,1990M08,...,2022M01,2022M02,2022M03,2022M04,2022M05,2022M06,2022M07,2022M08,2022M09,2022M10
0,Deposit Rate,FIDR_PA,4.5,4.5,4.5,4.5,4.5,4.5,4.5,4.5,...,...,...,...,...,...,...,...,...,...,...
3,"Harmonized Euro Area Rates, New Business, Depo...",FIHN_HH_D_AM_1Y_PA,...,...,...,...,...,...,...,...,...,0.48,0.49,0.48,0.42,0.41,0.44,0.64,0.85,1.1,1.46
2,"Harmonized Euro Area Rates, Outstanding Amount...",FIHO_HH_D_AM_2Y_PA,...,...,...,...,...,...,...,...,...,0.36,0.36,0.37,0.36,0.35,0.36,0.38,0.4,0.54,0.91


In [41]:
# Reshape the wide interest rates table into one row per month with one column per indicator.
df_interest_rates = (
    df_interest_rates
    # drop unnecessary data
    .drop(columns = ['Indicator_Code'])
    # ensure proper future column names: ', ' becomes '|' and ' ' becomes '_'
    .assign(Indicator = lambda frame: frame['Indicator'].str.replace(', ', '|').str.replace(' ', '_'))
    # melt to vertical
    .melt(id_vars = 'Indicator', var_name = 'Time')
    # pivot
    .pivot(index = 'Time', columns = 'Indicator', values = 'value')
    .reset_index()
    # time-series related fix
    .assign(Time = lambda frame: pandas.to_datetime(frame['Time'], format = '%YM%m', exact = True, errors = 'raise'))
)

In [42]:
# three random rows of the reshaped interest rates table
df_interest_rates.sample(n = 3)

Indicator,Time,Deposit_Rate,Government_Bonds,Harmonized_Euro_Area_Rates|Loans|Households|Consumer_Credit_and_Other|Up_to_1_Year,Harmonized_Euro_Area_Rates|Loans|Households|House_Purchase|Over_5_Years,Harmonized_Euro_Area_Rates|Loans|Non-Financial_Corporations|Up_to_1_Year,Harmonized_Euro_Area_Rates|New_Business|Deposits|Households|Agreed_Maturity|Up_to_1_Year,Harmonized_Euro_Area_Rates|New_Business|Deposits|Non-financial_Corporations|Agreed_Maturity|Up_to_1_Year,Harmonized_Euro_Area_Rates|New_Business|Loans|Households|Consumption|Floating_Rate_and_up_to_1_Year,Harmonized_Euro_Area_Rates|New_Business|Loans|Households|House_Purchase|Over_5_Years,Harmonized_Euro_Area_Rates|Outstanding_Amounts|Deposits|Households|Agreed_Maturity|Up_to_2_Years,Harmonized_Euro_Area_Rates|Outstanding_Amounts|Deposits|Non-Financial_Corporations|Agreed_Maturity|Up_to_2_Years
68,1995-09-01,4.5,7.35,...,...,...,...,...,...,...,...,...
31,1992-08-01,4.5,9.04,...,...,...,...,...,...,...,...,...
384,2022-01-01,...,...,2.55,1.43,1.15,0.48,0.07,5.09,0.84,0.36,0.04


In [44]:
# every prepared frame, listed in the order in which they get merged
dfs: list[pandas.DataFrame] = [
    # target
    df_unemployment_rate,
    # energy
    df_electricity,
    df_gas_trade_balance,
    # market activity
    df_production,
    df_prices,
    df_labour,
    # macro-economics: figures
    df_gdp_current_unadjusted,
    df_gdp_current_adjusted,
    df_gdp_constant_unadjusted,
    df_gdp_constant_adjusted,
    # macro-economics: indicators
    df_consumer_confidence,
    df_business_confidence,
    # central bank and government intervention
    df_interest_rates,
]

In [51]:
# Outer-join all frames on 'Time', folding them left to right so that every
# timestamp present in any frame is kept, then order the rows chronologically.
df_all = reduce(
    lambda merged, frame: merged.merge(frame, how = 'outer', on = ['Time']),
    dfs,
).sort_values(by = 'Time')

In [65]:
# Cells holding the literal string '...' are placeholders for missing values;
# turn them into real NaN.
# numpy.nan is used because the numpy.NaN alias was removed in NumPy 2.0 and
# raises AttributeError there.
df_all = df_all.replace(to_replace = '...', value = numpy.nan, inplace = False)

In [66]:
# write the merged table to the sink folder, leaving the row index out of the sheet
df_all.to_excel(f'{sink_path}france.xlsx', index = False)