In [2]:
# Import the Eurostat module
import eurostat
import numpy as np
import pandas as pd
import os

In [7]:
# Download Eurostat's table of contents as a DataFrame via get_toc_df()
toc = eurostat.get_toc_df()
print(toc)

                                                  title  \
0     Employment by sex, age, economic activity and ...   
1     Employment by sex, age, economic activity and ...   
2     Employment by sex, age, professional status an...   
3     Economically active population by sex, age, ed...   
4     Economically active population by sex, age and...   
...                                                 ...   
7909  Percentage of letters delivered on-time (USP u...   
7910                                    Postal services   
7911    Number of enterprises providing postal services   
7912  Access points  (USP under direct or indirect d...   
7913  Domestic postal traffic, letter mail and parce...   

                         code     type       last update of data  \
0              LFST_R_LFE2EN1  dataset  2023-02-15T23:00:00+0100   
1              LFST_R_LFE2EN2  dataset  2024-12-12T23:00:00+0100   
2            LFST_R_LFE2ESTAT  dataset  2024-12-12T23:00:00+0100   
3            LFST_R

In [8]:
# Look up the desired dataset in the ToC by its exact title
dataset = toc[toc['title'] == 'Producer prices in industry, total - annual data']

# Print dataset
print(dataset)

# Print the code of the dataset (raises IndexError if the title matched no row)
print(dataset['code'].values[0])
# Code of the dataset = 'STS_INPP_A'

# Get the names of the dataset's filter parameters (dimensions)
pars = eurostat.get_pars('STS_INPP_A')
print(pars)

# Get the description of the available parameters as a list of tuples.
# Stored as `par_dic` rather than `dict`: rebinding the builtin `dict` would
# break every later `dict(...)` call in this kernel.
par_dic = eurostat.get_dic('STS_INPP_A')
print(par_dic)


# Check available NACE sectors
# NOTE(review): get_pars() reports this dimension as 'nace_r2' (lowercase);
# confirm that the upper-case spelling is accepted by get_par_values().
par_values = eurostat.get_par_values('STS_INPP_A', 'NACE_R2')
print(par_values)

                                                 title        code     type  \
5550  Producer prices in industry, total - annual data  STS_INPP_A  dataset   

           last update of data last table structure change data start data end  
5550  2025-01-16T11:00:00+0100    2025-01-09T11:00:00+0100       1976     2024  
STS_INPP_A
['freq', 'indic_bt', 'nace_r2', 's_adj', 'unit', 'geo']
[('freq', 'Time frequency', 'This code list contains the periodicity that refers to the frequency.'), ('indic_bt', 'Business trend indicator', None), ('nace_r2', 'Statistical classification of economic activities in the European Community (NACE Rev. 2)', 'The Statistical classification of economic activities in the European Community, abbreviated as NACE, is the classification of economic activities in the European Union (EU); the term NACE is derived from the French Nomenclature statistique des activités économiques dans la Communauté européenne. NACE is a four-digit classification providing the framewor

In [None]:
# Download the complete STS_INPP_A table (requested with flags=False)
data = eurostat.get_data_df('STS_INPP_A', flags=False)

# NACE Rev. 2 sector codes to request
sectors = [
    'B',
    'C10-C12', 'C13-C15', 'C16', 'C17', 'C18', 'C19', 'C20', 'C21', 'C22',
    'C23', 'C24', 'C25', 'C26', 'C27', 'C28', 'C29', 'C30', 'C31_C32', 'C33',
    'D35',
]

# Server-side filter: periods 2015-2023, geo code 'DE', sectors listed above
filter_pars = {
    'startPeriod': 2015,
    'endPeriod': 2023,
    'geo': ['DE'],
    'nace_r2': sectors,
}

# Download only the filtered slice and show it
filtered_data = eurostat.get_data_df('STS_INPP_A', filter_pars=filter_pars)
print(filtered_data)

# Write the slice to CSV in the current working directory.
# to_csv() is called without `sep`, so the file is comma-separated.
filtered_data.to_csv('filtered_data_1.1.csv', index=False)
print('Data saved to filtered_data_1.1.csv')

   freq indic_bt  nace_r2 s_adj    unit geo\TIME_PERIOD   2015   2016   2017  \
0     A  PRC_PRR        B   NSA     I15              DE  100.0   88.0   94.0   
1     A  PRC_PRR        B   NSA     I21              DE   78.0   68.6   73.2   
2     A  PRC_PRR        B   NSA  PCH_SM              DE   -7.9  -12.1    6.7   
3     A  PRC_PRR  C10-C12   NSA     I15              DE  100.0  100.1  103.7   
4     A  PRC_PRR  C10-C12   NSA     I21              DE   91.4   91.5   94.8   
..  ...      ...      ...   ...     ...             ...    ...    ...    ...   
58    A  PRC_PRR      C33   NSA     I21              DE   86.9   88.9   90.9   
59    A  PRC_PRR      C33   NSA  PCH_SM              DE    1.4    2.3    2.2   
60    A  PRC_PRR      D35   NSA     I15              DE  100.0   94.2   95.7   
61    A  PRC_PRR      D35   NSA     I21              DE   77.9   73.4   74.6   
62    A  PRC_PRR      D35   NSA  PCH_SM              DE   -3.2   -5.8    1.6   

     2018   2019   2020   2021   2022  

In [12]:
# File paths
technical_coeff_path_2020 = "C:/Users/danie/Nextcloud/Coding/Masterthesis/data/processed/technical_coefficients/technical_coefficients_2020.csv"
technical_coeff_path_2021 = "C:/Users/danie/Nextcloud/Coding/Masterthesis/data/processed/technical_coefficients/technical_coefficients_2021.csv"
price_shock_path = "C:/Users/danie/Nextcloud/Coding/Masterthesis/filtered_data_1.1.csv"
output_dir = "C:/Users/danie/Nextcloud/Coding/Masterthesis/data/processed/results/real_price_shocks_industry"

# Make sure the folder the results are written to exists. `output_dir` already
# ends in 'real_price_shocks_industry', so it is created directly instead of
# creating a second, never-used folder of the same name inside it.
os.makedirs(output_dir, exist_ok=True)


def _propagate_price_shock(coefficients, sector, shock, name):
    """Compute the price impact of a shock in `sector` on all other sectors.

    The shocked sector is treated as exogenous: its row and column are removed
    from `coefficients`, the remainder is transposed (A_EE), and the impact is
    inv(I - A_EE) @ (A_XE * shock), where A_XE is the `sector` column of
    `coefficients` without the sector's own entry.

    Parameters
    ----------
    coefficients : pd.DataFrame
        Technical coefficient matrix containing `sector` as row and column label.
    sector : str
        Label of the shocked sector.
    shock : float
        Price shock value applied to `sector`.
    name : str
        Name given to the returned Series.

    Returns
    -------
    pd.Series
        One value per remaining sector, indexed by the remaining column labels
        of `coefficients`.

    Raises
    ------
    numpy.linalg.LinAlgError
        If (I - A_EE) cannot be inverted.
    """
    a_ee = coefficients.drop(index=[sector]).drop(columns=[sector]).T
    a_xe = coefficients[sector].drop(index=[sector]).values.reshape(-1, 1)

    # Leontief inverse restricted to the endogenous (non-shocked) sectors
    leontief_inverse = np.linalg.inv(np.eye(a_ee.shape[0]) - a_ee.values)

    delta_p = leontief_inverse @ (a_xe * shock)
    return pd.Series(delta_p.flatten(), index=a_ee.index, name=name)


# Load data
technical_coefficients_2020 = pd.read_csv(technical_coeff_path_2020, index_col=0)
technical_coefficients_2021 = pd.read_csv(technical_coeff_path_2021, index_col=0)
price_shock = (
    pd.read_csv(price_shock_path)
    .rename(columns={'nace_r2': 'sector'})
    .set_index('sector')  # New structure: 'sector' is the only index
)

# Keep only the rows whose 'unit' column is 'PCH_SM'
price_shock = price_shock[price_shock['unit'] == 'PCH_SM']

# Prefix sectors with 'DE_'
price_shock.index = 'DE_' + price_shock.index

# Lists collecting one impact Series per shocked sector for 2021 and 2022
unweighted_impacts_2021 = []
unweighted_impacts_2022 = []

coefficient_tables = (technical_coefficients_2021, technical_coefficients_2020)

# Iterate over each sector in price_shock
for sector, row in price_shock.iterrows():
    # We are interested in the price shock for 2021 and 2022
    price_shock_value_2021 = row['2021']
    price_shock_value_2022 = row['2022']

    # Skip sectors whose shock is zero in both years
    if price_shock_value_2021 == 0 and price_shock_value_2022 == 0:
        continue

    # Both coefficient tables are indexed by `sector` below, so the sector must
    # be present as row and column in each of them (not only in the 2021 table).
    if any(sector not in table.index or sector not in table.columns for table in coefficient_tables):
        print(f"Skipping {sector} as it does not exist in technical_coefficients")
        continue

    # 2021 shock propagated through the 2021 coefficients
    try:
        unweighted_impact_series_2021 = _propagate_price_shock(
            technical_coefficients_2021, sector, price_shock_value_2021, f"{sector}_2021"
        )
    except np.linalg.LinAlgError:
        print(f"Singular matrix encountered for {sector} in 2021")
        continue

    # 2022 shock propagated through the 2020 coefficients
    # NOTE(review): the 2022 shock uses the 2020 table rather than the 2021
    # one - confirm this pairing is intentional.
    try:
        unweighted_impact_series_2022 = _propagate_price_shock(
            technical_coefficients_2020, sector, price_shock_value_2022, f"{sector}_2022"
        )
    except np.linalg.LinAlgError:
        print(f"Singular matrix encountered for {sector} in 2020")
        continue

    # A sector is only recorded when both years could be computed
    unweighted_impacts_2021.append(unweighted_impact_series_2021)
    unweighted_impacts_2022.append(unweighted_impact_series_2022)

# Convert results to DataFrames (one row per shocked sector)
unweighted_df_2021 = pd.DataFrame(unweighted_impacts_2021)
unweighted_df_2022 = pd.DataFrame(unweighted_impacts_2022)

# Ensure columns are in the same order as technical_coefficients columns (for standardization)
# NOTE(review): fill_value=0 only applies to columns that reindex() adds; the
# shocked sector's own cell in a column that already exists presumably stays
# NaN - add .fillna(0) if zeros are intended there.
column_order = technical_coefficients_2021.columns  # Capture the original order of columns
unweighted_df_2021 = unweighted_df_2021.reindex(columns=column_order, fill_value=0)
unweighted_df_2022 = unweighted_df_2022.reindex(columns=column_order, fill_value=0)

# Save results to CSV
output_file_2021 = os.path.join(output_dir, 'unweighted_real_price_shock_impacts_2021.csv')
output_file_2022 = os.path.join(output_dir, 'unweighted_real_price_shock_impacts_2022.csv')
unweighted_df_2021.to_csv(output_file_2021)
unweighted_df_2022.to_csv(output_file_2022)

print(f"Unweighted real price shock impacts for 2021 saved to {output_file_2021}")
print(f"Unweighted real price shock impacts for 2022 saved to {output_file_2022}")


Unweighted real price shock impacts for 2021 saved to C:/Users/danie/Nextcloud/Coding/Masterthesis/data/processed/results/real_price_shocks_industry\unweighted_real_price_shock_impacts_2021.csv
Unweighted real price shock impacts for 2022 saved to C:/Users/danie/Nextcloud/Coding/Masterthesis/data/processed/results/real_price_shocks_industry\unweighted_real_price_shock_impacts_2022.csv
