# **API & WEBHOOK-BASED DATA RETRIEVAL**

### **1) REQUIREMENTS SETUP**

In [2]:
# !pip install -r requirements.txt

In [3]:
import warnings
warnings.filterwarnings("ignore")
import os
import pandas as pd
import numpy as np

### **2) MODULES IMPORT**

In [4]:
from FRED_module import fetch_FRED
from EUROSTAT_module import fetch_EUROSTAT
from WB_module import fetch_WB 
from YFINANCE_module import fetch_YFINANCE
from DBNOMICS_module import fetch_DBNOMICS

### **3) DATA FETCHING**

#### **3.1) EUROSTAT-extracted indicators**

In [5]:
# Industrial Production Indicators by EU member state (monthly, level 1 indicators, Index: 2021=100, non-seasonally adjusted, 1996-01, 2025-08)
# https://ec.europa.eu/eurostat/databrowser/view/sts_inpr_m/default/table?lang=en
# Level 1 Indicators: Mining and Quarrying (B), Manufacturing (C), Electricity, gas, steam and air conditioning supply (D)
EURO_indprod_m_raw = fetch_EUROSTAT(
    "sts_inpr_m",
    filters = {
    "geo": ["EU27_2020", "EU28", "EA20", "EA19",  
    "BE", "BG", "CZ", "DK", "DE", "EE", "IE", "EL", "ES", "FR",
    "HR", "IT", "CY", "LV", "LT", "LU", "HU", "MT", "NL", "AT",
    "PL", "PT", "RO", "SI", "SK", "FI", "SE"],
    "s_adj": "NSA",
    "unit": "I21",
    "nace_r2": ["B", "C", "D"]
    }
)

EURO_indprod_m_raw = EURO_indprod_m_raw[["geo", "nace_r2", "time", "value"]]
EURO_indprod_m_raw = EURO_indprod_m_raw.rename(columns={
    "geo": "Country",
    "time": "Time",
    "nace_r2": "Level 1 Index", 
    "value": "Indprod Index Value (I21)"
})
EURO_indprod_m_raw = EURO_indprod_m_raw.sort_values(["Country", "Level 1 Index", "Time"])

EURO_indprod_m_raw.tail()

Unnamed: 0,Country,Level 1 Index,Time,Indprod Index Value (I21)
26789,SK,D,2025-04,82.8
26790,SK,D,2025-05,81.0
26791,SK,D,2025-06,79.1
26792,SK,D,2025-07,76.9
26793,SK,D,2025-08,75.1


In [6]:
# Unemployment rate by EU member state (initially quarterly transformed by duplication to monthly, Percentage of population in the labour force (age-class = total, sex-class=total), non-seasonally adjusted, 1994-01 (varying), 2025-08)
# https://ec.europa.eu/eurostat/databrowser/view/une_rt_m/default/table?lang=en
EURO_unem_m_raw = fetch_EUROSTAT(
    "une_rt_m",
    filters = {
    "geo": ["EU27_2020", "EU28", "EA20", "EA19",  
    "BE", "BG", "CZ", "DK", "DE", "EE", "IE", "EL", "ES", "FR",
    "HR", "IT", "CY", "LV", "LT", "LU", "HU", "MT", "NL", "AT",
    "PL", "PT", "RO", "SI", "SK", "FI", "SE"],
    "s_adj": "NSA",
    "unit": "PC_ACT",
    "freq": "M",
    "age": "TOTAL",
    "sex": "T"
    }
)

EURO_unem_m_raw = EURO_unem_m_raw[["geo", "time", "value"]]
EURO_unem_m_raw = EURO_unem_m_raw.rename(columns={
    "geo": "Country",
    "time": "Time",
    "value": "Unemployment Rate (%pop in LF)"
})

EURO_unem_m_raw.tail()

Unnamed: 0,Country,Time,Unemployment Rate (%pop in LF)
10970,SK,2025-05,5.2
10971,SK,2025-06,5.3
10972,SK,2025-07,5.4
10973,SK,2025-08,5.5
10974,SK,2025-09,5.6


In [7]:
# Gross domestic product at market prices by EU member state (initially quarterly transformed by duplication to monthly, current prices in million euro, non-seasonally adjusted, 1995-Q1 (varying), 2025-Q2)
# https://ec.europa.eu/eurostat/databrowser/view/namq_10_gdp/default/table?lang=en
EURO_GDP_q_raw = fetch_EUROSTAT(
    "namq_10_gdp",
    filters = {
    "geo": ["EU27_2020", "EU28", "EA20", "EA19",  
    "BE", "BG", "CZ", "DK", "DE", "EE", "IE", "EL", "ES", "FR",
    "HR", "IT", "CY", "LV", "LT", "LU", "HU", "MT", "NL", "AT",
    "PL", "PT", "RO", "SI", "SK", "FI", "SE"],
    "na_item": "B1GQ",
    "s_adj": "NSA",
    "unit": "CP_MEUR"
    }
)

EURO_GDP_q_raw = EURO_GDP_q_raw[["geo", "time", "value"]]
EURO_GDP_q_raw = EURO_GDP_q_raw.rename(columns={
    "geo": "Country",
    "time": "Time",
    "value": "GDP (Million EUR)"
})

# Increasing data granularity from quarterly to monthly data by extending the quarter value to single months 
EURO_GDP_q_raw["Time"] = pd.PeriodIndex(EURO_GDP_q_raw["Time"], freq="Q").to_timestamp()
expanded_rows = []

for _, row in EURO_GDP_q_raw.iterrows():
    quarter_end = row["Time"]
    start_month = quarter_end - pd.offsets.QuarterEnd(startingMonth=3) + pd.DateOffset(days=1)
    for i in range(3):
        month = (start_month + pd.DateOffset(months=i)).strftime("%Y-%m")
        expanded_rows.append({
            "Country": row["Country"],
            "Time": month,
            "GDP (Million EUR)": row["GDP (Million EUR)"] / 3
        })

EURO_GDP_m_raw = pd.DataFrame(expanded_rows)
EURO_GDP_m_raw = EURO_GDP_m_raw.sort_values(by=["Country", "Time"]).reset_index(drop=True)

EURO_GDP_m_raw.tail()

Unnamed: 0,Country,Time,GDP (Million EUR)
11302,SK,2025-02,10303.7
11303,SK,2025-03,10303.7
11304,SK,2025-04,11414.566667
11305,SK,2025-05,11414.566667
11306,SK,2025-06,11414.566667


In [8]:
# HICP by EU member state (monthly, annual rate of change, 1997-01 (varying), 2025-09)
# https://ec.europa.eu/eurostat/databrowser/view/PRC_HICP_MANR__custom_3807536/bookmark/table?lang=en&bookmarkId=cd099aa2-8977-42d5-b5d8-bc5edd3a94df&c=1668007557361 
EURO_HICP_m_raw = fetch_EUROSTAT(
    "prc_hicp_manr",
    filters={
    "geo": [
    "EU27_2020", "EU28", "EA20", "EA19", 
    "BE", "BG", "CZ", "DK", "DE", "EE", "IE", "EL", "ES", "FR",
    "HR", "IT", "CY", "LV", "LT", "LU", "HU", "MT", "NL", "AT",
    "PL", "PT", "RO", "SI", "SK", "FI", "SE"
    ],
    "coicop": "CP00", 
    "unit": "RCH_A"
    }
)

EURO_HICP_m_raw = EURO_HICP_m_raw[["geo", "time", "value"]]
EURO_HICP_m_raw = EURO_HICP_m_raw.rename(columns={
    "geo": "Country",
    "time": "Time",
    "value": "HICP (%, annual rate of change)"
})

EURO_HICP_m_raw.tail()

Unnamed: 0,Country,Time,"HICP (%, annual rate of change)"
10466,SK,2025-06,4.6
10467,SK,2025-07,4.6
10468,SK,2025-08,4.4
10469,SK,2025-09,4.6
10470,SK,2025-10,3.8


#### **3.2) FRED-extracted indicators**

In [9]:
# EUR-USD exchange rate, U.S. Dollars to One Euro (initially daily, converted to monthly, non-seasonally adjusted, 1999-01, 2025-10)
# https://fred.stlouisfed.org/series/DEXUSEU
EXEURUSD_d_raw = fetch_FRED("DEXUSEU") 
EXEURUSD_d_raw = EXEURUSD_d_raw.rename(columns= 
        {"date": "Time", 
         "DEXUSEU": "EUR-USD Spot Exchange Rate"
})

# Aggregation (dimension from daily to monthly)
EXEURUSD_d_raw = EXEURUSD_d_raw.set_index("Time")
EXEURUSD_m_raw = EXEURUSD_d_raw.resample("M").mean().reset_index()
EXEURUSD_m_raw["Time"] = EXEURUSD_m_raw["Time"].dt.to_period("M").astype(str)

# Extension to every Euro-adopting country
euro_countries = [
    "BE", "DE", "EE", "IE", "EL", "ES", "FR",
    "HR", "IT", "CY", "LV", "LT", "LU", "MT",
    "NL", "AT", "PT", "SI", "SK", "FI"
]
EXEURUSD_m_raw = (EXEURUSD_m_raw.assign(key=1).merge(pd.DataFrame({"Country": euro_countries, "key": 1}), on="key").drop("key", axis=1)).sort_values(["Country", "Time"]).reset_index(drop=True)
EXEURUSD_m_raw = EXEURUSD_m_raw[["Country", "Time", "EUR-USD Spot Exchange Rate"]]

EXEURUSD_m_raw.tail()

Unnamed: 0,Country,Time,EUR-USD Spot Exchange Rate
6435,SK,2025-06,1.153365
6436,SK,2025-07,1.167082
6437,SK,2025-08,1.164748
6438,SK,2025-09,1.173871
6439,SK,2025-10,1.164073


In [10]:
# BGN-USD exchange rate, U.S. Dollars to One Bulgarian Lev (monthly, non-seasonally adjusted, 1960-01, 2021-06)
# https://fred.stlouisfed.org/series/BGRCCUSMA02STM

EXBGNUSD_m_raw = fetch_FRED("BGRCCUSMA02STM") 
EXBGNUSD_m_raw = EXBGNUSD_m_raw.rename(columns= 
        {"date": "Time", 
         "BGRCCUSMA02STM": "BGN-USD Spot Exchange Rate"
})

EXBGNUSD_m_raw["Time"] = EXBGNUSD_m_raw["Time"].dt.to_period("M").astype(str)

EXBGNUSD_m_raw["Country"] = "BG"
EXBGNUSD_m_raw["BGN-USD Spot Exchange Rate"] = 1 / EXBGNUSD_m_raw["BGN-USD Spot Exchange Rate"]
EXBGNUSD_m_raw = EXBGNUSD_m_raw[["Country", "Time", "BGN-USD Spot Exchange Rate"]]

EXBGNUSD_m_raw.tail()

Unnamed: 0,Country,Time,BGN-USD Spot Exchange Rate
769,BG,2021-02,0.618544
770,BG,2021-03,0.608014
771,BG,2021-04,0.611023
772,BG,2021-05,0.621736
773,BG,2021-06,0.615877


In [11]:
# SKK-USD exchange rate, U.S. Dollar to One Swedish Kronor (initially SKK-USD daily, converted to US-SKK monthly, non-seasonally adjusted, 1971-01, 2025-10)
# https://fred.stlouisfed.org/series/DEXSDUS
EXSKKUSD_d_raw = fetch_FRED("DEXSDUS") 
EXSKKUSD_d_raw = EXSKKUSD_d_raw.rename(columns= 
        {"date": "Time", 
         "DEXSDUS": "SKK-USD Spot Exchange Rate"
})

# Aggregation (dimension from daily to monthly)
EXSKKUSD_d_raw = EXSKKUSD_d_raw.set_index("Time")
EXSKKUSD_m_raw = EXSKKUSD_d_raw.resample("M").mean().reset_index()
EXSKKUSD_m_raw["Time"] = EXSKKUSD_m_raw["Time"].dt.to_period("M").astype(str)

EXSKKUSD_m_raw["Country"] = "SE"
EXSKKUSD_m_raw = EXSKKUSD_m_raw[["Country", "Time", "SKK-USD Spot Exchange Rate"]]

EXSKKUSD_m_raw.tail()

Unnamed: 0,Country,Time,SKK-USD Spot Exchange Rate
653,SE,2025-06,9.548195
654,SE,2025-07,9.597495
655,SE,2025-08,9.577424
656,SE,2025-09,9.372686
657,SE,2025-10,9.425345


In [12]:
# DKK-USD exchange rate, U.S. Dollar to One Danish Krone (initially DN-US converted to US-DN, monthly non-seasonally adjusted, 1971-01, 2025-09)
# https://fred.stlouisfed.org/series/EXDNUS
EXUSDDKK_m_raw = fetch_FRED("EXDNUS") 
EXUSDDKK_m_raw = EXUSDDKK_m_raw.rename(columns= 
        {"date": "Time", 
         "EXDNUS": "USD-DKK Spot Exchange Rate"
})

EXUSDDKK_m_raw["Time"] = EXUSDDKK_m_raw["Time"].dt.to_period("M").astype(str)

# Conversion to DKK-USD spot exchange rate 
EXDKKUSD_m_raw = EXUSDDKK_m_raw.copy()
EXDKKUSD_m_raw["DKK-USD Spot Exchange Rate"] = 1 / EXDKKUSD_m_raw["USD-DKK Spot Exchange Rate"]
EXDKKUSD_m_raw = EXDKKUSD_m_raw.drop(columns=["USD-DKK Spot Exchange Rate"])

EXDKKUSD_m_raw["Country"] = "DK"
EXDKKUSD_m_raw = EXDKKUSD_m_raw[["Country", "Time", "DKK-USD Spot Exchange Rate"]]

EXDKKUSD_m_raw.tail()

Unnamed: 0,Country,Time,DKK-USD Spot Exchange Rate
653,DK,2025-06,0.154607
654,DK,2025-07,0.156387
655,DK,2025-08,0.156057
656,DK,2025-09,0.157257
657,DK,2025-10,0.155872


In [13]:
# CZK-USD exchange rate, U.S. Dollar to One Czech koruna (monthly non-seasonally adjusted, 1991-01, 2025-09)
# https://fred.stlouisfed.org/series/CCUSMA02CZM618N 
EXUSDCZK_m_raw = fetch_FRED("CCUSMA02CZM618N") 
EXUSDCZK_m_raw = EXUSDCZK_m_raw.rename(columns= 
        {"date": "Time", 
         "CCUSMA02CZM618N": "USD-CZK Spot Exchange Rate"
})

EXUSDCZK_m_raw["Time"] = EXUSDCZK_m_raw["Time"].dt.to_period("M").astype(str)

# Conversion to DKK-USD spot exchange rate 
EXCZKUSD_m_raw = EXUSDCZK_m_raw.copy()
EXCZKUSD_m_raw["CZK-USD Spot Exchange Rate"] = 1 / EXCZKUSD_m_raw["USD-CZK Spot Exchange Rate"]
EXCZKUSD_m_raw = EXCZKUSD_m_raw.drop(columns=["USD-CZK Spot Exchange Rate"])

EXCZKUSD_m_raw["Country"] = "CZ"
EXCZKUSD_m_raw = EXCZKUSD_m_raw[["Country", "Time", "CZK-USD Spot Exchange Rate"]]

EXCZKUSD_m_raw.tail()

Unnamed: 0,Country,Time,CZK-USD Spot Exchange Rate
412,CZ,2025-05,0.045283
413,CZ,2025-06,0.046426
414,CZ,2025-07,0.047421
415,CZ,2025-08,0.04744
416,CZ,2025-09,0.048186


In [14]:
# HUF-USD exchange rate, U.S. Dollar to One Hungarian Forint (monthly non-seasonally adjusted, 1968-01, 2025-09)
# https://fred.stlouisfed.org/series/CCUSMA02HUM618N
EXUSDHUF_m_raw = fetch_FRED("CCUSMA02HUM618N") 
EXUSDHUF_m_raw = EXUSDHUF_m_raw.rename(columns= 
        {"date": "Time", 
         "CCUSMA02HUM618N": "USD-HUF Spot Exchange Rate"
})

EXUSDHUF_m_raw["Time"] = EXUSDHUF_m_raw["Time"].dt.to_period("M").astype(str)

# Conversion to HUF-USD spot exchange rate 
EXHUFUSD_m_raw = EXUSDHUF_m_raw.copy()
EXHUFUSD_m_raw["HUF-USD Spot Exchange Rate"] = 1 / EXHUFUSD_m_raw["USD-HUF Spot Exchange Rate"]
EXHUFUSD_m_raw = EXHUFUSD_m_raw.drop(columns=["USD-HUF Spot Exchange Rate"])

EXHUFUSD_m_raw["Country"] = "HU"
EXHUFUSD_m_raw = EXHUFUSD_m_raw[["Country", "Time", "HUF-USD Spot Exchange Rate"]]

EXHUFUSD_m_raw.tail()

Unnamed: 0,Country,Time,HUF-USD Spot Exchange Rate
688,HU,2025-05,0.002794
689,HU,2025-06,0.002864
690,HU,2025-07,0.002928
691,HU,2025-08,0.002933
692,HU,2025-09,0.002995


In [15]:
# PLN-USD exchange rate, U.S. Dollar to One Polish Zloty (monthly non-seasonally adjusted, 1957-01, 2025-09)
# https://fred.stlouisfed.org/series/CCUSMA02PLM618N
EXUSDPLN_m_raw = fetch_FRED("CCUSMA02PLM618N") 
EXUSDPLN_m_raw = EXUSDPLN_m_raw.rename(columns= 
        {"date": "Time", 
         "CCUSMA02PLM618N": "USD-PLN Spot Exchange Rate"
})

EXUSDPLN_m_raw["Time"] = EXUSDPLN_m_raw["Time"].dt.to_period("M").astype(str)

# Conversion to PLN-USD spot exchange rate 
EXPLNUSD_m_raw = EXUSDPLN_m_raw.copy()
EXPLNUSD_m_raw["PLN-USD Spot Exchange Rate"] = 1 / EXPLNUSD_m_raw["USD-PLN Spot Exchange Rate"]
EXPLNUSD_m_raw = EXPLNUSD_m_raw.drop(columns=["USD-PLN Spot Exchange Rate"])

EXPLNUSD_m_raw["Country"] = "PL"
EXPLNUSD_m_raw = EXPLNUSD_m_raw[["Country", "Time", "PLN-USD Spot Exchange Rate"]]

EXPLNUSD_m_raw.tail()

Unnamed: 0,Country,Time,PLN-USD Spot Exchange Rate
820,PL,2025-05,0.26526
821,PL,2025-06,0.270047
822,PL,2025-07,0.274689
823,PL,2025-08,0.272733
824,PL,2025-09,0.275543


In [16]:
# RON-USD exchange rate, U.S. Dollar to One Romanian Leu (monthly non-seasonally adjusted, 1960-01, 2023-11)
# https://fred.stlouisfed.org/series/CCUSMA02PLM618N
EXUSDRON_m_raw = fetch_FRED("ROUCCUSMA02STM") 
EXUSDRON_m_raw = EXUSDRON_m_raw.rename(columns= 
        {"date": "Time", 
         "ROUCCUSMA02STM": "USD-RON Spot Exchange Rate"
})

EXUSDRON_m_raw["Time"] = EXUSDRON_m_raw["Time"].dt.to_period("M").astype(str)

# Conversion to RON-USD spot exchange rate 
EXRONUSD_m_raw = EXUSDRON_m_raw.copy()
EXRONUSD_m_raw["RON-USD Spot Exchange Rate"] = 1 / EXRONUSD_m_raw["USD-RON Spot Exchange Rate"]
EXRONUSD_m_raw = EXRONUSD_m_raw.drop(columns=["USD-RON Spot Exchange Rate"])

EXRONUSD_m_raw["Country"] = "RO"
EXRONUSD_m_raw = EXRONUSD_m_raw[["Country", "Time", "RON-USD Spot Exchange Rate"]]

EXRONUSD_m_raw.tail()

Unnamed: 0,Country,Time,RON-USD Spot Exchange Rate
798,RO,2023-07,0.223769
799,RO,2023-08,0.22078
800,RO,2023-09,0.215193
801,RO,2023-10,0.212644
802,RO,2023-11,0.217226


In [17]:
# Crude Oil Prices: Brent - Europe (monthly, average price, Dollars per barrel, not seasonally adjusted, 1987-01, 2025-09)
# https://fred.stlouisfed.org/series/MCOILBRENTEU

oilprice_m_raw = fetch_FRED("MCOILBRENTEU") 
oilprice_m_raw = oilprice_m_raw.rename(columns= 
        {"date": "Time", 
         "MCOILBRENTEU": "Crude Oil Price (Brent, Europe)"
})

oilprice_m_raw["Time"] = oilprice_m_raw["Time"].dt.to_period("M").astype(str)

oilprice_m_raw.tail()

Unnamed: 0,Time,"Crude Oil Price (Brent, Europe)"
457,2025-06,71.44
458,2025-07,71.04
459,2025-08,67.87
460,2025-09,67.99
461,2025-10,64.54


In [18]:
# Nominal Broad U.S. Dollar Index (monthly, index Jan 2006=100, not seasonally adjusted, 2006-01, 2025-09)
# https://fred.stlouisfed.org/series/TWEXBGSMTH

usdi_m_raw = fetch_FRED("TWEXBGSMTH") 
usdi_m_raw = usdi_m_raw.rename(columns= 
        {"date": "Time", 
         "TWEXBGSMTH": "Nominal Broad USD Index"
})

usdi_m_raw["Time"] = usdi_m_raw["Time"].dt.to_period("M").astype(str)

usdi_m_raw.tail()

Unnamed: 0,Time,Nominal Broad USD Index
627,2025-06,120.9747
628,2025-07,120.5266
629,2025-08,120.9844
630,2025-09,120.4534
631,2025-10,121.1712


In [19]:
# Market Yield on U.S. Treasury Securities at 10-Year Constant Maturity, Quoted on an Investment Basis (from daily to monthly, percent, not seasonally adjusted, 1962-01, 2025-10)
# https://fred.stlouisfed.org/series/DGS10

ustyield_d_raw = fetch_FRED("DGS10") 
ustyield_d_raw = ustyield_d_raw.rename(columns= 
        {"date": "Time", 
         "DGS10": "Market Yield on 10-Year US Trasury Securities"
})

# Aggregation (dimension from daily to monthly)
ustyield_d_raw = ustyield_d_raw.set_index("Time")
ustyield_m_raw = ustyield_d_raw.resample("M").mean().reset_index()
ustyield_m_raw["Time"] = ustyield_m_raw["Time"].dt.to_period("M").astype(str)

ustyield_m_raw.tail()

Unnamed: 0,Time,Market Yield on 10-Year US Trasury Securities
762,2025-07,4.391818
763,2025-08,4.264762
764,2025-09,4.120476
765,2025-10,4.061818
766,2025-11,4.133333


In [20]:
# CBOE Volatility Index VIX (from daily to monthly, index, not seasonally adjusted, 1990-01, 2025-10)
# https://fred.stlouisfed.org/series/VIXCLS

VIX_d_raw = fetch_FRED("VIXCLS") 
VIX_d_raw = VIX_d_raw.rename(columns= 
        {"date": "Time", 
         "VIXCLS": "CBOE Volatility Index (VIX)"
})

# Aggregation (dimension from daily to monthly)
VIX_d_raw = VIX_d_raw.set_index("Time")
VIX_m_raw = VIX_d_raw.resample("M").mean().reset_index()
VIX_m_raw["Time"] = VIX_m_raw["Time"].dt.to_period("M").astype(str)

VIX_m_raw.tail()

Unnamed: 0,Time,CBOE Volatility Index (VIX)
426,2025-07,16.381304
427,2025-08,15.75
428,2025-09,15.789091
429,2025-10,18.086522
430,2025-11,18.06


#### **3.3) YAHOO!FINANCE-extracted indicators**

In [21]:
# Monthly price and volume of EU countryâ€™s stock index (monthly, price (USD), number of securities traded, 2015-01, 2025-10)
# (!!!) Not available on YFinance for Bulgaria, Croatia, Cyprus, Estonia, Greece, Hungary, Latvia, Lithuania, Luxembourg, Malta, Poland, Romania, Slovakia, Slovenia, Portugal and Sweden
# (!!!) Volume column is dangerous, a lot of 0 values, depending on country, it must be carefully handled 

eu_stock_indices_tickers = {
    "AT": "^ATX",
    "BE": "^BFX",
    "CZ": "^PX",
    "DK": "^OMXC25",
    "FI": "^OMXH25",
    "FR": "^FCHI",
    "DE": "^GDAXI",
    "IE": "^ISEQ",
    "IT": "FTSEMIB.MI",
    "NL": "^AEX",
    "ES": "^IBEX"
}

start = "2015-01-01"
end = "2025-10-25"

list_single_country_dfs = []

# We iterate over each country and respective stock index
# We aggregate data through concatenation based on y axis
for country, ticker in eu_stock_indices_tickers.items():
    df = fetch_YFINANCE(ticker, start, end)

# MultiIndex columns
    if isinstance(df.columns, pd.MultiIndex):
        df.columns = [col[0] for col in df.columns]
    df = df.rename(columns={
        "Close": "Closing Price (USD)",
        "YearMonth": "Time"
    })
    df["Log Monthly Return"] = np.log(df["Closing Price (USD)"] / df["Closing Price (USD)"].shift(1))
    df["Country"] = country
    df["Stock Index"] = ticker

    df = df[["Country", "Stock Index", "Time", "Log Monthly Return", "Volume"]]

    list_single_country_dfs.append(df)

EURO_stock_m_raw = pd.concat(list_single_country_dfs, ignore_index=True)

EURO_stock_m_raw.tail()

Unnamed: 0,Country,Stock Index,Time,Log Monthly Return,Volume
1354,ES,^IBEX,2025-06,-0.011391,2351850100
1355,ES,^IBEX,2025-07,0.028541,2531475800
1356,ES,^IBEX,2025-08,0.036741,1894189600
1357,ES,^IBEX,2025-09,0.035465,2168514900
1358,ES,^IBEX,2025-10,0.035398,2351784400


#### **3.4) OTHERS-extracted indicators**

In [22]:
# Monthly US import by EU member state per HTS code-identified products (monthly, bilateral flows, general custom value in USD, not seasonally adjusted, 2020/01, 2025/06)
# (!!!) Harmonized EU country names to ISO-2
# (!!!) Already ziped df in the USITC_aggregated_US_import_module.py

# Target EU member countries acronyms dictionary (ISO-2 codes)
eu_country_map = {
    "Austria": "AT",
    "Belgium": "BE",
    "Bulgaria": "BG",
    "Croatia": "HR",
    "Cyprus": "CY",
    "Czechia (Czech Republic)": "CZ",
    "Czechia": "CZ",
    "Denmark": "DK",
    "Estonia": "EE",
    "Finland": "FI",
    "France": "FR",
    "Germany": "DE",
    "Greece": "GR",
    "Hungary": "HU",
    "Ireland": "IE",
    "Italy": "IT",
    "Latvia": "LV",
    "Lithuania": "LT",
    "Luxembourg": "LU",
    "Malta": "MT",
    "Netherlands": "NL",
    "Poland": "PL",
    "Portugal": "PT",
    "Romania": "RO",
    "Slovakia": "SK",
    "Slovenia": "SI",
    "Spain": "ES",
    "Sweden": "SE"
}

# Importing aggregated file of US imports (US_import_USITC_raw.csv)
# (!!!) Remember to specify the unzipping method (gzip)
US_import_raw = pd.read_csv("raw_df/US_import_USITC_raw.csv", sep=",", decimal=".", low_memory=False, compression="gzip")

# Data manual retructuring
# Country renaiming via ISO-2 code
US_import_raw.columns = US_import_raw.columns.str.strip()
US_import_raw["Country"] = US_import_raw["Country"].map(eu_country_map)
# Datetime format
US_import_raw["Year"] = US_import_raw["Year"].astype(int)
US_import_raw["Month"] = US_import_raw["Month"].astype(int)
US_import_raw["Time"] = (
    US_import_raw["Year"].astype(str) + "-" + US_import_raw["Month"].astype(str).str.zfill(2)
)
# Raw coulumns renaiming
US_import_raw = US_import_raw.rename(columns={
    "HTS Number": "HTS Code",
    "Description": "HTS Description",
    "General Customs Value": "Import - General Custom Value (USD)"
})
# Avoid HTS codes rounding
# (!!!) Since HTS codes are float values this may cause unwanted float-roundings of values. To avoid this, we transform them in strings
US_import_raw["HTS Code"] = (
    pd.to_numeric(US_import_raw["HTS Code"], errors="coerce")
    .astype("Int64")
    .astype(str)
)
# Normalization of comma and space
# (!!!) Simon: Don't bother the rest I just had issue with the format of the df
val = "Import - General Custom Value (USD)"
US_import_raw[val] = (
    US_import_raw[val]
    .astype(str)
    .str.replace("\u202f", "", regex=False)
    .str.replace("\xa0",  "", regex=False)
    .str.strip()
)

# Converting Import column to numeric and drop NaN
# (!!!) Normally it should be already in numeric format
US_import_raw[val] = pd.to_numeric(US_import_raw[val], errors="coerce")
US_import_raw = US_import_raw[US_import_raw[val].notna()].copy()

# Sorting by Country, HTS Code and Time
US_import_raw = (
    US_import_raw
    .sort_values(["Country", "HTS Code", "Time"])
    .reset_index(drop=True)
)

US_import_raw = US_import_raw[["Country", "HTS Code", "HTS Description", "Time", "Import - General Custom Value (USD)"]]
US_import_raw.tail()

Unnamed: 0,Country,HTS Code,HTS Description,Time,Import - General Custom Value (USD)
1845793,SK,999995,ESTIMATED IMPORTS OF LOW VALUED TRANSACTIONS,2025-01,4265.792
1845794,SK,999995,ESTIMATED IMPORTS OF LOW VALUED TRANSACTIONS,2025-02,4688.588
1845795,SK,999995,ESTIMATED IMPORTS OF LOW VALUED TRANSACTIONS,2025-03,4767.003
1845796,SK,999995,ESTIMATED IMPORTS OF LOW VALUED TRANSACTIONS,2025-04,5663.573
1845797,SK,999995,ESTIMATED IMPORTS OF LOW VALUED TRANSACTIONS,2025-05,5317.76


In [23]:
# Monthly US export to EU member state per HTS code-identified products (monthly, bilateral flows, FAS value in USD, not seasonally adjusted, 2020/01, 2025/06)

# ZIP_______________________________________________________________________________________________________________________________
# import pandas as pd
# US_export_USITC_raw = pd.read_csv("data_fetcher/raw_df/US_export_USITC_raw.csv", sep=";", decimal=",")
# US_export_USITC_raw.to_csv("data_fetcher/raw_df/US_export_USITC_raw.csv", index=False, sep=",", decimal=".", compression="gzip")
# __________________________________________________________________________________________________________________________________

# Importing aggregated file of US imports (US_export_USITC_raw.csv)
# (!!!) Remember to specify the unzipping method (gzip)
US_export_raw = pd.read_csv("raw_df/US_export_USITC_raw.csv", sep=",", decimal=".", low_memory=False, compression="gzip")

# Data manual retructuring
# Country renaiming via ISO-2 code
US_export_raw.columns = US_export_raw.columns.str.strip()
US_export_raw = US_export_raw[US_export_raw["Country"].isin(eu_country_map.keys())].copy()
US_export_raw["Country"] = US_export_raw["Country"].map(eu_country_map)
# Datetime format
US_export_raw["Year"] = US_export_raw["Year"].astype(int)
US_export_raw["Month"] = US_export_raw["Month"].astype(int)
US_export_raw["Time"] = (
    US_export_raw["Year"].astype(str) + "-" + US_export_raw["Month"].astype(str).str.zfill(2)
)
# Raw coulumns renaiming
US_export_raw = US_export_raw.rename(columns={
    "HTS Number": "HTS Code",
    "Description": "HTS Description",
    "FAS Value": "Export - FAS value (USD)"
})

# Avoid HTS codes rounding
# (!!!) Since HTS codes are float values this may cause unwanted float-roundings of values. To avoid this, we transform them in strings
# Normalization of comma and space
US_export_raw["HTS Code"] = (
    pd.to_numeric(US_export_raw["HTS Code"], errors="coerce")
    .astype("Int64")
    .astype(str)
)
US_export_raw["Export - FAS value (USD)"] = (
    US_export_raw["Export - FAS value (USD)"]
    .astype(str)
    .str.replace("\u202f", "", regex=False)
    .str.replace("\xa0",  "", regex=False)
    .str.strip()
    .str.replace(".", "", regex=False)
    .str.replace(",", ".", regex=False)
)
US_export_raw["Export - FAS value (USD)"] = pd.to_numeric(
    US_export_raw["Export - FAS value (USD)"],
    errors="coerce"
)

# Converting Export column to numeric and drop NaN
# (!!!) Normally it should be already in numeric format
US_export_raw = US_export_raw[US_export_raw["Export - FAS value (USD)"].notna()].copy()
US_export_raw = (US_export_raw.sort_values(["Country", "HTS Code", "Time"]).reset_index(drop=True))

US_export_raw = US_export_raw[["Country", "HTS Code", "HTS Description", "Time", "Export - FAS value (USD)"]]
US_export_raw.tail()

Unnamed: 0,Country,HTS Code,HTS Description,Time,Export - FAS value (USD)
69967,SK,97,"WORKS OF ART, COLLECTORS' PIECES AND ANTIQUES",2024-10,17.47
69968,SK,97,"WORKS OF ART, COLLECTORS' PIECES AND ANTIQUES",2024-12,2.76
69969,SK,97,"WORKS OF ART, COLLECTORS' PIECES AND ANTIQUES",2025-01,322.58
69970,SK,97,"WORKS OF ART, COLLECTORS' PIECES AND ANTIQUES",2025-02,9.22
69971,SK,97,"WORKS OF ART, COLLECTORS' PIECES AND ANTIQUES",2025-05,3.4


In [24]:
# US tariffs ratio on EU-derived imports
# https://policy.trade.ec.europa.eu/news/joint-statement-united-states-european-union-framework-agreement-reciprocal-fair-and-balanced-trade-2025-08-21_en
# https://www.whitehouse.gov/fact-sheets/2025/07/fact-sheet-the-united-states-and-european-union-reach-massive-trade-deal/
# https://www.whitehouse.gov/presidential-actions/2025/09/modifying-the-scope-of-reciprocal-tariffs-and-establishing-procedures-for-implementing-trade-and-security-agreements/

### **4) DATA ASSEMBLING**

#### **4.1) Dependent df**

In [25]:
# Aggregate FX market df
# No merging, but concatenation
# Time frame: 1957/01 (varying) - 2025/10 (varying)
# (!!!) We cannot merge them, neither concatenate them as they have no common ground (neither country of exchnage rate)

# Uploading as single dependent_df
EXEURUSD_m_raw.to_csv("aggregate_df/EXEURUSD_dependent_df.csv", index=False)
EXBGNUSD_m_raw.to_csv("aggregate_df/EXBGNUSD_dependent_df.csv", index=False)
EXSKKUSD_m_raw.to_csv("aggregate_df/EXSKKUSD_dependent_df.csv", index=False)
EXDKKUSD_m_raw.to_csv("aggregate_df/EXDKKUSD_dependent_df.csv", index=False)
EXCZKUSD_m_raw.to_csv("aggregate_df/EXCZKUSD_dependent_df.csv", index=False)
EXHUFUSD_m_raw.to_csv("aggregate_df/EXHUFUSD_dependent_df.csv", index=False)
EXPLNUSD_m_raw.to_csv("aggregate_df/EXPLNUSD_dependent_df.csv", index=False)
EXRONUSD_m_raw.to_csv("aggregate_df/EXRONUSD_dependent_df.csv", index=False)

In [26]:
# Aggregate Stock Index df
# No merging, no concat, already cleaned 
# Time frame: 2015/01 - 2025/10 (but it can be extended)
# (!!!) Not available on YFinance for Bulgaria, Croatia, Cyprus, Estonia, Greece, Hungary, Latvia, Lithuania, Luxembourg, Malta, Poland, Romania, Slovakia, Slovenia, Portugal and Sweden
# (!!!) Volume column is dangerous, a lot of 0 values, depending on country, it must be carefully handled 

# Uploading in aggregate_df
EURO_stock_m_raw.to_csv("aggregate_df/EURO_stock_dependent_df.csv", index=False)

In [27]:
# Aggregate Industrial Production Indexes df
# No merging, no concat, already cleaned 
# Time frame: 1996/01 - 2025/08 (but it can be extended)
# (!!!) Level 1 Indicators: Mining and Quarrying (B), Manufacturing (C), Electricity, gas, steam and air conditioning supply (D)

# Uploading in aggregate_df
EURO_indprod_m_raw.to_csv("aggregate_df/EURO_indprod_dependent_df.csv", index=False)

#### **4.2) Global controls df**

In [28]:
# Aggregate Global Control df
# Merging Index: Time
# Time frame: 2006/01 - 2025/09
# (!!!) To run the model we need no NaN in the df, so we drop them, causing a shrinking of the df time range
global_control_df = (oilprice_m_raw.merge(usdi_m_raw, on="Time", how="outer").merge(VIX_m_raw, on="Time", how="outer").merge(ustyield_m_raw, on="Time", how="outer").sort_values("Time").reset_index(drop=True))
global_control_df = global_control_df.dropna()

# Uploading in aggregate_df
global_control_df.to_csv("aggregate_df/global_control_df.csv", index=False)

global_control_df.tail()

Unnamed: 0,Time,"Crude Oil Price (Brent, Europe)",Nominal Broad USD Index,CBOE Volatility Index (VIX),Market Yield on 10-Year US Trasury Securities
761,2025-06,71.44,120.9747,18.403333,4.3835
762,2025-07,71.04,120.5266,16.381304,4.391818
763,2025-08,67.87,120.9844,15.75,4.264762
764,2025-09,67.99,120.4534,15.789091,4.120476
765,2025-10,64.54,121.1712,18.086522,4.061818


#### **4.3) Country-specific test variables df**

In [29]:
# Aggregate Country-Specific Control df
# Merging Index: Country, Time
# Time frame: 1997/01 - 2025/06
# (!!!) To run the model we need no NaN in the df, so we dop them, causing a shrinking of the df time range
# (!!!) As GDP data are quarterly, we lose at lot here by dropping NaN even for HICP and Unemployment Rate that were insetad fine
# (!!!) Converting GDP in million EUR to million USD
# (!!!) Do not drop NaN
EURO_GDPUS_m_raw = EURO_GDP_m_raw.copy()
EURUSD_exrate = (
    EXEURUSD_m_raw[EXEURUSD_m_raw["Country"] == "AT"]
    [["Time", "EUR-USD Spot Exchange Rate"]]
    .drop_duplicates()
    .set_index("Time")
)

# Then merge with GDP data ensuring all countries get the exchange rate
EURO_GDPUS_m_raw = (
    EURO_GDP_m_raw
    .merge(
        EURUSD_exrate, 
        how="left", 
        left_on="Time",
        right_index=True
    )
    .sort_values(["Country", "Time"])
)

# (!!!) Use parenthesis
EURO_GDPUS_m_raw["GDP (Million USD)"] = (
    EURO_GDPUS_m_raw["GDP (Million EUR)"] * 
    EURO_GDPUS_m_raw["EUR-USD Spot Exchange Rate"]
)

EURO_GDPUS_m_raw = EURO_GDPUS_m_raw.drop(columns=["GDP (Million EUR)", "EUR-USD Spot Exchange Rate"])

country_control_df = (EURO_GDPUS_m_raw.merge(EURO_HICP_m_raw, on=["Country", "Time"] , how="outer").merge(EURO_unem_m_raw, on= ["Country", "Time"], how="outer").sort_values(["Country", "Time"]).reset_index(drop=True))

# Uploading in aggregate_df
country_control_df.to_csv("aggregate_df/country_specific_test_df.csv", index=False)

country_control_df.tail()

Unnamed: 0,Country,Time,GDP (Million USD),"HICP (%, annual rate of change)",Unemployment Rate (%pop in LF)
12737,SK,2025-06,13165.161683,4.6,5.3
12738,SK,2025-07,,4.6,5.4
12739,SK,2025-08,,4.4,5.5
12740,SK,2025-09,,4.6,5.6
12741,SK,2025-10,,3.8,


#### **4.4) Regime shift transition variable - Trade Openess**

In [35]:
# Country-specific and monthly-computed trade openess (monthly, 2020-2025)
# Data import (!!!) specify compression
# (!!!) To be changed to monthly
gdp = pd.read_csv("aggregate_df/country_specific_test_df.csv")         
us_exp = US_export_raw.copy()             
us_imp = US_import_raw.copy()               

# Per each variable we keep only the time frame 2020â€“2025
gdp["Time"]   = pd.to_datetime(gdp["Time"])
us_exp["Time"] = pd.to_datetime(us_exp["Time"])
us_imp["Time"] = pd.to_datetime(us_imp["Time"])
gdp["Year"]   = gdp["Time"].dt.year
us_exp["Year"] = us_exp["Time"].dt.year
us_imp["Year"] = us_imp["Time"].dt.year
gdp     = gdp[gdp["Year"].between(2020, 2025)]
us_exp  = us_exp[us_exp["Year"].between(2020, 2025)]
us_imp  = us_imp[us_imp["Year"].between(2020, 2025)]

# Aggregate to annual totals
# (!!!) GDP already converted in million USD
gdp_annual = (
    gdp.groupby(["Country", "Year"], as_index=False)["GDP (Million USD)"]
       .sum()
)

# EU-member GDP
gdp_annual["GDP_USD"] = gdp_annual["GDP (Million USD)"] * 1_000_000

# US Export
# Trade is in thousand USD -> convert to USD (Ã— 1,000) after annual summation
exp_annual = (
    us_exp.groupby(["Country", "Year"], as_index=False)["Export - FAS value (USD)"]
          .sum()
          .rename(columns={"Export - FAS value (USD)": "Exports_USD_thousand"})
)
exp_annual["Exports_USD"] = exp_annual["Exports_USD_thousand"] * 1_000

# US Import
imp_annual = (
    us_imp.groupby(["Country", "Year"], as_index=False)["Import - General Custom Value (USD)"]
          .sum()
          .rename(columns={"Import - General Custom Value (USD)": "Imports_USD_thousand"})
)
imp_annual["Imports_USD"] = imp_annual["Imports_USD_thousand"]* 1_000

# Merging all
open_df = (
    gdp_annual.merge(exp_annual[["Country","Year","Exports_USD"]], on=["Country","Year"], how="left")
              .merge(imp_annual[["Country","Year","Imports_USD"]], on=["Country","Year"], how="left")
              .fillna({"Exports_USD": 0.0, "Imports_USD": 0.0})
              .sort_values(["Country","Year"])
              .reset_index(drop=True)
)

# Trade Openess Computation
open_df["Trade_Openness_pct_GDP"] = (
    (open_df["Exports_USD"] + open_df["Imports_USD"]) / open_df["GDP_USD"]
) * 100

# Lag openness for regressions
open_df["Openness_Lag1"] = open_df.groupby("Country")["Trade_Openness_pct_GDP"].shift(1)

open_df.to_csv("aggregate_df/trade_openness_annual_regime_df.csv", index=False)
open_df.head()

Unnamed: 0,Country,Year,GDP (Million USD),GDP_USD,Exports_USD,Imports_USD,Trade_Openness_pct_GDP,Openness_Lag1
0,AT,2020,434697.655844,434697700000.0,313657650.0,11616670000.0,2.744512,
1,AT,2021,480142.497936,480142500000.0,319815730.0,15136280000.0,3.219064,2.744512
2,AT,2022,472882.642877,472882600000.0,333846270.0,17812650000.0,3.837421,3.219064
3,AT,2023,516847.347331,516847300000.0,366211840.0,19143780000.0,3.774807,3.837421
4,AT,2024,534374.863656,534374900000.0,470719370.0,17573290000.0,3.376657,3.774807


#### **4.5) Explanatory variable - Exposure**

In [31]:
# Country product-specific exposure weight (monthly, truncated mean, 2022/01, 2024/06)
# Weight Exposure Computation
df = US_import_raw.copy()

# Data parsing
df["Time"] = pd.to_datetime(df["Time"])
df["Year"] = df["Time"].dt.year
df["Month"] = df["Time"].dt.month

# Keeping only the period pre-shock
df_trunc = df[
    ((df["Year"] == 2022) | (df["Year"] == 2023)) |
    ((df["Year"] == 2024) & (df["Month"] <= 6))
].copy()

# Mean import value per EU countryâ€“HTS pair
import_trunc_mean = (
    df_trunc.groupby(["Country", "HTS Code"], as_index=False)
    ["Import - General Custom Value (USD)"]
    .mean()
    .rename(columns={"Import - General Custom Value (USD)": "Truncated Mean - EU Export-to-US (2022_2024, thousand USD)"})
)
import_trunc_mean = import_trunc_mean.sort_values(["Country", "HTS Code"]).reset_index(drop=True)

# Normalizing within each country
import_trunc_mean["Export_Share"] = import_trunc_mean.groupby("Country")["Truncated Mean - EU Export-to-US (2022_2024, thousand USD)"].transform(
    lambda x: x / x.sum()
)

# Saving and zipping 
# (!!!) Not time dependent (we took the mean)
# (!!!) Dependends only from country and product
import_trunc_mean.tail()

Unnamed: 0,Country,HTS Code,"Truncated Mean - EU Export-to-US (2022_2024, thousand USD)",Export_Share
61817,SK,980200,15.0,2.2e-05
61818,SK,981000,42.646333,6.2e-05
61819,SK,981200,2.37,3e-06
61820,SK,981700,55.170167,8.1e-05
61821,SK,999995,4725.587467,0.006925


In [32]:
# Country-product specific tariff changes
# (!!!) Do not save import_trunc_mean as a csv file, just recall it from previous cell
# (!!!) Only final dfs in aggregate_df
# WIP