In [1]:
import pandas as pd
import requests
import io
import sqlite3
# import matplotlib as plt
import matplotlib.pyplot as plt
import statsmodels.api as sm
from linearmodels.panel import PooledOLS
from sklearn.metrics import r2_score, mean_absolute_error, explained_variance_score
from sklearn.linear_model import LinearRegression
from statsmodels.tsa.statespace.sarimax import SARIMAX

In [66]:
# data links
wage_url = 'https://sdmx.oecd.org/public/rest/data/OECD.SDD.TPS,DSD_EAR@DF_HOU_EAR,1.0/all?startPeriod=2023-12&dimensionAtObservation=AllDimensions&format=csvfilewithlabels'
macro_ind_url = 'https://sdmx.oecd.org/public/rest/data/OECD.SDD.STES,DSD_STES_REVISIONS@DF_STES_REVISIONS,4.0/all?startPeriod=2000-01&dimensionAtObservation=AllDimensions&format=csvfilewithlabels'
unemp_url = 'https://sdmx.oecd.org/public/rest/data/OECD.SDD.TPS,DSD_LFS@DF_IALFS_UNE_M,1.0/all?startPeriod=2000-01&dimensionAtObservation=AllDimensions&format=csvfilewithlabels'
finance_url = 'https://sdmx.oecd.org/public/rest/data/OECD.SDD.STES,DSD_STES@DF_FINMARK,4.0/all?startPeriod=2000-01&dimensionAtObservation=AllDimensions&format=csvfilewithlabels'
cpi_url = 'https://sdmx.oecd.org/public/rest/data/OECD.SDD.TPS,DSD_PRICES@DF_PRICES_ALL,1.0/all?startPeriod=2024-01&dimensionAtObservation=AllDimensions&format=csvfilewithlabels'
cci_url = 'https://sdmx.oecd.org/public/rest/data/OECD.SDD.STES,DSD_STES@DF_CS,4.0/all?startPeriod=2024-06&dimensionAtObservation=AllDimensions&format=csvfilewithlabels'

In [2]:
conn = sqlite3.connect('/Users/Mark Rozenberg/Downloads/Macro-Indicators/macro_indicators_v2.db')

### wages

In [37]:
# Read the data from wage_url
wage_data = pd.read_csv(io.StringIO(requests.get(wage_url).content.decode('utf-8')), header=0)
# wage_data.columns
wage_data.rename(columns={
'MEASURE': 'measure_code',
'Measure': 'measure_name',
'ADJUSTMENT': 'adjustment_code',
'Adjustment': 'adjustment_name',
'TIME_PERIOD': 'time_period_code',
'Time period': 'time_period_name',
'DECIMALS': 'decimals_code',
'Decimals': 'decimals_name'
}, inplace=True)
# Remove spaces from column names
wage_data.columns = wage_data.columns.str.replace(' ', '_')
# # Save the data to the wage table in the database
wage_data.to_sql('wage', conn, if_exists='replace', index=False)

511

### Finance

In [None]:
# Read the data from wage_url
finance_data = pd.read_csv(io.StringIO(requests.get(finance_url).content.decode('utf-8')), header=0)
finance_data.rename(columns={
'MEASURE': 'measure_code',
'Measure': 'measure_name',
'DECIMALS': 'decimals_code',
'Decimals': 'decimals_name',
'METHODOLOGY': 'methodology_code',
'Methodology': 'methodology_name',
'TRANSFORMATION': 'transformation_code',
'Transformation': 'transformation_name',
'ADJUSTMENT': 'adjustment_code',
'Adjustment': 'adjustment_name',
'TIME_PERIOD': 'time_period_code',
'Time_period': 'time_period_name'
}, inplace=True)
# Remove spaces from column names
finance_data.columns = finance_data.columns.str.replace(' ', '_')
# Save the data to the wage table in the database
finance_data.to_sql('finance', conn, if_exists='replace', index=False)

112356

### unemployment

In [None]:
# Read the data from wage_url
unemp_data = pd.read_csv(io.StringIO(requests.get(unemp_url).content.decode('utf-8')), header=0)
# unemp_data.columns
unemp_data.rename(columns={
'MEASURE': 'measure_code',
'Measure': 'measure_name',
'DECIMALS': 'decimals_code',
'Decimals': 'decimals_name',
'SEX': 'sex_code',
'Sex': 'sex_name',
'AGE': 'age_code',
'Age': 'age_name',
'TRANSFORMATION': 'transformation_code',
'Transformation': 'transformation_name',
'ADJUSTMENT': 'adjustment_code',
'Adjustment': 'adjustment_name'
}, inplace=True)
# Remove spaces from column names
unemp_data.columns = unemp_data.columns.str.replace(' ', '_')
unemp_data.rename(columns={
'TIME_PERIOD': 'time_period_code',
'Time_period': 'time_period_name',
}, inplace=True)
# Save the data to the wage table in the database
unemp_data.to_sql('unemp', conn, if_exists='replace', index=False)

304274

### prices

In [73]:
# Read the data
cpi_data = pd.read_csv(io.StringIO(requests.get(cpi_url).content.decode('utf-8')), header=0)
# cpi_data.columns
cpi_data.rename(columns={
'METHODOLOGY': 'methodology_code',
'Methodology': 'methodology_name',
'MEASURE': 'measure_code',
'Measure': 'measure_name',
'EXPENDITURE': 'expenditure_code',
'Expenditure': 'expenditure_name',
'ADJUSTMENT': 'adjustment_code',
'Adjustment': 'adjustment_name',
'TRANSFORMATION': 'transformation_code',
'Transformation': 'transformation_name',
'DURABILITY': 'durability_code',
'Durability': 'durability_name',
'DECIMALS': 'decimals_code',
'Decimals': 'decimals_name',
'TIME_PERIOD': 'time_period_code',
'Time_period': 'time_period_name',
}, inplace=True)
cpi_data.columns = cpi_data.columns.str.replace(' ', '_')
# Save the data to the wage table in the database
cpi_data.to_sql('cpi', conn, if_exists='replace', index=False)

60462

### Economic Indicators

In [68]:
# Read the data
# cci_data = pd.read_csv(io.StringIO(requests.get(cci_url).content.decode('utf-8')), header=0)
# cci_data.columns
cci_data.rename(columns={
'MEASURE': 'measure_code',
'Measure': 'measure_name',
'ADJUSTMENT': 'adjustment_code',
'Adjustment': 'adjustment_name',
'TRANSFORMATION': 'transformation_code',
'Transformation': 'transformation_name',
'TIME_PERIOD': 'time_period_code',
'Time_period': 'time_period_name',
'DECIMALS': 'decimals_code',
'Decimals': 'decimals_name'
}, inplace=True)
cci_data.columns = cci_data.columns.str.replace(' ', '_')
# Save the data to the wage table in the database
cci_data.to_sql('cci', conn, if_exists='replace', index=False)

520

In [69]:
# Query to list all tables in the database
pd.read_sql_query('''SELECT name FROM sqlite_master WHERE type='table';''', conn)

Unnamed: 0,name
0,finance
1,unemp
2,cpi
3,wage
4,cci


In [70]:
del cpi_test

In [72]:
cci_test = pd.read_sql_query('''
SELECT *
FROM cci
where
freq = 'M'
LIMIT 2
''', conn)

In [78]:
pd.read_sql_query('''
SELECT count(distinct REF_AREA)
FROM cci
where freq = 'M'
and measure_code = 'CCICP'
''', conn)

Unnamed: 0,count(distinct REF_AREA)
0,39


In [79]:
# test for unique values
pd.read_sql_query('''
SELECT
time_period_code as time_period,
ref_area as country,
obs_value as value,
count(*) as rows
FROM cci
where freq = 'M'
and measure_code = 'CCICP'
group by 1,2,3
order by 4 desc
''', conn)

Unnamed: 0,time_period,country,value,rows
0,2024-06,AUS,-16.00000,1
1,2024-06,AUT,-12.60000,1
2,2024-06,BEL,-8.10000,1
3,2024-06,BGR,-15.50000,1
4,2024-06,BRA,91.10000,1
...,...,...,...,...
184,2024-10,SVK,-21.10000,1
185,2024-10,SVN,-27.60000,1
186,2024-10,SWE,0.90000,1
187,2024-10,TUR,80.60000,1


### combine the sources into main table

In [None]:
main_table = pd.read_sql_query('''
create table if not exists main_table as
with irlt as (
select
time_period_code as time_period,
ref_area as country,
obs_value as irlt
from finance
where freq = 'M'
and measure_code = 'IRLT'
),
share as (
select
time_period_code as time_period,
ref_area as country,
obs_value as share
from finance
where freq = 'M'
and measure_code = 'SHARE'
),
unemp_dt as (
select
time_period_code as time_period,
ref_area as country,
obs_value as unemp
from unemp
where freq = 'M'
and sex_code = '_T'
and age_code = 'Y_GE25'
and adjustment_code = 'N'
),
wage_dt as (
select
time_period_code as time_period,
ref_area as country,
obs_value as wage
from wage
where
freq = 'M'
and adjustment_code = 'N' 
),
cpi_dt as (
select
time_period_code as time_period,
ref_area as country,
obs_value as cpi
from cpi
where
freq = 'M'
and methodology_code = 'N'
and UNIT_MEASURE = 'PC'
and adjustment_code = 'N'
and expenditure_code = '_T'
),
cci_dt as (
select
time_period_code as time_period,
ref_area as country,
obs_value as cci
from cci
where freq = 'M'
and measure_code = 'CCICP'
)
select t1.time_period, t1.country, t1.irlt, t2.share, t3.unemp, t4.wage, t5.cpi, t6.cci
from irlt t1
full join share t2 on t1.time_period = t2.time_period and t1.country = t2.country
full join unemp_dt t3 on t1.time_period = t3.time_period and t1.country = t3.country
full join wage_dt t4 on t1.time_period = t4.time_period and t1.country = t4.country
full join cpi_dt t5 on t1.time_period = t5.time_period and t1.country = t5.country
full join cci_dt t6 on t1.time_period = t6.time_period and t1.country = t6.country
''', conn)

DatabaseError: Execution failed on sql '
create table if not exists main_table as
with irlt as (
select
time_period_code as time_period,
ref_area as country,
obs_value as irlt
from finance
where freq = 'M'
and measure_code = 'IRLT'
),
share as (
select
time_period_code as time_period,
ref_area as country,
obs_value as share
from finance
where freq = 'M'
and measure_code = 'SHARE'
),
unemp_dt as (
select
time_period_code as time_period,
ref_area as country,
obs_value as unemp
from unemp
where freq = 'M'
and sex_code = '_T'
and age_code = 'Y_GE25'
and adjustment_code = 'N'
),
wage_dt as (
select
time_period_code as time_period,
ref_area as country,
obs_value as wage
from wage
where
freq = 'M'
and adjustment_code = 'N' 
),
cpi_dt as (
select
time_period_code as time_period,
ref_area as country,
obs_value as cpi
from cpi
where
freq = 'M'
and methodology_code = 'N'
and UNIT_MEASURE = 'PC'
and adjustment_code = 'N'
and expenditure_code = '_T'
),
cci_dt as (
select
time_period_code as time_period,
ref_area as country,
obs_value as cci
from cci
where freq = 'M'
and measure_code = 'CCICP'
)
select t1.time_period, t1.country, t1.irlt, t2.share, t3.unemp, t4.cpi, t5.cci
from irlt t1
full join share t2 on t1.time_period = t2.time_period and t1.country = t2.country
full join unemp_dt t3 on t1.time_period = t3.time_period and t1.country = t3.country
full join wage_dt t4 on t1.time_period = t4.time_period and t1.country = t4.country
full join cpi_dt t5 on t1.time_period = t5.time_period and t1.country = t5.country
full join cci_dt t6 on t1.time_period = t6.time_period and t1.country = t6.country
': no such column: t4.cpi