In [1]:
import fredapi as fred
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import yfinance as yf
import requests
import os

# Pull the FRED credential from the environment (None when the variable is unset).
FRED_API_KEY = os.environ.get('FRED_API_KEY')

# Shared fredapi client; later cells pass it to the collection helpers.
fred_api = fred.Fred(api_key=FRED_API_KEY)

# Release history of the unemployment-rate series, as returned by fredapi.
release_dates = fred_api.get_series_all_releases(series_id='UNRATE')

In [2]:
import sys

# Put the parent of the current working directory on the import path.
current_dir = os.getcwd()
project_root = os.path.dirname(current_dir)
sys.path.append(project_root)

# Excel workbook that carries the 'variables' and 'releases' sheets.
data_path = os.path.join('../data', 'nowcast-model-variables.xlsx')

# Read both sheets, then mirror each one to a CSV file in the data folder.
variables, releases = (
    pd.read_excel(data_path, engine='openpyxl', sheet_name=sheet)
    for sheet in ('variables', 'releases')
)
for frame, csv_path in (
    (variables, '../data/nowcast-variables.csv'),
    (releases, '../data/nowcast-releases.csv'),
):
    frame.to_csv(csv_path)

In [51]:
# Display the table read from the workbook's 'variables' sheet.
variables

Unnamed: 0,varname,fullname,dispgroup,disporder,release,units,st,d1,d2,hist_source,hist_source_key,hist_source_freq,hist_source_transform,nc_dfm_input,nc_method,nc_input_reason
0,mpce,PCE,Consumer,3,BEA.PI,billions of 2012 $,dlog,apchg,base,fred,PCEC96,m,none,1.0,dfm.m,
1,po,Personal Outlays,Consumer,1,BEA.PI,billions of 2012 $,dlog,apchg,none,fred,A068RC1,m,none,0.0,dfm.m,
2,ps,Personal Savings,Consumer,1,BEA.PI,billions of 2012 $,dlog,apchg,none,fred,PMSAVE,m,none,0.0,dfm.m,
3,psr,Personal Savings Rate,Consumer,1,BEA.PI,billions of 2012 $,base,base,none,fred,PSAVERT,m,none,1.0,calc,
4,pi,Personal Income,Consumer,1,BEA.PI,billions of 2012 $,dlog,apchg,none,fred,RPI,m,none,0.0,dfm.m,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
90,moo,Agricultural Commodities (DBA),Stocks_and_Commodities,2,YHOO,index,dlog,pchg,base,yahoo,DBA,d,none,1.0,dfm.m,
91,metals,Metal Commodities (DBB),Stocks_and_Commodities,2,YHOO,index,dlog,pchg,base,yahoo,DBB,d,none,1.0,dfm.m,
92,oil,Crude Oil Price (West Texas Intermediate),Stocks_and_Commodities,2,EIA.SPOT,$ per barrel,base,base,none,fred,DCOILWTICO,d,none,1.0,dfm.m,
93,usd,U.S. Dollar Index,Stocks_and_Commodities,1,YHOO,index,base,base,none,yahoo,UUP,d,none,0.0,dfm.m,


In [47]:
"""
데이터 수집 및 전처리를 위한 함수들
"""

import pandas as pd
import numpy as np
import logging
from datetime import datetime
import os

# Logging configuration: INFO level, with a logger named after this module.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

def get_fred_data(fred_api, series_id, start_date='2007-01-01', end_date=None) -> pd.Series:
    """
    Fetch one series through the FRED API client and return it date-indexed.

    Parameters:
    -----------
    fred_api : fredapi.Fred
        FRED API client object.
    series_id : str
        FRED series ID.
    start_date : str
        First observation date (YYYY-MM-DD).
    end_date : str, optional
        Last observation date (YYYY-MM-DD).

    Returns:
    --------
    pd.Series or None
        The observations, named after ``series_id`` and indexed by a
        DatetimeIndex. ``None`` is returned (and the error logged) when the
        request or the conversion fails.
    """
    try:
        raw = fred_api.get_series(series_id, observation_start=start_date, observation_end=end_date)
        # Copy so the object handed back by the client is not mutated.
        data = pd.Series(raw).copy()
        # Relabel the index in place. Passing index= to the Series constructor
        # would *reindex* instead, turning every value into NaN whenever the
        # original labels are not already timestamps.
        data.index = pd.to_datetime(data.index)
        data.name = series_id
        return data
    except Exception as e:
        logger.error(f"FRED 데이터 수집 실패 - {series_id}: {str(e)}")
        return None

def get_yahoo_data(ticker, start_date='2007-01-01', end_date=None):
    """
    Fetch the closing-price history of one ticker from Yahoo Finance.

    Parameters:
    -----------
    ticker : str
        Yahoo Finance ticker symbol.
    start_date : str
        Start date (YYYY-MM-DD).
    end_date : str, optional
        End date (YYYY-MM-DD).

    Returns:
    --------
    pd.Series or None
        Closing prices named after ``ticker`` with the index named 'Date'.
        ``None`` is returned (and the error logged) when the download or the
        column selection fails.
    """
    try:
        # Download the price history.
        data = yf.download(ticker, start=start_date, end=end_date)

        # Keep only the closing price. Depending on the yfinance version and
        # options, selecting 'Close' can give a one-column DataFrame (columns
        # keyed by ticker) rather than a Series, so reduce it to a Series.
        close = data['Close']
        if isinstance(close, pd.DataFrame):
            close = close.iloc[:, 0]

        # Name the series after the ticker without writing back into `data`.
        close = close.rename(ticker)

        # Name the index 'Date'.
        close.index.name = 'Date'

        return close
    except Exception as e:
        logger.error(f"Yahoo Finance 데이터 수집 실패 - {ticker}: {str(e)}")
        return None

# Collection window: fixed start date, ending today.
start_date = '2007-01-01'
end_date = f'{datetime.now():%Y-%m-%d}'

# Make sure the output folder exists before anything is written to it.
output_dir = '../data/collected'
os.makedirs(output_dir, exist_ok=True)

# Try the FRED helper on the unemployment-rate series; the result is the cell output.
get_fred_data(fred_api, series_id='UNRATE', start_date=start_date, end_date=end_date)

2007-01-01    4.6
2007-02-01    4.5
2007-03-01    4.4
2007-04-01    4.5
2007-05-01    4.4
             ... 
2024-10-01    4.1
2024-11-01    4.2
2024-12-01    4.1
2025-01-01    4.0
2025-02-01    4.1
Name: UNRATE, Length: 218, dtype: float64

In [None]:
def resample_to_daily(data, start_date, end_date):
    """
    Put a series on a daily calendar between two dates.

    Parameters:
    -----------
    data : pd.Series
        Source data with a date index.
    start_date : str
        Start date (YYYY-MM-DD).
    end_date : str
        End date (YYYY-MM-DD).

    Returns:
    --------
    pd.Series
        The data reindexed to every calendar day from ``start_date`` to
        ``end_date`` inclusive. Gaps are forward-filled; days before the first
        observation are then back-filled from it.
    """
    # Daily calendar covering the requested window.
    date_range = pd.date_range(start=start_date, end=end_date, freq='D')

    # Align to the calendar; days without an observation become NaN.
    resampled = data.reindex(date_range)

    # Forward fill, then back fill whatever is still missing at the start.
    # ffill()/bfill() replace the deprecated fillna(method=...) form.
    resampled = resampled.ffill().bfill()

    return resampled

def collect_data(variables_df, fred_api, start_date='2007-01-01', end_date=None):
    """
    Collect every listed variable from FRED or Yahoo Finance on a daily calendar.

    Parameters:
    -----------
    variables_df : pd.DataFrame
        Variable catalogue; the columns 'varname', 'hist_source' and
        'hist_source_key' are read from each row.
    fred_api : fredapi.Fred
        FRED API client object.
    start_date : str
        Start date (YYYY-MM-DD).
    end_date : str, optional
        End date (YYYY-MM-DD); defaults to today.

    Returns:
    --------
    pd.DataFrame
        One column per successfully collected variable (keyed by 'varname'),
        resampled to daily frequency. Empty when nothing could be collected.
    """
    # tqdm is only used for the progress bar. Import it here so the function
    # does not depend on a notebook-level import, and fall back to plain
    # iteration when the package is not installed.
    try:
        from tqdm.auto import tqdm
    except ImportError:
        def tqdm(iterable, **_kwargs):
            return iterable

    if end_date is None:
        end_date = datetime.now().strftime('%Y-%m-%d')

    # Collected series keyed by variable name.
    collected_data = {}

    for _, row in tqdm(variables_df.iterrows(), total=len(variables_df), desc="데이터 수집 중"):
        # Resolve the name before the try block so the error handler below can
        # always refer to it.
        varname = row.get('varname', '<unknown>')
        try:
            source = row['hist_source']
            source_key = row['hist_source_key']

            # Dispatch on the data source.
            if source == 'fred':
                data = get_fred_data(fred_api, source_key, start_date, end_date)
            elif source == 'yahoo':
                data = get_yahoo_data(source_key, start_date, end_date)
            else:
                logger.warning(f"알 수 없는 데이터 소스: {source} - {varname}")
                continue

            # Keep only non-empty results, aligned to the daily calendar.
            if data is not None and not data.empty:
                resampled_data = resample_to_daily(data, start_date, end_date)
                collected_data[varname] = resampled_data
            else:
                logger.warning(f"데이터 수집 실패 또는 빈 데이터: {varname}")

        except Exception as e:
            # Best effort: one failing variable must not abort the whole run.
            logger.error(f"데이터 수집 중 오류 발생 - {varname}: {str(e)}")
            continue

    if not collected_data:
        logger.error("수집된 데이터가 없습니다.")
        return pd.DataFrame()

    # Combine all series into one frame.
    df = pd.DataFrame(collected_data)

    # Report size and remaining gaps.
    logger.info(f"수집된 데이터 크기: {df.shape}")
    logger.info(f"결측치 개수:\n{df.isnull().sum()}")

    return df

# Run the data collection over a fixed start date up to today.
start_date = '2007-01-01'
end_date = datetime.now().strftime('%Y-%m-%d')

# Collect every variable listed in the catalogue.
all_data = collect_data(variables, fred_api, start_date, end_date)

# Persist and summarise the result.
if not all_data.empty:
    output_dir = '../data/collected'
    os.makedirs(output_dir, exist_ok=True)
    output_file = f'{output_dir}/collected_data_{datetime.now().strftime("%Y%m%d")}.csv'
    all_data.to_csv(output_file)
    logger.info(f"데이터가 {output_file}에 저장되었습니다.")

    # Summary of the collected frame.
    print("\n수집된 데이터 정보:")
    # info() writes its report itself and returns None, so wrapping it in
    # print() would add a stray "None" line.
    all_data.info()
    print("\n결측치 개수:")
    print(all_data.isnull().sum())

    # Sample rows.
    print("\n데이터 예시 (처음 5행):")
    print(all_data.head())
else:
    logger.error("데이터 수집 실패")

In [49]:
import requests
import pandas as pd

import os

# Read the FRED key from the environment instead of embedding it in the notebook.
# NOTE(review): the literal key that used to be in this cell should be treated
# as leaked and rotated.
API_KEY = os.getenv('FRED_API_KEY')
if not API_KEY:
    raise RuntimeError("FRED_API_KEY environment variable is not set")
BASE_URL = 'https://api.stlouisfed.org/fred'

source_key = 'CPIAUCSL'

source_metadata = []
unit_metadata = []

# Release that the series belongs to.
release_url = f'{BASE_URL}/series/release'
release_params = {'api_key': API_KEY, 'file_type': 'json', 'series_id': source_key}
release_response = requests.get(release_url, params=release_params, timeout=10).json()
release_id = release_response['releases'][0]['id']  # or .get('id')

# Observations plus the 'units' field of the same response.
obs_url = f'{BASE_URL}/series/observations'
obs_params = {'api_key': API_KEY, 'file_type': 'json', 'series_id': source_key}
obs_r = requests.get(obs_url, params=obs_params, timeout=10).json()
obs_unit = obs_r['units']
obs_data = obs_r['observations']

source_metadata.append(source_key)
source_metadata.append(obs_unit)

# Publication dates of that release.
rel_url = f'{BASE_URL}/release/dates'
rel_params = {'api_key': API_KEY, 'file_type': 'json', 'release_id': release_id}
rel_data = requests.get(rel_url, params=rel_params, timeout=10).json()['release_dates']

# Observations: 'base' is the month after the observation month, used as the join key.
obs_df = pd.DataFrame(obs_data)
obs_df['date'] = pd.to_datetime(obs_df['date'])
obs_df['target_month'] = obs_df['date'].dt.to_period('M').astype(str)
obs_df['base'] = (obs_df['date'] + pd.DateOffset(months=1)).dt.to_period('M').astype(str)
# Values arrive as strings and a missing observation is not a parsable number,
# so coerce instead of astype(float), which would raise on it.
obs_df[f'{source_key}'] = pd.to_numeric(obs_df['value'], errors='coerce')
obs_df = obs_df[['target_month', f'{source_key}', 'base']]

# Release dates: 'base' is the month in which the release happened.
rel_df = pd.DataFrame(rel_data)
rel_df['release_date'] = pd.to_datetime(rel_df['date'])
rel_df['base'] = rel_df['release_date'].dt.to_period('M').astype(str)
rel_df = rel_df[['release_date', 'base']]

# Attach a release date to each observation; drop rows without a match or value.
final_df = obs_df.merge(rel_df, on='base', how='left').drop(columns=['base'])
final_df = final_df.dropna()

# Daily calendar indexed by release date; non-release days stay empty.
final_df = final_df.set_index('release_date').resample('D').asfreq()
final_df

In [63]:
import requests
import pandas as pd

import os

# Read the FRED key from the environment instead of embedding it in the notebook.
# NOTE(review): the literal key that used to be in this cell should be treated
# as leaked and rotated.
API_KEY = os.getenv('FRED_API_KEY')
if not API_KEY:
    raise RuntimeError("FRED_API_KEY environment variable is not set")
BASE_URL = 'https://api.stlouisfed.org/fred'

source_key = 'CPIAUCSL'

# Series metadata
meta_url = f'{BASE_URL}/series'
meta_params = {'api_key': API_KEY, 'file_type': 'json', 'series_id': source_key}
meta_r = requests.get(meta_url, params=meta_params, timeout=10).json()['seriess']
meta_df = pd.DataFrame(meta_r)

# Release that the series belongs to
release_url = f'{BASE_URL}/series/release'
release_params = {'api_key': API_KEY, 'file_type': 'json', 'series_id': source_key}
release_id = requests.get(release_url, params=release_params, timeout=10).json()['releases'][0]['id']

# Observations
obs_url = f'{BASE_URL}/series/observations'
obs_params = {'api_key': API_KEY, 'file_type': 'json', 'series_id': source_key}
obs_data = requests.get(obs_url, params=obs_params, timeout=10).json()['observations']

# Publication dates of the release
rel_url = f'{BASE_URL}/release/dates'
rel_params = {'api_key': API_KEY, 'file_type': 'json', 'release_id': release_id}
rel_data = requests.get(rel_url, params=rel_params, timeout=10).json()['release_dates']

# Observations: 'base' is the month after the observation month (join key)
obs_df = pd.DataFrame(obs_data)
obs_df['date'] = pd.to_datetime(obs_df['date'])
obs_df['target_month'] = obs_df['date'].dt.to_period('M').astype(str)
obs_df['base'] = (obs_df['date'] + pd.DateOffset(months=1)).dt.to_period('M').astype(str)
# Values arrive as strings and a missing observation is not a parsable number,
# so coerce instead of astype(float), which would raise on it.
obs_df[source_key] = pd.to_numeric(obs_df['value'], errors='coerce')
obs_df = obs_df[['target_month', source_key, 'base']]

# Release dates: 'base' is the month in which the release happened
rel_df = pd.DataFrame(rel_data)
rel_df['release_date'] = pd.to_datetime(rel_df['date'])
rel_df['base'] = rel_df['release_date'].dt.to_period('M').astype(str)
rel_df = rel_df[['release_date', 'base']]

# Keep observations that have a release in the following month, then spread
# them over a daily calendar indexed by release date.
final_df = obs_df.merge(rel_df, on='base', how='inner').drop(columns=['base'])
final_df = final_df.set_index('release_date').resample('D').asfreq()
final_df


Unnamed: 0_level_0,target_month,CPIAUCSL
release_date,Unnamed: 1_level_1,Unnamed: 2_level_1
1949-03-24,1949-02,23.910
1949-03-25,,
1949-03-26,,
1949-03-27,,
1949-03-28,,
...,...,...
2025-03-08,,
2025-03-09,,
2025-03-10,,
2025-03-11,,


---

In [153]:
import requests
import pandas as pd

import os

# Read the FRED key from the environment instead of embedding it in the notebook.
# NOTE(review): the literal key that used to be in this cell should be treated
# as leaked and rotated.
API_KEY = os.getenv('FRED_API_KEY')
if not API_KEY:
    raise RuntimeError("FRED_API_KEY environment variable is not set")
BASE_URL = 'https://api.stlouisfed.org/fred'

source_keys = pd.read_csv('../data/nowcast-variables.csv')

# Keys whose hist_source is 'fred'
fred_keys = source_keys[source_keys['hist_source'] == 'fred']['hist_source_key']

# Keys whose hist_source is 'yahoo'
yf_tickers = source_keys[source_keys['hist_source'] == 'yahoo']['hist_source_key']

# Download the closing price of every Yahoo ticker with yfinance and merge them
import yfinance as yf

data_dict = {}
for ticker in yf_tickers:
    data = yf.download(ticker, start='2000-01-01', end='2024-12-31')
    data = data['Close']
    # 'Close' can come back as a one-column DataFrame (columns keyed by
    # ticker); reduce it to a Series so that setting .name is meaningful.
    if isinstance(data, pd.DataFrame):
        data = data.iloc[:, 0]
    data.name = ticker
    data_dict[ticker] = data

# One column per ticker, indexed by date.
market_df = pd.concat(data_dict.values(), axis=1)
market_df.columns = data_dict.keys()
market_df.index.name = 'Date'

[*********************100%***********************]  1 of 1 completed


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


In [154]:
# For every FRED key: fetch metadata, the owning release, the observations and
# the release dates, then index the observations by release date on a daily
# calendar. Keys that fail at any step are recorded in skipped_keys.
meta_dict = {}
data_dict = {}
skipped_keys = []

for source_key in fred_keys:
    print(f"Processing {source_key}...")
    try:
        # Series metadata
        meta_url = f'{BASE_URL}/series'
        meta_params = {'api_key': API_KEY, 'file_type': 'json', 'series_id': source_key}
        meta_r = requests.get(meta_url, params=meta_params).json().get('seriess', [])
        if not meta_r:
            print(f"skip {source_key}: No metadata available")
            skipped_keys.append((source_key, "No metadata"))
            continue
        meta_df = pd.DataFrame(meta_r)
        meta_dict[source_key] = meta_df

        # Release that the series belongs to (first entry is used)
        release_url = f'{BASE_URL}/series/release'
        release_params = {'api_key': API_KEY, 'file_type': 'json', 'series_id': source_key}
        release_response = requests.get(release_url, params=release_params).json()
        if 'releases' not in release_response or not release_response['releases']:
            print(f"skip {source_key}: No release info")
            skipped_keys.append((source_key, "No release info"))
            continue
        release_id = release_response['releases'][0]['id']

        # Observations
        obs_url = f'{BASE_URL}/series/observations'
        obs_params = {'api_key': API_KEY, 'file_type': 'json', 'series_id': source_key}
        obs_response = requests.get(obs_url, params=obs_params).json()
        if 'observations' not in obs_response:
            print(f"skip {source_key}: No observations")
            skipped_keys.append((source_key, "No observations"))
            continue
        obs_data = obs_response['observations']
        if not obs_data:
            print(f"skip {source_key}: Empty observations")
            skipped_keys.append((source_key, "Empty observations"))
            continue

        # Publication dates of the release
        rel_url = f'{BASE_URL}/release/dates'
        rel_params = {'api_key': API_KEY, 'file_type': 'json', 'release_id': release_id}
        rel_data = requests.get(rel_url, params=rel_params).json()['release_dates']
        if not rel_data:
            print(f"skip {source_key}: No release dates")
            skipped_keys.append((source_key, "No release dates"))
            continue

        # Observations: 'base' is the month after the observation date (join key).
        # NOTE(review): 'value' is copied as-is, so the column keeps whatever
        # type the JSON carried — confirm whether a numeric cast is wanted.
        obs_df = pd.DataFrame(obs_data)
        obs_df['date'] = pd.to_datetime(obs_df['date'], format='%Y-%m-%d', errors='coerce')
        obs_df['base'] = (obs_df['date'] + pd.DateOffset(months=1)).dt.to_period('M').astype(str)
        obs_df[source_key] = obs_df['value']
        obs_df = obs_df[['base', source_key]]

        # Release dates: 'base' is the month in which the release happened
        rel_df = pd.DataFrame(rel_data)
        rel_df['release_date'] = pd.to_datetime(rel_df['date'], format='%Y-%m-%d', errors='coerce')
        rel_df['base'] = rel_df['release_date'].dt.to_period('M').astype(str)
        rel_df = rel_df[['release_date', 'base']]

        # Merge, collapse duplicate release dates, then spread over a daily calendar
        temp_df = obs_df.merge(rel_df, on='base', how='left')  # changed from inner to left
        temp_df = temp_df.drop(columns=['base']).set_index('release_date')
        temp_df = temp_df.groupby('release_date').first()  # one row per release date
        temp_df = temp_df.resample('D').asfreq()

        data_dict[source_key] = temp_df

    except Exception as e:
        # Any failure for one key is recorded and the loop moves on.
        print(f"skip {source_key}: {e}")
        skipped_keys.append((source_key, str(e)))
        continue

# Combine the per-series frames (columns) and the metadata rows
merged_df = pd.concat(data_dict.values(), axis=1, join='outer')
meta_full_df = pd.concat(meta_dict.values(), axis=0, ignore_index=True)

# Report the results
print("메타데이터 df shape:", meta_full_df.shape)
print("merged_df shape:", merged_df.shape)
print(meta_full_df.head())
print(merged_df.head())
print("\nSkipped keys:")
for key, reason in skipped_keys:
    print(f"{key}: {reason}")

Processing PCEC96...
Processing A068RC1...
Processing PMSAVE...
Processing PSAVERT...
Processing RPI...
Processing DSPIC96...
Processing TOTCI...
Processing RELACBW027SBOG...
Processing DRSFRMACBS...
Processing DRCCLACBS...
Processing DRBLACBS...
Processing GDPC1...
Processing PCECC96...
Processing DGDSRX1Q020SBEA...
Processing PCDGCC96...
Processing DMOTRX1Q020SBEA...
Processing DFDHRX1Q020SBEA...
Processing DREQRX1Q020SBEA...
Processing DODGRX1Q020SBEA...
Processing PCNDGC96...
Processing DFXARX1Q020SBEA...
Processing DCLORX1Q020SBEA...
Processing DGOERX1Q020SBEA...
Processing DONGRX1Q020SBEA...
Processing PCESVC96...
Processing DHUTRX1Q020SBEA...
Processing DHLCRX1Q020SBEA...
Processing DTRSRX1Q020SBEA...
Processing DRCARX1Q020SBEA...
Processing DFSARX1Q020SBEA...
Processing DIFSRX1Q020SBEA...
Processing DOTSRX1Q020SBEA...
Processing DNPIRX1Q020SBEA...
Processing GPDIC1...
Processing PNFIC1...
Processing B009RX1Q020SBEA...
Processing Y033RX1Q020SBEA...
Processing Y001RX1Q020SBEA...


In [165]:
# Probe the series-metadata endpoint for one key and dump the raw response.
source_key = "DCOILWTICO"
meta_url = BASE_URL + '/series'
meta_params = dict(api_key=API_KEY, file_type='json', series_id=source_key)
response = requests.get(url=meta_url, params=meta_params)
print(f"Status: {response.status_code}")
print(response.json())

Status: 200
{'realtime_start': '2025-03-19', 'realtime_end': '2025-03-19', 'seriess': [{'id': 'DCOILWTICO', 'realtime_start': '2025-03-19', 'realtime_end': '2025-03-19', 'title': 'Crude Oil Prices: West Texas Intermediate (WTI) - Cushing, Oklahoma', 'observation_start': '1986-01-02', 'observation_end': '2025-03-10', 'frequency': 'Daily', 'frequency_short': 'D', 'units': 'Dollars per Barrel', 'units_short': '$ per Barrel', 'seasonal_adjustment': 'Not Seasonally Adjusted', 'seasonal_adjustment_short': 'NSA', 'last_updated': '2025-03-12 12:10:03-05', 'popularity': 77, 'notes': 'Definitions, Sources and Explanatory Notes (http://www.eia.doe.gov/dnav/pet/TblDefs/pet_pri_spt_tbldef2.asp)'}]}


In [None]:
import pandas as pd
import requests
import time
import yfinance as yf

import os

# Read the FRED key from the environment instead of embedding it in the notebook.
# NOTE(review): the literal key that used to be in this cell should be treated
# as leaked and rotated.
API_KEY = os.getenv('FRED_API_KEY')
if not API_KEY:
    raise RuntimeError("FRED_API_KEY environment variable is not set")
BASE_URL = 'https://api.stlouisfed.org/fred'

# Variable catalogue exported earlier; split its source keys by provider.
source_keys = pd.read_csv('../data/nowcast-variables.csv')
fred_keys = source_keys[source_keys['hist_source'] == 'fred']['hist_source_key']
yf_tickers = source_keys[source_keys['hist_source'] == 'yahoo']['hist_source_key']

def fetch_yahoo_data(tickers, start='2000-01-01', end='2024-12-31'):
    """
    Download closing prices for several Yahoo Finance tickers.

    Parameters
    ----------
    tickers : iterable of str
        Ticker symbols to download, one request per symbol.
    start, end : str
        Download window passed straight to ``yf.download``.

    Returns
    -------
    pd.DataFrame
        One column per ticker, with the index named 'Date'.
    """
    closes = {}
    for ticker in tickers:
        print(f"Fetching Yahoo data for {ticker}...")
        close = yf.download(ticker, start=start, end=end)['Close']
        close.name = ticker
        closes[ticker] = close

    # Side-by-side join of all tickers, relabelled with the ticker symbols.
    combined = pd.concat(closes.values(), axis=1)
    combined.columns = closes.keys()
    combined.index.name = 'Date'
    return combined

def fetch_fred_series(source_key, api_key, base_url, timeout=10):
    """
    Download one FRED series and index its observations by release date.

    Four endpoints are queried in turn: ``/series`` (metadata),
    ``/series/release`` (owning release), ``/series/observations`` (values)
    and ``/release/dates`` (publication dates of that release). Each
    observation date is shifted by a frequency-dependent offset and matched to
    the first release date on or after the shifted date. The result is then
    spread over a daily calendar.

    Parameters
    ----------
    source_key : str
        FRED series ID; also used as the name of the value column.
    api_key : str
        FRED API key.
    base_url : str
        Base URL of the FRED API (endpoint paths are appended to it).
    timeout : int or float
        Timeout passed to every ``requests.get`` call.

    Returns
    -------
    tuple
        ``(meta_df, series_df, error)``. On success ``error`` is ``None``. On
        any failure, including exceptions, the first two items are ``None``
        and ``error`` is a message string.
    """
    try:
        # Series metadata
        meta_url = f'{base_url}/series'
        meta_params = {'api_key': api_key, 'file_type': 'json', 'series_id': source_key}
        meta_response = requests.get(meta_url, params=meta_params, timeout=timeout)
        meta_json = meta_response.json()
        meta_r = meta_json.get('seriess', [])
        if not meta_r:
            return None, None, f"No metadata available - Response: {meta_json}"

        meta_df = pd.DataFrame(meta_r)
        frequency = meta_df['frequency_short'].iloc[0]  # e.g. D, W, M, Q

        # Release that the series belongs to (first entry is used)
        release_url = f'{base_url}/series/release'
        release_params = {'api_key': api_key, 'file_type': 'json', 'series_id': source_key}
        release_response = requests.get(release_url, params=release_params, timeout=timeout)
        release_json = release_response.json()
        if 'releases' not in release_json or not release_json['releases']:
            return None, None, "No release info"
        release_id = release_json['releases'][0]['id']

        # Observations
        obs_url = f'{base_url}/series/observations'
        obs_params = {'api_key': api_key, 'file_type': 'json', 'series_id': source_key}
        obs_response = requests.get(obs_url, params=obs_params, timeout=timeout)
        obs_json = obs_response.json()
        if 'observations' not in obs_json or not obs_json['observations']:
            return None, None, "No observations or empty"

        obs_df = pd.DataFrame(obs_json['observations'])
        obs_df['date'] = pd.to_datetime(obs_df['date'], format='%Y-%m-%d', errors='coerce')
        # Values arrive as strings; coerce them so the column is numeric and
        # unparsable entries (missing observations) become NaN.
        obs_df[source_key] = pd.to_numeric(obs_df['value'], errors='coerce')

        # Shift the observation date according to the series frequency.
        if frequency == 'M':  # monthly: first day of the following month
            obs_df['adjusted_date'] = obs_df['date'] + pd.offsets.MonthEnd(0) + pd.offsets.MonthBegin(1)
        elif frequency == 'Q':  # quarterly: quarter end rolled forward two month starts
            # NOTE(review): this skips releases in the first month after the
            # quarter ends — confirm that is the intended release to match.
            obs_df['adjusted_date'] = obs_df['date'] + pd.offsets.QuarterEnd(0) + pd.offsets.MonthBegin(2)
        elif frequency == 'W':  # weekly: one week later
            obs_df['adjusted_date'] = obs_df['date'] + pd.offsets.Week(1)
        else:
            obs_df['adjusted_date'] = obs_df['date']  # other frequencies: unchanged

        # merge_asof rejects null keys and requires the left key to be sorted.
        obs_df = (
            obs_df[['adjusted_date', source_key]]
            .dropna(subset=['adjusted_date'])
            .sort_values('adjusted_date')
        )

        # Publication dates of the release
        rel_url = f'{base_url}/release/dates'
        rel_params = {'api_key': api_key, 'file_type': 'json', 'release_id': release_id}
        rel_response = requests.get(rel_url, params=rel_params, timeout=timeout)
        rel_data = rel_response.json()['release_dates']
        if not rel_data:
            return None, None, "No release dates"

        rel_df = pd.DataFrame(rel_data)
        rel_df['release_date'] = pd.to_datetime(rel_df['date'], format='%Y-%m-%d', errors='coerce')
        rel_df = rel_df[['release_date']].dropna().sort_values('release_date')

        # Match each observation to the first release on or after its adjusted date.
        # NOTE(review): observations dated before the earliest release date all
        # match that first date, and .first() below keeps only the oldest of
        # them — confirm this is acceptable.
        temp_df = pd.merge_asof(
            obs_df, rel_df,
            left_on='adjusted_date', right_on='release_date',
            direction='forward',
        )
        temp_df = temp_df.drop(columns=['adjusted_date'])
        # One row per release date, then a daily calendar with gaps left empty.
        temp_df = temp_df.groupby('release_date').first()
        temp_df = temp_df.resample('D').asfreq()

        return meta_df, temp_df, None

    except Exception as e:
        # Best effort: report the failure to the caller instead of raising.
        return None, None, str(e)

def fetch_fred_data(keys, api_key, base_url, delay_seconds=0.3):
    """
    Fetch several FRED series one after another, pausing between requests.

    Parameters
    ----------
    keys : iterable of str
        FRED series IDs.
    api_key : str
        FRED API key forwarded to ``fetch_fred_series``.
    base_url : str
        FRED API base URL forwarded to ``fetch_fred_series``.
    delay_seconds : float
        Sleep applied after every key, whether it succeeded or not.

    Returns
    -------
    tuple
        ``(meta_dict, data_dict, skipped_keys)``: metadata frames and data
        frames keyed by series ID, plus a list of ``(key, error)`` pairs for
        the series that could not be fetched.
    """
    meta_dict, data_dict, skipped_keys = {}, {}, []

    for source_key in keys:
        print(f"Processing {source_key}...")
        meta_df, series_df, error = fetch_fred_series(source_key, api_key, base_url)
        if not error:
            meta_dict[source_key] = meta_df
            data_dict[source_key] = series_df
        else:
            print(f"skip {source_key}: {error}")
            skipped_keys.append((source_key, error))
        # Throttle between consecutive keys.
        time.sleep(delay_seconds)

    return meta_dict, data_dict, skipped_keys

# Run: Yahoo data first, then the FRED keys in two batches.
market_df = fetch_yahoo_data(yf_tickers)

mid_point = len(fred_keys) // 2
first_half = fred_keys[:mid_point]
second_half = fred_keys[mid_point:]

print("Fetching first half of FRED keys...")
meta_dict1, data_dict1, skipped1 = fetch_fred_data(first_half, API_KEY, BASE_URL)

# Pause between the two batches.
print("Waiting before second half...")
time.sleep(60)

print("Fetching second half of FRED keys...")
meta_dict2, data_dict2, skipped2 = fetch_fred_data(second_half, API_KEY, BASE_URL)

meta_dict = {**meta_dict1, **meta_dict2}
data_dict = {**data_dict1, **data_dict2}
skipped_keys = skipped1 + skipped2

# pd.concat raises on an empty collection, so fall back to empty frames when
# every key was skipped.
fred_merged_df = pd.concat(data_dict.values(), axis=1, join='outer') if data_dict else pd.DataFrame()
fred_meta_full_df = pd.concat(meta_dict.values(), axis=0, ignore_index=True) if meta_dict else pd.DataFrame()
final_df = pd.concat([market_df, fred_merged_df], axis=1)

print("FRED 메타데이터 df shape:", fred_meta_full_df.shape)
print("FRED merged_df shape:", fred_merged_df.shape)
print("Market df shape:", market_df.shape)
print("Final df shape:", final_df.shape)
print(fred_meta_full_df.head())
print(final_df.head())
print("\nSkipped keys:")
for key, reason in skipped_keys:
    print(f"{key}: {reason}")

# Spot-check one series per frequency. Every lookup is guarded because a key
# that was skipped above has no column in final_df.
print("\nCPIAUCSL (월간):")
print(final_df[final_df['CPIAUCSL'].notna()][['CPIAUCSL']] if 'CPIAUCSL' in final_df.columns else "CPIAUCSL not in data")
print("\nGASREGW (주간):")
print(final_df[final_df['GASREGW'].notna()][['GASREGW']] if 'GASREGW' in final_df.columns else "GASREGW not in data")
print("\nGDPC1 (분기, 예시):")
print(final_df[final_df['GDPC1'].notna()][['GDPC1']] if 'GDPC1' in final_df.columns else "GDPC1 not in data")

In [186]:
# Download extra FRED keys and merge them into the existing results.
additional_keys = ['GASREGW']  # keys to download in addition
print("\nFetching additional FRED keys...")
additional_meta_dict = {}
additional_data_dict = {}
additional_skipped = []

for key in additional_keys:
    print(f"Processing additional key {key}...")
    meta_df, series_df, error = fetch_fred_series(key, API_KEY, BASE_URL)
    if error:
        print(f"skip {key}: {error}")
        additional_skipped.append((key, error))
    else:
        additional_meta_dict[key] = meta_df
        additional_data_dict[key] = series_df
    time.sleep(1)  # delay between additional requests

# Merge into the existing data. Columns with the same name are dropped first so
# that re-running this cell replaces them instead of adding duplicate columns
# (a duplicated label would make final_df['GASREGW'] a DataFrame).
if additional_data_dict:
    additional_df = pd.concat(additional_data_dict.values(), axis=1, join='outer')
    final_df = pd.concat(
        [final_df.drop(columns=additional_df.columns, errors='ignore'), additional_df],
        axis=1,
    )

# Same for the metadata: keep a single row per series id, preferring the newest.
if additional_meta_dict:
    additional_meta_df = pd.concat(additional_meta_dict.values(), axis=0, ignore_index=True)
    fred_meta_full_df = (
        pd.concat([fred_meta_full_df, additional_meta_df], axis=0, ignore_index=True)
        .drop_duplicates(subset='id', keep='last')
        .reset_index(drop=True)
    )

# Record new failures once, even when the cell is executed repeatedly.
skipped_keys.extend(item for item in additional_skipped if item not in skipped_keys)


Fetching additional FRED keys...
Processing additional key GASREGW...


In [187]:
# Show only the dates on which GASREGW has a value.
print(final_df.loc[final_df['GASREGW'].notna(), ['GASREGW']])

           GASREGW
2009-09-28   1.191
2009-10-05   2.499
2009-10-12   2.468
2009-10-19   2.489
2009-10-26   2.574
...            ...
2025-02-18   3.128
2025-02-24   3.148
2025-03-03   3.125
2025-03-10   3.078
2025-03-17   3.069

[806 rows x 1 columns]


In [188]:
# Persist the merged data and the FRED metadata table as CSV files in the data folder.
final_df.to_csv('../data/final_df.csv')
fred_meta_full_df.to_csv('../data/fred_meta_full_df.csv')

## 주요 거시경제 지표의 메타 정보를 인기 순으로 다운로드

In [85]:
# Step 1: 모든 소스 가져오기
source_url = f'{BASE_URL}/sources'
source_params = {'api_key': API_KEY, 'file_type': 'json'}
sources = requests.get(source_url, params=source_params).json()['sources']
source_ids = [s['id'] for s in sources]

all_meta = []

# Step 2: 소스별 시리즈 수집
for sid in source_ids:
    series_url = f'{BASE_URL}/source/releases'
    series_params = {
        'api_key': API_KEY,
        'file_type': 'json',
        'source_id': sid
    }
    releases = requests.get(series_url, params=series_params).json().get('releases', [])
    for release in releases:
        rel_id = release['id']
        series_fetch_url = f'{BASE_URL}/release/series'
        series_fetch_params = {
            'api_key': API_KEY,
            'file_type': 'json',
            'release_id': rel_id,
            'order_by': 'popularity',
            'sort_order': 'desc',
            'limit': 1000
        }
        series_data = requests.get(series_fetch_url, params=series_fetch_params).json()
        all_meta.extend(series_data.get('seriess', []))

# Step 3: DataFrame 변환 및 필터링
meta_df = pd.DataFrame(all_meta)
meta_df = meta_df.drop_duplicates(subset=['id'])
meta_df = meta_df[meta_df['popularity'] >= 10].sort_values('popularity', ascending=False).reset_index(drop=True)

print(meta_df[['id', 'title', 'popularity']])

                     id                                              title  \
0                T10Y2Y  10-Year Treasury Constant Maturity Minus 2-Yea...   
1              FEDFUNDS                       Federal Funds Effective Rate   
2              CPIAUCSL  Consumer Price Index for All Urban Consumers: ...   
3                UNRATE                                  Unemployment Rate   
4                T10Y3M  10-Year Treasury Constant Maturity Minus 3-Mon...   
...                 ...                                                ...   
5275  BOGZ1FL263061145Q  Rest of the World; Treasury Securities Held by...   
5276  BOGZ1LM503062003Q  Other Financial Business; Municipal Securities...   
5277           FFWSJLOW  Low Value of the Federal Funds Rate for the In...   
5278            FFHTLOW  Low Value of the Federal Funds Rate for the In...   
5279             WUIBRA                 World Uncertainty Index for Brazil   

      popularity  
0            100  
1             98  
2     