# 01_02 Data Load & Feature Engineering
Dieses Notebook lädt die Rohdaten (Kursdaten via yfinance), erstellt daraus die Features für das Transformer-Modell und speichert sie als CSV im Ordner `data/`.

In [1]:
import os, sys
import pandas as pd
import yfinance as yf
from pathlib import Path

# Treat the directory above the current working directory as the project root
# and put it at the front of sys.path (once) so the local package import
# further down in this cell can be resolved.
ROOT = Path("..").resolve()
if sys.path.count(str(ROOT)) == 0:
    sys.path.insert(0, str(ROOT))

from finance_transformer.features import build_feature_frame

In [2]:
# --- What to download: symbol and date window handed to yf.download below ---
TICKER = "AAPL"
START = "2010-01-01"
END = "2026-01-01"

# --- Where to write: <project root>/data/<TICKER>_features.csv ---
DATA_DIR = ROOT.joinpath("data")
DATA_DIR.mkdir(exist_ok=True)  # no-op when the folder is already there
OUTPUT_CSV = DATA_DIR.joinpath(f"{TICKER}_features.csv")

In [3]:
# Fetch the price history for TICKER over [START, END) from Yahoo Finance.
# auto_adjust=True requests prices adjusted for splits/dividends;
# progress=False keeps the progress bar out of the cell output.
print(f"Downloading {TICKER} from {START} to {END}...")
raw = yf.download(TICKER, start=START, end=END, auto_adjust=True, progress=False)

# yfinance typically reports a failed download (unknown symbol, network error,
# rate limiting) by logging and handing back an empty result instead of
# raising. Stop here so the failure is not mistaken for a successful run and
# does not surface later as a confusing error or an empty feature CSV.
if raw is None or raw.empty:
    raise RuntimeError(
        f"No data returned for {TICKER} between {START} and {END}; "
        "check the ticker symbol, the date range and the network connection."
    )

print(f"Downloaded {len(raw)} rows.")

Downloading AAPL from 2010-01-01 to 2026-01-01...
Downloaded 4024 rows.


In [4]:
# Feature engineering with the library function.
# build_feature_frame is imported from the local finance_transformer.features
# module; it receives the raw download and its result is kept in `df`, which
# the save cell writes to disk.
# NOTE(review): in the recorded run the result has fewer rows than the download
# (3974 vs. 4024) — presumably warm-up rows of the rolling indicators are
# dropped inside the helper; confirm there.
df = build_feature_frame(raw)
print(f"Features generated: {df.shape}")
# Last expression of the cell, so the notebook renders it as a preview table.
df.head()

Features generated: (3974, 15)


Unnamed: 0_level_0,ret1,ret5,sma20_rel,std20,sma50_rel,std50,rsi14,macd_norm,hl_range,vol_z20,dow_1,dow_2,dow_3,dow_4,Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2010-03-17,-0.001471,-0.003207,-0.050143,0.012431,-0.075676,0.018395,69.691123,0.027102,0.014189,-0.425141,False,True,False,False,6.721593
2010-03-18,0.002362,-0.003776,-0.047549,0.012425,-0.076942,0.018396,70.266899,0.026761,0.010639,-1.069798,False,False,True,False,6.737486
2010-03-19,-0.01074,-0.019384,-0.032634,0.012679,-0.065959,0.018311,64.308581,0.02566,0.018043,0.243804,False,False,False,True,6.66551
2010-03-22,0.011186,0.004058,-0.037982,0.012475,-0.075088,0.018362,67.408882,0.024897,0.026029,-0.431518,False,False,False,False,6.740489
2010-03-23,0.015934,0.01727,-0.046337,0.011458,-0.088275,0.018464,71.287246,0.025116,0.020494,0.490009,True,False,False,False,6.848753


In [5]:
# Save the feature frame to OUTPUT_CSV (defined in the configuration cell).
# to_csv is called with defaults, so pandas also writes the index as the first
# column; an existing file at that path is overwritten.
df.to_csv(OUTPUT_CSV)
# Echo the destination so the reader can see where the dataset ended up.
print(f"Saved dataset to {OUTPUT_CSV}")

Saved dataset to C:\Users\jacin\DL_PROJECT\finance_transformer_lstm\TRANSFORMER\data\AAPL_features.csv
