In [6]:
import pandas as pd
import numpy as np
import sys
from pathlib import Path
# Add the project root (one level up from the current working directory) to
# sys.path so that project packages can be imported.
root = Path.cwd().parent   # assumes the working directory is notebooks/
# Guard the append: re-running this cell must not pile up duplicate entries.
if str(root) not in sys.path:
    sys.path.append(str(root))

from src.features import make_basic_features
from sklearn.linear_model import LogisticRegression
import pickle

# Train and persist the next-day direction classifier.
#
# Steps: read the cleaned parquet file, derive features with
# make_basic_features, label each row by the sign of the NEXT row's
# "daily_ret", fit a logistic regression on the first 80% of rows, and pickle
# the fitted model under ../model/.
FEATURE_COLS = ["lag1", "volatility_21d"]
MODEL_PATH = Path("../model/ko_model.pkl")

# load data
df = pd.read_parquet("../data/processed/ko_cleaned.parquet")
df = make_basic_features(df)

# target: will tomorrow go up? (like Stage 10b)
# NOTE(review): shift(-1) pairs each row with the following row, so this
# presumes the frame is already in chronological order — confirm upstream.
next_ret = df["daily_ret"].shift(-1)
df["y_up"] = (next_ret > 0).astype(int)

# Drop rows whose next-day return is unknown (the final row, and any row
# followed by a missing return). Comparing NaN > 0 yields False, so without
# this filter those rows would silently carry a fabricated "down" label that
# dropna on "y_up" could never remove. Also drop NA from rolling/shift
# features.
df = df[next_ret.notna()].dropna(subset=[*FEATURE_COLS, "y_up"])

# time-aware split: the earliest 80% of rows train, the remainder is held out
split_idx = int(len(df) * 0.8)
train = df.iloc[:split_idx]
test  = df.iloc[split_idx:]

if train.empty:
    raise ValueError(
        "No training rows left after dropping incomplete rows; "
        "check the input data and feature columns."
    )

X_train = train[FEATURE_COLS].values
y_train = train["y_up"].values

# train model
clf = LogisticRegression(max_iter=500)
clf.fit(X_train, y_train)

# save to ../model/ (created on demand)
MODEL_PATH.parent.mkdir(exist_ok=True, parents=True)
with MODEL_PATH.open("wb") as f:
    pickle.dump(clf, f)

# Report the path that was actually written.
print(f"Saved {MODEL_PATH}")


Saved model/model/ko_model.pkl
