In [None]:
import pandas as pd
import numpy as np
from pathlib import Path


# Load the processed price data, parsing the "date" column into datetimes.
# (The file name suggests outliers were removed upstream -- not verified here.)
processed_dir = Path("../data/processed")
df = pd.read_csv(
    processed_dir.joinpath("seagate_no_outliers.csv"),
    parse_dates=["date"],
)


# Only derive the simple period-over-period return from "price" when the
# input file does not already provide a "Return" column.
if "Return" not in df:
    df["Return"] = df["price"].pct_change()


# Feature Engineering
#
# Adds derived columns to `df` in place. Every rolling/shifted feature is NaN
# for its warm-up rows (the rows where a full window is not yet available).

# 1. Log Return: ln(P_t / P_{t-1}); the first row has no previous price -> NaN.
df["LogReturn"] = np.log(df["price"] / df["price"].shift(1))

# 2. Cumulative Return: compounded growth of the simple "Return" column.
df["CumReturn"] = (1 + df["Return"]).cumprod() - 1

# 3. Lag Features: the return observed `lag` rows earlier.
for lag in [1, 2, 5]:
    df[f"Return_Lag{lag}"] = df["Return"].shift(lag)

# 4. Moving Averages of price.
# (Dedicated loop names so the RSI `window` constant below is not clobbered.)
for ma_window in [5, 10, 20]:
    df[f"MA_{ma_window}"] = df["price"].rolling(ma_window).mean()

# 5. Rolling Volatility: rolling standard deviation of the simple return.
for vol_window in [5, 20]:
    df[f"Volatility_{vol_window}"] = df["Return"].rolling(vol_window).std()

# 6. Bollinger Bands (20-day): mid = rolling mean, bands = mid +/- 2 rolling std.
bb_window = 20
bb_std = df["price"].rolling(bb_window).std()  # computed once, used for both bands
df["BB_Mid"] = df["price"].rolling(bb_window).mean()
df["BB_Upper"] = df["BB_Mid"] + 2 * bb_std
df["BB_Lower"] = df["BB_Mid"] - 2 * bb_std

# 7. RSI (14-day), simple-moving-average variant.
window = 14
delta = df["price"].diff()
# Use Series.clip instead of np.where so that:
#   * the undefined first difference stays NaN instead of silently becoming 0
#     (otherwise the first "14-day" average only contains 13 real changes), and
#   * gain/loss keep df's index, so the result aligns with df on assignment
#     even if df does not have a default RangeIndex.
gain = delta.clip(lower=0)
loss = (-delta).clip(lower=0)
avg_gain = gain.rolling(window).mean()
avg_loss = loss.rolling(window).mean()
rs = avg_gain / (avg_loss + 1e-9)  # small epsilon avoids division by zero
df["RSI_14"] = 100 - (100 / (1 + rs))


# Write the engineered dataset next to the input file (index column omitted)
# and report where it was saved.

features_path = processed_dir / "seagate_features.csv"
df.to_csv(features_path, index=False)
print(f"Feature-engineered file saved to {features_path}")


# Preview the first 15 rows of a few representative columns.
preview_columns = ["date", "price", "Return", "MA_5", "Volatility_5", "RSI_14"]
print(df[preview_columns].head(15))


Feature-engineered file saved to ..\data\processed\seagate_features.csv
         date       price    Return       MA_5  Volatility_5     RSI_14
0  2025-02-24   98.742737 -0.007635        NaN           NaN        NaN
1  2025-02-25   98.387543 -0.003597        NaN           NaN        NaN
2  2025-02-26   98.930191  0.005515        NaN           NaN        NaN
3  2025-02-27   99.324852  0.003989        NaN           NaN        NaN
4  2025-02-28  100.548286  0.012318  99.186722      0.007856        NaN
5  2025-03-03   98.723000 -0.018153  99.182774      0.011625        NaN
6  2025-03-05   91.194954 -0.017538  97.744257      0.014113        NaN
7  2025-03-07   86.824142  0.015697  95.323047      0.016190        NaN
8  2025-03-10   85.255394 -0.018068  92.509155      0.017530        NaN
9  2025-03-11   85.807915  0.006481  89.561081      0.016221        NaN
10 2025-03-12   85.571121 -0.002760  86.930705      0.014813        NaN
11 2025-03-13   84.475952 -0.012798  85.586905      0.013785    