In [1]:
import pandas as pd
import numpy as np
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import accuracy_score

# Load the price history; the 'date' column is parsed to datetime so it can be sorted.
data = pd.read_csv("your_stock_data.csv", parse_dates=['date'])
# Drop every row that has a missing value in any column.
data = data.dropna()
# The rolling/ewm/diff features, the shift(-1) label and TimeSeriesSplit used later
# all treat row order as time order, so enforce chronological order here instead of
# relying on how the CSV happens to be sorted. The stable sort keeps the file order
# for rows that share a date; the index is reset so it is contiguous again.
# NOTE(review): assumes the file holds a single instrument — confirm.
data = data.sort_values('date', kind='stable').reset_index(drop=True)

# Shared handles: the close series and one 20-row rolling window over it.
close = data['close']
roll_20 = close.rolling(window=20)

# Row-over-row percentage change of the close.
data['daily_return'] = close.pct_change()
# 20-row rolling standard deviation of the close price itself (not of returns).
data['volatility'] = roll_20.std()

# RSI (relative strength index) over 14 rows, using plain rolling means.
delta = close.diff()
# Entries that fail the condition (including the leading NaN produced by diff)
# are replaced with 0, so gain/loss contain no missing values.
gain = delta.where(delta.gt(0), 0)
loss = -delta.where(delta.lt(0), 0)
avg_gain = gain.rolling(window=14).mean()
avg_loss = loss.rolling(window=14).mean()
rs = avg_gain / avg_loss
data['RSI'] = 100 - (100 / (1 + rs))

# MACD: 12-span EMA minus 26-span EMA, plus a 9-span EMA of that difference.
ema_12 = close.ewm(span=12, adjust=False).mean()
ema_26 = close.ewm(span=26, adjust=False).mean()
data['MACD'] = ema_12 - ema_26
data['Signal_Line'] = data['MACD'].ewm(span=9, adjust=False).mean()

# Bollinger bands: 20-row moving average +/- two rolling standard deviations.
data['MA20'] = roll_20.mean()
band_offset = 2 * roll_20.std()
data['BB_upper'] = data['MA20'] + band_offset
data['BB_lower'] = data['MA20'] - band_offset

# Label each row by the next row's close: 1 if it is strictly higher, otherwise 0
# (an unchanged close is therefore labelled 0).
next_close = data['close'].shift(-1)
# The final row has no next close. Comparing against NaN yields False, which would
# label that row 0 even though its outcome is unknown, so its label is left
# missing here and the dropna() below discards it.
data['target'] = (next_close > data['close']).astype(float).where(next_close.notna())
# Remove the rows left incomplete by the indicator windows together with the
# unlabeled last row, then restore an integer dtype for the label.
data = data.dropna().astype({'target': int})

# Split the frame into label and feature matrix. The date, the raw OHLCV columns
# and the label itself are excluded from the features; every remaining column
# is used as a model input.
non_feature_cols = ['date', 'open', 'high', 'low', 'close', 'volume', 'target']
y = data['target']
X = data.drop(columns=non_feature_cols)

ModuleNotFoundError: No module named 'pandas'

In [None]:




# Train/test splitting that respects time order: with the default settings each
# fold trains on an initial segment of the rows and tests on the rows right after.
tscv = TimeSeriesSplit(n_splits=5)
# random_state is pinned so that repeated runs of the notebook report the same
# scores; without it the estimator's internal randomness is unseeded.
gb_clf = GradientBoostingClassifier(
    n_estimators=100,
    learning_rate=0.1,
    max_depth=3,
    random_state=42,
)

# Time-series cross-validation. warm_start is left at its default, so every
# fit() call trains a fresh model on that fold's training rows only.
for train_idx, test_idx in tscv.split(X):
    X_train, X_test = X.iloc[train_idx], X.iloc[test_idx]
    y_train, y_test = y.iloc[train_idx], y.iloc[test_idx]
    gb_clf.fit(X_train, y_train)
    y_pred = gb_clf.predict(X_test)
    # Per-fold accuracy on the held-out (later) rows.
    print(f"정확도: {accuracy_score(y_test, y_pred):.4f}")
