# 株価予測

- [超簡単Pythonで株価予測（LightGBM 利用）機械学習](https://note.com/10mohi6/n/n4b1196fea816)
- [超簡単Pythonで株価予測（Optuna・LightGBM 利用）ハイパーパラメータ自動最適化](https://note.com/10mohi6/n/n46d1bb0267b7)

In [1]:
!pip install scikit-learn lightgbm pandas-datareader optuna



In [2]:
import pandas_datareader as pdr
from sklearn.model_selection import train_test_split
import lightgbm as lgb
from sklearn.metrics import accuracy_score

df = pdr.get_data_yahoo("AAPL", "2010-11-01", "2020-11-01")
df["Diff"] = df.Close.diff()
df["SMA_2"] = df.Close.rolling(2).mean()
df["Force_Index"] = df["Close"] * df["Volume"]
df["y"] = df["Diff"].apply(lambda x: 1 if x > 0 else 0).shift(-1)
df = df.drop(
  ["Open", "High", "Low", "Close", "Volume", "Diff", "Adj Close"],
  axis=1,
).dropna()
# print(df)
X = df.drop(["y"], axis=1).values
y = df["y"].values
X_train, X_test, y_train, y_test = train_test_split(
  X,
  y,
  test_size=0.2,
  shuffle=False,
)
clf = lgb.LGBMRegressor(learning_rate=0.01, n_estimators=20)
clf.fit(
  X_train,
  y_train,
)
y_pred = clf.predict(X_test)
print(accuracy_score(y_test, y_pred > 0.5))

  LARGE_SPARSE_SUPPORTED = LooseVersion(scipy_version) >= '0.14.0'
  data = yaml.load(f.read()) or {}
  defaults = yaml.load(f)


0.5456349206349206


In [3]:
import pandas_datareader as pdr
from sklearn.model_selection import train_test_split
import lightgbm as lgb
from sklearn.metrics import accuracy_score
import numpy as np
import optuna

def objective(trial):
    X_train, X_test, y_train, y_test = train_test_split(
        X,
        y,
        test_size=0.2,
        shuffle=False,
    )
    dtrain = lgb.Dataset(X_train, label=y_train)
    param = {
        "objective": "binary",
        "metric": "binary_logloss",
        "verbosity": -1,
        "boosting_type": "gbdt",
        "lambda_l1": trial.suggest_float("lambda_l1", 1e-8, 10.0, log=True),
        "lambda_l2": trial.suggest_float("lambda_l2", 1e-8, 10.0, log=True),
        "num_leaves": trial.suggest_int("num_leaves", 2, 256),
        "feature_fraction": trial.suggest_float("feature_fraction", 0.4, 1.0),
        "bagging_fraction": trial.suggest_float("bagging_fraction", 0.4, 1.0),
        "bagging_freq": trial.suggest_int("bagging_freq", 1, 7),
        "min_child_samples": trial.suggest_int("min_child_samples", 5, 100),
    }

    gbm = lgb.train(param, dtrain)
    preds = gbm.predict(X_test)
    pred_labels = np.rint(preds)
    accuracy = accuracy_score(y_test, pred_labels)
    return accuracy

df = pdr.get_data_yahoo("AAPL", "2010-11-01", "2020-11-01")
df["Diff"] = df.Close.diff()
df["SMA_2"] = df.Close.rolling(2).mean()
df["Force_Index"] = df["Close"] * df["Volume"]
df["y"] = df["Diff"].apply(lambda x: 1 if x > 0 else 0).shift(-1)
df = df.drop(
    ["Open", "High", "Low", "Close", "Volume", "Diff", "Adj Close"],
    axis=1,
).dropna()
# print(df)
X = df.drop(["y"], axis=1).values
y = df["y"].values
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    shuffle=False,
)
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=100)
trial = study.best_trial
clf = lgb.LGBMRegressor(**dict(trial.params.items()))
clf.fit(
    X_train,
    y_train,
)
y_pred = clf.predict(X_test)
print(accuracy_score(y_test, y_pred > 0.5))

[32m[I 2022-01-23 15:29:37,647][0m A new study created in memory with name: no-name-624e69e8-ef5e-4885-8748-30ef34b04ecd[0m
[32m[I 2022-01-23 15:29:37,665][0m Trial 0 finished with value: 0.5178571428571429 and parameters: {'lambda_l1': 0.6549029734483581, 'lambda_l2': 0.0014821895175676266, 'num_leaves': 95, 'feature_fraction': 0.8453580751798422, 'bagging_fraction': 0.4329555521440063, 'bagging_freq': 4, 'min_child_samples': 68}. Best is trial 0 with value: 0.5178571428571429.[0m
[32m[I 2022-01-23 15:29:37,686][0m Trial 1 finished with value: 0.4861111111111111 and parameters: {'lambda_l1': 5.338986107817803e-08, 'lambda_l2': 0.0006611799580550206, 'num_leaves': 217, 'feature_fraction': 0.7334914984464125, 'bagging_fraction': 0.41168616971983696, 'bagging_freq': 5, 'min_child_samples': 88}. Best is trial 0 with value: 0.5178571428571429.[0m
[32m[I 2022-01-23 15:29:37,715][0m Trial 2 finished with value: 0.5158730158730159 and parameters: {'lambda_l1': 6.381255858523819e-07

0.5535714285714286
