In [1]:
# 基本パッケージ（numpy,Pandas,matplotlib）
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# 線形サポートベクターマシーン
from sklearn.svm import LinearSVC
# ランダムフォレスト
from sklearn.ensemble import RandomForestClassifier
# train_test_split（データを分割出してくれる）
from sklearn.model_selection import train_test_split
# accuracy_score（正解率を測れる）
from sklearn.metrics import accuracy_score
# グリッドサーチ（ハイパーパラメータを自動的に最適化してくれる）
from sklearn.model_selection import GridSearchCV
# 正規化
from sklearn.preprocessing import MinMaxScaler
# 交差検証用
from sklearn.model_selection import cross_val_score
# 交互作用特徴量、多項式特徴量生成
from sklearn.preprocessing import PolynomialFeatures

  from numpy.core.umath_tests import inner1d


In [2]:
# 表示関連
# DataFrameの列数設定
pd.set_option('display.max_columns', 500)

In [3]:
# 株価データの読み込み
stock_data = pd.read_csv("stock_Price_Prediction_v2.4.csv", encoding="shift-jis")

# 読み込みデータの表示
stock_data.head()

Unnamed: 0,Low,Adj Close,CAD,CHF,AUD,IDR(100),INR,PHP,KRW(100),THB,KWD,TWD,month_1,month_2,month_3,month_4,month_5,month_6,month_7,month_8,month_9,month_10,month_11,month_12,day_1,day_2,day_3,day_4,day_5,day_6,day_7,day_8,day_9,day_10,day_11,day_12,day_13,day_14,day_15,day_16,day_17,day_18,day_19,day_20,day_21,day_22,day_23,day_24,day_25,day_26,day_27,day_28,day_29,day_30,day_31,weekday_0,weekday_1,weekday_2,weekday_3,weekday_4,answer,"SEK_bin_(-0.068, -0.0105]","SEK_bin_(-0.0105, -0.0061]","SEK_bin_(-0.0061, -0.00346]","SEK_bin_(-0.00346, -0.00154]","SEK_bin_(-0.00154, 0.000598]","SEK_bin_(0.000598, 0.00221]","SEK_bin_(0.00221, 0.00413]","SEK_bin_(0.00413, 0.00656]","SEK_bin_(0.00656, 0.01]","SEK_bin_(0.01, 0.0825]","SGD_bin_(-0.0473, -0.00699]","SGD_bin_(-0.00699, -0.00403]","SGD_bin_(-0.00403, -0.00225]","SGD_bin_(-0.00225, -0.000917]","SGD_bin_(-0.000917, 0.000283]","SGD_bin_(0.000283, 0.00139]","SGD_bin_(0.00139, 0.00265]","SGD_bin_(0.00265, 0.00424]","SGD_bin_(0.00424, 0.00671]","SGD_bin_(0.00671, 0.0634]","Volume_bin_(-0.927, -0.512]","Volume_bin_(-0.512, -0.388]","Volume_bin_(-0.388, -0.276]","Volume_bin_(-0.276, -0.155]","Volume_bin_(-0.155, -0.0192]","Volume_bin_(-0.0192, 0.133]","Volume_bin_(0.133, 0.319]","Volume_bin_(0.319, 0.625]","Volume_bin_(0.625, 1.106]","Volume_bin_(1.106, 11.805]","Open_bin_(-0.09620000000000001, -0.0162]","Open_bin_(-0.0162, -0.00927]","Open_bin_(-0.00927, -0.00528]","Open_bin_(-0.00528, -0.00222]","Open_bin_(-0.00222, 0.000725]","Open_bin_(0.000725, 0.00342]","Open_bin_(0.00342, 0.00639]","Open_bin_(0.00639, 0.0102]","Open_bin_(0.0102, 0.0159]","Open_bin_(0.0159, 0.16]","ZAR_bin_(-0.155, -0.0148]","ZAR_bin_(-0.0148, -0.00855]","ZAR_bin_(-0.00855, -0.00494]","ZAR_bin_(-0.00494, -0.00209]","ZAR_bin_(-0.00209, 0.000603]","ZAR_bin_(0.000603, 0.00299]","ZAR_bin_(0.00299, 0.00571]","ZAR_bin_(0.00571, 0.00916]","ZAR_bin_(0.00916, 0.0142]","ZAR_bin_(0.0142, 0.129]",SEK,SGD,Volume,Open,ZAR,High,GBP,SEK^2,SEK SGD,SEK Volume,SEK Open,SEK ZAR,SEK High,SEK GBP,SGD^2,SGD Volume,SGD Open,SGD ZAR,SGD High,SGD GBP,Volume^2,Volume Open,Volume ZAR,Volume High,Volume GBP,Open^2,Open ZAR,Open High,Open GBP,ZAR^2,ZAR High,ZAR GBP,High^2,High GBP,GBP^2
0,0.004525,0.010801,-0.001198,0.010974,-0.001687,0.007299,0.0,0.003831,0.0,-0.003257,0.002004,0.0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1.0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,-0.003846,-0.000831,2.643314,-0.00625,0.009338,0.024889,-0.002711,1.5e-05,3e-06,-0.010167,2.4e-05,-3.6e-05,-9.6e-05,1e-05,6.909852e-07,-0.002197,5e-06,-8e-06,-2.1e-05,2e-06,6.987108,-0.016521,0.024683,0.065789,-0.007167,3.9e-05,-5.8e-05,-0.000156,1.7e-05,8.7e-05,0.000232,-2.5e-05,0.000619,-6.7e-05,7.351908e-06
1,-0.003604,0.0187,0.003238,-0.001622,0.00169,-0.007246,0.003663,0.0,-0.008893,-0.006536,0.003609,0.0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1.0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,-0.001544,0.001941,0.208818,0.026954,0.010934,0.004337,-0.00068,2e-06,-3e-06,-0.000322,-4.2e-05,-1.7e-05,-7e-06,1e-06,3.768293e-06,0.000405,5.2e-05,2.1e-05,8e-06,-1e-06,0.043605,0.005629,0.002283,0.000906,-0.000142,0.000727,0.000295,0.000117,-1.8e-05,0.00012,4.7e-05,-7e-06,1.9e-05,-3e-06,4.619962e-07
2,0.028933,0.000874,0.000837,0.002,-0.004639,0.0,-0.00365,-0.003817,-0.000997,0.003289,-0.003367,0.0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0.0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,-0.00464,-0.005674,-0.317789,0.0,-0.015807,-0.011226,-0.007273,2.2e-05,2.6e-05,0.001475,-0.0,7.3e-05,5.2e-05,3.4e-05,3.219377e-05,0.001803,-0.0,9e-05,6.4e-05,4.1e-05,0.10099,-0.0,0.005023,0.003568,0.002311,0.0,-0.0,-0.0,-0.0,0.00025,0.000177,0.000115,0.000126,8.2e-05,5.288979e-05
3,-0.002636,-0.003493,-0.009078,-0.008855,-0.006779,0.0,-0.007326,-0.003831,0.000998,-0.003279,-0.006665,-0.005236,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1.0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,-0.003885,-0.001253,-0.435145,-0.004374,0.007608,0.001747,-0.006324,1.5e-05,5e-06,0.001691,1.7e-05,-3e-05,-7e-06,2.5e-05,1.569031e-06,0.000545,5e-06,-1e-05,-2e-06,8e-06,0.189351,0.001904,-0.00331,-0.00076,0.002752,1.9e-05,-3.3e-05,-8e-06,2.8e-05,5.8e-05,1.3e-05,-4.8e-05,3e-06,-1.1e-05,3.999859e-05
4,-0.003524,-0.002629,-0.001929,-0.005914,-0.008105,0.007299,-0.00369,-0.007692,-0.012961,-0.003289,-0.005992,-0.007895,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0.0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,-0.01014,-0.001254,1.40135,0.0,-0.010906,-0.004359,-0.00122,0.000103,1.3e-05,-0.01421,-0.0,0.000111,4.4e-05,1.2e-05,1.572969e-06,-0.001758,-0.0,1.4e-05,5e-06,2e-06,1.963781,0.0,-0.015283,-0.006109,-0.00171,0.0,-0.0,-0.0,-0.0,0.000119,4.8e-05,1.3e-05,1.9e-05,5e-06,1.488157e-06


In [4]:
# DataFrameをNumPy配列へ変換
# 正解ラベルの変換
answers = stock_data.answer.values
# 説明変数の変換
stock_data.drop(columns=['answer'], inplace=True)
explanatory_variable = stock_data.values

In [5]:
# データの正規化        
ms = MinMaxScaler()
ms.fit(explanatory_variable)
explanatory_variable = ms.transform(explanatory_variable)

In [6]:
# データの分割（データの80%を訓練用に、20％をテスト用に分割する）
X_train, X_test, y_train, y_test = train_test_split(explanatory_variable, answers, test_size=0.2, random_state=1,shuffle = False)

In [7]:
# グリッドサーチするパラメータを設定
parameters = {'C':[0.01,0.1,1,10,100],'loss':['hinge', 'squared_hinge']}
# グリッドサーチを実行
lsvc =  LinearSVC(random_state=1)
grid_search = GridSearchCV(lsvc, param_grid=parameters, cv=5)
grid_search = grid_search.fit(X_train , y_train)

# グリッドサーチ結果(最適パラメータ)を取得
GS_C, GS_loss = grid_search.best_params_.values()
print ("最適パラメータ：{}".format(grid_search.best_params_))

最適パラメータ：{'C': 10, 'loss': 'squared_hinge'}


In [8]:
# 最適パラメーターを指定して学習
clf = LinearSVC(loss=GS_loss, C=GS_C, random_state=1)
clf.fit(X_train , y_train)

LinearSVC(C=10, class_weight=None, dual=True, fit_intercept=True,
     intercept_scaling=1, loss='squared_hinge', max_iter=1000,
     multi_class='ovr', penalty='l2', random_state=1, tol=0.0001,
     verbose=0)

In [9]:
# 学習後のモデルによるテスト
# トレーニングデータを用いた予測
y_train_pred = clf.predict(X_train)
# テストデータを用いた予測
y_val_pred = clf.predict(X_test)

In [10]:
# 正解率の計算
train_score = accuracy_score(y_train, y_train_pred)
test_score = accuracy_score(y_test, y_val_pred)
# 正解率を表示
print("トレーニングデータに対する正解率：" + str(train_score * 100) + "%")
print("テストデータに対する正解率：" + str(test_score * 100) + "%")

トレーニングデータに対する正解率：75.93516209476309%
テストデータに対する正解率：69.61394769613948%


In [11]:
#  交差検証
## 5分割し交差検証
scores = cross_val_score(clf, explanatory_variable, answers, cv = 5)
## 各分割におけるスコア
print('Cross-Validation scores: {}'.format(scores))
## スコアの平均値
print('Average score: {}'.format(np.mean(scores)))

Cross-Validation scores: [0.72478207 0.74595268 0.71446384 0.70822943 0.69038702]
Average score: 0.7167630055539955
