In [1]:
# Pandas、globのimport
import pandas as pd
import glob

In [2]:
# Display settings
# Show up to 100 DataFrame columns before pandas truncates the rendered view
pd.options.display.max_columns = 100

In [3]:
# Merge the 2001-2018 stock price CSV files into a single DataFrame.
# glob returns paths in arbitrary, filesystem-dependent order, so sort them to
# make the concatenation order reproducible.
stock_price_files = sorted(glob.glob('stockPrice/*.csv'))
if not stock_price_files:
    # Fail with an actionable message instead of pd.concat's generic
    # "No objects to concatenate" error.
    raise FileNotFoundError("No CSV files matched 'stockPrice/*.csv'")

# Read each file into its own DataFrame. header=1 takes the column names from
# the second line of the file; the first column is parsed as dates.
stock_price_list = [
    pd.read_csv(f, header=1, encoding="shift-jis", parse_dates=[0])
    for f in stock_price_files
]

# Concatenate all frames; ignore_index avoids repeated 0..n-1 labels per file.
stock_price_all = pd.concat(stock_price_list, ignore_index=True)

# Rename the Japanese column names to their English equivalents.
stock_price_all = stock_price_all.rename(
    columns={
        '日付': 'Date',
        '始値': 'Open',
        '高値': 'High',
        '安値': 'Low',
        '終値': 'Close',
        '終値調整値': 'Adj Close',
        '出来高': 'Volume',
    }
)

# Put the rows in chronological order: later cells compute row-to-row changes,
# which are only meaningful when consecutive rows are consecutive dates.
# A stable sort keeps the file order for any rows sharing the same date.
stock_price_all = (
    stock_price_all
    .sort_values('Date', kind='mergesort')
    .reset_index(drop=True)
)

# Check for missing values (count per column).
print(stock_price_all.isnull().sum())

# Preview the loaded data.
stock_price_all.head()

Date         0
Open         0
High         0
Low          0
Close        0
Volume       0
Adj Close    0
dtype: int64


Unnamed: 0,Date,Open,High,Low,Close,Volume,Adj Close
0,2001-07-13,12490,12490,12300,12310,259430,12310
1,2001-07-16,12400,12400,12250,12330,99740,12330
2,2001-07-17,12170,12170,12100,12120,93150,12120
3,2001-07-18,12150,12150,11830,11840,165330,11840
4,2001-07-19,11990,11990,11860,11920,620220,11920


In [4]:
# Load the 2002-2018 exchange-rate data.
# The first line of the file holds the column names and the first column is
# parsed as dates.
market_Data = pd.read_csv(
    'market/market.csv',
    header=0,
    encoding="shift-jis",
    parse_dates=[0],
)

# Check for missing values (count per column).
print(market_Data.isna().sum())

# Preview the loaded data.
market_Data.head()

Date        0
USD         0
GBP         0
EUR         0
CAD         0
CHF         0
SEK         0
DKK         0
NOK         0
AUD         0
NZD         0
ZAR         0
BHD         0
IDR(100)    0
CNY         0
HKD         0
INR         0
PHP         0
SGD         0
KRW(100)    0
THB         0
KWD         0
SAR         0
AED         0
MXN         0
TWD         0
dtype: int64


Unnamed: 0,Date,USD,GBP,EUR,CAD,CHF,SEK,DKK,NOK,AUD,NZD,ZAR,BHD,IDR(100),CNY,HKD,INR,PHP,SGD,KRW(100),THB,KWD,SAR,AED,MXN,TWD
0,2002-04-01,133.15,189.79,116.12,83.48,79.28,12.87,15.63,15.08,71.14,58.8,11.76,353.65,1.37,16.09,17.07,2.73,2.61,72.21,10.12,3.07,434.14,35.52,36.26,14.81,3.82
1,2002-04-02,133.2,191.78,117.18,83.38,80.15,13.0,15.77,15.24,71.02,58.86,11.78,353.78,1.38,16.09,17.08,2.73,2.62,72.18,10.12,3.06,435.01,35.53,36.27,14.8,3.82
2,2002-04-03,133.2,191.26,116.96,83.65,80.02,12.95,15.75,15.23,71.14,58.81,11.89,353.78,1.37,16.09,17.08,2.74,2.62,72.12,10.03,3.04,436.58,35.53,36.27,14.77,3.82
3,2002-04-04,133.1,191.13,117.15,83.72,80.18,12.93,15.77,15.33,70.81,58.52,12.02,353.52,1.37,16.08,17.06,2.73,2.61,72.26,10.02,3.05,435.11,35.5,36.24,14.75,3.82
4,2002-04-05,132.3,189.74,116.32,82.96,79.47,12.87,15.65,15.22,70.33,58.36,11.83,351.39,1.37,15.98,16.96,2.71,2.6,71.85,10.03,3.04,432.21,35.29,36.03,14.66,3.8


In [5]:
# Inner-join stock prices and exchange rates on Date, keeping only the dates
# present in both data sets.
merge_Data = pd.merge(stock_price_all, market_Data, on='Date', how='inner')

# An inner merge keeps the row order of the left frame, which is not
# guaranteed to be chronological. Sort by date and rebuild a 0..n-1 index so
# that the row-to-row computations done later (pct_change, shift) compare
# consecutive dates. A stable sort keeps the existing order for equal dates.
merge_Data = merge_Data.sort_values('Date', kind='mergesort').reset_index(drop=True)

# Build the target variable: 0 when the close is below the open, 1 otherwise.
merge_Data['diff'] = merge_Data['Close'] - merge_Data['Open']
# Vectorized form of "0 if diff < 0 else 1"; negating the comparison keeps the
# original mapping of a NaN diff to 1.
merge_Data['answer'] = (~(merge_Data['diff'] < 0)).astype('int64')


In [6]:
# Remove the columns that are no longer needed. errors='ignore' keeps this
# cell re-runnable: on a second run the columns are already gone and a plain
# drop would raise KeyError.
merge_Data.drop(columns=['Date', 'Close', 'diff'], inplace=True, errors='ignore')

# Convert every column to its fractional change from the previous row.
merge_Data_change = merge_Data.pct_change()

# Restore the 0/1 label (pct_change replaced it with a change ratio) and shift
# it up one row, so each row's label is the answer of the following row, i.e.
# a future value relative to that row's explanatory variables.
merge_Data_change['answer'] = merge_Data['answer'].shift(-1)

# Preview the transformed data (first rows only, not the whole frame).
display(merge_Data_change.head(10))

# Drop the first row (pct_change has no previous row to compare with) and the
# last row (its label was shifted out). Positional slicing does not depend on
# the index labels matching the frame length.
merge_Data_change = merge_Data_change.iloc[1:-1].copy()
merge_Data_change.to_csv("stock_Price_Prediction.csv", index=False, encoding="shift-jis")

Unnamed: 0,Open,High,Low,Volume,Adj Close,USD,GBP,EUR,CAD,CHF,SEK,DKK,NOK,AUD,NZD,ZAR,BHD,IDR(100),CNY,HKD,INR,PHP,SGD,KRW(100),THB,KWD,SAR,AED,MXN,TWD,answer
0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.0
1,-0.004444,0.000000,0.004525,-0.514297,0.010801,0.000376,0.010485,0.009128,-0.001198,0.010974,0.010101,0.008957,0.010610,-0.001687,0.001020,0.001701,0.000368,0.007299,0.000000,0.000586,0.000000,0.003831,-0.000415,0.000000,-0.003257,0.002004,0.000282,0.000276,-0.000675,0.000000,1.0
2,-0.006250,0.024889,-0.003604,2.643314,0.018700,0.000000,-0.002711,-0.001877,0.003238,-0.001622,-0.003846,-0.001268,-0.000656,0.001690,-0.000849,0.009338,0.000000,-0.007246,0.000000,0.000000,0.003663,0.000000,-0.000831,-0.008893,-0.006536,0.003609,0.000000,0.000000,-0.002027,0.000000,1.0
3,0.026954,0.004337,0.028933,0.208818,0.000874,-0.000751,-0.000680,0.001624,0.000837,0.002000,-0.001544,0.001270,0.006566,-0.004639,-0.004931,0.010934,-0.000735,0.000000,-0.000622,-0.001171,-0.003650,-0.003817,0.001941,-0.000997,0.003289,-0.003367,-0.000844,-0.000827,-0.001354,0.000000,0.0
4,0.000000,-0.011226,-0.002636,-0.317789,-0.003493,-0.006011,-0.007273,-0.007085,-0.009078,-0.008855,-0.004640,-0.007609,-0.007175,-0.006779,-0.002734,-0.015807,-0.006025,0.000000,-0.006219,-0.005862,-0.007326,-0.003831,-0.005674,0.000998,-0.003279,-0.006665,-0.005915,-0.005795,-0.006102,-0.005236,1.0
5,-0.004374,0.001747,-0.003524,-0.435145,-0.002629,-0.005669,-0.006324,-0.005416,-0.001929,-0.005914,-0.003885,-0.005112,-0.005913,-0.008105,-0.015936,0.007608,-0.005663,0.007299,-0.005632,-0.005307,-0.003690,-0.007692,-0.001253,-0.012961,-0.003289,-0.005992,-0.005667,-0.006106,-0.002729,-0.007895,0.0
6,0.000000,-0.004359,-0.012378,1.401350,-0.017575,0.000000,-0.001220,-0.005532,-0.004227,-0.006835,-0.010140,-0.005780,-0.003305,-0.003727,-0.003831,-0.010906,0.000000,0.007246,0.000000,0.000000,0.000000,0.000000,-0.001254,0.001010,-0.003300,0.003608,0.000000,0.000559,-0.002052,0.002653,1.0
7,-0.021968,-0.007881,-0.007162,-0.424734,0.006261,-0.006461,-0.002496,0.000956,-0.006307,0.000127,-0.004728,0.000646,-0.001326,-0.001295,-0.002972,-0.009330,-0.006468,-0.014388,-0.006293,-0.006520,-0.007407,-0.003876,-0.006976,-0.007064,-0.003311,-0.007120,-0.006555,-0.006698,-0.006854,-0.007937,0.0
8,0.020665,0.004413,0.009017,-0.203500,-0.005333,0.003060,0.002768,0.002171,0.004882,0.001912,0.004751,0.002582,0.003984,0.003890,0.010519,0.003425,0.003082,0.007299,0.003167,0.002983,0.003731,0.003891,0.003091,0.002033,0.000000,0.001752,0.003155,0.003091,0.001380,0.002667,0.0
9,-0.021127,-0.017575,-0.021448,2.397004,-0.011618,0.003432,0.003716,0.005026,0.006073,0.007377,0.005516,0.005151,0.008598,0.011338,0.013532,0.005119,0.003417,0.014493,0.003157,0.003569,0.003717,0.003876,0.004062,0.003043,0.003322,0.003428,0.003432,0.003361,-0.002757,0.002660,1.0
