In [1]:
# Standard library
import glob
import os

# Core third-party packages (numpy, pandas, matplotlib)
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
# Display settings
# Let pandas render up to 500 columns when a DataFrame is displayed.
pd.options.display.max_columns = 500

In [3]:
# Merge the 2001-2018 stock price data (Nikkei average) into a single frame.
# glob.glob returns paths in an arbitrary, OS-dependent order, so the list is
# sorted to make the load reproducible.
stock_price_files = sorted(glob.glob('stockPrice/*.csv'))

# Read every file into its own DataFrame. The column names are taken from the
# second line of each file (header=1) and column 0 is parsed as dates.
stock_price_list = [
    pd.read_csv(f, header=1, encoding="shift-jis", parse_dates=[0])
    for f in stock_price_files
]

# Concatenate all frames; ignore_index avoids duplicated row labels, since
# every file would otherwise contribute its own 0..n index.
stock_price_all = pd.concat(stock_price_list, ignore_index=True)

# Rename the Japanese column headers to English names.
stock_price_all = stock_price_all.rename(
    columns={'日付': 'Date', '始値': 'Open', '高値': 'High', '安値': 'Low',
             '終値': 'Close', '終値調整値': 'Adj Close', '出来高': 'Volume'}
)

# The exponential moving averages below depend on row order, so put the rows
# in chronological order regardless of the order the files were read in.
stock_price_all = (
    stock_price_all
    .sort_values('Date', kind='mergesort')
    .reset_index(drop=True)
)

adj_close = stock_price_all['Adj Close']

# EMA: exponentially weighted mean of the adjusted close with span 5.
stock_price_all['ema_5'] = adj_close.ewm(span=5).mean()

# MACD: span-5 EMA minus span-25 EMA of the adjusted close.
stock_price_all['macd'] = stock_price_all['ema_5'] - adj_close.ewm(span=25).mean()

# Moving-average deviation rate: relative distance of the adjusted close from
# its span-5 EMA. Kept in a separate frame keyed by Date.
creating_feature = pd.DataFrame({'Date': stock_price_all['Date']})
creating_feature['pvema_5'] = (adj_close - stock_price_all['ema_5']) / stock_price_all['ema_5']

# Missing-value check: print the number of nulls per column.
print(stock_price_all.isnull().sum())

# Preview the data.
stock_price_all.head()

Date         0
Open         0
High         0
Low          0
Close        0
Volume       0
Adj Close    0
ema_5        0
macd         0
dtype: int64


Unnamed: 0,Date,Open,High,Low,Close,Volume,Adj Close,ema_5,macd
0,2001-07-13,12490,12490,12300,12310,259430,12310,12310.0,0.0
1,2001-07-16,12400,12400,12250,12330,99740,12330,12322.0,1.6
2,2001-07-17,12170,12170,12100,12120,93150,12120,12226.315789,-21.871844
3,2001-07-18,12150,12150,11830,11840,165330,11840,12065.846154,-67.735955
4,2001-07-19,11990,11990,11860,11920,620220,11920,12009.85782,-73.911543


In [4]:
# Preview the first five rows of creating_feature.
creating_feature.head(n=5)

Unnamed: 0,Date,pvema_5
0,2001-07-13,0.0
1,2001-07-16,0.000649
2,2001-07-17,-0.008696
3,2001-07-18,-0.018718
4,2001-07-19,-0.007482


In [5]:
# Load the 2001-2018 futures data (gold).
gold_data = pd.read_csv('future/gold/gold.csv', header=0, encoding="utf-8")

# Drop the column that is not used.
gold_data = gold_data.drop(columns=['前日比%'])

# Rename the Japanese column headers to English names.
gold_data = gold_data.rename(
    columns={'日付け': 'Date', '始値': 'Open_Gold', '高値': 'High_Gold',
             '安値': 'Low_Gold', '終値': 'Close_Gold', '出来高': 'Volume_Gold'}
)

# Parse the date strings explicitly instead of through read_csv's
# `date_parser` argument, which is deprecated in pandas 2.x.
gold_data['Date'] = pd.to_datetime(gold_data['Date'], format='%Y年%m月%d日')

# Sort by date (the original row labels are kept).
gold_data = gold_data.sort_values('Date')

# Convert the text columns to float.
# Prices: strip the thousands separator.
for price_col in ['Open_Gold', 'High_Gold', 'Low_Gold', 'Close_Gold']:
    gold_data[price_col] = gold_data[price_col].str.replace(',', '')

# Volume: strip the 'K' suffix (the number itself is kept as-is, it is not
# multiplied) and turn the '-' placeholder into NaN. The result is assigned
# back to the column: calling replace(..., inplace=True) on a column selection
# does not update the frame when pandas Copy-on-Write is active.
gold_data['Volume_Gold'] = (
    gold_data['Volume_Gold'].str.replace('K', '').replace('-', np.nan)
)

gold_data = gold_data.astype(
    {'Open_Gold': float, 'High_Gold': float, 'Low_Gold': float,
     'Close_Gold': float, 'Volume_Gold': float}
)

# Fill missing values with the previous row's value. Missing values at the
# very start of the series have no predecessor and therefore stay NaN.
gold_data = gold_data.ffill()

# Missing-value check: print the number of nulls per column.
print(gold_data.isnull().sum())

# Start future_data as a copy of the gold frame.
future_data = gold_data.copy()

# Preview the data.
future_data.head()

Date           0
Close_Gold     0
Open_Gold      0
High_Gold      0
Low_Gold       0
Volume_Gold    2
dtype: int64


Unnamed: 0,Date,Close_Gold,Open_Gold,High_Gold,Low_Gold,Volume_Gold
4504,2001-01-02,268.4,268.4,268.4,268.4,
4503,2001-01-03,268.0,268.0,268.0,268.0,
4502,2001-01-04,267.3,267.3,267.3,267.3,0.0
4501,2001-01-05,268.0,268.0,268.0,268.0,0.0
4500,2001-01-08,268.0,268.0,268.0,268.0,0.0


In [6]:
# Load the 2001-2018 futures data (銅, i.e. copper; the variable and column
# names say "bronze" and are kept unchanged because they appear in the
# merged output).
bronze_data = pd.read_csv('future/bronze/bronze.csv', header=0, encoding="utf-8")

# Drop the column that is not used.
bronze_data = bronze_data.drop(columns=['前日比%'])

# Rename the Japanese column headers to English names.
bronze_data = bronze_data.rename(
    columns={'日付け': 'Date', '始値': 'Open_Bronze', '高値': 'High_Bronze',
             '安値': 'Low_Bronze', '終値': 'Close_Bronze', '出来高': 'Volume_Bronze'}
)

# Parse the date strings explicitly instead of through read_csv's
# `date_parser` argument, which is deprecated in pandas 2.x.
bronze_data['Date'] = pd.to_datetime(bronze_data['Date'], format='%Y年%m月%d日')

# Sort by date.
bronze_data = bronze_data.sort_values('Date')

# Volume: strip the 'K' suffix (the number itself is kept as-is, it is not
# multiplied) and turn the '-' placeholder into NaN. The result is assigned
# back to the column: calling replace(..., inplace=True) on a column selection
# does not update the frame when pandas Copy-on-Write is active.
bronze_data['Volume_Bronze'] = (
    bronze_data['Volume_Bronze'].str.replace('K', '').replace('-', np.nan)
)

# Convert the price and volume columns to float.
bronze_data = bronze_data.astype(
    {'Open_Bronze': float, 'High_Bronze': float, 'Low_Bronze': float,
     'Close_Bronze': float, 'Volume_Bronze': float}
)

# Fill missing values with the previous row's value.
bronze_data = bronze_data.ffill()

# Missing-value check: print the number of nulls per column.
print(bronze_data.isnull().sum())

# Inner-join onto future_data: only dates present in both frames are kept.
future_data = pd.merge(future_data, bronze_data, on='Date', how='inner')

# Preview the data.
future_data.head()

Date             0
Close_Bronze     0
Open_Bronze      0
High_Bronze      0
Low_Bronze       0
Volume_Bronze    0
dtype: int64


Unnamed: 0,Date,Close_Gold,Open_Gold,High_Gold,Low_Gold,Volume_Gold,Close_Bronze,Open_Bronze,High_Bronze,Low_Bronze,Volume_Bronze
0,2001-01-02,268.4,268.4,268.4,268.4,,0.818,0.846,0.846,0.818,0.37
1,2001-01-03,268.0,268.0,268.0,268.0,,0.824,0.809,0.824,0.807,0.59
2,2001-01-04,267.3,267.3,267.3,267.3,0.0,0.815,0.826,0.826,0.815,0.62
3,2001-01-05,268.0,268.0,268.0,268.0,0.0,0.832,0.82,0.833,0.815,0.42
4,2001-01-08,268.0,268.0,268.0,268.0,0.0,0.833,0.836,0.839,0.832,0.52


In [7]:
# Load the 2002-2018 exchange rates; column 0 is parsed as dates.
market_data = pd.read_csv(
    'market/market.csv',
    header=0,
    encoding="shift-jis",
    parse_dates=[0],
)

# Missing-value check: print the number of nulls per column.
print(market_data.isnull().sum())

# Shift the rate information up by one row.
# NOTE(review): DataFrame.shift moves every column, including Date, so each
# date still sits next to its own rates; the visible effect is that the first
# row is dropped and the last row becomes all-missing. If the intent was to
# offset the rates relative to the dates, only the rate columns should be
# shifted -- confirm the intended direction before changing this (a negative
# shift of the rates alone would attach the next row's rates to each date).
market_data = market_data.shift(periods=-1)

# Preview the data.
market_data.head()

Date        0
USD         0
GBP         0
EUR         0
CAD         0
CHF         0
SEK         0
DKK         0
NOK         0
AUD         0
NZD         0
ZAR         0
BHD         0
IDR(100)    0
CNY         0
HKD         0
INR         0
PHP         0
SGD         0
KRW(100)    0
THB         0
KWD         0
SAR         0
AED         0
MXN         0
TWD         0
dtype: int64


Unnamed: 0,Date,USD,GBP,EUR,CAD,CHF,SEK,DKK,NOK,AUD,NZD,ZAR,BHD,IDR(100),CNY,HKD,INR,PHP,SGD,KRW(100),THB,KWD,SAR,AED,MXN,TWD
0,2002-04-02,133.2,191.78,117.18,83.38,80.15,13.0,15.77,15.24,71.02,58.86,11.78,353.78,1.38,16.09,17.08,2.73,2.62,72.18,10.12,3.06,435.01,35.53,36.27,14.8,3.82
1,2002-04-03,133.2,191.26,116.96,83.65,80.02,12.95,15.75,15.23,71.14,58.81,11.89,353.78,1.37,16.09,17.08,2.74,2.62,72.12,10.03,3.04,436.58,35.53,36.27,14.77,3.82
2,2002-04-04,133.1,191.13,117.15,83.72,80.18,12.93,15.77,15.33,70.81,58.52,12.02,353.52,1.37,16.08,17.06,2.73,2.61,72.26,10.02,3.05,435.11,35.5,36.24,14.75,3.82
3,2002-04-05,132.3,189.74,116.32,82.96,79.47,12.87,15.65,15.22,70.33,58.36,11.83,351.39,1.37,15.98,16.96,2.71,2.6,71.85,10.03,3.04,432.21,35.29,36.03,14.66,3.8
4,2002-04-08,131.55,188.54,115.69,82.8,79.0,12.82,15.57,15.13,69.76,57.43,11.92,349.4,1.38,15.89,16.87,2.7,2.58,71.76,9.9,3.03,429.62,35.09,35.81,14.62,3.77


In [8]:
# Merge the 2002-2018 major world index data into one wide frame.
# glob.glob returns paths in an arbitrary order; sort them so the column order
# is reproducible and the same file is always the base of the left joins below
# (the base file decides which dates are kept).
world_indices_files = sorted(glob.glob('worldIndices/*.csv'))
if not world_indices_files:
    raise FileNotFoundError("no CSV files found under 'worldIndices/'")


def _load_world_index(csv_path):
    """Read one index CSV and suffix its price columns with the ticker.

    The ticker is the file name without directory and extension; it is
    extracted with os.path so the result is the same on Windows and POSIX.
    'Close' and 'Volume' are dropped, and 'Open', 'High', 'Low' and
    'Adj Close' are renamed to '<column><ticker>'. Column 0 is parsed as dates.
    """
    ticker = os.path.splitext(os.path.basename(csv_path))[0]
    index_prices = pd.read_csv(csv_path, header=0, encoding='shift-jis', parse_dates=[0])
    index_prices = index_prices.drop(columns=['Close', 'Volume'])
    return index_prices.rename(
        columns={col: col + ticker for col in ('Open', 'High', 'Low', 'Adj Close')}
    )


# The first file is the base; every other index is left-joined onto it by
# Date, so dates missing from the base file are not included.
world_indices_data = _load_world_index(world_indices_files[0])
for csv_path in world_indices_files[1:]:
    world_indices_data = pd.merge(
        world_indices_data, _load_world_index(csv_path), on='Date', how='left'
    )

# Missing-value check: print the number of nulls per column (before filling).
print(world_indices_data.isnull().sum())

# Fill missing values with the previous row's value.
world_indices_data = world_indices_data.ffill()

# Preview the data.
world_indices_data.head()

Date                    0
Open000001.SS           2
High000001.SS           2
Low000001.SS            2
Adj Close000001.SS      2
Open^AORD             463
High^AORD             463
Low^AORD              463
Adj Close^AORD        463
Open^AXJO             465
High^AXJO             465
Low^AXJO              465
Adj Close^AXJO        465
Open^BFX               63
High^BFX               63
Low^BFX                63
Adj Close^BFX          63
Open^BSESN            200
High^BSESN            200
Low^BSESN             200
Adj Close^BSESN       200
Open^BVSP             184
High^BVSP             184
Low^BVSP              184
Adj Close^BVSP        184
Open^DJI              144
High^DJI              144
Low^DJI               144
Adj Close^DJI         144
Open^FCHI              64
                     ... 
Low^MERV              231
Adj Close^MERV        231
Open^MXX              139
High^MXX              139
Low^MXX               139
Adj Close^MXX         139
Open^N100              64
High^N100   

Unnamed: 0,Date,Open000001.SS,High000001.SS,Low000001.SS,Adj Close000001.SS,Open^AORD,High^AORD,Low^AORD,Adj Close^AORD,Open^AXJO,High^AXJO,Low^AXJO,Adj Close^AXJO,Open^BFX,High^BFX,Low^BFX,Adj Close^BFX,Open^BSESN,High^BSESN,Low^BSESN,Adj Close^BSESN,Open^BVSP,High^BVSP,Low^BVSP,Adj Close^BVSP,Open^DJI,High^DJI,Low^DJI,Adj Close^DJI,Open^FCHI,High^FCHI,Low^FCHI,Adj Close^FCHI,Open^GDAXI,High^GDAXI,Low^GDAXI,Adj Close^GDAXI,Open^GSPC,High^GSPC,Low^GSPC,Adj Close^GSPC,Open^GSPTSE,High^GSPTSE,Low^GSPTSE,Adj Close^GSPTSE,Open^HSI,High^HSI,Low^HSI,Adj Close^HSI,Open^IPSA,High^IPSA,Low^IPSA,Adj Close^IPSA,Open^IXIC,High^IXIC,Low^IXIC,Adj Close^IXIC,Open^JKSE,High^JKSE,Low^JKSE,Adj Close^JKSE,Open^KS11,High^KS11,Low^KS11,Adj Close^KS11,Open^MERV,High^MERV,Low^MERV,Adj Close^MERV,Open^MXX,High^MXX,Low^MXX,Adj Close^MXX,Open^N100,High^N100,Low^N100,Adj Close^N100,Open^NYA,High^NYA,Low^NYA,Adj Close^NYA,Open^RUT,High^RUT,Low^RUT,Adj Close^RUT,Open^TWII,High^TWII,Low^TWII,Adj Close^TWII,Open^VIX,High^VIX,Low^VIX,Adj Close^VIX,Open^XAX,High^XAX,Low^XAX,Adj Close^XAX
0,2002-04-01,1598.026001,1609.916992,1589.406982,1608.505981,,,,,,,,,,,,,3482.939941,3519.320068,3482.939941,3500.179932,13256.0,13499.0,13068.0,13467.0,10402.070313,10402.070313,10263.679688,10362.700195,,,,,,,,,1147.390015,1147.839966,1132.869995,1146.540039,7854.0,7932.100098,7850.0,7888.5,,,,,1163.630005,1164.579956,1149.910034,1151.329956,1834.589966,1865.369995,1817.25,1862.619995,482.524994,486.667999,481.006989,486.653564,900.469971,900.469971,874.900024,875.830017,,,,,7362.140137,7427.160156,7341.350098,7371.890137,,,,,6327.109863,6327.109863,6327.109863,6327.109863,506.459991,506.459991,498.399994,504.5,6118.450195,6213.319824,6118.450195,6186.416504,18.99,19.5,18.610001,18.73,910.48999,915.210022,910.27002,914.75
1,2002-04-02,1608.519043,1610.727051,1575.972046,1582.883057,3363.300049,3363.300049,3332.399902,3338.5,3414.800049,3418.0,3380.199951,3387.300049,2766.360107,2766.360107,2740.689941,2755.050049,3505.399902,3537.290039,3490.870117,3505.790039,13482.0,13535.0,13245.0,13245.0,10352.459961,10352.459961,10264.860352,10313.709961,4673.029785,4688.160156,4626.459961,4627.330078,5379.640137,5379.640137,5289.799805,5311.080078,1146.540039,1146.540039,1135.709961,1136.76001,7895.899902,7920.100098,7886.700195,7893.399902,11032.360352,11055.980469,10816.730469,10878.040039,1150.619995,1151.089966,1139.849976,1139.969971,1836.030029,1839.369995,1804.400024,1804.400024,487.178009,491.790009,485.949005,489.074493,878.900024,905.340027,871.450012,905.340027,436.220001,447.399994,432.75,435.089996,7370.149902,7387.310059,7316.689941,7316.689941,817.280029,819.659973,812.359985,812.849976,6305.330078,6305.330078,6305.330078,6305.330078,504.5,504.5,500.48999,500.48999,6248.040039,6326.129883,6230.350098,6243.436523,19.120001,19.41,19.120001,19.16,914.75,919.669983,913.880005,915.849976
2,2002-04-03,1582.744019,1603.797974,1582.671997,1594.508057,3338.899902,3343.100098,3329.0,3329.0,3387.300049,3393.300049,3377.800049,3377.800049,2755.110107,2784.169922,2748.649902,2783.360107,3500.76001,3500.76001,3458.850098,3462.98999,13280.0,13345.0,13061.0,13085.0,10311.80957,10339.860352,10139.480469,10198.290039,4618.649902,4644.209961,4582.709961,4632.259766,5293.370117,5322.549805,5266.060059,5281.839844,1136.76001,1138.849976,1119.680054,1125.400024,7869.0,7887.100098,7832.0,7842.299805,10831.230469,10853.879883,10783.0,10833.959961,1139.969971,1146.709961,1139.72998,1146.359985,1809.589966,1813.359985,1770.609985,1784.349976,489.048004,492.079987,487.93399,488.142517,899.97998,919.890015,898.280029,918.590027,435.089996,435.089996,426.390015,431.399994,7316.589844,7319.600098,7191.75,7191.939941,812.070007,813.369995,806.469971,811.590027,6245.689941,6245.689941,6245.689941,6245.689941,500.48999,501.529999,495.869995,496.600006,6220.490234,6306.439941,6207.129883,6294.63623,19.41,20.469999,19.4,20.200001,915.849976,916.109985,907.22998,907.97998
3,2002-04-04,1595.017944,1643.540039,1583.634033,1641.698975,3329.0,3329.300049,3317.0,3322.899902,3377.800049,3378.100098,3365.699951,3372.600098,2776.040039,2807.409912,2773.469971,2805.22998,3463.290039,3529.02002,3463.290039,3512.550049,13090.0,13370.0,13041.0,13360.0,10199.540039,10261.879883,10157.259766,10235.169922,4604.879883,4621.569824,4536.549805,4538.310059,5284.200195,5290.120117,5211.740234,5254.950195,1125.400024,1130.449951,1120.060059,1126.339966,7830.700195,7862.0,7773.700195,7784.5,10766.55957,10838.219727,10747.990234,10831.370117,1147.069946,1154.400024,1147.069946,1150.26001,1776.530029,1800.829956,1770.160034,1789.75,489.811005,502.5,489.811005,502.485077,912.940002,920.869995,908.820007,918.01001,432.940002,437.040009,415.040009,415.040009,7190.609863,7260.879883,7169.609863,7253.5,807.900024,809.25,796.159973,798.23999,6249.180176,6249.180176,6249.180176,6249.180176,496.600006,498.579987,494.73999,498.369995,6303.709961,6307.319824,6164.060059,6207.066406,19.9,20.09,19.459999,19.780001,907.97998,908.27002,900.97998,903.609985
4,2002-04-05,1650.504028,1657.093994,1628.546021,1634.642944,3322.899902,3345.5,3322.300049,3340.199951,3372.600098,3395.800049,3371.699951,3390.0,2800.120117,2805.51001,2766.72998,2771.179932,3516.429932,3538.48999,3499.060059,3500.570068,13362.0,13445.0,13233.0,13276.0,10235.799805,10335.299805,10217.389648,10271.639648,4536.399902,4554.759766,4506.870117,4515.700195,5250.47998,5293.279785,5235.100098,5260.529785,1126.339966,1133.310059,1119.48999,1122.72998,7808.600098,7808.600098,7774.700195,7782.100098,10766.55957,10838.219727,10747.990234,10831.370117,1150.26001,1154.52002,1148.839966,1151.800049,1796.920044,1803.209961,1769.949951,1770.030029,500.941986,509.899994,500.941986,508.978882,912.940002,920.869995,908.820007,918.01001,415.040009,421.450012,396.589996,401.059998,7254.180176,7357.009766,7254.180176,7335.759766,798.159973,798.72998,792.669983,794.330017,6246.540039,6246.540039,6246.540039,6246.540039,498.369995,501.149994,497.51001,497.76001,6303.709961,6307.319824,6164.060059,6207.066406,19.33,19.4,18.780001,19.129999,907.97998,908.059998,897.23999,898.609985


In [9]:
# Combine all sources with inner joins on Date, so only dates present in
# every frame survive: world indices + exchange rates, then futures, and
# finally the stock prices as the left-most columns.
merge_work1 = world_indices_data.merge(market_data, on='Date', how='inner')
merge_work2 = merge_work1.merge(future_data, on='Date', how='inner')
merge_Data = stock_price_all.merge(merge_work2, on='Date', how='inner')

# Preview the data.
merge_Data.head()

Unnamed: 0,Date,Open,High,Low,Close,Volume,Adj Close,ema_5,macd,Open000001.SS,High000001.SS,Low000001.SS,Adj Close000001.SS,Open^AORD,High^AORD,Low^AORD,Adj Close^AORD,Open^AXJO,High^AXJO,Low^AXJO,Adj Close^AXJO,Open^BFX,High^BFX,Low^BFX,Adj Close^BFX,Open^BSESN,High^BSESN,Low^BSESN,Adj Close^BSESN,Open^BVSP,High^BVSP,Low^BVSP,Adj Close^BVSP,Open^DJI,High^DJI,Low^DJI,Adj Close^DJI,Open^FCHI,High^FCHI,Low^FCHI,Adj Close^FCHI,Open^GDAXI,High^GDAXI,Low^GDAXI,Adj Close^GDAXI,Open^GSPC,High^GSPC,Low^GSPC,Adj Close^GSPC,Open^GSPTSE,High^GSPTSE,Low^GSPTSE,Adj Close^GSPTSE,Open^HSI,High^HSI,Low^HSI,Adj Close^HSI,Open^IPSA,High^IPSA,Low^IPSA,Adj Close^IPSA,Open^IXIC,High^IXIC,Low^IXIC,Adj Close^IXIC,Open^JKSE,High^JKSE,Low^JKSE,Adj Close^JKSE,Open^KS11,High^KS11,Low^KS11,Adj Close^KS11,Open^MERV,High^MERV,Low^MERV,Adj Close^MERV,Open^MXX,High^MXX,Low^MXX,Adj Close^MXX,Open^N100,High^N100,Low^N100,Adj Close^N100,Open^NYA,High^NYA,Low^NYA,Adj Close^NYA,Open^RUT,High^RUT,Low^RUT,Adj Close^RUT,Open^TWII,High^TWII,Low^TWII,Adj Close^TWII,Open^VIX,High^VIX,Low^VIX,Adj Close^VIX,Open^XAX,High^XAX,Low^XAX,Adj Close^XAX,USD,GBP,EUR,CAD,CHF,SEK,DKK,NOK,AUD,NZD,ZAR,BHD,IDR(100),CNY,HKD,INR,PHP,SGD,KRW(100),THB,KWD,SAR,AED,MXN,TWD,Close_Gold,Open_Gold,High_Gold,Low_Gold,Volume_Gold,Close_Bronze,Open_Bronze,High_Bronze,Low_Bronze,Volume_Bronze
0,2002-04-02,11200,11250,11100,11230,37540,11230,11226.135581,52.741194,1608.519043,1610.727051,1575.972046,1582.883057,3363.300049,3363.300049,3332.399902,3338.5,3414.800049,3418.0,3380.199951,3387.300049,2766.360107,2766.360107,2740.689941,2755.050049,3505.399902,3537.290039,3490.870117,3505.790039,13482.0,13535.0,13245.0,13245.0,10352.459961,10352.459961,10264.860352,10313.709961,4673.029785,4688.160156,4626.459961,4627.330078,5379.640137,5379.640137,5289.799805,5311.080078,1146.540039,1146.540039,1135.709961,1136.76001,7895.899902,7920.100098,7886.700195,7893.399902,11032.360352,11055.980469,10816.730469,10878.040039,1150.619995,1151.089966,1139.849976,1139.969971,1836.030029,1839.369995,1804.400024,1804.400024,487.178009,491.790009,485.949005,489.074493,878.900024,905.340027,871.450012,905.340027,436.220001,447.399994,432.75,435.089996,7370.149902,7387.310059,7316.689941,7316.689941,817.280029,819.659973,812.359985,812.849976,6305.330078,6305.330078,6305.330078,6305.330078,504.5,504.5,500.48999,500.48999,6248.040039,6326.129883,6230.350098,6243.436523,19.120001,19.41,19.120001,19.16,914.75,919.669983,913.880005,915.849976,133.2,191.78,117.18,83.38,80.15,13.0,15.77,15.24,71.02,58.86,11.78,353.78,1.38,16.09,17.08,2.73,2.62,72.18,10.12,3.06,435.01,35.53,36.27,14.8,3.82,305.9,303.0,306.8,302.5,0.44,0.761,0.765,0.767,0.762,0.14
1,2002-04-03,11130,11530,11060,11440,136770,11440,11297.423721,103.521195,1582.744019,1603.797974,1582.671997,1594.508057,3338.899902,3343.100098,3329.0,3329.0,3387.300049,3393.300049,3377.800049,3377.800049,2755.110107,2784.169922,2748.649902,2783.360107,3500.76001,3500.76001,3458.850098,3462.98999,13280.0,13345.0,13061.0,13085.0,10311.80957,10339.860352,10139.480469,10198.290039,4618.649902,4644.209961,4582.709961,4632.259766,5293.370117,5322.549805,5266.060059,5281.839844,1136.76001,1138.849976,1119.680054,1125.400024,7869.0,7887.100098,7832.0,7842.299805,10831.230469,10853.879883,10783.0,10833.959961,1139.969971,1146.709961,1139.72998,1146.359985,1809.589966,1813.359985,1770.609985,1784.349976,489.048004,492.079987,487.93399,488.142517,899.97998,919.890015,898.280029,918.590027,435.089996,435.089996,426.390015,431.399994,7316.589844,7319.600098,7191.75,7191.939941,812.070007,813.369995,806.469971,811.590027,6245.689941,6245.689941,6245.689941,6245.689941,500.48999,501.529999,495.869995,496.600006,6220.490234,6306.439941,6207.129883,6294.63623,19.41,20.469999,19.4,20.200001,915.849976,916.109985,907.22998,907.97998,133.2,191.26,116.96,83.65,80.02,12.95,15.75,15.23,71.14,58.81,11.89,353.78,1.37,16.09,17.08,2.74,2.62,72.12,10.03,3.04,436.58,35.53,36.27,14.77,3.82,302.6,306.0,306.0,302.0,0.28,0.74,0.75,0.75,0.74,0.19
2,2002-04-04,11430,11580,11380,11450,165330,11450,11348.28248,134.680137,1595.017944,1643.540039,1583.634033,1641.698975,3329.0,3329.300049,3317.0,3322.899902,3377.800049,3378.100098,3365.699951,3372.600098,2776.040039,2807.409912,2773.469971,2805.22998,3463.290039,3529.02002,3463.290039,3512.550049,13090.0,13370.0,13041.0,13360.0,10199.540039,10261.879883,10157.259766,10235.169922,4604.879883,4621.569824,4536.549805,4538.310059,5284.200195,5290.120117,5211.740234,5254.950195,1125.400024,1130.449951,1120.060059,1126.339966,7830.700195,7862.0,7773.700195,7784.5,10766.55957,10838.219727,10747.990234,10831.370117,1147.069946,1154.400024,1147.069946,1150.26001,1776.530029,1800.829956,1770.160034,1789.75,489.811005,502.5,489.811005,502.485077,912.940002,920.869995,908.820007,918.01001,432.940002,437.040009,415.040009,415.040009,7190.609863,7260.879883,7169.609863,7253.5,807.900024,809.25,796.159973,798.23999,6249.180176,6249.180176,6249.180176,6249.180176,496.600006,498.579987,494.73999,498.369995,6303.709961,6307.319824,6164.060059,6207.066406,19.9,20.09,19.459999,19.780001,907.97998,908.27002,900.97998,903.609985,133.1,191.13,117.15,83.72,80.18,12.93,15.77,15.33,70.81,58.52,12.02,353.52,1.37,16.08,17.06,2.73,2.61,72.26,10.02,3.05,435.11,35.5,36.24,14.75,3.82,301.1,302.6,302.6,299.0,0.25,0.731,0.733,0.733,0.731,0.23
3,2002-04-05,11430,11450,11350,11410,112790,11410,11368.854987,140.145122,1650.504028,1657.093994,1628.546021,1634.642944,3322.899902,3345.5,3322.300049,3340.199951,3372.600098,3395.800049,3371.699951,3390.0,2800.120117,2805.51001,2766.72998,2771.179932,3516.429932,3538.48999,3499.060059,3500.570068,13362.0,13445.0,13233.0,13276.0,10235.799805,10335.299805,10217.389648,10271.639648,4536.399902,4554.759766,4506.870117,4515.700195,5250.47998,5293.279785,5235.100098,5260.529785,1126.339966,1133.310059,1119.48999,1122.72998,7808.600098,7808.600098,7774.700195,7782.100098,10766.55957,10838.219727,10747.990234,10831.370117,1150.26001,1154.52002,1148.839966,1151.800049,1796.920044,1803.209961,1769.949951,1770.030029,500.941986,509.899994,500.941986,508.978882,912.940002,920.869995,908.820007,918.01001,415.040009,421.450012,396.589996,401.059998,7254.180176,7357.009766,7254.180176,7335.759766,798.159973,798.72998,792.669983,794.330017,6246.540039,6246.540039,6246.540039,6246.540039,498.369995,501.149994,497.51001,497.76001,6303.709961,6307.319824,6164.060059,6207.066406,19.33,19.4,18.780001,19.129999,907.97998,908.059998,897.23999,898.609985,132.3,189.74,116.32,82.96,79.47,12.87,15.65,15.22,70.33,58.36,11.83,351.39,1.37,15.98,16.96,2.71,2.6,71.85,10.03,3.04,432.21,35.29,36.03,14.66,3.8,300.1,300.7,302.5,298.7,0.09,0.721,0.733,0.733,0.718,0.24
4,2002-04-08,11380,11470,11310,11380,63710,11380,11372.569991,132.222417,1633.831055,1643.676025,1631.067017,1640.650024,3340.199951,3352.199951,3333.199951,3340.300049,3390.0,3402.300049,3381.800049,3389.0,2776.629883,2776.949951,2744.300049,2752.840088,3502.090088,3521.409912,3478.929932,3480.169922,13248.0,13248.0,13114.0,13160.0,10258.910156,10258.910156,10120.870117,10249.080078,4504.180176,4511.870117,4415.75,4431.879883,5261.370117,5261.370117,5128.02002,5180.330078,1122.72998,1125.410034,1111.790039,1125.290039,7736.600098,7763.399902,7719.5,7747.600098,10833.160156,10856.639648,10702.44043,10723.679688,1151.680054,1152.040039,1144.579956,1146.359985,1741.099976,1786.400024,1733.839966,1785.869995,509.300995,518.093994,509.139008,515.064697,920.830017,926.22998,900.23999,900.690002,398.809998,399.559998,383.779999,385.290009,7325.069824,7325.069824,7270.580078,7271.77002,792.330017,792.429993,778.51001,781.780029,6255.839844,6255.839844,6255.839844,6255.839844,497.76001,503.019989,493.76001,503.01001,6204.569824,6246.319824,6176.790039,6190.806641,20.35,20.459999,19.57,19.610001,898.609985,904.48999,898.580017,904.450012,131.55,188.54,115.69,82.8,79.0,12.82,15.57,15.13,69.76,57.43,11.92,349.4,1.38,15.89,16.87,2.7,2.58,71.76,9.9,3.03,429.62,35.09,35.81,14.62,3.77,300.5,299.0,301.5,299.7,0.1,0.721,0.724,0.724,0.714,0.23


In [10]:
# Build the target variable: 0 when the day closed below its open, else 1.
price_move = merge_Data['Close'] - merge_Data['Open']
answersData = merge_Data[['Date']].assign(
    answer=(~price_move.lt(0)).astype('int64')
)

# Drop the columns that must not be used as features.
# NOTE: this cell cannot be re-run on its own, because it removes the 'Date'
# and 'Close' columns it reads above; re-run the merge cell first.
merge_Data = merge_Data.drop(columns=['Date', 'Close'])

# Convert every remaining column to its row-over-row rate of change.
merge_Data_pct = merge_Data.pct_change()

# Attach the engineered feature and the target. Both frames are joined on
# Date first, then placed side by side with the features by row position.
make_data = (
    creating_feature
    .merge(answersData, on='Date', how='inner')
    .drop(columns=['Date'])
)
stock_Price_Prediction = pd.concat([merge_Data_pct, make_data], axis=1)

# Shift the label up by one row so that each row's label is the next row's
# outcome, i.e. a future value relative to the explanatory variables.
stock_Price_Prediction['answer'] = stock_Price_Prediction['answer'].shift(-1)


In [11]:
# Missing-value check: print the number of nulls per column.
print(stock_Price_Prediction.isna().sum())

# Remove every row that contains NaN, +inf or -inf: infinities are first
# turned into NaN so that a single dropna() removes all three.
stock_Price_Prediction = (
    stock_Price_Prediction
    .replace([np.inf, -np.inf], np.nan)
    .dropna()
)

# Write the result to CSV (no index column, Shift-JIS encoded).
stock_Price_Prediction.to_csv(
    "stock_Price_Prediction_v4.1.csv",
    index=False,
    encoding="shift-jis",
)

Open                    1
High                    1
Low                     1
Volume                  1
Adj Close               1
ema_5                   1
macd                    1
Open000001.SS           1
High000001.SS           1
Low000001.SS            1
Adj Close000001.SS      1
Open^AORD               1
High^AORD               1
Low^AORD                1
Adj Close^AORD          1
Open^AXJO               1
High^AXJO               1
Low^AXJO                1
Adj Close^AXJO          1
Open^BFX                1
High^BFX                1
Low^BFX                 1
Adj Close^BFX           1
Open^BSESN              1
High^BSESN              1
Low^BSESN               1
Adj Close^BSESN         1
Open^BVSP               1
High^BVSP               1
Low^BVSP                1
                     ... 
NOK                     1
AUD                     1
NZD                     1
ZAR                     1
BHD                     1
IDR(100)                1
CNY                     1
HKD         