In [None]:
import glob
import os
from datetime import datetime

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import requests
import seaborn as sns
from bs4 import BeautifulSoup
from fbprophet import Prophet
from keras.callbacks import EarlyStopping
from keras.layers import Dense, LSTM, Dropout, Flatten
from keras.models import Model
from keras.models import Sequential
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split

In [None]:
# 　https://kabuoji3.comから株データを取得
def get_dfs(stock_number, years=None, timeout=30):
    """Download the yearly price tables of one stock from kabuoji3.com.

    One page per year is requested from
    ``https://kabuoji3.com/stock/<stock_number>/<year>/``. Every ``<tr>`` of
    the page is read: the ``<th>`` cells of the first row become the column
    names and the ``<td>`` cells of the following rows become the data.

    Args:
        stock_number: Stock code inserted into the URL.
        years: Iterable of calendar years to download. ``None`` (default)
            means 2015-2021, the range that used to be hard-coded.
        timeout: Seconds to wait for each HTTP response, so that a stalled
            connection cannot hang the notebook indefinitely.

    Returns:
        list of pandas.DataFrame, one per year that returned data, in the
        order of ``years``. The price/volume columns are cast to float and
        the date column to datetime. Years whose page has no table rows are
        skipped.
    """
    if years is None:
        years = range(2015, 2022)

    numeric_columns = ['始値', '高値', '安値', '終値', '出来高', '終値調整']
    headers = {
        "User-Agent":  ""
    }

    dfs = []
    for y in years:
        url = 'https://kabuoji3.com/stock/{}/{}/'.format(stock_number, y)
        response = requests.get(url, headers=headers, timeout=timeout)
        soup = BeautifulSoup(response.content, 'html.parser')
        tag_tr = soup.find_all('tr')
        if not tag_tr:
            # No table on this page: skip the year instead of failing with
            # IndexError on tag_tr[0].
            continue
        head = [h.text for h in tag_tr[0].find_all('th')]

        # Keep only rows that really carry <td> cells; a row without them
        # would become an all-None record and break the date parsing below.
        data = []
        for row in tag_tr[1:]:
            cells = [d.text for d in row.find_all('td')]
            if cells:
                data.append(cells)
        if not data:
            continue

        df = pd.DataFrame(data, columns=head)
        df = df.astype({c: float for c in numeric_columns})
        df['日付'] = [datetime.strptime(i, '%Y-%m-%d') for i in df['日付']]
        dfs.append(df)
    return dfs

def concatenate(dfs):
    """Stack the per-year frames into one table with a fresh 0..n-1 index.

    Args:
        dfs: Sequence of DataFrames that all contain the six price/volume
            columns listed below.

    Returns:
        pandas.DataFrame: the rows of all frames in input order, with the
        price/volume columns cast to float.
    """
    numeric_columns = ('始値', '高値', '安値', '終値', '出来高', '終値調整')
    stacked = pd.concat(dfs, axis=0).reset_index(drop=True)
    # A single astype call with a column -> dtype mapping replaces the
    # column-by-column loop.
    return stacked.astype({name: float for name in numeric_columns})

# 
def create_chart(csv, interval=20):
    """Show an interactive candlestick chart for one saved price CSV.

    The CSV is read, the volume and adjusted-close columns are dropped and
    the remaining five columns are renamed to date/open/high/low/close.
    Candles are placed at consecutive integer positions; the x axis is
    labelled with the date of every ``interval``-th row.

    Args:
        csv: Path of the CSV file to plot (also printed as a progress line).
        interval: Number of rows between two x-axis tick labels.
    """
    print(csv)
    df = pd.read_csv(csv)
    df = df.drop(['出来高', '終値調整'], axis=1)
    df.columns = ['date', 'open', 'high', 'low', 'close']

    x = np.arange(len(df))

    # One tick every `interval` rows. range(0, len(df), interval) never yields
    # a position past the last row; the former `len(df)//interval + 1` count
    # ran one step too far whenever len(df) was a multiple of the interval
    # (including an empty file) and raised IndexError.
    vals = list(range(0, len(df), interval))
    labels = df['date'].iloc[vals].tolist()

    # Hover text built in one pass over the columns instead of five .loc
    # lookups per row.
    hovertext = [
        'date:{}<br>open:{}<br>high:{}<br>low:{}<br>close:{}'.format(*row)
        for row in zip(df['date'], df['open'], df['high'], df['low'], df['close'])
    ]

    fig = go.Figure(
            data=go.Candlestick(
                    x=x,
                    open=df['open'],
                    high=df['high'],
                    low=df['low'],
                    close=df['close'],
                    hovertext=hovertext,
                    hoverinfo="text"),
            layout=go.Layout(
                    xaxis=dict(
                        ticktext=labels,
                        tickvals=vals,
                        tickangle=-45
                    ),
            )
    )
    fig.show()
    
# 全結合モデル作成
def create_model_feed_forward():
    """Build, summarise and compile the fully connected regression network.

    Layers: Dense(5, relu) on a 20-value input, Dropout(0.5), Dense(1, linear).
    The model is compiled with the Adam optimizer, MSE loss and MAE metric.

    Returns:
        The compiled keras ``Sequential`` model.
    """
    layers = [
        Dense(5, activation='relu', input_shape=(20,)),
        Dropout(0.5),
        Dense(1, activation='linear'),
    ]
    network = Sequential(layers)
    # Print the layer table before compiling, as the original did.
    network.summary()
    network.compile(loss='mse', optimizer='adam', metrics=['mae'])
    return network

# LSTMモデル作成
def create_model_lstm():
    """Build, summarise and compile the LSTM regression network.

    Layers: LSTM(10) with input and recurrent dropout of 0.2 on a (20, 1)
    input, Dense(5, relu), Dropout(0.5), Dense(1, linear). The model is
    compiled with the Adam optimizer, MSE loss and MAE metric.

    Returns:
        The compiled keras ``Sequential`` model.
    """
    recurrent = LSTM(10, dropout=0.2, recurrent_dropout=0.2, input_shape=(20, 1))
    layers = [
        recurrent,
        Dense(5, activation='relu'),
        Dropout(0.5),
        Dense(1, activation='linear'),
    ]
    network = Sequential(layers)
    # Print the layer table before compiling, as the original did.
    network.summary()
    network.compile(loss='mse', optimizer='adam', metrics=['mae'])
    return network
    
def getInputLabel(data, period=20):
    """Cut the first column of ``data`` into sliding windows and next-step labels.

    Window ``i`` holds rows ``i .. i + period - 1`` of column 0 and its label
    is the value of column 0 at row ``i + period``.

    Args:
        data: pandas DataFrame; only its first column is used.
        period: Window length (number of consecutive rows per sample).

    Returns:
        tuple ``(input_tensor, label_tensor)`` of numpy arrays with shapes
        ``(len(data) - period, period)`` and ``(len(data) - period,)``.
        When ``data`` has no more than ``period`` rows the arrays are empty
        but keep those shapes, i.e. ``(0, period)`` and ``(0,)``.
    """
    n_windows = len(data) - period
    if n_windows <= 0:
        # Not enough rows for a single window: return correctly shaped empty
        # arrays so that callers can still index the second axis.
        return np.empty((0, period)), np.empty((0,))

    # Materialise the column once; `data.values` builds a new array on every
    # access, which made the original loop quadratic.
    series = data.values[:, 0]

    input_tensor = np.array([series[i:i + period] for i in range(n_windows)])
    label_tensor = np.array([series[i + period] for i in range(n_windows)])
    return input_tensor, label_tensor

# prophetモデルの実行
def prophet(csv):
    """Fit a Prophet model on one price CSV and save its forecast.

    The date column is used as ``ds`` and the adjusted close as ``y``. The
    frame produced by ``make_future_dataframe`` (history plus 365 further
    daily dates) is filtered to weekday < 5, i.e. Monday to Friday, before
    predicting. The forecast plot is written to ``prophet/image/<name>.png``
    and the forecast table to ``prophet/csv/<name>.csv``, where ``<name>`` is
    the input file name without its folder.

    Args:
        csv: Path of the input CSV file.
    """
    # Derive the output name with os.path instead of str.replace so that it
    # works for any folder and for both "/" and "\\" separators.
    file_name = os.path.basename(csv)
    stem = os.path.splitext(file_name)[0]

    df_prophet = pd.read_csv(csv)
    model = Prophet()
    model.fit(df_prophet.rename(columns={"日付" : "ds", "終値調整" : "y"}))

    future_data = model.make_future_dataframe(periods=365, freq = 'd')
    future_data = future_data[future_data['ds'].dt.weekday < 5]

    forecast_data = model.predict(future_data)
    print(csv)

    # Neither savefig nor to_csv creates missing folders.
    os.makedirs("prophet/image", exist_ok=True)
    os.makedirs("prophet/csv", exist_ok=True)

    # Explicit ".png" keeps the same file name matplotlib produced by default
    # and avoids mis-detecting a format when the name itself contains a dot.
    model.plot(forecast_data).savefig("prophet/image/" + stem + ".png")
    forecast_data.to_csv("prophet/csv/" + file_name)

# ランダムフォレスト
def random_forest(n_estimators=1000, random_state=None):
    """Create an unfitted random-forest regressor.

    Args:
        n_estimators: Number of trees; defaults to the previously hard-coded
            1000.
        random_state: Seed forwarded to scikit-learn. ``None`` (default)
            keeps the former unseeded behaviour; pass an int to make the
            fitted model reproducible.

    Returns:
        sklearn.ensemble.RandomForestRegressor
    """
    return RandomForestRegressor(n_estimators=n_estimators, random_state=random_state)

In [None]:
#作成したコードリストを読み込む
code_list = pd.read_csv('code_list.csv')

# to_csv does not create missing folders, so make sure the target exists.
os.makedirs('kabu_csv', exist_ok=True)

# Download every listed stock and save one CSV per stock as
# kabu_csv/<code>-<name>.csv.
for k, v in zip(code_list['code'], code_list['name']):
    print(k, v)
    dfs = get_dfs(k)
    if not dfs:
        # Nothing was downloaded for this code; concatenating an empty list
        # would raise, so report it and move on to the next stock.
        print('no data for', k, v)
        continue
    data = concatenate(dfs)
    data.to_csv('kabu_csv/{}-{}.csv'.format(k, v), index=False)

In [None]:
# glob returns matches in an unspecified, file-system dependent order.
# Sorting makes the processing order (and the column order of tables built
# from csv_list later in the notebook) the same on every run and machine.
csv_list = sorted(glob.glob('kabu_csv/*.csv'))

# Draw one candlestick chart per downloaded stock.
for csv in csv_list:
    create_chart(csv)

In [None]:
# Run the Prophet forecast for every file in csv_list; prophet() prints the
# path and writes the forecast plot and table to disk for each one.
for csv in csv_list:
    prophet(csv)

In [None]:
# Train and evaluate the feed-forward and the LSTM model on every saved
# price history, saving one figure per model and stock.

# Window length shared by getInputLabel and both model input shapes.
window = 20

# savefig does not create missing folders, so make sure they exist first.
for image_dir in ("feed_forward/image", "lstm/image"):
    os.makedirs(image_dir, exist_ok=True)

for csv in csv_list:
    # File name without folder and extension, used to name the figures.
    # os.path works for any folder and for both "/" and "\\" separators.
    stem = os.path.splitext(os.path.basename(csv))[0]

    data = pd.read_csv(csv)

    # Derived features: high/low and open/close mid prices and their change
    # from the previous row.
    pre_df = data[["日付"]].copy()
    pre_df["平均値（高安）"] = (data["高値"].values + data["安値"].values) / 2
    pre_df["平均値（始終）"] = (data["始値"].values + data["終値"].values) / 2
    pre_df["前日比（高安）"] = pre_df["平均値（高安）"].diff()
    pre_df["前日比（始終）"] = pre_df["平均値（始終）"].diff()
    # The first row has no predecessor, so its diff is NaN; use the column mean.
    pre_df["前日比（高安）"] = pre_df["前日比（高安）"].fillna(pre_df["前日比（高安）"].mean())
    pre_df["前日比（始終）"] = pre_df["前日比（始終）"].fillna(pre_df["前日比（始終）"].mean())

    # Join the features back on the shared date column, then standardise
    # every column to zero mean and unit standard deviation.
    # NOTE(review): the statistics are computed over the whole series, i.e.
    # including the rows that later form the test split.
    tmp = pd.merge(data, pre_df).drop('日付', axis=1)
    tmp = (tmp - tmp.mean()) / tmp.std()

    # getInputLabel builds the windows from the first column of tmp only.
    input_tensor, label_tensor = getInputLabel(data=tmp, period=window)

    # shuffle=False keeps the chronological order: the last 20 % of the
    # windows become the test set (random_state has no effect without
    # shuffling, so it is no longer passed).
    X_train, X_test, y_train, y_test = train_test_split(
        input_tensor, label_tensor, test_size=0.2, shuffle=False)
    earlystopping = EarlyStopping(monitor='loss', patience=5)

    # ---- fully connected model -------------------------------------------
    model_1 = create_model_feed_forward()
    model_1.fit(X_train, y_train, batch_size=10, epochs=50, callbacks=[earlystopping])

    # These numbers are measured on the training data and the metric is MAE,
    # so they are labelled accordingly (they used to be printed as
    # "Test loss" / "Test accuracy").
    score = model_1.evaluate(X_train, y_train, verbose=1)
    print("Train loss:", score[0])
    print("Train MAE:", score[1])
    print('\n')
    print("全結合: " + csv)
    print(model_1.evaluate(X_test, y_test))
    print('\n')

    predicted = model_1.predict(X_test)
    result = pd.DataFrame(predicted, columns=['predict'])
    result['actual'] = y_test
    # Draw on an explicit axes: DataFrame.plot() otherwise opens its own
    # figure and the one created beforehand stays empty.
    fig1, ax1 = plt.subplots()
    result.plot(ax=ax1)
    fig1.savefig("feed_forward/image/" + stem + ".png")
    plt.show()
    plt.close(fig1)

    print('\n')

    # ---- LSTM model ------------------------------------------------------
    model_2 = create_model_lstm()
    model_2.fit(X_train[:, :, np.newaxis], y_train, batch_size=10, epochs=50, callbacks=[earlystopping])

    # Full observed series (first column of tmp). input_tensor[:, 0] stops
    # `window` rows before the end, so seeding the forecast from it ignored
    # the most recent observations.
    observed = tmp.values[:, 0]
    future_pred = observed.copy()

    # Forecast horizon: 36 steps (rows) past the end of the observed series.
    pred_time_length = 12 * 3

    for _ in range(pred_time_length):
        # Feed the latest `window` values (observed or already predicted) ...
        X_future_pred = future_pred[-window:]
        y_future_pred = model_2.predict(X_future_pred.reshape(1, window, 1))
        # ... and append the prediction so the next step can build on it.
        future_pred = np.append(future_pred, y_future_pred)

    fig2, ax2 = plt.subplots()
    ax2.plot(observed, color='blue', label="observed")
    ax2.plot(range(len(observed), len(observed) + pred_time_length),
             future_pred[-pred_time_length:], color='red', label="future pred")
    ax2.legend()
    ax2.grid()
    # Save before show(): some backends discard the figure once it is shown.
    fig2.savefig("lstm/image/" + stem + ".png")
    plt.show()
    plt.close(fig2)

    score = model_2.evaluate(X_train[:, :, np.newaxis], y_train, verbose=1)
    print("Train loss:", score[0])
    print("Train MAE:", score[1])

    print('\n')
    print("lstm: " + csv)
    print(model_2.evaluate(X_test[:, :, np.newaxis], y_test))
    print('\n')

In [None]:
model_3 = random_forest()

# Row-to-row change of the adjusted close, one Series per stock, keyed by the
# CSV file name without folder and extension.
diff_by_stock = {}
for csv in csv_list:
    stem = os.path.splitext(os.path.basename(csv))[0]
    df_ran = pd.read_csv(csv)
    # Index by date so that the stocks are aligned on the trading day rather
    # than on the row number; the first diff is always NaN and is dropped.
    diffs = df_ran.set_index("日付")["終値調整"].diff().iloc[1:]
    # A repeated date would make the column-wise concat below fail; keep the
    # first occurrence.
    diffs = diffs[~diffs.index.duplicated(keep='first')]
    diff_by_stock[stem] = diffs

# Build the table in one step (one column per stock, rows = union of dates)
# instead of growing a DataFrame column by column, which also truncated every
# later stock to the row labels of the first one.
if diff_by_stock:
    df_ran_result = pd.concat(diff_by_stock, axis=1)
else:
    df_ran_result = pd.DataFrame()

# Dates on which a stock has no value are filled with that stock's mean change.
df_ran_result = df_ran_result.fillna(df_ran_result.mean())

In [None]:
x=df_ran_result[df_ran_result.columns.values]
y=df_ran_result[""]
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.27)

In [None]:
from sklearn import metrics
model_3.fit(x_train,y_train)
y_pred=model_3.predict(x_test)

In [None]:
feature_imp = pd.Series(model_3.feature_importances_,index=[df_ran_result.columns.values]).sort_values(ascending=False)
print(feature_imp)

In [None]:
# valueX=
# pred=model_3.predict(valueX)