<a href="https://colab.research.google.com/github/sira4154/AI4BA/blob/main/Test_model_stock_predict.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [137]:
!pip install yfinance



In [138]:
!pip install bs4



In [139]:
!pip install pmdarima



# Import library

In [140]:
from datetime import date
import yfinance as yf
from fbprophet import Prophet
from fbprophet.plot import plot_plotly
from plotly import graph_objs as go
from bs4 import BeautifulSoup 
import requests

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import tensorflow as tf

from sklearn.metrics import mean_absolute_error, r2_score, mean_squared_error
from sklearn.preprocessing import MinMaxScaler

from pmdarima import auto_arima

#Scraping set-100 name

In [141]:
def thai_stock():
    url = "https://www.settrade.com/C13_MarketSummary.jsp?detail=SET100"
    data = requests.get(url).text
    soup = BeautifulSoup(data, "html.parser")
    selector = 'a.link-stt'
    symbols = soup.select(selector)
    set100 = list(map(lambda s: s.text, symbols[9:109]))
    set100BK = [i +".BK" for i in set100]
    return set100BK

In [142]:
thai_sc = thai_stock()

# Name list SET-100

In [143]:
print(thai_sc)

['ACE.BK', 'ADVANC.BK', 'AEONTS.BK', 'AMATA.BK', 'AOT.BK', 'AP.BK', 'AWC.BK', 'BAM.BK', 'BANPU.BK', 'BBL.BK', 'BCH.BK', 'BCP.BK', 'BCPG.BK', 'BDMS.BK', 'BEC.BK', 'BEM.BK', 'BGRIM.BK', 'BH.BK', 'BLA.BK', 'BPP.BK', 'BTS.BK', 'CBG.BK', 'CENTEL.BK', 'CHG.BK', 'CK.BK', 'CKP.BK', 'COM7.BK', 'CPALL.BK', 'CPF.BK', 'CPN.BK', 'CRC.BK', 'DOHOME.BK', 'DTAC.BK', 'EA.BK', 'EGCO.BK', 'EPG.BK', 'ESSO.BK', 'GLOBAL.BK', 'GPSC.BK', 'GULF.BK', 'GUNKUL.BK', 'HANA.BK', 'HMPRO.BK', 'INTUCH.BK', 'IRPC.BK', 'IVL.BK', 'JMART.BK', 'JMT.BK', 'KBANK.BK', 'KCE.BK', 'KEX.BK', 'KKP.BK', 'KTB.BK', 'KTC.BK', 'LH.BK', 'MAJOR.BK', 'MEGA.BK', 'MINT.BK', 'MTC.BK', 'OR.BK', 'ORI.BK', 'OSP.BK', 'PLANB.BK', 'PTG.BK', 'PTT.BK', 'PTTEP.BK', 'PTTGC.BK', 'QH.BK', 'RATCH.BK', 'RBF.BK', 'RCL.BK', 'RS.BK', 'SAWAD.BK', 'SCBB.BK', 'SCC.BK', 'SCGP.BK', 'SINGER.BK', 'SIRI.BK', 'SPALI.BK', 'SPRC.BK', 'STA.BK', 'STARK.BK', 'STEC.BK', 'STGT.BK', 'SUPER.BK', 'SYNEX.BK', 'TASCO.BK', 'TCAP.BK', 'THANI.BK', 'TIDLOR.BK', 'TISCO.BK', 'TOP.BK', '

ทำการ copy เลือกหุ้นจาก list ด้านบนหนึ่งตัวใส่ลงใน selected_stock โดยหุ้นแต่ละตัวจะลงท้ายด้วย .BK เพื่อให้ดึงใน yfinance 

In [144]:
START = "2015-01-01" #เก็บ data ตั้งแต่วันที่
TODAY = date.today().strftime("%Y-%m-%d") #วันนี้
n_years = 1
period = n_years * 30
# selected_stock = "INTUCH.BK" #เลือกหุ้นจากด้านบนใส่ลงในนี้ 1 ตัว
time_step = 60
sc = MinMaxScaler(feature_range = (0, 1))
list_score = []

# Prophet model 

In [145]:
def load_data(ticker):
    data = yf.download(ticker, START, TODAY)
    data.reset_index(inplace=True)
    return data

def create_data_prophet(data):
  data1 = data[['Date', 'Close']]
  data1 = data1.rename(columns={"Date": "ds", "Close": "y"})
  train_size = int(len(data1)*0.85)
  test_size = int(len(data1) - train_size)
  train_set = data1[:train_size].copy()
  test_set = data1[train_size:].copy().reset_index(drop = True)
  return train_size,test_size,train_set,test_set

def prophet_model(train_set, test_set):
  m = Prophet()
  m.fit(train_set)
  future = m.make_future_dataframe(periods= len(test_set))
  forecast = m.predict(future)
  return forecast

def evaluate_prophet(test_set, forecast, scores_model, train_size,selected_stock):
  y_test = test_set["y"]
  y_pred = forecast[train_size:]["yhat"].reset_index(drop = True)

  mae = mean_absolute_error(y_test, y_pred) 
  rmse = mean_squared_error(y_test, y_pred, squared = False)
  r2 = r2_score(y_test, y_pred)
  scores_model.append({'Model': 'Prophet','MAE': mae,'RMSE': rmse,
                    'R2_score': r2, "Stock": selected_stock})
  return scores_model

# Prepare Data for (RNN,LSTM,Conv1D)

In [146]:
def prepare_data_train(train_set, time_step, sc, train_size):
  x_train = []
  y_train = []
  train_set1 = train_set.reset_index()["y"]
  train_set1 = np.array(train_set1).reshape(-1,1)
  training_set_scaled = sc.fit_transform(train_set1)
  for i in range(time_step, train_size):
     x_train.append(training_set_scaled[i-time_step:i, 0])
     y_train.append(training_set_scaled[i, 0])
  x_train, y_train = np.array(x_train), np.array(y_train)
  x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))
  return x_train,y_train

def prepare_data_test(data, time_step,train_size,sc):
  x_test = []
  inputs_test = data[train_size  - time_step:]["Close"].values
  inputs_test = inputs_test.reshape(-1,1)
  inputs_test = sc.transform(inputs_test)
  for i in range(time_step, len(inputs_test)):
     x_test.append(inputs_test[i-time_step:i, 0])
  x_test = np.array(x_test)
  x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 1))
  y_test = data[train_size:]["Close"].values
  return x_test, y_test

def evaluate(train_size, y_test, y_pred, name,data,scores_model,selected_stock):
  y_test = data[train_size:]["Close"].values
  mae = mean_absolute_error(y_test, y_pred) 
  rmse = mean_squared_error(y_test, y_pred, squared = False)
  r2 = r2_score(y_test, y_pred)
  scores_model.append({'Model': name,'MAE': mae,'RMSE': rmse,
                      'R2_score': r2, "Stock": selected_stock})
  return scores_model

# LSTM

In [147]:
def LSTM_model(time_step, x_train, y_train, x_test):
    tf.keras.backend.clear_session()
    tf.random.set_seed(1)
    name = "LSTM"
    modellstm = tf.keras.models.Sequential([
    tf.keras.layers.LSTM(64, return_sequences=True,input_shape=(time_step,1)),
    tf.keras.layers.LSTM(32),
    tf.keras.layers.Dense(1),
  ])

    modellstm.compile(loss='mean_squared_error', optimizer='adam')
    modellstm.fit(x_train,y_train,epochs=20,batch_size=64,verbose=0)
    y_pred = modellstm.predict(x_test)
    y_pred_lstm = sc.inverse_transform(y_pred)
    return y_pred_lstm, name

# RNN

In [148]:
def RNN_model(time_step, x_train, y_train, x_test):
  tf.keras.backend.clear_session()
  tf.random.set_seed(1)
  name = "RNN"
  modelRNN = tf.keras.models.Sequential([
  tf.keras.layers.SimpleRNN(64, return_sequences=True,input_shape=(time_step,1)),
  tf.keras.layers.SimpleRNN(32,),
  tf.keras.layers.Dense(1),
  ])

  modelRNN.compile(loss='mean_squared_error', optimizer='adam')
  modelRNN.fit(x_train,y_train,epochs=20,batch_size=64,verbose=0)
  y_pred = modelRNN.predict(x_test)
  y_pred_rnn = sc.inverse_transform(y_pred)
  return y_pred_rnn, name

# CNN

In [149]:
def CNN_model(time_step, x_train, y_train, x_test):
  tf.keras.backend.clear_session()
  tf.random.set_seed(1)
  name = "CNN"
  modelCNN = tf.keras.models.Sequential([
  tf.keras.layers.Conv1D(64, 3, activation = "relu",input_shape=(time_step,1)),
  tf.keras.layers.MaxPooling1D(2),
  tf.keras.layers.Conv1D(32, 3, activation = "relu"),
  tf.keras.layers.GlobalMaxPool1D(),
  tf.keras.layers.Dense(1),
  ])

  modelCNN.compile(loss='mean_squared_error', optimizer='adam')
  modelCNN.fit(x_train,y_train,epochs=20,batch_size=64,verbose=0)
  y_pred = modelCNN.predict(x_test)
  y_pred_cnn = sc.inverse_transform(y_pred)
  return y_pred_cnn, name

# Auto arima

In [150]:
def auto_arima_model(train_set, test_set):
  name = "Auto-Arima"
  training = train_set ['y']
  validation = test_set ['y']
  model = auto_arima(training, start_p=1, start_q=1,max_p=3, max_q=3, m=12,start_P=0,d=1, D=1, trace=False,error_action='ignore',suppress_warnings=True)
  model.fit(training)
  forecast = model.predict(n_periods=len(validation))
  return forecast,name

# Combine model

In [151]:
data.shape[0]

1

In [152]:
def predict_stock(*arg):    
    stock = ["AOT.BK", "COM7.BK", "MINT.BK", "PTT.BK", "INTUCH.BK"]
    scores_model = []
    for selected_stock in stock :
      data = load_data(selected_stock)
      if data.shape[0] < 1000 :
        pass 
      else :
        train_size,test_size,train_set,test_set = create_data_prophet(data)
        forecast = prophet_model(train_set, test_set)
        score_Prophet = evaluate_prophet(test_set, forecast, scores_model, train_size,selected_stock)
        x_train,y_train = prepare_data_train(train_set, time_step, sc, train_size)
        x_test, y_test = prepare_data_test(data, time_step,train_size,sc)
        y_pred_lstm, name_lstm = LSTM_model(time_step, x_train, y_train, x_test)
        score_LSTM = evaluate(train_size, y_test, y_pred_lstm, name_lstm, data,scores_model,selected_stock)
        y_pred_rnn, name_rnn = RNN_model(time_step, x_train, y_train, x_test)
        score_RNN = evaluate(train_size, y_test, y_pred_rnn, name_rnn, data,scores_model,selected_stock)
        y_pred_cnn, name_cnn = CNN_model(time_step, x_train, y_train, x_test)
        score_CNN = evaluate(train_size, y_test, y_pred_cnn, name_cnn, data,scores_model,selected_stock)
        y_pred_arima,name_arima = auto_arima_model(train_set, test_set)
        score_arima = evaluate(train_size, y_test, y_pred_arima, name_arima, data,scores_model,selected_stock)
    return scores_model

In [153]:
scores_model = predict_stock()

[*********************100%***********************]  1 of 1 completed

INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.



[*********************100%***********************]  1 of 1 completed

INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.



[*********************100%***********************]  1 of 1 completed

INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.



[*********************100%***********************]  1 of 1 completed

INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.



[*********************100%***********************]  1 of 1 completed

INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.





In [154]:
scores_model

[{'MAE': 13.335206720068943,
  'Model': 'Prophet',
  'R2_score': -20.337286525545508,
  'RMSE': 13.796891768110617,
  'Stock': 'AOT.BK'},
 {'MAE': 1.2518534142426336,
  'Model': 'LSTM',
  'R2_score': 0.6903157106360627,
  'RMSE': 1.6621539640657124,
  'Stock': 'AOT.BK'},
 {'MAE': 0.8975284590703271,
  'Model': 'RNN',
  'R2_score': 0.8345520402165141,
  'RMSE': 1.2149051439118428,
  'Stock': 'AOT.BK'},
 {'MAE': 2.7237677842043757,
  'Model': 'CNN',
  'R2_score': -0.11861444364276941,
  'RMSE': 3.1590163537446783,
  'Stock': 'AOT.BK'},
 {'MAE': 43.68639684148367,
  'Model': 'Auto-Arima',
  'R2_score': -270.5311542612873,
  'RMSE': 49.21772051005327,
  'Stock': 'AOT.BK'},
 {'MAE': 13.168175446830372,
  'Model': 'Prophet',
  'R2_score': -0.7969510927302528,
  'RMSE': 15.694036268128523,
  'Stock': 'COM7.BK'},
 {'MAE': 4.233877937161193,
  'Model': 'LSTM',
  'R2_score': 0.6748562626043688,
  'RMSE': 6.675813878504323,
  'Stock': 'COM7.BK'},
 {'MAE': 3.2904659816196986,
  'Model': 'RNN',
  '

In [155]:
list_score.append(scores_model)

In [156]:
results = pd.DataFrame(scores_model).sort_values(by='RMSE',ascending=True)
results

Unnamed: 0,Model,MAE,RMSE,R2_score,Stock
17,RNN,0.50991,0.636748,0.827218,PTT.BK
12,RNN,0.555483,0.724222,0.80959,MINT.BK
16,LSTM,0.604638,0.767899,0.748712,PTT.BK
11,LSTM,0.808069,1.014399,0.626436,MINT.BK
2,RNN,0.897528,1.214905,0.834552,AOT.BK
1,LSTM,1.251853,1.662154,0.690316,AOT.BK
13,CNN,1.434275,1.667416,-0.009336,MINT.BK
22,RNN,1.330758,1.827842,0.944388,INTUCH.BK
18,CNN,1.558476,1.882547,-0.510274,PTT.BK
21,LSTM,1.479403,2.200794,0.919379,INTUCH.BK


In [197]:
df1 = results.groupby(by = "Model").mean().sort_values(by='RMSE',ascending=True)
df1

Unnamed: 0_level_0,MAE,RMSE,R2_score
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
RNN,0.91041,1.448656,0.965068
LSTM,1.457465,2.176693,0.921135
CNN,3.830394,4.876126,0.604233
Prophet,20.185541,21.461776,-6.666917
Auto ARIMA,24.082796,26.65689,-10.827921
