In [1]:
import pandas as pd
import os
import plotly.graph_objs as go
import matplotlib.pyplot as plt
import plotly.express as px
from sklearn.preprocessing import MinMaxScaler

In [2]:
# Path of the AAPL training CSV (a file path, despite the name DIR).
DIR = './data/train/AAPL.csv'

# Load the raw daily rows for AAPL.
df = pd.read_csv(filepath_or_buffer=DIR)

In [3]:
# Peek at the first five rows of the raw frame.
df.head(n=5)

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2012-01-03,14.621429,14.732143,14.607143,14.686786,12.466097,302220800
1,2012-01-04,14.642857,14.81,14.617143,14.765714,12.533086,260022000
2,2012-01-05,14.819643,14.948214,14.738214,14.929643,12.67223,271269600
3,2012-01-06,14.991786,15.098214,14.972143,15.085714,12.804701,318292800
4,2012-01-09,15.196429,15.276786,15.048214,15.061786,12.784396,394024400


In [4]:
# Candlestick view of the daily open/high/low/close values.
fig = go.Figure()
fig.add_trace(
    go.Candlestick(
        x=df['Date'],
        open=df['Open'],
        high=df['High'],
        low=df['Low'],
        close=df['Close'],
    )
)

# Titles, fixed canvas size, and no range slider under the x axis.
layout_options = dict(
    title='AAPL Stock Price',
    xaxis_title='Date',
    yaxis_title='Price (USD)',
    xaxis_rangeslider_visible=False,
    height=600,
    width=800,
)
fig.update_layout(**layout_options)
fig.show()

In [5]:
# Overlay the four price columns as lines against the date column.
price_columns = ['Close', 'Open', 'High', 'Low']
fig = px.line(
    df,
    x=df.Date,
    y=price_columns,
    title='AAPL Stock Price',
)
fig.update_layout(
    xaxis_title='Time',
    yaxis_title='Price (USD)',
    height=600,
    width=1200,
)
fig.show()

In [6]:
# Show the last five dates present in the file.
df.loc[:, 'Date'].tail(5)

2256    2020-12-18
2257    2020-12-21
2258    2020-12-22
2259    2020-12-23
2260    2020-12-24
Name: Date, dtype: object

In [7]:
# Keep only the Close column, still as a one-column DataFrame; `df` is rebound.
df = df.loc[:, ['Close']]
df.shape

(2261, 1)

In [8]:
# Rescale the Close column into the [0, 1] range.
scaler = MinMaxScaler(feature_range=(0, 1))

# Learn min/max from the frame, then apply the scaling to the same frame.
scaler.fit(df)
df_scaled = scaler.transform(df)
df_scaled.shape

(2261, 1)

In [3]:
import yfinance as yf
import pandas as pd
import os

In [4]:
# Tickers to fetch and the date window requested from yfinance.
tickers = ["AAPL", 'AMZN', 'BIDU', 'GOOG', 'INTC', 'MSFT', 'NFLX', 'NVDA', 'TCEHY', 'TSLA']
start = "2025-01-01"
end = "2025-06-30"

# Create the target folder up front so to_csv does not fail on a fresh checkout.
os.makedirs('./data/test', exist_ok=True)

for ticker in tickers:
    df = yf.download(ticker, start=start, end=end)
    if df.empty:
        # An empty frame means nothing usable came back; do not write an empty
        # CSV or report success for it.
        # NOTE(review): presumably yfinance returns an empty frame on a failed
        # download instead of raising -- confirm against the installed version.
        print(f"No data returned for {ticker} from {start} to {end}; skipped")
        continue
    # One CSV per ticker under ./data/test, with the frame's index kept.
    df.to_csv(os.path.join('./data/test', f'{ticker}.csv'))
    print(f"Downloaded {ticker} data from {start} to {end}")


  df = yf.download(ticker, start=start, end=end)
[*********************100%***********************]  1 of 1 completed
  df = yf.download(ticker, start=start, end=end)
[*********************100%***********************]  1 of 1 completed
  df = yf.download(ticker, start=start, end=end)


Downloaded AAPL data from 2025-01-01 to 2025-06-30
Downloaded AMZN data from 2025-01-01 to 2025-06-30


[*********************100%***********************]  1 of 1 completed
  df = yf.download(ticker, start=start, end=end)


Downloaded BIDU data from 2025-01-01 to 2025-06-30


[*********************100%***********************]  1 of 1 completed
  df = yf.download(ticker, start=start, end=end)


Downloaded GOOG data from 2025-01-01 to 2025-06-30


[*********************100%***********************]  1 of 1 completed
  df = yf.download(ticker, start=start, end=end)


Downloaded INTC data from 2025-01-01 to 2025-06-30


[*********************100%***********************]  1 of 1 completed
  df = yf.download(ticker, start=start, end=end)


Downloaded MSFT data from 2025-01-01 to 2025-06-30


[*********************100%***********************]  1 of 1 completed
  df = yf.download(ticker, start=start, end=end)


Downloaded NFLX data from 2025-01-01 to 2025-06-30


[*********************100%***********************]  1 of 1 completed
  df = yf.download(ticker, start=start, end=end)


Downloaded NVDA data from 2025-01-01 to 2025-06-30


[*********************100%***********************]  1 of 1 completed
  df = yf.download(ticker, start=start, end=end)
[*********************100%***********************]  1 of 1 completed

Downloaded TCEHY data from 2025-01-01 to 2025-06-30
Downloaded TSLA data from 2025-01-01 to 2025-06-30





In [5]:
# Tickers stacked after AAPL; AAPL is prepended explicitly below so it comes first.
tickers = ['AMZN', 'BIDU', 'GOOG', 'INTC', 'MSFT', 'NFLX', 'NVDA', 'TCEHY', 'TSLA']

# Read the Close column of every per-ticker test CSV once, AAPL first, then
# stack them with a single concat instead of re-copying the growing frame on
# every loop iteration.
close_frames = [
    pd.read_csv(os.path.join('./data/test', f'{name}.csv'))[['Close']]
    for name in ['AAPL'] + tickers
]

# Row-wise stack; each frame keeps its own original row index (no ignore_index).
df = pd.concat(close_frames, axis=0)


In [6]:
# Persist the stacked Close values without the row index.
test_csv_path = os.path.join('./data/test', 'test.csv')
df.to_csv(test_csv_path, index=False)

In [21]:
# Every ticker whose validation CSV contributes to val.csv.
tickers = ['AAPL', 'AMZN', 'BIDU', 'GOOG', 'INTC', 'MSFT', 'NFLX', 'NVDA', 'TCEHY', 'TSLA']

# Read each ticker's Close column exactly once. The previous version loaded
# AAPL before the loop and again inside it (AAPL is in `tickers`), so AAPL's
# rows were written to val.csv twice.
close_frames = [
    pd.read_csv(os.path.join('./data/val', f'{name}.csv'))[['Close']]
    for name in tickers
]

# Single row-wise stack in ticker order; each frame keeps its own row index.
df = pd.concat(close_frames, axis=0)

# Persist the stacked Close values without the row index.
df.to_csv(os.path.join('./data/val', 'val.csv'), index=False)

In [23]:
tickers = ['AAPL', 'AMZN', 'BIDU', 'GOOG', 'INTC', 'MSFT', 'NFLX', 'NVDA', 'TCEHY', 'TSLA']

# Folders are listed in the order their rows are stacked: train, val, test.
split_folders = ('./data/train', './data/val', './data/test')

for ticker in tickers:
    csv_name = f'{ticker}.csv'
    # Load the three per-split files for this ticker.
    parts = [pd.read_csv(os.path.join(folder, csv_name)) for folder in split_folders]
    # Stack them row-wise and write one combined file directly under ./data.
    df = pd.concat(parts, axis=0)
    df.to_csv(os.path.join('./data', csv_name), index=False)
    print(f"Saved {ticker} data to ./data/{ticker}.csv")

Saved AAPL data to ./data/AAPL.csv
Saved AMZN data to ./data/AMZN.csv
Saved BIDU data to ./data/BIDU.csv
Saved GOOG data to ./data/GOOG.csv
Saved INTC data to ./data/INTC.csv
Saved MSFT data to ./data/MSFT.csv
Saved NFLX data to ./data/NFLX.csv
Saved NVDA data to ./data/NVDA.csv
Saved TCEHY data to ./data/TCEHY.csv
Saved TSLA data to ./data/TSLA.csv


In [26]:
import os
# List the entry names found in the combined data folder.
# NOTE(review): this is an absolute, machine-specific path, whereas other cells
# use the relative './data' -- confirm both point at the same folder.
# os.listdir returns names in arbitrary order, so the list order is not stable.
fileList = os.listdir("/home/bangbui/Documents/Stock-Predict/data")
fileList

['MSFT.csv',
 'GOOG.csv',
 'TCEHY.csv',
 'TSLA.csv',
 'INTC.csv',
 'BIDU.csv',
 'AMZN.csv',
 'AAPL.csv',
 'NVDA.csv',
 'NFLX.csv']

In [27]:
# Take the text before the first "." of every file name, then drop empty
# names and the "sample_data" entry.
name_stems = (entry.split(".")[0] for entry in fileList)
companyList = [stem for stem in name_stems if stem not in ("", "sample_data")]
print(companyList)

['MSFT', 'GOOG', 'TCEHY', 'TSLA', 'INTC', 'BIDU', 'AMZN', 'AAPL', 'NVDA', 'NFLX']


In [29]:
import pandas as pd
# Tickers to load; this order is reused by the loops that iterate stockList.
stockList = ['AMZN', 'NVDA','AAPL','BIDU','GOOG','INTC','MSFT','NFLX','TCEHY','TSLA']

# Map each ticker to its combined CSV, indexed by the Date column.
df_ = {
    symbol: pd.read_csv(
        f"/home/bangbui/Documents/Stock-Predict/data/{symbol}.csv",
        index_col='Date',
    )
    for symbol in stockList
}

df_

{'AMZN':                   Open        High         Low       Close   Adj Close  \
 Date                                                                     
 2012-01-03  175.889999  179.479996  175.550003  179.029999  179.029999   
 2012-01-04  179.210007  180.500000  176.070007  177.509995  177.509995   
 2012-01-05  175.940002  178.250000  174.050003  177.610001  177.610001   
 2012-01-06  178.070007  184.649994  177.500000  182.610001  182.610001   
 2012-01-09  182.759995  184.369995  177.000000  178.559998  178.559998   
 ...                ...         ...         ...         ...         ...   
 2025-06-23  209.789993  210.389999  207.309998  208.470001         NaN   
 2025-06-24  212.139999  214.339996  211.050003  212.770004         NaN   
 2025-06-25  214.619995  216.029999  211.110001  211.990005         NaN   
 2025-06-26  213.119995  218.039993  212.009995  217.119995         NaN   
 2025-06-27  219.919998  223.300003  216.740005  223.300003         NaN   
 
               

In [31]:
from sklearn.model_selection import train_test_split
def split(dataframe, border, col):
    """Cut one column of `dataframe` into two pieces at the index label `border`.

    Both pieces are taken with label-based `.loc` slices, which include their
    end points, so the row labelled `border` appears in BOTH returned pieces.

    Args:
        dataframe: frame to slice by index label.
        border: index label at which to cut.
        col: column selector passed to `.loc`.

    Returns:
        Tuple `(up_to_border, from_border)`.
    """
    up_to_border = dataframe.loc[slice(None, border), col]
    from_border = dataframe.loc[slice(border, None), col]
    return up_to_border, from_border

# Per-ticker chronological split on the Date index (compared as strings in the
# query expressions): Train up to 2023-12-31, Val within 2024, Test from
# 2025-01-01 on. reset_index(drop=False) moves Date back into a regular column.
df_new = {}
for i in stockList:
    df_new[i] = {
        "Train": df_[i].query('Date <=  "2023-12-31"').reset_index(drop=False),
        "Val": df_[i].query('Date >= "2024-01-01" and Date <= "2024-12-31"').reset_index(drop=False),
        "Test": df_[i].query('Date >= "2025-01-01"').reset_index(drop=False),
    }

In [37]:
# Strip every price/volume column that is not used downstream from the three
# splits of each ticker.
# The frames are replaced rather than mutated in place, and errors="ignore"
# lets the cell be re-run safely: the previous in-place drop raised KeyError on
# a second execution because the columns were already gone.
for i in stockList:
    for split_name in ("Train", "Val", "Test"):
        df_new[i][split_name] = df_new[i][split_name].drop(
            columns=['Open', 'High', 'Low', 'Adj Close', 'Volume'],
            errors="ignore",
        )

In [38]:
# Show the Train split of the last ticker in stockList. This is the frame the
# previous `df_new[i]` displayed after a full loop over stockList, but it no
# longer depends on a loop variable leaked from another cell.
df_new[stockList[-1]]["Train"]

Unnamed: 0,Date,Close
0,2012-01-03,1.872000
1,2012-01-04,1.847333
2,2012-01-05,1.808000
3,2012-01-06,1.794000
4,2012-01-09,1.816667
...,...,...
3009,2023-12-22,252.539993
3010,2023-12-26,256.609985
3011,2023-12-27,261.440002
3012,2023-12-28,253.179993


In [40]:
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
import numpy as np
# Scaled Close values per ticker for each split, plus the scaler that produced
# them so predictions can be mapped back to prices later.
transform_train = {}
transform_test = {}
transform_val = {}
scaler = {}

for i in stockList:
    sc = MinMaxScaler(feature_range=(0, 1))
    # The scaler is fitted on the Train split only; Val and Test are transformed
    # with those same min/max values. reshape(-1, 1) turns the 1-D Close column
    # into the (n, 1) column layout passed to the scaler.
    transform_train[i] = sc.fit_transform(np.array(df_new[i]["Train"]['Close']).reshape(-1, 1))
    transform_val[i] = sc.transform(np.array(df_new[i]["Val"]['Close']).reshape(-1, 1))
    transform_test[i] = sc.transform(np.array(df_new[i]["Test"]['Close']).reshape(-1, 1))
    scaler[i] = sc

# No module-level temporaries are created, so no trailing `del` is needed; the
# old `del a0` raised NameError whenever stockList was empty.

In [41]:
# Display the ticker -> fitted MinMaxScaler mapping built in the scaling cell.
scaler

{'AMZN': MinMaxScaler(),
 'NVDA': MinMaxScaler(),
 'AAPL': MinMaxScaler(),
 'BIDU': MinMaxScaler(),
 'GOOG': MinMaxScaler(),
 'INTC': MinMaxScaler(),
 'MSFT': MinMaxScaler(),
 'NFLX': MinMaxScaler(),
 'TCEHY': MinMaxScaler(),
 'TSLA': MinMaxScaler()}

In [42]:
# Print "<ticker> <shape>" for the train, test and val arrays (in that order),
# with a print("\n") separator between consecutive groups only.
for group_position, scaled_by_ticker in enumerate((transform_train, transform_test, transform_val)):
    if group_position > 0:
        print("\n")
    for i in scaled_by_ticker.keys():
        print(i, scaled_by_ticker[i].shape)

AMZN (3014, 1)
NVDA (3014, 1)
AAPL (3014, 1)
BIDU (3014, 1)
GOOG (3014, 1)
INTC (3014, 1)
MSFT (3014, 1)
NFLX (3014, 1)
TCEHY (3014, 1)
TSLA (3014, 1)


AMZN (121, 1)
NVDA (121, 1)
AAPL (121, 1)
BIDU (121, 1)
GOOG (121, 1)
INTC (121, 1)
MSFT (121, 1)
NFLX (121, 1)
TCEHY (121, 1)
TSLA (121, 1)


AMZN (251, 1)
NVDA (251, 1)
AAPL (251, 1)
BIDU (251, 1)
GOOG (251, 1)
INTC (251, 1)
MSFT (251, 1)
NFLX (251, 1)
TCEHY (251, 1)
TSLA (251, 1)


In [44]:
def _make_windows(scaled, window=10):
    """Turn a scaled (n, 1) series into look-back windows and next-step targets.

    Args:
        scaled: 2-D array whose column 0 holds the series values.
        window: number of consecutive past values in each input row.

    Returns:
        Tuple `(X, y)`: `X` has shape `(n - window, window)` with row `k` equal
        to `scaled[k:k + window, 0]`, and `y[k]` equal to `scaled[k + window, 0]`.
        Both are empty when the series has `window` rows or fewer.
    """
    values = np.asarray(scaled)[:, 0]
    count = max(len(values) - window, 0)
    X = np.empty((count, window), dtype=values.dtype)
    for k in range(count):
        X[k] = values[k:k + window]
    # Copy so the targets do not alias the scaled source array.
    y = np.array(values[window:window + count])
    return X, y


# Per-ticker model inputs: "X" has shape (samples, 10, 1) and "y" has shape
# (samples,). Sample counts now come from each array's real length instead of
# the hard-coded 3014 / 121 / 251, and each split is reshaped from its own
# width instead of borrowing X_train.shape[1].
trainset = {}
valset = {}
testset = {}
for j in stockList:
    X_train, y_train = _make_windows(transform_train[j])
    trainset[j] = {
        "X": np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1)),
        "y": y_train,
    }

    X_test, y_test = _make_windows(transform_test[j])
    testset[j] = {
        "X": np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1)),
        "y": y_test,
    }

    X_val, y_val = _make_windows(transform_val[j])
    valset[j] = {
        "X": np.reshape(X_val, (X_val.shape[0], X_val.shape[1], 1)),
        "y": y_val,
    }

In [45]:
# One row per ticker listing the X / y shapes of the train, val and test sets.
# Column order: X_train, y_train, X_val, y_val, X_test, y_test.
split_sets = (("train", trainset), ("val", valset), ("test", testset))

arr_buff = []
for i in stockList:
    buff = {
        f"{part}_{split_name}": split_set[i][part].shape
        for split_name, split_set in split_sets
        for part in ("X", "y")
    }
    arr_buff.append(buff)

pd.DataFrame(arr_buff, index=stockList)

Unnamed: 0,X_train,y_train,X_val,y_val,X_test,y_test
AMZN,"(3004, 10, 1)","(3004,)","(241, 10, 1)","(241,)","(111, 10, 1)","(111,)"
NVDA,"(3004, 10, 1)","(3004,)","(241, 10, 1)","(241,)","(111, 10, 1)","(111,)"
AAPL,"(3004, 10, 1)","(3004,)","(241, 10, 1)","(241,)","(111, 10, 1)","(111,)"
BIDU,"(3004, 10, 1)","(3004,)","(241, 10, 1)","(241,)","(111, 10, 1)","(111,)"
GOOG,"(3004, 10, 1)","(3004,)","(241, 10, 1)","(241,)","(111, 10, 1)","(111,)"
INTC,"(3004, 10, 1)","(3004,)","(241, 10, 1)","(241,)","(111, 10, 1)","(111,)"
MSFT,"(3004, 10, 1)","(3004,)","(241, 10, 1)","(241,)","(111, 10, 1)","(111,)"
NFLX,"(3004, 10, 1)","(3004,)","(241, 10, 1)","(241,)","(111, 10, 1)","(111,)"
TCEHY,"(3004, 10, 1)","(3004,)","(241, 10, 1)","(241,)","(111, 10, 1)","(111,)"
TSLA,"(3004, 10, 1)","(3004,)","(241, 10, 1)","(241,)","(111, 10, 1)","(111,)"


In [2]:
# load model 
import joblib
# .h5
from keras.models import load_model

# Restore the saved Keras model from its .h5 file.
model = load_model('./weight/stacked_lstm/model-best.h5')
# Restore the object stored in scaler.pkl.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code -- only load files from a trusted source.
# NOTE(review): this rebinds `scaler`, a name other cells in this notebook also
# assign (a MinMaxScaler in one, a ticker -> scaler dict in another); confirm
# which object later cells expect.
scaler = joblib.load('./weight/stacked_lstm/scaler.pkl')

2025-06-30 18:54:07.075341: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-06-30 18:54:07.172147: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-06-30 18:54:07.172174: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-06-30 18:54:07.191771: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-06-30 18:54:07.222370: I tensorflow/core/platform/cpu_feature_guar



2025-06-30 18:54:08.874300: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:901] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2025-06-30 18:54:09.216980: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:901] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2025-06-30 18:54:09.219955: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:901] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-



In [10]:
# Run the loaded model on the scaled Close values.
# NOTE(review): df_scaled is displayed earlier as a 2-D (2261, 1) array, while
# the windowed sets built in this notebook are (samples, 10, 1) -- confirm
# which input shape the loaded model expects.
# NOTE(review): df_scaled was produced by a scaler fitted in this notebook, not
# necessarily the one loaded from scaler.pkl -- confirm the intended scaling.
model.predict(df_scaled)



array([[0.01049637],
       [0.01050148],
       [0.01051212],
       ...,
       [0.01739822],
       [0.01732978],
       [0.01740493]], dtype=float32)