In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
import plotly.express as px
import plotly.io as pio
import dask
import warnings
import yfinance as yf
import plotly.graph_objects as go
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Flatten
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint



# Notebook-wide display settings.
px.defaults.template = "plotly_dark"  # default template for plotly.express figures
pio.renderers.default = "vscode"      # figures are rendered with the 'vscode' renderer
warnings.filterwarnings("ignore")     # suppress every Python warning for this session

In [2]:
# Download daily price history for the three tickers over the same date range.
# The generator is consumed in order, so the downloads run Apple -> Samsung -> Xiaomi.
df_apple, df_samsung, df_xiaomi = (
    yf.download(ticker, start='2020-01-01', end='2023-12-31')
    for ticker in ('AAPL', '005930.KS', '1810.HK')
)


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


In [3]:
def edit_df(df):
    """Flatten tuple column labels to their first element and reset the index.

    Args:
        df: DataFrame whose column labels are either tuples (e.g. a
            two-level ``MultiIndex`` such as ``("Close", "AAPL")``) or
            already-flat labels.

    Returns:
        A new DataFrame with flat column labels and the former index moved
        into a regular column. The input frame is left untouched.
    """
    # Only tuple labels are shortened; a plain string label is kept whole.
    # Indexing a string with [0] would silently reduce "Close" to "C", which
    # is what happens if an already-flattened frame is passed in again.
    flat_names = [
        label[0] if isinstance(label, tuple) else label for label in df.columns
    ]
    # set_axis returns a new object, so the caller's frame is not mutated.
    return df.set_axis(flat_names, axis=1).reset_index()

In [4]:
# Flatten the column labels and reset the index of each downloaded frame.
df_apple, df_samsung, df_xiaomi = map(
    edit_df, (df_apple, df_samsung, df_xiaomi)
)


In [5]:
# Display the Apple frame: a Date column plus Close/High/Low/Open/Volume.
df_apple

Unnamed: 0,Date,Close,High,Low,Open,Volume
0,2020-01-02,72.796021,72.856613,71.545387,71.799873,135480400
1,2020-01-03,72.088287,72.851753,71.862884,72.020424,146322800
2,2020-01-06,72.662727,72.701508,70.954017,71.206085,118387200
3,2020-01-07,72.320961,72.929306,72.100403,72.672394,108872000
4,2020-01-08,73.484360,73.787323,72.022865,72.022865,132079200
...,...,...,...,...,...,...
1001,2023-12-22,192.656189,194.457363,192.029255,194.228473,37122800
1002,2023-12-26,192.108871,192.944773,191.889943,192.666139,28919300
1003,2023-12-27,192.208359,192.556659,190.158404,191.551588,48087700
1004,2023-12-28,192.636261,193.710998,192.228256,193.193528,34049900


In [6]:
# Display the Samsung frame: a Date column plus Close/High/Low/Open/Volume.
df_samsung

Unnamed: 0,Date,Close,High,Low,Open,Volume
0,2020-01-02,48825.472656,49533.088202,48648.568770,49090.828486,12993228
1,2020-01-03,49090.828125,50063.799493,48560.116470,49533.087838,15422255
2,2020-01-06,49090.828125,49179.280068,48294.760642,48560.116470,10278951
3,2020-01-07,49356.179688,49886.891297,49179.275818,49267.727753,10009778
4,2020-01-08,50240.699219,50771.410830,49444.631802,49709.987607,23501171
...,...,...,...,...,...,...
980,2023-12-21,73566.125000,73566.125000,72879.507833,73173.772333,13478766
981,2023-12-22,74448.914062,74841.266706,73958.473258,74350.825902,14515608
982,2023-12-26,75135.531250,75233.619411,74252.737802,74645.090445,13164909
983,2023-12-27,76871.039062,76871.039062,75392.749850,75589.855078,20651042


In [7]:
# Display the Xiaomi frame: a Date column plus Close/High/Low/Open/Volume.
df_xiaomi

Unnamed: 0,Date,Close,High,Low,Open,Volume
0,2020-01-02,11.220000,11.220000,10.780000,10.860000,248889455
1,2020-01-03,10.900000,11.440000,10.900000,11.200000,219547199
2,2020-01-06,10.980000,11.180000,10.640000,10.720000,179639996
3,2020-01-07,11.000000,11.300000,10.920000,11.060000,171287427
4,2020-01-08,11.080000,11.280000,10.820000,10.820000,189314498
...,...,...,...,...,...,...
979,2023-12-21,15.960000,16.000000,15.600000,15.900000,64563507
980,2023-12-22,15.680000,16.100000,15.520000,15.980000,91757167
981,2023-12-27,16.320000,16.540001,15.680000,15.960000,127850403
982,2023-12-28,16.280001,16.700001,16.040001,16.400000,155342841


- **Open:** Opening stock price of the day
- **Close:** Closing stock price of the day
- **High:** Highest stock price of the day
- **Low:** Lowest stock price of the day
- **Volume:** Total number of shares traded during the day

In [8]:
def lines_graph(df, sym):
    """Show a line chart of one stock's closing price.

    Args:
        df: DataFrame providing the ``Date`` (x axis) and ``Close`` (y axis)
            columns.
        sym: Ticker symbol passed to ``yf.Ticker`` to look up the company
            name used in the chart title.

    Returns:
        None. The figure is displayed with ``fig.show()``.
    """
    # Not every ticker's metadata contains ``longName``; fall back to the raw
    # symbol so a missing field does not abort the plot with a KeyError.
    company = yf.Ticker(sym).info.get('longName', sym)

    fig = go.Figure()
    fig.add_trace(go.Scatter(x=df['Date'],
                             y=df['Close'],
                             mode='lines',
                             name='Close Price'))

    fig.update_layout(
        title=f"Closing Price of {company} Stock",
        xaxis_title="Date",
        yaxis_title="Close Price",
        height=500,
        margin=dict(t=50, b=50)
    )
    fig.show()

In [9]:
# One closing-price line chart per company.
lines_graph(df=df_apple, sym='AAPL')
lines_graph(df=df_samsung, sym='005930.KS')
lines_graph(df=df_xiaomi, sym='1810.HK')

In [10]:
def candlestick_graph(df, sym):
    """Show a candlestick chart built from one stock's OHLC columns.

    Args:
        df: DataFrame providing the ``Date``, ``Open``, ``High``, ``Low`` and
            ``Close`` columns.
        sym: Ticker symbol passed to ``yf.Ticker`` to look up the company
            name used in the chart title.

    Returns:
        None. The figure is displayed with ``fig.show()``.
    """
    # Not every ticker's metadata contains ``longName``; fall back to the raw
    # symbol so a missing field does not abort the plot with a KeyError.
    company = yf.Ticker(sym).info.get('longName', sym)

    fig = go.Figure(data=go.Candlestick(x=df['Date'],
                                        open=df['Open'],
                                        high=df['High'],
                                        low=df['Low'],
                                        close=df['Close']))
    fig.update_layout(
        title=f"{company} stock candlestick chart",
        height=500,
        margin=dict(t=50, b=50)
    )

    fig.show()

In [11]:
# One candlestick chart per company.
candlestick_graph(df=df_apple, sym="AAPL")
candlestick_graph(df=df_samsung, sym='005930.KS')
candlestick_graph(df=df_xiaomi, sym='1810.HK')

In [12]:
# Positional split per stock: rows up to the 90% mark go to training, the
# remaining rows to testing.
# NOTE(review): every training slice starts at row 3, so rows 0-2 end up in
# neither split — confirm that this offset is intentional.
train_df_apple = df_apple.iloc[3:int(len(df_apple) * 0.9)]
test_df_apple = df_apple.iloc[int(len(df_apple) * 0.9):]

train_df_samsung = df_samsung.iloc[3:int(len(df_samsung) * 0.9)]
test_df_samsung = df_samsung.iloc[int(len(df_samsung) * 0.9):]

train_df_xiaomi = df_xiaomi.iloc[3:int(len(df_xiaomi) * 0.9)]
test_df_xiaomi = df_xiaomi.iloc[int(len(df_xiaomi) * 0.9):]


In [13]:
def train_test_graph(df, sym, train_data, test_data):
    """Show the train and test closing prices on top of the full series.

    Args:
        df: Full DataFrame with ``Date`` and ``Close`` columns.
        sym: Ticker symbol passed to ``yf.Ticker`` to look up the company
            name used in the chart title.
        train_data: Training subset with ``Date`` and ``Close`` columns.
        test_data: Test subset with ``Date`` and ``Close`` columns.

    Returns:
        None. The figure is displayed with ``fig.show()``.
    """
    # Not every ticker's metadata contains ``longName``; fall back to the raw
    # symbol so a missing field does not abort the plot with a KeyError.
    company = yf.Ticker(sym).info.get('longName', sym)

    fig = go.Figure()
    # Trace order is kept (train, test, original) because it determines the
    # legend order and the colours assigned to each line.
    for frame, label in (
        (train_data, 'Train close Price'),
        (test_data, 'Test close Price'),
        (df, 'Original Close Price'),
    ):
        fig.add_trace(go.Scatter(x=frame['Date'],
                                 y=frame['Close'],
                                 mode='lines',
                                 name=label))

    fig.update_layout(
        # The original title was missing the space between "of" and the name.
        title=f"Closing Price of {company} Stock",
        xaxis_title="Date",
        yaxis_title="Close Price",
        height=500,
        margin=dict(t=50, b=50)
    )
    fig.show()



In [None]:
# Visualise the train/test partition of each company's closing price.
train_test_graph(
    df=df_apple, sym='AAPL',
    train_data=train_df_apple, test_data=test_df_apple,
)
train_test_graph(
    df=df_samsung, sym='005930.KS',
    train_data=train_df_samsung, test_data=test_df_samsung,
)
train_test_graph(
    df=df_xiaomi, sym='1810.HK',
    train_data=train_df_xiaomi, test_data=test_df_xiaomi,
)

In [40]:
def _build_lstm_model(timesteps):
    """Return the compiled 64-32-16 stacked LSTM regressor for ``(timesteps, 1)`` inputs."""
    model = Sequential(
        [
            LSTM(64, return_sequences=True, input_shape=(timesteps, 1)),
            Dropout(0.2),
            LSTM(32, return_sequences=True),
            Dropout(0.2),
            LSTM(16),
            Dropout(0.2),
            Dense(1),
        ]
    )
    model.compile(loss="mean_squared_error", optimizer="adam")
    return model


def _make_windows(series, window_size):
    """Slice a ``(n, 1)`` array into look-back windows and next-step targets.

    Returns ``x`` with shape ``(n - window_size, window_size, 1)`` and ``y``
    with shape ``(n - window_size, 1)``; ``y[i]`` is the value that directly
    follows the window ``x[i]``.
    """
    starts = range(len(series) - window_size)
    x = np.stack([series[start:start + window_size] for start in starts])
    y = series[window_size:]
    return x, y


def training_model(
    train_data,
    test_data,
    window_size=30,
    checkpoint_path="Forcasting_prediction_model.keras",
):
    """Train a stacked LSTM on closing prices and evaluate it on the test set.

    The previous implementation fed every scaled price as both the input and
    the target of the same sample (one timestep, ``x == y``), so the network
    only had to copy its input and the reported metrics said nothing about
    forecasting. This version predicts each closing price from the
    ``window_size`` closing prices that precede it.

    Args:
        train_data: Mapping/DataFrame with a ``Close`` column used for fitting
            the scaler and the model.
        test_data: Mapping/DataFrame with a ``Close`` column used for
            evaluation only.
        window_size: Number of preceding closing prices used to predict the
            next one.
        checkpoint_path: File the best weights (lowest validation loss) are
            written to and reloaded from. Pass a distinct path per stock to
            keep the checkpoints of several runs apart.

    Returns:
        dict with the keys ``"mse"``, ``"mae"`` and ``"r2"``, computed on the
        min-max scaled test prices (the same scale the original metrics used).

    Raises:
        ValueError: If ``window_size`` is below 1, the training series is not
            longer than ``window_size``, or the test series is empty.
    """
    train_close = np.asarray(train_data["Close"], dtype=float).reshape(-1, 1)
    test_close = np.asarray(test_data["Close"], dtype=float).reshape(-1, 1)

    if window_size < 1:
        raise ValueError("window_size must be at least 1")
    if len(train_close) <= window_size:
        raise ValueError(
            f"train_data needs more than window_size={window_size} rows, "
            f"got {len(train_close)}"
        )
    if len(test_close) == 0:
        raise ValueError("test_data contains no rows")

    # The scaler is fitted on the training prices only, so no information
    # from the test period leaks into the scaling.
    scaler = MinMaxScaler()
    train_scaled = scaler.fit_transform(train_close)
    test_scaled = scaler.transform(test_close)

    x_train, y_train = _make_windows(train_scaled, window_size)

    # Prepend the tail of the training series so that every test price,
    # including the first one, has a full look-back window. The resulting
    # targets are exactly ``test_scaled``.
    test_context = np.concatenate([train_scaled[-window_size:], test_scaled])
    x_test, y_test = _make_windows(test_context, window_size)

    model = _build_lstm_model(window_size)

    early_stopping = EarlyStopping(
        monitor="val_loss", patience=5, verbose=1, mode="min"
    )
    model_checkpoint = ModelCheckpoint(
        checkpoint_path,
        monitor="val_loss",
        verbose=1,
        save_best_only=True,
        mode="min",
    )
    # validation_split holds out the last 10% of the samples passed to fit,
    # i.e. the most recent training windows.
    history = model.fit(
        x=x_train,
        y=y_train,
        epochs=50,
        batch_size=32,
        validation_split=0.1,
        callbacks=[early_stopping, model_checkpoint],
        verbose=1,
    )

    loss_df = pd.DataFrame(
        {
            "Epochs": range(1, len(history.history["loss"]) + 1),
            "Training Loss": history.history["loss"],
            "Validation Loss": history.history["val_loss"],
        }
    )

    fig = px.line(
        loss_df,
        x="Epochs",
        y=["Training Loss", "Validation Loss"],
        labels={"value": "Loss", "variable": "Loss Type"},
        title="Training and Validation Loss",
    )
    fig.update_layout(legend_title_text="Loss Type")
    fig.show()

    # Evaluate with the checkpointed weights rather than the weights of the
    # last epoch, which may be worse once early stopping has run out its
    # patience.
    best_model = _build_lstm_model(window_size)
    best_model.load_weights(checkpoint_path)

    predictions_normalized = best_model.predict(x_test)

    mse = mean_squared_error(y_test, predictions_normalized)
    mae = mean_absolute_error(y_test, predictions_normalized)
    r2 = r2_score(y_test, predictions_normalized)

    print(f"MSE: {mse}")
    print(f"MAE: {mae}")
    print(f"R-squared: {r2}")

    return {"mse": mse, "mae": mae, "r2": r2}

In [None]:
# training_model takes the train and test frames; the previous call passed a
# single string and would fail with a TypeError for the missing argument.
training_model(train_df_apple, test_df_apple)