In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
import plotly.express as px
import plotly.io as pio
import dask
import warnings
import yfinance as yf
import plotly.graph_objects as go

warnings.filterwarnings('ignore')
pio.renderers.default = 'vscode'

px.defaults.template = "plotly_dark"

In [60]:

df_apple = yf.download('AAPL', start='2020-01-01', end='2023-12-31')
df_samsung = yf.download('005930.KS', start='2020-01-01', end='2023-12-31')
df_xiaomi = yf.download('1810.HK', start='2020-01-01', end='2023-12-31')


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


In [63]:
df_apple.columns[0]


('Close', 'AAPL')

In [64]:
def edit_df(df):
    """change column's name and reset index"""
    df.columns = [c[0] for c in df.columns]
    df = df.reset_index()
    return df

In [65]:
df_apple = edit_df(df_apple)
df_samsung = edit_df(df_samsung)
df_xiaomi = edit_df(df_xiaomi)


In [66]:
df_apple

Unnamed: 0,Date,Close,High,Low,Open,Volume
0,2020-01-02,72.796021,72.856613,71.545387,71.799873,135480400
1,2020-01-03,72.088287,72.851753,71.862884,72.020424,146322800
2,2020-01-06,72.662720,72.701500,70.954010,71.206077,118387200
3,2020-01-07,72.320984,72.929329,72.100426,72.672417,108872000
4,2020-01-08,73.484344,73.787308,72.022850,72.022850,132079200
...,...,...,...,...,...,...
1001,2023-12-22,192.656174,194.457347,192.029240,194.228458,37122800
1002,2023-12-26,192.108856,192.944757,191.889928,192.666124,28919300
1003,2023-12-27,192.208359,192.556659,190.158404,191.551588,48087700
1004,2023-12-28,192.636276,193.711013,192.228271,193.193544,34049900


In [67]:
df_samsung

Unnamed: 0,Date,Close,High,Low,Open,Volume
0,2020-01-02,48825.472656,49533.088202,48648.568770,49090.828486,12993228
1,2020-01-03,49090.820312,50063.791526,48560.108742,49533.079955,15422255
2,2020-01-06,49090.820312,49179.272241,48294.752956,48560.108742,10278951
3,2020-01-07,49356.179688,49886.891297,49179.275818,49267.727753,10009778
4,2020-01-08,50240.699219,50771.410830,49444.631802,49709.987607,23501171
...,...,...,...,...,...,...
980,2023-12-21,73566.117188,73566.117188,72879.500094,73173.764563,13478766
981,2023-12-22,74448.914062,74841.266706,73958.473258,74350.825902,14515608
982,2023-12-26,75135.539062,75233.627234,74252.745523,74645.098207,13164909
983,2023-12-27,76871.046875,76871.046875,75392.757512,75589.862760,20651042


In [68]:
df_xiaomi

Unnamed: 0,Date,Close,High,Low,Open,Volume
0,2020-01-02,11.220000,11.220000,10.780000,10.860000,248889455
1,2020-01-03,10.900000,11.440000,10.900000,11.200000,219547199
2,2020-01-06,10.980000,11.180000,10.640000,10.720000,179639996
3,2020-01-07,11.000000,11.300000,10.920000,11.060000,171287427
4,2020-01-08,11.080000,11.280000,10.820000,10.820000,189314498
...,...,...,...,...,...,...
979,2023-12-21,15.960000,16.000000,15.600000,15.900000,64563507
980,2023-12-22,15.680000,16.100000,15.520000,15.980000,91757167
981,2023-12-27,16.320000,16.540001,15.680000,15.960000,127850403
982,2023-12-28,16.280001,16.700001,16.040001,16.400000,155342841


In [69]:

def lines_graph(df, sym):
    """lines graphs"""

    fig = go.Figure()
    fig.add_trace(go.Scatter(x=df['Date'],
                             y=df[f'Close'],
                             mode='lines',
                             name='Close Price'))

    fig.update_layout(
        title=f"Closing Price of {((yf.Ticker(sym)).info)['longName']} Stock",
        xaxis_title="Date",
        yaxis_title="Close Price",
        height=500,
        margin=dict(t=50, b=50)
    )
    fig.show()

In [70]:
lines_graph(df_apple, 'AAPL')
lines_graph(df_samsung, '005930.KS')
lines_graph(df_xiaomi, '1810.HK')

In [71]:
def candlestick_graph(df, sym):
    """candlestick graph"""
    fig = go.Figure(data=go.Candlestick(x=df['Date'], open=df[f'Open'],  high=df[f'High'],
                    low=df[f'Low'],
                    close=df[f'Close']))
    fig.update_layout(
        title=f"{((yf.Ticker(sym)).info)['longName']} stock candlestick chart",
        height=500,
        margin=dict(t=50, b=50)
    )

    fig.show()

In [72]:
candlestick_graph(df_apple, "AAPL")
candlestick_graph(df_samsung, '005930.KS')
candlestick_graph(df_xiaomi, '1810.HK')

In [28]:
train_data, test_data = df_apple.iloc[3:int(len(df_apple)*0.9)], df_apple.iloc[int(len(df_apple)*0.9):]


In [47]:
def train_test_graph(train_data, test_data, sym):
    """train and test graph"""
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=train_data['Date'],
                            y=train_data[f'Close {sym}'],
                            mode='lines',
                            name='Train close price'))
    fig.add_trace(go.Scatter(x=test_data['Date'],
                            y=test_data[f'Close {sym}'],
                            mode='lines',
                            name='test close price'))
    fig.add_trace(go.Scatter(x=df_apple['Date'],
                            y=df_apple[f'Close {sym}'],
                            mode='lines',
                            name='original Close Price'))

    fig.update_layout(
        title=f"Closing Price of {((yf.Ticker(sym)).info)['longName']} Stock",
        xaxis_title="Date",
        yaxis_title="Close Price",
        height=500,
        margin=dict(t=50, b=50)
    )
    fig.show()

train_test_graph(train_data, test_data, 'AAPL')

In [40]:

import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Flatten
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
def training_model( modelname):
    train_data_close = train_data['Close AAPL']
    test_data_close = test_data['Close AAPL']

    scaler = MinMaxScaler()
    train_data_normalized = scaler.fit_transform(np.array(train_data_close).reshape(-1, 1))
    test_data_normalized = scaler.transform(np.array(test_data_close).reshape(-1, 1))

    model = Sequential()
    model.add(LSTM(64, return_sequences=True, input_shape=(train_data_normalized.shape[1], 1)))
    model.add(Dropout(0.2))
    model.add(LSTM(32, return_sequences=True))
    model.add(Dropout(0.2))
    model.add(LSTM(16))
    model.add(Dropout(0.2))
    model.add(Dense(1))

    model.compile(loss='mean_squared_error', optimizer='adam')

    early_stopping = EarlyStopping(monitor='val_loss', patience=5, verbose=1, mode='min')
    model_checkpoint = ModelCheckpoint(f'{modelname}.keras', monitor='val_loss', verbose=1, save_best_only=True, mode='min')
    history = model.fit(x=train_data_normalized.reshape((train_data_normalized.shape[0], train_data_normalized.shape[1], 1)),
                        y=train_data_normalized,
                        epochs=50,
                        batch_size=32,
                        validation_split=0.1,
                        callbacks=[early_stopping, model_checkpoint],
                        verbose=1)

    loss_df = pd.DataFrame({
        'Epochs': range(1, len(history.history['loss']) + 1),
        'Training Loss': history.history['loss'],
        'Validation Loss': history.history['val_loss']
    })

    fig = px.line(loss_df, x='Epochs', y=['Training Loss', 'Validation Loss'],
                  labels={'value': 'Loss', 'variable': 'Loss Type'},
                  title='Training and Validation Loss')

    fig.update_layout(legend_title_text='Loss Type')

    fig.show()

    best_model = Sequential()
    best_model.add(LSTM(64, return_sequences=True, input_shape=(train_data_normalized.shape[1], 1)))
    best_model.add(Dropout(0.2))
    best_model.add(LSTM(32, return_sequences=True))
    best_model.add(Dropout(0.2))
    best_model.add(LSTM(16))
    best_model.add(Dropout(0.2))
    best_model.add(Dense(1))
    best_model.compile(loss='mean_squared_error', optimizer='adam')
    best_model.load_weights(f'{modelname}.keras')

    predictions_normalized = best_model.predict(test_data_normalized.reshape((test_data_normalized.shape[0], test_data_normalized.shape[1], 1)))
    predictions = scaler.inverse_transform(predictions_normalized)

    mse = mean_squared_error(test_data_normalized, predictions_normalized)
    mae = mean_absolute_error(test_data_normalized, predictions_normalized)
    r2 = r2_score(test_data_normalized, predictions_normalized)

    print(f'MSE: {mse}')
    print(f'MAE: {mae}')
    print(f'R-squared: {r2}')

In [41]:
training_model("apple_model")

Epoch 1/50
[1m23/26[0m [32m━━━━━━━━━━━━━━━━━[0m[37m━━━[0m [1m0s[0m 16ms/step - loss: 0.3082
Epoch 1: val_loss improved from inf to 0.60712, saving model to apple_model.keras
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 130ms/step - loss: 0.3056 - val_loss: 0.6071
Epoch 2/50
[1m24/26[0m [32m━━━━━━━━━━━━━━━━━━[0m[37m━━[0m [1m0s[0m 17ms/step - loss: 0.2053
Epoch 2: val_loss improved from 0.60712 to 0.24862, saving model to apple_model.keras
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 39ms/step - loss: 0.2015 - val_loss: 0.2486
Epoch 3/50
[1m25/26[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 15ms/step - loss: 0.0517
Epoch 3: val_loss improved from 0.24862 to 0.01678, saving model to apple_model.keras
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 35ms/step - loss: 0.0507 - val_loss: 0.0168
Epoch 4/50
[1m25/26[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 16ms/step - loss: 0.0284
Epoch 4: val_loss di

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 725ms/step
MSE: 1.9072076121296877e-05
MAE: 0.0035382677301259388
R-squared: 0.9946096190770698
