In [37]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler


In [38]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Input, LSTM

In [39]:
def convert_date(date_str):
    return pd.to_datetime(date_str, format='%d.%m.%Y').timestamp()


def convert_number(number_str):
    if number_str == '':
        return None
    return float(number_str.replace('.', '').replace(',', '.'))

In [40]:
import sqlite3

conn = sqlite3.connect("./final_stock_data.db")
curs = conn.cursor()

curs.execute("SELECT * FROM stock_prices")
data = curs.fetchall()

conn.close()
dataframe = pd.DataFrame(data, columns=['issuer', 'date', 'cena_posledna', 'mak', 'min', 'average', 'percentChange',
                                        'kolichina', 'prometbest', 'vkupenPromet'])

dataframe['date'] = dataframe['date'].apply(convert_date).astype(int)
dataframe['cena_posledna'] = dataframe['cena_posledna'].apply(convert_number)

df2 = pd.DataFrame()
df2['time'] = dataframe['date']
df2['close'] = dataframe['cena_posledna']
df2['issuer'] = dataframe['issuer']

# df2 = df2.dropna(subset=['close', 'time'])
df2 = df2.sort_values(by=['time'])



# df2 = df2.to_json(orient="records")

In [41]:
df2

Unnamed: 0,time,close,issuer
124629,1357084800,570.0,MAKP
194993,1357084800,8500.0,PPIV
2916,1357084800,307.0,ATPP
238359,1357084800,3000.0,SLAV
330093,1357084800,159.0,ZILU
...,...,...,...
344436,1733184000,17550.0,PKB
344438,1733184000,38000.0,PPIV
344300,1733184000,310.0,GECT
344306,1733184000,310.0,GRDN


In [42]:
from sklearn.preprocessing import LabelEncoder

In [43]:
encoder = LabelEncoder()

df2['issuer'] = encoder.fit_transform(df2['issuer'])

In [44]:
df2

Unnamed: 0,time,close,issuer
124629,1357084800,570.0,73
194993,1357084800,8500.0,101
2916,1357084800,307.0,6
238359,1357084800,3000.0,122
330093,1357084800,159.0,160
...,...,...,...
344436,1733184000,17550.0,99
344438,1733184000,38000.0,101
344300,1733184000,310.0,36
344306,1733184000,310.0,38


In [45]:
df2.dropna(subset=['close'], inplace=True)

In [46]:
len(df2)

335707

In [47]:
df2["time"] = pd.to_datetime(df2["time"], unit="s")

In [48]:
df2

Unnamed: 0,time,close,issuer
124629,2013-01-02,570.0,73
194993,2013-01-02,8500.0,101
2916,2013-01-02,307.0,6
238359,2013-01-02,3000.0,122
330093,2013-01-02,159.0,160
...,...,...,...
344436,2024-12-03,17550.0,99
344438,2024-12-03,38000.0,101
344300,2024-12-03,310.0,36
344306,2024-12-03,310.0,38


In [49]:
df2.set_index('time', inplace=True)

In [51]:
df2

Unnamed: 0_level_0,close,issuer
time,Unnamed: 1_level_1,Unnamed: 2_level_1
2013-01-02,570.0,73
2013-01-02,8500.0,101
2013-01-02,307.0,6
2013-01-02,3000.0,122
2013-01-02,159.0,160
...,...,...
2024-12-03,17550.0,99
2024-12-03,38000.0,101
2024-12-03,310.0,36
2024-12-03,310.0,38


In [52]:
df2.sort_index(inplace=True)

In [53]:
df2

Unnamed: 0_level_0,close,issuer
time,Unnamed: 1_level_1,Unnamed: 2_level_1
2013-01-02,570.0,73
2013-01-02,8500.0,101
2013-01-02,307.0,6
2013-01-02,3000.0,122
2013-01-02,159.0,160
...,...,...
2024-12-03,17550.0,99
2024-12-03,38000.0,101
2024-12-03,310.0,36
2024-12-03,310.0,38


In [54]:
periods = range(3, 0, -1)
shifted = df2.shift(periods=periods)

In [56]:
data = pd.concat([df2, shifted], axis=1)

In [57]:
data

Unnamed: 0_level_0,close,issuer,close_3,issuer_3,close_2,issuer_2,close_1,issuer_1
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2013-01-02,570.0,73,,,,,,
2013-01-02,8500.0,101,,,,,570.0,73.0
2013-01-02,307.0,6,,,570.0,73.0,8500.0,101.0
2013-01-02,3000.0,122,570.0,73.0,8500.0,101.0,307.0,6.0
2013-01-02,159.0,160,8500.0,101.0,307.0,6.0,3000.0,122.0
...,...,...,...,...,...,...,...,...
2024-12-03,17550.0,99,45.0,96.0,16.0,100.0,310.0,35.0
2024-12-03,38000.0,101,16.0,100.0,310.0,35.0,17550.0,99.0
2024-12-03,310.0,36,310.0,35.0,17550.0,99.0,38000.0,101.0
2024-12-03,310.0,38,17550.0,99.0,38000.0,101.0,310.0,36.0


In [58]:
data.dropna(inplace=True)

In [59]:
data

Unnamed: 0_level_0,close,issuer,close_3,issuer_3,close_2,issuer_2,close_1,issuer_1
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2013-01-02,3000.0,122,570.0,73.0,8500.0,101.0,307.0,6.0
2013-01-02,159.0,160,8500.0,101.0,307.0,6.0,3000.0,122.0
2013-01-02,3690.0,162,307.0,6.0,3000.0,122.0,159.0,160.0
2013-01-02,400.0,114,3000.0,122.0,159.0,160.0,3690.0,162.0
2013-01-02,65695.0,121,159.0,160.0,3690.0,162.0,400.0,114.0
...,...,...,...,...,...,...,...,...
2024-12-03,17550.0,99,45.0,96.0,16.0,100.0,310.0,35.0
2024-12-03,38000.0,101,16.0,100.0,310.0,35.0,17550.0,99.0
2024-12-03,310.0,36,310.0,35.0,17550.0,99.0,38000.0,101.0
2024-12-03,310.0,38,17550.0,99.0,38000.0,101.0,310.0,36.0


In [61]:
X, y = data.drop(columns=["close"], axis=1), data["close"]

In [62]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, shuffle=False, test_size=0.2)

In [63]:
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()

In [64]:
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [65]:
(X_train.shape[0], 3, (X_train.shape[1] // 3))

(268563, 3, 2)

In [66]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Input, LSTM

In [71]:
model = Sequential([
    Input(shape=(7, (X_train.shape[1] // 7))),
    LSTM(64, return_sequences=True, activation="relu"),
    LSTM(32, activation="relu"),
    Dense(1, activation="linear"),
])

In [72]:
model.summary()

In [73]:
model.compile(
    loss="mean_squared_error",
    optimizer="adam",
    metrics=["mean_squared_error"],
)

In [74]:
history = model.fit(X_train, y_train, validation_split=0.2, epochs=16, batch_size=8)

Epoch 1/16
[1m26857/26857[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 2ms/step - loss: 162667776.0000 - mean_squared_error: 162667776.0000 - val_loss: 240952624.0000 - val_mean_squared_error: 240952624.0000
Epoch 2/16
[1m26857/26857[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m53s[0m 2ms/step - loss: 157425936.0000 - mean_squared_error: 157425936.0000 - val_loss: 230534768.0000 - val_mean_squared_error: 230534768.0000
Epoch 3/16
[1m26857/26857[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m51s[0m 2ms/step - loss: 149360368.0000 - mean_squared_error: 149360368.0000 - val_loss: 225020896.0000 - val_mean_squared_error: 225020896.0000
Epoch 4/16
[1m26857/26857[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m51s[0m 2ms/step - loss: 144048736.0000 - mean_squared_error: 144048736.0000 - val_loss: 230119584.0000 - val_mean_squared_error: 230119584.0000
Epoch 5/16
[1m26857/26857[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m47s[0m 2ms/step - loss: 145923568.0000 - mean_squ

In [76]:
model.save('./stock_lstm.keras')