In [1]:
import requests
from matplotlib import pyplot as plt
from datetime import datetime
import json
import pandas as pd
import numpy as np
import tensorflow as tf

## Load the model

In [42]:
from tensorflow.keras.models import load_model

model = load_model('../model_25s_7d.h5')



In [43]:
model.summary()

In [44]:
WINDOW_SIZE = 30
HORIZON = 7

## Extend the model

In [50]:
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.models import Model

# Build a new model with Functional API using the loaded model and adding a new layer
new_input = Input(shape=model.input_shape[1:])

x = new_input
for layer in model.layers[:-1]:
    x = layer(x)
    layer.trainable = False

x = Dense(64, activation='relu')(x)
new_output = Dense(HORIZON, activation='linear')(x)
new_model = Model(new_input, new_output, name='new_model')

# Compile the new model
new_model.compile(optimizer='adam', loss='mean_absolute_error', metrics=['mean_absolute_error'])

new_model.summary()

## Preprocessing the data

In [67]:
# download the data from cache
import os
import pickle

def load_apple_data():
    filename = "/Users/alexeyks/PycharmProjects/SMPP/StockMarketPP/cache/AAPL_data.pkl"
    if os.path.exists(filename):
        with open(filename, 'rb') as f:
            cached = pickle.load(f)
            # Check if the cache is still valid, let's say we refresh it every day
            return cached['data']
    return None

data = load_apple_data()

In [68]:
# process the data
data = data['Time Series (Daily)']

df = pd.DataFrame.from_dict(data, orient='index')
df = df.apply(pd.to_numeric)
df.index = pd.to_datetime(df.index)

close_prices = df['4. close']
# do not use the last 30 days
close_prices = close_prices.sort_index()[:-30]
close_prices.head()

2024-02-01    186.86
2024-02-02    185.85
2024-02-05    187.68
2024-02-06    189.30
2024-02-07    189.41
Name: 4. close, dtype: float64


Functions from the original notebook with the initial model training

In [69]:
def get_labelled_windows(x, horizon=7):
  """
  Creates labels for windowed dataset.

  E.g. if horizon=1 (default)
  Input: [1, 2, 3, 4, 5, 6] -> Output: ([1, 2, 3, 4, 5], [6])
  """
  return x[:, :-horizon], x[:, -horizon:]

In [70]:
def make_windows(x, window_size=30, horizon=7):
  """
  Turns a 1D array into a 2D array of sequential windows of window_size.
  """
  # 1. Create a window of specific window_size (add the horizon on the end for later labelling)
  window_step = np.expand_dims(np.arange(window_size+horizon), axis=0)
  # print(f"Window step:\n {window_step}")

  # 2. Create a 2D array of multiple window steps (minus 1 to account for 0 indexing)
  window_indexes = window_step + np.expand_dims(np.arange(len(x)-(window_size+horizon-1)), axis=0).T # create 2D array of windows of size window_size
  # print(f"Window indexes:\n {window_indexes[:3], window_indexes[-3:], window_indexes.shape}")

  # 3. Index on the target array (time series) with 2D array of multiple window steps
  windowed_array = x[window_indexes]

  # 4. Get the labelled windows
  windows, labels = get_labelled_windows(windowed_array, horizon=horizon)

  return windows, labels

In [61]:
def make_train_test_splits(windows, labels, test_split=0.2):
  """
  Splits matching pairs of windows and labels into train and test splits.
  """
  split_size = int(len(windows) * (1-test_split)) # this will default to 80% train/20% test
  train_windows = windows[:split_size]
  train_labels = labels[:split_size]
  test_windows = windows[split_size:]
  test_labels = labels[split_size:]
  return train_windows, test_windows, train_labels, test_labels

In [71]:
# Create windows and labels
windows, labels = make_windows(close_prices.values, window_size=30, horizon=7)
train_windows, test_windows, train_labels, test_labels = make_train_test_splits(windows, labels, test_split=0.1)

## Train the model

In [72]:
# fit the model
new_model.fit(x=train_windows, 
          y=train_labels, 
          epochs=100, 
          batch_size=32, 
          verbose=1, 
          validation_data=(test_windows, test_labels),
          callbacks=[tf.keras.callbacks.EarlyStopping(monitor="val_loss", patience=200, restore_best_weights=True), tf.keras.callbacks.ReduceLROnPlateau(monitor="val_loss", patience=100, verbose=1)])

Epoch 1/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step - loss: 4.9775 - mean_absolute_error: 4.9775 - val_loss: 5.9132 - val_mean_absolute_error: 5.9132 - learning_rate: 0.0010
Epoch 2/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step - loss: 4.5466 - mean_absolute_error: 4.5466 - val_loss: 6.8652 - val_mean_absolute_error: 6.8652 - learning_rate: 0.0010
Epoch 3/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - loss: 4.0198 - mean_absolute_error: 4.0198 - val_loss: 8.0552 - val_mean_absolute_error: 8.0552 - learning_rate: 0.0010
Epoch 4/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step - loss: 3.7406 - mean_absolute_error: 3.7406 - val_loss: 9.2678 - val_mean_absolute_error: 9.2678 - learning_rate: 0.0010
Epoch 5/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - loss: 3.6876 - mean_absolute_error: 3.6876 - val_loss: 10.0858 - val_mean_absolute_error: 

<keras.src.callbacks.history.History at 0x169a5dd80>

In [73]:
def predict_prices_7days():
    prices = [float(data[date]['4. close']) for date in sorted(data.keys())[-30:]]
    # Assume model expects input shape [1, 30, 1] for one feature per day
    prices = np.array(prices).reshape(1, -1, 1)
    predictions = new_model.predict(prices)
    return predictions.flatten().tolist()

predictions = predict_prices_7days()
predictions

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step


[209.37155151367188,
 210.1636199951172,
 211.3349609375,
 211.29542541503906,
 214.39039611816406,
 216.74713134765625,
 217.32171630859375]