In [78]:
try:
    import google.colab
    IN_COLAB = True
except ImportError:
    IN_COLAB = False

if IN_COLAB:
    # Code specific to Google Colab
    print("Running in Google Colab")

    # Additional setup commands for Colab
    !pip install neuralforecast
    !pip install gymnasium
else:
    # Code for other environments (e.g., VS Code)
    print("Running in another environment (e.g., VS Code)")

Running in another environment (e.g., VS Code)


In [79]:
if IN_COLAB:
    # Retrieve the project modules imported further down (stockenv.py,
    # cleandata.py, data.py) from the MADDQN GitHub repository into /content.
    # NOTE(review): each URL has a double slash after the host name and
    # --cut-dirs=4 looks tuned to that; confirm the files land in
    # /content/environments and /content/utilities before changing either.
    !wget --recursive --no-parent --cut-dirs=4 -P /content https://raw.githubusercontent.com//CodeBeckZero/MADDQN/main/environments/stockenv.py
    !wget --recursive --no-parent --cut-dirs=4 -P /content https://raw.githubusercontent.com//CodeBeckZero/MADDQN/main/utilities/cleandata.py
    !wget --recursive --no-parent --cut-dirs=4 -P /content https://raw.githubusercontent.com//CodeBeckZero/MADDQN/main/utilities/data.py
    # Move all directories and files from /content/raw.githubusercontent.com to /content/
    !mv /content/raw.githubusercontent.com/* /content/

    # Delete the now-empty raw.githubusercontent.com directory
    !rm -rf /content/raw.githubusercontent.com

In [104]:
import sys
import os
import torch
import random
import pickle
import itertools
import contextlib
import logging
import io
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import utilities.cleandata as cln
from utilities.data import UniStockEnvDataStruct
from environments.stockenv import ContinuousOHLCVEnv
from sklearn.preprocessing import StandardScaler, RobustScaler, MinMaxScaler
from neuralforecast.core import NeuralForecast
from neuralforecast.models import TimesNet
from neuralforecast.losses.numpy import mae, mse
from datetime import datetime

# Environment switches read by the third-party libraries used below.
os.environ['NIXTLA_ID_AS_COL'] = '1'
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'  # Suppress TensorFlow messages if relevant
os.environ['PYTORCH_NO_CUDA_MEMORY_CACHING'] = '1'


# Choose the compute device once and bind it to `device`. The name was
# previously used by the print below without ever being assigned, which
# raises NameError on a fresh kernel.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Set the default tensor type so newly created tensors follow the device
if device.type == 'cuda':
    torch.set_default_tensor_type(torch.cuda.FloatTensor)
else:
    torch.set_default_tensor_type(torch.FloatTensor)

# Print the chosen device
print(f'Using device: {device}')


def set_seed(seed):
    """Seed Python's `random`, NumPy and PyTorch with the same value.

    When CUDA is available the CUDA generators are seeded as well and cuDNN
    is switched to deterministic, non-benchmarking mode.

    Args:
        seed: Integer seed handed to every generator.
    """
    # CPU-side generators: Python, NumPy, PyTorch
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
        seed_fn(seed)

    # Nothing more to do without a GPU
    if not torch.cuda.is_available():
        return

    # Current device first, then every visible device (multi-GPU)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

    # Force deterministic cuDNN kernels and disable the autotuner
    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False

Using device: cpu


In [None]:
RANDOM_SEED = 42
set_seed(RANDOM_SEED)

# Project root used to locate generated data (e.g. gen_data/unidata.pkl).
# `pwd` is bound in every environment because later cells read it
# unconditionally:
#   * Colab: the project files are downloaded into /content.
#   * elsewhere: defaults to the original local checkout and can be
#     overridden with the MADDQN_ROOT environment variable.
# NOTE(review): the project imports (utilities.*, environments.*) run in an
# earlier cell, before this sys.path entry exists; on a fresh kernel they only
# resolve if the notebook's working directory is already the project root.
if IN_COLAB:
    pwd = "/content"
else:
    pwd = os.environ.get("MADDQN_ROOT", "C:/programming/MADDQN")
    if pwd not in sys.path:  # keep re-runs of this cell from adding duplicates
        sys.path.append(pwd)

# Base URL of the raw input CSV files on GitHub
input_url = 'https://raw.githubusercontent.com/CodeBeckZero/MADDQN/main/input_data'

# Label used throughout the notebook -> CSV file name below `input_url`
stock_inputs = dict(
    DJI='^DJI_daily.csv',
    NDAQ='^IXIC_daily.csv',
    SP500='^SPX_daily.csv',
    AAPL='AAPL_daily.csv',
    AMZN='AMZN_daily.csv',
    GOOGL='GOOGL_daily.csv',
    MSFT='MSFT_daily.csv',
    SINE='sine_wave_daily.csv',
    FORD='F_daily.csv',
    JNJ='JNJ_daily.csv',
    NEE='NEE_daily.csv',
    PFE='PFE_daily.csv',
    TSLA='TSLA_daily.csv',
)


# Exogenous variable input: the datasets and the columns taken from each
exo_var_datasets = ['DJI','NDAQ','SP500']
exo_vars = ['close']

# Load every exogenous dataset in the order given by `exo_var_datasets`, keep
# 'date' plus the requested columns, and prefix those columns with the dataset
# label (e.g. 'DJI_close'). Iterating the list directly means the result no
# longer depends on the key order of `stock_inputs`; a label missing from
# `stock_inputs` now fails with a KeyError instead of being skipped silently.
exo_frames = []
for name in exo_var_datasets:
    df_ohlcv = cln.YAHOO_csv_input(stock_inputs[name], input_url)
    exo_frames.append(
        df_ohlcv[['date'] + exo_vars].rename(
            columns={column: f'{name}_{column}' for column in exo_vars}
        )
    )

# Inner-join all frames on 'date' so only dates present in every dataset remain
exo_vars_df = exo_frames[0] if exo_frames else pd.DataFrame()
for frame in exo_frames[1:]:
    exo_vars_df = pd.merge(exo_vars_df, frame, on='date', how='inner')

# Rename the date column to 'ds'
exo_vars_df = exo_vars_df.rename(columns={'date': 'ds'})

def _to_datetimes(date_strings):
    """Parse an iterable of 'YYYY-MM-DD' strings into a list of datetimes."""
    return [datetime.strptime(text, "%Y-%m-%d") for text in date_strings]


# Training: series labels and inclusive date range
trn_keys = ['AAPL']
training_range = ('2007-01-03','2020-12-31')
trn_dt_range = _to_datetimes(training_range)

# Validation: same label list object as training, later date range
val_keys = trn_keys
validation_range = ('2021-01-04','2023-12-28')
val_dt_range = _to_datetimes(validation_range)

# Testing: all evaluated series over the full date range
tst_keys = ['AAPL','AMZN','MSFT','SINE','FORD','JNJ','NEE', 'PFE','TSLA']
testing_range = ('2007-01-03','2023-12-28')
tst_dt_range = _to_datetimes(testing_range)

window_size = 28              # Window size
n_prediction = 5              # Number of points to predict in the future
price_based_on = 'close'
columns = ['open','high','low','close','volume']

# True: load the pickled dataset; False: rebuild it from the CSV files
import_dataset = True




In [None]:
if import_dataset:
    # Reuse the previously generated dataset stored under the project root.
    # NOTE: pickle.load can execute arbitrary code contained in the file;
    # only load pickles produced by this project.
    with open(os.path.join(pwd, "gen_data", "unidata.pkl"), "rb") as file:
        env = pickle.load(file)

else:
    # Rebuild the dataset from the CSV inputs. Besides the training,
    # validation and test series, the index series in `exo_var_datasets` are
    # built too, because later cells read env['SP500'] and env['DJI'].
    required_names = set(trn_keys + val_keys + tst_keys + exo_var_datasets)
    env = {}

    for name, file in stock_inputs.items():
        if name not in required_names:
            continue
        # Import the raw data and wrap it in the project's data structure
        df = cln.YAHOO_csv_input(file, input_url)
        env[name] = UniStockEnvDataStruct(df, price_based_on, window_size)

In [None]:
# Quick schema check: show the column dtypes of the first entry stored under
# env['SP500']['rw_long_wstd_price'].
sp500_first_entry = env['SP500']['rw_long_wstd_price'][0]
display(sp500_first_entry.dtypes)

ds           datetime64[ns]
unique_id            object
y                   float64
dtype: object

In [None]:
def _rows_within(frame, dt_range):
    """Return the rows of `frame` whose 'ds' lies in the inclusive range
    [dt_range[0], dt_range[1]]."""
    not_before_start = frame['ds'] >= dt_range[0]
    not_after_end = frame['ds'] <= dt_range[1]
    return frame[not_before_start & not_after_end]


# Long-format price frames restricted to the training / testing date ranges
trn_env = {
    name: _rows_within(env[name]['long_raw_price'], trn_dt_range)
    for name in trn_keys
}
tst_env = {
    name: _rows_within(env[name]['long_raw_price'], tst_dt_range)
    for name in tst_keys
}


In [None]:
# Line chart of the 'y' column of env['DJI']['long_df'] over 'ds'
fig, ax = plt.subplots()

dji_long = env['DJI']['long_df']
ax.plot(dji_long['ds'], dji_long['y'], label='close')

# Axis labels in one call
ax.set(xlabel='Time', ylabel='Y Value')

# Legend identifying the plotted series
ax.legend()

# Rotate/align the date tick labels, then tighten the layout so they fit
fig.autofmt_xdate()
plt.tight_layout()

# Render the figure
plt.show()

In [None]:
# TimesNet forecaster configuration. The forecast horizon and input window
# come from the notebook configuration (`n_prediction`, `window_size`).
model = TimesNet(h = n_prediction, # Forecast horizon
                 input_size = window_size, # Length of the input window
                 batch_size = 1, # Number of time series per batch
                 #futr_exog_list = remaining_columns,
                 hidden_size = 128, # Size of the embeddings and encoders
                 dropout = 0.40, # Dropout for embeddings
                 conv_hidden_size = 3, # Channels for the inception block
                 top_k = 5, # Top number of periods from the FFT considered
                 num_kernels = 13, # Number of kernels for the inception block
                 encoder_layers = 3, # Number of encoders
                 max_steps = 1000, # Maximum number of training steps
                 early_stop_patience_steps = 10, # Early stopping on validation
                 val_check_steps = 100, # Validation check every X steps
                 windows_batch_size = 150, # Number of windows per training batch
                 num_workers_loader = 7, # Data-loader workers (was a syntax error: stray backticks, no comma)
                 learning_rate = 0.0003,
                 random_seed = RANDOM_SEED)

c:\Users\beckm\anaconda3\envs\MADDQN\Lib\site-packages\pytorch_lightning\utilities\parsing.py:199: Attribute 'loss' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['loss'])`.
Seed set to 42


In [None]:
# True: load a previously saved forecaster; False: train and save a new one
import_model = True

if import_model:
    # Saved forecaster location, relative to the current working directory
    model_path = os.path.join(os.getcwd(), 'gen_data', 'timesnet')

    # Fail early with a clear message when the saved model is missing
    if os.path.exists(model_path):
        nf = NeuralForecast.load(path=model_path)
    else:
        raise FileNotFoundError(f"Model path {model_path} does not exist.")
else:
    # 'D' is the canonical pandas alias for daily frequency (lowercase 'd' is
    # the deprecated spelling of the same offset).
    nf = NeuralForecast(models=[model], freq='D')

    for key in trn_keys:
        # `val_size` is a number of time steps, not a fraction, so reserve
        # the last 20% of the training rows (at least one) for validation.
        # NOTE(review): assumes trn_env[key] holds a single series — confirm.
        val_size = max(1, int(len(trn_env[key]) * 0.2))
        nf.fit(df=trn_env[key], val_size=val_size)

    # NOTE(review): this saves to ./checkpoints/test_run/, whereas the load
    # branch above reads gen_data/timesnet; copy the checkpoint over (or align
    # the two paths) before setting import_model = True.
    nf.save(path='./checkpoints/test_run/',
            model_index=None,
            overwrite=True,
            save_dataset=True)

c:\Users\beckm\anaconda3\envs\MADDQN\Lib\site-packages\pytorch_lightning\utilities\parsing.py:199: Attribute 'loss' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['loss'])`.
Seed set to 42


In [107]:
logging.getLogger("pytorch_lightning").setLevel(logging.WARNING) # Disables printouts about GPU/TPU each time model.predict() is called

# One column per forecast step ('1d' ... f'{n_prediction}d'), derived from the
# configured horizon instead of being hard-coded.
horizon_labels = [f'{step}d' for step in range(1, n_prediction + 1)]
columns = ['date', 'price'] + horizon_labels

# Collect one record per window and build the frame once at the end; growing
# a DataFrame with pd.concat inside the loop is quadratic and triggered the
# pandas warning about concatenating empty frames.
records = []
for window in range(1, 10):
    window_frame = env['FORD']['rw_long_raw_price'][window]

    today_date = window_frame['ds'].iloc[-1]
    # Flattens the list of lists of prices; the final price is the current price
    today_price = list(itertools.chain.from_iterable(env['FORD']['rw_raw_price_env'][window]))[-1]
    today_prediction = nf.predict(df=window_frame)['TimesNet'].to_list()

    records.append({'date': today_date,
                    'price': today_price,
                    **dict(zip(horizon_labels, today_prediction))})

df = pd.DataFrame(records, columns=columns)

# Shift the k-step-ahead forecast down by k rows so each row pairs the
# observed price with the forecasts that were made for that row.
for step, label in enumerate(horizon_labels, start=1):
    df[label] = df[label].shift(step)

display(df)



  self.temporal = torch.tensor(temporal, dtype=torch.float)
c:\Users\beckm\anaconda3\envs\MADDQN\Lib\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:441: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.


Predicting: |          | 0/? [00:00<?, ?it/s]

  df = pd.concat([df, row_df], ignore_index=True)
  self.temporal = torch.tensor(temporal, dtype=torch.float)
c:\Users\beckm\anaconda3\envs\MADDQN\Lib\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:441: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.


Predicting: |          | 0/? [00:00<?, ?it/s]

  self.temporal = torch.tensor(temporal, dtype=torch.float)
c:\Users\beckm\anaconda3\envs\MADDQN\Lib\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:441: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.


Predicting: |          | 0/? [00:00<?, ?it/s]

  self.temporal = torch.tensor(temporal, dtype=torch.float)
c:\Users\beckm\anaconda3\envs\MADDQN\Lib\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:441: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.


Predicting: |          | 0/? [00:00<?, ?it/s]

  self.temporal = torch.tensor(temporal, dtype=torch.float)
c:\Users\beckm\anaconda3\envs\MADDQN\Lib\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:441: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.


Predicting: |          | 0/? [00:00<?, ?it/s]

  self.temporal = torch.tensor(temporal, dtype=torch.float)
c:\Users\beckm\anaconda3\envs\MADDQN\Lib\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:441: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.


Predicting: |          | 0/? [00:00<?, ?it/s]

  self.temporal = torch.tensor(temporal, dtype=torch.float)
c:\Users\beckm\anaconda3\envs\MADDQN\Lib\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:441: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.


Predicting: |          | 0/? [00:00<?, ?it/s]

  self.temporal = torch.tensor(temporal, dtype=torch.float)
c:\Users\beckm\anaconda3\envs\MADDQN\Lib\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:441: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.


Predicting: |          | 0/? [00:00<?, ?it/s]

  self.temporal = torch.tensor(temporal, dtype=torch.float)
c:\Users\beckm\anaconda3\envs\MADDQN\Lib\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:441: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.


Predicting: |          | 0/? [00:00<?, ?it/s]

Unnamed: 0,date,price,1d,2d,3d,4d,5d
0,2007-02-13,8.45,,,,,
1,2007-02-14,8.51,8.451353,,,,
2,2007-02-15,8.6,8.497956,8.455006,,,
3,2007-02-16,8.53,8.627429,8.518083,8.386785,,
4,2007-02-20,8.65,8.598877,8.616117,8.516963,8.330503,
5,2007-02-21,8.47,8.626576,8.598941,8.603215,8.525692,8.383846
6,2007-02-22,8.35,8.511107,8.640761,8.564103,8.573662,8.456856
7,2007-02-23,8.3,8.367944,8.473336,8.651675,8.507615,8.532435
8,2007-02-26,8.26,8.335512,8.398152,8.512602,8.644951,8.547848


In [None]:
# Forecast every test series with the forecaster `nf` and keep the results
# both in memory and as one CSV file per series.
# NOTE(review): the original referenced `nf2` and `test_dataloader`, neither
# of which is defined in this notebook, and indexed tst_env['GOOGL'], which
# is not in `tst_keys`; `nf` and the first test key are used instead — confirm
# this matches the intended experiment.
test_results = {}
for key in tst_keys:
    results = nf.predict(df=tst_env[key])
    test_results[key] = results
    test_results[key].to_csv(f'{key}.csv')  # original f-string was unterminated

# Spot check: print one forecast next to a slice of the matching input rows
sample_key = tst_keys[0]
test = test_results[sample_key]
print(test, tst_env[sample_key].iloc[20:25])



In [None]:
display(test_results['AAPL'])

# Actual AAPL series against the TimesNet forecast
fig, ax = plt.subplots()

ax.plot(tst_env['AAPL']['ds'], tst_env['AAPL']['y'], label='actual')
ax.plot(test_results['AAPL']['ds'], test_results['AAPL']['TimesNet'], label='TimesNet')

ax.legend(loc='best')
# Set the x-axis limit
#plt.xlim(tst_dt_range[0], tst_dt_range[1])  # Adjust the limits as needed

# Format the date ticks and tighten the layout BEFORE rendering; both calls
# previously came after plt.show() and so did not affect the displayed figure.
fig.autofmt_xdate()
plt.tight_layout()

# Show the plot
plt.show()