In [15]:
import pandas as pd
prices = pd.read_csv("all_stocks.csv")
prices.rename(columns={"Date":"ds", "Ticker":"unique_id", "Close":"y"}, inplace=True)
prices["ds"] = prices["ds"].str[:10] # remove time
prices["ds"] = pd.to_datetime(prices["ds"])
prices = prices[["ds", "y", "unique_id","12-day EMA", "26-day EMA", "MACD", "Signal Line", "MACD Histogram", "RSI", "OBV"]]
prices.dropna(inplace=True)

In [17]:
# Train / Test split
train_size = 0.8  # 80% train, 20% test
def train_test_split(group):
    split_index = int(len(group) * train_size)
    return group.iloc[:split_index], group.iloc[split_index:]

# Apply the split to each unique stock (grouped by 'unique_id')
train_data, test_data = zip(*prices.groupby("unique_id").apply(train_test_split))

# Convert tuple results to DataFrames
df_train = pd.concat(train_data).reset_index(drop=True)
df_test = pd.concat(test_data).reset_index(drop=True)

  train_data, test_data = zip(*prices.groupby("unique_id").apply(train_test_split))


In [18]:
from neuralforecast import NeuralForecast
from neuralforecast.models import TimeLLM

input_sizes = [10,15,30,60,90] #historical window
#input_sizes = [10] #historical window

prompt_prefix = "The dataset contains closing stock prices and its supporting technical indicators: MACD, OBV, RSI."

for interval in input_sizes:
    timellm = TimeLLM(h=100,
                    input_size=interval,
                    prompt_prefix=prompt_prefix,
                    batch_size=16,
                    valid_batch_size=16,
                    max_steps=50,
                    windows_batch_size=16)

    nf = NeuralForecast(
        models=[timellm],
        freq='D'
    )
    print(f"Training for interval: {interval}")
    nf.fit(df=df_train)
    
    #test_df_future = nf.make_future_dataframe(df_train)
    forecasts = nf.predict(df_train)
    print(f"Reults for interval: {interval}")
    print(forecasts)
    test_results = pd.merge(df_test, forecasts, on=["ds", "unique_id"], how="inner") #LSTM (=predicted), y (=actual)
    test_results.to_csv(f"res_llmtime_{interval}.csv")


Seed set to 1


Using openai-community/gpt2 as default.
Successfully loaded model: openai-community/gpt2


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

  | Name                | Type               | Params | Mode 
-------------------------------------------------------------------
0 | loss                | MAE                | 0      | train
1 | padder_train        | ConstantPad1d      | 0      | train
2 | scaler              | TemporalNorm       | 0      | train
3 | llm                 | GPT2Model          | 124 M  | eval 
4 | patch_embedding     | PatchEmbedding     | 1.5 K  | train
5 | mapping_layer       | Linear             | 51.5 M | train
6 | reprogramming_layer | ReprogrammingLayer | 2.4 M  | train
7 | output_projection   | FlattenHead        | 12.9 K | train
8 | normalize_layers    | RevIN              | 0      | train
-------------------------------------------------------------------
53.9 M    Trainable params
124 M     Non-trainable params
178 M     Total params
713.257   Total estimated model params size (MB)
20

Training for interval: 10


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_steps=50` reached.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


Predicting: |          | 0/? [00:00<?, ?it/s]

Seed set to 1


Reults for interval: 10
    unique_id         ds     TimeLLM
0        AAPL 2023-02-18  150.956696
1        AAPL 2023-02-19  152.011307
2        AAPL 2023-02-20  150.880310
3        AAPL 2023-02-21  152.409103
4        AAPL 2023-02-22  152.172974
..        ...        ...         ...
495      TSLA 2023-05-24  198.349747
496      TSLA 2023-05-25  205.785004
497      TSLA 2023-05-26  193.939133
498      TSLA 2023-05-27  202.669556
499      TSLA 2023-05-28  194.302765

[500 rows x 3 columns]
Using openai-community/gpt2 as default.
Successfully loaded model: openai-community/gpt2


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

  | Name                | Type               | Params | Mode 
-------------------------------------------------------------------
0 | loss                | MAE                | 0      | train
1 | padder_train        | ConstantPad1d      | 0      | train
2 | scaler              | TemporalNorm       | 0      | train
3 | llm                 | GPT2Model          | 124 M  | eval 
4 | patch_embedding     | PatchEmbedding     | 1.5 K  | train
5 | mapping_layer       | Linear             | 51.5 M | train
6 | reprogramming_layer | ReprogrammingLayer | 2.4 M  | train
7 | output_projection   | FlattenHead        | 12.9 K | train
8 | normalize_layers    | RevIN              | 0      | train
-------------------------------------------------------------------
53.9 M    Trainable params
124 M     Non-trainable params
178 M     Total params
713.257   Total estimated model params size (MB)
20

Training for interval: 15


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_steps=50` reached.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


Predicting: |          | 0/? [00:00<?, ?it/s]

Seed set to 1


Reults for interval: 15
    unique_id         ds     TimeLLM
0        AAPL 2023-02-18  147.331741
1        AAPL 2023-02-19  152.266678
2        AAPL 2023-02-20  150.062012
3        AAPL 2023-02-21  150.435867
4        AAPL 2023-02-22  150.889191
..        ...        ...         ...
495      TSLA 2023-05-24  193.336960
496      TSLA 2023-05-25  188.810898
497      TSLA 2023-05-26  194.615128
498      TSLA 2023-05-27  200.477585
499      TSLA 2023-05-28  195.497589

[500 rows x 3 columns]
Using openai-community/gpt2 as default.
Successfully loaded model: openai-community/gpt2


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

  | Name                | Type               | Params | Mode 
-------------------------------------------------------------------
0 | loss                | MAE                | 0      | train
1 | padder_train        | ConstantPad1d      | 0      | train
2 | scaler              | TemporalNorm       | 0      | train
3 | llm                 | GPT2Model          | 124 M  | eval 
4 | patch_embedding     | PatchEmbedding     | 1.5 K  | train
5 | mapping_layer       | Linear             | 51.5 M | train
6 | reprogramming_layer | ReprogrammingLayer | 2.4 M  | train
7 | output_projection   | FlattenHead        | 38.5 K | train
8 | normalize_layers    | RevIN              | 0      | train
-------------------------------------------------------------------
53.9 M    Trainable params
124 M     Non-trainable params
178 M     Total params
713.360   Total estimated model params size (MB)
20

Training for interval: 30


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_steps=50` reached.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


Predicting: |          | 0/? [00:00<?, ?it/s]

Seed set to 1


Reults for interval: 30
    unique_id         ds     TimeLLM
0        AAPL 2023-02-18  141.087646
1        AAPL 2023-02-19  137.796341
2        AAPL 2023-02-20  148.141922
3        AAPL 2023-02-21  143.490234
4        AAPL 2023-02-22  150.099777
..        ...        ...         ...
495      TSLA 2023-05-24  196.331696
496      TSLA 2023-05-25  199.137054
497      TSLA 2023-05-26  162.738617
498      TSLA 2023-05-27  177.739746
499      TSLA 2023-05-28  188.141907

[500 rows x 3 columns]
Using openai-community/gpt2 as default.
Successfully loaded model: openai-community/gpt2


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

  | Name                | Type               | Params | Mode 
-------------------------------------------------------------------
0 | loss                | MAE                | 0      | train
1 | padder_train        | ConstantPad1d      | 0      | train
2 | scaler              | TemporalNorm       | 0      | train
3 | llm                 | GPT2Model          | 124 M  | eval 
4 | patch_embedding     | PatchEmbedding     | 1.5 K  | train
5 | mapping_layer       | Linear             | 51.5 M | train
6 | reprogramming_layer | ReprogrammingLayer | 2.4 M  | train
7 | output_projection   | FlattenHead        | 89.7 K | train
8 | normalize_layers    | RevIN              | 0      | train
-------------------------------------------------------------------
54.0 M    Trainable params
124 M     Non-trainable params
178 M     Total params
713.565   Total estimated model params size (MB)
20

Training for interval: 60


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]


































































































































































































































































































































































Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_steps=50` reached.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


Predicting: |          | 0/? [00:00<?, ?it/s]

Seed set to 1


Reults for interval: 60
    unique_id         ds     TimeLLM
0        AAPL 2023-02-18  128.658249
1        AAPL 2023-02-19  135.896149
2        AAPL 2023-02-20  135.802551
3        AAPL 2023-02-21  147.042313
4        AAPL 2023-02-22  151.980972
..        ...        ...         ...
495      TSLA 2023-05-24  142.618759
496      TSLA 2023-05-25  177.101425
497      TSLA 2023-05-26  170.164261
498      TSLA 2023-05-27  170.665253
499      TSLA 2023-05-28  134.821243

[500 rows x 3 columns]
Using openai-community/gpt2 as default.
Successfully loaded model: openai-community/gpt2


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

  | Name                | Type               | Params | Mode 
-------------------------------------------------------------------
0 | loss                | MAE                | 0      | train
1 | padder_train        | ConstantPad1d      | 0      | train
2 | scaler              | TemporalNorm       | 0      | train
3 | llm                 | GPT2Model          | 124 M  | eval 
4 | patch_embedding     | PatchEmbedding     | 1.5 K  | train
5 | mapping_layer       | Linear             | 51.5 M | train
6 | reprogramming_layer | ReprogrammingLayer | 2.4 M  | train
7 | output_projection   | FlattenHead        | 140 K  | train
8 | normalize_layers    | RevIN              | 0      | train
-------------------------------------------------------------------
54.0 M    Trainable params
124 M     Non-trainable params
178 M     Total params
713.769   Total estimated model params size (MB)
20

Training for interval: 90


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_steps=50` reached.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


Predicting: |          | 0/? [00:00<?, ?it/s]

Reults for interval: 90
    unique_id         ds     TimeLLM
0        AAPL 2023-02-18  146.046051
1        AAPL 2023-02-19  141.594269
2        AAPL 2023-02-20  143.290802
3        AAPL 2023-02-21  146.852692
4        AAPL 2023-02-22  144.115921
..        ...        ...         ...
495      TSLA 2023-05-24  139.532669
496      TSLA 2023-05-25  203.212784
497      TSLA 2023-05-26  170.545776
498      TSLA 2023-05-27  215.745560
499      TSLA 2023-05-28  196.658554

[500 rows x 3 columns]


In [2]:
import numpy as np
import pandas as pd

input_sizes = [10,15,30,60,90]

# Define function for MAE calculation
def calculate_mae(y_true, y_pred):
    return np.mean(np.abs(np.array(y_true) - np.array(y_pred)))

# Define function for RMSE calculation
def calculate_rmse(y_true, y_pred):
    return np.sqrt(np.mean((np.array(y_true) - np.array(y_pred)) ** 2))

# Define function for RMAE calculation
def calculate_rmae(mae, avg_price):
    return (mae / avg_price) * 100 if avg_price != 0 else np.nan

# List to store per-stock results
detailed_results = []

# Iterate over each interval
for interval in input_sizes:
    df = pd.read_csv(f"TIMELLM_RESULTS_PER_STOCK/res_llmtime_{interval}.csv")

    # Calculate RMSE, MAE, and RMAE for each unique_id (each stock separately)
    grouped_results = df.groupby("unique_id").apply(lambda group: {
        "unique_id": group["unique_id"].iloc[0],
        "Interval": interval,
        "MAE": calculate_mae(group["y"], group["TimeLLM"]),
        "RMSE": calculate_rmse(group["y"], group["TimeLLM"]),
        "RMAE": calculate_rmae(
            calculate_mae(group["y"], group["TimeLLM"]),
            np.mean(group["y"])  # Average price in the interval
        )
    }).tolist()  # Convert to list of dicts

    # Append per-stock results
    detailed_results.extend(grouped_results)

# Save results as DataFrame
detailed_results_df = pd.DataFrame(detailed_results)
detailed_results_df.to_csv("results_timellm.csv", index=False)


  grouped_results = df.groupby("unique_id").apply(lambda group: {
  grouped_results = df.groupby("unique_id").apply(lambda group: {
  grouped_results = df.groupby("unique_id").apply(lambda group: {
  grouped_results = df.groupby("unique_id").apply(lambda group: {
  grouped_results = df.groupby("unique_id").apply(lambda group: {
