### 1. Install Dependencies

In [1]:
!pip install pandas gdown huggingface-hub numpy matplotlib scikit-learn transformers torch tqdm

Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch)
  Downloading nvidia_curand_cu12-10.3.5

In [2]:
!pip install -U datasets

Collecting datasets
  Downloading datasets-3.6.0-py3-none-any.whl.metadata (19 kB)
Collecting fsspec<=2025.3.0,>=2023.1.0 (from fsspec[http]<=2025.3.0,>=2023.1.0->datasets)
  Downloading fsspec-2025.3.0-py3-none-any.whl.metadata (11 kB)
Downloading datasets-3.6.0-py3-none-any.whl (491 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m491.5/491.5 kB[0m [31m27.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading fsspec-2025.3.0-py3-none-any.whl (193 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m193.6/193.6 kB[0m [31m19.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: fsspec, datasets
  Attempting uninstall: fsspec
    Found existing installation: fsspec 2025.3.2
    Uninstalling fsspec-2025.3.2:
      Successfully uninstalled fsspec-2025.3.2
  Attempting uninstall: datasets
    Found existing installation: datasets 2.14.4
    Uninstalling datasets-2.14.4:
      Successfully uninstalled datasets-2.14.4
[31mERROR: pip's dependency r

### 2. Imports

In [3]:
import os
import json
import sys
import argparse
import re
from typing import List, Union
import pandas as pd
from tqdm import tqdm
from pathlib import Path
from google.colab import drive
from datetime import datetime

from abc import ABC, abstractmethod

import torch
import numpy as np
import matplotlib.pyplot as plt
from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

In [4]:
# Report whether PyTorch can see a CUDA device in this runtime.
torch.cuda.is_available()

True

### 3. Connect to Google Drive

In [5]:
# Mount Google Drive (forcing a fresh mount) and switch the working directory
# to "My Drive", so the relative "./MTBench-Test/..." paths used later in the
# notebook resolve inside the mounted drive.
drive.mount("/content/drive",force_remount=True)
os.chdir("/content/drive/My Drive")

Mounted at /content/drive


### 4. Utils Code

In [6]:
def format_time_difference(seconds):
    """
    Render a duration given in seconds as a coarse, human-readable string.

    The duration is floor-divided into minutes, hours and days. The result
    names the largest non-zero unit and, when it is non-zero, the next smaller
    unit (e.g. "1 hours-30 minutes"). Durations below one minute (and
    non-positive durations) are reported as "<seconds> seconds".

    Args:
        seconds: Duration in seconds (int or float).

    Returns:
        str: The formatted duration. Unit names are always plural.
    """
    whole_minutes = seconds // 60
    whole_hours = whole_minutes // 60
    whole_days = whole_hours // 24

    if whole_days > 0:
        leftover_hours = whole_hours % 24
        if leftover_hours > 0.1:
            return f"{whole_days} days-{leftover_hours} hours"
        return f"{whole_days} days"

    if whole_hours > 0:
        leftover_minutes = whole_minutes % 60
        if leftover_minutes > 0.1:
            return f"{whole_hours} hours-{leftover_minutes} minutes"
        return f"{whole_hours} hours"

    if whole_minutes > 0:
        leftover_seconds = seconds % 60
        if leftover_seconds > 0.1:
            return f"{whole_minutes} minutes-{leftover_seconds} seconds"
        return f"{whole_minutes} minutes"

    return f"{seconds} seconds"

def save_to_json(data, save_path):
    """
    Serialize `data` as indented JSON to `save_path`, creating parent folders.

    Args:
        data: Any object accepted by `json.dump`.
        save_path: Destination file path (str or path-like).
    """
    parent_dir = os.path.dirname(save_path)
    # A bare file name has an empty dirname, and os.makedirs("") raises
    # FileNotFoundError, so only create the directory when there is one.
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    with open(save_path, "w") as f:
        json.dump(data, f, indent=4)

def plot_series(filename, input_ts, output_ts, predicted_ts, save_folder):
    """
    Plot the input series, the ground truth and the prediction, and save a PNG.

    The input series is drawn on x = 0..len(input_ts)-1; the ground truth and
    the prediction both start at x = len(input_ts). The image is written to
    `<save_folder>/<filename without extension>.png`.

    Args:
        filename: Name used for the plot title and (minus extension) the PNG name.
        input_ts: Sequence of historical values.
        output_ts: Sequence of ground-truth future values.
        predicted_ts: Sequence of predicted future values.
        save_folder: Directory that receives the PNG (created if missing).
    """
    history_len = len(input_ts)
    os.makedirs(save_folder, exist_ok=True)

    fig, ax = plt.subplots(figsize=(10, 5))
    try:
        ax.plot(range(history_len), input_ts, label="Input Time Series", marker='o')
        ax.plot(range(history_len, history_len + len(output_ts)), output_ts, label="Ground Truth", marker='o')
        ax.plot(range(history_len, history_len + len(predicted_ts)), predicted_ts, label="Predicted", linestyle='dashed')
        ax.legend()
        ax.set_title(f"Prediction for {filename}")
        ax.set_xlabel("Time Steps")
        ax.set_ylabel("Value")
        ax.grid()

        base_name = os.path.splitext(filename)[0]
        save_path = os.path.join(save_folder, base_name + ".png")
        fig.savefig(save_path)
    finally:
        # Always release the figure: this function is called once per sample,
        # and an exception while plotting/saving would otherwise leak it.
        plt.close(fig)

def calculate_mape(y_true, y_pred):
    """
    Mean absolute percentage error, in percent, over entries with non-zero truth.

    Args:
        y_true: Sequence of ground-truth values.
        y_pred: Sequence of predicted values, same length as `y_true`.

    Returns:
        float: MAPE in percent, or NaN when every ground-truth value is zero
        (there is nothing to average in that case).
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)

    # Entries whose ground truth is zero are excluded to avoid division by zero.
    nonzero = y_true != 0
    if not nonzero.any():
        # Averaging an empty selection would emit a RuntimeWarning and yield
        # NaN anyway; return NaN explicitly instead.
        return float("nan")
    return np.mean(np.abs((y_true[nonzero] - y_pred[nonzero]) / y_true[nonzero])) * 100


def calculate_acc(result_list, regrouped_labels = None):
    """
    Fraction of results whose prediction text contains the correct label.

    Without `regrouped_labels`, a result is correct when its "ground_truth"
    string occurs inside its "predict" string. With `regrouped_labels`
    (a mapping original label -> group), a result is correct when any original
    label that occurs inside "predict" maps to the same group as the ground truth.

    Args:
        result_list: List of dicts with "ground_truth" and "predict" keys.
        regrouped_labels: Optional mapping from original label to coarser group.

    Returns:
        float: Accuracy in [0, 1]; 0.0 when `result_list` is empty.

    Raises:
        KeyError: If a ground truth is missing from `regrouped_labels`.
    """
    total_pred = len(result_list)
    if total_pred == 0:
        # Nothing was evaluated; avoid dividing by zero.
        return 0.0

    if regrouped_labels is None:
        correct_pred = sum(
            1 for result in result_list if result["ground_truth"] in result["predict"]
        )
    else:
        correct_pred = 0
        for result in result_list:
            gt_group = regrouped_labels[result["ground_truth"]]
            # Count the result once if any mentioned label falls in the right group.
            if any(
                original_label in result["predict"] and group == gt_group
                for original_label, group in regrouped_labels.items()
            ):
                correct_pred += 1

    return correct_pred / total_pred


def calculate_correlation_acc(result_list):
    """
    Compute exact and direction-level ("brief") accuracy for correlation labels.

    A prediction (stripped of surrounding whitespace) is an exact hit when it
    equals the ground truth. It is a brief hit when it is an exact hit, or when
    prediction and ground truth are both positive-correlation labels or both
    negative-correlation labels.

    Args:
        result_list: List of dicts with "predict" and "ground_truth" strings.

    Returns:
        dict: "exact_accuracy" and "brief_accuracy" as percentage strings
        rounded to two decimals (e.g. "75.0%"), and "total_samples" as an int.
        Both accuracies are "0.0%" when `result_list` is empty.
    """
    positive_correlations = ["Strong Positive Correlation", "Moderate Positive Correlation"]
    negative_correlations = ["Strong Negative Correlation", "Moderate Negative Correlation"]

    total = len(result_list)
    exact_correct = 0
    brief_correct = 0
    for result in result_list:
        prediction = result["predict"].strip()
        truth = result["ground_truth"]

        is_exact = prediction == truth
        same_direction = (
            (prediction in positive_correlations and truth in positive_correlations)
            or (prediction in negative_correlations and truth in negative_correlations)
        )

        if is_exact:
            exact_correct += 1
        if is_exact or same_direction:
            brief_correct += 1

    # Guard the percentages against an empty evaluation set.
    exact_accuracy = (exact_correct / total) * 100 if total else 0.0
    brief_accuracy = (brief_correct / total) * 100 if total else 0.0

    return {
        "exact_accuracy": f"{round(exact_accuracy, 2)}%",
        "brief_accuracy": f"{round(brief_accuracy, 2)}%",
        "total_samples": total,
    }


def calculate_mcqa_acc(result_list):
    """
    Accuracy (in percent) for multiple-choice answers.

    The first non-whitespace character of each prediction, upper-cased, is
    compared with the ground-truth letter. An empty prediction counts as wrong.

    Args:
        result_list: List of dicts with "predict" (str) and "ground_truth" (letter).

    Returns:
        float: Percentage of correct answers; 0.0 when `result_list` is empty.
    """
    total = len(result_list)
    if total == 0:
        # Nothing was evaluated; avoid dividing by zero.
        return 0.0

    correct = 0
    for result in result_list:
        prediction = result["predict"].strip()
        # An empty response has no first character to compare; treat it as a miss
        # instead of raising IndexError.
        if prediction and prediction[0].upper() == result["ground_truth"]:
            correct += 1

    return correct / total * 100

### 5. Models Code

In [7]:
class BaseModel(ABC):
    """Common interface for the text-generation backends used in this notebook."""

    @abstractmethod
    def inference(self, content: str) -> str:
        """
        Generate a response for a single prompt.

        Args:
            content: The prompt text.

        Returns:
            The text generated for the prompt.
        """
        ...

In [8]:
class DeepSeekModel(BaseModel):
    """
    Chat-style wrapper around a causal LM that emits a "</think>"-terminated
    reasoning section before its answer; `inference` returns only the answer.
    """

    # Token id previously hard-coded for "</think>"; used as a fallback when
    # the marker cannot be resolved from the loaded tokenizer.
    DEFAULT_THINK_END_TOKEN_ID = 151649

    def __init__(self, model_name: str = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", **kwargs):
        """
        Load the model and tokenizer.

        Args:
            model_name: Hugging Face model id or local path.
            **kwargs: Extra arguments forwarded to `AutoModelForCausalLM.from_pretrained`.
        """
        # `from_pretrained` returns the model in eval mode by default.
        # See: https://huggingface.co/docs/transformers/en/main_classes/model#transformers.PreTrainedModel
        self.model = AutoModelForCausalLM.from_pretrained(
            model_name,
            torch_dtype="auto",
            device_map="auto",
            **kwargs
        )
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.think_end_token_id = self._resolve_think_end_token_id()

    def _resolve_think_end_token_id(self) -> int:
        """Look up the id of "</think>" in the loaded tokenizer, with a fallback."""
        token_id = self.tokenizer.convert_tokens_to_ids("</think>")
        # An unknown token resolves to the tokenizer's unk id (possibly None);
        # in that case keep the previously hard-coded id.
        if isinstance(token_id, int) and token_id != self.tokenizer.unk_token_id:
            return token_id
        return self.DEFAULT_THINK_END_TOKEN_ID

    def inference(self, content: str) -> str:
        """
        Generate a reply to `content` and return the text after the last "</think>".

        Args:
            content: User message placed in a single-turn chat prompt.

        Returns:
            The decoded answer with special tokens removed and surrounding
            newlines stripped. If no "</think>" token was generated, the whole
            generated text is returned.
        """
        messages = [{"role": "user", "content": content}]

        chat_prompt = self.tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True
        )
        tokenized_input = self.tokenizer([chat_prompt], return_tensors="pt").to(self.model.device)
        generated_output = self.model.generate(
            **tokenized_input,
            max_new_tokens=4096,
        )
        # Drop the prompt tokens; keep only what the model generated.
        prompt_length = len(tokenized_input.input_ids[0])
        output_ids = generated_output[0][prompt_length:].tolist()

        # Find the position right after the LAST "</think>" token (0 if absent).
        answer_start = 0
        for position in range(len(output_ids) - 1, -1, -1):
            if output_ids[position] == self.think_end_token_id:
                answer_start = position + 1
                break

        outputs = self.tokenizer.decode(output_ids[answer_start:], skip_special_tokens=True).strip("\n")

        return outputs

In [None]:
class LLaMAModel(BaseModel):
    """Chat-style wrapper around a Hugging Face `text-generation` pipeline."""

    def __init__(self, model_name: str = "meta-llama/Llama-3.2-1B-Instruct", **kwargs):
        """
        Build the text-generation pipeline.

        The Hugging Face access token is never hard-coded: it is taken from an
        explicit `token=` keyword argument if given, otherwise from the
        `HF_TOKEN` environment variable. If neither is set, `None` is passed.
        # NOTE(review): with token=None the library is expected to fall back to
        # locally stored credentials; confirm against the Hub docs.

        Args:
            model_name: Hugging Face model id or local path.
            **kwargs: Extra arguments forwarded to `transformers.pipeline`.
        """
        token = kwargs.pop("token", None) or os.environ.get("HF_TOKEN") or None
        self.pipeline = pipeline(
            "text-generation",
            model=model_name,
            torch_dtype="auto",
            device_map="auto",
            token=token,
            **kwargs
        )

    def inference(self, content: str) -> str:
        """
        Generate a reply to `content`.

        Args:
            content: User message sent as a single-turn chat.

        Returns:
            The "content" of the last message in the generated conversation.
        """
        messages = [{"role": "user", "content": content}]

        outputs = self.pipeline(messages, max_new_tokens=1024)

        return outputs[0]["generated_text"][-1]["content"]

In [10]:
class ModelFactory:
    """Creates model wrappers from a short backend identifier."""

    def __init__(self, config: dict):
        """Keep the given configuration dict on the instance."""
        self.config = config

    @staticmethod
    def get_model(model_type: str, model_name: str, **kwargs) -> BaseModel:
        """
        Instantiate the wrapper matching `model_type`.

        Args:
            model_type: "deepseek" or "llama".
            model_name: Model id/path forwarded to the wrapper.
            **kwargs: Extra arguments forwarded to the wrapper constructor.

        Returns:
            A `DeepSeekModel` or `LLaMAModel` instance.

        Raises:
            ValueError: If `model_type` is neither "deepseek" nor "llama".
        """
        if model_type == "deepseek":
            wrapper_cls = DeepSeekModel
        elif model_type == "llama":
            wrapper_cls = LLaMAModel
        else:
            raise ValueError(f"Unsupported model type: {model_type}")

        return wrapper_cls(model_name=model_name, **kwargs)

### 6. Experiment Code

In [11]:
def finance_mse_metaprompt_generation(
    text: str,
    prices: List[float],
    start_datetime: str,
    end_datetime: str,
    pred_end_datetime: str,
    granularity: str,
    prediction_length: int,
    mode: str,
) -> str:
    """
    Build the prompt asking a language model to estimate future price values.

    The prompt has three parts: a task description (with the time range and
    granularity), the evidence selected by `mode`, and the required output
    format ("Predicted Prices: value1, value2, ...").

    Args:
        text (str): News article content within the input time series range.
        prices (List[float]): Historical stock prices.
        start_datetime (str): Start datetime of the input time series.
        end_datetime (str): End datetime of the input time series.
        pred_end_datetime (str): End datetime of the estimation period.
        granularity (str): Granularity of the input time series.
        prediction_length (int): Number of future time steps to estimate.
        mode (str): "timeseries_only", "text_only" or "combined".

    Returns:
        str: The assembled prompt.

    Raises:
        ValueError: If `mode` is not one of the three supported values.
    """
    task_description = "".join(
        [
            "You are an AI assistant trained in data analysis and modeling. ",
            f"Your task is to conduct a research-based timeseries estimation for the next {prediction_length} time steps ",
            "based on provided historical price movements and/or related news articles. ",
            "This analysis aims to explore patterns in the given dataset and should not be considered financial advice. ",
            f"The input time series spans from {start_datetime} to {end_datetime}, with a granularity of {granularity}. ",
            f"The estimation period extends from {end_datetime} to {pred_end_datetime}, maintaining the same granularity.",
        ]
    )

    if mode == "timeseries_only":
        evidence = (
            "You will analyze the numerical patterns in historical prices and extrapolate potential movements. "
            + f"The input prices are: {prices}. "
        )
    elif mode == "text_only":
        evidence = (
            "You will analyze sentiment and potential market impacts from the following news article content: "
            + f"{text}. "
        )
    elif mode == "combined":
        evidence = (
            "You will use both historical price movements and relevant news sentiment analysis "
            + f"to explore hypothetical market trends. The input prices are: {prices}. The news article states: {text}. "
        )
    else:
        raise ValueError(
            "Invalid mode. Choose from 'timeseries_only', 'text_only', or 'combined'."
        )

    output_format = "".join(
        [
            "\n\nPlease return your estimated values in a structured format as a  list of float numbers. ",
            "Ensure the output follows this format strictly: ",
            "\nPredicted Prices: value1, value2, ..., valueN. ",
            f"The number of estimated values should be exactly {prediction_length}. ",
        ]
    )

    return task_description + evidence + output_format

def finance_macd_metaprompt_generation(
    text: str,
    prices: List[float],
    start_datetime: str,
    end_datetime: str,
    pred_end_datetime: str,
    granularity: str,
    prediction_length: int,
    mode: str,
) -> str:
    """
    Build the prompt asking a language model to predict future MACD values.

    Args:
        text (str): News article content within the input time series range.
        prices (List[float]): Historical stock prices.
        start_datetime (str): Start datetime of the input time series.
        end_datetime (str): End datetime of the input time series.
        pred_end_datetime (str): End datetime of the estimation period.
        granularity (str): Granularity of the input time series.
        prediction_length (int): Number of future values to predict.
        mode (str): "timeseries_only", "text_only" or "combined".

    Returns:
        str: The assembled prompt. The answer format requested from the model
        is "Predicted Prices: value1, value2, ..., valueN".

    Raises:
        ValueError: If `mode` is not one of the three supported values.
    """
    sections = [
        "You are an AI assistant trained in data analysis and modeling. ",
        f"Your task is to Predict the future Moving Average Convergence Divergence (MACD) values for the next {prediction_length} time steps ",
        "based on provided historical timeseries movements and/or related news articles. ",
        f"The input time series spans from {start_datetime} to {end_datetime}, with a granularity of {granularity}. ",
        f"The estimation period extends from {end_datetime} to {pred_end_datetime}, maintaining the same granularity.",
    ]

    # Append the evidence section selected by `mode`.
    if mode == "timeseries_only":
        sections.append("You will analyze the numerical patterns in historical prices. ")
        sections.append(f"The input prices are: {prices}. ")
    elif mode == "text_only":
        sections.append("You will analyze sentiment and potential market impacts from the following news article content: ")
        sections.append(f"{text}. ")
    elif mode == "combined":
        sections.append("You will use both historical price movements and relevant text sentiment analysis ")
        sections.append(f"The input prices are: {prices}. The news article states: {text}. ")
    else:
        raise ValueError(
            "Invalid mode. Choose from 'timeseries_only', 'text_only', or 'combined'."
        )

    # Output-format instructions; the wording is kept exactly as-is so that
    # generated prompts do not change.
    sections.extend(
        [
            "\n\nPlease return your predicted MACD values in a structured format as a list of float numbers. Please predict the real possible values, do not use the naive linear extrapolation or similar methods",
            "Ensure the output follows this format strictly: ",
            "\nPredicted Prices: value1, value2, ..., valueN. ",
            f"The number of predicted values should be exactly {prediction_length}. ",
        ]
    )

    return "".join(sections)

def finance_bb_metaprompt_generation(
    text: str,
    prices: List[float],
    start_datetime: str,
    end_datetime: str,
    pred_end_datetime: str,
    granularity: str,
    prediction_length: int,
    mode: str,
) -> str:
    """
    Build the prompt asking a language model to predict upper Bollinger Band values.

    Args:
        text (str): News article content within the input time series range.
        prices (List[float]): Historical stock prices.
        start_datetime (str): Start datetime of the input time series.
        end_datetime (str): End datetime of the input time series.
        pred_end_datetime (str): End datetime of the estimation period.
        granularity (str): Granularity of the input time series.
        prediction_length (int): Number of future values to estimate.
        mode (str): "timeseries_only", "text_only" or "combined".

    Returns:
        str: The assembled prompt. The answer format requested from the model
        is "Predicted Prices: value1, value2, ..., valueN".

    Raises:
        ValueError: If `mode` is not one of the three supported values.
    """
    intro = (
        "You are an AI assistant trained in data analysis and modeling. "
        + f"Your task is to Predict the future upper Bollinger Band (BB) values  for the next {prediction_length} time steps "
        + "based on provided historical price movements and/or related news articles. "
        + f"The input time series spans from {start_datetime} to {end_datetime}, with a granularity of {granularity}. "
        + f"The estimation period extends from {end_datetime} to {pred_end_datetime}, maintaining the same granularity."
    )

    if mode == "timeseries_only":
        evidence = f"You will analyze the numerical patterns in historical prices. The input prices are: {prices}. "
    elif mode == "text_only":
        evidence = f"You will analyze sentiment and potential market impacts from the following news article content: {text}. "
    elif mode == "combined":
        evidence = (
            "You will use both historical price movements and relevant news sentiment analysis "
            f"to explore hypothetical market trends. The input prices are: {prices}. The news article states: {text}. "
        )
    else:
        raise ValueError(
            "Invalid mode. Choose from 'timeseries_only', 'text_only', or 'combined'."
        )

    answer_format = (
        "\n\nPlease return your estimated upper Bollinger Band (BB) values values in a structured format as a list of float numbers. "
        + "Ensure the output follows this format strictly: "
        + "\nPredicted Prices: value1, value2, ..., valueN. "
        + f"The number of estimated values should be exactly {prediction_length}. "
    )

    return f"{intro}{evidence}{answer_format}"

def parse_val_prediction_response(response: str) -> Union[List[float], None]:
    """
    Extract the predicted values from a model response.

    The function first looks for a comma-separated list right after
    "Predicted Prices:". Trailing punctuation/whitespace after the list (for
    example the final period in "Predicted Prices: 1.0, 2.0.") and empty items
    are ignored. If that list cannot be parsed, it falls back to collecting
    every decimal number ("digits.digits") found anywhere in the response.

    Args:
        response (str): The response containing the predicted prices.

    Returns:
        List[float]: The extracted numbers.
        None: If the response is empty or no numbers could be extracted.
    """
    if not response:
        return None

    match = re.search(r"Predicted Prices:\s*([-\d.,\s]+)", response)
    if match:
        # Drop sentence-ending punctuation so a well-formed list is not rejected
        # just because the model closed it with "." or ",".
        listed = match.group(1).rstrip(" \t\r\n.,")
        tokens = [token.strip() for token in listed.split(",") if token.strip()]
        if tokens:
            try:
                return [float(token) for token in tokens]
            except ValueError:
                pass  # Not a clean list; try the looser fallback below.

    # Fallback: every decimal number in the response, in order of appearance.
    possible_numbers = re.findall(r"-?\d+\.\d+", response)
    if possible_numbers:
        return [float(num) for num in possible_numbers]

    return None

def finance_classification_metaprompt_generation(text=None, timestamps=None, prices=None, mode=None):
    """
    Build the prompt for five-way stock-trend classification.

    The model is asked to pick one of the labels "<-4%", "-2% ~ -4%",
    "-2% ~ +2%", "+2% ~ +4%", ">+4%" and to wrap it as ^^^label^^^.

    Args:
        text: News text; used by the "combined" and "text_only" modes.
        timestamps: Sequence whose first and last items are shown as the time
            range; used by the "combined" and "timeseries_only" modes.
        prices: Iterable of prices, rendered comma-separated; used by the
            "combined" and "timeseries_only" modes. May be None for "text_only".
        mode: "combined", "text_only" or "timeseries_only".

    Returns:
        str: The assembled prompt.

    Raises:
        ValueError: If `mode` is not one of the three supported values.
    """
    if mode not in ("combined", "text_only", "timeseries_only"):
        # Fail with a clear message instead of an UnboundLocalError at `return`.
        raise ValueError(
            "Invalid mode. Choose from 'timeseries_only', 'text_only', or 'combined'."
        )

    if mode == "text_only":
        # The price series is not part of this prompt, so `prices` may be None.
        time_series_data = ""
    else:
        time_series_data = ", ".join([f"{price}" for price in prices])

    if mode == "combined":
        meta_prompt = f"""
            You are a financial prediction expert with knowledge of advanced machine learning models and time-series analysis.
            Your goal is to predict the stock trend (rise, neutral, or fall) based on the following inputs:

            1. **Time Series Stock Price Data**:
            - This data includes stock prices recorded at 1-hour intervals over the last month from {timestamps[0]} to {timestamps[-1]}.
            - Example data format:
                {time_series_data}

            2. **News Data**:
            - This includes news headlines and summaries relevant to the stock's company or sector.
            - Example data format:
                {text}

            ### Task:
            Analyze the provided time-series data and news to identify future trends of the stock performance. Ensure that the news data is used to supplement the insights from the time-series analysis, focusing on combining both inputs for a more accurate prediction.

            ### Output:
            Provide a prediction for the stock trend categorized one of the following labels:
            - "<-4%"
            - "-2% ~ -4%"
            - "-2% ~ +2%"
            - "+2% ~ +4%"
            - ">+4%"

            please think step-by-step and briefly explain how the combination of time-series data and news data led to the prediction;
            then wrap your final answer in the final predicted label in the format ^^^label^^^
        """

    elif mode == "text_only":
        meta_prompt = f"""
            You are a financial prediction expert with knowledge of advanced machine learning models and time-series analysis.
            Your goal is to predict the stock trend with given labels based on the following input:

            **News Data**:
            - This includes news headlines and summaries relevant to the stock's company or sector.
            - Example data format:
                {text}

            ### Output:
            Provide a prediction for the stock trend categorized one of the following labels:
            - "<-4%"
            - "-2% ~ -4%"
            - "-2% ~ +2%"
            - "+2% ~ +4%"
            - ">+4%"

            ### Task:
            Analyze the news semantics to identify trends and patterns that could impact stock performance.
            Then wrap your final answer in the final predicted label in the format ^^^label^^^
        """

    else:  # mode == "timeseries_only"
        meta_prompt = f"""
            You are a financial prediction expert with knowledge of advanced machine learning models and time-series analysis.
            Your goal is to predict the stock trend with given labels based on the following input:

            1. **Time Series Stock Price Data**:
            - This data includes stock prices recorded at 1-hour intervals over the last month from {timestamps[0]} to {timestamps[-1]}.
            - Example data format:
                {time_series_data}

            ### Output:
            Provide a prediction for the stock trend categorized one of the following labels:
            - "<-4%"
            - "-2% ~ -4%"
            - "-2% ~ +2%"
            - "+2% ~ +4%"
            - ">+4%"

            ### Task:
            Analyze the provided time-series data to identify trends and patterns that could impact stock performance. Focus solely on the time-series data for making predictions.
             then wrap your final answer in the final predicted label in the format ^^^label^^^
        """

    return meta_prompt

def parse_cls_response(answer):
    """
    Extract the final label a model wrapped in carets.

    The last ^^^label^^^ occurrence is preferred. If the model did not use
    exactly three carets, the last span enclosed by one or more carets on each
    side is used instead.

    Args:
        answer (str): Raw model response.

    Returns:
        str: The text between the carets of the last match.

    Raises:
        IndexError: If the response contains no caret-wrapped span.
    """
    strict_matches = re.findall(r'\^\^\^(.*?)\^\^\^', answer)
    if strict_matches:
        return strict_matches[-1]

    # Looser delimiter; indexing raises IndexError when nothing matches,
    # which is what callers of the previous implementation observed.
    return re.findall(r'\^+(.*?)\^+', answer)[-1]



def finance_correlation_metaprompt_generation(setting, sticker, time1, time2, in_price, news, time_news):
    """
    Build the prompt asking for the correlation between news sentiment and
    the upcoming price movement.

    Args:
        setting: "long" selects the 30-day history / 7-day horizon / 1-hour
            interval wording; any other value selects 7-day / 1 day / 5 minutes.
        sticker: Stock identifier shown in the prompt.
        time1: Start of the price window.
        time2: End of the price window.
        in_price: Price series, rendered with its default string form.
        news: News/analysis text.
        time_news: Publication time of the news.

    Returns:
        str: System instructions, a blank line, then the query ending in "Answer:".
    """
    is_long = setting == "long"
    time_interval = "1 hour" if is_long else "5 minutes"
    history_span, horizon = ("30-day", "7 days") if is_long else ("7-day", "1 day")

    system_prompt = (
        "You are an expert in finance and stock market analysis. "
        f"Based on the given {history_span} historical stock price time series and a financial analysis "
        "published at the last timestamp of the time series, your task is to predict the correlation between "
        f"the stock's price fluctuations in the next {horizon} and the analysis sentiment "
        "(positive correlation indicates that positive analysis leads to price increase and negative analysis leads to price decrease). "
        "Take into account external factors or market conditions that might affect stock price movement."
    )
    question = "Return your answer in one of the following without any other words: Strong Positive Correlation, Moderate Positive Correlation, No Correlation, Moderate Negative Correlation, Strong Negative Correlation."

    # Every field of the query is preceded by a run of 12 spaces; keep it so
    # the generated prompt text stays exactly the same.
    gap = " " * 12
    query = "".join(
        [
            f"stock price of {sticker} between {time1} to {time2}, time interval is {time_interval}: ",
            f"{gap}{in_price}",
            f"{gap}News published at {time_news}: ",
            f"{gap}{news}",
            f"{gap}{question} Answer:",
        ]
    )

    return system_prompt + "\n\n" + query




def finance_mcqa_metaprompt_generation(setting, sticker, time1, time2, in_price, news, time_news, question):
    """
    Build the prompt for a multiple-choice (A-D) question about a stock.

    Args:
        setting: "long" selects the 30-day history / 1-hour interval wording;
            any other value selects 7-day / 5 minutes.
        sticker: Stock identifier shown in the prompt.
        time1: Start of the price window.
        time2: End of the price window.
        in_price: Price series, rendered with its default string form.
        news: News/analysis text.
        time_news: Publication time of the news.
        question: The multiple-choice question text.

    Returns:
        str: System instructions, a blank line, then the query ending in "Answer:".
    """
    if setting == "long":
        time_interval, history_span = "1 hour", "30-day"
    else:
        time_interval, history_span = "5 minutes", "7-day"

    system_prompt = (
        "You are an expert in finance and stock market analysis. "
        f"Your task is to answer the question based on the given {history_span} historical stock price time series "
        "and a financial analysis published at the last timestamp of the time series. "
        "Return your answer only in the letter (A, B, C, or D). "
    )

    # Every field of the query is preceded by a run of 12 spaces; keep it so
    # the generated prompt text stays exactly the same.
    gap = " " * 12
    query_parts = [
        f"stock price of {sticker} between {time1} to {time2}, time interval is {time_interval}: ",
        f"{gap}{in_price}",
        f"{gap}News published at {time_news}: ",
        f"{gap}{news}",
        f"{gap}Question: {question}. Give your answer in the letter (A, B, C, or D) without any other words. Answer:",
    ]

    return system_prompt + "\n\n" + "".join(query_parts)

In [12]:
# Alternative (currently disabled) argument set: it targets the short QA
# dataset folder and uses --dataset_folder / --setting flags.
# import sys

# sys.argv = [
#     "script_name",  # Placeholder for script name (ignored by argparse)
#     "--dataset_folder", "./MTBench-Test/MTBench_finance_QA_short",
#     "--save_path", "./MTBench-Test/llama/qa_short",
#     "--model_type", "llama",
#     "--model", "meta-llama/Llama-3.2-1B-Instruct",
#     "--setting", "short"
# ]

# Active argument set: overwrite sys.argv so the argparse-based evaluation
# code that follows reads these values as if they were passed on a command line.
import sys

sys.argv = [
    "script_name",  # Placeholder for script name (ignored by argparse)
    "--dataset_path", "./MTBench-Test/MTBench_finance_aligned_pairs_short/train-00000-of-00001.parquet",
    "--save_path", "./MTBench-Test/llama/tsforecast_short_combined",
    "--model_type", "llama",
    "--model", "meta-llama/Llama-3.2-1B-Instruct",
    "--mode", "combined",
    "--indicator", "time"
]

In [13]:
parser = argparse.ArgumentParser()
parser.add_argument("--dataset_path", type=str, help="path to the dataset")
parser.add_argument("--save_path", type=str, help="path to save the results")
parser.add_argument("--indicator", default="macd", type=str, help="macd, bb, or time")
parser.add_argument("--model_type",  type=str, help="deepseek or llama")
parser.add_argument("--model",  type=str, help="model name")
parser.add_argument(
    "--mode",
    type=str,
    default="combined",
    help="choose from timeseries_only, text_only, combined",
)
args = parser.parse_args()

save_path = Path(args.save_path)
details_path = save_path / "output_details"
visualizations_path = save_path / "visualizations"
details_path.mkdir(parents=True, exist_ok=True)
visualizations_path.mkdir(parents=True, exist_ok=True)

data_list = []
df = pd.read_parquet(args.dataset_path)
filename = Path(args.dataset_path).name

df["text"] = df["text"].apply(lambda x: json.loads(x) if isinstance(x, str) else x)
df["technical"] = df["technical"].apply(lambda x: json.loads(x) if isinstance(x, str) else x)

for col in ["input_window", "output_window", "input_timestamps"]:
    df[col] = df[col].apply(lambda x: x.tolist() if isinstance(x, np.ndarray) else x)

for _, row in df.iterrows():
    text = row["text"]
    technical = row["technical"]

    extracted_data = {
        "filename": filename,
        "input_window": row["input_window"],
        "output_window": row["output_window"],
        "text": text["content"],
        "input_timestamps": row["input_timestamps"],
        "in_macd": technical.get("in_macd"),
        "out_macd": technical.get("out_macd"),
        "in_upper_bb": technical.get("in_upper_bb"),
        "out_upper_bb": technical.get("out_upper_bb"),
    }

    data_list.append(extracted_data)
data_list = data_list[:100]

model = ModelFactory.get_model(model_type=args.model_type, model_name=args.model)

result_list = []
tot_samples = len(data_list)
print(f"Evaluating {tot_samples} samples...")

epoch_results = []
cumulative_mse, cumulative_mae, cumulative_rmse, cumulative_mape = [], [], [], []
for idx, sample in tqdm(enumerate(data_list), total=tot_samples):
    try:
        datetime_list = [
            datetime.fromtimestamp(s).strftime("%Y-%m-%d %H:%M:%S")
            for s in sample["input_timestamps"]
        ]
        text = sample["text"]
        input_ts = sample["input_window"]
        granularity_string = format_time_difference(
            sample["input_timestamps"][1] - sample["input_timestamps"][0]
        )

        if args.indicator == "macd":
            output_ts = sample["out_macd"]
            designed_prompt = finance_macd_metaprompt_generation(
                text=text,
                prices=input_ts,
                start_datetime=datetime_list[0],
                end_datetime=datetime_list[-1],
                pred_end_datetime=output_ts[-1],
                granularity=granularity_string,
                prediction_length=len(output_ts),
                mode=args.mode,
            )
        elif args.indicator == "bb":
            output_ts = sample["out_upper_bb"]
            designed_prompt = finance_bb_metaprompt_generation(
                text=text,
                prices=input_ts,
                start_datetime=datetime_list[0],
                end_datetime=datetime_list[-1],
                pred_end_datetime=output_ts[-1],
                granularity=granularity_string,
                prediction_length=len(output_ts),
                mode=args.mode,
            )
        elif args.indicator == "time":
            output_ts = sample["output_window"]
            designed_prompt = finance_mse_metaprompt_generation(
                text=text,
                prices=input_ts,
                start_datetime=datetime_list[0],
                end_datetime=datetime_list[-1],
                pred_end_datetime=output_ts[-1],
                granularity=granularity_string,
                prediction_length=len(output_ts),
                mode=args.mode,
            )

        answer = model.inference(designed_prompt)
        answer = answer.strip().replace('"', '')

        predict_ts = parse_val_prediction_response(answer)
        predict_ts_orig = predict_ts
        predict_ts = np.interp( # type: ignore
            np.linspace(0, 1, len(output_ts)),
            np.linspace(0, 1, len(predict_ts)),
            predict_ts
        )

        res = {
            "filename": sample["filename"],
            "response": answer,
            "ground_truth": output_ts,
            "predict": predict_ts.tolist(),
        }
        result_list.append(res)

        save_to_json(res, details_path / sample["filename"])

        if args.indicator == "macd":
            first_half = sample["in_macd"]
        elif args.indicator == "bb":
            first_half = sample["in_upper_bb"]
        elif args.indicator == "time":
            first_half = sample["input_window"]
        plot_series(sample["filename"], first_half, output_ts, predict_ts_orig, visualizations_path)

        mse = np.mean((np.array(output_ts) - np.array(predict_ts)) ** 2)
        mae = np.mean(np.abs(np.array(output_ts) - np.array(predict_ts)))
        rmse = np.sqrt(mse)
        mape = calculate_mape(output_ts, predict_ts)

        if args.indicator == "macd" and mse > 10:
            print(f"{sample['filename']} failed mse", mse)
            epoch_results.append({
                "filename": sample["filename"],
                "failed": True,
                "epoch": idx + 1,
                "mse": mse,
                "mae": mae,
                "rmse": rmse,
                "mape": mape,
            })
            continue

        if args.indicator == "time" and mse > 100:
            print(f"{sample['filename']} failed mse ", mse)
            epoch_results.append({
                "filename": sample["filename"],
                "failed": True,
                "epoch": idx + 1,
                "mse": mse,
                "mae": mae,
                "rmse": rmse,
                "mape": mape,
            })
            continue

        if args.indicator == "bb" and mse > 100:
            print(f"{sample['filename']} failed mse ", mse)
            epoch_results.append({
                "filename": sample["filename"],
                "failed": True,
                "epoch": idx + 1,
                "mse": mse,
                "mae": mae,
                "rmse": rmse,
                "mape": mape,
            })
            continue

        cumulative_mse.append(mse)
        cumulative_mae.append(mae)
        cumulative_rmse.append(rmse)
        cumulative_mape.append(mape)

        epoch_results.append({
            "filename": sample["filename"],
            "epoch": idx + 1,
            "mse": mse,
            "mae": mae,
            "rmse": rmse,
            "mape": mape,
            "mean_mse": np.mean(cumulative_mse),
            "mean_mae": np.mean(cumulative_mae),
            "mean_rmse": np.mean(cumulative_rmse),
            "mean_mape": np.mean(cumulative_mape),
        })
        save_to_json(epoch_results, f"{save_path}/epoch_results.json")
        print(
            "{}/{}: mse: {:.4f}, mae: {:.4f}, rmse: {:.4f}".format(
                idx, tot_samples, mse, mae, rmse
            )
        )
    except Exception as e:
        print(f"Skipping {idx} due to error: {e}")


# Build the final summary from the metrics accumulated by the evaluation loop above.
#
# `result_list` receives an entry for every sample that produced a prediction, which
# happens BEFORE the per-indicator MSE rejection checks, so "total_samples" also counts
# rejected samples. The `cumulative_*` lists only receive accepted samples, so the
# averages below are over "valid_samples" entries, not "total_samples".
def _mean_or_none(values):
    """Return np.mean(values), or None when `values` is empty.

    np.mean of an empty sequence yields NaN and emits a RuntimeWarning; NaN is also not
    valid strict JSON. Returning None keeps the summary file well-formed (null) when
    every sample was rejected or skipped.
    """
    if len(values) == 0:
        return None
    return np.mean(values)


summary = {
    "total_samples": len(result_list),
    # Number of samples that actually contribute to the averaged metrics.
    "valid_samples": len(cumulative_mse),
    "mse": _mean_or_none(cumulative_mse),
    "mae": _mean_or_none(cumulative_mae),
    "rmse": _mean_or_none(cumulative_rmse),
    "mape": _mean_or_none(cumulative_mape),
}

# Build the output path once so the saved file and the log message cannot drift apart.
final_results_path = f"{save_path}/final_results.json"
save_to_json(summary, final_results_path)
print(f"Processing complete. Results saved to {final_results_path}")

config.json:   0%|          | 0.00/877 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.47G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/189 [00:00<?, ?B/s]

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/54.5k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/296 [00:00<?, ?B/s]

Device set to use cuda:0


Evaluating 100 samples...


  0%|          | 0/100 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  1%|          | 1/100 [00:47<1:19:03, 47.92s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


0/100: mse: 0.8317, mae: 0.8381, rmse: 0.9120


  2%|▏         | 2/100 [01:24<1:07:45, 41.48s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  132.52623223535713


  3%|▎         | 3/100 [02:03<1:04:55, 40.16s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


2/100: mse: 0.0040, mae: 0.0533, rmse: 0.0633


  4%|▍         | 4/100 [02:44<1:04:42, 40.45s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


3/100: mse: 82.8958, mae: 9.0557, rmse: 9.1047


  5%|▌         | 5/100 [03:12<57:03, 36.03s/it]  Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  10495.158785119746


  6%|▌         | 6/100 [03:53<59:01, 37.68s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


5/100: mse: 0.7595, mae: 0.4941, rmse: 0.8715


  7%|▋         | 7/100 [04:30<57:50, 37.32s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


6/100: mse: 46.8875, mae: 6.8222, rmse: 6.8474


  8%|▊         | 8/100 [05:08<57:40, 37.61s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  1346.8874377098887


  9%|▉         | 9/100 [05:34<51:36, 34.03s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Skipping 8 due to error: object of type 'NoneType' has no len()


 10%|█         | 10/100 [06:18<55:36, 37.07s/it]You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


9/100: mse: 9.6815, mae: 3.0996, rmse: 3.1115


 11%|█         | 11/100 [06:59<57:05, 38.49s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  5182.589717534157


 12%|█▏        | 12/100 [07:43<58:39, 39.99s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


11/100: mse: 28.5255, mae: 5.2523, rmse: 5.3409


 13%|█▎        | 13/100 [08:04<49:44, 34.31s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  1417.5239369505473


 14%|█▍        | 14/100 [08:45<52:11, 36.41s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  3506.452420898866


 15%|█▌        | 15/100 [09:23<52:16, 36.90s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


14/100: mse: 12.8349, mae: 3.5248, rmse: 3.5826


 16%|█▌        | 16/100 [10:05<53:46, 38.41s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


15/100: mse: 0.0486, mae: 0.1897, rmse: 0.2204


 17%|█▋        | 17/100 [10:32<48:07, 34.79s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


16/100: mse: 2.1516, mae: 1.4606, rmse: 1.4668


 18%|█▊        | 18/100 [11:13<50:21, 36.85s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Skipping 17 due to error: object of type 'NoneType' has no len()


 19%|█▉        | 19/100 [11:58<52:52, 39.17s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


18/100: mse: 0.1794, mae: 0.3091, rmse: 0.4235


 20%|██        | 20/100 [12:39<53:03, 39.79s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


19/100: mse: 8.2179, mae: 2.8434, rmse: 2.8667


 21%|██        | 21/100 [13:02<45:43, 34.72s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


20/100: mse: 29.4599, mae: 5.4089, rmse: 5.4277


 22%|██▏       | 22/100 [13:45<48:13, 37.09s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


21/100: mse: 4.7735, mae: 2.1715, rmse: 2.1848


 23%|██▎       | 23/100 [14:11<43:22, 33.80s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  8553.50335267577


 24%|██▍       | 24/100 [14:56<47:17, 37.34s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


23/100: mse: 0.4569, mae: 0.5790, rmse: 0.6759


 25%|██▌       | 25/100 [15:36<47:25, 37.94s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


24/100: mse: 19.0573, mae: 3.5000, rmse: 4.3655


 26%|██▌       | 26/100 [16:16<47:31, 38.53s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


25/100: mse: 19.7223, mae: 4.3328, rmse: 4.4410


 27%|██▋       | 27/100 [17:00<48:49, 40.14s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


26/100: mse: 2.6782, mae: 1.6178, rmse: 1.6365


 28%|██▊       | 28/100 [17:40<48:06, 40.09s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


27/100: mse: 10.2812, mae: 3.1997, rmse: 3.2064


 29%|██▉       | 29/100 [18:17<46:32, 39.33s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


28/100: mse: 89.7031, mae: 7.7857, rmse: 9.4712


 30%|███       | 30/100 [18:58<46:32, 39.89s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


29/100: mse: 0.4823, mae: 0.6705, rmse: 0.6944


 31%|███       | 31/100 [19:38<45:44, 39.78s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


30/100: mse: 4.2377, mae: 0.4316, rmse: 2.0586


 32%|███▏      | 32/100 [20:17<44:47, 39.52s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


31/100: mse: 1.9357, mae: 1.1872, rmse: 1.3913


 33%|███▎      | 33/100 [20:39<38:21, 34.34s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


32/100: mse: 3.9314, mae: 1.9817, rmse: 1.9828


 34%|███▍      | 34/100 [21:23<40:53, 37.17s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


33/100: mse: 8.9523, mae: 2.4831, rmse: 2.9920


 35%|███▌      | 35/100 [22:05<41:59, 38.77s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


34/100: mse: 1.9321, mae: 1.3900, rmse: 1.3900


 36%|███▌      | 36/100 [22:47<42:17, 39.65s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  2835.5907164452246


 37%|███▋      | 37/100 [23:25<41:01, 39.07s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  195.15888538841313


 38%|███▊      | 38/100 [24:04<40:28, 39.17s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


37/100: mse: 2.2664, mae: 1.3617, rmse: 1.5055


 39%|███▉      | 39/100 [24:43<39:48, 39.15s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


38/100: mse: 8.7594, mae: 2.9524, rmse: 2.9596


 40%|████      | 40/100 [25:23<39:25, 39.42s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


39/100: mse: 26.9293, mae: 5.0132, rmse: 5.1893


 41%|████      | 41/100 [26:01<38:20, 38.99s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


40/100: mse: 2.0365, mae: 1.4159, rmse: 1.4271


 42%|████▏     | 42/100 [26:28<34:17, 35.47s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


41/100: mse: 1.5371, mae: 0.9071, rmse: 1.2398


 43%|████▎     | 43/100 [27:10<35:22, 37.23s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  104.30171484950066


 44%|████▍     | 44/100 [27:53<36:17, 38.88s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


43/100: mse: 25.1894, mae: 1.3744, rmse: 5.0189


 45%|████▌     | 45/100 [28:25<33:52, 36.95s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


44/100: mse: 9.8698, mae: 2.7742, rmse: 3.1416


 46%|████▌     | 46/100 [29:07<34:35, 38.43s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


45/100: mse: 2.1610, mae: 1.1522, rmse: 1.4700


 47%|████▋     | 47/100 [29:45<33:55, 38.42s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  1587.1586423675144


 48%|████▊     | 48/100 [30:23<33:05, 38.18s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  1984.3325441276927


 49%|████▉     | 49/100 [31:05<33:25, 39.32s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


48/100: mse: 1.9229, mae: 1.2300, rmse: 1.3867


 50%|█████     | 50/100 [31:47<33:27, 40.16s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


49/100: mse: 12.1505, mae: 3.1175, rmse: 3.4858


 51%|█████     | 51/100 [32:25<32:10, 39.40s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


50/100: mse: 0.1131, mae: 0.3304, rmse: 0.3363


 52%|█████▏    | 52/100 [33:05<31:38, 39.56s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Skipping 51 due to error: object of type 'NoneType' has no len()


 53%|█████▎    | 53/100 [33:49<32:12, 41.12s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


52/100: mse: 0.5927, mae: 0.5584, rmse: 0.7699


 54%|█████▍    | 54/100 [34:18<28:42, 37.44s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  304.50791432435875


 55%|█████▌    | 55/100 [35:03<29:42, 39.61s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


54/100: mse: 11.8564, mae: 1.4658, rmse: 3.4433


 56%|█████▌    | 56/100 [35:41<28:40, 39.11s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


55/100: mse: 58.4510, mae: 6.9237, rmse: 7.6453


 57%|█████▋    | 57/100 [36:23<28:39, 39.99s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


56/100: mse: 0.0256, mae: 0.1338, rmse: 0.1601


 58%|█████▊    | 58/100 [37:08<29:06, 41.59s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


57/100: mse: 0.9845, mae: 0.9721, rmse: 0.9922


 59%|█████▉    | 59/100 [37:47<27:57, 40.91s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  509.10155821987047


 60%|██████    | 60/100 [38:25<26:39, 39.98s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


59/100: mse: 11.6067, mae: 3.3927, rmse: 3.4069


 61%|██████    | 61/100 [39:07<26:17, 40.44s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


60/100: mse: 1.6647, mae: 1.2777, rmse: 1.2902


 62%|██████▏   | 62/100 [39:54<26:57, 42.57s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


61/100: mse: 1.8046, mae: 1.2977, rmse: 1.3433


 63%|██████▎   | 63/100 [40:36<26:06, 42.34s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


62/100: mse: 72.6891, mae: 8.4636, rmse: 8.5258


 64%|██████▍   | 64/100 [41:15<24:43, 41.20s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Skipping 63 due to error: object of type 'NoneType' has no len()


 65%|██████▌   | 65/100 [41:24<18:26, 31.60s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


64/100: mse: 0.0313, mae: 0.1614, rmse: 0.1770


 66%|██████▌   | 66/100 [41:49<16:46, 29.61s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  10083.556132718202


 67%|██████▋   | 67/100 [42:30<18:13, 33.13s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


66/100: mse: 27.5249, mae: 3.7294, rmse: 5.2464


 68%|██████▊   | 68/100 [43:14<19:21, 36.29s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


67/100: mse: 0.1670, mae: 0.3812, rmse: 0.4087


 69%|██████▉   | 69/100 [43:54<19:23, 37.53s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


68/100: mse: 0.0390, mae: 0.1884, rmse: 0.1974


 70%|███████   | 70/100 [44:41<20:09, 40.33s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


69/100: mse: 0.0063, mae: 0.0785, rmse: 0.0796


 71%|███████   | 71/100 [45:23<19:41, 40.73s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


70/100: mse: 6.6517, mae: 1.0787, rmse: 2.5791


 72%|███████▏  | 72/100 [46:05<19:10, 41.08s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  58368.00407900641


 73%|███████▎  | 73/100 [46:29<16:12, 36.01s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


72/100: mse: 2.2786, mae: 1.4639, rmse: 1.5095


 74%|███████▍  | 74/100 [46:55<14:16, 32.96s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


73/100: mse: 0.2169, mae: 0.4113, rmse: 0.4657


 75%|███████▌  | 75/100 [47:36<14:48, 35.55s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  171.97207773575266


 76%|███████▌  | 76/100 [48:16<14:44, 36.85s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  124.81233964689665


 77%|███████▋  | 77/100 [48:56<14:25, 37.62s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  3847.754491830779


 78%|███████▊  | 78/100 [49:33<13:45, 37.55s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Skipping 77 due to error: object of type 'NoneType' has no len()


 79%|███████▉  | 79/100 [50:17<13:51, 39.58s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


78/100: mse: 51.7988, mae: 7.0907, rmse: 7.1971


 80%|████████  | 80/100 [50:43<11:50, 35.53s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


79/100: mse: 3.8587, mae: 1.6143, rmse: 1.9643


 81%|████████  | 81/100 [51:25<11:48, 37.31s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


80/100: mse: 30.4505, mae: 5.5007, rmse: 5.5182


 82%|████████▏ | 82/100 [52:07<11:35, 38.63s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


81/100: mse: 6.9347, mae: 2.5974, rmse: 2.6334


 83%|████████▎ | 83/100 [52:44<10:50, 38.24s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


82/100: mse: 2.3928, mae: 1.4994, rmse: 1.5469


 84%|████████▍ | 84/100 [52:53<07:51, 29.48s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


83/100: mse: 0.5477, mae: 0.7301, rmse: 0.7401


 85%|████████▌ | 85/100 [53:18<07:04, 28.29s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


84/100: mse: 1.6107, mae: 1.1292, rmse: 1.2691


 86%|████████▌ | 86/100 [53:59<07:26, 31.86s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


85/100: mse: 0.0500, mae: 0.1798, rmse: 0.2236


 87%|████████▋ | 87/100 [54:33<07:03, 32.60s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  2109.676266853974


 88%|████████▊ | 88/100 [55:17<07:11, 35.99s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


87/100: mse: 0.0194, mae: 0.1175, rmse: 0.1392


 89%|████████▉ | 89/100 [56:01<07:02, 38.39s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


88/100: mse: 8.4221, mae: 1.2582, rmse: 2.9021


 90%|█████████ | 90/100 [56:43<06:36, 39.63s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


89/100: mse: 0.1323, mae: 0.2969, rmse: 0.3637


 91%|█████████ | 91/100 [57:25<06:02, 40.26s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


90/100: mse: 0.1953, mae: 0.3844, rmse: 0.4420


 92%|█████████▏| 92/100 [58:06<05:22, 40.36s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


91/100: mse: 26.2574, mae: 4.5819, rmse: 5.1242


 93%|█████████▎| 93/100 [58:47<04:44, 40.70s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


92/100: mse: 0.0293, mae: 0.1319, rmse: 0.1711


 94%|█████████▍| 94/100 [59:31<04:10, 41.71s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


93/100: mse: 32.0261, mae: 5.6376, rmse: 5.6592


 95%|█████████▌| 95/100 [1:00:16<03:32, 42.56s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


94/100: mse: 4.7957, mae: 2.1311, rmse: 2.1899


 96%|█████████▌| 96/100 [1:01:02<02:54, 43.55s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


95/100: mse: 5.1713, mae: 2.2739, rmse: 2.2740


 97%|█████████▋| 97/100 [1:01:41<02:07, 42.41s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  2155.883636758355


 98%|█████████▊| 98/100 [1:02:25<01:25, 42.62s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


97/100: mse: 1.6546, mae: 1.2285, rmse: 1.2863


 99%|█████████▉| 99/100 [1:03:06<00:42, 42.37s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


98/100: mse: 2.1290, mae: 1.2360, rmse: 1.4591


100%|██████████| 100/100 [1:03:33<00:00, 38.13s/it]

99/100: mse: 1.9203, mae: 1.1215, rmse: 1.3857
Processing complete. Results saved to MTBench-Test/llama/tsforecast_short_combined/final_results.json



