### 1. Install Dependencies

In [1]:
!pip install pandas gdown huggingface-hub numpy matplotlib scikit-learn transformers torch tqdm

Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch)
  Downloading nvidia_curand_cu12-10.3.5

In [2]:
!pip install -U datasets

Collecting datasets
  Downloading datasets-3.6.0-py3-none-any.whl.metadata (19 kB)
Collecting fsspec<=2025.3.0,>=2023.1.0 (from fsspec[http]<=2025.3.0,>=2023.1.0->datasets)
  Downloading fsspec-2025.3.0-py3-none-any.whl.metadata (11 kB)
Downloading datasets-3.6.0-py3-none-any.whl (491 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m491.5/491.5 kB[0m [31m13.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading fsspec-2025.3.0-py3-none-any.whl (193 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m193.6/193.6 kB[0m [31m17.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: fsspec, datasets
  Attempting uninstall: fsspec
    Found existing installation: fsspec 2025.3.2
    Uninstalling fsspec-2025.3.2:
      Successfully uninstalled fsspec-2025.3.2
  Attempting uninstall: datasets
    Found existing installation: datasets 2.14.4
    Uninstalling datasets-2.14.4:
      Successfully uninstalled datasets-2.14.4
[31mERROR: pip's dependency r

### 2. Imports

In [3]:
import os
import json
import sys
import argparse
import re
from typing import List, Union
import pandas as pd
from tqdm import tqdm
from pathlib import Path
from google.colab import drive
from datetime import datetime

from abc import ABC, abstractmethod

import torch
import numpy as np
import matplotlib.pyplot as plt
from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

In [4]:
torch.cuda.is_available()

True

### 3. Connect to Google Drive

In [5]:
drive.mount("/content/drive",force_remount=True)
os.chdir("/content/drive/My Drive")

Mounted at /content/drive


### 4. Utils Code

In [6]:
def format_time_difference(seconds):
    minutes = seconds // 60
    hours = minutes // 60
    days = hours // 24

    if days > 0:
        if hours % 24 > 0.1:
            return f"{days} days-{hours % 24} hours"
        else:
            return f"{days} days"
    elif hours > 0:
        if minutes % 60 > 0.1:
            return f"{hours} hours-{minutes % 60} minutes"
        else:
            return f"{hours} hours"
    elif minutes > 0:
        if seconds % 60 > 0.1:
            return f"{minutes} minutes-{seconds % 60} seconds"
        else:
            return f"{minutes} minutes"
    else:
        return f"{seconds} seconds"

def save_to_json(data, save_path):
    os.makedirs(os.path.dirname(save_path), exist_ok=True)
    with open(save_path, "w") as f:
        json.dump(data, f, indent=4)

def plot_series(filename, input_ts, output_ts, predicted_ts, save_folder):
    plt.figure(figsize=(10, 5))
    plt.plot(range(len(input_ts)), input_ts, label="Input Time Series", marker='o')
    plt.plot(range(len(input_ts), len(input_ts) + len(output_ts)), output_ts, label="Ground Truth", marker='o')
    plt.plot(range(len(input_ts), len(input_ts) + len(predicted_ts)), predicted_ts, label="Predicted", linestyle='dashed')
    plt.legend()
    plt.title(f"Prediction for {filename}")
    plt.xlabel("Time Steps")
    plt.ylabel("Value")
    plt.grid()

    base_name = os.path.splitext(filename)[0]
    save_path = os.path.join(save_folder, base_name + ".png")
    plt.savefig(save_path)

    plt.close()

def calculate_mape(y_true, y_pred):
    y_true, y_pred = np.array(y_true), np.array(y_pred)

    # Avoid division by zero
    mask = y_true != 0
    return np.mean(np.abs((y_true[mask] - y_pred[mask]) / y_true[mask])) * 100


def calculate_acc(result_list, regrouped_labels = None):
    if regrouped_labels is None:
        correct_pred = sum(1 for result in result_list if result["ground_truth"] in result["predict"])
    else:
        correct_pred = 0
        for result in result_list:
            gt_group = regrouped_labels[result['ground_truth']]
            for original_label in regrouped_labels.keys():
                if original_label in result['predict']:
                    predict_group = regrouped_labels[original_label]
                    if gt_group == predict_group:
                        correct_pred += 1
                        break

    total_pred = len(result_list)
    accuracy = correct_pred / total_pred

    return accuracy


def calculate_correlation_acc(result_list):
    model_predictions = {"total": 0, "exact_correct": 0, "brief_correct": 0}
    positive_correlations = ["Strong Positive Correlation", "Moderate Positive Correlation"]
    negative_correlations = ["Strong Negative Correlation", "Moderate Negative Correlation"]
    for result in result_list:
        prediction = result["predict"].strip()
        model_predictions["total"] += 1
        if prediction == result["ground_truth"]:
            model_predictions["exact_correct"] += 1

        # Brief accuracy
        pred_is_positive = prediction in positive_correlations
        pred_is_negative = prediction in negative_correlations
        truth_is_positive = result["ground_truth"] in positive_correlations
        truth_is_negative = result["ground_truth"] in negative_correlations

        if (pred_is_positive and truth_is_positive) or \
            (pred_is_negative and truth_is_negative) or \
            (prediction == result["ground_truth"]):
            model_predictions["brief_correct"] += 1

    # Calculate and format results
    total = model_predictions["total"]
    exact_accuracy = (model_predictions["exact_correct"] / total) * 100
    brief_accuracy = (model_predictions["brief_correct"] / total) * 100

    metric_results = {
        "exact_accuracy": f"{round(exact_accuracy, 2)}%",
        "brief_accuracy": f"{round(brief_accuracy, 2)}%",
        "total_samples": total
    }
    return metric_results


def calculate_mcqa_acc(result_list):
    correct = 0
    total = 0
    for result in result_list:
        predition = result["predict"].strip()
        predition = predition[0].upper()
        if predition == result["ground_truth"]:
            correct += 1

        total += 1

    accuracy = correct / total

    return accuracy * 100

### 5. Models Code

In [7]:
class BaseModel(ABC):
    @abstractmethod
    def inference(self, content: str) -> str:
        """
        Run inference on a given input prompt and return the generated output.
        """
        pass

In [8]:
class DeepSeekModel(BaseModel):
    def __init__(self, model_name: str = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", **kwargs):

        # The model is set in eval mode by default by using eval()
        # See: https://huggingface.co/docs/transformers/en/main_classes/model#transformers.PreTrainedModel
        self.model = AutoModelForCausalLM.from_pretrained(
            model_name,
            torch_dtype="auto",
            device_map="auto",
            **kwargs
        )
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)

    def inference(self, content: str) -> str:
        messages = [{"role": "user", "content": content}]

        chat_prompt = self.tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True
        )
        tokenized_input = self.tokenizer([chat_prompt], return_tensors="pt").to(self.model.device)
        generated_output = self.model.generate(
            **tokenized_input,
            max_new_tokens=4096,
        )
        output_ids = generated_output[0][len(tokenized_input.input_ids[0]):].tolist()

        # parsing thinking content
        try:
            # rindex finding 151649 (</think>)
            index = len(output_ids) - output_ids[::-1].index(151649)
        except ValueError:
            index = 0
        outputs = self.tokenizer.decode(output_ids[index:], skip_special_tokens=True).strip("\n")

        return outputs

In [None]:
class LLaMAModel(BaseModel):
    def __init__(self, model_name: str = "meta-llama/Llama-3.2-1B-Instruct", **kwargs):
        self.pipeline = pipeline(
            "text-generation",
            model=model_name,
            torch_dtype="auto",
            device_map="auto",
            token="",
            **kwargs
        )

    def inference(self, content: str) -> str:
        messages = [{"role": "user", "content": content}]

        outputs = self.pipeline(messages, max_new_tokens=1024)

        return outputs[0]["generated_text"][-1]["content"]

In [10]:
class ModelFactory:
    def __init__(self, config: dict):
        self.config = config

    @staticmethod
    def get_model(model_type: str, model_name: str, **kwargs) -> BaseModel:
        if model_type == "deepseek":
            return DeepSeekModel(model_name=model_name, **kwargs)
        elif model_type == "llama":
            return LLaMAModel(model_name=model_name, **kwargs)
        else:
            raise ValueError(f"Unsupported model type: {model_type}")

### 6. Experiment Code --

In [11]:
def finance_mse_metaprompt_generation(
    text: str,
    prices: List[float],
    start_datetime: str,
    end_datetime: str,
    pred_end_datetime: str,
    granularity: str,
    prediction_length: int,
    mode: str,
) -> str:
    """
    Generates a meta-prompt for hypothetical stock price trend analysis
    based on given inputs.

    Args:
        text (str): News article content within the input time series range.
        prices (List[float]): Historical stock prices.
        start_datetime (str): Start datetime of the input time series.
        end_datetime (str): End datetime of the input time series.
        pred_end_datetime (str): End datetime of the hypothetical projection.
        granularity (str): Granularity of the input time series (e.g., daily, hourly).
        prediction_length (int): Number of future time steps to estimate.
        mode (str): Mode of estimation ("timeseries_only", "text_only", "combined").

    Returns:
        str: Meta-prompt for ChatGPT.
    """
    prompt = (
        f"You are an AI assistant trained in data analysis and modeling. "
        f"Your task is to conduct a research-based timeseries estimation for the next {prediction_length} time steps "
        f"based on provided historical price movements and/or related news articles. "
        f"This analysis aims to explore patterns in the given dataset and should not be considered financial advice. "
        f"The input time series spans from {start_datetime} to {end_datetime}, with a granularity of {granularity}. "
        f"The estimation period extends from {end_datetime} to {pred_end_datetime}, maintaining the same granularity."
    )

    if mode == "timeseries_only":
        prompt += (
            "You will analyze the numerical patterns in historical prices and extrapolate potential movements. "
            f"The input prices are: {prices}. "
        )
    elif mode == "text_only":
        prompt += (
            "You will analyze sentiment and potential market impacts from the following news article content: "
            f"{text}. "
        )
    elif mode == "combined":
        prompt += (
            "You will use both historical price movements and relevant news sentiment analysis "
            f"to explore hypothetical market trends. The input prices are: {prices}. The news article states: {text}. "
        )
    else:
        raise ValueError(
            "Invalid mode. Choose from 'timeseries_only', 'text_only', or 'combined'."
        )

    prompt += (
        "\n\nPlease return your estimated values in a structured format as a  list of float numbers. "
        "Ensure the output follows this format strictly: "
        "\nPredicted Prices: value1, value2, ..., valueN. "
        f"The number of estimated values should be exactly {prediction_length}. "
    )

    return prompt

def finance_macd_metaprompt_generation(
    text: str,
    prices: List[float],
    start_datetime: str,
    end_datetime: str,
    pred_end_datetime: str,
    granularity: str,
    prediction_length: int,
    mode: str,
) -> str:

    prompt = (
        f"You are an AI assistant trained in data analysis and modeling. "
        f"Your task is to Predict the future Moving Average Convergence Divergence (MACD) values for the next {prediction_length} time steps "
        f"based on provided historical timeseries movements and/or related news articles. "
        # f"This analysis aims to explore patterns in the given dataset and should not be considered financial advice. "
        f"The input time series spans from {start_datetime} to {end_datetime}, with a granularity of {granularity}. "
        f"The estimation period extends from {end_datetime} to {pred_end_datetime}, maintaining the same granularity."
    )

    if mode == "timeseries_only":
        prompt += (
            "You will analyze the numerical patterns in historical prices. "
            f"The input prices are: {prices}. "
        )
    elif mode == "text_only":
        prompt += (
            "You will analyze sentiment and potential market impacts from the following news article content: "
            f"{text}. "
        )
    elif mode == "combined":
        prompt += (
            "You will use both historical price movements and relevant text sentiment analysis "
            f"The input prices are: {prices}. The news article states: {text}. "
        )
    else:
        raise ValueError(
            "Invalid mode. Choose from 'timeseries_only', 'text_only', or 'combined'."
        )

    prompt += (
        "\n\nPlease return your predicted MACD values in a structured format as a list of float numbers. Please predict the real possible values, do not use the naive linear extrapolation or similar methods"
        "Ensure the output follows this format strictly: "
        "\nPredicted Prices: value1, value2, ..., valueN. "
        f"The number of predicted values should be exactly {prediction_length}. "
    )

    return prompt

def finance_bb_metaprompt_generation(
    text: str,
    prices: List[float],
    start_datetime: str,
    end_datetime: str,
    pred_end_datetime: str,
    granularity: str,
    prediction_length: int,
    mode: str,
) -> str:

    prompt = (
        f"You are an AI assistant trained in data analysis and modeling. "
        f"Your task is to Predict the future upper Bollinger Band (BB) values  for the next {prediction_length} time steps "
        f"based on provided historical price movements and/or related news articles. "
        # f"This analysis aims to explore patterns in the given dataset and should not be considered financial advice. "
        f"The input time series spans from {start_datetime} to {end_datetime}, with a granularity of {granularity}. "
        f"The estimation period extends from {end_datetime} to {pred_end_datetime}, maintaining the same granularity."
    )

    if mode == "timeseries_only":
        prompt += (
            "You will analyze the numerical patterns in historical prices. "
            f"The input prices are: {prices}. "
        )
    elif mode == "text_only":
        prompt += (
            "You will analyze sentiment and potential market impacts from the following news article content: "
            f"{text}. "
        )
    elif mode == "combined":
        prompt += (
            "You will use both historical price movements and relevant news sentiment analysis "
            f"to explore hypothetical market trends. The input prices are: {prices}. The news article states: {text}. "
        )
    else:
        raise ValueError(
            "Invalid mode. Choose from 'timeseries_only', 'text_only', or 'combined'."
        )

    prompt += (
        "\n\nPlease return your estimated upper Bollinger Band (BB) values values in a structured format as a list of float numbers. "
        "Ensure the output follows this format strictly: "
        "\nPredicted Prices: value1, value2, ..., valueN. "
        f"The number of estimated values should be exactly {prediction_length}. "
    )

    return prompt

def parse_val_prediction_response(response: str) -> Union[List[float], None]:
    """
    Decodes the predicted prices from a response string.

    Args:
        response (str): The response containing the predicted prices.

    Returns:
        List[float]: A list of float numbers extracted from the response.
        None: If extraction fails.
    """
    match = re.search(r"Predicted Prices:\s*([-\d.,\s]+)", response)

    if match:
        try:
            price_list = [float(value) for value in match.group(1).split(',')]
            return price_list
        except ValueError:
            pass  # If conversion fails, try another approach

    # Alternative approach: Find all potential numbers in the response
    possible_numbers = re.findall(r"-?\d+\.\d+", response)
    if possible_numbers:
        try:
            return [float(num) for num in possible_numbers]
        except ValueError:
            pass  # If conversion fails, return None

    return None  # Return None if extraction fails

def finance_classification_metaprompt_generation(text=None, timestamps=None, prices=None, mode=None):
    time_series_data = ", ".join([f"{price}" for price in  prices])

    if mode == "combined":
        meta_prompt = f"""
            You are a financial prediction expert with knowledge of advanced machine learning models and time-series analysis.
            Your goal is to predict the stock trend (rise, neutral, or fall) based on the following inputs:

            1. **Time Series Stock Price Data**:
            - This data includes stock prices recorded at 1-hour intervals over the last month from {timestamps[0]} to {timestamps[-1]}.
            - Example data format:
                {time_series_data}

            2. **News Data**:
            - This includes news headlines and summaries relevant to the stock's company or sector.
            - Example data format:
                {text}

            ### Task:
            Analyze the provided time-series data and news to identify future trends of the stock performance. Ensure that the news data is used to supplement the insights from the time-series analysis, focusing on combining both inputs for a more accurate prediction.

            ### Output:
            Provide a prediction for the stock trend categorized one of the following labels:
            - "<-4%"
            - "-2% ~ -4%"
            - "-2% ~ +2%"
            - "+2% ~ +4%"
            - ">+4%"

            please think step-by-step and briefly explain how the combination of time-series data and news data led to the prediction;
            then wrap your final answer in the final predicted label in the format ^^^label^^^
        """

    elif mode == "text_only":
        meta_prompt = f"""
            You are a financial prediction expert with knowledge of advanced machine learning models and time-series analysis.
            Your goal is to predict the stock trend with given labels based on the following input:

            **News Data**:
            - This includes news headlines and summaries relevant to the stock's company or sector.
            - Example data format:
                {text}

            ### Output:
            Provide a prediction for the stock trend categorized one of the following labels:
            - "<-4%"
            - "-2% ~ -4%"
            - "-2% ~ +2%"
            - "+2% ~ +4%"
            - ">+4%"

            ### Task:
            Analyze the news semantics to identify trends and patterns that could impact stock performance.
            Then wrap your final answer in the final predicted label in the format ^^^label^^^
        """

    elif mode == "timeseries_only":
        meta_prompt = f"""
            You are a financial prediction expert with knowledge of advanced machine learning models and time-series analysis.
            Your goal is to predict the stock trend with given labels based on the following input:

            1. **Time Series Stock Price Data**:
            - This data includes stock prices recorded at 1-hour intervals over the last month from {timestamps[0]} to {timestamps[-1]}.
            - Example data format:
                {time_series_data}

            ### Output:
            Provide a prediction for the stock trend categorized one of the following labels:
            - "<-4%"
            - "-2% ~ -4%"
            - "-2% ~ +2%"
            - "+2% ~ +4%"
            - ">+4%"

            ### Task:
            Analyze the provided time-series data to identify trends and patterns that could impact stock performance. Focus solely on the time-series data for making predictions.
             then wrap your final answer in the final predicted label in the format ^^^label^^^
        """

    return meta_prompt

def parse_cls_response(answer):
    try:
        return  re.findall(r'\^\^\^(.*?)\^\^\^', answer)[-1]
    except:
        return  re.findall(r'\^+(.*?)\^+', answer)[-1]



def finance_correlation_metaprompt_generation(setting, sticker, time1, time2, in_price, news, time_news):

    time_interval = "1 hour" if setting == "long" else "5 minutes"

    if setting == "long":
        system_prompt ="You are an expert in finance and stock market analysis. Based on the given 30-day historical stock price time series and a financial analysis published at the last timestamp of the time series, your task is to predict the correlation between the stock's price fluctuations in the next 7 days and the analysis sentiment (positive correlation indicates that positive analysis leads to price increase and negative analysis leads to price decrease). Take into account external factors or market conditions that might affect stock price movement."
    else:
        system_prompt = "You are an expert in finance and stock market analysis. Based on the given 7-day historical stock price time series and a financial analysis published at the last timestamp of the time series, your task is to predict the correlation between the stock's price fluctuations in the next 1 day and the analysis sentiment (positive correlation indicates that positive analysis leads to price increase and negative analysis leads to price decrease). Take into account external factors or market conditions that might affect stock price movement."
    question = "Return your answer in one of the following without any other words: Strong Positive Correlation, Moderate Positive Correlation, No Correlation, Moderate Negative Correlation, Strong Negative Correlation."
    query = f"stock price of {sticker} between {time1} to {time2}, time interval is {time_interval}: \
            {in_price}\
            News published at {time_news}: \
            {news}\
            {question} Answer:"
    prompt = f"{system_prompt}\n\n{query}"

    return prompt




def finance_mcqa_metaprompt_generation(setting, sticker, time1, time2, in_price, news, time_news, question):
    time_interval = "1 hour" if setting == "long" else "5 minutes"
    if setting  == "long":
        system_prompt ="You are an expert in finance and stock market analysis. Your task is to answer the question based on the given 30-day historical stock price time series and a financial analysis published at the last timestamp of the time series. Return your answer only in the letter (A, B, C, or D). "
    else:
        system_prompt ="You are an expert in finance and stock market analysis. Your task is to answer the question based on the given 7-day historical stock price time series and a financial analysis published at the last timestamp of the time series. Return your answer only in the letter (A, B, C, or D). "
    query = f"stock price of {sticker} between {time1} to {time2}, time interval is {time_interval}: \
            {in_price}\
            News published at {time_news}: \
            {news}\
            Question: {question}. Give your answer in the letter (A, B, C, or D) without any other words. Answer:"
    prompt = f"{system_prompt}\n\n{query}"
    return prompt

In [12]:
# import sys

# sys.argv = [
#     "script_name",  # Placeholder for script name (ignored by argparse)
#     "--dataset_folder", "./MTBench-Test/MTBench_finance_QA_short",
#     "--save_path", "./MTBench-Test/llama/qa_short",
#     "--model_type", "llama",
#     "--model", "meta-llama/Llama-3.2-1B-Instruct",
#     "--setting", "short"
# ]

import sys

sys.argv = [
    "script_name",  # Placeholder for script name (ignored by argparse)
    "--dataset_path", "./MTBench-Test/MTBench_finance_aligned_pairs_long/train-00000-of-00001.parquet",
    "--save_path", "./MTBench-Test/llama/tsforecast_long",
    "--model_type", "llama",
    "--model", "meta-llama/Llama-3.2-1B-Instruct",
    "--mode", "timeseries_only",
    "--indicator", "time"
]

In [13]:
parser = argparse.ArgumentParser()
parser.add_argument("--dataset_path", type=str, help="path to the dataset")
parser.add_argument("--save_path", type=str, help="path to save the results")
parser.add_argument("--indicator", default="macd", type=str, help="macd, bb, or time")
parser.add_argument("--model_type",  type=str, help="deepseek or llama")
parser.add_argument("--model",  type=str, help="model name")
parser.add_argument(
    "--mode",
    type=str,
    default="combined",
    help="choose from timeseries_only, text_only, combined",
)
args = parser.parse_args()

save_path = Path(args.save_path)
details_path = save_path / "output_details"
visualizations_path = save_path / "visualizations"
details_path.mkdir(parents=True, exist_ok=True)
visualizations_path.mkdir(parents=True, exist_ok=True)

data_list = []
df = pd.read_parquet(args.dataset_path)
filename = Path(args.dataset_path).name

df["text"] = df["text"].apply(lambda x: json.loads(x) if isinstance(x, str) else x)
df["technical"] = df["technical"].apply(lambda x: json.loads(x) if isinstance(x, str) else x)

for col in ["input_window", "output_window", "input_timestamps"]:
    df[col] = df[col].apply(lambda x: x.tolist() if isinstance(x, np.ndarray) else x)

for _, row in df.iterrows():
    text = row["text"]
    technical = row["technical"]

    extracted_data = {
        "filename": filename,
        "input_window": row["input_window"],
        "output_window": row["output_window"],
        "text": text["content"],
        "input_timestamps": row["input_timestamps"],
        "in_macd": technical.get("in_macd"),
        "out_macd": technical.get("out_macd"),
        "in_upper_bb": technical.get("in_upper_bb"),
        "out_upper_bb": technical.get("out_upper_bb"),
    }

    data_list.append(extracted_data)
data_list = data_list[:150]

model = ModelFactory.get_model(model_type=args.model_type, model_name=args.model)

result_list = []
tot_samples = len(data_list)
print(f"Evaluating {tot_samples} samples...")

epoch_results = []
cumulative_mse, cumulative_mae, cumulative_rmse, cumulative_mape = [], [], [], []
for idx, sample in tqdm(enumerate(data_list), total=tot_samples):
    try:
        datetime_list = [
            datetime.fromtimestamp(s).strftime("%Y-%m-%d %H:%M:%S")
            for s in sample["input_timestamps"]
        ]
        text = sample["text"]
        input_ts = sample["input_window"]
        granularity_string = format_time_difference(
            sample["input_timestamps"][1] - sample["input_timestamps"][0]
        )

        if args.indicator == "macd":
            output_ts = sample["out_macd"]
            designed_prompt = finance_macd_metaprompt_generation(
                text=text,
                prices=input_ts,
                start_datetime=datetime_list[0],
                end_datetime=datetime_list[-1],
                pred_end_datetime=output_ts[-1],
                granularity=granularity_string,
                prediction_length=len(output_ts),
                mode=args.mode,
            )
        elif args.indicator == "bb":
            output_ts = sample["out_upper_bb"]
            designed_prompt = finance_bb_metaprompt_generation(
                text=text,
                prices=input_ts,
                start_datetime=datetime_list[0],
                end_datetime=datetime_list[-1],
                pred_end_datetime=output_ts[-1],
                granularity=granularity_string,
                prediction_length=len(output_ts),
                mode=args.mode,
            )
        elif args.indicator == "time":
            output_ts = sample["output_window"]
            designed_prompt = finance_mse_metaprompt_generation(
                text=text,
                prices=input_ts,
                start_datetime=datetime_list[0],
                end_datetime=datetime_list[-1],
                pred_end_datetime=output_ts[-1],
                granularity=granularity_string,
                prediction_length=len(output_ts),
                mode=args.mode,
            )

        answer = model.inference(designed_prompt)
        answer = answer.strip().replace('"', '')

        predict_ts = parse_val_prediction_response(answer)
        predict_ts_orig = predict_ts
        predict_ts = np.interp( # type: ignore
            np.linspace(0, 1, len(output_ts)),
            np.linspace(0, 1, len(predict_ts)),
            predict_ts
        )

        res = {
            "filename": sample["filename"],
            "response": answer,
            "ground_truth": output_ts,
            "predict": predict_ts.tolist(),
        }
        result_list.append(res)

        save_to_json(res, details_path / sample["filename"])

        if args.indicator == "macd":
            first_half = sample["in_macd"]
        elif args.indicator == "bb":
            first_half = sample["in_upper_bb"]
        elif args.indicator == "time":
            first_half = sample["input_window"]
        plot_series(sample["filename"], first_half, output_ts, predict_ts_orig, visualizations_path)

        mse = np.mean((np.array(output_ts) - np.array(predict_ts)) ** 2)
        mae = np.mean(np.abs(np.array(output_ts) - np.array(predict_ts)))
        rmse = np.sqrt(mse)
        mape = calculate_mape(output_ts, predict_ts)

        if args.indicator == "macd" and mse > 10:
            print(f"{sample['filename']} failed mse", mse)
            epoch_results.append({
                "filename": sample["filename"],
                "failed": True,
                "epoch": idx + 1,
                "mse": mse,
                "mae": mae,
                "rmse": rmse,
                "mape": mape,
            })
            continue

        if args.indicator == "time" and mse > 100:
            print(f"{sample['filename']} failed mse ", mse)
            epoch_results.append({
                "filename": sample["filename"],
                "failed": True,
                "epoch": idx + 1,
                "mse": mse,
                "mae": mae,
                "rmse": rmse,
                "mape": mape,
            })
            continue

        if args.indicator == "bb" and mse > 100:
            print(f"{sample['filename']} failed mse ", mse)
            epoch_results.append({
                "filename": sample["filename"],
                "failed": True,
                "epoch": idx + 1,
                "mse": mse,
                "mae": mae,
                "rmse": rmse,
                "mape": mape,
            })
            continue

        cumulative_mse.append(mse)
        cumulative_mae.append(mae)
        cumulative_rmse.append(rmse)
        cumulative_mape.append(mape)

        epoch_results.append({
            "filename": sample["filename"],
            "epoch": idx + 1,
            "mse": mse,
            "mae": mae,
            "rmse": rmse,
            "mape": mape,
            "mean_mse": np.mean(cumulative_mse),
            "mean_mae": np.mean(cumulative_mae),
            "mean_rmse": np.mean(cumulative_rmse),
            "mean_mape": np.mean(cumulative_mape),
        })
        save_to_json(epoch_results, f"{save_path}/epoch_results.json")
        print(
            "{}/{}: mse: {:.4f}, mae: {:.4f}, rmse: {:.4f}".format(
                idx, tot_samples, mse, mae, rmse
            )
        )
    except Exception as e:
        print(f"Skipping {idx} due to error: {e}")


summary = {
    "total_samples": len(result_list),
    "mse": np.mean(cumulative_mse),
    "mae": np.mean(cumulative_mae),
    "rmse": np.mean(cumulative_rmse),
    "mape": np.mean(cumulative_mape),
}

save_to_json(summary, f"{save_path}/final_results.json")
print(f"Processing complete. Results saved to {save_path}/final_results.json")

config.json:   0%|          | 0.00/877 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.47G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/189 [00:00<?, ?B/s]

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/54.5k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/296 [00:00<?, ?B/s]

Device set to use cuda:0


Evaluating 150 samples...


  0%|          | 0/150 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  1%|          | 1/150 [00:29<1:13:08, 29.45s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  554.9856573633517


  1%|▏         | 2/150 [00:57<1:10:59, 28.78s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  1004.7537458964395


  2%|▏         | 3/150 [01:26<1:10:29, 28.77s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


2/150: mse: 2.0893, mae: 1.4015, rmse: 1.4454


  3%|▎         | 4/150 [01:54<1:09:26, 28.54s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  148.49677839155927


  3%|▎         | 5/150 [02:23<1:09:24, 28.72s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


4/150: mse: 50.1731, mae: 6.6053, rmse: 7.0833


  4%|▍         | 6/150 [02:51<1:08:22, 28.49s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


5/150: mse: 40.0711, mae: 5.4988, rmse: 6.3302


  5%|▍         | 7/150 [03:15<1:04:23, 27.02s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  54187.57957559879


  5%|▌         | 8/150 [03:43<1:04:48, 27.38s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


7/150: mse: 0.8566, mae: 0.8004, rmse: 0.9255


  6%|▌         | 9/150 [04:12<1:04:55, 27.63s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


8/150: mse: 21.3337, mae: 2.2578, rmse: 4.6188


  7%|▋         | 10/150 [04:26<54:59, 23.57s/it] You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Skipping 9 due to error: object of type 'NoneType' has no len()


  7%|▋         | 11/150 [04:54<57:59, 25.03s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


10/150: mse: 2.3550, mae: 1.0039, rmse: 1.5346


  8%|▊         | 12/150 [05:23<59:45, 25.98s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  3327.440589030089


  9%|▊         | 13/150 [05:50<1:00:37, 26.55s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


12/150: mse: 92.3363, mae: 9.1951, rmse: 9.6092


  9%|▉         | 14/150 [06:05<51:38, 22.79s/it]  Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


13/150: mse: 16.6932, mae: 1.3369, rmse: 4.0857


 10%|█         | 15/150 [06:33<54:50, 24.38s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


14/150: mse: 2.0654, mae: 1.2145, rmse: 1.4372


 11%|█         | 16/150 [07:02<57:38, 25.81s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


15/150: mse: 4.0195, mae: 1.7634, rmse: 2.0049


 11%|█▏        | 17/150 [07:30<58:37, 26.45s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  2599.221221199091


 12%|█▏        | 18/150 [07:58<59:26, 27.02s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


17/150: mse: 45.4325, mae: 5.4707, rmse: 6.7404


 13%|█▎        | 19/150 [08:26<59:31, 27.27s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


18/150: mse: 16.0611, mae: 3.7905, rmse: 4.0076


 13%|█▎        | 20/150 [08:54<59:30, 27.47s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  175.03547511096056


 14%|█▍        | 21/150 [09:20<57:59, 26.97s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


20/150: mse: 89.0576, mae: 7.2895, rmse: 9.4370


 15%|█▍        | 22/150 [09:48<58:15, 27.30s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


21/150: mse: 20.0921, mae: 2.1591, rmse: 4.4824


 15%|█▌        | 23/150 [10:11<55:22, 26.16s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


22/150: mse: 1.5233, mae: 1.1642, rmse: 1.2342


 16%|█▌        | 24/150 [10:29<49:34, 23.61s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


23/150: mse: 5.8604, mae: 2.1558, rmse: 2.4208


 17%|█▋        | 25/150 [10:57<51:54, 24.91s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  6792.845413255894


 17%|█▋        | 26/150 [11:26<54:14, 26.25s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


25/150: mse: 47.5036, mae: 6.3679, rmse: 6.8923


 18%|█▊        | 27/150 [11:55<55:12, 26.93s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  538.2987472507675


 19%|█▊        | 28/150 [12:23<55:17, 27.20s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


27/150: mse: 24.6575, mae: 4.5681, rmse: 4.9656


 19%|█▉        | 29/150 [12:43<50:50, 25.21s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


28/150: mse: 25.3923, mae: 4.9425, rmse: 5.0391


 20%|██        | 30/150 [13:12<52:25, 26.22s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


29/150: mse: 10.2441, mae: 2.8220, rmse: 3.2006


 21%|██        | 31/150 [13:40<53:14, 26.85s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


30/150: mse: 42.7948, mae: 2.7691, rmse: 6.5418


 21%|██▏       | 32/150 [14:08<53:34, 27.24s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


31/150: mse: 2.6821, mae: 1.4047, rmse: 1.6377


 22%|██▏       | 33/150 [14:36<53:29, 27.43s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  2999.07443606324


 23%|██▎       | 34/150 [15:04<53:21, 27.60s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  100.72492425113961


 23%|██▎       | 35/150 [15:32<53:23, 27.86s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  494.32607649999994


 24%|██▍       | 36/150 [16:01<53:30, 28.16s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


35/150: mse: 0.6978, mae: 0.8091, rmse: 0.8353


 25%|██▍       | 37/150 [16:29<52:51, 28.07s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  100.9430144291554


 25%|██▌       | 38/150 [16:57<52:21, 28.05s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


37/150: mse: 12.4715, mae: 3.3984, rmse: 3.5315


 26%|██▌       | 39/150 [17:24<51:05, 27.62s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  1816.7239235372726


 27%|██▋       | 40/150 [17:52<50:48, 27.72s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  117.43063304690804


 27%|██▋       | 41/150 [18:17<48:54, 26.92s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Skipping 40 due to error: object of type 'NoneType' has no len()


 28%|██▊       | 42/150 [18:43<48:10, 26.76s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  1506.9920734743182


 29%|██▊       | 43/150 [19:11<48:21, 27.12s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


42/150: mse: 4.6562, mae: 1.7085, rmse: 2.1578


 29%|██▉       | 44/150 [19:39<48:31, 27.47s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  478.41980692148763


 30%|███       | 45/150 [20:07<48:21, 27.63s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


44/150: mse: 5.9121, mae: 2.2761, rmse: 2.4315


 31%|███       | 46/150 [20:36<48:32, 28.00s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


45/150: mse: 0.7843, mae: 0.7272, rmse: 0.8856


 31%|███▏      | 47/150 [20:58<44:52, 26.14s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  653.3595175519412


 32%|███▏      | 48/150 [21:24<44:26, 26.14s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


47/150: mse: 3.0700, mae: 1.3636, rmse: 1.7521


 33%|███▎      | 49/150 [21:52<44:49, 26.62s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


48/150: mse: 19.9902, mae: 1.5139, rmse: 4.4710


 33%|███▎      | 50/150 [22:20<44:52, 26.92s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


49/150: mse: 12.6388, mae: 2.6671, rmse: 3.5551


 34%|███▍      | 51/150 [22:43<42:54, 26.01s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  896.0716890766666


 35%|███▍      | 52/150 [23:15<45:10, 27.65s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


51/150: mse: 0.0362, mae: 0.1523, rmse: 0.1903


 35%|███▌      | 53/150 [23:44<45:13, 27.97s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  131.2303582965091


 36%|███▌      | 54/150 [24:12<44:55, 28.08s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


53/150: mse: 13.9674, mae: 2.3027, rmse: 3.7373


 37%|███▋      | 55/150 [24:40<44:31, 28.12s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  5216.458579544615


 37%|███▋      | 56/150 [25:09<44:19, 28.29s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  4489.482486815626


 38%|███▊      | 57/150 [25:37<43:43, 28.21s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


56/150: mse: 6.2647, mae: 2.3648, rmse: 2.5029


 39%|███▊      | 58/150 [26:05<43:06, 28.11s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  132.71278206792616


 39%|███▉      | 59/150 [26:33<42:37, 28.11s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  1988.885164378046


 40%|████      | 60/150 [27:00<41:37, 27.75s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


59/150: mse: 12.6660, mae: 2.8832, rmse: 3.5589


 41%|████      | 61/150 [27:19<37:25, 25.23s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  1767.384915471212


 41%|████▏     | 62/150 [27:47<38:09, 26.01s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


61/150: mse: 6.4336, mae: 2.2537, rmse: 2.5365


 42%|████▏     | 63/150 [28:10<36:12, 24.97s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


62/150: mse: 14.2011, mae: 3.7681, rmse: 3.7684


 43%|████▎     | 64/150 [28:37<37:02, 25.84s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


63/150: mse: 41.2825, mae: 5.9475, rmse: 6.4251


 43%|████▎     | 65/150 [29:05<37:22, 26.38s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


64/150: mse: 3.6213, mae: 1.6565, rmse: 1.9030


 44%|████▍     | 66/150 [29:34<37:52, 27.05s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  5349.769323168761


 45%|████▍     | 67/150 [30:02<37:50, 27.36s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


66/150: mse: 9.9288, mae: 2.9453, rmse: 3.1510


 45%|████▌     | 68/150 [30:30<37:41, 27.58s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


67/150: mse: 3.7932, mae: 1.5146, rmse: 1.9476


 46%|████▌     | 69/150 [30:58<37:15, 27.60s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Skipping 68 due to error: object of type 'NoneType' has no len()


 47%|████▋     | 70/150 [31:15<32:39, 24.50s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  1813.3320495175758


 47%|████▋     | 71/150 [31:43<33:37, 25.54s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


70/150: mse: 63.0379, mae: 2.8695, rmse: 7.9396


 48%|████▊     | 72/150 [32:11<34:07, 26.26s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


71/150: mse: 24.6150, mae: 4.6623, rmse: 4.9614


 49%|████▊     | 73/150 [32:34<32:35, 25.40s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  106689.17966284849


 49%|████▉     | 74/150 [33:02<33:06, 26.14s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


73/150: mse: 28.4854, mae: 5.0295, rmse: 5.3372


 50%|█████     | 75/150 [33:30<33:29, 26.80s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


74/150: mse: 39.4700, mae: 3.1732, rmse: 6.2825


 51%|█████     | 76/150 [34:00<34:13, 27.74s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  342.4664802884776


 51%|█████▏    | 77/150 [34:28<33:41, 27.69s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  395.1210828784967


 52%|█████▏    | 78/150 [34:56<33:14, 27.70s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  2327.898584507576


 53%|█████▎    | 79/150 [35:23<32:50, 27.75s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  3873.4118421579847


 53%|█████▎    | 80/150 [35:51<32:23, 27.77s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  893.7233093415422


 54%|█████▍    | 81/150 [36:19<32:03, 27.87s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  2516.5860911134296


 55%|█████▍    | 82/150 [36:47<31:37, 27.90s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  11745.983334356923


 55%|█████▌    | 83/150 [37:15<31:02, 27.80s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


82/150: mse: 67.3572, mae: 5.3455, rmse: 8.2071


 56%|█████▌    | 84/150 [37:43<30:39, 27.87s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


83/150: mse: 77.9353, mae: 2.0924, rmse: 8.8281


 57%|█████▋    | 85/150 [38:11<30:06, 27.80s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


84/150: mse: 22.7083, mae: 4.2998, rmse: 4.7653


 57%|█████▋    | 86/150 [38:40<30:04, 28.20s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  1940.8538811696255


 58%|█████▊    | 87/150 [39:07<29:27, 28.05s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


86/150: mse: 3.3225, mae: 1.7570, rmse: 1.8228


 59%|█████▊    | 88/150 [39:35<28:59, 28.06s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  873.7231936586792


 59%|█████▉    | 89/150 [40:04<28:33, 28.08s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


88/150: mse: 17.5323, mae: 3.8165, rmse: 4.1872


 60%|██████    | 90/150 [40:32<28:02, 28.04s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  3051.7845608187595


 61%|██████    | 91/150 [40:59<27:30, 27.97s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  691.7236203538462


 61%|██████▏   | 92/150 [41:27<26:57, 27.90s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


91/150: mse: 9.0096, mae: 2.6830, rmse: 3.0016


 62%|██████▏   | 93/150 [41:55<26:29, 27.88s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


92/150: mse: 70.6825, mae: 8.4032, rmse: 8.4073


 63%|██████▎   | 94/150 [42:23<26:11, 28.07s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


93/150: mse: 3.7051, mae: 1.2292, rmse: 1.9249


 63%|██████▎   | 95/150 [42:53<26:01, 28.39s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


94/150: mse: 2.6893, mae: 1.4234, rmse: 1.6399


 64%|██████▍   | 96/150 [43:21<25:28, 28.30s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  446.3002101455143


 65%|██████▍   | 97/150 [43:46<24:08, 27.33s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  133.5210801609848


 65%|██████▌   | 98/150 [44:11<23:08, 26.70s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


97/150: mse: 12.9530, mae: 2.8181, rmse: 3.5990


 66%|██████▌   | 99/150 [44:39<22:59, 27.06s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


98/150: mse: 5.9595, mae: 2.0238, rmse: 2.4412


 67%|██████▋   | 100/150 [45:07<22:43, 27.27s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


99/150: mse: 93.7726, mae: 8.7211, rmse: 9.6836


 67%|██████▋   | 101/150 [45:35<22:32, 27.60s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


100/150: mse: 20.3827, mae: 1.5590, rmse: 4.5147


 68%|██████▊   | 102/150 [46:03<22:08, 27.69s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


101/150: mse: 40.9044, mae: 5.4384, rmse: 6.3957


 69%|██████▊   | 103/150 [46:15<17:57, 22.93s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  9148.049170498183


 69%|██████▉   | 104/150 [46:42<18:40, 24.36s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


103/150: mse: 0.5954, mae: 0.5903, rmse: 0.7716


 70%|███████   | 105/150 [47:16<20:27, 27.27s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  1207.7537746710527


 71%|███████   | 106/150 [47:45<20:15, 27.62s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  644.8205859917093


 71%|███████▏  | 107/150 [48:13<19:49, 27.66s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


106/150: mse: 0.6433, mae: 0.6187, rmse: 0.8020


 72%|███████▏  | 108/150 [48:40<19:24, 27.73s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  1051.1250527047225


 73%|███████▎  | 109/150 [49:09<19:04, 27.91s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


108/150: mse: 33.5731, mae: 5.1079, rmse: 5.7942


 73%|███████▎  | 110/150 [49:36<18:32, 27.81s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  663.0371408470942


 74%|███████▍  | 111/150 [50:04<18:07, 27.87s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


110/150: mse: 92.4870, mae: 3.5709, rmse: 9.6170


 75%|███████▍  | 112/150 [50:33<17:42, 27.97s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


111/150: mse: 27.2814, mae: 4.5470, rmse: 5.2232


 75%|███████▌  | 113/150 [51:01<17:15, 27.98s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Skipping 112 due to error: object of type 'NoneType' has no len()


 76%|███████▌  | 114/150 [51:28<16:46, 27.95s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


113/150: mse: 19.9797, mae: 3.9087, rmse: 4.4699


 77%|███████▋  | 115/150 [51:58<16:29, 28.27s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  338.1035854019697


 77%|███████▋  | 116/150 [52:23<15:37, 27.57s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Skipping 115 due to error: object of type 'NoneType' has no len()


 78%|███████▊  | 117/150 [52:52<15:17, 27.80s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  219.8781310984061


 79%|███████▊  | 118/150 [53:20<14:51, 27.87s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  3579.8896608500563


 79%|███████▉  | 119/150 [53:46<14:11, 27.46s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


118/150: mse: 3.5021, mae: 1.4187, rmse: 1.8714


 80%|████████  | 120/150 [54:14<13:46, 27.54s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  179.2076245704261


 81%|████████  | 121/150 [54:42<13:20, 27.60s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


120/150: mse: 85.9113, mae: 7.0847, rmse: 9.2688


 81%|████████▏ | 122/150 [55:10<12:55, 27.68s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


121/150: mse: 2.0894, mae: 1.3316, rmse: 1.4455


 82%|████████▏ | 123/150 [55:38<12:30, 27.79s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


122/150: mse: 8.0080, mae: 2.1023, rmse: 2.8298


 83%|████████▎ | 124/150 [56:06<12:05, 27.91s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


123/150: mse: 12.3415, mae: 3.3754, rmse: 3.5130


 83%|████████▎ | 125/150 [56:35<11:46, 28.24s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  272.6779384761562


 84%|████████▍ | 126/150 [57:03<11:15, 28.13s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


125/150: mse: 17.2226, mae: 3.1693, rmse: 4.1500


 85%|████████▍ | 127/150 [57:31<10:45, 28.05s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  1573.7036577926074


 85%|████████▌ | 128/150 [58:02<10:40, 29.10s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  406.6179944681128


 86%|████████▌ | 129/150 [58:30<10:02, 28.67s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


128/150: mse: 0.2135, mae: 0.3252, rmse: 0.4621


 87%|████████▋ | 130/150 [58:58<09:28, 28.41s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  277.02397109281196


 87%|████████▋ | 131/150 [59:38<10:05, 31.87s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  363.4323458281116


 88%|████████▊ | 132/150 [1:00:05<09:11, 30.66s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


131/150: mse: 1.5894, mae: 1.2334, rmse: 1.2607


 89%|████████▊ | 133/150 [1:00:33<08:26, 29.77s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


132/150: mse: 99.4569, mae: 8.1858, rmse: 9.9728


 89%|████████▉ | 134/150 [1:01:02<07:53, 29.58s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


133/150: mse: 0.0304, mae: 0.1220, rmse: 0.1744


 90%|█████████ | 135/150 [1:01:31<07:17, 29.18s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  585.9681008264472


 91%|█████████ | 136/150 [1:01:58<06:42, 28.75s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  16140.022078809929


 91%|█████████▏| 137/150 [1:02:26<06:09, 28.44s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  577.971669872184


 92%|█████████▏| 138/150 [1:02:54<05:39, 28.25s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  182.06169871299247


 93%|█████████▎| 139/150 [1:03:20<05:05, 27.76s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


138/150: mse: 5.3058, mae: 2.1408, rmse: 2.3034


 93%|█████████▎| 140/150 [1:04:08<05:36, 33.67s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


139/150: mse: 0.0058, mae: 0.0637, rmse: 0.0759


 94%|█████████▍| 141/150 [1:04:33<04:39, 31.02s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  272.9553456477942


 95%|█████████▍| 142/150 [1:05:00<04:00, 30.05s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  1401.304585750777


 95%|█████████▌| 143/150 [1:05:30<03:29, 29.91s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


142/150: mse: 1.9280, mae: 1.3797, rmse: 1.3885


 96%|█████████▌| 144/150 [1:05:58<02:56, 29.38s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


143/150: mse: 10.0195, mae: 3.0896, rmse: 3.1654


 97%|█████████▋| 145/150 [1:06:26<02:24, 28.87s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


144/150: mse: 18.2108, mae: 4.1069, rmse: 4.2674


 97%|█████████▋| 146/150 [1:06:54<01:54, 28.64s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


145/150: mse: 0.7991, mae: 0.4032, rmse: 0.8939


 98%|█████████▊| 147/150 [1:07:22<01:25, 28.50s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  290.1150184673591


 99%|█████████▊| 148/150 [1:07:50<00:56, 28.32s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  137.38924329558336


 99%|█████████▉| 149/150 [1:08:16<00:27, 27.68s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Skipping 148 due to error: object of type 'NoneType' has no len()


100%|██████████| 150/150 [1:08:41<00:00, 27.48s/it]

train-00000-of-00001.parquet failed mse  275.40031654083566
Processing complete. Results saved to MTBench-Test/llama/tsforecast_long/final_results.json



