### 1. Install Dependencies

In [1]:
!pip install pandas gdown huggingface-hub numpy matplotlib scikit-learn transformers torch tqdm

Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch)
  Downloading nvidia_curand_cu12-10.3.5

In [2]:
!pip install -U datasets

Collecting datasets
  Downloading datasets-3.6.0-py3-none-any.whl.metadata (19 kB)
Collecting fsspec<=2025.3.0,>=2023.1.0 (from fsspec[http]<=2025.3.0,>=2023.1.0->datasets)
  Downloading fsspec-2025.3.0-py3-none-any.whl.metadata (11 kB)
Downloading datasets-3.6.0-py3-none-any.whl (491 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m491.5/491.5 kB[0m [31m33.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading fsspec-2025.3.0-py3-none-any.whl (193 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m193.6/193.6 kB[0m [31m21.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: fsspec, datasets
  Attempting uninstall: fsspec
    Found existing installation: fsspec 2025.3.2
    Uninstalling fsspec-2025.3.2:
      Successfully uninstalled fsspec-2025.3.2
  Attempting uninstall: datasets
    Found existing installation: datasets 2.14.4
    Uninstalling datasets-2.14.4:
      Successfully uninstalled datasets-2.14.4
[31mERROR: pip's dependency r

### 2. Imports

In [3]:
import os
import json
import sys
import argparse
import re
from typing import List, Union
import pandas as pd
from tqdm import tqdm
from pathlib import Path
from google.colab import drive
from datetime import datetime

from abc import ABC, abstractmethod

import torch
import numpy as np
import matplotlib.pyplot as plt
from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

In [4]:
torch.cuda.is_available()

True

### 3. Connect to Google Drive

In [5]:
drive.mount("/content/drive",force_remount=True)
os.chdir("/content/drive/My Drive")

Mounted at /content/drive


### 4. Utils Code

In [6]:
def format_time_difference(seconds):
    minutes = seconds // 60
    hours = minutes // 60
    days = hours // 24

    if days > 0:
        if hours % 24 > 0.1:
            return f"{days} days-{hours % 24} hours"
        else:
            return f"{days} days"
    elif hours > 0:
        if minutes % 60 > 0.1:
            return f"{hours} hours-{minutes % 60} minutes"
        else:
            return f"{hours} hours"
    elif minutes > 0:
        if seconds % 60 > 0.1:
            return f"{minutes} minutes-{seconds % 60} seconds"
        else:
            return f"{minutes} minutes"
    else:
        return f"{seconds} seconds"

def save_to_json(data, save_path):
    os.makedirs(os.path.dirname(save_path), exist_ok=True)
    with open(save_path, "w") as f:
        json.dump(data, f, indent=4)

def plot_series(filename, input_ts, output_ts, predicted_ts, save_folder):
    plt.figure(figsize=(10, 5))
    plt.plot(range(len(input_ts)), input_ts, label="Input Time Series", marker='o')
    plt.plot(range(len(input_ts), len(input_ts) + len(output_ts)), output_ts, label="Ground Truth", marker='o')
    plt.plot(range(len(input_ts), len(input_ts) + len(predicted_ts)), predicted_ts, label="Predicted", linestyle='dashed')
    plt.legend()
    plt.title(f"Prediction for {filename}")
    plt.xlabel("Time Steps")
    plt.ylabel("Value")
    plt.grid()

    base_name = os.path.splitext(filename)[0]
    save_path = os.path.join(save_folder, base_name + ".png")
    plt.savefig(save_path)

    plt.close()

def calculate_mape(y_true, y_pred):
    y_true, y_pred = np.array(y_true), np.array(y_pred)

    # Avoid division by zero
    mask = y_true != 0
    return np.mean(np.abs((y_true[mask] - y_pred[mask]) / y_true[mask])) * 100


def calculate_acc(result_list, regrouped_labels = None):
    if regrouped_labels is None:
        correct_pred = sum(1 for result in result_list if result["ground_truth"] in result["predict"])
    else:
        correct_pred = 0
        for result in result_list:
            gt_group = regrouped_labels[result['ground_truth']]
            for original_label in regrouped_labels.keys():
                if original_label in result['predict']:
                    predict_group = regrouped_labels[original_label]
                    if gt_group == predict_group:
                        correct_pred += 1
                        break

    total_pred = len(result_list)
    accuracy = correct_pred / total_pred

    return accuracy


def calculate_correlation_acc(result_list):
    model_predictions = {"total": 0, "exact_correct": 0, "brief_correct": 0}
    positive_correlations = ["Strong Positive Correlation", "Moderate Positive Correlation"]
    negative_correlations = ["Strong Negative Correlation", "Moderate Negative Correlation"]
    for result in result_list:
        prediction = result["predict"].strip()
        model_predictions["total"] += 1
        if prediction == result["ground_truth"]:
            model_predictions["exact_correct"] += 1

        # Brief accuracy
        pred_is_positive = prediction in positive_correlations
        pred_is_negative = prediction in negative_correlations
        truth_is_positive = result["ground_truth"] in positive_correlations
        truth_is_negative = result["ground_truth"] in negative_correlations

        if (pred_is_positive and truth_is_positive) or \
            (pred_is_negative and truth_is_negative) or \
            (prediction == result["ground_truth"]):
            model_predictions["brief_correct"] += 1

    # Calculate and format results
    total = model_predictions["total"]
    exact_accuracy = (model_predictions["exact_correct"] / total) * 100
    brief_accuracy = (model_predictions["brief_correct"] / total) * 100

    metric_results = {
        "exact_accuracy": f"{round(exact_accuracy, 2)}%",
        "brief_accuracy": f"{round(brief_accuracy, 2)}%",
        "total_samples": total
    }
    return metric_results


def calculate_mcqa_acc(result_list):
    correct = 0
    total = 0
    for result in result_list:
        predition = result["predict"].strip()
        predition = predition[0].upper()
        if predition == result["ground_truth"]:
            correct += 1

        total += 1

    accuracy = correct / total

    return accuracy * 100

### 5. Models Code

In [7]:
class BaseModel(ABC):
    @abstractmethod
    def inference(self, content: str) -> str:
        """
        Run inference on a given input prompt and return the generated output.
        """
        pass

In [8]:
class DeepSeekModel(BaseModel):
    def __init__(self, model_name: str = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", **kwargs):

        # The model is set in eval mode by default by using eval()
        # See: https://huggingface.co/docs/transformers/en/main_classes/model#transformers.PreTrainedModel
        self.model = AutoModelForCausalLM.from_pretrained(
            model_name,
            torch_dtype="auto",
            device_map="auto",
            **kwargs
        )
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)

    def inference(self, content: str) -> str:
        messages = [{"role": "user", "content": content}]

        chat_prompt = self.tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True
        )
        tokenized_input = self.tokenizer([chat_prompt], return_tensors="pt").to(self.model.device)
        generated_output = self.model.generate(
            **tokenized_input,
            max_new_tokens=4096,
        )
        output_ids = generated_output[0][len(tokenized_input.input_ids[0]):].tolist()

        # parsing thinking content
        try:
            # rindex finding 151649 (</think>)
            index = len(output_ids) - output_ids[::-1].index(151649)
        except ValueError:
            index = 0
        outputs = self.tokenizer.decode(output_ids[index:], skip_special_tokens=True).strip("\n")

        return outputs

In [None]:
class LLaMAModel(BaseModel):
    def __init__(self, model_name: str = "meta-llama/Llama-3.2-1B-Instruct", **kwargs):
        self.pipeline = pipeline(
            "text-generation",
            model=model_name,
            torch_dtype="auto",
            device_map="auto",
            token="",
            **kwargs
        )

    def inference(self, content: str) -> str:
        messages = [{"role": "user", "content": content}]

        outputs = self.pipeline(messages, max_new_tokens=1024)

        return outputs[0]["generated_text"][-1]["content"]

In [10]:
class ModelFactory:
    def __init__(self, config: dict):
        self.config = config

    @staticmethod
    def get_model(model_type: str, model_name: str, **kwargs) -> BaseModel:
        if model_type == "deepseek":
            return DeepSeekModel(model_name=model_name, **kwargs)
        elif model_type == "llama":
            return LLaMAModel(model_name=model_name, **kwargs)
        else:
            raise ValueError(f"Unsupported model type: {model_type}")

### 6. Experiment Code --

In [11]:
def finance_mse_metaprompt_generation(
    text: str,
    prices: List[float],
    start_datetime: str,
    end_datetime: str,
    pred_end_datetime: str,
    granularity: str,
    prediction_length: int,
    mode: str,
) -> str:
    """
    Generates a meta-prompt for hypothetical stock price trend analysis
    based on given inputs.

    Args:
        text (str): News article content within the input time series range.
        prices (List[float]): Historical stock prices.
        start_datetime (str): Start datetime of the input time series.
        end_datetime (str): End datetime of the input time series.
        pred_end_datetime (str): End datetime of the hypothetical projection.
        granularity (str): Granularity of the input time series (e.g., daily, hourly).
        prediction_length (int): Number of future time steps to estimate.
        mode (str): Mode of estimation ("timeseries_only", "text_only", "combined").

    Returns:
        str: Meta-prompt for ChatGPT.
    """
    prompt = (
        f"You are an AI assistant trained in data analysis and modeling. "
        f"Your task is to conduct a research-based timeseries estimation for the next {prediction_length} time steps "
        f"based on provided historical price movements and/or related news articles. "
        f"This analysis aims to explore patterns in the given dataset and should not be considered financial advice. "
        f"The input time series spans from {start_datetime} to {end_datetime}, with a granularity of {granularity}. "
        f"The estimation period extends from {end_datetime} to {pred_end_datetime}, maintaining the same granularity."
    )

    if mode == "timeseries_only":
        prompt += (
            "You will analyze the numerical patterns in historical prices and extrapolate potential movements. "
            f"The input prices are: {prices}. "
        )
    elif mode == "text_only":
        prompt += (
            "You will analyze sentiment and potential market impacts from the following news article content: "
            f"{text}. "
        )
    elif mode == "combined":
        prompt += (
            "You will use both historical price movements and relevant news sentiment analysis "
            f"to explore hypothetical market trends. The input prices are: {prices}. The news article states: {text}. "
        )
    else:
        raise ValueError(
            "Invalid mode. Choose from 'timeseries_only', 'text_only', or 'combined'."
        )

    prompt += (
        "\n\nPlease return your estimated values in a structured format as a  list of float numbers. "
        "Ensure the output follows this format strictly: "
        "\nPredicted Prices: value1, value2, ..., valueN. "
        f"The number of estimated values should be exactly {prediction_length}. "
    )

    return prompt

def finance_macd_metaprompt_generation(
    text: str,
    prices: List[float],
    start_datetime: str,
    end_datetime: str,
    pred_end_datetime: str,
    granularity: str,
    prediction_length: int,
    mode: str,
) -> str:

    prompt = (
        f"You are an AI assistant trained in data analysis and modeling. "
        f"Your task is to Predict the future Moving Average Convergence Divergence (MACD) values for the next {prediction_length} time steps "
        f"based on provided historical timeseries movements and/or related news articles. "
        # f"This analysis aims to explore patterns in the given dataset and should not be considered financial advice. "
        f"The input time series spans from {start_datetime} to {end_datetime}, with a granularity of {granularity}. "
        f"The estimation period extends from {end_datetime} to {pred_end_datetime}, maintaining the same granularity."
    )

    if mode == "timeseries_only":
        prompt += (
            "You will analyze the numerical patterns in historical prices. "
            f"The input prices are: {prices}. "
        )
    elif mode == "text_only":
        prompt += (
            "You will analyze sentiment and potential market impacts from the following news article content: "
            f"{text}. "
        )
    elif mode == "combined":
        prompt += (
            "You will use both historical price movements and relevant text sentiment analysis "
            f"The input prices are: {prices}. The news article states: {text}. "
        )
    else:
        raise ValueError(
            "Invalid mode. Choose from 'timeseries_only', 'text_only', or 'combined'."
        )

    prompt += (
        "\n\nPlease return your predicted MACD values in a structured format as a list of float numbers. Please predict the real possible values, do not use the naive linear extrapolation or similar methods"
        "Ensure the output follows this format strictly: "
        "\nPredicted Prices: value1, value2, ..., valueN. "
        f"The number of predicted values should be exactly {prediction_length}. "
    )

    return prompt

def finance_bb_metaprompt_generation(
    text: str,
    prices: List[float],
    start_datetime: str,
    end_datetime: str,
    pred_end_datetime: str,
    granularity: str,
    prediction_length: int,
    mode: str,
) -> str:

    prompt = (
        f"You are an AI assistant trained in data analysis and modeling. "
        f"Your task is to Predict the future upper Bollinger Band (BB) values  for the next {prediction_length} time steps "
        f"based on provided historical price movements and/or related news articles. "
        # f"This analysis aims to explore patterns in the given dataset and should not be considered financial advice. "
        f"The input time series spans from {start_datetime} to {end_datetime}, with a granularity of {granularity}. "
        f"The estimation period extends from {end_datetime} to {pred_end_datetime}, maintaining the same granularity."
    )

    if mode == "timeseries_only":
        prompt += (
            "You will analyze the numerical patterns in historical prices. "
            f"The input prices are: {prices}. "
        )
    elif mode == "text_only":
        prompt += (
            "You will analyze sentiment and potential market impacts from the following news article content: "
            f"{text}. "
        )
    elif mode == "combined":
        prompt += (
            "You will use both historical price movements and relevant news sentiment analysis "
            f"to explore hypothetical market trends. The input prices are: {prices}. The news article states: {text}. "
        )
    else:
        raise ValueError(
            "Invalid mode. Choose from 'timeseries_only', 'text_only', or 'combined'."
        )

    prompt += (
        "\n\nPlease return your estimated upper Bollinger Band (BB) values values in a structured format as a list of float numbers. "
        "Ensure the output follows this format strictly: "
        "\nPredicted Prices: value1, value2, ..., valueN. "
        f"The number of estimated values should be exactly {prediction_length}. "
    )

    return prompt

def parse_val_prediction_response(response: str) -> Union[List[float], None]:
    """
    Decodes the predicted prices from a response string.

    Args:
        response (str): The response containing the predicted prices.

    Returns:
        List[float]: A list of float numbers extracted from the response.
        None: If extraction fails.
    """
    match = re.search(r"Predicted Prices:\s*([-\d.,\s]+)", response)

    if match:
        try:
            price_list = [float(value) for value in match.group(1).split(',')]
            return price_list
        except ValueError:
            pass  # If conversion fails, try another approach

    # Alternative approach: Find all potential numbers in the response
    possible_numbers = re.findall(r"-?\d+\.\d+", response)
    if possible_numbers:
        try:
            return [float(num) for num in possible_numbers]
        except ValueError:
            pass  # If conversion fails, return None

    return None  # Return None if extraction fails

def finance_classification_metaprompt_generation(text=None, timestamps=None, prices=None, mode=None):
    time_series_data = ", ".join([f"{price}" for price in  prices])

    if mode == "combined":
        meta_prompt = f"""
            You are a financial prediction expert with knowledge of advanced machine learning models and time-series analysis.
            Your goal is to predict the stock trend (rise, neutral, or fall) based on the following inputs:

            1. **Time Series Stock Price Data**:
            - This data includes stock prices recorded at 1-hour intervals over the last month from {timestamps[0]} to {timestamps[-1]}.
            - Example data format:
                {time_series_data}

            2. **News Data**:
            - This includes news headlines and summaries relevant to the stock's company or sector.
            - Example data format:
                {text}

            ### Task:
            Analyze the provided time-series data and news to identify future trends of the stock performance. Ensure that the news data is used to supplement the insights from the time-series analysis, focusing on combining both inputs for a more accurate prediction.

            ### Output:
            Provide a prediction for the stock trend categorized one of the following labels:
            - "<-4%"
            - "-2% ~ -4%"
            - "-2% ~ +2%"
            - "+2% ~ +4%"
            - ">+4%"

            please think step-by-step and briefly explain how the combination of time-series data and news data led to the prediction;
            then wrap your final answer in the final predicted label in the format ^^^label^^^
        """

    elif mode == "text_only":
        meta_prompt = f"""
            You are a financial prediction expert with knowledge of advanced machine learning models and time-series analysis.
            Your goal is to predict the stock trend with given labels based on the following input:

            **News Data**:
            - This includes news headlines and summaries relevant to the stock's company or sector.
            - Example data format:
                {text}

            ### Output:
            Provide a prediction for the stock trend categorized one of the following labels:
            - "<-4%"
            - "-2% ~ -4%"
            - "-2% ~ +2%"
            - "+2% ~ +4%"
            - ">+4%"

            ### Task:
            Analyze the news semantics to identify trends and patterns that could impact stock performance.
            Then wrap your final answer in the final predicted label in the format ^^^label^^^
        """

    elif mode == "timeseries_only":
        meta_prompt = f"""
            You are a financial prediction expert with knowledge of advanced machine learning models and time-series analysis.
            Your goal is to predict the stock trend with given labels based on the following input:

            1. **Time Series Stock Price Data**:
            - This data includes stock prices recorded at 1-hour intervals over the last month from {timestamps[0]} to {timestamps[-1]}.
            - Example data format:
                {time_series_data}

            ### Output:
            Provide a prediction for the stock trend categorized one of the following labels:
            - "<-4%"
            - "-2% ~ -4%"
            - "-2% ~ +2%"
            - "+2% ~ +4%"
            - ">+4%"

            ### Task:
            Analyze the provided time-series data to identify trends and patterns that could impact stock performance. Focus solely on the time-series data for making predictions.
             then wrap your final answer in the final predicted label in the format ^^^label^^^
        """

    return meta_prompt

def parse_cls_response(answer):
    try:
        return  re.findall(r'\^\^\^(.*?)\^\^\^', answer)[-1]
    except:
        return  re.findall(r'\^+(.*?)\^+', answer)[-1]



def finance_correlation_metaprompt_generation(setting, sticker, time1, time2, in_price, news, time_news):

    time_interval = "1 hour" if setting == "long" else "5 minutes"

    if setting == "long":
        system_prompt ="You are an expert in finance and stock market analysis. Based on the given 30-day historical stock price time series and a financial analysis published at the last timestamp of the time series, your task is to predict the correlation between the stock's price fluctuations in the next 7 days and the analysis sentiment (positive correlation indicates that positive analysis leads to price increase and negative analysis leads to price decrease). Take into account external factors or market conditions that might affect stock price movement."
    else:
        system_prompt = "You are an expert in finance and stock market analysis. Based on the given 7-day historical stock price time series and a financial analysis published at the last timestamp of the time series, your task is to predict the correlation between the stock's price fluctuations in the next 1 day and the analysis sentiment (positive correlation indicates that positive analysis leads to price increase and negative analysis leads to price decrease). Take into account external factors or market conditions that might affect stock price movement."
    question = "Return your answer in one of the following without any other words: Strong Positive Correlation, Moderate Positive Correlation, No Correlation, Moderate Negative Correlation, Strong Negative Correlation."
    query = f"stock price of {sticker} between {time1} to {time2}, time interval is {time_interval}: \
            {in_price}\
            News published at {time_news}: \
            {news}\
            {question} Answer:"
    prompt = f"{system_prompt}\n\n{query}"

    return prompt




def finance_mcqa_metaprompt_generation(setting, sticker, time1, time2, in_price, news, time_news, question):
    time_interval = "1 hour" if setting == "long" else "5 minutes"
    if setting  == "long":
        system_prompt ="You are an expert in finance and stock market analysis. Your task is to answer the question based on the given 30-day historical stock price time series and a financial analysis published at the last timestamp of the time series. Return your answer only in the letter (A, B, C, or D). "
    else:
        system_prompt ="You are an expert in finance and stock market analysis. Your task is to answer the question based on the given 7-day historical stock price time series and a financial analysis published at the last timestamp of the time series. Return your answer only in the letter (A, B, C, or D). "
    query = f"stock price of {sticker} between {time1} to {time2}, time interval is {time_interval}: \
            {in_price}\
            News published at {time_news}: \
            {news}\
            Question: {question}. Give your answer in the letter (A, B, C, or D) without any other words. Answer:"
    prompt = f"{system_prompt}\n\n{query}"
    return prompt

In [12]:
# import sys

# sys.argv = [
#     "script_name",  # Placeholder for script name (ignored by argparse)
#     "--dataset_folder", "./MTBench-Test/MTBench_finance_QA_short",
#     "--save_path", "./MTBench-Test/llama/qa_short",
#     "--model_type", "llama",
#     "--model", "meta-llama/Llama-3.2-1B-Instruct",
#     "--setting", "short"
# ]

import sys

sys.argv = [
    "script_name",  # Placeholder for script name (ignored by argparse)
    "--dataset_path", "./MTBench-Test/MTBench_finance_aligned_pairs_short/train-00000-of-00001.parquet",
    "--save_path", "./MTBench-Test/llama/tsforecast_short",
    "--model_type", "llama",
    "--model", "meta-llama/Llama-3.2-1B-Instruct",
    "--mode", "timeseries_only",
    "--indicator", "time"
]

In [13]:
parser = argparse.ArgumentParser()
parser.add_argument("--dataset_path", type=str, help="path to the dataset")
parser.add_argument("--save_path", type=str, help="path to save the results")
parser.add_argument("--indicator", default="macd", type=str, help="macd, bb, or time")
parser.add_argument("--model_type",  type=str, help="deepseek or llama")
parser.add_argument("--model",  type=str, help="model name")
parser.add_argument(
    "--mode",
    type=str,
    default="combined",
    help="choose from timeseries_only, text_only, combined",
)
args = parser.parse_args()

save_path = Path(args.save_path)
details_path = save_path / "output_details"
visualizations_path = save_path / "visualizations"
details_path.mkdir(parents=True, exist_ok=True)
visualizations_path.mkdir(parents=True, exist_ok=True)

data_list = []
df = pd.read_parquet(args.dataset_path)
filename = Path(args.dataset_path).name

df["text"] = df["text"].apply(lambda x: json.loads(x) if isinstance(x, str) else x)
df["technical"] = df["technical"].apply(lambda x: json.loads(x) if isinstance(x, str) else x)

for col in ["input_window", "output_window", "input_timestamps"]:
    df[col] = df[col].apply(lambda x: x.tolist() if isinstance(x, np.ndarray) else x)

for _, row in df.iterrows():
    text = row["text"]
    technical = row["technical"]

    extracted_data = {
        "filename": filename,
        "input_window": row["input_window"],
        "output_window": row["output_window"],
        "text": text["content"],
        "input_timestamps": row["input_timestamps"],
        "in_macd": technical.get("in_macd"),
        "out_macd": technical.get("out_macd"),
        "in_upper_bb": technical.get("in_upper_bb"),
        "out_upper_bb": technical.get("out_upper_bb"),
    }

    data_list.append(extracted_data)
data_list = data_list[:150]

model = ModelFactory.get_model(model_type=args.model_type, model_name=args.model)

result_list = []
tot_samples = len(data_list)
print(f"Evaluating {tot_samples} samples...")

epoch_results = []
cumulative_mse, cumulative_mae, cumulative_rmse, cumulative_mape = [], [], [], []
for idx, sample in tqdm(enumerate(data_list), total=tot_samples):
    try:
        datetime_list = [
            datetime.fromtimestamp(s).strftime("%Y-%m-%d %H:%M:%S")
            for s in sample["input_timestamps"]
        ]
        text = sample["text"]
        input_ts = sample["input_window"]
        granularity_string = format_time_difference(
            sample["input_timestamps"][1] - sample["input_timestamps"][0]
        )

        if args.indicator == "macd":
            output_ts = sample["out_macd"]
            designed_prompt = finance_macd_metaprompt_generation(
                text=text,
                prices=input_ts,
                start_datetime=datetime_list[0],
                end_datetime=datetime_list[-1],
                pred_end_datetime=output_ts[-1],
                granularity=granularity_string,
                prediction_length=len(output_ts),
                mode=args.mode,
            )
        elif args.indicator == "bb":
            output_ts = sample["out_upper_bb"]
            designed_prompt = finance_bb_metaprompt_generation(
                text=text,
                prices=input_ts,
                start_datetime=datetime_list[0],
                end_datetime=datetime_list[-1],
                pred_end_datetime=output_ts[-1],
                granularity=granularity_string,
                prediction_length=len(output_ts),
                mode=args.mode,
            )
        elif args.indicator == "time":
            output_ts = sample["output_window"]
            designed_prompt = finance_mse_metaprompt_generation(
                text=text,
                prices=input_ts,
                start_datetime=datetime_list[0],
                end_datetime=datetime_list[-1],
                pred_end_datetime=output_ts[-1],
                granularity=granularity_string,
                prediction_length=len(output_ts),
                mode=args.mode,
            )

        answer = model.inference(designed_prompt)
        answer = answer.strip().replace('"', '')

        predict_ts = parse_val_prediction_response(answer)
        predict_ts_orig = predict_ts
        predict_ts = np.interp( # type: ignore
            np.linspace(0, 1, len(output_ts)),
            np.linspace(0, 1, len(predict_ts)),
            predict_ts
        )

        res = {
            "filename": sample["filename"],
            "response": answer,
            "ground_truth": output_ts,
            "predict": predict_ts.tolist(),
        }
        result_list.append(res)

        save_to_json(res, details_path / sample["filename"])

        if args.indicator == "macd":
            first_half = sample["in_macd"]
        elif args.indicator == "bb":
            first_half = sample["in_upper_bb"]
        elif args.indicator == "time":
            first_half = sample["input_window"]
        plot_series(sample["filename"], first_half, output_ts, predict_ts_orig, visualizations_path)

        mse = np.mean((np.array(output_ts) - np.array(predict_ts)) ** 2)
        mae = np.mean(np.abs(np.array(output_ts) - np.array(predict_ts)))
        rmse = np.sqrt(mse)
        mape = calculate_mape(output_ts, predict_ts)

        if args.indicator == "macd" and mse > 10:
            print(f"{sample['filename']} failed mse", mse)
            epoch_results.append({
                "filename": sample["filename"],
                "failed": True,
                "epoch": idx + 1,
                "mse": mse,
                "mae": mae,
                "rmse": rmse,
                "mape": mape,
            })
            continue

        if args.indicator == "time" and mse > 100:
            print(f"{sample['filename']} failed mse ", mse)
            epoch_results.append({
                "filename": sample["filename"],
                "failed": True,
                "epoch": idx + 1,
                "mse": mse,
                "mae": mae,
                "rmse": rmse,
                "mape": mape,
            })
            continue

        if args.indicator == "bb" and mse > 100:
            print(f"{sample['filename']} failed mse ", mse)
            epoch_results.append({
                "filename": sample["filename"],
                "failed": True,
                "epoch": idx + 1,
                "mse": mse,
                "mae": mae,
                "rmse": rmse,
                "mape": mape,
            })
            continue

        cumulative_mse.append(mse)
        cumulative_mae.append(mae)
        cumulative_rmse.append(rmse)
        cumulative_mape.append(mape)

        epoch_results.append({
            "filename": sample["filename"],
            "epoch": idx + 1,
            "mse": mse,
            "mae": mae,
            "rmse": rmse,
            "mape": mape,
            "mean_mse": np.mean(cumulative_mse),
            "mean_mae": np.mean(cumulative_mae),
            "mean_rmse": np.mean(cumulative_rmse),
            "mean_mape": np.mean(cumulative_mape),
        })
        save_to_json(epoch_results, f"{save_path}/epoch_results.json")
        print(
            "{}/{}: mse: {:.4f}, mae: {:.4f}, rmse: {:.4f}".format(
                idx, tot_samples, mse, mae, rmse
            )
        )
    except Exception as e:
        print(f"Skipping {idx} due to error: {e}")


summary = {
    "total_samples": len(result_list),
    "mse": np.mean(cumulative_mse),
    "mae": np.mean(cumulative_mae),
    "rmse": np.mean(cumulative_rmse),
    "mape": np.mean(cumulative_mape),
}

save_to_json(summary, f"{save_path}/final_results.json")
print(f"Processing complete. Results saved to {save_path}/final_results.json")

config.json:   0%|          | 0.00/877 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.47G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/189 [00:00<?, ?B/s]

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/54.5k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/296 [00:00<?, ?B/s]

Device set to use cuda:0


Evaluating 150 samples...


  0%|          | 0/150 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  1%|          | 1/150 [00:42<1:44:35, 42.12s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


0/150: mse: 0.8459, mae: 0.8492, rmse: 0.9197


  1%|▏         | 2/150 [01:04<1:15:53, 30.76s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


1/150: mse: 22.9970, mae: 4.7940, rmse: 4.7955


  2%|▏         | 3/150 [01:39<1:19:49, 32.58s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


2/150: mse: 0.0037, mae: 0.0508, rmse: 0.0611


  3%|▎         | 4/150 [02:14<1:21:12, 33.37s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


3/150: mse: 76.7770, mae: 8.7053, rmse: 8.7622


  3%|▎         | 5/150 [02:52<1:24:52, 35.12s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


4/150: mse: 19.8816, mae: 3.6194, rmse: 4.4589


  4%|▍         | 6/150 [03:26<1:23:40, 34.86s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


5/150: mse: 0.2182, mae: 0.4656, rmse: 0.4672


  5%|▍         | 7/150 [03:59<1:21:10, 34.06s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


6/150: mse: 25.9288, mae: 4.9561, rmse: 5.0920


  5%|▌         | 8/150 [04:01<57:00, 24.09s/it]  Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Skipping 7 due to error: object of type 'NoneType' has no len()


  6%|▌         | 9/150 [04:24<55:23, 23.57s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Skipping 8 due to error: object of type 'NoneType' has no len()


  7%|▋         | 10/150 [04:48<55:27, 23.77s/it]You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Skipping 9 due to error: object of type 'NoneType' has no len()


  7%|▋         | 11/150 [05:22<1:02:18, 26.89s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


10/150: mse: 39.8237, mae: 5.8422, rmse: 6.3106


  8%|▊         | 12/150 [05:56<1:06:56, 29.10s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  700.9013083823581


  9%|▊         | 13/150 [06:30<1:09:53, 30.61s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


12/150: mse: 0.1646, mae: 0.3587, rmse: 0.4056


  9%|▉         | 14/150 [07:04<1:11:28, 31.53s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Skipping 13 due to error: object of type 'NoneType' has no len()


 10%|█         | 15/150 [07:39<1:13:14, 32.56s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


14/150: mse: 28.3542, mae: 5.0925, rmse: 5.3249


 11%|█         | 16/150 [07:42<52:55, 23.70s/it]  Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Skipping 15 due to error: object of type 'NoneType' has no len()


 11%|█▏        | 17/150 [08:16<59:35, 26.88s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


16/150: mse: 0.0997, mae: 0.2745, rmse: 0.3157


 12%|█▏        | 18/150 [08:50<1:03:53, 29.04s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  528.2930550958761


 13%|█▎        | 19/150 [09:23<1:06:02, 30.25s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Skipping 18 due to error: object of type 'NoneType' has no len()


 13%|█▎        | 20/150 [09:48<1:01:42, 28.48s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  105.33068834921436


 14%|█▍        | 21/150 [10:07<55:24, 25.77s/it]  Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  3618.112980882659


 15%|█▍        | 22/150 [10:41<1:00:14, 28.24s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


21/150: mse: 4.7247, mae: 1.6368, rmse: 2.1736


 15%|█▌        | 23/150 [11:16<1:03:44, 30.12s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  216.3067308101457


 16%|█▌        | 24/150 [11:50<1:05:35, 31.24s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


23/150: mse: 82.1260, mae: 2.4405, rmse: 9.0623


 17%|█▋        | 25/150 [12:22<1:05:31, 31.45s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


24/150: mse: 19.1604, mae: 3.5218, rmse: 4.3773


 17%|█▋        | 26/150 [12:46<1:00:30, 29.27s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Skipping 25 due to error: object of type 'NoneType' has no len()


 18%|█▊        | 27/150 [13:20<1:02:44, 30.61s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


26/150: mse: 5.9241, mae: 1.9886, rmse: 2.4339


 19%|█▊        | 28/150 [13:54<1:04:28, 31.71s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


27/150: mse: 10.3320, mae: 3.1963, rmse: 3.2143


 19%|█▉        | 29/150 [14:28<1:05:29, 32.48s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


28/150: mse: 91.9047, mae: 7.9794, rmse: 9.5867


 20%|██        | 30/150 [14:52<59:33, 29.78s/it]  Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  476.9762509445823


 21%|██        | 31/150 [15:12<53:17, 26.87s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Skipping 30 due to error: object of type 'NoneType' has no len()


 21%|██▏       | 32/150 [15:46<57:26, 29.21s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


31/150: mse: 1.8976, mae: 1.1604, rmse: 1.3775


 22%|██▏       | 33/150 [16:13<55:16, 28.34s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  3141.964168084717


 23%|██▎       | 34/150 [16:47<58:15, 30.13s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


33/150: mse: 71.1251, mae: 4.5253, rmse: 8.4336


 23%|██▎       | 35/150 [17:20<59:40, 31.14s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


34/150: mse: 1.9321, mae: 1.3900, rmse: 1.3900


 24%|██▍       | 36/150 [17:54<1:00:47, 32.00s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  2844.409478853146


 25%|██▍       | 37/150 [18:12<51:56, 27.58s/it]  Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  398117.3293776806


 25%|██▌       | 38/150 [18:46<55:12, 29.58s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


37/150: mse: 1.9498, mae: 1.2445, rmse: 1.3964


 26%|██▌       | 39/150 [18:48<39:43, 21.47s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Skipping 38 due to error: object of type 'NoneType' has no len()


 27%|██▋       | 40/150 [19:23<46:28, 25.35s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


39/150: mse: 23.6721, mae: 4.7220, rmse: 4.8654


 27%|██▋       | 41/150 [19:57<50:46, 27.95s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


40/150: mse: 1.9672, mae: 1.3886, rmse: 1.4026


 28%|██▊       | 42/150 [20:31<53:43, 29.85s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


41/150: mse: 1.1987, mae: 0.8148, rmse: 1.0949


 29%|██▊       | 43/150 [20:57<51:03, 28.63s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  40686.68298927422


 29%|██▉       | 44/150 [21:30<52:56, 29.97s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


43/150: mse: 0.6337, mae: 0.6894, rmse: 0.7961


 30%|███       | 45/150 [21:51<47:51, 27.35s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  40950.394796612185


 31%|███       | 46/150 [22:26<51:03, 29.46s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


45/150: mse: 1.3492, mae: 0.9801, rmse: 1.1616


 31%|███▏      | 47/150 [23:00<52:54, 30.82s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  1589.628304076915


 32%|███▏      | 48/150 [23:34<54:18, 31.94s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


47/150: mse: 6.3105, mae: 2.4742, rmse: 2.5121


 33%|███▎      | 49/150 [24:08<54:56, 32.64s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


48/150: mse: 1.2537, mae: 1.0573, rmse: 1.1197


 33%|███▎      | 50/150 [24:42<54:58, 32.98s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


49/150: mse: 13.4119, mae: 3.1323, rmse: 3.6622


 34%|███▍      | 51/150 [25:17<55:10, 33.44s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


50/150: mse: 0.1518, mae: 0.3485, rmse: 0.3897


 35%|███▍      | 52/150 [25:51<55:07, 33.75s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  41540.656849687046


 35%|███▌      | 53/150 [26:25<54:38, 33.80s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


52/150: mse: 0.5975, mae: 0.5600, rmse: 0.7730


 36%|███▌      | 54/150 [26:49<49:22, 30.86s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Skipping 53 due to error: object of type 'NoneType' has no len()


 37%|███▋      | 55/150 [27:23<50:19, 31.78s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


54/150: mse: 0.9298, mae: 0.7993, rmse: 0.9642


 37%|███▋      | 56/150 [27:57<50:43, 32.38s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


55/150: mse: 55.1217, mae: 6.7066, rmse: 7.4244


 38%|███▊      | 57/150 [28:31<51:02, 32.93s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


56/150: mse: 0.0271, mae: 0.1371, rmse: 0.1647


 39%|███▊      | 58/150 [29:07<51:52, 33.83s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


57/150: mse: 1.0896, mae: 1.0229, rmse: 1.0439


 39%|███▉      | 59/150 [29:39<50:16, 33.15s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  198.90615197188268


 40%|████      | 60/150 [30:13<50:21, 33.57s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


59/150: mse: 16.8147, mae: 3.4946, rmse: 4.1006


 41%|████      | 61/150 [30:44<48:43, 32.85s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  577.989566666795


 41%|████▏     | 62/150 [31:19<48:48, 33.28s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


61/150: mse: 1.1032, mae: 0.8711, rmse: 1.0503


 42%|████▏     | 63/150 [31:52<48:22, 33.36s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Skipping 62 due to error: object of type 'NoneType' has no len()


 43%|████▎     | 64/150 [32:26<48:10, 33.61s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


63/150: mse: 37.0748, mae: 5.8143, rmse: 6.0889


 43%|████▎     | 65/150 [33:00<47:44, 33.70s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


64/150: mse: 0.1589, mae: 0.3415, rmse: 0.3986


 44%|████▍     | 66/150 [33:34<47:22, 33.84s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Skipping 65 due to error: object of type 'NoneType' has no len()


 45%|████▍     | 67/150 [34:09<46:59, 33.97s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


66/150: mse: 13.0556, mae: 3.3093, rmse: 3.6133


 45%|████▌     | 68/150 [34:43<46:23, 33.94s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


67/150: mse: 0.1903, mae: 0.4074, rmse: 0.4362


 46%|████▌     | 69/150 [35:17<45:59, 34.07s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


68/150: mse: 0.0292, mae: 0.1480, rmse: 0.1709


 47%|████▋     | 70/150 [35:51<45:30, 34.13s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


69/150: mse: 0.0546, mae: 0.2309, rmse: 0.2336


 47%|████▋     | 71/150 [36:25<44:56, 34.14s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


70/150: mse: 0.9755, mae: 0.8152, rmse: 0.9877


 48%|████▊     | 72/150 [37:00<44:23, 34.15s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


71/150: mse: 10.5923, mae: 3.1859, rmse: 3.2546


 49%|████▊     | 73/150 [37:34<43:45, 34.10s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


72/150: mse: 1.5190, mae: 1.0801, rmse: 1.2325


 49%|████▉     | 74/150 [38:07<43:04, 34.00s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


73/150: mse: 0.2581, mae: 0.4641, rmse: 0.5080


 50%|█████     | 75/150 [38:41<42:34, 34.07s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


74/150: mse: 0.9267, mae: 0.8128, rmse: 0.9626


 51%|█████     | 76/150 [38:58<35:25, 28.72s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  7980.612716783116


 51%|█████▏    | 77/150 [39:33<37:27, 30.78s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  3178.2312913885676


 52%|█████▏    | 78/150 [40:07<38:08, 31.78s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


77/150: mse: 2.3037, mae: 1.3086, rmse: 1.5178


 53%|█████▎    | 79/150 [40:41<38:21, 32.42s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


78/150: mse: 76.6248, mae: 8.3070, rmse: 8.7536


 53%|█████▎    | 80/150 [41:15<38:24, 32.92s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


79/150: mse: 7.9735, mae: 2.4976, rmse: 2.8237


 54%|█████▍    | 81/150 [41:49<38:08, 33.16s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


80/150: mse: 61.1492, mae: 5.1761, rmse: 7.8198


 55%|█████▍    | 82/150 [42:23<37:57, 33.50s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


81/150: mse: 7.6197, mae: 2.6670, rmse: 2.7604


 55%|█████▌    | 83/150 [42:58<37:38, 33.71s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


82/150: mse: 2.4427, mae: 1.5203, rmse: 1.5629


 56%|█████▌    | 84/150 [43:31<37:07, 33.74s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


83/150: mse: 3.9027, mae: 1.6960, rmse: 1.9755


 57%|█████▋    | 85/150 [44:06<36:51, 34.03s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


84/150: mse: 1.0694, mae: 0.8598, rmse: 1.0341


 57%|█████▋    | 86/150 [44:33<33:57, 31.84s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Skipping 85 due to error: object of type 'NoneType' has no len()


 58%|█████▊    | 87/150 [45:07<34:06, 32.49s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


86/150: mse: 3.3619, mae: 1.4998, rmse: 1.8335


 59%|█████▊    | 88/150 [45:41<34:03, 32.96s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


87/150: mse: 0.2610, mae: 0.4469, rmse: 0.5109


 59%|█████▉    | 89/150 [46:16<34:00, 33.45s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


88/150: mse: 1.0438, mae: 0.9653, rmse: 1.0217


 60%|██████    | 90/150 [46:50<33:35, 33.60s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


89/150: mse: 0.1461, mae: 0.3167, rmse: 0.3823


 61%|██████    | 91/150 [47:11<29:34, 30.07s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


90/150: mse: 84.0547, mae: 9.1636, rmse: 9.1681


 61%|██████▏   | 92/150 [47:46<30:16, 31.31s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


91/150: mse: 26.0817, mae: 4.5421, rmse: 5.1070


 62%|██████▏   | 93/150 [48:19<30:26, 32.05s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


92/150: mse: 0.0316, mae: 0.1439, rmse: 0.1778


 63%|██████▎   | 94/150 [48:54<30:31, 32.71s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


93/150: mse: 32.0015, mae: 5.6354, rmse: 5.6570


 63%|██████▎   | 95/150 [49:28<30:27, 33.23s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


94/150: mse: 5.8525, mae: 2.4025, rmse: 2.4192


 64%|██████▍   | 96/150 [50:02<30:12, 33.57s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


95/150: mse: 0.0754, mae: 0.0994, rmse: 0.2745


 65%|██████▍   | 97/150 [50:37<29:48, 33.75s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  2283.7056870677334


 65%|██████▌   | 98/150 [51:12<29:36, 34.17s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


97/150: mse: 1.7423, mae: 1.2756, rmse: 1.3200


 66%|██████▌   | 99/150 [51:46<29:01, 34.15s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


98/150: mse: 2.0403, mae: 1.2102, rmse: 1.4284


 67%|██████▋   | 100/150 [52:20<28:26, 34.13s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


99/150: mse: 4.0408, mae: 1.4739, rmse: 2.0102


 67%|██████▋   | 101/150 [52:53<27:44, 33.96s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


100/150: mse: 0.2483, mae: 0.4594, rmse: 0.4983


 68%|██████▊   | 102/150 [53:28<27:16, 34.09s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


101/150: mse: 0.8308, mae: 0.7757, rmse: 0.9115


 69%|██████▊   | 103/150 [54:02<26:47, 34.21s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  131.2090219042981


 69%|██████▉   | 104/150 [54:34<25:42, 33.54s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


103/150: mse: 0.0508, mae: 0.1979, rmse: 0.2254


 70%|███████   | 105/150 [55:08<25:17, 33.72s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


104/150: mse: 8.1996, mae: 2.8093, rmse: 2.8635


 71%|███████   | 106/150 [55:42<24:47, 33.81s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


105/150: mse: 1.5852, mae: 0.9866, rmse: 1.2591


 71%|███████▏  | 107/150 [56:17<24:26, 34.11s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


106/150: mse: 19.4977, mae: 4.3929, rmse: 4.4156


 72%|███████▏  | 108/150 [56:21<17:23, 24.84s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Skipping 107 due to error: object of type 'NoneType' has no len()


 73%|███████▎  | 109/150 [56:54<18:49, 27.54s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  143.44331841778182


 73%|███████▎  | 110/150 [57:28<19:35, 29.39s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


109/150: mse: 0.5172, mae: 0.7052, rmse: 0.7192


 74%|███████▍  | 111/150 [58:02<20:01, 30.81s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


110/150: mse: 6.0697, mae: 2.2230, rmse: 2.4637


 75%|███████▍  | 112/150 [58:36<20:05, 31.71s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


111/150: mse: 0.0399, mae: 0.1820, rmse: 0.1997


 75%|███████▌  | 113/150 [59:10<20:00, 32.45s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


112/150: mse: 0.2538, mae: 0.4163, rmse: 0.5038


 76%|███████▌  | 114/150 [59:30<17:11, 28.66s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Skipping 113 due to error: object of type 'NoneType' has no len()


 77%|███████▋  | 115/150 [1:00:04<17:35, 30.16s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


114/150: mse: 8.2271, mae: 0.9703, rmse: 2.8683


 77%|███████▋  | 116/150 [1:00:37<17:34, 31.03s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


115/150: mse: 5.7682, mae: 2.3497, rmse: 2.4017


 78%|███████▊  | 117/150 [1:01:11<17:35, 31.97s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  196.86882755131728


 79%|███████▊  | 118/150 [1:01:45<17:24, 32.64s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


117/150: mse: 0.2524, mae: 0.3949, rmse: 0.5024


 79%|███████▉  | 119/150 [1:02:19<17:05, 33.10s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


118/150: mse: 2.2371, mae: 1.1863, rmse: 1.4957


 80%|████████  | 120/150 [1:02:37<14:11, 28.38s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  22445.053951282058


 81%|████████  | 121/150 [1:03:11<14:31, 30.05s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


120/150: mse: 2.8633, mae: 1.4080, rmse: 1.6921


 81%|████████▏ | 122/150 [1:03:44<14:32, 31.16s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


121/150: mse: 0.1269, mae: 0.3048, rmse: 0.3562


 82%|████████▏ | 123/150 [1:04:18<14:25, 32.06s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


122/150: mse: 20.7818, mae: 3.7750, rmse: 4.5587


 83%|████████▎ | 124/150 [1:04:50<13:48, 31.88s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


123/150: mse: 0.6651, mae: 0.7194, rmse: 0.8155


 83%|████████▎ | 125/150 [1:05:24<13:34, 32.58s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


124/150: mse: 76.2882, mae: 8.2835, rmse: 8.7343


 84%|████████▍ | 126/150 [1:05:59<13:15, 33.13s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


125/150: mse: 0.9917, mae: 0.9525, rmse: 0.9958


 85%|████████▍ | 127/150 [1:06:32<12:47, 33.36s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


126/150: mse: 52.5301, mae: 5.5033, rmse: 7.2478


 85%|████████▌ | 128/150 [1:07:06<12:17, 33.54s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


127/150: mse: 17.9861, mae: 1.4555, rmse: 4.2410


 86%|████████▌ | 129/150 [1:07:40<11:47, 33.69s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  196.15869544468552


 87%|████████▋ | 130/150 [1:08:15<11:17, 33.87s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


129/150: mse: 0.0460, mae: 0.2071, rmse: 0.2146


 87%|████████▋ | 131/150 [1:08:49<10:44, 33.94s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


130/150: mse: 44.1334, mae: 6.5380, rmse: 6.6433


 88%|████████▊ | 132/150 [1:09:22<10:09, 33.86s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


131/150: mse: 1.6872, mae: 1.1641, rmse: 1.2989


 89%|████████▊ | 133/150 [1:09:55<09:29, 33.52s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  3653.42044059902


 89%|████████▉ | 134/150 [1:10:29<08:57, 33.58s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


133/150: mse: 1.2975, mae: 0.8383, rmse: 1.1391


 90%|█████████ | 135/150 [1:11:03<08:26, 33.74s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


134/150: mse: 0.1846, mae: 0.3668, rmse: 0.4296


 91%|█████████ | 136/150 [1:11:36<07:50, 33.62s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  10851.531301125704


 91%|█████████▏| 137/150 [1:12:11<07:19, 33.81s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


136/150: mse: 2.6559, mae: 1.3587, rmse: 1.6297


 92%|█████████▏| 138/150 [1:12:45<06:46, 33.88s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


137/150: mse: 84.9996, mae: 8.8937, rmse: 9.2195


 93%|█████████▎| 139/150 [1:13:19<06:12, 33.90s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


138/150: mse: 0.1419, mae: 0.3750, rmse: 0.3767


 93%|█████████▎| 140/150 [1:13:53<05:40, 34.07s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


139/150: mse: 15.6635, mae: 3.7166, rmse: 3.9577


 94%|█████████▍| 141/150 [1:14:27<05:05, 33.95s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


140/150: mse: 82.3389, mae: 7.5817, rmse: 9.0741


 95%|█████████▍| 142/150 [1:14:29<03:15, 24.42s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Skipping 141 due to error: object of type 'NoneType' has no len()


 95%|█████████▌| 143/150 [1:15:02<03:08, 26.94s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


142/150: mse: 37.1552, mae: 4.6159, rmse: 6.0955


 96%|█████████▌| 144/150 [1:15:34<02:51, 28.64s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


143/150: mse: 10.3197, mae: 3.0265, rmse: 3.2124


 97%|█████████▋| 145/150 [1:16:08<02:31, 30.20s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


144/150: mse: 37.9575, mae: 6.1608, rmse: 6.1610


 97%|█████████▋| 146/150 [1:16:32<01:53, 28.27s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


145/150: mse: 1.2719, mae: 1.1209, rmse: 1.1278


 98%|█████████▊| 147/150 [1:17:06<01:30, 30.01s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


146/150: mse: 23.5279, mae: 4.0123, rmse: 4.8506


 99%|█████████▊| 148/150 [1:17:40<01:02, 31.12s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


147/150: mse: 34.1707, mae: 5.8025, rmse: 5.8456


 99%|█████████▉| 149/150 [1:18:14<00:31, 31.96s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


148/150: mse: 4.0269, mae: 1.4963, rmse: 2.0067


100%|██████████| 150/150 [1:18:46<00:00, 31.51s/it]

149/150: mse: 0.3339, mae: 0.5534, rmse: 0.5778
Processing complete. Results saved to MTBench-Test/llama/tsforecast_short/final_results.json



