### 1. Install Dependencies

In [1]:
!pip install pandas gdown huggingface-hub numpy matplotlib scikit-learn transformers torch tqdm

Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch)
  Downloading nvidia_curand_cu12-10.3.5

In [2]:
!pip install -U datasets

Collecting datasets
  Downloading datasets-3.6.0-py3-none-any.whl.metadata (19 kB)
Collecting fsspec<=2025.3.0,>=2023.1.0 (from fsspec[http]<=2025.3.0,>=2023.1.0->datasets)
  Downloading fsspec-2025.3.0-py3-none-any.whl.metadata (11 kB)
Downloading datasets-3.6.0-py3-none-any.whl (491 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m491.5/491.5 kB[0m [31m23.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading fsspec-2025.3.0-py3-none-any.whl (193 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m193.6/193.6 kB[0m [31m15.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: fsspec, datasets
  Attempting uninstall: fsspec
    Found existing installation: fsspec 2025.3.2
    Uninstalling fsspec-2025.3.2:
      Successfully uninstalled fsspec-2025.3.2
  Attempting uninstall: datasets
    Found existing installation: datasets 2.14.4
    Uninstalling datasets-2.14.4:
      Successfully uninstalled datasets-2.14.4
[31mERROR: pip's dependency r

### 2. Imports

In [3]:
import os
import json
import sys
import argparse
import re
from typing import List, Union
import pandas as pd
from tqdm import tqdm
from pathlib import Path
from google.colab import drive
from datetime import datetime

from abc import ABC, abstractmethod

import torch
import numpy as np
import matplotlib.pyplot as plt
from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

In [4]:
torch.cuda.is_available()

True

### 3. Connect to Google Drive

In [5]:
drive.mount("/content/drive",force_remount=True)
os.chdir("/content/drive/My Drive")

Mounted at /content/drive


### 4. Utils Code

In [6]:
def format_time_difference(seconds):
    minutes = seconds // 60
    hours = minutes // 60
    days = hours // 24

    if days > 0:
        if hours % 24 > 0.1:
            return f"{days} days-{hours % 24} hours"
        else:
            return f"{days} days"
    elif hours > 0:
        if minutes % 60 > 0.1:
            return f"{hours} hours-{minutes % 60} minutes"
        else:
            return f"{hours} hours"
    elif minutes > 0:
        if seconds % 60 > 0.1:
            return f"{minutes} minutes-{seconds % 60} seconds"
        else:
            return f"{minutes} minutes"
    else:
        return f"{seconds} seconds"

def save_to_json(data, save_path):
    os.makedirs(os.path.dirname(save_path), exist_ok=True)
    with open(save_path, "w") as f:
        json.dump(data, f, indent=4)

def plot_series(filename, input_ts, output_ts, predicted_ts, save_folder):
    plt.figure(figsize=(10, 5))
    plt.plot(range(len(input_ts)), input_ts, label="Input Time Series", marker='o')
    plt.plot(range(len(input_ts), len(input_ts) + len(output_ts)), output_ts, label="Ground Truth", marker='o')
    plt.plot(range(len(input_ts), len(input_ts) + len(predicted_ts)), predicted_ts, label="Predicted", linestyle='dashed')
    plt.legend()
    plt.title(f"Prediction for {filename}")
    plt.xlabel("Time Steps")
    plt.ylabel("Value")
    plt.grid()

    base_name = os.path.splitext(filename)[0]
    save_path = os.path.join(save_folder, base_name + ".png")
    plt.savefig(save_path)

    plt.close()

def calculate_mape(y_true, y_pred):
    y_true, y_pred = np.array(y_true), np.array(y_pred)

    # Avoid division by zero
    mask = y_true != 0
    return np.mean(np.abs((y_true[mask] - y_pred[mask]) / y_true[mask])) * 100


def calculate_acc(result_list, regrouped_labels = None):
    if regrouped_labels is None:
        correct_pred = sum(1 for result in result_list if result["ground_truth"] in result["predict"])
    else:
        correct_pred = 0
        for result in result_list:
            gt_group = regrouped_labels[result['ground_truth']]
            for original_label in regrouped_labels.keys():
                if original_label in result['predict']:
                    predict_group = regrouped_labels[original_label]
                    if gt_group == predict_group:
                        correct_pred += 1
                        break

    total_pred = len(result_list)
    accuracy = correct_pred / total_pred

    return accuracy


def calculate_correlation_acc(result_list):
    model_predictions = {"total": 0, "exact_correct": 0, "brief_correct": 0}
    positive_correlations = ["Strong Positive Correlation", "Moderate Positive Correlation"]
    negative_correlations = ["Strong Negative Correlation", "Moderate Negative Correlation"]
    for result in result_list:
        prediction = result["predict"].strip()
        model_predictions["total"] += 1
        if prediction == result["ground_truth"]:
            model_predictions["exact_correct"] += 1

        # Brief accuracy
        pred_is_positive = prediction in positive_correlations
        pred_is_negative = prediction in negative_correlations
        truth_is_positive = result["ground_truth"] in positive_correlations
        truth_is_negative = result["ground_truth"] in negative_correlations

        if (pred_is_positive and truth_is_positive) or \
            (pred_is_negative and truth_is_negative) or \
            (prediction == result["ground_truth"]):
            model_predictions["brief_correct"] += 1

    # Calculate and format results
    total = model_predictions["total"]
    exact_accuracy = (model_predictions["exact_correct"] / total) * 100
    brief_accuracy = (model_predictions["brief_correct"] / total) * 100

    metric_results = {
        "exact_accuracy": f"{round(exact_accuracy, 2)}%",
        "brief_accuracy": f"{round(brief_accuracy, 2)}%",
        "total_samples": total
    }
    return metric_results


def calculate_mcqa_acc(result_list):
    correct = 0
    total = 0
    for result in result_list:
        predition = result["predict"].strip()
        predition = predition[0].upper()
        if predition == result["ground_truth"]:
            correct += 1

        total += 1

    accuracy = correct / total

    return accuracy * 100

### 5. Models Code

In [7]:
class BaseModel(ABC):
    @abstractmethod
    def inference(self, content: str) -> str:
        """
        Run inference on a given input prompt and return the generated output.
        """
        pass

In [8]:
class DeepSeekModel(BaseModel):
    def __init__(self, model_name: str = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", **kwargs):

        # The model is set in eval mode by default by using eval()
        # See: https://huggingface.co/docs/transformers/en/main_classes/model#transformers.PreTrainedModel
        self.model = AutoModelForCausalLM.from_pretrained(
            model_name,
            torch_dtype="auto",
            device_map="auto",
            **kwargs
        )
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)

    def inference(self, content: str) -> str:
        messages = [{"role": "user", "content": content}]

        chat_prompt = self.tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True
        )
        tokenized_input = self.tokenizer([chat_prompt], return_tensors="pt").to(self.model.device)
        generated_output = self.model.generate(
            **tokenized_input,
            max_new_tokens=4096,
        )
        output_ids = generated_output[0][len(tokenized_input.input_ids[0]):].tolist()

        # parsing thinking content
        try:
            # rindex finding 151649 (</think>)
            index = len(output_ids) - output_ids[::-1].index(151649)
        except ValueError:
            index = 0
        outputs = self.tokenizer.decode(output_ids[index:], skip_special_tokens=True).strip("\n")

        return outputs

In [None]:
class LLaMAModel(BaseModel):
    def __init__(self, model_name: str = "meta-llama/Llama-3.2-1B-Instruct", **kwargs):
        self.pipeline = pipeline(
            "text-generation",
            model=model_name,
            torch_dtype="auto",
            device_map="auto",
            token="",
            **kwargs
        )

    def inference(self, content: str) -> str:
        messages = [{"role": "user", "content": content}]

        outputs = self.pipeline(messages, max_new_tokens=1024)

        return outputs[0]["generated_text"][-1]["content"]

In [10]:
class ModelFactory:
    def __init__(self, config: dict):
        self.config = config

    @staticmethod
    def get_model(model_type: str, model_name: str, **kwargs) -> BaseModel:
        if model_type == "deepseek":
            return DeepSeekModel(model_name=model_name, **kwargs)
        elif model_type == "llama":
            return LLaMAModel(model_name=model_name, **kwargs)
        else:
            raise ValueError(f"Unsupported model type: {model_type}")

### 6. Experiment Code --

In [11]:
def finance_mse_metaprompt_generation(
    text: str,
    prices: List[float],
    start_datetime: str,
    end_datetime: str,
    pred_end_datetime: str,
    granularity: str,
    prediction_length: int,
    mode: str,
) -> str:
    """
    Generates a meta-prompt for hypothetical stock price trend analysis
    based on given inputs.

    Args:
        text (str): News article content within the input time series range.
        prices (List[float]): Historical stock prices.
        start_datetime (str): Start datetime of the input time series.
        end_datetime (str): End datetime of the input time series.
        pred_end_datetime (str): End datetime of the hypothetical projection.
        granularity (str): Granularity of the input time series (e.g., daily, hourly).
        prediction_length (int): Number of future time steps to estimate.
        mode (str): Mode of estimation ("timeseries_only", "text_only", "combined").

    Returns:
        str: Meta-prompt for ChatGPT.
    """
    prompt = (
        f"You are an AI assistant trained in data analysis and modeling. "
        f"Your task is to conduct a research-based timeseries estimation for the next {prediction_length} time steps "
        f"based on provided historical price movements and/or related news articles. "
        f"This analysis aims to explore patterns in the given dataset and should not be considered financial advice. "
        f"The input time series spans from {start_datetime} to {end_datetime}, with a granularity of {granularity}. "
        f"The estimation period extends from {end_datetime} to {pred_end_datetime}, maintaining the same granularity."
    )

    if mode == "timeseries_only":
        prompt += (
            "You will analyze the numerical patterns in historical prices and extrapolate potential movements. "
            f"The input prices are: {prices}. "
        )
    elif mode == "text_only":
        prompt += (
            "You will analyze sentiment and potential market impacts from the following news article content: "
            f"{text}. "
        )
    elif mode == "combined":
        prompt += (
            "You will use both historical price movements and relevant news sentiment analysis "
            f"to explore hypothetical market trends. The input prices are: {prices}. The news article states: {text}. "
        )
    else:
        raise ValueError(
            "Invalid mode. Choose from 'timeseries_only', 'text_only', or 'combined'."
        )

    prompt += (
        "\n\nPlease return your estimated values in a structured format as a  list of float numbers. "
        "Ensure the output follows this format strictly: "
        "\nPredicted Prices: value1, value2, ..., valueN. "
        f"The number of estimated values should be exactly {prediction_length}. "
    )

    return prompt

def finance_macd_metaprompt_generation(
    text: str,
    prices: List[float],
    start_datetime: str,
    end_datetime: str,
    pred_end_datetime: str,
    granularity: str,
    prediction_length: int,
    mode: str,
) -> str:

    prompt = (
        f"You are an AI assistant trained in data analysis and modeling. "
        f"Your task is to Predict the future Moving Average Convergence Divergence (MACD) values for the next {prediction_length} time steps "
        f"based on provided historical timeseries movements and/or related news articles. "
        # f"This analysis aims to explore patterns in the given dataset and should not be considered financial advice. "
        f"The input time series spans from {start_datetime} to {end_datetime}, with a granularity of {granularity}. "
        f"The estimation period extends from {end_datetime} to {pred_end_datetime}, maintaining the same granularity."
    )

    if mode == "timeseries_only":
        prompt += (
            "You will analyze the numerical patterns in historical prices. "
            f"The input prices are: {prices}. "
        )
    elif mode == "text_only":
        prompt += (
            "You will analyze sentiment and potential market impacts from the following news article content: "
            f"{text}. "
        )
    elif mode == "combined":
        prompt += (
            "You will use both historical price movements and relevant text sentiment analysis "
            f"The input prices are: {prices}. The news article states: {text}. "
        )
    else:
        raise ValueError(
            "Invalid mode. Choose from 'timeseries_only', 'text_only', or 'combined'."
        )

    prompt += (
        "\n\nPlease return your predicted MACD values in a structured format as a list of float numbers. Please predict the real possible values, do not use the naive linear extrapolation or similar methods"
        "Ensure the output follows this format strictly: "
        "\nPredicted Prices: value1, value2, ..., valueN. "
        f"The number of predicted values should be exactly {prediction_length}. "
    )

    return prompt

def finance_bb_metaprompt_generation(
    text: str,
    prices: List[float],
    start_datetime: str,
    end_datetime: str,
    pred_end_datetime: str,
    granularity: str,
    prediction_length: int,
    mode: str,
) -> str:

    prompt = (
        f"You are an AI assistant trained in data analysis and modeling. "
        f"Your task is to Predict the future upper Bollinger Band (BB) values  for the next {prediction_length} time steps "
        f"based on provided historical price movements and/or related news articles. "
        # f"This analysis aims to explore patterns in the given dataset and should not be considered financial advice. "
        f"The input time series spans from {start_datetime} to {end_datetime}, with a granularity of {granularity}. "
        f"The estimation period extends from {end_datetime} to {pred_end_datetime}, maintaining the same granularity."
    )

    if mode == "timeseries_only":
        prompt += (
            "You will analyze the numerical patterns in historical prices. "
            f"The input prices are: {prices}. "
        )
    elif mode == "text_only":
        prompt += (
            "You will analyze sentiment and potential market impacts from the following news article content: "
            f"{text}. "
        )
    elif mode == "combined":
        prompt += (
            "You will use both historical price movements and relevant news sentiment analysis "
            f"to explore hypothetical market trends. The input prices are: {prices}. The news article states: {text}. "
        )
    else:
        raise ValueError(
            "Invalid mode. Choose from 'timeseries_only', 'text_only', or 'combined'."
        )

    prompt += (
        "\n\nPlease return your estimated upper Bollinger Band (BB) values values in a structured format as a list of float numbers. "
        "Ensure the output follows this format strictly: "
        "\nPredicted Prices: value1, value2, ..., valueN. "
        f"The number of estimated values should be exactly {prediction_length}. "
    )

    return prompt

def parse_val_prediction_response(response: str) -> Union[List[float], None]:
    """
    Decodes the predicted prices from a response string.

    Args:
        response (str): The response containing the predicted prices.

    Returns:
        List[float]: A list of float numbers extracted from the response.
        None: If extraction fails.
    """
    match = re.search(r"Predicted Prices:\s*([-\d.,\s]+)", response)

    if match:
        try:
            price_list = [float(value) for value in match.group(1).split(',')]
            return price_list
        except ValueError:
            pass  # If conversion fails, try another approach

    # Alternative approach: Find all potential numbers in the response
    possible_numbers = re.findall(r"-?\d+\.\d+", response)
    if possible_numbers:
        try:
            return [float(num) for num in possible_numbers]
        except ValueError:
            pass  # If conversion fails, return None

    return None  # Return None if extraction fails

def finance_classification_metaprompt_generation(text=None, timestamps=None, prices=None, mode=None):
    time_series_data = ", ".join([f"{price}" for price in  prices])

    if mode == "combined":
        meta_prompt = f"""
            You are a financial prediction expert with knowledge of advanced machine learning models and time-series analysis.
            Your goal is to predict the stock trend (rise, neutral, or fall) based on the following inputs:

            1. **Time Series Stock Price Data**:
            - This data includes stock prices recorded at 1-hour intervals over the last month from {timestamps[0]} to {timestamps[-1]}.
            - Example data format:
                {time_series_data}

            2. **News Data**:
            - This includes news headlines and summaries relevant to the stock's company or sector.
            - Example data format:
                {text}

            ### Task:
            Analyze the provided time-series data and news to identify future trends of the stock performance. Ensure that the news data is used to supplement the insights from the time-series analysis, focusing on combining both inputs for a more accurate prediction.

            ### Output:
            Provide a prediction for the stock trend categorized one of the following labels:
            - "<-4%"
            - "-2% ~ -4%"
            - "-2% ~ +2%"
            - "+2% ~ +4%"
            - ">+4%"

            please think step-by-step and briefly explain how the combination of time-series data and news data led to the prediction;
            then wrap your final answer in the final predicted label in the format ^^^label^^^
        """

    elif mode == "text_only":
        meta_prompt = f"""
            You are a financial prediction expert with knowledge of advanced machine learning models and time-series analysis.
            Your goal is to predict the stock trend with given labels based on the following input:

            **News Data**:
            - This includes news headlines and summaries relevant to the stock's company or sector.
            - Example data format:
                {text}

            ### Output:
            Provide a prediction for the stock trend categorized one of the following labels:
            - "<-4%"
            - "-2% ~ -4%"
            - "-2% ~ +2%"
            - "+2% ~ +4%"
            - ">+4%"

            ### Task:
            Analyze the news semantics to identify trends and patterns that could impact stock performance.
            Then wrap your final answer in the final predicted label in the format ^^^label^^^
        """

    elif mode == "timeseries_only":
        meta_prompt = f"""
            You are a financial prediction expert with knowledge of advanced machine learning models and time-series analysis.
            Your goal is to predict the stock trend with given labels based on the following input:

            1. **Time Series Stock Price Data**:
            - This data includes stock prices recorded at 1-hour intervals over the last month from {timestamps[0]} to {timestamps[-1]}.
            - Example data format:
                {time_series_data}

            ### Output:
            Provide a prediction for the stock trend categorized one of the following labels:
            - "<-4%"
            - "-2% ~ -4%"
            - "-2% ~ +2%"
            - "+2% ~ +4%"
            - ">+4%"

            ### Task:
            Analyze the provided time-series data to identify trends and patterns that could impact stock performance. Focus solely on the time-series data for making predictions.
             then wrap your final answer in the final predicted label in the format ^^^label^^^
        """

    return meta_prompt

def parse_cls_response(answer):
    try:
        return  re.findall(r'\^\^\^(.*?)\^\^\^', answer)[-1]
    except:
        return  re.findall(r'\^+(.*?)\^+', answer)[-1]



def finance_correlation_metaprompt_generation(setting, sticker, time1, time2, in_price, news, time_news):

    time_interval = "1 hour" if setting == "long" else "5 minutes"

    if setting == "long":
        system_prompt ="You are an expert in finance and stock market analysis. Based on the given 30-day historical stock price time series and a financial analysis published at the last timestamp of the time series, your task is to predict the correlation between the stock's price fluctuations in the next 7 days and the analysis sentiment (positive correlation indicates that positive analysis leads to price increase and negative analysis leads to price decrease). Take into account external factors or market conditions that might affect stock price movement."
    else:
        system_prompt = "You are an expert in finance and stock market analysis. Based on the given 7-day historical stock price time series and a financial analysis published at the last timestamp of the time series, your task is to predict the correlation between the stock's price fluctuations in the next 1 day and the analysis sentiment (positive correlation indicates that positive analysis leads to price increase and negative analysis leads to price decrease). Take into account external factors or market conditions that might affect stock price movement."
    question = "Return your answer in one of the following without any other words: Strong Positive Correlation, Moderate Positive Correlation, No Correlation, Moderate Negative Correlation, Strong Negative Correlation."
    query = f"stock price of {sticker} between {time1} to {time2}, time interval is {time_interval}: \
            {in_price}\
            News published at {time_news}: \
            {news}\
            {question} Answer:"
    prompt = f"{system_prompt}\n\n{query}"

    return prompt




def finance_mcqa_metaprompt_generation(setting, sticker, time1, time2, in_price, news, time_news, question):
    time_interval = "1 hour" if setting == "long" else "5 minutes"
    if setting  == "long":
        system_prompt ="You are an expert in finance and stock market analysis. Your task is to answer the question based on the given 30-day historical stock price time series and a financial analysis published at the last timestamp of the time series. Return your answer only in the letter (A, B, C, or D). "
    else:
        system_prompt ="You are an expert in finance and stock market analysis. Your task is to answer the question based on the given 7-day historical stock price time series and a financial analysis published at the last timestamp of the time series. Return your answer only in the letter (A, B, C, or D). "
    query = f"stock price of {sticker} between {time1} to {time2}, time interval is {time_interval}: \
            {in_price}\
            News published at {time_news}: \
            {news}\
            Question: {question}. Give your answer in the letter (A, B, C, or D) without any other words. Answer:"
    prompt = f"{system_prompt}\n\n{query}"
    return prompt

In [12]:
# import sys

# sys.argv = [
#     "script_name",  # Placeholder for script name (ignored by argparse)
#     "--dataset_folder", "./MTBench-Test/MTBench_finance_QA_short",
#     "--save_path", "./MTBench-Test/llama/qa_short",
#     "--model_type", "llama",
#     "--model", "meta-llama/Llama-3.2-1B-Instruct",
#     "--setting", "short"
# ]

import sys

sys.argv = [
    "script_name",  # Placeholder for script name (ignored by argparse)
    "--dataset_path", "./MTBench-Test/MTBench_finance_aligned_pairs_long/train-00000-of-00001.parquet",
    "--save_path", "./MTBench-Test/llama/tsforecast_long_combined",
    "--model_type", "llama",
    "--model", "meta-llama/Llama-3.2-1B-Instruct",
    "--mode", "combined",
    "--indicator", "time"
]

In [13]:
parser = argparse.ArgumentParser()
parser.add_argument("--dataset_path", type=str, help="path to the dataset")
parser.add_argument("--save_path", type=str, help="path to save the results")
parser.add_argument("--indicator", default="macd", type=str, help="macd, bb, or time")
parser.add_argument("--model_type",  type=str, help="deepseek or llama")
parser.add_argument("--model",  type=str, help="model name")
parser.add_argument(
    "--mode",
    type=str,
    default="combined",
    help="choose from timeseries_only, text_only, combined",
)
args = parser.parse_args()

save_path = Path(args.save_path)
details_path = save_path / "output_details"
visualizations_path = save_path / "visualizations"
details_path.mkdir(parents=True, exist_ok=True)
visualizations_path.mkdir(parents=True, exist_ok=True)

data_list = []
df = pd.read_parquet(args.dataset_path)
filename = Path(args.dataset_path).name

df["text"] = df["text"].apply(lambda x: json.loads(x) if isinstance(x, str) else x)
df["technical"] = df["technical"].apply(lambda x: json.loads(x) if isinstance(x, str) else x)

for col in ["input_window", "output_window", "input_timestamps"]:
    df[col] = df[col].apply(lambda x: x.tolist() if isinstance(x, np.ndarray) else x)

for _, row in df.iterrows():
    text = row["text"]
    technical = row["technical"]

    extracted_data = {
        "filename": filename,
        "input_window": row["input_window"],
        "output_window": row["output_window"],
        "text": text["content"],
        "input_timestamps": row["input_timestamps"],
        "in_macd": technical.get("in_macd"),
        "out_macd": technical.get("out_macd"),
        "in_upper_bb": technical.get("in_upper_bb"),
        "out_upper_bb": technical.get("out_upper_bb"),
    }

    data_list.append(extracted_data)
data_list = data_list[:100]

model = ModelFactory.get_model(model_type=args.model_type, model_name=args.model)

result_list = []
tot_samples = len(data_list)
print(f"Evaluating {tot_samples} samples...")

epoch_results = []
cumulative_mse, cumulative_mae, cumulative_rmse, cumulative_mape = [], [], [], []
for idx, sample in tqdm(enumerate(data_list), total=tot_samples):
    try:
        datetime_list = [
            datetime.fromtimestamp(s).strftime("%Y-%m-%d %H:%M:%S")
            for s in sample["input_timestamps"]
        ]
        text = sample["text"]
        input_ts = sample["input_window"]
        granularity_string = format_time_difference(
            sample["input_timestamps"][1] - sample["input_timestamps"][0]
        )

        if args.indicator == "macd":
            output_ts = sample["out_macd"]
            designed_prompt = finance_macd_metaprompt_generation(
                text=text,
                prices=input_ts,
                start_datetime=datetime_list[0],
                end_datetime=datetime_list[-1],
                pred_end_datetime=output_ts[-1],
                granularity=granularity_string,
                prediction_length=len(output_ts),
                mode=args.mode,
            )
        elif args.indicator == "bb":
            output_ts = sample["out_upper_bb"]
            designed_prompt = finance_bb_metaprompt_generation(
                text=text,
                prices=input_ts,
                start_datetime=datetime_list[0],
                end_datetime=datetime_list[-1],
                pred_end_datetime=output_ts[-1],
                granularity=granularity_string,
                prediction_length=len(output_ts),
                mode=args.mode,
            )
        elif args.indicator == "time":
            output_ts = sample["output_window"]
            designed_prompt = finance_mse_metaprompt_generation(
                text=text,
                prices=input_ts,
                start_datetime=datetime_list[0],
                end_datetime=datetime_list[-1],
                pred_end_datetime=output_ts[-1],
                granularity=granularity_string,
                prediction_length=len(output_ts),
                mode=args.mode,
            )

        answer = model.inference(designed_prompt)
        answer = answer.strip().replace('"', '')

        predict_ts = parse_val_prediction_response(answer)
        predict_ts_orig = predict_ts
        predict_ts = np.interp( # type: ignore
            np.linspace(0, 1, len(output_ts)),
            np.linspace(0, 1, len(predict_ts)),
            predict_ts
        )

        res = {
            "filename": sample["filename"],
            "response": answer,
            "ground_truth": output_ts,
            "predict": predict_ts.tolist(),
        }
        result_list.append(res)

        save_to_json(res, details_path / sample["filename"])

        if args.indicator == "macd":
            first_half = sample["in_macd"]
        elif args.indicator == "bb":
            first_half = sample["in_upper_bb"]
        elif args.indicator == "time":
            first_half = sample["input_window"]
        plot_series(sample["filename"], first_half, output_ts, predict_ts_orig, visualizations_path)

        mse = np.mean((np.array(output_ts) - np.array(predict_ts)) ** 2)
        mae = np.mean(np.abs(np.array(output_ts) - np.array(predict_ts)))
        rmse = np.sqrt(mse)
        mape = calculate_mape(output_ts, predict_ts)

        if args.indicator == "macd" and mse > 10:
            print(f"{sample['filename']} failed mse", mse)
            epoch_results.append({
                "filename": sample["filename"],
                "failed": True,
                "epoch": idx + 1,
                "mse": mse,
                "mae": mae,
                "rmse": rmse,
                "mape": mape,
            })
            continue

        if args.indicator == "time" and mse > 100:
            print(f"{sample['filename']} failed mse ", mse)
            epoch_results.append({
                "filename": sample["filename"],
                "failed": True,
                "epoch": idx + 1,
                "mse": mse,
                "mae": mae,
                "rmse": rmse,
                "mape": mape,
            })
            continue

        if args.indicator == "bb" and mse > 100:
            print(f"{sample['filename']} failed mse ", mse)
            epoch_results.append({
                "filename": sample["filename"],
                "failed": True,
                "epoch": idx + 1,
                "mse": mse,
                "mae": mae,
                "rmse": rmse,
                "mape": mape,
            })
            continue

        cumulative_mse.append(mse)
        cumulative_mae.append(mae)
        cumulative_rmse.append(rmse)
        cumulative_mape.append(mape)

        epoch_results.append({
            "filename": sample["filename"],
            "epoch": idx + 1,
            "mse": mse,
            "mae": mae,
            "rmse": rmse,
            "mape": mape,
            "mean_mse": np.mean(cumulative_mse),
            "mean_mae": np.mean(cumulative_mae),
            "mean_rmse": np.mean(cumulative_rmse),
            "mean_mape": np.mean(cumulative_mape),
        })
        save_to_json(epoch_results, f"{save_path}/epoch_results.json")
        print(
            "{}/{}: mse: {:.4f}, mae: {:.4f}, rmse: {:.4f}".format(
                idx, tot_samples, mse, mae, rmse
            )
        )
    except Exception as e:
        print(f"Skipping {idx} due to error: {e}")


summary = {
    "total_samples": len(result_list),
    "mse": np.mean(cumulative_mse),
    "mae": np.mean(cumulative_mae),
    "rmse": np.mean(cumulative_rmse),
    "mape": np.mean(cumulative_mape),
}

save_to_json(summary, f"{save_path}/final_results.json")
print(f"Processing complete. Results saved to {save_path}/final_results.json")

config.json:   0%|          | 0.00/877 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.47G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/189 [00:00<?, ?B/s]

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/54.5k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/296 [00:00<?, ?B/s]

Device set to use cuda:0


Evaluating 100 samples...


  0%|          | 0/100 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  1%|          | 1/100 [00:34<56:36, 34.31s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  1400.6330431826182


  2%|▏         | 2/100 [01:07<54:50, 33.58s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  1002.7979566102273


  3%|▎         | 3/100 [01:39<53:01, 32.79s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Skipping 2 due to error: object of type 'NoneType' has no len()


  4%|▍         | 4/100 [02:11<51:57, 32.48s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


3/100: mse: 88.7224, mae: 8.9228, rmse: 9.4193


  5%|▌         | 5/100 [02:32<45:16, 28.60s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


4/100: mse: 2.7209, mae: 1.0864, rmse: 1.6495


  6%|▌         | 6/100 [03:06<47:10, 30.11s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  585.5353986691936


  7%|▋         | 7/100 [03:41<49:23, 31.87s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  3244.739074440171


  8%|▊         | 8/100 [04:15<49:48, 32.49s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


7/100: mse: 0.0552, mae: 0.1817, rmse: 0.2350


  9%|▉         | 9/100 [04:48<49:43, 32.78s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


8/100: mse: 46.7785, mae: 3.5977, rmse: 6.8395


 10%|█         | 10/100 [05:22<49:41, 33.13s/it]You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


9/100: mse: 9.3114, mae: 2.8013, rmse: 3.0515


 11%|█         | 11/100 [05:59<50:36, 34.12s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


10/100: mse: 5.2583, mae: 1.2802, rmse: 2.2931


 12%|█▏        | 12/100 [06:29<48:17, 32.93s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  197.15605362249278


 13%|█▎        | 13/100 [07:01<47:24, 32.69s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


12/100: mse: 90.9549, mae: 9.0936, rmse: 9.5370


 14%|█▍        | 14/100 [07:27<43:58, 30.69s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


13/100: mse: 8.1351, mae: 0.8774, rmse: 2.8522


 15%|█▌        | 15/100 [07:55<42:18, 29.87s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


14/100: mse: 1.5456, mae: 1.0272, rmse: 1.2432


 16%|█▌        | 16/100 [08:31<44:25, 31.73s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


15/100: mse: 4.1732, mae: 1.8485, rmse: 2.0428


 17%|█▋        | 17/100 [09:07<45:39, 33.00s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


16/100: mse: 9.3846, mae: 2.7018, rmse: 3.0634


 18%|█▊        | 18/100 [09:37<44:02, 32.22s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


17/100: mse: 39.3548, mae: 5.1435, rmse: 6.2733


 19%|█▉        | 19/100 [10:09<43:23, 32.14s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  22479.359834747032


 20%|██        | 20/100 [10:43<43:36, 32.71s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


19/100: mse: 40.2887, mae: 5.3029, rmse: 6.3473


 21%|██        | 21/100 [11:16<43:07, 32.75s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


20/100: mse: 3.4429, mae: 1.5924, rmse: 1.8555


 22%|██▏       | 22/100 [11:47<41:59, 32.30s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


21/100: mse: 3.8622, mae: 1.5520, rmse: 1.9653


 23%|██▎       | 23/100 [12:27<44:17, 34.51s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


22/100: mse: 3.0686, mae: 1.4812, rmse: 1.7518


 24%|██▍       | 24/100 [12:53<40:23, 31.89s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


23/100: mse: 7.9168, mae: 2.3635, rmse: 2.8137


 25%|██▌       | 25/100 [13:27<40:35, 32.47s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  7256.109980469615


 26%|██▌       | 26/100 [14:02<41:04, 33.30s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  172.80279551090908


 27%|██▋       | 27/100 [14:36<40:38, 33.40s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


26/100: mse: 22.3142, mae: 3.8818, rmse: 4.7238


 28%|██▊       | 28/100 [15:09<39:59, 33.32s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  188.14753514888963


 29%|██▉       | 29/100 [15:43<39:40, 33.53s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  164.58103531704472


 30%|███       | 30/100 [16:14<38:16, 32.80s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


29/100: mse: 31.2723, mae: 3.4225, rmse: 5.5922


 31%|███       | 31/100 [16:50<38:50, 33.77s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  187.04616668067226


 32%|███▏      | 32/100 [17:16<35:52, 31.65s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  517.4252362351515


 33%|███▎      | 33/100 [17:45<34:25, 30.83s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  3758.457752621641


 34%|███▍      | 34/100 [18:19<34:40, 31.52s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


33/100: mse: 72.9103, mae: 3.3091, rmse: 8.5388


 35%|███▌      | 35/100 [18:53<34:56, 32.25s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  3816.9415915266613


 36%|███▌      | 36/100 [19:20<32:45, 30.70s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


35/100: mse: 0.0951, mae: 0.2647, rmse: 0.3083


 37%|███▋      | 37/100 [20:05<36:58, 35.22s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


36/100: mse: 2.6228, mae: 1.5130, rmse: 1.6195


 38%|███▊      | 38/100 [20:30<33:08, 32.08s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  1957.7005196969694


 39%|███▉      | 39/100 [21:04<33:04, 32.54s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


38/100: mse: 50.4256, mae: 2.9369, rmse: 7.1011


 40%|████      | 40/100 [21:38<32:58, 32.98s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


39/100: mse: 6.2227, mae: 2.3655, rmse: 2.4945


 41%|████      | 41/100 [22:03<30:13, 30.73s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  1472.9647834698865


 42%|████▏     | 42/100 [22:36<30:13, 31.27s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  1515.7005251045728


 43%|████▎     | 43/100 [23:00<27:38, 29.10s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  309.93225860128854


 44%|████▍     | 44/100 [23:12<22:25, 24.02s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


43/100: mse: 89.1060, mae: 9.3092, rmse: 9.4396


 45%|████▌     | 45/100 [23:48<25:21, 27.66s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


44/100: mse: 8.3656, mae: 2.6435, rmse: 2.8923


 46%|████▌     | 46/100 [24:22<26:40, 29.64s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  441.33449201329057


 47%|████▋     | 47/100 [24:49<25:19, 28.68s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  9989.734688884088


 48%|████▊     | 48/100 [25:26<27:03, 31.23s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


47/100: mse: 6.4404, mae: 2.3879, rmse: 2.5378


 49%|████▉     | 49/100 [26:01<27:25, 32.26s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


48/100: mse: 30.2643, mae: 2.2075, rmse: 5.5013


 50%|█████     | 50/100 [26:23<24:17, 29.15s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


49/100: mse: 16.3477, mae: 3.2261, rmse: 4.0432


 51%|█████     | 51/100 [26:58<25:27, 31.18s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


50/100: mse: 59.8194, mae: 3.6894, rmse: 7.7343


 52%|█████▏    | 52/100 [27:36<26:25, 33.03s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


51/100: mse: 0.0337, mae: 0.1465, rmse: 0.1837


 53%|█████▎    | 53/100 [28:12<26:34, 33.92s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  2300.0141545961346


 54%|█████▍    | 54/100 [28:24<20:54, 27.28s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


53/100: mse: 8.6349, mae: 2.8937, rmse: 2.9385


 55%|█████▌    | 55/100 [28:50<20:21, 27.14s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  227.36920663900602


 56%|█████▌    | 56/100 [29:21<20:44, 28.29s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  616.4535876820884


 57%|█████▋    | 57/100 [29:58<22:05, 30.84s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


56/100: mse: 51.6161, mae: 3.5094, rmse: 7.1844


 58%|█████▊    | 58/100 [30:32<22:16, 31.81s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  1132.731405458363


 59%|█████▉    | 59/100 [31:08<22:38, 33.15s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


58/100: mse: 0.7345, mae: 0.7787, rmse: 0.8570


 60%|██████    | 60/100 [31:47<23:12, 34.82s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


59/100: mse: 41.7700, mae: 6.4504, rmse: 6.4630


 61%|██████    | 61/100 [32:22<22:33, 34.71s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


60/100: mse: 21.4395, mae: 3.8904, rmse: 4.6303


 62%|██████▏   | 62/100 [32:54<21:31, 33.98s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


61/100: mse: 5.7162, mae: 2.1545, rmse: 2.3909


 63%|██████▎   | 63/100 [33:34<22:07, 35.89s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


62/100: mse: 0.1650, mae: 0.4036, rmse: 0.4062


 64%|██████▍   | 64/100 [34:10<21:34, 35.95s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Skipping 63 due to error: object of type 'NoneType' has no len()


 65%|██████▌   | 65/100 [34:41<20:05, 34.45s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


64/100: mse: 2.5106, mae: 1.2651, rmse: 1.5845


 66%|██████▌   | 66/100 [35:15<19:22, 34.20s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  7041.680117304395


 67%|██████▋   | 67/100 [35:49<18:47, 34.18s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


66/100: mse: 11.2316, mae: 3.0837, rmse: 3.3514


 68%|██████▊   | 68/100 [36:19<17:37, 33.04s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


67/100: mse: 36.9718, mae: 3.1043, rmse: 6.0804


 69%|██████▉   | 69/100 [36:46<16:01, 31.00s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Skipping 68 due to error: object of type 'NoneType' has no len()


 70%|███████   | 70/100 [37:20<15:56, 31.89s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


69/100: mse: 55.1274, mae: 7.3072, rmse: 7.4248


 71%|███████   | 71/100 [37:50<15:09, 31.35s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


70/100: mse: 4.1037, mae: 1.7101, rmse: 2.0258


 72%|███████▏  | 72/100 [38:17<14:05, 30.21s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


71/100: mse: 21.2292, mae: 4.5680, rmse: 4.6075


 73%|███████▎  | 73/100 [38:49<13:44, 30.53s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  530.1788382917922


 74%|███████▍  | 74/100 [39:19<13:13, 30.52s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


73/100: mse: 36.8317, mae: 5.7296, rmse: 6.0689


 75%|███████▌  | 75/100 [39:52<12:57, 31.10s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


74/100: mse: 32.7677, mae: 5.5852, rmse: 5.7243


 76%|███████▌  | 76/100 [40:31<13:28, 33.68s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  142.08894413131281


 77%|███████▋  | 77/100 [41:01<12:30, 32.65s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  394.8768473800725


 78%|███████▊  | 78/100 [41:35<12:04, 32.94s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  2382.8822828687084


 79%|███████▉  | 79/100 [42:09<11:40, 33.36s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  3857.781577147652


 80%|████████  | 80/100 [42:39<10:43, 32.18s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  22297.364297731157


 81%|████████  | 81/100 [43:09<09:58, 31.52s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  14070.155232165482


 82%|████████▏ | 82/100 [43:43<09:39, 32.22s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  347.5001875446154


 83%|████████▎ | 83/100 [43:57<07:34, 26.75s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


82/100: mse: 39.7973, mae: 3.7402, rmse: 6.3085


 84%|████████▍ | 84/100 [44:29<07:33, 28.32s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


83/100: mse: 82.5262, mae: 3.4887, rmse: 9.0844


 85%|████████▌ | 85/100 [45:03<07:32, 30.19s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  814.8793695412134


 86%|████████▌ | 86/100 [45:38<07:21, 31.50s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  1990.6978358676297


 87%|████████▋ | 87/100 [46:12<07:01, 32.42s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


86/100: mse: 2.9597, mae: 1.6059, rmse: 1.7204


 88%|████████▊ | 88/100 [46:28<05:29, 27.50s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  162.57694877446926


 89%|████████▉ | 89/100 [47:05<05:32, 30.21s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


88/100: mse: 51.0191, mae: 6.8553, rmse: 7.1428


 90%|█████████ | 90/100 [47:41<05:20, 32.04s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  1073.6665959243728


 91%|█████████ | 91/100 [47:53<03:54, 26.04s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


90/100: mse: 10.8069, mae: 2.8656, rmse: 3.2874


 92%|█████████▏| 92/100 [48:04<02:52, 21.58s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


91/100: mse: 0.8414, mae: 0.6991, rmse: 0.9173


 93%|█████████▎| 93/100 [48:40<03:00, 25.82s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


92/100: mse: 0.7620, mae: 0.8683, rmse: 0.8729


 94%|█████████▍| 94/100 [49:14<02:49, 28.27s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


93/100: mse: 2.0777, mae: 1.4342, rmse: 1.4414


 95%|█████████▌| 95/100 [49:50<02:33, 30.67s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  279.7078887483428


 96%|█████████▌| 96/100 [50:33<02:16, 34.13s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


95/100: mse: 35.8003, mae: 5.4427, rmse: 5.9833


 97%|█████████▋| 97/100 [51:09<01:44, 34.94s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


train-00000-of-00001.parquet failed mse  169.7513770421698


 98%|█████████▊| 98/100 [51:38<01:05, 32.95s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


97/100: mse: 12.9530, mae: 2.8181, rmse: 3.5990


 99%|█████████▉| 99/100 [52:09<00:32, 32.33s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


98/100: mse: 5.3204, mae: 1.9373, rmse: 2.3066


100%|██████████| 100/100 [52:38<00:00, 31.59s/it]

train-00000-of-00001.parquet failed mse  7915.408537178479
Processing complete. Results saved to MTBench-Test/llama/tsforecast_long_combined/final_results.json



