In [1]:
import sys
sys.path.insert(0, r"C:\Users\BMSCE CSE.DESKTOP-IUB6THA\Downloads\kshitij\UniEval")

from utils import convert_to_json
from metric.evaluator import get_evaluator

task = 'fact'

evaluator = get_evaluator(task)

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from prettytable import PrettyTable

def convert_to_json(output_list, src_list=None, ref_list=None, context_list=None, \
            scores=None, doc_id=None, system_id=None):
    """
        Convert the data into the json format.

        output_list: a list of model output
        src_list: source input for different NLG tasks. For example, source document for summarization
                  and dialogue history for dialogue response generation
        ref_list: human-annotated groundtruth
        context_list: the context needed to evaluate several specific dimension. For example,
                      additional factual information when evaluating engagingness and groundedness in dialogues
        scores: human scores for evaluating the model output. They can be used to calculate the correlation
                between evaluators and human judgements. The scores should be stored in a dictionary. For example,
                {'fluency': 2.0, 'coherence': 3.0} could be the human score for a sample.
        doc_id: the index of the input source. It can be used to calculate summary-level correlation for summarzation
        system_id: the index of the generation system. It can be used to calculate system-level correlation.
    """
    json_data = []
    for i in range(len(output_list)):
        cur = {}
        cur['system_output'] = output_list[i]
        if src_list is not None:
            cur['source'] = src_list[i]
        if ref_list is not None:
            cur['reference'] = ref_list[i]
        if context_list is not None:
            cur['context'] = context_list[i]
        if scores is not None:
            cur['scores'] = scores[i]
        if doc_id is not None:
            cur['doc_id'] = doc_id[i]
        if system_id is not None:
            cur['system_id'] = system_id[i]
        json_data.append(cur)
    return json_data


def add_question(dimension, output, src=None, ref=None, context=None, task=None):
    """
        Add questions to generate input in Bool-QA format for UniEval.

        dimension: specific dimension to be evaluated
        src: source input for different NLG tasks. For example, source document for summarization
             and dialogue history for dialogue response generation.
        output: output text generated by the models
        ref: human-annotataed groundtruth
        context: the context needed to evaluate several specific dimension. For example,
                 additional factual information when evaluating engagingness and groundedness in dialogues.
    """

    input_with_question = []
    for i in range(len(output)):
        # For summarization
        if task == 'summarization':
            if dimension == 'fluency':
                cur_input = 'question: Is this a fluent paragraph? </s> paragraph: ' + output[i]
            elif dimension == 'coherence':
                cur_input = 'question: Is this a coherent summary to the document? </s> summary: ' + output[i] + ' </s> document: ' + src[i]
            elif dimension == 'consistency':
                cur_input = 'question: Is this claim consistent with the document? </s> claim: ' + output[i] + ' </s> document: ' + src[i]
            elif dimension == 'relevance':
                cur_input = 'question: Is this summary relevant to the reference? </s> summary: ' + output[i] + ' </s> reference: ' + ref[i]
            else:
                raise NotImplementedError('The input format for this dimension is still undefined. Please customize it first.')
        # For dialogues
        elif task == 'dialogue':
            if dimension == 'naturalness':
                cur_input = 'question: Is this a natural response in the dialogue? </s> response: ' + output[i]
            elif dimension == 'coherence':
                cur_input = 'question: Is this a coherent response given the dialogue history? </s> response: '\
                            + output[i] + ' </s> dialogue history: ' + src[i]
            elif dimension == 'engagingness':
                cur_input = 'question: Is this an engaging and informative response according to the dialogue history and fact? </s> response: '\
                            + output[i] + ' </s> dialogue history: ' + src[i] + ' </s> fact: ' + context[i]
            elif dimension == 'groundedness':
                cur_input = 'question: Is this response consistent with knowledge in the fact? </s> response: '\
                            + output[i] + ' </s> fact: ' + context[i]
            elif dimension == 'understandability':
                cur_input = 'question: Is this an understandable response in the dialogue? </s> response: ' + output[i]
            else:
                raise NotImplementedError('The input format for this dimension is still undefined. Please customize it first.')
        # For data-to-text
        elif task == 'data2text':
            if dimension == 'naturalness':
                cur_input = 'question: Is this a fluent utterance? </s> utterance: ' + output[i]
            elif dimension == 'informativeness':
                cur_input = 'question: Is this sentence informative according to the reference? </s> sentence: '\
                            + output[i] + ' </s> reference: ' + ref[i]
            else:
                raise NotImplementedError('The input format for this dimension is still undefined. Please customize it first.')
        # For factual consistency detection
        elif task == 'fact':
            if dimension == 'consistency':
                cur_input = 'question: Is this claim consistent with the document? </s> claim: ' + output[i] + ' </s> document: ' + src[i]
            else:
                raise NotImplementedError('No other dimensions for the factual consistency detection task.')
        # For new customized tasks
        else:
            raise NotImplementedError('Other tasks are not implemented, please customize specific tasks here.')
        input_with_question.append(cur_input)
    return input_with_question


def print_scores(scores):
    table = PrettyTable(['Dimensions','Score'])
    print('\nEvaluation scores are shown below:')
    dims = list(scores[0].keys())
    for dim in dims:
        cur_score = 0
        for i in range(len(scores)):
            cur_score += scores[i][dim]
        table.add_row([dim, round(cur_score / len(scores), 6)])
    print(table)

In [3]:
import numpy as np
from nltk import sent_tokenize
from scorer import UniEvaluator  # Make sure this import works after placing scorer.py in the same directory

def evaluate(data, dims=None, overall=True, print_result=False, model_name_or_path="t5-small", task='summarization', device='cuda:0', individual=True):
    """
    Get the scores of all the given dimensions (fluency, consistency, coherence, relevance)

    data: A list of dictionaries, where each dictionary contains:
          - 'source': The original text
          - 'system_output': The generated system output (summary)
          - 'reference' (optional): Reference summary for relevance evaluation

    dims: A list of dimensions to be evaluated. If dims is None, it evaluates four default dimensions:
          coherence, consistency, fluency, relevance.

    overall: Boolean to indicate whether the overall score is calculated as the average of all dimensions.

    print_result: Boolean to print the results on the screen.

    model_name_or_path: The model name or path to use for evaluation, e.g., 't5-small'

    task: The task type (used in scoring if needed, like summarization or other NLP tasks).

    device: The device to use for evaluation ('cpu' or 'cuda:0').
    """

    # Instantiate the scorer
    scorer = UniEvaluator(model_name_or_path=model_name_or_path, device=device)

    n_data = len(data)
    eval_scores = [{} for _ in range(n_data)]

    # Default dimensions if not provided
    if dims is None:
        dims = ['coherence', 'consistency', 'fluency', 'factual consistency']   #add relevance

    for dim in dims:
        print(f'Evaluating {dim} of {n_data} samples !!!')

        if dim == 'consistency' or dim == 'fluency':
            # Sentence-level scores for consistency and fluency
            src_list, output_list = [], []
            n_sents = []  # number of sentences in each summary

            for i in range(n_data):
                if dim == 'consistency':
                    source = data[i]['source']
                else:
                    source = ''
                system_outputs = sent_tokenize(data[i]['system_output'])
                n_sents.append(len(system_outputs))
                for j in range(len(system_outputs)):
                    src_list.append(source)
                    output_list.append(system_outputs[j])

            input_list = add_question(dimension=dim, output=output_list, src=src_list, task=task)
            sent_score = scorer.score(input_list)

            # Calculate average sentence-level scores for each sample
            start_idx = 0
            score = []
            for cur_n_sent in n_sents:
                score.append(sum(sent_score[start_idx:start_idx + cur_n_sent]) / cur_n_sent)
                start_idx += cur_n_sent

        elif dim == 'coherence' or dim == 'relevance':
            # Summary-level scores for coherence and relevance
            src_list, output_list, ref_list = [], [], []

            for i in range(n_data):
                src_list.append(data[i]['source'])
                output_list.append(data[i]['system_output'])
                if dim == 'relevance':
                    ref_list.append(data[i]['reference'])

            input_list = add_question(dimension=dim, output=output_list, src=src_list, ref=ref_list, task=task)
            score = scorer.score(input_list)

        elif dim == 'factual consistency':
            output_list, src_list = [], []

            for i in range(n_data):
                src_list.append(data[i]['source'])
                output_list.append(data[i]['system_output'])

            data = convert_to_json(output_list=output_list, src_list=src_list)
            eval_score = evaluator.evaluate(data)
            score = []

            for i in eval_score:
                temp = i['consistency']
                score.append(temp)

        else:
            raise NotImplementedError(f"The input format for the dimension '{dim}' is still undefined. Please customize it.")

        # Store the scores for the current dimension
        for i in range(n_data):
            eval_scores[i][dim] = score[i]

    # Calculate overall score (average of all evaluated dimensions)
    if overall:
        for i in range(n_data):
            eval_scores[i]['overall'] = np.mean([eval_scores[i][dim] for dim in dims])

    # Print the result if requested
    if print_result:
        print_scores(eval_scores)

    if individual:
        individual_scores = []
        for i in range(n_data):
            temp = [eval_scores[i][dim] for dim in dims]
            individual_scores.append(temp)

        return np.array(individual_scores)

    # Calculate average score across all the dimensions except 'overall'
    avg_score = []
    for i in range(n_data):
        # Exclude 'overall' from the averaging
        dimensions = [dim for dim in dims if dim != 'overall']
        avg_score.append(np.mean([eval_scores[i][dim] for dim in dimensions]))

    return avg_score

In [10]:
# Import all required libraries
import torch
import transformers
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM, DataCollatorForLanguageModeling, TrainingArguments
from trl import RewardTrainer, PPOTrainer, PPOConfig, AutoModelForCausalLMWithValueHead, create_reference_model
from datasets import Dataset
import json
import pandas as pd
from sklearn.model_selection import train_test_split
from peft import LoraConfig, PeftModel, get_peft_model, prepare_model_for_kbit_training, TaskType
import bitsandbytes as bnb
import numpy as np
from tqdm import tqdm
import time
import sys

# Add UniEval to path and import
sys.path.append(r"C:\Users\BMSCE CSE.DESKTOP-IUB6THA\Downloads\kshitij\UniEval")
from utils import convert_to_json
from metric.evaluator import get_evaluator

# Configuration
DATA_PATH = "combined_clinical_notes.csv"
# MODEL_PATH = r"D:\kshitij-weights-folder\qwen-aloe-9-4-base-fine-tune"
MODEL_PATH = "gpt2" 
# PEFT_ADAPTER_PATH = r"D:\kshitij-weights-folder\qwen-aloe-9-4-base-fine-tune-peft-adapaters"
MEDICAL_PROMPT = "\nGenerate a concise medical summary focusing on key findings and treatment plans:"

# Load and prepare data
df = pd.read_csv(DATA_PATH)
train_df, temp_df = train_test_split(df, test_size=0.4, random_state=42)
eval_df, test_df = train_test_split(temp_df, test_size=0.5, random_state=42)
dataset = Dataset.from_pandas(eval_df.rename(columns={"dialogue": "review"}))

# Tokenizer setup
tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH, padding_side='left')
tokenizer.pad_token = tokenizer.eos_token

# Dataset preprocessing
def preprocess_function(examples):
    return {
        "input_ids": tokenizer.encode(examples["review"], truncation=True, padding="max_length", max_length=512),
        "query": tokenizer.decode(tokenizer.encode(examples["review"], truncation=True, padding="max_length", max_length=512), skip_special_tokens=True)
    }

dataset = dataset.map(preprocess_function, batched=False)
dataset.set_format("pytorch")

# Model configuration
bnb_config = transformers.BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
)

base_model = AutoModelForCausalLM.from_pretrained(
    MODEL_PATH,
    quantization_config=bnb_config,
    device_map="auto"
)
base_model = prepare_model_for_kbit_training(base_model)

# PEFT/LoRA configuration
lora_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    r=8,
    lora_alpha=32,
    lora_dropout=0.1,
    bias="none",
    target_modules=["c_attn", "c_proj"],
)

# model_with_lora = get_peft_model(base_model, lora_config)
model = AutoModelForCausalLMWithValueHead.from_pretrained(base_model, peft_config=lora_config).to("cuda")

# Reference model
ref_model = create_reference_model(model).to("cuda")
ref_model.eval()
for param in ref_model.parameters():
    param.requires_grad = False

# PPO Configuration
ppo_config = PPOConfig(
    model_name=MODEL_PATH,
    ppo_epochs=1,
    gradient_accumulation_steps=2,
    steps=5,
    batch_size=2,
    mini_batch_size=1,
    learning_rate=2e-5,
    log_with='tensorboard',
    project_kwargs={"logging_dir": r"D:\kshitij-weights-folder\gpt2-rl-logs"}
)

# Initialize PPO Trainer
ppo_trainer = PPOTrainer(
    config=ppo_config,
    model=model,
    ref_model=ref_model,
    tokenizer=tokenizer,
    dataset=dataset,
    optimizer=bnb.optim.Adam8bit(model.parameters(), lr=ppo_config.learning_rate)
)

# Evaluation setup
# sum_eval = get_evaluator("summarization", "cuda"="cuda" if torch.cuda.is_available() else "cpu")

def get_score(game_data):
    weights = np.array([0.1, 0.2, 0.3, 0.4])  # coherence, consistency, fluency, factual consistency
    sample_data = [{"source": q, "system_output": r} for q, r in zip(game_data["query"], game_data["response"])]
    
    # scores = sum_eval.evaluate(sample_data, overall=False)
    scores = evaluate(sample_data, overall=False)
    weighted_scores = []
    
    for dimension_scores in scores:
        adjusted = np.where(
            dimension_scores < 0.5,
            -dimension_scores * weights,
            dimension_scores * weights
        )
        weighted_scores.append(torch.tensor(np.sum(adjusted)/4, dtype=torch.float32).to(model.pretrained_model.device))
    
    return weighted_scores

max_position_embeddings = model.pretrained_model.config.max_position_embeddings 
# Training loop
generation_kwargs = {
    "min_length": -1,
    "top_k": 0.0,
    "top_p": 1.0,
    "do_sample": True,
    "pad_token_id": tokenizer.eos_token_id,
    # "max_new_tokens": 64,  # Increased for better summary generation
    "eos_token_id": -1,
    "max_length": max_position_embeddings,
    "max_new_tokens": 64
}

for epoch in range(1):
    for batch in tqdm(ppo_trainer.dataloader):
        (logs, game_data,) = (
            dict(),
            dict(),
        )

        # task_list = choices(ctrl_str, k=config.batch_size)
        # game_data["query"] = [t + q for t, q in zip(task_list, batch["query"])]
        game_data["query"] = [q for q in batch["query"]]
        # query_tensors = [torch.cat((ctrl_tokens[t], input_ids)) for t, input_ids in zip(task_list, batch["input_ids"])]
        query_tensors = [input_ids for input_ids in batch["input_ids"]]
        
        response_tensors = []
        for query in query_tensors:
            original_notes = tokenizer.decode(query)
            
            # Combine with medical prompt only during generation
            full_prompt = f"{MEDICAL_PROMPT}{original_notes}"
            full_prompt_tensor = tokenizer.encode(full_prompt, return_tensors="pt").to("cuda").squeeze(0)
            
            response = ppo_trainer.generate(
                full_prompt_tensor,
                **generation_kwargs
            )
            # Ensure response doesn't exceed max length
            response = response[:, :generation_kwargs["max_new_tokens"]]
            response_tensors.append(response.squeeze())
#         print(response_tensors)
        game_data["response"] = [tokenizer.decode(r) for r in response_tensors]

        print("check")

        texts = [q + r for q, r in zip(batch["query"], game_data["response"])]
        logits = get_score(game_data)
        rewards = logits
        # rewards = pos_logit_to_reward(logits, task_list)
        # rewards = [torch.tensor([1.0], device=query_tensors[0].device) for _ in range(len(texts))]

        #### Run PPO training
        t = time.time()
        stats = ppo_trainer.step(query_tensors, response_tensors, rewards)


Map: 100%|███████████████████████████████████████████████████████████| 93/93 [00:00<00:00, 141.35 examples/s]
  0%|                                                                                 | 0/46 [00:00<?, ?it/s]Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


check
Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  6.83it/s][A


Evaluating consistency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 11.10it/s][A


Evaluating fluency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 61.53it/s][A


Evaluating factual consistency of 2 samples !!!
Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  7.63it/s][A
  2%|█▌                                                                       | 1/46 [00:05<04:28,  5.97s/it]Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
  return fn(*args, **kwargs)
Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


check
Evaluating coherence of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 59.61it/s][A


Evaluating consistency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 72.57it/s][A


Evaluating fluency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 67.93it/s][A


Evaluating factual consistency of 2 samples !!!
Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  8.00it/s][A
  4%|███▏                                                                     | 2/46 [00:11<04:00,  5.47s/it]Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


check
Evaluating coherence of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 58.00it/s][A


Evaluating consistency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 59.41it/s][A


Evaluating fluency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 75.69it/s][A


Evaluating factual consistency of 2 samples !!!
Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  7.91it/s][A
  7%|████▊                                                                    | 3/46 [00:16<03:49,  5.34s/it]Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


check
Evaluating coherence of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 57.97it/s][A


Evaluating consistency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 44.68it/s][A


Evaluating fluency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 69.92it/s][A


Evaluating factual consistency of 2 samples !!!
Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.70it/s][A
  9%|██████▎                                                                  | 4/46 [00:21<03:45,  5.38s/it]Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


check
Evaluating coherence of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 63.63it/s][A


Evaluating consistency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 39.23it/s][A


Evaluating fluency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 62.94it/s][A


Evaluating factual consistency of 2 samples !!!
Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.35it/s][A
 11%|███████▉                                                                 | 5/46 [00:27<03:42,  5.44s/it]Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


check
Evaluating coherence of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 63.25it/s][A


Evaluating consistency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 37.27it/s][A


Evaluating fluency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 76.96it/s][A


Evaluating factual consistency of 2 samples !!!
Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/2 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00,  4.11it/s][A
 13%|█████████▌                                                               | 6/46 [00:32<03:41,  5.53s/it]Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


check
Evaluating coherence of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 63.46it/s][A


Evaluating consistency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 65.87it/s][A


Evaluating fluency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 70.52it/s][A


Evaluating factual consistency of 2 samples !!!
Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  7.87it/s][A
 15%|███████████                                                              | 7/46 [00:38<03:30,  5.39s/it]Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


check
Evaluating coherence of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 56.24it/s][A


Evaluating consistency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 67.73it/s][A


Evaluating fluency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 73.15it/s][A


Evaluating factual consistency of 2 samples !!!
Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  7.84it/s][A
 17%|████████████▋                                                            | 8/46 [00:43<03:24,  5.37s/it]Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


check
Evaluating coherence of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 55.63it/s][A


Evaluating consistency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 55.70it/s][A


Evaluating fluency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 72.50it/s][A


Evaluating factual consistency of 2 samples !!!
Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  7.88it/s][A
 20%|██████████████▎                                                          | 9/46 [00:48<03:15,  5.29s/it]Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


check
Evaluating coherence of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 56.05it/s][A


Evaluating consistency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 61.31it/s][A


Evaluating fluency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 75.55it/s][A


Evaluating factual consistency of 2 samples !!!
Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  7.90it/s][A
 22%|███████████████▋                                                        | 10/46 [00:53<03:08,  5.24s/it]Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


check
Evaluating coherence of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 57.48it/s][A


Evaluating consistency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 34.37it/s][A


Evaluating fluency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 75.65it/s][A


Evaluating factual consistency of 2 samples !!!
Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  3.62it/s][A
 24%|█████████████████▏                                                      | 11/46 [00:58<03:03,  5.24s/it]Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


check
Evaluating coherence of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 58.93it/s][A


Evaluating consistency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 50.58it/s][A


Evaluating fluency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 77.98it/s][A


Evaluating factual consistency of 2 samples !!!
Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  5.42it/s][A
 26%|██████████████████▊                                                     | 12/46 [01:04<02:59,  5.28s/it]Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


check
Evaluating coherence of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 55.61it/s][A


Evaluating consistency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 32.75it/s][A


Evaluating fluency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 82.40it/s][A


Evaluating factual consistency of 2 samples !!!
Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  3.16it/s][A
 28%|████████████████████▎                                                   | 13/46 [01:09<02:54,  5.28s/it]Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


check
Evaluating coherence of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 58.04it/s][A


Evaluating consistency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 35.65it/s][A


Evaluating fluency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 66.89it/s][A


Evaluating factual consistency of 2 samples !!!
Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.15it/s][A
 30%|█████████████████████▉                                                  | 14/46 [01:14<02:49,  5.28s/it]Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


check
Evaluating coherence of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 59.90it/s][A


Evaluating consistency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 61.13it/s][A


Evaluating fluency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 72.44it/s][A


Evaluating factual consistency of 2 samples !!!
Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  7.90it/s][A
 33%|███████████████████████▍                                                | 15/46 [01:19<02:41,  5.22s/it]Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


check
Evaluating coherence of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 59.68it/s][A


Evaluating consistency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 38.12it/s][A


Evaluating fluency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 77.14it/s][A


Evaluating factual consistency of 2 samples !!!
Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/2 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00,  3.98it/s][A
 35%|█████████████████████████                                               | 16/46 [01:25<02:41,  5.38s/it]Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


check
Evaluating coherence of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 56.02it/s][A


Evaluating consistency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 65.14it/s][A


Evaluating fluency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 67.50it/s][A


Evaluating factual consistency of 2 samples !!!
Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  7.51it/s][A
 37%|██████████████████████████▌                                             | 17/46 [01:30<02:33,  5.31s/it]Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


check
Evaluating coherence of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 58.83it/s][A


Evaluating consistency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 35.70it/s][A


Evaluating fluency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 65.09it/s][A


Evaluating factual consistency of 2 samples !!!
Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  3.76it/s][A
 39%|████████████████████████████▏                                           | 18/46 [01:36<02:30,  5.37s/it]Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


check
Evaluating coherence of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 59.86it/s][A


Evaluating consistency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 26.81it/s][A


Evaluating fluency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 73.07it/s][A


Evaluating factual consistency of 2 samples !!!
Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.61it/s][A
 41%|█████████████████████████████▋                                          | 19/46 [01:41<02:25,  5.37s/it]Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


check
Evaluating coherence of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 57.16it/s][A


Evaluating consistency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 60.70it/s][A


Evaluating fluency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 71.21it/s][A


Evaluating factual consistency of 2 samples !!!
Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  7.52it/s][A
 43%|███████████████████████████████▎                                        | 20/46 [01:46<02:17,  5.29s/it]Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


check
Evaluating coherence of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 66.01it/s][A


Evaluating consistency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 70.56it/s][A


Evaluating fluency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 71.15it/s][A


Evaluating factual consistency of 2 samples !!!
Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  7.77it/s][A
 46%|████████████████████████████████▊                                       | 21/46 [01:52<02:12,  5.29s/it]Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


check
Evaluating coherence of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 59.44it/s][A


Evaluating consistency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 60.92it/s][A


Evaluating fluency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 73.13it/s][A


Evaluating factual consistency of 2 samples !!!
Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  7.49it/s][A
 48%|██████████████████████████████████▍                                     | 22/46 [01:57<02:09,  5.39s/it]Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


check
Evaluating coherence of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 60.19it/s][A


Evaluating consistency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 65.54it/s][A


Evaluating fluency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 77.06it/s][A


Evaluating factual consistency of 2 samples !!!
Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  7.80it/s][A
 50%|████████████████████████████████████                                    | 23/46 [02:02<02:01,  5.30s/it]Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


check
Evaluating coherence of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 45.26it/s][A


Evaluating consistency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 34.51it/s][A


Evaluating fluency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 66.89it/s][A


Evaluating factual consistency of 2 samples !!!
Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  3.55it/s][A
 52%|█████████████████████████████████████▌                                  | 24/46 [02:08<01:56,  5.31s/it]Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


check
Evaluating coherence of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 65.86it/s][A


Evaluating consistency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 35.56it/s][A


Evaluating fluency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 73.06it/s][A


Evaluating factual consistency of 2 samples !!!
Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/2 [00:00<?, ?it/s][A
 50%|█████████████████████████████████████                                     | 1/2 [00:00<00:00,  2.66it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00,  3.49it/s][A
 54%|███████████████████████████████████████▏                                | 25/46 [02:13<01:54,  5.45s/it]Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


check
Evaluating coherence of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 59.27it/s][A


Evaluating consistency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 59.61it/s][A


Evaluating fluency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 71.29it/s][A


Evaluating factual consistency of 2 samples !!!
Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  7.44it/s][A
 57%|████████████████████████████████████████▋                               | 26/46 [02:18<01:46,  5.35s/it]Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


check
Evaluating coherence of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 57.52it/s][A


Evaluating consistency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 67.08it/s][A


Evaluating fluency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 77.79it/s][A


Evaluating factual consistency of 2 samples !!!
Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  7.42it/s][A
 59%|██████████████████████████████████████████▎                             | 27/46 [02:24<01:42,  5.37s/it]Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


check
Evaluating coherence of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 59.34it/s][A


Evaluating consistency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 65.89it/s][A


Evaluating fluency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 82.47it/s][A


Evaluating factual consistency of 2 samples !!!
Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  7.61it/s][A
 61%|███████████████████████████████████████████▊                            | 28/46 [02:29<01:35,  5.30s/it]Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


check
Evaluating coherence of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 63.84it/s][A


Evaluating consistency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 60.77it/s][A


Evaluating fluency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 71.09it/s][A


Evaluating factual consistency of 2 samples !!!
Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  7.73it/s][A
 63%|█████████████████████████████████████████████▍                          | 29/46 [02:34<01:29,  5.29s/it]Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


check
Evaluating coherence of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 57.53it/s][A


Evaluating consistency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 40.10it/s][A


Evaluating fluency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 77.76it/s][A


Evaluating factual consistency of 2 samples !!!
Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/2 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00,  4.26it/s][A
 65%|██████████████████████████████████████████████▉                         | 30/46 [02:40<01:28,  5.51s/it]Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


check
Evaluating coherence of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 56.36it/s][A


Evaluating consistency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 26.21it/s][A


Evaluating fluency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 81.45it/s][A


Evaluating factual consistency of 2 samples !!!
Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.52it/s][A
 67%|████████████████████████████████████████████████▌                       | 31/46 [02:46<01:22,  5.47s/it]Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


check
Evaluating coherence of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 19.02it/s][A


Evaluating consistency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 10.95it/s][A


Evaluating fluency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 46.70it/s][A


Evaluating factual consistency of 2 samples !!!
Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  3.40it/s][A
 70%|██████████████████████████████████████████████████                      | 32/46 [02:53<01:22,  5.87s/it]Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


check
Evaluating coherence of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 61.48it/s][A


Evaluating consistency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 26.87it/s][A


Evaluating fluency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 72.22it/s][A


Evaluating factual consistency of 2 samples !!!
Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.54it/s][A
 72%|███████████████████████████████████████████████████▋                    | 33/46 [02:58<01:15,  5.77s/it]Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


check
Evaluating coherence of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 61.41it/s][A


Evaluating consistency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 39.33it/s][A


Evaluating fluency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 70.01it/s][A


Evaluating factual consistency of 2 samples !!!
Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.23it/s][A
 74%|█████████████████████████████████████████████████████▏                  | 34/46 [03:03<01:07,  5.62s/it]Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


check
Evaluating coherence of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 58.19it/s][A


Evaluating consistency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 29.21it/s][A


Evaluating fluency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 69.56it/s][A


Evaluating factual consistency of 2 samples !!!
Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  3.02it/s][A
 76%|██████████████████████████████████████████████████████▊                 | 35/46 [03:09<01:00,  5.53s/it]Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


check
Evaluating coherence of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 61.24it/s][A


Evaluating consistency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 50.14it/s][A


Evaluating fluency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 70.52it/s][A


Evaluating factual consistency of 2 samples !!!
Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  5.56it/s][A
 78%|████████████████████████████████████████████████████████▎               | 36/46 [03:14<00:54,  5.43s/it]Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


check
Evaluating coherence of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 62.46it/s][A


Evaluating consistency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 61.34it/s][A


Evaluating fluency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 79.11it/s][A


Evaluating factual consistency of 2 samples !!!
Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  7.76it/s][A
 80%|█████████████████████████████████████████████████████████▉              | 37/46 [03:19<00:48,  5.40s/it]Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


check
Evaluating coherence of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 63.68it/s][A


Evaluating consistency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 65.29it/s][A


Evaluating fluency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 75.88it/s][A


Evaluating factual consistency of 2 samples !!!
Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  7.73it/s][A
 83%|███████████████████████████████████████████████████████████▍            | 38/46 [03:25<00:43,  5.48s/it]Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


check
Evaluating coherence of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 60.08it/s][A


Evaluating consistency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 61.04it/s][A


Evaluating fluency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 73.08it/s][A


Evaluating factual consistency of 2 samples !!!
Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  7.74it/s][A
 85%|█████████████████████████████████████████████████████████████           | 39/46 [03:30<00:37,  5.36s/it]Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


check
Evaluating coherence of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 59.62it/s][A


Evaluating consistency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 63.28it/s][A


Evaluating fluency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 72.46it/s][A


Evaluating factual consistency of 2 samples !!!
Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  7.84it/s][A
 87%|██████████████████████████████████████████████████████████████▌         | 40/46 [03:35<00:31,  5.30s/it]Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


check
Evaluating coherence of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 58.84it/s][A


Evaluating consistency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 50.25it/s][A


Evaluating fluency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 61.21it/s][A


Evaluating factual consistency of 2 samples !!!
Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  7.69it/s][A
 89%|████████████████████████████████████████████████████████████████▏       | 41/46 [03:40<00:26,  5.25s/it]Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


check
Evaluating coherence of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 61.79it/s][A


Evaluating consistency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 61.52it/s][A


Evaluating fluency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 76.26it/s][A


Evaluating factual consistency of 2 samples !!!
Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  7.81it/s][A
 91%|█████████████████████████████████████████████████████████████████▋      | 42/46 [03:45<00:21,  5.25s/it]Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


check
Evaluating coherence of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 61.66it/s][A


Evaluating consistency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 30.26it/s][A


Evaluating fluency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 82.18it/s][A


Evaluating factual consistency of 2 samples !!!
Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  3.14it/s][A
 93%|███████████████████████████████████████████████████████████████████▎    | 43/46 [03:51<00:15,  5.29s/it]Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


check
Evaluating coherence of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 59.60it/s][A


Evaluating consistency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 26.72it/s][A


Evaluating fluency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 72.65it/s][A


Evaluating factual consistency of 2 samples !!!
Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.64it/s][A
 96%|████████████████████████████████████████████████████████████████████▊   | 44/46 [03:56<00:10,  5.32s/it]Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


check
Evaluating coherence of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 59.56it/s][A


Evaluating consistency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 63.16it/s][A


Evaluating fluency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 80.93it/s][A


Evaluating factual consistency of 2 samples !!!
Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  7.75it/s][A
 98%|██████████████████████████████████████████████████████████████████████▍ | 45/46 [04:01<00:05,  5.28s/it]Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


check
Evaluating coherence of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 17.88it/s][A


Evaluating consistency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 18.04it/s][A


Evaluating fluency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 54.63it/s][A


Evaluating factual consistency of 2 samples !!!
Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  3.86it/s][A
100%|████████████████████████████████████████████████████████████████████████| 46/46 [04:09<00:00,  5.41s/it]
