In [1]:
import sys
sys.path.insert(0, r"C:\Users\BMSCE CSE.DESKTOP-IUB6THA\Downloads\kshitij\UniEval")

from utils import convert_to_json
from metric.evaluator import get_evaluator

task = 'fact'

evaluator = get_evaluator(task)

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from prettytable import PrettyTable

def convert_to_json(output_list, src_list=None, ref_list=None, context_list=None, \
            scores=None, doc_id=None, system_id=None):
    """
        Convert the data into the json format.

        output_list: a list of model output
        src_list: source input for different NLG tasks. For example, source document for summarization
                  and dialogue history for dialogue response generation
        ref_list: human-annotated groundtruth
        context_list: the context needed to evaluate several specific dimension. For example,
                      additional factual information when evaluating engagingness and groundedness in dialogues
        scores: human scores for evaluating the model output. They can be used to calculate the correlation
                between evaluators and human judgements. The scores should be stored in a dictionary. For example,
                {'fluency': 2.0, 'coherence': 3.0} could be the human score for a sample.
        doc_id: the index of the input source. It can be used to calculate summary-level correlation for summarzation
        system_id: the index of the generation system. It can be used to calculate system-level correlation.
    """
    json_data = []
    for i in range(len(output_list)):
        cur = {}
        cur['system_output'] = output_list[i]
        if src_list is not None:
            cur['source'] = src_list[i]
        if ref_list is not None:
            cur['reference'] = ref_list[i]
        if context_list is not None:
            cur['context'] = context_list[i]
        if scores is not None:
            cur['scores'] = scores[i]
        if doc_id is not None:
            cur['doc_id'] = doc_id[i]
        if system_id is not None:
            cur['system_id'] = system_id[i]
        json_data.append(cur)
    return json_data


def add_question(dimension, output, src=None, ref=None, context=None, task=None):
    """
        Add questions to generate input in Bool-QA format for UniEval.

        dimension: specific dimension to be evaluated
        src: source input for different NLG tasks. For example, source document for summarization
             and dialogue history for dialogue response generation.
        output: output text generated by the models
        ref: human-annotataed groundtruth
        context: the context needed to evaluate several specific dimension. For example,
                 additional factual information when evaluating engagingness and groundedness in dialogues.
    """

    input_with_question = []
    for i in range(len(output)):
        # For summarization
        if task == 'summarization':
            if dimension == 'fluency':
                cur_input = 'question: Is this a fluent paragraph? </s> paragraph: ' + output[i]
            elif dimension == 'coherence':
                cur_input = 'question: Is this a coherent summary to the document? </s> summary: ' + output[i] + ' </s> document: ' + src[i]
            elif dimension == 'consistency':
                cur_input = 'question: Is this claim consistent with the document? </s> claim: ' + output[i] + ' </s> document: ' + src[i]
            elif dimension == 'relevance':
                cur_input = 'question: Is this summary relevant to the reference? </s> summary: ' + output[i] + ' </s> reference: ' + ref[i]
            else:
                raise NotImplementedError('The input format for this dimension is still undefined. Please customize it first.')
        # For dialogues
        elif task == 'dialogue':
            if dimension == 'naturalness':
                cur_input = 'question: Is this a natural response in the dialogue? </s> response: ' + output[i]
            elif dimension == 'coherence':
                cur_input = 'question: Is this a coherent response given the dialogue history? </s> response: '\
                            + output[i] + ' </s> dialogue history: ' + src[i]
            elif dimension == 'engagingness':
                cur_input = 'question: Is this an engaging and informative response according to the dialogue history and fact? </s> response: '\
                            + output[i] + ' </s> dialogue history: ' + src[i] + ' </s> fact: ' + context[i]
            elif dimension == 'groundedness':
                cur_input = 'question: Is this response consistent with knowledge in the fact? </s> response: '\
                            + output[i] + ' </s> fact: ' + context[i]
            elif dimension == 'understandability':
                cur_input = 'question: Is this an understandable response in the dialogue? </s> response: ' + output[i]
            else:
                raise NotImplementedError('The input format for this dimension is still undefined. Please customize it first.')
        # For data-to-text
        elif task == 'data2text':
            if dimension == 'naturalness':
                cur_input = 'question: Is this a fluent utterance? </s> utterance: ' + output[i]
            elif dimension == 'informativeness':
                cur_input = 'question: Is this sentence informative according to the reference? </s> sentence: '\
                            + output[i] + ' </s> reference: ' + ref[i]
            else:
                raise NotImplementedError('The input format for this dimension is still undefined. Please customize it first.')
        # For factual consistency detection
        elif task == 'fact':
            if dimension == 'consistency':
                cur_input = 'question: Is this claim consistent with the document? </s> claim: ' + output[i] + ' </s> document: ' + src[i]
            else:
                raise NotImplementedError('No other dimensions for the factual consistency detection task.')
        # For new customized tasks
        else:
            raise NotImplementedError('Other tasks are not implemented, please customize specific tasks here.')
        input_with_question.append(cur_input)
    return input_with_question


def print_scores(scores):
    table = PrettyTable(['Dimensions','Score'])
    print('\nEvaluation scores are shown below:')
    dims = list(scores[0].keys())
    for dim in dims:
        cur_score = 0
        for i in range(len(scores)):
            cur_score += scores[i][dim]
        table.add_row([dim, round(cur_score / len(scores), 6)])
    print(table)

In [3]:
import numpy as np
from nltk import sent_tokenize
from scorer import UniEvaluator  # Make sure this import works after placing scorer.py in the same directory

def evaluate(data, dims=None, overall=True, print_result=False, model_name_or_path="t5-small", task='summarization', device='cuda:0', individual=True):
    """
    Get the scores of all the given dimensions (fluency, consistency, coherence, relevance)

    data: A list of dictionaries, where each dictionary contains:
          - 'source': The original text
          - 'system_output': The generated system output (summary)
          - 'reference' (optional): Reference summary for relevance evaluation

    dims: A list of dimensions to be evaluated. If dims is None, it evaluates four default dimensions:
          coherence, consistency, fluency, relevance.

    overall: Boolean to indicate whether the overall score is calculated as the average of all dimensions.

    print_result: Boolean to print the results on the screen.

    model_name_or_path: The model name or path to use for evaluation, e.g., 't5-small'

    task: The task type (used in scoring if needed, like summarization or other NLP tasks).

    device: The device to use for evaluation ('cpu' or 'cuda:0').
    """

    # Instantiate the scorer
    scorer = UniEvaluator(model_name_or_path=model_name_or_path, device=device)

    n_data = len(data)
    eval_scores = [{} for _ in range(n_data)]

    # Default dimensions if not provided
    if dims is None:
        dims = ['coherence', 'consistency', 'fluency', 'factual consistency']   #add relevance

    for dim in dims:
        print(f'Evaluating {dim} of {n_data} samples !!!')

        if dim == 'consistency' or dim == 'fluency':
            # Sentence-level scores for consistency and fluency
            src_list, output_list = [], []
            n_sents = []  # number of sentences in each summary

            for i in range(n_data):
                if dim == 'consistency':
                    source = data[i]['source']
                else:
                    source = ''
                system_outputs = sent_tokenize(data[i]['system_output'])
                n_sents.append(len(system_outputs))
                for j in range(len(system_outputs)):
                    src_list.append(source)
                    output_list.append(system_outputs[j])

            input_list = add_question(dimension=dim, output=output_list, src=src_list, task=task)
            sent_score = scorer.score(input_list)

            # Calculate average sentence-level scores for each sample
            start_idx = 0
            score = []
            for cur_n_sent in n_sents:
                score.append(sum(sent_score[start_idx:start_idx + cur_n_sent]) / cur_n_sent)
                start_idx += cur_n_sent

        elif dim == 'coherence' or dim == 'relevance':
            # Summary-level scores for coherence and relevance
            src_list, output_list, ref_list = [], [], []

            for i in range(n_data):
                src_list.append(data[i]['source'])
                output_list.append(data[i]['system_output'])
                if dim == 'relevance':
                    ref_list.append(data[i]['reference'])

            input_list = add_question(dimension=dim, output=output_list, src=src_list, ref=ref_list, task=task)
            score = scorer.score(input_list)

        elif dim == 'factual consistency':
            output_list, src_list = [], []

            for i in range(n_data):
                src_list.append(data[i]['source'])
                output_list.append(data[i]['system_output'])

            data = convert_to_json(output_list=output_list, src_list=src_list)
            eval_score = evaluator.evaluate(data)
            score = []

            for i in eval_score:
                temp = i['consistency']
                score.append(temp)

        else:
            raise NotImplementedError(f"The input format for the dimension '{dim}' is still undefined. Please customize it.")

        # Store the scores for the current dimension
        for i in range(n_data):
            eval_scores[i][dim] = score[i]

    # Calculate overall score (average of all evaluated dimensions)
    if overall:
        for i in range(n_data):
            eval_scores[i]['overall'] = np.mean([eval_scores[i][dim] for dim in dims])

    # Print the result if requested
    if print_result:
        print_scores(eval_scores)

    if individual:
        individual_scores = []
        for i in range(n_data):
            temp = [eval_scores[i][dim] for dim in dims]
            individual_scores.append(temp)

        return np.array(individual_scores)

    # Calculate average score across all the dimensions except 'overall'
    avg_score = []
    for i in range(n_data):
        # Exclude 'overall' from the averaging
        dimensions = [dim for dim in dims if dim != 'overall']
        avg_score.append(np.mean([eval_scores[i][dim] for dim in dimensions]))

    return avg_score

In [4]:
# Import all required libraries
import torch
import transformers
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM, DataCollatorForLanguageModeling, TrainingArguments
from trl import RewardTrainer, PPOTrainer, PPOConfig, AutoModelForCausalLMWithValueHead, create_reference_model
from datasets import Dataset
import json
import pandas as pd
from sklearn.model_selection import train_test_split
from peft import LoraConfig, PeftModel, get_peft_model, prepare_model_for_kbit_training, TaskType
import bitsandbytes as bnb
import numpy as np
from tqdm import tqdm
import time
import sys

# Add UniEval to path and import
sys.path.append(r"C:\Users\BMSCE CSE.DESKTOP-IUB6THA\Downloads\kshitij\UniEval")
from utils import convert_to_json
from metric.evaluator import get_evaluator

# Configuration
DATA_PATH = "combined_clinical_notes.csv"
# MODEL_PATH = r"D:\kshitij-weights-folder\qwen-aloe-9-4-base-fine-tune"
MODEL_PATH = "gpt2" 
# PEFT_ADAPTER_PATH = r"D:\kshitij-weights-folder\qwen-aloe-9-4-base-fine-tune-peft-adapaters"
MEDICAL_PROMPT = "\nGenerate a concise medical summary focusing on key findings and treatment plans:"

# Load and prepare data
df = pd.read_csv(DATA_PATH)
train_df, temp_df = train_test_split(df, test_size=0.4, random_state=42)
eval_df, test_df = train_test_split(temp_df, test_size=0.5, random_state=42)
dataset = Dataset.from_pandas(eval_df.rename(columns={"dialogue": "review"}))

# Tokenizer setup
tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH, padding_side='left')
tokenizer.pad_token = tokenizer.eos_token

# Dataset preprocessing
def preprocess_function(examples):
    return {
        "input_ids": tokenizer.encode(examples["review"], truncation=True, padding="max_length", max_length=512),
        "query": tokenizer.decode(tokenizer.encode(examples["review"], truncation=True, padding="max_length", max_length=512), skip_special_tokens=True)
    }

dataset = dataset.map(preprocess_function, batched=False)
dataset.set_format("pytorch")

# Model configuration
bnb_config = transformers.BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
)

base_model = AutoModelForCausalLM.from_pretrained(
    MODEL_PATH,
    quantization_config=bnb_config,
    device_map="auto"
)
base_model = prepare_model_for_kbit_training(base_model)

# PEFT/LoRA configuration
lora_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    r=8,
    lora_alpha=32,
    lora_dropout=0.1,
    bias="none",
    target_modules=["c_attn", "c_proj"],
)

# model_with_lora = get_peft_model(base_model, lora_config)
model = AutoModelForCausalLMWithValueHead.from_pretrained(base_model, peft_config=lora_config).to("cuda")

# Reference model
ref_model = create_reference_model(model).to("cuda")
ref_model.eval()
for param in ref_model.parameters():
    param.requires_grad = False

NUM_CANDIDATES = 2

# PPO Configuration
ppo_config = PPOConfig(
    model_name=MODEL_PATH,
    ppo_epochs=1,
    gradient_accumulation_steps=1,
    steps=5,
    batch_size=1*NUM_CANDIDATES,
    mini_batch_size=1*NUM_CANDIDATES,
    learning_rate=2e-5,
    log_with=None,
    # project_kwargs={"logging_dir": r"D:\kshitij-weights-folder\gpt2-rl-logs"}
)

# optimizer = torch.optim.AdamW(
#     filter(lambda p: p.requires_grad, ppo_model.parameters()),
#     lr=2e-5,
#     eps=1e-5,  # Slightly larger epsilon for stability
# )

# Initialize PPO Trainer
ppo_trainer = PPOTrainer(
    config=ppo_config,
    model=model,
    ref_model=ref_model,
    tokenizer=tokenizer,
    dataset=dataset,
    optimizer=bnb.optim.Adam8bit(model.parameters(), lr=ppo_config.learning_rate)
)

# Evaluation setup
# sum_eval = get_evaluator("summarization", "cuda"="cuda" if torch.cuda.is_available() else "cpu")

def get_score(src, res):
    # weights = np.array([0.1, 0.2, 0.3, 0.4])  # coherence, consistency, fluency, factual consistency
    # sample_data = [{"source": q, "system_output": r} for q, r in zip(game_data["query"], game_data["response"])]
    
    # scores = sum_eval.evaluate(sample_data, overall=False)
    # scores = evaluate(sample_data, overall=False)
    # weighted_scores = []
    
    # for dimension_scores in scores:
    #     adjusted = np.where(
    #         dimension_scores < 0.5,
    #         -dimension_scores * weights,
    #         dimension_scores * weights
    #     )
    #     weighted_scores.append(torch.tensor(np.sum(adjusted)/4, dtype=torch.float32).to(model.pretrained_model.device))
    
    # return weighted_scores

    data = convert_to_json(
        output_list=res,
        src_list=src,
    )
    # raw = sum_eval.evaluate(data, print_result=True)
    # dims = ['coherence', 'consistency', 'fluency', 'factual consistency']
    raw = evaluate(data, overall=False)
    score = [
        [d[0], d[1], d[2], d[3]]
        for d in raw
    ]
    scores = torch.tensor(score, dtype=torch.float32).numpy()  # CPU (B,4

    k = len(res)
    dom_counts = np.zeros(k)
    
    for i in range(k):
        for j in range(k):
            if i == j:
                continue
            # Check dominance: i dominates j if all scores are >= and at least one is >
            if np.all(scores[i] >= scores[j]) and np.any(scores[i] > scores[j]):
                dom_counts[i] += 1
    
    # Convert to [-1, 1] range reward
    max_dom = k - 1
    if max_dom > 0:
        rewards = 2 * (dom_counts / max_dom) - 1
    else:
        rewards = np.zeros(k)
    
    return rewards


max_position_embeddings = model.pretrained_model.config.max_position_embeddings 
# Training loop
generation_kwargs = {
    "min_length": -1,
    "top_k": 0.0,
    "top_p": 1.0,
    "do_sample": True,
    "pad_token_id": tokenizer.eos_token_id,
    # "max_new_tokens": 64,  # Increased for better summary generation
    "eos_token_id": -1,
    "max_length": max_position_embeddings,
    "max_new_tokens": 64
}
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Use consistent compute dtype
COMPUTE_DTYPE = torch.float32  # Using float32 to avoid dtype issues

for epoch in range(1):
    for batch_idx, batch in enumerate(tqdm(ppo_trainer.dataloader)):
        (logs, game_data,) = (
            dict(),
            dict(),
        )

        # task_list = choices(ctrl_str, k=config.batch_size)
        # game_data["query"] = [t + q for t, q in zip(task_list, batch["query"])]
        game_data["query"] = [q for q in batch["query"]]
        # query_tensors = [torch.cat((ctrl_tokens[t], input_ids)) for t, input_ids in zip(task_list, batch["input_ids"])]
        query_tensors = [input_ids for input_ids in batch["input_ids"]]
        all_outs = []
        
        response_tensors = []
        for query in query_tensors:
            for _ in range(NUM_CANDIDATES):
                original_notes = tokenizer.decode(query)
                
                # Combine with medical prompt only during generation
                full_prompt = f"{MEDICAL_PROMPT}{original_notes}" #TRy with full prompt here
                full_prompt_tensor = tokenizer.encode(full_prompt, return_tensors="pt").to("cuda").squeeze(0)
                
                with torch.no_grad():
                    response = ppo_trainer.generate(
                        full_prompt_tensor,
                        **generation_kwargs
                    )
                # Ensure response doesn't exceed max length
                response = response[:, :generation_kwargs["max_new_tokens"]]
                all_outs.append(response)
                response_tensors.append(response.squeeze())

            outs = torch.stack(all_outs, dim=1)
            B, K, _ = outs.shape
            
            # Decode outputs for evaluation
            hyps = []
            for b in range(B):
                hyps_b = []
                for k in range(K):
                    try:
                        text = tokenizer.decode(outs[b, k], skip_special_tokens=True)
                        hyps_b.append(text)
                    except Exception as e:
                        print(f"Error decoding text: {e}")
                        hyps_b.append("")  # Add empty string as fallback
                hyps.append(hyps_b)

            # rewards = []
            # for b in range(len(batch['input_ids'])):
            #     # Get scores for all candidates (K, 4)
            #     scores = get_score(
            #         batch['query'][b] * NUM_CANDIDATES,
            #         hyps[b]
            #     ).numpy()

            #     print("check")

            #     dom_counts = np.zeros(NUM_CANDIDATES)
            #     for i in range(NUM_CANDIDATES):
            #         for j in range(NUM_CANDIDATES):
            #             if i == j:
            #                 continue
            #             # Check if i dominates j
            #             if np.all(scores[i] >= scores[j]) and np.any(scores[i] > scores[j]):
            #                 dom_counts[i] += 1

            #     max_dom = NUM_CANDIDATES - 1
            #     scalar_rewards = 2 * (dom_counts / max_dom) - 1
            #     rewards.append(scalar_rewards)

            # flat_queries = []
            # flat_responses = []
            # flat_rewards = []

            # for b in range(len(batch['input_ids'])):
            #     for k in range(NUM_CANDIDATES):
            #         flat_queries.append(batch['input_ids'][b])
            #         flat_responses.append(outs[b, k])
            #         flat_rewards.append(torch.tensor([rewards[b][k]], device="cuda"))

            # stats = ppo_trainer.step(
            #     queries   = flat_queries,    # e.g. [ q0, q0 ]
            #     responses = flat_responses,  # e.g. [ r0, r1 ]
            #     scores    = flat_rewards     # e.g. [ s0, s1 ]
            # )

            flat_queries, flat_responses, flat_rewards = [], [], []
                    
            for b in range(B):
                try:
                    # Calculate rewards
                    rewards_b = get_score(
                        [batch['query'][b]] * K, 
                        hyps[b],
                    )
                    
                    # Flatten for PPO
                    for k in range(K):
                        flat_queries.append(batch['input_ids'][b])
                        flat_responses.append(outs[b, k])
                        flat_rewards.append(torch.tensor([rewards_b[k]], device=DEVICE, dtype=COMPUTE_DTYPE))
                        
                except Exception as e:
                    print(f"Error computing rewards: {e}")
                    continue
            
            # Safety check
            if len(flat_queries) != ppo_config.batch_size:
                print(f"Batch size mismatch: expected {ppo_config.batch_size}, got {len(flat_queries)}")
                continue

            try:
                # Verify shapes match
                print(f"Queries: {len(flat_queries)}, Responses: {len(flat_responses)}, Rewards: {len(flat_rewards)}")
                
                # Manual memory management
                # torch.cuda.empty_cache()
                
                # Do PPO step
                stats = ppo_trainer.step(
                    queries=flat_queries,
                    responses=flat_responses,
                    scores=flat_rewards
                )
                
                # Success! Log the output
                print(f"Batch {batch_idx} PPO step successful!")
                print(f"Sample output: {hyps[0][0][:100]}...")
                avg_reward = np.mean([r.item() for r in flat_rewards])
                print(f"Average reward: {avg_reward:.4f}")
                
            except RuntimeError as e:
                print(f"Error in PPO step: {e}")
                
                # If still running into CUDA errors, try moving to CPU
                if "CUDA" in str(e) and DEVICE != "cpu":
                    print("\nContinuing to encounter CUDA errors. Try two options:")
                    print("1. Change DEVICE = 'cpu' at the top of the script")
                    print("2. Or use the non-quantized model version\n")
                    
                # Clear memory and continue
                # if torch.cuda.is_available():
                #     torch.cuda.empty_cache()
            
            # if batch_idx % 10 == 0:
            #     print(f"Epoch {epoch+1}, Batch {batch_idx}")
            #     print(f"Sample output: {hyps[0][0][:100]}...")
            #     print(f"Average reward: {np.mean([r.item() for r in flat_rewards]):.4f}")

    print(f"✅ Epoch {epoch+1}/3 complete")
    
print("🎉 PPO fine-tuning done")
#         print(response_tensors)
        # game_data["response"] = [tokenizer.decode(r) for r in response_tensors]

        # print("check")

        # texts = [q + r for q, r in zip(batch["query"], game_data["response"])]
        # logits = get_score(game_data)
        # rewards = logits
        # rewards = pos_logit_to_reward(logits, task_list)
        # rewards = [torch.tensor([1.0], device=query_tensors[0].device) for _ in range(len(texts))]

        #### Run PPO training
        # t = time.time()
        # stats = ppo_trainer.step(query_tensors, response_tensors, rewards)


Map: 100%|███████████████████████████████████████████████████████████| 93/93 [00:00<00:00, 151.77 examples/s]
  0%|                                                                                 | 0/46 [00:00<?, ?it/s]Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  8.92it/s][A


Evaluating consistency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 11.71it/s][A


Evaluating fluency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 73.42it/s][A


Evaluating factual consistency of 2 samples !!!
Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.57it/s][A


Queries: 2, Responses: 2, Rewards: 2


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...


Batch 0 PPO step successful!
Sample output: 
Generate a concise medical summary focusing on key findings and treatment plans:[doctor] hey steven...
Average reward: -1.0000


  return fn(*args, **kwargs)
Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 38.28it/s][A


Evaluating consistency of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 41.35it/s][A


Evaluating fluency of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 65.04it/s][A


Evaluating factual consistency of 4 samples !!!
Evaluating consistency of 4 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.40it/s][A
  2%|█▌                                                                       | 1/46 [00:09<07:05,  9.46s/it]Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Batch size mismatch: expected 2, got 4


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 60.04it/s][A


Evaluating consistency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 72.66it/s][A


Evaluating fluency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 69.14it/s][A


Evaluating factual consistency of 2 samples !!!
Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  8.10it/s][A


Queries: 2, Responses: 2, Rewards: 2


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Batch 1 PPO step successful!
Sample output: 
Generate a concise medical summary focusing on key findings and treatment plans:[doctor] karen is a...
Average reward: -1.0000


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 36.73it/s][A


Evaluating consistency of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 42.56it/s][A


Evaluating fluency of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 65.23it/s][A


Evaluating factual consistency of 4 samples !!!
Evaluating consistency of 4 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.42it/s][A
  4%|███▏                                                                     | 2/46 [00:19<07:11,  9.82s/it]Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Batch size mismatch: expected 2, got 4


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 56.11it/s][A


Evaluating consistency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 63.80it/s][A


Evaluating fluency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 68.03it/s][A


Evaluating factual consistency of 2 samples !!!
Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  7.75it/s][A


Queries: 2, Responses: 2, Rewards: 2


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Batch 2 PPO step successful!
Sample output: 
Generate a concise medical summary focusing on key findings and treatment plans:[doctor] okay well ...
Average reward: -1.0000


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 39.83it/s][A


Evaluating consistency of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 41.73it/s][A


Evaluating fluency of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 68.09it/s][A


Evaluating factual consistency of 4 samples !!!
Evaluating consistency of 4 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.34it/s][A
  7%|████▊                                                                    | 3/46 [00:29<07:11, 10.03s/it]Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Batch size mismatch: expected 2, got 4


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 50.05it/s][A


Evaluating consistency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 60.24it/s][A


Evaluating fluency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 64.72it/s][A


Evaluating factual consistency of 2 samples !!!
Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  8.44it/s][A

Queries: 2, Responses: 2, Rewards: 2



Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Batch 3 PPO step successful!
Sample output: 
Generate a concise medical summary focusing on key findings and treatment plans:[doctor] hey gregor...
Average reward: -1.0000


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 41.26it/s][A


Evaluating consistency of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 25.25it/s][A


Evaluating fluency of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 64.83it/s][A


Evaluating factual consistency of 4 samples !!!
Evaluating consistency of 4 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.42it/s][A
  9%|██████▎                                                                  | 4/46 [00:40<07:10, 10.24s/it]Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Batch size mismatch: expected 2, got 4


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 56.14it/s][A


Evaluating consistency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 65.41it/s][A


Evaluating fluency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 75.21it/s][A


Evaluating factual consistency of 2 samples !!!
Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  7.94it/s][A


Queries: 2, Responses: 2, Rewards: 2


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Batch 4 PPO step successful!
Sample output: 
Generate a concise medical summary focusing on key findings and treatment plans:[doctor] mister jac...
Average reward: -1.0000


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 38.15it/s][A


Evaluating consistency of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 23.96it/s][A


Evaluating fluency of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 67.90it/s][A


Evaluating factual consistency of 4 samples !!!
Evaluating consistency of 4 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.24it/s][A
 11%|███████▉                                                                 | 5/46 [00:50<06:59, 10.22s/it]Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Batch size mismatch: expected 2, got 4


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 58.75it/s][A


Evaluating consistency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 31.24it/s][A


Evaluating fluency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 70.99it/s][A


Evaluating factual consistency of 2 samples !!!
Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/2 [00:00<?, ?it/s][A
 50%|█████████████████████████████████████                                     | 1/2 [00:00<00:00,  2.70it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00,  3.08it/s][A


Queries: 2, Responses: 2, Rewards: 2


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Batch 5 PPO step successful!
Sample output: 
Generate a concise medical summary focusing on key findings and treatment plans:[doctor] hi, joseph...
Average reward: 0.0000


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 43.17it/s][A


Evaluating consistency of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 34.53it/s][A


Evaluating fluency of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 73.37it/s][A


Evaluating factual consistency of 4 samples !!!
Evaluating consistency of 4 samples !!!



  0%|                                                                                  | 0/3 [00:00<?, ?it/s][A
 33%|████████████████████████▋                                                 | 1/3 [00:00<00:00,  2.69it/s][A
 67%|█████████████████████████████████████████████████▎                        | 2/3 [00:00<00:00,  2.62it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00,  3.42it/s][A
 13%|█████████▌                                                               | 6/46 [01:01<07:03, 10.59s/it]Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Batch size mismatch: expected 2, got 4


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 57.45it/s][A


Evaluating consistency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 67.05it/s][A


Evaluating fluency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 72.81it/s][A


Evaluating factual consistency of 2 samples !!!
Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  7.97it/s][A


Queries: 2, Responses: 2, Rewards: 2


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Batch 6 PPO step successful!
Sample output: 
Generate a concise medical summary focusing on key findings and treatment plans:[patient] alright t...
Average reward: -1.0000


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 38.50it/s][A


Evaluating consistency of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 42.04it/s][A


Evaluating fluency of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 73.11it/s][A


Evaluating factual consistency of 4 samples !!!
Evaluating consistency of 4 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.38it/s][A
 15%|███████████                                                              | 7/46 [01:11<06:43, 10.34s/it]Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Batch size mismatch: expected 2, got 4


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 57.69it/s][A


Evaluating consistency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 73.08it/s][A


Evaluating fluency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 67.47it/s][A


Evaluating factual consistency of 2 samples !!!
Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  7.97it/s][A

Queries: 2, Responses: 2, Rewards: 2



Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Batch 7 PPO step successful!
Sample output: 
Generate a concise medical summary focusing on key findings and treatment plans:[doctor] hey kyle s...
Average reward: -1.0000


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 39.98it/s][A


Evaluating consistency of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 39.25it/s][A


Evaluating fluency of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 75.95it/s][A


Evaluating factual consistency of 4 samples !!!
Evaluating consistency of 4 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.41it/s][A
 17%|████████████▋                                                            | 8/46 [01:21<06:29, 10.25s/it]Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Batch size mismatch: expected 2, got 4


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 20.17it/s][A


Evaluating consistency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 21.56it/s][A


Evaluating fluency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 63.08it/s][A


Evaluating factual consistency of 2 samples !!!
Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  5.57it/s][A


Queries: 2, Responses: 2, Rewards: 2


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Batch 8 PPO step successful!
Sample output: 
Generate a concise medical summary focusing on key findings and treatment plans:[doctor] alright ju...
Average reward: -1.0000


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 39.39it/s][A


Evaluating consistency of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 40.01it/s][A


Evaluating fluency of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 72.70it/s][A


Evaluating factual consistency of 4 samples !!!
Evaluating consistency of 4 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.36it/s][A
 20%|██████████████▎                                                          | 9/46 [01:33<06:34, 10.67s/it]Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Batch size mismatch: expected 2, got 4


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 56.58it/s][A


Evaluating consistency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 65.17it/s][A


Evaluating fluency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 69.64it/s][A


Evaluating factual consistency of 2 samples !!!
Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  7.80it/s][A


Queries: 2, Responses: 2, Rewards: 2


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Batch 9 PPO step successful!
Sample output: 
Generate a concise medical summary focusing on key findings and treatment plans:[doctor] hey diana ...
Average reward: -1.0000


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 39.14it/s][A


Evaluating consistency of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 40.07it/s][A


Evaluating fluency of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 72.69it/s][A


Evaluating factual consistency of 4 samples !!!
Evaluating consistency of 4 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.32it/s][A
 22%|███████████████▋                                                        | 10/46 [01:43<06:17, 10.48s/it]Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Batch size mismatch: expected 2, got 4


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 59.24it/s][A


Evaluating consistency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 27.54it/s][A


Evaluating fluency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 61.32it/s][A


Evaluating factual consistency of 2 samples !!!
Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.54it/s][A


Queries: 2, Responses: 2, Rewards: 2


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Batch 10 PPO step successful!
Sample output: 
Generate a concise medical summary focusing on key findings and treatment plans:[doctor] hello mrs....
Average reward: -1.0000


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 41.03it/s][A


Evaluating consistency of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 38.91it/s][A


Evaluating fluency of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 71.44it/s][A


Evaluating factual consistency of 4 samples !!!
Evaluating consistency of 4 samples !!!



  0%|                                                                                  | 0/2 [00:00<?, ?it/s][A
 50%|█████████████████████████████████████                                     | 1/2 [00:00<00:00,  2.56it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00,  3.89it/s][A
 24%|█████████████████▏                                                      | 11/46 [01:53<06:06, 10.48s/it]Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Batch size mismatch: expected 2, got 4


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 61.49it/s][A


Evaluating consistency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 40.01it/s][A


Evaluating fluency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 67.45it/s][A


Evaluating factual consistency of 2 samples !!!
Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.23it/s][A


Queries: 2, Responses: 2, Rewards: 2


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Batch 11 PPO step successful!
Sample output: 
Generate a concise medical summary focusing on key findings and treatment plans:[doctor] tyler nels...
Average reward: -1.0000


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 35.61it/s][A


Evaluating consistency of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 29.33it/s][A


Evaluating fluency of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 72.77it/s][A


Evaluating factual consistency of 4 samples !!!
Evaluating consistency of 4 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.90it/s][A
 26%|██████████████████▊                                                     | 12/46 [02:03<05:51, 10.35s/it]Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Batch size mismatch: expected 2, got 4


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 58.11it/s][A


Evaluating consistency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 45.20it/s][A


Evaluating fluency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 74.47it/s][A


Evaluating factual consistency of 2 samples !!!
Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.69it/s][A


Queries: 2, Responses: 2, Rewards: 2


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Batch 12 PPO step successful!
Sample output: 
Generate a concise medical summary focusing on key findings and treatment plans:[doctor] so beverly...
Average reward: -1.0000


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 37.45it/s][A


Evaluating consistency of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 31.92it/s][A


Evaluating fluency of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 75.64it/s][A


Evaluating factual consistency of 4 samples !!!
Evaluating consistency of 4 samples !!!



  0%|                                                                                  | 0/2 [00:00<?, ?it/s][A
 50%|█████████████████████████████████████                                     | 1/2 [00:00<00:00,  2.42it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00,  3.21it/s][A
 28%|████████████████████▎                                                   | 13/46 [02:14<05:43, 10.41s/it]Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Batch size mismatch: expected 2, got 4


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 52.43it/s][A


Evaluating consistency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 65.69it/s][A


Evaluating fluency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 70.37it/s][A


Evaluating factual consistency of 2 samples !!!
Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  7.39it/s][A


Queries: 2, Responses: 2, Rewards: 2


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Batch 13 PPO step successful!
Sample output: 
Generate a concise medical summary focusing on key findings and treatment plans:[doctor] hey kyle i...
Average reward: -1.0000


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 38.37it/s][A


Evaluating consistency of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 22.43it/s][A


Evaluating fluency of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 70.27it/s][A


Evaluating factual consistency of 4 samples !!!
Evaluating consistency of 4 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.12it/s][A
 30%|█████████████████████▉                                                  | 14/46 [02:24<05:30, 10.34s/it]Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Batch size mismatch: expected 2, got 4


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 26.55it/s][A


Evaluating consistency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 27.38it/s][A


Evaluating fluency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 72.48it/s][A


Evaluating factual consistency of 2 samples !!!
Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  5.11it/s][A


Queries: 2, Responses: 2, Rewards: 2


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Batch 14 PPO step successful!
Sample output: 
Generate a concise medical summary focusing on key findings and treatment plans:[doctor] alright so...
Average reward: -1.0000


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 39.16it/s][A


Evaluating consistency of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 44.22it/s][A


Evaluating fluency of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 64.70it/s][A


Evaluating factual consistency of 4 samples !!!
Evaluating consistency of 4 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.63it/s][A
 33%|███████████████████████▍                                                | 15/46 [02:35<05:25, 10.51s/it]Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Batch size mismatch: expected 2, got 4


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 59.29it/s][A


Evaluating consistency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 31.10it/s][A


Evaluating fluency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 61.30it/s][A


Evaluating factual consistency of 2 samples !!!
Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  3.04it/s][A


Queries: 2, Responses: 2, Rewards: 2


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Batch 15 PPO step successful!
Sample output: 
Generate a concise medical summary focusing on key findings and treatment plans:[doctor] amanda tay...
Average reward: -1.0000


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 36.95it/s][A


Evaluating consistency of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 32.19it/s][A


Evaluating fluency of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 70.71it/s][A


Evaluating factual consistency of 4 samples !!!
Evaluating consistency of 4 samples !!!



  0%|                                                                                  | 0/3 [00:00<?, ?it/s][A
 33%|████████████████████████▋                                                 | 1/3 [00:00<00:00,  2.32it/s][A
 67%|█████████████████████████████████████████████████▎                        | 2/3 [00:00<00:00,  2.39it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00,  3.14it/s][A
 35%|█████████████████████████                                               | 16/46 [02:46<05:20, 10.68s/it]Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Batch size mismatch: expected 2, got 4


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 60.73it/s][A


Evaluating consistency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 61.67it/s][A


Evaluating fluency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 75.22it/s][A


Evaluating factual consistency of 2 samples !!!
Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  7.60it/s][A


Queries: 2, Responses: 2, Rewards: 2


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Batch 16 PPO step successful!
Sample output: 
Generate a concise medical summary focusing on key findings and treatment plans:[doctor] hi teresa ...
Average reward: -1.0000


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 36.95it/s][A


Evaluating consistency of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 38.32it/s][A


Evaluating fluency of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 69.60it/s][A


Evaluating factual consistency of 4 samples !!!
Evaluating consistency of 4 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.20it/s][A
 37%|██████████████████████████▌                                             | 17/46 [02:56<05:03, 10.48s/it]Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Batch size mismatch: expected 2, got 4


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 64.50it/s][A


Evaluating consistency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 27.64it/s][A


Evaluating fluency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 72.23it/s][A


Evaluating factual consistency of 2 samples !!!
Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.62it/s][A


Queries: 2, Responses: 2, Rewards: 2


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Batch 17 PPO step successful!
Sample output: 
Generate a concise medical summary focusing on key findings and treatment plans:[doctor] michelle k...
Average reward: -1.0000


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 43.10it/s][A


Evaluating consistency of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 37.94it/s][A


Evaluating fluency of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 70.98it/s][A


Evaluating factual consistency of 4 samples !!!
Evaluating consistency of 4 samples !!!



  0%|                                                                                  | 0/2 [00:00<?, ?it/s][A
 50%|█████████████████████████████████████                                     | 1/2 [00:00<00:00,  2.59it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00,  3.94it/s][A
 39%|████████████████████████████▏                                           | 18/46 [03:07<04:53, 10.47s/it]Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Batch size mismatch: expected 2, got 4


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 59.82it/s][A


Evaluating consistency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 35.75it/s][A


Evaluating fluency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 70.97it/s][A


Evaluating factual consistency of 2 samples !!!
Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/2 [00:00<?, ?it/s][A
 50%|█████████████████████████████████████                                     | 1/2 [00:00<00:00,  2.47it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00,  3.28it/s][A


Queries: 2, Responses: 2, Rewards: 2


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Batch 18 PPO step successful!
Sample output: 
Generate a concise medical summary focusing on key findings and treatment plans:[doctor] we're gon ...
Average reward: 0.0000


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 37.67it/s][A


Evaluating consistency of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 27.97it/s][A


Evaluating fluency of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 70.22it/s][A


Evaluating factual consistency of 4 samples !!!
Evaluating consistency of 4 samples !!!



  0%|                                                                                  | 0/2 [00:00<?, ?it/s][A
 50%|█████████████████████████████████████                                     | 1/2 [00:00<00:00,  2.48it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00,  2.69it/s][A
 41%|█████████████████████████████▋                                          | 19/46 [03:18<04:48, 10.68s/it]Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Batch size mismatch: expected 2, got 4


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 41.34it/s][A


Evaluating consistency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 67.45it/s][A


Evaluating fluency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 66.05it/s][A


Evaluating factual consistency of 2 samples !!!
Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  7.79it/s][A


Queries: 2, Responses: 2, Rewards: 2


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Batch 19 PPO step successful!
Sample output: 
Generate a concise medical summary focusing on key findings and treatment plans:[doctor] hey nichol...
Average reward: -1.0000


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 37.38it/s][A


Evaluating consistency of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 41.08it/s][A


Evaluating fluency of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 55.59it/s][A


Evaluating factual consistency of 4 samples !!!
Evaluating consistency of 4 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.32it/s][A
 43%|███████████████████████████████▎                                        | 20/46 [03:28<04:32, 10.49s/it]Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Batch size mismatch: expected 2, got 4


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 54.53it/s][A


Evaluating consistency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 51.51it/s][A


Evaluating fluency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 63.02it/s][A


Evaluating factual consistency of 2 samples !!!
Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  7.77it/s][A


Queries: 2, Responses: 2, Rewards: 2


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Batch 20 PPO step successful!
Sample output: 
Generate a concise medical summary focusing on key findings and treatment plans:[doctor] hey steven...
Average reward: -1.0000


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 38.47it/s][A


Evaluating consistency of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 39.40it/s][A


Evaluating fluency of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 72.80it/s][A


Evaluating factual consistency of 4 samples !!!
Evaluating consistency of 4 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.35it/s][A
 46%|████████████████████████████████▊                                       | 21/46 [03:38<04:21, 10.47s/it]Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Batch size mismatch: expected 2, got 4


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 56.20it/s][A


Evaluating consistency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 63.40it/s][A


Evaluating fluency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 68.12it/s][A


Evaluating factual consistency of 2 samples !!!
Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  7.50it/s][A


Queries: 2, Responses: 2, Rewards: 2


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Batch 21 PPO step successful!
Sample output: 
Generate a concise medical summary focusing on key findings and treatment plans:[doctor] so jerry i...
Average reward: -1.0000


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 35.87it/s][A


Evaluating consistency of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 39.83it/s][A


Evaluating fluency of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 67.71it/s][A


Evaluating factual consistency of 4 samples !!!
Evaluating consistency of 4 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.20it/s][A
 48%|██████████████████████████████████▍                                     | 22/46 [03:48<04:07, 10.30s/it]Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Batch size mismatch: expected 2, got 4


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 65.81it/s][A


Evaluating consistency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 65.78it/s][A


Evaluating fluency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 65.65it/s][A


Evaluating factual consistency of 2 samples !!!
Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  7.88it/s][A


Queries: 2, Responses: 2, Rewards: 2


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Batch 22 PPO step successful!
Sample output: 
Generate a concise medical summary focusing on key findings and treatment plans:[doctor] hi russell...
Average reward: -1.0000


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 39.57it/s][A


Evaluating consistency of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 41.06it/s][A


Evaluating fluency of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 65.76it/s][A


Evaluating factual consistency of 4 samples !!!
Evaluating consistency of 4 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.32it/s][A
 50%|████████████████████████████████████                                    | 23/46 [03:59<03:57, 10.33s/it]Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Batch size mismatch: expected 2, got 4


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 59.78it/s][A


Evaluating consistency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 26.59it/s][A


Evaluating fluency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 70.18it/s][A


Evaluating factual consistency of 2 samples !!!
Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.43it/s][A


Queries: 2, Responses: 2, Rewards: 2


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Batch 23 PPO step successful!
Sample output: 
Generate a concise medical summary focusing on key findings and treatment plans:[doctor] michelle k...
Average reward: -1.0000


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 38.62it/s][A


Evaluating consistency of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 36.97it/s][A


Evaluating fluency of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 70.40it/s][A


Evaluating factual consistency of 4 samples !!!
Evaluating consistency of 4 samples !!!



  0%|                                                                                  | 0/2 [00:00<?, ?it/s][A
 50%|█████████████████████████████████████                                     | 1/2 [00:00<00:00,  2.42it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00,  3.67it/s][A
 52%|█████████████████████████████████████▌                                  | 24/46 [04:09<03:48, 10.39s/it]Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Batch size mismatch: expected 2, got 4


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 59.80it/s][A


Evaluating consistency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 32.17it/s][A


Evaluating fluency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 72.83it/s][A


Evaluating factual consistency of 2 samples !!!
Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/2 [00:00<?, ?it/s][A
 50%|█████████████████████████████████████                                     | 1/2 [00:00<00:00,  2.66it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00,  3.04it/s][A


Queries: 2, Responses: 2, Rewards: 2


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Batch 24 PPO step successful!
Sample output: 
Generate a concise medical summary focusing on key findings and treatment plans:hi, susan, how are ...
Average reward: -1.0000


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 40.63it/s][A


Evaluating consistency of 4 samples !!!



  0%|                                                                                  | 0/3 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 27.18it/s][A


Evaluating fluency of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 62.40it/s][A


Evaluating factual consistency of 4 samples !!!
Evaluating consistency of 4 samples !!!



  0%|                                                                                  | 0/3 [00:00<?, ?it/s][A
 33%|████████████████████████▋                                                 | 1/3 [00:00<00:00,  2.66it/s][A
 67%|█████████████████████████████████████████████████▎                        | 2/3 [00:00<00:00,  2.65it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 3/3 [00:01<00:00,  2.63it/s][A
 54%|███████████████████████████████████████▏                                | 25/46 [04:21<03:45, 10.76s/it]Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Batch size mismatch: expected 2, got 4


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 62.44it/s][A


Evaluating consistency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 63.74it/s][A


Evaluating fluency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 78.34it/s][A


Evaluating factual consistency of 2 samples !!!
Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  7.88it/s][A


Queries: 2, Responses: 2, Rewards: 2


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Batch 25 PPO step successful!
Sample output: 
Generate a concise medical summary focusing on key findings and treatment plans:[patient] and good ...
Average reward: -1.0000


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 23.18it/s][A


Evaluating consistency of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 20.48it/s][A


Evaluating fluency of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 75.63it/s][A


Evaluating factual consistency of 4 samples !!!
Evaluating consistency of 4 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  3.50it/s][A
 57%|████████████████████████████████████████▋                               | 26/46 [04:31<03:34, 10.74s/it]Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Batch size mismatch: expected 2, got 4


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 55.99it/s][A


Evaluating consistency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 58.86it/s][A


Evaluating fluency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 65.51it/s][A


Evaluating factual consistency of 2 samples !!!
Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  7.41it/s][A


Queries: 2, Responses: 2, Rewards: 2


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Batch 26 PPO step successful!
Sample output: 
Generate a concise medical summary focusing on key findings and treatment plans:[doctor] hey jose h...
Average reward: -1.0000


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 36.87it/s][A


Evaluating consistency of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 38.01it/s][A


Evaluating fluency of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 72.97it/s][A


Evaluating factual consistency of 4 samples !!!
Evaluating consistency of 4 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.14it/s][A
 59%|██████████████████████████████████████████▎                             | 27/46 [04:42<03:22, 10.64s/it]Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Batch size mismatch: expected 2, got 4


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 63.12it/s][A


Evaluating consistency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 61.21it/s][A


Evaluating fluency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 65.26it/s][A


Evaluating factual consistency of 2 samples !!!
Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  7.83it/s][A


Queries: 2, Responses: 2, Rewards: 2


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Batch 27 PPO step successful!
Sample output: 
Generate a concise medical summary focusing on key findings and treatment plans:[doctor] patient is...
Average reward: -1.0000


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 40.35it/s][A


Evaluating consistency of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 40.44it/s][A


Evaluating fluency of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 67.68it/s][A


Evaluating factual consistency of 4 samples !!!
Evaluating consistency of 4 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.23it/s][A
 61%|███████████████████████████████████████████▊                            | 28/46 [04:52<03:07, 10.40s/it]Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Batch size mismatch: expected 2, got 4


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 59.33it/s][A


Evaluating consistency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 68.69it/s][A


Evaluating fluency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 72.34it/s][A


Evaluating factual consistency of 2 samples !!!
Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  8.35it/s][A

Queries: 2, Responses: 2, Rewards: 2



Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Batch 28 PPO step successful!
Sample output: 
Generate a concise medical summary focusing on key findings and treatment plans:[doctor] so stephan...
Average reward: -1.0000


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 41.11it/s][A


Evaluating consistency of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 40.78it/s][A


Evaluating fluency of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 76.07it/s][A


Evaluating factual consistency of 4 samples !!!
Evaluating consistency of 4 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.65it/s][A
 63%|█████████████████████████████████████████████▍                          | 29/46 [05:02<02:56, 10.38s/it]Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Batch size mismatch: expected 2, got 4


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 60.01it/s][A


Evaluating consistency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 33.29it/s][A


Evaluating fluency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 72.86it/s][A


Evaluating factual consistency of 2 samples !!!
Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  3.23it/s][A


Queries: 2, Responses: 2, Rewards: 2


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Batch 29 PPO step successful!
Sample output: 
Generate a concise medical summary focusing on key findings and treatment plans:[patient] next pati...
Average reward: -1.0000


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 40.32it/s][A


Evaluating consistency of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 33.96it/s][A


Evaluating fluency of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 68.65it/s][A


Evaluating factual consistency of 4 samples !!!
Evaluating consistency of 4 samples !!!



  0%|                                                                                  | 0/3 [00:00<?, ?it/s][A
 33%|████████████████████████▋                                                 | 1/3 [00:00<00:00,  2.48it/s][A
 67%|█████████████████████████████████████████████████▎                        | 2/3 [00:00<00:00,  2.50it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00,  3.28it/s][A
 65%|██████████████████████████████████████████████▉                         | 30/46 [05:13<02:48, 10.52s/it]Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Batch size mismatch: expected 2, got 4


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 56.14it/s][A


Evaluating consistency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 65.70it/s][A


Evaluating fluency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 68.07it/s][A


Evaluating factual consistency of 2 samples !!!
Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  7.50it/s][A


Queries: 2, Responses: 2, Rewards: 2


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Batch 30 PPO step successful!
Sample output: 
Generate a concise medical summary focusing on key findings and treatment plans:[doctor] so jerry i...
Average reward: -1.0000


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 36.08it/s][A


Evaluating consistency of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 26.51it/s][A


Evaluating fluency of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 70.09it/s][A


Evaluating factual consistency of 4 samples !!!
Evaluating consistency of 4 samples !!!



  0%|                                                                                  | 0/2 [00:00<?, ?it/s][A
 50%|█████████████████████████████████████                                     | 1/2 [00:00<00:00,  2.22it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00,  2.59it/s][A
 67%|████████████████████████████████████████████████▌                       | 31/46 [05:23<02:37, 10.50s/it]Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Batch size mismatch: expected 2, got 4


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 65.35it/s][A


Evaluating consistency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 70.24it/s][A


Evaluating fluency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 72.17it/s][A


Evaluating factual consistency of 2 samples !!!
Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  8.38it/s][A

Queries: 2, Responses: 2, Rewards: 2



Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Batch 31 PPO step successful!
Sample output: 
Generate a concise medical summary focusing on key findings and treatment plans:[doctor] hey gabrie...
Average reward: -1.0000


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 41.53it/s][A


Evaluating consistency of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 36.73it/s][A


Evaluating fluency of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 73.61it/s][A


Evaluating factual consistency of 4 samples !!!
Evaluating consistency of 4 samples !!!



  0%|                                                                                  | 0/2 [00:00<?, ?it/s][A
 50%|█████████████████████████████████████                                     | 1/2 [00:00<00:00,  2.42it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00,  3.82it/s][A
 70%|██████████████████████████████████████████████████                      | 32/46 [05:34<02:26, 10.46s/it]Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Batch size mismatch: expected 2, got 4


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 64.60it/s][A


Evaluating consistency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 40.01it/s][A


Evaluating fluency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 69.87it/s][A


Evaluating factual consistency of 2 samples !!!
Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/2 [00:00<?, ?it/s][A
 50%|█████████████████████████████████████                                     | 1/2 [00:00<00:00,  2.67it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00,  4.16it/s][A


Queries: 2, Responses: 2, Rewards: 2


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Batch 32 PPO step successful!
Sample output: 
Generate a concise medical summary focusing on key findings and treatment plans:[doctor] hello.
[pa...
Average reward: -1.0000


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 44.20it/s][A


Evaluating consistency of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 27.46it/s][A


Evaluating fluency of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 72.96it/s][A


Evaluating factual consistency of 4 samples !!!
Evaluating consistency of 4 samples !!!



  0%|                                                                                  | 0/2 [00:00<?, ?it/s][A
 50%|█████████████████████████████████████                                     | 1/2 [00:00<00:00,  2.69it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00,  2.64it/s][A
 72%|███████████████████████████████████████████████████▋                    | 33/46 [05:45<02:17, 10.60s/it]Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Batch size mismatch: expected 2, got 4


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 60.86it/s][A


Evaluating consistency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 59.12it/s][A


Evaluating fluency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 66.86it/s][A


Evaluating factual consistency of 2 samples !!!
Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  7.69it/s][A


Queries: 2, Responses: 2, Rewards: 2


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Batch 33 PPO step successful!
Sample output: 
Generate a concise medical summary focusing on key findings and treatment plans:[doctor] okay raymo...
Average reward: -1.0000


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 36.30it/s][A


Evaluating consistency of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 22.54it/s][A


Evaluating fluency of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 69.48it/s][A


Evaluating factual consistency of 4 samples !!!
Evaluating consistency of 4 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.23it/s][A
 74%|█████████████████████████████████████████████████████▏                  | 34/46 [05:55<02:07, 10.59s/it]Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Batch size mismatch: expected 2, got 4


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 58.93it/s][A


Evaluating consistency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 60.51it/s][A


Evaluating fluency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 73.12it/s][A


Evaluating factual consistency of 2 samples !!!
Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  7.92it/s][A


Queries: 2, Responses: 2, Rewards: 2


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Batch 34 PPO step successful!
Sample output: 
Generate a concise medical summary focusing on key findings and treatment plans:[doctor] hey willia...
Average reward: -1.0000


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 37.89it/s][A


Evaluating consistency of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 32.42it/s][A


Evaluating fluency of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 69.44it/s][A


Evaluating factual consistency of 4 samples !!!
Evaluating consistency of 4 samples !!!



  0%|                                                                                  | 0/2 [00:00<?, ?it/s][A
 50%|█████████████████████████████████████                                     | 1/2 [00:00<00:00,  2.32it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00,  3.14it/s][A
 76%|██████████████████████████████████████████████████████▊                 | 35/46 [06:06<01:55, 10.54s/it]Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Batch size mismatch: expected 2, got 4


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 65.70it/s][A


Evaluating consistency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 45.53it/s][A


Evaluating fluency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 75.37it/s][A


Evaluating factual consistency of 2 samples !!!
Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.69it/s][A


Queries: 2, Responses: 2, Rewards: 2


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Batch 35 PPO step successful!
Sample output: 
Generate a concise medical summary focusing on key findings and treatment plans:[doctor] judy gomez...
Average reward: -1.0000


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 41.12it/s][A


Evaluating consistency of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 31.45it/s][A


Evaluating fluency of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 73.16it/s][A


Evaluating factual consistency of 4 samples !!!
Evaluating consistency of 4 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  3.14it/s][A
 78%|████████████████████████████████████████████████████████▎               | 36/46 [06:16<01:44, 10.44s/it]Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Batch size mismatch: expected 2, got 4


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 58.67it/s][A


Evaluating consistency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 64.58it/s][A


Evaluating fluency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 72.37it/s][A


Evaluating factual consistency of 2 samples !!!
Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  7.91it/s][A


Queries: 2, Responses: 2, Rewards: 2


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Batch 36 PPO step successful!
Sample output: 
Generate a concise medical summary focusing on key findings and treatment plans:[patient] alright t...
Average reward: -1.0000


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 39.07it/s][A


Evaluating consistency of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 43.84it/s][A


Evaluating fluency of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 72.56it/s][A


Evaluating factual consistency of 4 samples !!!
Evaluating consistency of 4 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.37it/s][A
 80%|█████████████████████████████████████████████████████████▉              | 37/46 [06:26<01:32, 10.29s/it]Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Batch size mismatch: expected 2, got 4


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 63.35it/s][A


Evaluating consistency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 61.45it/s][A


Evaluating fluency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 72.41it/s][A


Evaluating factual consistency of 2 samples !!!
Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  8.44it/s][A

Queries: 2, Responses: 2, Rewards: 2



Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Batch 37 PPO step successful!
Sample output: 
Generate a concise medical summary focusing on key findings and treatment plans:[doctor] okay hi an...
Average reward: -1.0000


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 39.98it/s][A


Evaluating consistency of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 42.93it/s][A


Evaluating fluency of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 73.48it/s][A


Evaluating factual consistency of 4 samples !!!
Evaluating consistency of 4 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.65it/s][A
 83%|███████████████████████████████████████████████████████████▍            | 38/46 [06:36<01:21, 10.24s/it]Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Batch size mismatch: expected 2, got 4


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 57.04it/s][A


Evaluating consistency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 61.03it/s][A


Evaluating fluency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 72.33it/s][A


Evaluating factual consistency of 2 samples !!!
Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  7.87it/s][A


Queries: 2, Responses: 2, Rewards: 2


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Batch 38 PPO step successful!
Sample output: 
Generate a concise medical summary focusing on key findings and treatment plans:[doctor] yeah so so...
Average reward: -1.0000


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 40.09it/s][A


Evaluating consistency of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 41.67it/s][A


Evaluating fluency of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 54.35it/s][A


Evaluating factual consistency of 4 samples !!!
Evaluating consistency of 4 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.33it/s][A
 85%|█████████████████████████████████████████████████████████████           | 39/46 [06:46<01:10, 10.11s/it]Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Batch size mismatch: expected 2, got 4


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 61.45it/s][A


Evaluating consistency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 61.02it/s][A


Evaluating fluency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 51.64it/s][A


Evaluating factual consistency of 2 samples !!!
Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  7.82it/s][A


Queries: 2, Responses: 2, Rewards: 2


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Batch 39 PPO step successful!
Sample output: 
Generate a concise medical summary focusing on key findings and treatment plans:[doctor] hey betty ...
Average reward: -1.0000


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 40.22it/s][A


Evaluating consistency of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 41.69it/s][A


Evaluating fluency of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 67.55it/s][A


Evaluating factual consistency of 4 samples !!!
Evaluating consistency of 4 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.33it/s][A
 87%|██████████████████████████████████████████████████████████████▌         | 40/46 [06:56<01:00, 10.09s/it]Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Batch size mismatch: expected 2, got 4


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 63.79it/s][A


Evaluating consistency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 65.27it/s][A


Evaluating fluency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 75.83it/s][A


Evaluating factual consistency of 2 samples !!!
Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  7.78it/s][A


Queries: 2, Responses: 2, Rewards: 2


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Batch 40 PPO step successful!
Sample output: 
Generate a concise medical summary focusing on key findings and treatment plans:[doctor] hi virgini...
Average reward: -1.0000


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 37.15it/s][A


Evaluating consistency of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 40.06it/s][A


Evaluating fluency of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 69.76it/s][A


Evaluating factual consistency of 4 samples !!!
Evaluating consistency of 4 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.28it/s][A
 89%|████████████████████████████████████████████████████████████████▏       | 41/46 [07:06<00:50, 10.04s/it]Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Batch size mismatch: expected 2, got 4


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 58.11it/s][A


Evaluating consistency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 62.01it/s][A


Evaluating fluency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 75.18it/s][A


Evaluating factual consistency of 2 samples !!!
Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  7.89it/s][A

Queries: 2, Responses: 2, Rewards: 2



Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Batch 41 PPO step successful!
Sample output: 
Generate a concise medical summary focusing on key findings and treatment plans:[doctor] hi russell...
Average reward: -1.0000


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 41.67it/s][A


Evaluating consistency of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 40.26it/s][A


Evaluating fluency of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 62.92it/s][A


Evaluating factual consistency of 4 samples !!!
Evaluating consistency of 4 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.23it/s][A
 91%|█████████████████████████████████████████████████████████████████▋      | 42/46 [07:16<00:40, 10.13s/it]Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Batch size mismatch: expected 2, got 4


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 61.75it/s][A


Evaluating consistency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 70.60it/s][A


Evaluating fluency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 66.59it/s][A


Evaluating factual consistency of 2 samples !!!
Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  8.41it/s][A

Queries: 2, Responses: 2, Rewards: 2



Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Batch 42 PPO step successful!
Sample output: 
Generate a concise medical summary focusing on key findings and treatment plans:[doctor] good alrig...
Average reward: -1.0000


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 40.88it/s][A


Evaluating consistency of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 32.53it/s][A


Evaluating fluency of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 70.48it/s][A


Evaluating factual consistency of 4 samples !!!
Evaluating consistency of 4 samples !!!



  0%|                                                                                  | 0/2 [00:00<?, ?it/s][A
 50%|█████████████████████████████████████                                     | 1/2 [00:00<00:00,  2.39it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00,  3.23it/s][A
 93%|███████████████████████████████████████████████████████████████████▎    | 43/46 [07:26<00:30, 10.20s/it]Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Batch size mismatch: expected 2, got 4


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 61.47it/s][A


Evaluating consistency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 61.86it/s][A


Evaluating fluency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 72.71it/s][A


Evaluating factual consistency of 2 samples !!!
Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  7.79it/s][A


Queries: 2, Responses: 2, Rewards: 2


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Batch 43 PPO step successful!
Sample output: 
Generate a concise medical summary focusing on key findings and treatment plans:[doctor] okay so we...
Average reward: -1.0000


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 39.30it/s][A


Evaluating consistency of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 27.60it/s][A


Evaluating fluency of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 75.69it/s][A


Evaluating factual consistency of 4 samples !!!
Evaluating consistency of 4 samples !!!



  0%|                                                                                  | 0/2 [00:00<?, ?it/s][A
 50%|█████████████████████████████████████                                     | 1/2 [00:00<00:00,  2.29it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00,  2.71it/s][A
 96%|████████████████████████████████████████████████████████████████████▊   | 44/46 [07:37<00:20, 10.46s/it]Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Batch size mismatch: expected 2, got 4


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 55.52it/s][A


Evaluating consistency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 62.54it/s][A


Evaluating fluency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 73.53it/s][A


Evaluating factual consistency of 2 samples !!!
Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  7.71it/s][A


Queries: 2, Responses: 2, Rewards: 2


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Batch 44 PPO step successful!
Sample output: 
Generate a concise medical summary focusing on key findings and treatment plans:[doctor] kayla ward...
Average reward: -1.0000


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 37.87it/s][A


Evaluating consistency of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 40.74it/s][A


Evaluating fluency of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 76.10it/s][A


Evaluating factual consistency of 4 samples !!!
Evaluating consistency of 4 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.29it/s][A
 98%|██████████████████████████████████████████████████████████████████████▍ | 45/46 [07:47<00:10, 10.32s/it]Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Batch size mismatch: expected 2, got 4


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 59.28it/s][A


Evaluating consistency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 67.11it/s][A


Evaluating fluency of 2 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 75.33it/s][A


Evaluating factual consistency of 2 samples !!!
Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  7.83it/s][A


Queries: 2, Responses: 2, Rewards: 2


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Batch 45 PPO step successful!
Sample output: 
Generate a concise medical summary focusing on key findings and treatment plans:[doctor] alright ju...
Average reward: -1.0000


Both `max_new_tokens` (=64) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 37.73it/s][A


Evaluating consistency of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 40.60it/s][A


Evaluating fluency of 4 samples !!!



100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 65.76it/s][A


Evaluating factual consistency of 4 samples !!!
Evaluating consistency of 4 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.37it/s][A
100%|████████████████████████████████████████████████████████████████████████| 46/46 [07:57<00:00, 10.39s/it]

Batch size mismatch: expected 2, got 4
✅ Epoch 1/3 complete
🎉 PPO fine-tuning done





In [5]:
ppo_trainer.model.pretrained_model.save_pretrained("D:\kshitij-weights-folder\gpt-2-tuned-ppo")
tokenizer.save_pretrained("D:\kshitij-weights-folder\gpt-2-tuned-ppo")


('D:\\kshitij-weights-folder\\gpt-2-tuned-ppo\\tokenizer_config.json',
 'D:\\kshitij-weights-folder\\gpt-2-tuned-ppo\\special_tokens_map.json',
 'D:\\kshitij-weights-folder\\gpt-2-tuned-ppo\\vocab.json',
 'D:\\kshitij-weights-folder\\gpt-2-tuned-ppo\\merges.txt',
 'D:\\kshitij-weights-folder\\gpt-2-tuned-ppo\\added_tokens.json',
 'D:\\kshitij-weights-folder\\gpt-2-tuned-ppo\\tokenizer.json')