In [None]:
import sys
sys.path.insert(0, r"C:\Users\BMSCE CSE.DESKTOP-IUB6THA\Downloads\kshitij\UniEval")

from utils import convert_to_json
from metric.evaluator import get_evaluator

task = 'fact'

evaluator = get_evaluator(task)

In [None]:
from prettytable import PrettyTable

def convert_to_json(output_list, src_list=None, ref_list=None, context_list=None, \
            scores=None, doc_id=None, system_id=None):
    """
        Convert the data into the json format.

        output_list: a list of model output
        src_list: source input for different NLG tasks. For example, source document for summarization
                  and dialogue history for dialogue response generation
        ref_list: human-annotated groundtruth
        context_list: the context needed to evaluate several specific dimension. For example,
                      additional factual information when evaluating engagingness and groundedness in dialogues
        scores: human scores for evaluating the model output. They can be used to calculate the correlation
                between evaluators and human judgements. The scores should be stored in a dictionary. For example,
                {'fluency': 2.0, 'coherence': 3.0} could be the human score for a sample.
        doc_id: the index of the input source. It can be used to calculate summary-level correlation for summarzation
        system_id: the index of the generation system. It can be used to calculate system-level correlation.
    """
    json_data = []
    for i in range(len(output_list)):
        cur = {}
        cur['system_output'] = output_list[i]
        if src_list is not None:
            cur['source'] = src_list[i]
        if ref_list is not None:
            cur['reference'] = ref_list[i]
        if context_list is not None:
            cur['context'] = context_list[i]
        if scores is not None:
            cur['scores'] = scores[i]
        if doc_id is not None:
            cur['doc_id'] = doc_id[i]
        if system_id is not None:
            cur['system_id'] = system_id[i]
        json_data.append(cur)
    return json_data


def add_question(dimension, output, src=None, ref=None, context=None, task=None):
    """
        Add questions to generate input in Bool-QA format for UniEval.

        dimension: specific dimension to be evaluated
        src: source input for different NLG tasks. For example, source document for summarization
             and dialogue history for dialogue response generation.
        output: output text generated by the models
        ref: human-annotataed groundtruth
        context: the context needed to evaluate several specific dimension. For example,
                 additional factual information when evaluating engagingness and groundedness in dialogues.
    """

    input_with_question = []
    for i in range(len(output)):
        # For summarization
        if task == 'summarization':
            if dimension == 'fluency':
                cur_input = 'question: Is this a fluent paragraph? </s> paragraph: ' + output[i]
            elif dimension == 'coherence':
                cur_input = 'question: Is this a coherent summary to the document? </s> summary: ' + output[i] + ' </s> document: ' + src[i]
            elif dimension == 'consistency':
                cur_input = 'question: Is this claim consistent with the document? </s> claim: ' + output[i] + ' </s> document: ' + src[i]
            elif dimension == 'relevance':
                cur_input = 'question: Is this summary relevant to the reference? </s> summary: ' + output[i] + ' </s> reference: ' + ref[i]
            else:
                raise NotImplementedError('The input format for this dimension is still undefined. Please customize it first.')
        # For dialogues
        elif task == 'dialogue':
            if dimension == 'naturalness':
                cur_input = 'question: Is this a natural response in the dialogue? </s> response: ' + output[i]
            elif dimension == 'coherence':
                cur_input = 'question: Is this a coherent response given the dialogue history? </s> response: '\
                            + output[i] + ' </s> dialogue history: ' + src[i]
            elif dimension == 'engagingness':
                cur_input = 'question: Is this an engaging and informative response according to the dialogue history and fact? </s> response: '\
                            + output[i] + ' </s> dialogue history: ' + src[i] + ' </s> fact: ' + context[i]
            elif dimension == 'groundedness':
                cur_input = 'question: Is this response consistent with knowledge in the fact? </s> response: '\
                            + output[i] + ' </s> fact: ' + context[i]
            elif dimension == 'understandability':
                cur_input = 'question: Is this an understandable response in the dialogue? </s> response: ' + output[i]
            else:
                raise NotImplementedError('The input format for this dimension is still undefined. Please customize it first.')
        # For data-to-text
        elif task == 'data2text':
            if dimension == 'naturalness':
                cur_input = 'question: Is this a fluent utterance? </s> utterance: ' + output[i]
            elif dimension == 'informativeness':
                cur_input = 'question: Is this sentence informative according to the reference? </s> sentence: '\
                            + output[i] + ' </s> reference: ' + ref[i]
            else:
                raise NotImplementedError('The input format for this dimension is still undefined. Please customize it first.')
        # For factual consistency detection
        elif task == 'fact':
            if dimension == 'consistency':
                cur_input = 'question: Is this claim consistent with the document? </s> claim: ' + output[i] + ' </s> document: ' + src[i]
            else:
                raise NotImplementedError('No other dimensions for the factual consistency detection task.')
        # For new customized tasks
        else:
            raise NotImplementedError('Other tasks are not implemented, please customize specific tasks here.')
        input_with_question.append(cur_input)
    return input_with_question


def print_scores(scores):
    table = PrettyTable(['Dimensions','Score'])
    print('\nEvaluation scores are shown below:')
    dims = list(scores[0].keys())
    for dim in dims:
        cur_score = 0
        for i in range(len(scores)):
            cur_score += scores[i][dim]
        table.add_row([dim, round(cur_score / len(scores), 6)])
    print(table)

In [None]:
import numpy as np
from nltk import sent_tokenize
from scorer import UniEvaluator  # Make sure this import works after placing scorer.py in the same directory

def evaluate(data, dims=None, overall=True, print_result=False, model_name_or_path="t5-small", task='summarization', device='cuda:0', individual=True):
    """
    Get the scores of all the given dimensions (fluency, consistency, coherence, relevance)

    data: A list of dictionaries, where each dictionary contains:
          - 'source': The original text
          - 'system_output': The generated system output (summary)
          - 'reference' (optional): Reference summary for relevance evaluation

    dims: A list of dimensions to be evaluated. If dims is None, it evaluates four default dimensions:
          coherence, consistency, fluency, relevance.

    overall: Boolean to indicate whether the overall score is calculated as the average of all dimensions.

    print_result: Boolean to print the results on the screen.

    model_name_or_path: The model name or path to use for evaluation, e.g., 't5-small'

    task: The task type (used in scoring if needed, like summarization or other NLP tasks).

    device: The device to use for evaluation ('cpu' or 'cuda:0').
    """

    # Instantiate the scorer
    scorer = UniEvaluator(model_name_or_path=model_name_or_path, device=device)

    n_data = len(data)
    eval_scores = [{} for _ in range(n_data)]

    # Default dimensions if not provided
    if dims is None:
        dims = ['coherence', 'consistency', 'fluency', 'factual consistency']   #add relevance

    for dim in dims:
        print(f'Evaluating {dim} of {n_data} samples !!!')

        if dim == 'consistency' or dim == 'fluency':
            # Sentence-level scores for consistency and fluency
            src_list, output_list = [], []
            n_sents = []  # number of sentences in each summary

            for i in range(n_data):
                if dim == 'consistency':
                    source = data[i]['source']
                else:
                    source = ''
                system_outputs = sent_tokenize(data[i]['system_output'])
                n_sents.append(len(system_outputs))
                for j in range(len(system_outputs)):
                    src_list.append(source)
                    output_list.append(system_outputs[j])

            input_list = add_question(dimension=dim, output=output_list, src=src_list, task=task)
            sent_score = scorer.score(input_list)

            # Calculate average sentence-level scores for each sample
            start_idx = 0
            score = []
            for cur_n_sent in n_sents:
                score.append(sum(sent_score[start_idx:start_idx + cur_n_sent]) / cur_n_sent)
                start_idx += cur_n_sent

        elif dim == 'coherence' or dim == 'relevance':
            # Summary-level scores for coherence and relevance
            src_list, output_list, ref_list = [], [], []

            for i in range(n_data):
                src_list.append(data[i]['source'])
                output_list.append(data[i]['system_output'])
                if dim == 'relevance':
                    ref_list.append(data[i]['reference'])

            input_list = add_question(dimension=dim, output=output_list, src=src_list, ref=ref_list, task=task)
            score = scorer.score(input_list)

        elif dim == 'factual consistency':
            output_list, src_list = [], []

            for i in range(n_data):
                src_list.append(data[i]['source'])
                output_list.append(data[i]['system_output'])

            data = convert_to_json(output_list=output_list, src_list=src_list)
            eval_score = evaluator.evaluate(data)
            score = []

            for i in eval_score:
                temp = i['consistency']
                score.append(temp)

        else:
            raise NotImplementedError(f"The input format for the dimension '{dim}' is still undefined. Please customize it.")

        # Store the scores for the current dimension
        for i in range(n_data):
            eval_scores[i][dim] = score[i]

    # Calculate overall score (average of all evaluated dimensions)
    if overall:
        for i in range(n_data):
            eval_scores[i]['overall'] = np.mean([eval_scores[i][dim] for dim in dims])

    # Print the result if requested
    if print_result:
        print_scores(eval_scores)

    if individual:
        individual_scores = []
        for i in range(n_data):
            temp = [eval_scores[i][dim] for dim in dims]
            individual_scores.append(temp)

        return np.array(individual_scores)

    # Calculate average score across all the dimensions except 'overall'
    avg_score = []
    for i in range(n_data):
        # Exclude 'overall' from the averaging
        dimensions = [dim for dim in dims if dim != 'overall']
        avg_score.append(np.mean([eval_scores[i][dim] for dim in dimensions]))

    return avg_score

In [None]:
# # Import all required libraries
# import torch
# import transformers
# from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM, DataCollatorForLanguageModeling, TrainingArguments
# from trl import RewardTrainer, PPOTrainer, PPOConfig, AutoModelForCausalLMWithValueHead, create_reference_model
# from datasets import Dataset
# import json
# import pandas as pd
# from sklearn.model_selection import train_test_split
# from peft import LoraConfig, PeftModel, get_peft_model, prepare_model_for_kbit_training, TaskType
# import bitsandbytes as bnb
# import numpy as np
# from tqdm import tqdm
# import time
# import sys

# # Add UniEval to path and import
# sys.path.append(r"C:\Users\BMSCE CSE.DESKTOP-IUB6THA\Downloads\kshitij\UniEval")
# from utils import convert_to_json
# from metric.evaluator import get_evaluator

# # Configuration
# DATA_PATH = "combined_clinical_notes.csv"
# # MODEL_PATH = r"D:\kshitij-weights-folder\qwen-aloe-9-4-base-fine-tune"
# MODEL_PATH = "gpt2" 
# # PEFT_ADAPTER_PATH = r"D:\kshitij-weights-folder\qwen-aloe-9-4-base-fine-tune-peft-adapaters"
# MEDICAL_PROMPT = "\nGenerate a concise medical summary focusing on key findings and treatment plans:"

# # Load and prepare data
# df = pd.read_csv(DATA_PATH)
# train_df, temp_df = train_test_split(df, test_size=0.4, random_state=42)
# eval_df, test_df = train_test_split(temp_df, test_size=0.5, random_state=42)
# dataset = Dataset.from_pandas(eval_df.rename(columns={"dialogue": "review"}))

# # Tokenizer setup
# tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH, padding_side='left')
# tokenizer.pad_token = tokenizer.eos_token

# # Dataset preprocessing
# def preprocess_function(examples):
#     return {
#         "input_ids": tokenizer.encode(examples["review"], truncation=True, padding="max_length", max_length=512),
#         "query": tokenizer.decode(tokenizer.encode(examples["review"], truncation=True, padding="max_length", max_length=512), skip_special_tokens=True)
#     }

# dataset = dataset.map(preprocess_function, batched=False)
# dataset.set_format("pytorch")

# # Model configuration
# bnb_config = transformers.BitsAndBytesConfig(
#     load_in_4bit=True,
#     bnb_4bit_compute_dtype=torch.float16,
#     bnb_4bit_use_double_quant=True,
#     bnb_4bit_quant_type="nf4",
# )

# base_model = AutoModelForCausalLM.from_pretrained(
#     MODEL_PATH,
#     quantization_config=bnb_config,
#     device_map="auto"
# )
# base_model = prepare_model_for_kbit_training(base_model)

# # PEFT/LoRA configuration
# lora_config = LoraConfig(
#     task_type=TaskType.CAUSAL_LM,
#     r=8,
#     lora_alpha=32,
#     lora_dropout=0.1,
#     bias="none",
#     target_modules=["c_attn", "c_proj"],
# )

# # model_with_lora = get_peft_model(base_model, lora_config)
# model = AutoModelForCausalLMWithValueHead.from_pretrained(base_model, peft_config=lora_config).to("cuda")

# # Reference model
# ref_model = create_reference_model(model).to("cuda")
# ref_model.eval()
# for param in ref_model.parameters():
#     param.requires_grad = False

# NUM_CANDIDATES = 2

# # PPO Configuration
# ppo_config = PPOConfig(
#     model_name=MODEL_PATH,
#     ppo_epochs=1,
#     gradient_accumulation_steps=1,
#     steps=5,
#     batch_size=1*NUM_CANDIDATES,
#     mini_batch_size=1*NUM_CANDIDATES,
#     learning_rate=2e-5,
#     log_with=None,
#     # project_kwargs={"logging_dir": r"D:\kshitij-weights-folder\gpt2-rl-logs"}
# )

# # optimizer = torch.optim.AdamW(
# #     filter(lambda p: p.requires_grad, ppo_model.parameters()),
# #     lr=2e-5,
# #     eps=1e-5,  # Slightly larger epsilon for stability
# # )

# # Initialize PPO Trainer
# ppo_trainer = PPOTrainer(
#     config=ppo_config,
#     model=model,
#     ref_model=ref_model,
#     tokenizer=tokenizer,
#     dataset=dataset,
#     optimizer=bnb.optim.Adam8bit(model.parameters(), lr=ppo_config.learning_rate)
# )

# # Evaluation setup
# # sum_eval = get_evaluator("summarization", "cuda"="cuda" if torch.cuda.is_available() else "cpu")

# def get_score(src, res):
#     # weights = np.array([0.1, 0.2, 0.3, 0.4])  # coherence, consistency, fluency, factual consistency
#     # sample_data = [{"source": q, "system_output": r} for q, r in zip(game_data["query"], game_data["response"])]
    
#     # scores = sum_eval.evaluate(sample_data, overall=False)
#     # scores = evaluate(sample_data, overall=False)
#     # weighted_scores = []
    
#     # for dimension_scores in scores:
#     #     adjusted = np.where(
#     #         dimension_scores < 0.5,
#     #         -dimension_scores * weights,
#     #         dimension_scores * weights
#     #     )
#     #     weighted_scores.append(torch.tensor(np.sum(adjusted)/4, dtype=torch.float32).to(model.pretrained_model.device))
    
#     # return weighted_scores

#     data = convert_to_json(
#         output_list=res,
#         src_list=src,
#     )
#     # raw = sum_eval.evaluate(data, print_result=True)
#     # dims = ['coherence', 'consistency', 'fluency', 'factual consistency']
#     raw = evaluate(data, overall=False)
#     score = [
#         [d[0], d[1], d[2], d[3]]
#         for d in raw
#     ]
#     scores = torch.tensor(score, dtype=torch.float32).numpy()  # CPU (B,4

#     k = len(res)
#     dom_counts = np.zeros(k)
    
#     for i in range(k):
#         for j in range(k):
#             if i == j:
#                 continue
#             # Check dominance: i dominates j if all scores are >= and at least one is >
#             if np.all(scores[i] >= scores[j]) and np.any(scores[i] > scores[j]):
#                 dom_counts[i] += 1
    
#     # Convert to [-1, 1] range reward
#     max_dom = k - 1
#     if max_dom > 0:
#         rewards = 2 * (dom_counts / max_dom) - 1
#     else:
#         rewards = np.zeros(k)
    
#     return rewards


# max_position_embeddings = model.pretrained_model.config.max_position_embeddings 
# # Training loop
# generation_kwargs = {
#     "min_length": -1,
#     "top_k": 0.0,
#     "top_p": 1.0,
#     "do_sample": True,
#     "pad_token_id": tokenizer.eos_token_id,
#     # "max_new_tokens": 64,  # Increased for better summary generation
#     "eos_token_id": -1,
#     "max_length": max_position_embeddings,
#     "max_new_tokens": 64
# }
# DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# # Use consistent compute dtype
# COMPUTE_DTYPE = torch.float32  # Using float32 to avoid dtype issues

# for epoch in range(1):
#     for batch_idx, batch in enumerate(tqdm(ppo_trainer.dataloader)):
#         (logs, game_data,) = (
#             dict(),
#             dict(),
#         )

#         # task_list = choices(ctrl_str, k=config.batch_size)
#         # game_data["query"] = [t + q for t, q in zip(task_list, batch["query"])]
#         game_data["query"] = [q for q in batch["query"]]
#         # query_tensors = [torch.cat((ctrl_tokens[t], input_ids)) for t, input_ids in zip(task_list, batch["input_ids"])]
#         query_tensors = [input_ids for input_ids in batch["input_ids"]]
#         all_outs = []
        
#         response_tensors = []
#         for query in query_tensors:
#             for _ in range(NUM_CANDIDATES):
#                 original_notes = tokenizer.decode(query)
                
#                 # Combine with medical prompt only during generation
#                 full_prompt = f"{MEDICAL_PROMPT}{original_notes}" #TRy with full prompt here
#                 full_prompt_tensor = tokenizer.encode(full_prompt, return_tensors="pt").to("cuda").squeeze(0)
                
#                 with torch.no_grad():
#                     response = ppo_trainer.generate(
#                         full_prompt_tensor,
#                         **generation_kwargs
#                     )
#                 # Ensure response doesn't exceed max length
#                 response = response[:, :generation_kwargs["max_new_tokens"]]
#                 all_outs.append(response)
#                 response_tensors.append(response.squeeze())

#             outs = torch.stack(all_outs, dim=1)
#             B, K, _ = outs.shape
            
#             # Decode outputs for evaluation
#             hyps = []
#             for b in range(B):
#                 hyps_b = []
#                 for k in range(K):
#                     try:
#                         text = tokenizer.decode(outs[b, k], skip_special_tokens=True)
#                         hyps_b.append(text)
#                     except Exception as e:
#                         print(f"Error decoding text: {e}")
#                         hyps_b.append("")  # Add empty string as fallback
#                 hyps.append(hyps_b)

#             # rewards = []
#             # for b in range(len(batch['input_ids'])):
#             #     # Get scores for all candidates (K, 4)
#             #     scores = get_score(
#             #         batch['query'][b] * NUM_CANDIDATES,
#             #         hyps[b]
#             #     ).numpy()

#             #     print("check")

#             #     dom_counts = np.zeros(NUM_CANDIDATES)
#             #     for i in range(NUM_CANDIDATES):
#             #         for j in range(NUM_CANDIDATES):
#             #             if i == j:
#             #                 continue
#             #             # Check if i dominates j
#             #             if np.all(scores[i] >= scores[j]) and np.any(scores[i] > scores[j]):
#             #                 dom_counts[i] += 1

#             #     max_dom = NUM_CANDIDATES - 1
#             #     scalar_rewards = 2 * (dom_counts / max_dom) - 1
#             #     rewards.append(scalar_rewards)

#             # flat_queries = []
#             # flat_responses = []
#             # flat_rewards = []

#             # for b in range(len(batch['input_ids'])):
#             #     for k in range(NUM_CANDIDATES):
#             #         flat_queries.append(batch['input_ids'][b])
#             #         flat_responses.append(outs[b, k])
#             #         flat_rewards.append(torch.tensor([rewards[b][k]], device="cuda"))

#             # stats = ppo_trainer.step(
#             #     queries   = flat_queries,    # e.g. [ q0, q0 ]
#             #     responses = flat_responses,  # e.g. [ r0, r1 ]
#             #     scores    = flat_rewards     # e.g. [ s0, s1 ]
#             # )

#             flat_queries, flat_responses, flat_rewards = [], [], []
                    
#             for b in range(B):
#                 try:
#                     # Calculate rewards
#                     rewards_b = get_score(
#                         [batch['query'][b]] * K, 
#                         hyps[b],
#                     )
                    
#                     # Flatten for PPO
#                     for k in range(K):
#                         flat_queries.append(batch['input_ids'][b])
#                         flat_responses.append(outs[b, k])
#                         flat_rewards.append(torch.tensor([rewards_b[k]], device=DEVICE, dtype=COMPUTE_DTYPE))
                        
#                 except Exception as e:
#                     print(f"Error computing rewards: {e}")
#                     continue
            
#             # Safety check
#             if len(flat_queries) != ppo_config.batch_size:
#                 print(f"Batch size mismatch: expected {ppo_config.batch_size}, got {len(flat_queries)}")
#                 continue

#             try:
#                 # Verify shapes match
#                 print(f"Queries: {len(flat_queries)}, Responses: {len(flat_responses)}, Rewards: {len(flat_rewards)}")
                
#                 # Manual memory management
#                 # torch.cuda.empty_cache()
                
#                 # Do PPO step
#                 stats = ppo_trainer.step(
#                     queries=flat_queries,
#                     responses=flat_responses,
#                     scores=flat_rewards
#                 )
                
#                 # Success! Log the output
#                 print(f"Batch {batch_idx} PPO step successful!")
#                 print(f"Sample output: {hyps[0][0][:100]}...")
#                 avg_reward = np.mean([r.item() for r in flat_rewards])
#                 print(f"Average reward: {avg_reward:.4f}")
                
#             except RuntimeError as e:
#                 print(f"Error in PPO step: {e}")
                
#                 # If still running into CUDA errors, try moving to CPU
#                 if "CUDA" in str(e) and DEVICE != "cpu":
#                     print("\nContinuing to encounter CUDA errors. Try two options:")
#                     print("1. Change DEVICE = 'cpu' at the top of the script")
#                     print("2. Or use the non-quantized model version\n")
                    
#                 # Clear memory and continue
#                 # if torch.cuda.is_available():
#                 #     torch.cuda.empty_cache()
            
#             # if batch_idx % 10 == 0:
#             #     print(f"Epoch {epoch+1}, Batch {batch_idx}")
#             #     print(f"Sample output: {hyps[0][0][:100]}...")
#             #     print(f"Average reward: {np.mean([r.item() for r in flat_rewards]):.4f}")

#     print(f"✅ Epoch {epoch+1}/3 complete")
    
# print("🎉 PPO fine-tuning done")
# #         print(response_tensors)
#         # game_data["response"] = [tokenizer.decode(r) for r in response_tensors]

#         # print("check")

#         # texts = [q + r for q, r in zip(batch["query"], game_data["response"])]
#         # logits = get_score(game_data)
#         # rewards = logits
#         # rewards = pos_logit_to_reward(logits, task_list)
#         # rewards = [torch.tensor([1.0], device=query_tensors[0].device) for _ in range(len(texts))]

#         #### Run PPO trainings
#         # t = time.time()
#         # stats = ppo_trainer.step(query_tensors, response_tensors, rewards)


In [1]:
# Import all required libraries
import torch
import transformers
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM, DataCollatorForLanguageModeling, TrainingArguments
from trl import RewardTrainer, PPOTrainer, PPOConfig, AutoModelForCausalLMWithValueHead, create_reference_model
from datasets import Dataset
import json
import pandas as pd
from sklearn.model_selection import train_test_split
from peft import LoraConfig, PeftModel, get_peft_model, prepare_model_for_kbit_training, TaskType
import bitsandbytes as bnb
import numpy as np
import os
from tqdm import tqdm
import time
import sys
# Add UniEval to path and import
sys.path.append(r"C:\Users\BMSCE CSE.DESKTOP-IUB6THA\Downloads\kshitij\UniEval")
from utils import convert_to_json
from metric.evaluator import get_evaluator

# Configuration
DATA_PATH = "combined_clinical_notes.csv"
MODEL_PATH = "gpt2" 
EXTRACTION_MODEL_PATH = "bigscience/bloomz-1b7"  # Model for extracting key information
# Enhanced prompt with more guidance
MEDICAL_PROMPT = "\nGenerate a concise medical summary focusing on key findings and treatment plans. Include chief complaints, symptoms, medications, and recommendations if mentioned in the conversation:"

# Initialize the extraction model
print(f"Loading extraction model from {EXTRACTION_MODEL_PATH}...")
extraction_tokenizer = AutoTokenizer.from_pretrained(EXTRACTION_MODEL_PATH)
extraction_model = AutoModelForCausalLM.from_pretrained(
    EXTRACTION_MODEL_PATH,
    torch_dtype=torch.float16,  # Use fp16 for efficiency
    device_map="auto"
)

def extract_medical_info(conversation, max_length=300):
    """Use the extraction model to pull out key medical information"""
    extraction_prompt = (
        "Extract these key medical information points from the conversation below:\n"
        "- Chief complaint\n"
        "- Symptoms and duration\n"
        "- Current medications\n"
        "- Vital signs\n"
        "- Physical exam findings\n"
        "- Recommendations\n\n"
        f"Conversation:\n{conversation}\n\n"
        "Extracted information:"
    )
    
    # Truncate long conversations if needed
    if len(extraction_prompt.split()) > 800:
        conversation_words = conversation.split()
        truncated_conversation = " ".join(conversation_words[:700])
        extraction_prompt = extraction_prompt.replace(conversation, truncated_conversation)
    
    inputs = extraction_tokenizer(extraction_prompt, return_tensors="pt").to(extraction_model.device)
    
    with torch.no_grad():
        try:
            outputs = extraction_model.generate(
                inputs.input_ids,
                max_length=len(inputs.input_ids[0]) + max_length,  # Allow for generated content
                temperature=0.3,  # Lower temperature for more deterministic extraction
                top_p=0.95
            )
            
            extracted_info = extraction_tokenizer.decode(outputs[0], skip_special_tokens=True)
            
            # Remove the prompt part
            if extraction_prompt in extracted_info:
                extracted_info = extracted_info.replace(extraction_prompt, "").strip()
            else:
                # Try to find a reasonable place to split the text
                # This helps when the model modifies the prompt slightly
                prompt_end = "Extracted information:"
                if prompt_end in extracted_info:
                    extracted_info = extracted_info.split(prompt_end)[1].strip()
            
            return extracted_info
            
        except Exception as e:
            print(f"Error in extraction model: {e}")
            # Return a simplified fallback extraction
            return "Unable to extract detailed information. Please see the original conversation."

# Load and prepare data
df = pd.read_csv(DATA_PATH)
train_df, temp_df = train_test_split(df, test_size=0.4, random_state=42)
eval_df, test_df = train_test_split(temp_df, test_size=0.5, random_state=42)
dataset = Dataset.from_pandas(eval_df.rename(columns={"dialogue": "review"}))

# Tokenizer setup
tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH, padding_side='left')
tokenizer.pad_token = tokenizer.eos_token

# Dataset preprocessing
def preprocess_function(examples):
    return {
        "input_ids": tokenizer.encode(examples["review"], truncation=True, padding="max_length", max_length=512),
        "query": tokenizer.decode(tokenizer.encode(examples["review"], truncation=True, padding="max_length", max_length=512), skip_special_tokens=True)
    }

dataset = dataset.map(preprocess_function, batched=False)
dataset.set_format("pytorch")

# Model configuration
bnb_config = transformers.BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
)

base_model = AutoModelForCausalLM.from_pretrained(
    MODEL_PATH,
    quantization_config=bnb_config,
    device_map="auto"
)
base_model = prepare_model_for_kbit_training(base_model)

# PEFT/LoRA configuration
lora_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    r=8,
    lora_alpha=32,
    lora_dropout=0.1,
    bias="none",
    target_modules=["c_attn", "c_proj"],
)

model = AutoModelForCausalLMWithValueHead.from_pretrained(base_model, peft_config=lora_config).to("cuda")

# Reference model
ref_model = create_reference_model(model).to("cuda")
ref_model.eval()
for param in ref_model.parameters():
    param.requires_grad = False

NUM_CANDIDATES = 2

# PPO Configuration
ppo_config = PPOConfig(
    model_name=MODEL_PATH,
    ppo_epochs=1,
    gradient_accumulation_steps=1,
    steps=5,
    batch_size=1*NUM_CANDIDATES,
    mini_batch_size=1*NUM_CANDIDATES,
    learning_rate=2e-5,
    log_with=None,
)

# Initialize PPO Trainer
ppo_trainer = PPOTrainer(
    config=ppo_config,
    model=model,
    ref_model=ref_model,
    tokenizer=tokenizer,
    dataset=dataset,
    optimizer=bnb.optim.Adam8bit(model.parameters(), lr=ppo_config.learning_rate)
)

# Initialize evaluation model
sum_eval = get_evaluator("summarization", device="cpu")

# Evaluation function
def evaluate(data, overall=False):
    """Wrapper around sum_eval.evaluate to handle errors"""
    try:
        return sum_eval.evaluate(data, overall=overall)
    except Exception as e:
        print(f"Error in evaluate: {e}")
        # Return default scores if evaluation fails
        return [[0.5, 0.5, 0.5, 0.5]] * len(data)

def get_score(src, res):
    """Calculate rewards based on evaluation scores"""
    data = convert_to_json(
        output_list=res,
        src_list=src,
    )
    
    raw = evaluate(data, overall=False)
    score = [
        [d[0], d[1], d[2], d[3]]
        for d in raw
    ]
    scores = np.array(score, dtype=np.float32)
    k = len(res)
    dom_counts = np.zeros(k)
    
    for i in range(k):
        for j in range(k):
            if i == j:
                continue
            # Check dominance: i dominates j if all scores are >= and at least one is >
            if np.all(scores[i] >= scores[j]) and np.any(scores[i] > scores[j]):
                dom_counts[i] += 1
    
    # Convert to [-1, 1] range reward
    max_dom = k - 1
    if max_dom > 0:
        rewards = 2 * (dom_counts / max_dom) - 1
    else:
        rewards = np.zeros(k)
    
    return rewards

max_position_embeddings = model.pretrained_model.config.max_position_embeddings 

# Training loop with improved generation parameters
generation_kwargs = {
    "min_length": -1,
    "top_k": 50,
    "top_p": 0.95,
    "do_sample": True,
    "pad_token_id": tokenizer.eos_token_id,
    "eos_token_id": -1,
    "max_length": max_position_embeddings,
    "max_new_tokens": 96,
    "temperature": 0.7
}

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
COMPUTE_DTYPE = torch.float32  # Using float32 to avoid dtype issues

# Track extract-summarize cache to avoid recomputing
extract_cache = {}

for epoch in range(3):
    for batch_idx, batch in enumerate(tqdm(ppo_trainer.dataloader)):
        logs, game_data = dict(), dict()
        game_data["query"] = [q for q in batch["query"]]
        query_tensors = [input_ids for input_ids in batch["input_ids"]]
        
        # Process one query at a time to prevent batch size mismatch
        for query_idx, query in enumerate(query_tensors):
            try:
                # Skip empty or very short queries
                if len(query) < 10:
                    print(f"Skipping query {query_idx}: too short")
                    continue
                
                # Generate NUM_CANDIDATES responses for this query
                responses = []
                decoded_responses = []
                
                # Get original text and extract key information
                original_notes = tokenizer.decode(query)
                
                # Check cache first to avoid redundant extractions
                if original_notes in extract_cache:
                    extracted_info = extract_cache[original_notes]
                else:
                    extracted_info = extract_medical_info(original_notes)
                    extract_cache[original_notes] = extracted_info
                
                # Construct enhanced prompt with extracted information
                full_prompt = (
                    f"{MEDICAL_PROMPT}\n"
                    f"KEY INFORMATION:\n{extracted_info}\n\n"
                    "Based on the above, generate a concise medical summary:"
                )
                
                # Encode the full prompt
                full_prompt_tensor = tokenizer.encode(full_prompt, return_tensors="pt").to("cuda").squeeze(0)
                
                for _ in range(NUM_CANDIDATES):
                    with torch.no_grad():
                        response = ppo_trainer.generate(
                            full_prompt_tensor,
                            **generation_kwargs
                        )
                    # Ensure response doesn't exceed max length
                    response = response[:, :generation_kwargs["max_new_tokens"]]
                    responses.append(response.squeeze())
                    
                    # Decode for evaluation
                    text = tokenizer.decode(response.squeeze(), skip_special_tokens=True)
                    
                    # Extract just the generated part (often it repeats the prompt)
                    if full_prompt in text:
                        generated_text = text.split(full_prompt)[1].strip()
                    else:
                        generated_text = text
                        
                    decoded_responses.append(generated_text)
                
                # Calculate rewards
                rewards = get_score(
                    [game_data["query"][query_idx]] * NUM_CANDIDATES, 
                    decoded_responses
                )
                
                # Prepare data for PPO step
                flat_queries = [query] * NUM_CANDIDATES
                flat_responses = responses
                flat_rewards = [torch.tensor([r], device=DEVICE, dtype=COMPUTE_DTYPE) for r in rewards]
                
                # Verify sizes match
                if len(flat_queries) == NUM_CANDIDATES and len(flat_responses) == NUM_CANDIDATES and len(flat_rewards) == NUM_CANDIDATES:
                    # Do PPO step for this query
                    stats = ppo_trainer.step(
                        queries=flat_queries,
                        responses=flat_responses,
                        scores=flat_rewards
                    )
                    
                    print(f"Query {query_idx} in batch {batch_idx} - PPO step successful!")
                    print(f"Extracted Info: {extracted_info[:100]}...")
                    print(f"Sample output: {decoded_responses[0][:100]}...")
                    avg_reward = np.mean([r.item() for r in flat_rewards])
                    print(f"Average reward: {avg_reward:.4f}")
                else:
                    print(f"Skipping query {query_idx} due to size mismatch: queries={len(flat_queries)}, responses={len(flat_responses)}, rewards={len(flat_rewards)}")
                    
            except Exception as e:
                print(f"Error processing query {query_idx}: {e}")
                continue
        
        # Save checkpoint after each batch
        if batch_idx % 5 == 0:
            checkpoint_path = f"D:/kshitij-weights-folder/gpt-2-tuned-ppo-checkpoints/batch_{batch_idx}"
            try:
                os.makedirs(checkpoint_path, exist_ok=True)
                ppo_trainer.model.pretrained_model.save_pretrained(checkpoint_path)
                tokenizer.save_pretrained(checkpoint_path)
                print(f"Checkpoint saved to {checkpoint_path}")
            except Exception as e:
                print(f"Error saving checkpoint: {e}")
                
    print(f"✅ Epoch {epoch+1}/3 complete")
    
    # Save epoch checkpoint
    epoch_path = f"D:/kshitij-weights-folder/gpt-2-tuned-ppo-epochs/epoch_{epoch+1}"
    os.makedirs(epoch_path, exist_ok=True)
    ppo_trainer.model.pretrained_model.save_pretrained(epoch_path)
    tokenizer.save_pretrained(epoch_path)
    print(f"Epoch checkpoint saved to {epoch_path}")
    
print("🎉 PPO fine-tuning done")

# Save the final model
save_path = r"D:\kshitij-weights-folder\gpt-2-tuned-ppo-extracted"
ppo_trainer.model.pretrained_model.save_pretrained(save_path)
tokenizer.save_pretrained(save_path)
print(f"Model saved to {save_path}")

  from .autonotebook import tqdm as notebook_tqdm


Loading extraction model from bigscience/bloomz-1b7...


Map: 100%|███████████████████████████████████████████████████████████| 93/93 [00:00<00:00, 143.58 examples/s]
Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.13s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:03<00:00,  3.02s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  3.03it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'
Query 0 in batch 0 - PPO step successful!
Extracted Info: The doctor is asking the patient about her symptoms and duration....
Sample output: In cases of a woman with bipolar disorder who has never been treated with any sort of medication, sh...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
  return fn(*args, **kwargs)
Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.98s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:04<00:00,  4.48s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.67it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'
Query 1 in batch 0 - PPO step successful!
Extracted Info: Patient is a 60 year old, right-handed male, referred today for evaluation of numbness and tingling....
Sample output: 1. This is a very important and very important issue.

2. Patients are suffering at the...
Average reward: -1.0000


  2%|█▌                                                                       | 1/46 [00:25<19:10, 25.57s/it]

Checkpoint saved to D:/kshitij-weights-folder/gpt-2-tuned-ppo-checkpoints/batch_0


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.07s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:04<00:00,  4.60s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.54it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 0 in batch 1 - PPO step successful!
Extracted Info: The patient is currently taking medications for his foot pain....
Sample output: You can read the medical summary at the beginning of your medical summary.

You can read the medical...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.95s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:06<00:00,  6.57s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.33it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


  4%|███▏                                                                     | 2/46 [00:53<19:49, 27.03s/it]

Query 1 in batch 1 - PPO step successful!
Extracted Info: The doctor will examine the patient's knee....
Sample output: I'm so glad to see you've read this.

I can't believe this.

I'm so glad to hear that.

I can't beli...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.96s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.84s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  3.27it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 0 in batch 2 - PPO step successful!
Extracted Info: The patient is a 31-year-old female with a history of diabetes and asthma....
Sample output: Key:

The patient is a young male with an asthma history of diabetes and asthma.

The patient is a m...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.96s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:06<00:00,  6.81s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.62it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


  7%|████▊                                                                    | 3/46 [01:20<19:09, 26.74s/it]Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 1 in batch 2 - PPO step successful!
Extracted Info: Symptoms and duration...
Sample output: What is the key to this?

Why did you say this?

When did you say this?


What is the key?

WHAT is ...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.22s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:04<00:00,  4.43s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.98it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 0 in batch 3 - PPO step successful!
Extracted Info: Symptoms and duration...
Sample output: KEY DESCRIPTION:

"You should have a concise medical summary.
"A medical summary of a medical and he...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.91s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:07<00:00,  7.38s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.30it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


  9%|██████▎                                                                  | 4/46 [01:48<19:13, 27.47s/it]

Query 1 in batch 3 - PPO step successful!
Extracted Info: Michael is seeing a doctor for back pain....
Sample output: I have a back pain.

Your pain is back pain.

I have a back pain.

Your pain is back pain.

Your bac...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.02s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:05<00:00,  5.72s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.46it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'
Query 0 in batch 4 - PPO step successful!
Extracted Info: The doctor will check up on the patient's past medical history....
Sample output: KEY INFO:
I've been told that it's not a good idea to use.
A good idea.

So I decided to look into t...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.17s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:04<00:00,  4.07s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.79it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


 11%|███████▉                                                                 | 5/46 [02:15<18:36, 27.23s/it]Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 1 in batch 4 - PPO step successful!
Extracted Info: The doctor will examine the ankle and will recommend a course of treatment....
Sample output: A doctor will discuss the treatment options, the treatment options, and the current treatment plans....
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.99s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:06<00:00,  6.81s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.00it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'




Query 0 in batch 5 - PPO step successful!
Extracted Info: Symptoms and duration...
Sample output: The following are the major problems IHGP's are dealing with:

1.The primary goal of the conversatio...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.03s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:06<00:00,  6.81s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.41it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'




Query 1 in batch 5 - PPO step successful!
Extracted Info: The patient is experiencing pain in her elbow....
Sample output: What are the best medical decisions for this patient?

You have the best medical opinion.

This pati...
Average reward: -1.0000


 13%|█████████▌                                                               | 6/46 [02:46<19:05, 28.65s/it]

Checkpoint saved to D:/kshitij-weights-folder/gpt-2-tuned-ppo-checkpoints/batch_5


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.10s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:04<00:00,  4.70s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.79it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 0 in batch 6 - PPO step successful!
Extracted Info: The patient is concerned about her recent blood work....
Sample output: KEY INFORMATION:

The patient's symptoms, and the way they are treated.

This is not a medical revie...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.08s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.10s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  3.98it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


 15%|███████████                                                              | 7/46 [03:10<17:32, 26.99s/it]Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 1 in batch 6 - PPO step successful!
Extracted Info: Symptoms and duration...
Sample output: In a nutshell:

The first step is to identify the symptoms:

The first step is to identify the speci...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.99s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:04<00:00,  4.70s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.24it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 0 in batch 7 - PPO step successful!
Extracted Info: The patient is experiencing a knee pain....
Sample output: This is a case case of a knee.

This is a case of a knee.

This is a case of a knee.

This is a case...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.02s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:04<00:00,  4.01s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.60it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


 17%|████████████▋                                                            | 8/46 [03:35<16:46, 26.48s/it]

Query 1 in batch 7 - PPO step successful!
Extracted Info: Symptoms and duration...
Sample output: Key facts:

A doctor who has a primary care diagnosis is considered as a primary care physician.

A ...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.97s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.92s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  3.15it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 0 in batch 8 - PPO step successful!
Extracted Info: The doctor will check up on the patient's health and recommend treatment....
Sample output: Here is a list of the top issues of the issue that have been discussed by our physician, and the doc...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.08s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/2 [00:00<?, ?it/s][A
 50%|█████████████████████████████████████                                     | 1/2 [00:07<00:07,  7.52s/it][A
100%|██████████████████████████████████████████████████████████████████████████| 2/2 [00:09<00:00,  4.71s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/2 [00:00<?, ?it/s][A
 50%|█████████████████████████████████████                                     | 1/2 [00:00<00:00,  2.52it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00,  3.36it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


 20%|██████████████▎                                                          | 9/46 [04:05<16:53, 27.39s/it]Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 1 in batch 8 - PPO step successful!
Extracted Info: Symptoms and duration...
Sample output: MEDICAL SUMMARY:

1. Symptoms and duration

2. Drug use, alcohol use, and

3. Alcohol use

4. Key cl...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.07s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:06<00:00,  6.64s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.32it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'




Query 0 in batch 9 - PPO step successful!
Extracted Info: Symptoms and duration...
Sample output: Symptoms:

What is it?


The primary symptom is a severe headache.

What is it does it?
The primary ...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.92s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:06<00:00,  6.50s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.47it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


 22%|███████████████▋                                                        | 10/46 [04:35<16:55, 28.19s/it]Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 1 in batch 9 - PPO step successful!
Extracted Info: The doctor is concerned about the patient's health....
Sample output: Medical marijuana is on the patient's side.

If the patient is serious, the doctor will give the pat...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.09s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:03<00:00,  3.04s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  3.28it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 0 in batch 10 - PPO step successful!
Extracted Info: The doctor will examine the ankle and will recommend a course of treatment....
Sample output: The doctor will take a prescription of the following:

a. a. a. a. a.

b.

c...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.98s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.92s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.30it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'




Query 1 in batch 10 - PPO step successful!
Extracted Info: Symptoms and duration...
Sample output: S:

A:

C:

D:

T:
T:
KEY:

C:
M:

A:

D:
A...
Average reward: -1.0000


 24%|█████████████████▏                                                      | 11/46 [04:57<15:22, 26.36s/it]

Checkpoint saved to D:/kshitij-weights-folder/gpt-2-tuned-ppo-checkpoints/batch_10


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.03s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:04<00:00,  4.48s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.69it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 0 in batch 11 - PPO step successful!
Extracted Info: The doctor is asking about the symptoms of the patient....
Sample output: The doctor is looking for a medical system that is able to provide treatment plans for the patient.
...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.01s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.96s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.87it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


 26%|██████████████████▊                                                     | 12/46 [05:21<14:31, 25.64s/it]Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 1 in batch 11 - PPO step successful!
Extracted Info: Symptoms and duration...
Sample output: Symptoms and duration of treatment:

Based on a person's medical history, if it is a person with a d...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.02s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.92s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  3.04it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 0 in batch 12 - PPO step successful!
Extracted Info: Symptoms and duration...
Sample output: Key findings:

What is the current medical status of the patient or the following is a topic of inte...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.25s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:04<00:00,  4.24s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.65it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


 28%|████████████████████▎                                                   | 13/46 [05:45<13:47, 25.08s/it]Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 1 in batch 12 - PPO step successful!
Extracted Info: Symptoms and duration...
Sample output: Key:

A few key details are included, and this should be used in your discussion.

A few details tha...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.96s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:04<00:00,  4.55s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.27it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'




Query 0 in batch 13 - PPO step successful!
Extracted Info: Symptoms and duration...
Sample output: Key Words:
"I am a bad doctor and I have no medical practice in my life"

"I am a bad doctor, and my...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.37s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/2 [00:00<?, ?it/s][A
 50%|█████████████████████████████████████                                     | 1/2 [00:08<00:08,  8.20s/it][A
100%|██████████████████████████████████████████████████████████████████████████| 2/2 [00:09<00:00,  4.57s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/2 [00:00<?, ?it/s][A
 50%|█████████████████████████████████████                                     | 1/2 [00:00<00:00,  1.57it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00,  2.51it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


 30%|█████████████████████▉                                                  | 14/46 [06:16<14:21, 26.92s/it]

Query 1 in batch 13 - PPO step successful!
Extracted Info: The patient is a 37-year-old female with a history of hypertension and diabetes ....
Sample output: 
Generate a concise medical summary focusing on key findings and treatment plans. Include chief comp...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.90s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.85s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  5.12it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'




Query 0 in batch 14 - PPO step successful!
Extracted Info: Lawrence is a 62-year-old male with a past medical history significant for type i diabetes, congesti...
Sample output: Lack of care, which is the same for the patient,...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.88s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.87s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  5.05it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


 33%|███████████████████████▍                                                | 15/46 [06:36<12:51, 24.88s/it]Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 1 in batch 14 - PPO step successful!
Extracted Info: susan is a 26-year-old female who has high blood pressure....
Sample output: Treatment plan:

The goal is to treat all the symptoms of the disease, including:

a low blood press...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.98s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:03<00:00,  3.91s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.67it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'




Query 0 in batch 15 - PPO step successful!
Extracted Info: chief complaint is chest pain...
Sample output: chief complaint is the general general condition of the patient

Major complaint is the general cond...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.99s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:04<00:00,  4.57s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.81it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'
Query 1 in batch 15 - PPO step successful!
Extracted Info: The patient is currently taking painkillers to treat the pain....
Sample output: KEY CONCER:
The Patient is currently taking painkillers to treat the pain.

The Patient is currently...
Average reward: -1.0000


 35%|█████████████████████████                                               | 16/46 [07:02<12:37, 25.25s/it]Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Checkpoint saved to D:/kshitij-weights-folder/gpt-2-tuned-ppo-checkpoints/batch_15


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.04s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.01s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.56it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'
Query 0 in batch 16 - PPO step successful!
Extracted Info: Symptoms and duration...
Sample output: "Symptoms:


We have the following symptoms:

We have the following symptoms:

We have the following...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.95s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:03<00:00,  3.79s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.99it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


 37%|██████████████████████████▌                                             | 17/46 [07:24<11:43, 24.25s/it]

Query 1 in batch 16 - PPO step successful!
Extracted Info: The doctor will examine the patient and give her a physical exam....
Sample output: For a medical summary of the following:


The doctor will look at the patient with a medical alert.
...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.10s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:05<00:00,  5.79s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.43it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'
Query 0 in batch 17 - PPO step successful!
Extracted Info: The doctor will examine the patient and recommend a physical exam....
Sample output: KEY NOTIFICATION:
This is a good summary of the evidence in the doctor's office.

The doctor's repor...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.82s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:04<00:00,  4.33s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.53it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


 39%|████████████████████████████▏                                           | 18/46 [07:51<11:42, 25.07s/it]

Query 1 in batch 17 - PPO step successful!
Extracted Info: The patient is a female with acid reflux....
Sample output: DETERATION:
Pregnancy/Pregnancy:

DETERMINATION:
Hormonal:

DETERMATION:

H...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.98s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:03<00:00,  3.85s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.97it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'




Query 0 in batch 18 - PPO step successful!
Extracted Info: The doctor will call out some of the physical exam findings....
Sample output: Medical information about the medical information of the patient.

In the event the doctor does not ...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.07s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:06<00:00,  6.58s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.24it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


 41%|█████████████████████████████▋                                          | 19/46 [08:18<11:35, 25.77s/it]

Query 1 in batch 18 - PPO step successful!
Extracted Info: The doctor is asking the patient about his neck pain....
Sample output: The doctor is asking about a specific symptom. The doctor is asking about a specific treatment plan....
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.96s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:05<00:00,  5.69s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.73it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'




Query 0 in batch 19 - PPO step successful!
Extracted Info: The doctor will recommend a course of antibiotics for the patient....
Sample output: The doctor will recommend a course of antibiotics.

The doctor will recommend a course of antibiotic...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.91s/it][A


Evaluating consistency of 2 samples !!!



0it [00:00, ?it/s][A


Error in evaluate: division by zero


 43%|███████████████████████████████▎                                        | 20/46 [08:41<10:45, 24.81s/it]

Query 1 in batch 19 - PPO step successful!
Extracted Info: Raymond has been having trouble swallowing for a period of time. He has been having trouble swallowi...
Sample output: ...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.88s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.87s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.92it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'




Query 0 in batch 20 - PPO step successful!
Extracted Info: Julia has had a heart attack and is undergoing treatment. She has had a stent placed in her heart. J...
Sample output: Juliaia has a heart attack and has had...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.94s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.76s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  3.21it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'




Query 1 in batch 20 - PPO step successful!
Extracted Info: joseph is a 59 year old male who has chronic problems....
Sample output: If a medical condition is a problem with the general state of care, it is considered to be a problem...
Average reward: -1.0000


 46%|████████████████████████████████▊                                       | 21/46 [09:03<10:02, 24.11s/it]

Checkpoint saved to D:/kshitij-weights-folder/gpt-2-tuned-ppo-checkpoints/batch_20


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.91s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:03<00:00,  3.60s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  3.06it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'




Query 0 in batch 21 - PPO step successful!
Extracted Info: The patient is a 37-year-old male with a complaint of left arm pain....
Sample output: In case of a patient, they will write a medical summary.

IN CASE:

The is a medical summary, based...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.06s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:03<00:00,  3.92s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.58it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


 48%|██████████████████████████████████▍                                     | 22/46 [09:28<09:40, 24.17s/it]Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 1 in batch 21 - PPO step successful!
Extracted Info: The doctor is asking the patient about his back pain....
Sample output: You can ask a question about the pain and the pain may be mentioned.

You can ask for more informati...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.92s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.89s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  5.14it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 0 in batch 22 - PPO step successful!
Extracted Info: Julia has had a heart attack and is undergoing treatment. She has had a stent placed in her heart. J...
Sample output: Julia is on her stent....
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.03s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:06<00:00,  6.93s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.08it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


 50%|████████████████████████████████████                                    | 23/46 [09:53<09:23, 24.48s/it]Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 1 in batch 22 - PPO step successful!
Extracted Info: The patient has a broken wrist....
Sample output: The patient's wrist is broken.
The patient's wrist is broken.
The patient's wrist is broken.
The pat...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.11s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:06<00:00,  6.15s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.71it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 0 in batch 23 - PPO step successful!
Extracted Info: The doctor will examine the ankle and will recommend a course of treatment....
Sample output: Your doctor.

To determine if your doctor is recommending a treatment for you.

Include a list of co...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.99s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  2.00s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.33it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


 52%|█████████████████████████████████████▌                                  | 24/46 [10:18<09:00, 24.57s/it]

Query 1 in batch 23 - PPO step successful!
Extracted Info: chief complaint is acid reflux...
Sample output: KEY INFORMATION:

KEY:

The primary complaint is acid reflux (acid reflux is a disease caused by the...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.90s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.75s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.19it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'




Query 0 in batch 24 - PPO step successful!
Extracted Info: Chief complaint is abnormal renal ultrasound with an atrophic right kidney....
Sample output: COURT:

Your medical history is:


(not included in the discussion)

This medical history is:...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.87s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:03<00:00,  3.71s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.69it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


 54%|███████████████████████████████████████▏                                | 25/46 [10:41<08:26, 24.10s/it]

Query 1 in batch 24 - PPO step successful!
Extracted Info: The doctor will examine the patient and will recommend a course of treatment....
Sample output: The patient will discuss the condition and cause for their treatment.

The patient will recommend a ...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.91s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.86s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.53it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'




Query 0 in batch 25 - PPO step successful!
Extracted Info: The patient is a female who has a severe right upper arm pain....
Sample output: PROGUMS:

PROGOGY:


A:

The patient has an issue that can cause problems, and you...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.93s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:06<00:00,  6.60s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.37it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'
Query 1 in batch 25 - PPO step successful!
Extracted Info: William is a doctor. He is treating a patient who injured his knee....
Sample output: Bill is a doctor.

Bill, is a doctor.

Bill has an injury.

Bill is a doctor.

Bill is...
Average reward: -1.0000


 57%|████████████████████████████████████████▋                               | 26/46 [11:07<08:17, 24.87s/it]

Checkpoint saved to D:/kshitij-weights-folder/gpt-2-tuned-ppo-checkpoints/batch_25


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.30s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/2 [00:00<?, ?it/s][A
 50%|█████████████████████████████████████                                     | 1/2 [00:07<00:07,  7.87s/it][A
100%|██████████████████████████████████████████████████████████████████████████| 2/2 [00:09<00:00,  4.89s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/2 [00:00<?, ?it/s][A
 50%|█████████████████████████████████████                                     | 1/2 [00:00<00:00,  1.81it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00,  2.71it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 0 in batch 26 - PPO step successful!
Extracted Info: The doctor is concerned about the patient 's recent illness ....
Sample output: 
Generate a concise medical summary focusing on key findings and treatment plans. Include chief comp...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.08s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:05<00:00,  5.59s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.51it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


 59%|██████████████████████████████████████████▎                             | 27/46 [11:40<08:38, 27.29s/it]Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 1 in batch 26 - PPO step successful!
Extracted Info: chief complaint is back pain...
Sample output: ADDITIONAL INFORMATION:

Cocaine is a major cause of ailing and/or anemia. Alcohol is an increase in...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.92s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.83s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  3.06it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 0 in batch 27 - PPO step successful!
Extracted Info: Symptoms and duration...
Sample output: INTRUDITION:

Treatment Plans

INTRUDIERS:

A good diagnosis of a major and a minor is required to b...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.98s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:03<00:00,  3.96s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.68it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


 61%|███████████████████████████████████████████▊                            | 28/46 [12:03<07:47, 26.00s/it]

Query 1 in batch 27 - PPO step successful!
Extracted Info: The doctor will call out some of the physical exam findings....
Sample output: The doctor will call out some of the physical exam findings, to give some insight into the condition...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.90s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:05<00:00,  5.51s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.60it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'




Query 0 in batch 28 - PPO step successful!
Extracted Info: The doctor will examine the patient and discuss the cough....
Sample output: This is a concise medical summary of the general medicine.

This is a medical summary of the patient...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.97s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:04<00:00,  4.56s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.81it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


 63%|█████████████████████████████████████████████▍                          | 29/46 [12:30<07:24, 26.18s/it]

Query 1 in batch 28 - PPO step successful!
Extracted Info: The doctor will discuss the patient's symptoms and current medications....
Sample output: The patient will discuss the current medications, current treatment options, current treatments, and...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.90s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.93s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.69it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 0 in batch 29 - PPO step successful!
Extracted Info: The doctor is asking the patient about his current medical condition....
Sample output: KEY FEATURES:

Key points:

Primary causes of death:


Major:


Minor:

Solutions:...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.02s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.92s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  3.23it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


 65%|██████████████████████████████████████████████▉                         | 30/46 [12:51<06:34, 24.68s/it]Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 1 in batch 29 - PPO step successful!
Extracted Info: Patient is 72-year-old woman with hypertension....
Sample output: Key Medical notes:

The following is a medical summary of the following patient:

The patient's medi...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.23s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:04<00:00,  4.10s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.91it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 0 in batch 30 - PPO step successful!
Extracted Info: Symptoms and duration...
Sample output: This is the most important thing I would like to know the best.

This is what I would like to know:
...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.17s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:07<00:00,  7.01s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.32it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'




Query 1 in batch 30 - PPO step successful!
Extracted Info: Symptoms and duration...
Sample output: Key information:


DILOGS:

Key ideas:

WHAT?

If you're in the market for a medical summary, you ca...
Average reward: -1.0000


 67%|████████████████████████████████████████████████▌                       | 31/46 [13:20<06:29, 25.99s/it]

Checkpoint saved to D:/kshitij-weights-folder/gpt-2-tuned-ppo-checkpoints/batch_30


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.02s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.95s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.63it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'




Query 0 in batch 31 - PPO step successful!
Extracted Info: The patient is a 57-year-old female who is here for a surgical consult....
Sample output: The patient is a male (male) with at least 3 symptoms with a high risk of serious, if not present, f...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.92s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.79s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.53it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


 70%|██████████████████████████████████████████████████                      | 32/46 [13:41<05:44, 24.59s/it]Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 1 in batch 31 - PPO step successful!
Extracted Info: Melissa sanchez is a female patient who is being seen in the office for status post mitral valve rep...
Sample output: A brief medical summary of the following medical topic:

Anal:


A brief medical topic...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.91s/it][A


Evaluating consistency of 2 samples !!!



0it [00:00, ?it/s][A


Error in evaluate: division by zero


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 0 in batch 32 - PPO step successful!
Extracted Info: Raymond has been having trouble swallowing for a period of time. He has been having trouble swallowi...
Sample output: ...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.96s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:05<00:00,  5.83s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.84it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


 72%|███████████████████████████████████████████████████▋                    | 33/46 [14:03<05:08, 23.70s/it]Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 1 in batch 32 - PPO step successful!
Extracted Info: The doctor is concerned about the patient's health....
Sample output: What is happening?

What is happening?

This is a summary of information that topic has been discuss...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.95s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:06<00:00,  6.70s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.01it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'




Query 0 in batch 33 - PPO step successful!
Extracted Info: Symptoms and duration...
Sample output: "You're going to go to the the hospital. What do you do? What do you do? You're going to take it for...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.96s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:04<00:00,  4.52s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.93it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


 74%|█████████████████████████████████████████████████████▏                  | 34/46 [14:31<04:59, 24.92s/it]Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 1 in batch 33 - PPO step successful!
Extracted Info: Kayla has been using perc gel and washing regularly which is somewhat helpful....
Sample output: There are no medical need for Kayla.

It is available in a separate file, separate medical report.
I...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.92s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.92s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.22it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 0 in batch 34 - PPO step successful!
Extracted Info: Symptoms and duration...
Sample output: Symptoms and duration

Based on the above, generate a concise medical summary:

Summary and duration...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.02s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.96s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  3.11it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


 76%|██████████████████████████████████████████████████████▊                 | 35/46 [14:52<04:21, 23.80s/it]

Query 1 in batch 34 - PPO step successful!
Extracted Info: Symptoms and duration...
Sample output: In the following, the following is a list of the available medical-relevant medical-related informat...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  2.00s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:04<00:00,  4.60s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.66it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 0 in batch 35 - PPO step successful!
Extracted Info: The doctor is trying to diagnose the patient with a medical condition....
Sample output: The doctor is trying to diagnose a medical condition.

The doctor is trying to diagnose a medical co...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.29s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:06<00:00,  6.97s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.60it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'




Query 1 in batch 35 - PPO step successful!
Extracted Info: The doctor is concerned about the patient 's recent illness ....
Sample output: 
Generate a concise medical summary focusing on key findings and treatment plans. Include chief comp...
Average reward: -1.0000


 78%|████████████████████████████████████████████████████████▎               | 36/46 [15:22<04:15, 25.56s/it]

Checkpoint saved to D:/kshitij-weights-folder/gpt-2-tuned-ppo-checkpoints/batch_35


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.89s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:05<00:00,  5.61s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.50it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'




Query 0 in batch 36 - PPO step successful!
Extracted Info: The patient has been in a car accident and has been experiencing neck pain....
Sample output: The has been experiencing neck pain.

The current condition is not present.


The current condition ...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.92s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.93s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  5.42it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


 80%|█████████████████████████████████████████████████████████▉              | 37/46 [15:46<03:46, 25.18s/it]Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 1 in batch 36 - PPO step successful!
Extracted Info: The patient is an 82-year-old male with past medical history significant for hypertension and stage ...
Sample output: A patient with a history...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.97s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:04<00:00,  4.57s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.45it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'
Query 0 in batch 37 - PPO step successful!
Extracted Info: Symptoms and duration...
Sample output: Symptoms

Mention and/addiction is the most common symptom

The most common medical symptoms that ar...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.92s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:03<00:00,  3.78s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.83it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


 83%|███████████████████████████████████████████████████████████▍            | 38/46 [16:11<03:21, 25.13s/it]Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 1 in batch 37 - PPO step successful!
Extracted Info: The patient is a woman with a history of type 2 diabetes and ovarian cancer....
Sample output: GENERAL:

A total of 1:

The patient is a woman with a history of type 2 diabetes.


The...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.96s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.97s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.58it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 0 in batch 38 - PPO step successful!
Extracted Info: Symptoms and duration...
Sample output: FAMEMAKED

The first two items are for your medical treatment:
FAMEMAKED

The second item is for you...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.99s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:04<00:00,  4.49s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.73it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


 85%|█████████████████████████████████████████████████████████████           | 39/46 [16:34<02:50, 24.37s/it]Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 1 in batch 38 - PPO step successful!
Extracted Info: Symptoms and duration...
Sample output: The general general idea is that these symptoms are not very common but are very common, in the Unit...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.10s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:03<00:00,  3.02s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  3.28it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 0 in batch 39 - PPO step successful!
Extracted Info: Symptoms and duration...
Sample output: HIV/treatments:
HIV/AIDS:

Hormone:

HIV/AIDS:

Hemorrhine:

Hormonal:

L...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.03s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.03s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.56it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


 87%|██████████████████████████████████████████████████████████████▌         | 40/46 [16:55<02:21, 23.54s/it]

Query 1 in batch 39 - PPO step successful!
Extracted Info: chief complaint is worsening headaches...
Sample output: CITOR
Key to this call:
CITOR
Key to this call:
CITOR

CITOR:
CITORGARD:
CITOR:...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.93s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:04<00:00,  4.46s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.70it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'




Query 0 in batch 40 - PPO step successful!
Extracted Info: The patient has a foot ulcer that has been there for six weeks....
Sample output: GENERAL INFORMATION:

Patient's foot ulcer is a condition that has been there for six weeks.

The pa...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.85s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:04<00:00,  4.26s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.94it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'




Query 1 in batch 40 - PPO step successful!
Extracted Info: The patient has been suffering from back pain for a few years....
Sample output: The patient has been suffering from back pain for a few years.

This is a list of the following:

Th...
Average reward: -1.0000


 89%|████████████████████████████████████████████████████████████████▏       | 41/46 [17:22<02:01, 24.36s/it]

Checkpoint saved to D:/kshitij-weights-folder/gpt-2-tuned-ppo-checkpoints/batch_40


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.94s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.90s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  5.35it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 0 in batch 41 - PPO step successful!
Extracted Info: Karen is a 34-year-old female with a history of chronic migraines and hypertension who is here today...
Sample output: Karen says that she feels as if she is a pain sufferer....
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.02s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:05<00:00,  5.59s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.68it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


 91%|█████████████████████████████████████████████████████████████████▋      | 42/46 [17:45<01:36, 24.22s/it]Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 1 in batch 41 - PPO step successful!
Extracted Info: The doctor is asking the patient about his back pain....
Sample output: You see a doctor who is going through pain.

The doctor also needs to explain to make sure the pain ...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.89s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:06<00:00,  6.33s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.42it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 0 in batch 42 - PPO step successful!
Extracted Info: The patient is a 41-year-old female....
Sample output: (D)This is the patient's current treatment plan.

(D) is the current treatment plan.
(D) is the curr...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.00s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:05<00:00,  5.78s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.58it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


 93%|███████████████████████████████████████████████████████████████████▎    | 43/46 [18:14<01:16, 25.53s/it]

Query 1 in batch 42 - PPO step successful!
Extracted Info: The doctor will discuss the patient's symptoms and current medications....
Sample output: Please include a diagnosis of the problem.

Please include the patient information.

Please include ...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.90s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:03<00:00,  3.84s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.56it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 0 in batch 43 - PPO step successful!
Extracted Info: The doctor is asking the patient about her knee pain....
Sample output: If she has a knee pain, the doctor is a doctor.

If she has a hip pain, the doctor is a doctor.

She...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.85s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:03<00:00,  3.58s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.95it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


 96%|████████████████████████████████████████████████████████████████████▊   | 44/46 [18:37<00:49, 24.89s/it]

Query 1 in batch 43 - PPO step successful!
Extracted Info: The patient has been suffering from back pain for a few years....
Sample output: A brief medical summary focusing on the treatment of the patient is provided on the patient's first ...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.05s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/2 [00:00<?, ?it/s][A
 50%|█████████████████████████████████████                                     | 1/2 [00:07<00:07,  7.37s/it][A
100%|██████████████████████████████████████████████████████████████████████████| 2/2 [00:08<00:00,  4.13s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/2 [00:00<?, ?it/s][A
 50%|█████████████████████████████████████                                     | 1/2 [00:00<00:00,  2.50it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00,  3.61it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 0 in batch 44 - PPO step successful!
Extracted Info: The patient has a complaint of knee pain....
Sample output: WHAT:
Your patient has a complaint of knee pain.

Your patient is a doctor.

Your patient is a docto...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.11s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:06<00:00,  6.72s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.51it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


 98%|██████████████████████████████████████████████████████████████████████▍ | 45/46 [19:10<00:27, 27.08s/it]Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 1 in batch 44 - PPO step successful!
Extracted Info: The patient has a broken wrist....
Sample output: The patient has a wrist injury.

The patient has a broken wrist.

The patient has a broken wrist.


...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.98s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.99s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.40it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'




Query 0 in batch 45 - PPO step successful!
Extracted Info: Symptoms and duration...
Sample output: PROGIC INFO:

PROGIC INFO:

PROGICIFIC INFO:


PROGICIFIC INFO:

PROG:

PROG:

PR...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.01s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:04<00:00,  4.59s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.91it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'




Query 1 in batch 45 - PPO step successful!
Extracted Info: The doctor is talking about her patient and the current medications....
Sample output: The doctor is talking about her patient and the current medications.


The doctor says she is on the...
Average reward: -1.0000


100%|████████████████████████████████████████████████████████████████████████| 46/46 [19:33<00:00, 25.52s/it]

Checkpoint saved to D:/kshitij-weights-folder/gpt-2-tuned-ppo-checkpoints/batch_45
✅ Epoch 1/3 complete





Epoch checkpoint saved to D:/kshitij-weights-folder/gpt-2-tuned-ppo-epochs/epoch_1


  0%|                                                                                 | 0/46 [00:00<?, ?it/s]Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.12s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:06<00:00,  6.00s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.70it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 0 in batch 0 - PPO step successful!
Extracted Info: The patient is concerned about her recent blood work....
Sample output: The patient is concerned about her recent blood work.

The patient is concerned about her recent blo...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.93s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.92s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.46it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'




Query 1 in batch 0 - PPO step successful!
Extracted Info: Symptoms and duration...
Sample output: KEY/PROB:

The patient's symptoms should be included in the conversation:

PROB:

The patient's symp...
Average reward: -1.0000


  2%|█▌                                                                       | 1/46 [00:24<18:44, 25.00s/it]Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Checkpoint saved to D:/kshitij-weights-folder/gpt-2-tuned-ppo-checkpoints/batch_0


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.11s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:08<00:00,  8.09s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.96it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 0 in batch 1 - PPO step successful!
Extracted Info: The doctor is asking the patient about her symptoms and duration....
Sample output: 1. How did the doctor think the patient was ill or well?
2. How did the doctor think the the patient...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.87s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:07<00:00,  7.10s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.42it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


  4%|███▏                                                                     | 2/46 [00:56<21:13, 28.95s/it]Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 1 in batch 1 - PPO step successful!
Extracted Info: The patient is a female with acid reflux....
Sample output: The is a male with acid reflux.
The patient is in a female with acid reflux.

The patient has acid r...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.06s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.92s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  5.03it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 0 in batch 2 - PPO step successful!
Extracted Info: Patient is a 60 year old, right-handed male, referred today for evaluation of numbness and tingling....
Sample output: Patient is a 60 year old, right-handed male, referred today for medical evaluation of numb...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.98s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:05<00:00,  5.74s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.70it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


  7%|████▊                                                                    | 3/46 [01:20<18:59, 26.50s/it]Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 1 in batch 2 - PPO step successful!
Extracted Info: The doctor will discuss the patient's symptoms and current medications....
Sample output: Key Information:
The doctor will discuss the current medical condition.

The doctor will discuss the...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.95s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.82s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  3.31it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 0 in batch 3 - PPO step successful!
Extracted Info: The patient is a 57-year-old female who is here for a surgical consult....
Sample output: The patient is here for medical consultation:

The patient is here for a surgical procedure:

The pa...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.02s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:08<00:00,  8.05s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.79it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


  9%|██████▎                                                                  | 4/46 [01:47<18:44, 26.77s/it]Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 1 in batch 3 - PPO step successful!
Extracted Info: Symptoms and duration...
Sample output: Frequency of discussion with relevant participants:
ININININATION:

Symptoms and duration:

SUMMARY:...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.95s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:04<00:00,  4.45s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.79it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 0 in batch 4 - PPO step successful!
Extracted Info: The doctor is asking the patient about his back pain....
Sample output: KEY CHART:

The doctor is asking if the patient is experiencing any special pain.

Key needs to be:
...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.99s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:05<00:00,  5.89s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.53it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


 11%|███████▉                                                                 | 5/46 [02:13<18:12, 26.64s/it]Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 1 in batch 4 - PPO step successful!
Extracted Info: The doctor is talking about her patient and the current medications....
Sample output: Key:

Majora.

Major:

Major:
If:

A:

A.

Major:
A:...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.90s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.78s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  3.07it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 0 in batch 5 - PPO step successful!
Extracted Info: Symptoms and duration...
Sample output: Symptoms and duration
AAS: "We are currently considering a diagnosis of a condition that is not curr...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.96s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.78s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.21it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'




Query 1 in batch 5 - PPO step successful!
Extracted Info: The doctor is concerned about the patient's health....
Sample output: The doctor:
The doctor:
The patient:

The doctor:

The doctor:
The doctor:

The doctor:

The doctor:...
Average reward: -1.0000


 13%|█████████▌                                                               | 6/46 [02:36<16:48, 25.22s/it]Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Checkpoint saved to D:/kshitij-weights-folder/gpt-2-tuned-ppo-checkpoints/batch_5


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.96s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.74s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  3.19it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 0 in batch 6 - PPO step successful!
Extracted Info: Symptoms and duration...
Sample output: -
-

-
-

-
-


-

-
-

-

-

-

-

-

-
-...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.99s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:03<00:00,  3.78s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.78it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


 15%|███████████                                                              | 7/46 [02:58<15:49, 24.34s/it]Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 1 in batch 6 - PPO step successful!
Extracted Info: The doctor will examine the patient and discuss the cough....
Sample output: The medical summary includes the symptoms, and the symptoms,
The doctor will review the current medi...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.27s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/2 [00:00<?, ?it/s][A
 50%|█████████████████████████████████████                                     | 1/2 [00:07<00:07,  7.94s/it][A
100%|██████████████████████████████████████████████████████████████████████████| 2/2 [00:08<00:00,  4.43s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/2 [00:00<?, ?it/s][A
 50%|█████████████████████████████████████                                     | 1/2 [00:00<00:00,  1.70it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00,  2.69it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 0 in batch 7 - PPO step successful!
Extracted Info: The doctor is concerned about the patient 's recent illness ....
Sample output: 
Generate a concise medical summary focusing on key findings and treatment plans. Include chief comp...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.02s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:04<00:00,  4.51s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.64it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


 17%|████████████▋                                                            | 8/46 [03:29<16:35, 26.20s/it]Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 1 in batch 7 - PPO step successful!
Extracted Info: William is a doctor. He is treating a patient who injured his knee....
Sample output: Key symptoms:

He is a doctor.

He is a doctor.

He is a doctor.

(This is a statement...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.95s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:06<00:00,  6.48s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.37it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 0 in batch 8 - PPO step successful!
Extracted Info: Symptoms and duration...
Sample output: This is a list of what the person said was "not sure" if mentioned.

The person:
The person:

the pe...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.12s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:03<00:00,  3.04s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  3.18it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


 20%|██████████████▎                                                          | 9/46 [03:54<16:04, 26.07s/it]Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 1 in batch 8 - PPO step successful!
Extracted Info: The doctor will examine the ankle and will recommend a course of treatment....
Sample output: If the doctor believes that it is an incontinence, it is not a doctor's opinion:

If the doctor beli...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.07s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.81s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  3.17it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 0 in batch 9 - PPO step successful!
Extracted Info: Symptoms and duration...
Sample output: SUMMARY:
The initial medical summary was not in the discussion of this article.
SUMMARY:

If the dis...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.04s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/2 [00:00<?, ?it/s][A
 50%|█████████████████████████████████████                                     | 1/2 [00:07<00:07,  7.54s/it][A
100%|██████████████████████████████████████████████████████████████████████████| 2/2 [00:08<00:00,  4.23s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/2 [00:00<?, ?it/s][A
 50%|█████████████████████████████████████                                     | 1/2 [00:00<00:00,  2.52it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00,  3.50it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


 22%|███████████████▋                                                        | 10/46 [04:22<15:57, 26.60s/it]Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 1 in batch 9 - PPO step successful!
Extracted Info: The patient is currently taking medications for his foot pain....
Sample output: The patient is currently taking medications for his foot pain.

The patient is currently taking medi...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.94s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.85s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  5.00it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 0 in batch 10 - PPO step successful!
Extracted Info: Lawrence is a 62-year-old male with a past medical history significant for type i diabetes, congesti...
Sample output: If you are a patient, will be sure to include your medical record...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.96s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.88s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  5.22it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'




Query 1 in batch 10 - PPO step successful!
Extracted Info: Karen is a 34-year-old female with a history of chronic migraines and hypertension who is here today...
Sample output: Karen is a 35-year-old male male with a history of chronic pain...
Average reward: -1.0000


 24%|█████████████████▏                                                      | 11/46 [04:44<14:43, 25.24s/it]Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Checkpoint saved to D:/kshitij-weights-folder/gpt-2-tuned-ppo-checkpoints/batch_10


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.97s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.99s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.36it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 0 in batch 11 - PPO step successful!
Extracted Info: Symptoms and duration...
Sample output: COMMENT:

COMMENT:

-

-
-
-

-
-

-

-
-
-

-
-

-...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.91s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.76s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.48it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


 26%|██████████████████▊                                                     | 12/46 [05:05<13:28, 23.78s/it]Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 1 in batch 11 - PPO step successful!
Extracted Info: The patient has been in a car accident and has been experiencing neck pain....
Sample output: 1:
2:
3:

4:

5:

6:

7:
8:

9...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.91s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/2 [00:00<?, ?it/s][A
 50%|█████████████████████████████████████                                     | 1/2 [00:07<00:07,  7.30s/it][A
100%|██████████████████████████████████████████████████████████████████████████| 2/2 [00:10<00:00,  5.04s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/2 [00:00<?, ?it/s][A
 50%|█████████████████████████████████████                                     | 1/2 [00:00<00:00,  2.44it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00,  3.22it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 0 in batch 12 - PPO step successful!
Extracted Info: The patient has a complaint of knee pain....
Sample output: - medical summary:

-1.

-2.

-1.

-2.

-2.

-2.

-...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.14s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.98s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  3.18it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


 28%|████████████████████▎                                                   | 13/46 [05:34<14:02, 25.54s/it]Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 1 in batch 12 - PPO step successful!
Extracted Info: chief complaint is worsening headaches...
Sample output: KEY:

Key:

Migraine and other headache conditions

Migraine is a symptom of:

a history of:

anxiet...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.16s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.12s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.56it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 0 in batch 13 - PPO step successful!
Extracted Info: The doctor will examine the ankle and will recommend a course of treatment....
Sample output: (1) If the doctor thinks the ankle and the medicine is too good, call the doctor

(2) The physician ...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.98s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:07<00:00,  7.93s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.39it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


 30%|█████████████████████▉                                                  | 14/46 [06:01<13:47, 25.86s/it]Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 1 in batch 13 - PPO step successful!
Extracted Info: Symptoms and duration...
Sample output: I. Information:
A. Information:
A. Brief description of the condition
Brief description of the sympt...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.99s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:04<00:00,  4.52s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.68it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 0 in batch 14 - PPO step successful!
Extracted Info: Symptoms and duration...
Sample output: D: Treatment plan:
Drug:

TRAIN:

General:

D:

T:

D:

General:
General:

D:...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.96s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:04<00:00,  4.45s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  3.02it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


 33%|███████████████████████▍                                                | 15/46 [06:26<13:15, 25.66s/it]Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 1 in batch 14 - PPO step successful!
Extracted Info: Symptoms and duration...
Sample output: INFORMATION:

Treatments:

CITY:

The following topics:
1.
Innal,2
2

C:

The following topics:...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.07s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:07<00:00,  7.39s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.41it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 0 in batch 15 - PPO step successful!
Extracted Info: The doctor is concerned about the patient's health....
Sample output: I. The doctor's concern about the patient's health.

The doctor's concern is about the patient's hea...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.97s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:03<00:00,  3.92s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.81it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'




Query 1 in batch 15 - PPO step successful!
Extracted Info: The doctor will check up on the patient's health and recommend treatment....
Sample output: If the doctor is a doctor who is taking your doctor's recommendation:

the doctor will check up on y...
Average reward: -1.0000


 35%|█████████████████████████                                               | 16/46 [06:55<13:17, 26.57s/it]Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Checkpoint saved to D:/kshitij-weights-folder/gpt-2-tuned-ppo-checkpoints/batch_15


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.06s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:04<00:00,  4.57s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.68it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 0 in batch 16 - PPO step successful!
Extracted Info: The doctor will examine the patient and recommend a physical exam....
Sample output: The doctor will discuss the results of the physical exam, and then ask the patient if they have any ...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.97s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:05<00:00,  5.74s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.64it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


 37%|██████████████████████████▌                                             | 17/46 [07:21<12:51, 26.60s/it]Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 1 in batch 16 - PPO step successful!
Extracted Info: The doctor will call out some of the physical exam findings....
Sample output: HOSPITAL:

If you have a medical condition, call 1-1-1.

If you have a condition, you should contact...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.96s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:05<00:00,  5.71s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.52it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 0 in batch 17 - PPO step successful!
Extracted Info: The doctor is asking about the symptoms of the patient....
Sample output: The doctor is asking the patient about the symptoms.

The doctor is asking about the symptoms.

the ...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.92s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:03<00:00,  3.82s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.69it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


 39%|████████████████████████████▏                                           | 18/46 [07:47<12:16, 26.30s/it]Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 1 in batch 17 - PPO step successful!
Extracted Info: The doctor is asking the patient about her knee pain....
Sample output: If there is a joint pain,

to the patient:

If there is a pain:
The doctor asks the patient if he is...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.01s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  2.00s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.36it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 0 in batch 18 - PPO step successful!
Extracted Info: Symptoms and duration...
Sample output: Treatments that are discussed:

COURS:
Treatments that are discussed:
COURS:

COURS:

Treatments tha...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  2.00s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:04<00:00,  4.57s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.66it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


 41%|█████████████████████████████▋                                          | 19/46 [08:10<11:20, 25.19s/it]Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 1 in batch 18 - PPO step successful!
Extracted Info: The patient is currently taking painkillers to treat the pain....
Sample output: If you mention this, you are a patient.

If you have a question, you want to discuss this

If you ha...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.93s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:04<00:00,  4.44s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.77it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 0 in batch 19 - PPO step successful!
Extracted Info: The patient has a foot ulcer that has been there for six weeks....
Sample output: Key:
Symptoms:
The patient has a foot ulcer that has been there for six weeks.
The patient has a foo...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.86s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.89s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  5.25it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


 43%|███████████████████████████████▎                                        | 20/46 [08:32<10:29, 24.22s/it]Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 1 in batch 19 - PPO step successful!
Extracted Info: Julia has had a heart attack and is undergoing treatment. She has had a stent placed in her heart. J...
Sample output: Julia's main concern: The doctor has been...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.98s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:03<00:00,  3.85s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.86it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 0 in batch 20 - PPO step successful!
Extracted Info: Symptoms and duration...
Sample output: INSTRUCTIONS:
1. Discuss the main topics of the conversation:

This is a conversation with a patient...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.96s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.91s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  5.10it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'




Query 1 in batch 20 - PPO step successful!
Extracted Info: The patient is a woman with a history of type 2 diabetes and ovarian cancer....
Sample output: Key symptoms, symptoms and symptoms:

"ADVERSE:

"
"

"ADVANCIC:...
Average reward: -1.0000


 46%|████████████████████████████████▊                                       | 21/46 [08:54<09:54, 23.77s/it]Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Checkpoint saved to D:/kshitij-weights-folder/gpt-2-tuned-ppo-checkpoints/batch_20


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.03s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:07<00:00,  7.47s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.41it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 0 in batch 21 - PPO step successful!
Extracted Info: The doctor will check up on the patient's past medical history....
Sample output: The doctor will check up on the patient's medical history.

The doctor will check up on the patient'...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.11s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.01s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.46it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


 48%|██████████████████████████████████▍                                     | 22/46 [09:20<09:44, 24.34s/it]Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 1 in batch 21 - PPO step successful!
Extracted Info: chief complaint is chest pain...
Sample output: Related topics:
The topic(s):
The discussion(s):

The discussion(s):

CARE:
The conversation(s):

Th...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.91s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.82s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  5.54it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 0 in batch 22 - PPO step successful!
Extracted Info: The patient is an 82-year-old male with past medical history significant for hypertension and stage ...
Sample output: {:A recent study...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.05s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:06<00:00,  6.98s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.95it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


 50%|████████████████████████████████████                                    | 23/46 [09:46<09:31, 24.84s/it]Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 1 in batch 22 - PPO step successful!
Extracted Info: Symptoms and duration...
Sample output: GENERAL INFO:
General information about the underlying and current treatment of the problem

The pat...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.93s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.94s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  5.00it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 0 in batch 23 - PPO step successful!
Extracted Info: The patient is a 31-year-old female with a history of diabetes and asthma....
Sample output: (a)

(b)

(c)

(d)

(e)

(d...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.96s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.82s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.22it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


 52%|█████████████████████████████████████▌                                  | 24/46 [10:07<08:38, 23.57s/it]Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 1 in batch 23 - PPO step successful!
Extracted Info: The doctor will recommend a course of antibiotics for the patient....
Sample output: [The following are the same notes:

[1]

[2]

[1]


[2]
[3]...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.95s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.86s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  3.20it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 0 in batch 24 - PPO step successful!
Extracted Info: The doctor will examine the patient and give her a physical exam....
Sample output: *(d) The doctor will discuss the patient's medical history, history, and/or recommend treatment plan...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.28s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:05<00:00,  5.99s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.06it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


 54%|███████████████████████████████████████▏                                | 25/46 [10:32<08:26, 24.11s/it]Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 1 in batch 24 - PPO step successful!
Extracted Info: The doctor is concerned about the patient 's recent illness ....
Sample output: 
Generate a concise medical summary focusing on key findings and treatment plans. Include chief comp...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.99s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/2 [00:00<?, ?it/s][A
 50%|█████████████████████████████████████                                     | 1/2 [00:07<00:07,  7.48s/it][A
100%|██████████████████████████████████████████████████████████████████████████| 2/2 [00:08<00:00,  4.23s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/2 [00:00<?, ?it/s][A
 50%|█████████████████████████████████████                                     | 1/2 [00:00<00:00,  2.79it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00,  3.62it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 0 in batch 25 - PPO step successful!
Extracted Info: The patient has a broken wrist....
Sample output: -1.

-1.

-1.

-
-1.
-1.

-1.

-2.

-1...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.06s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.75s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.32it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'




Query 1 in batch 25 - PPO step successful!
Extracted Info: The doctor is asking the patient about his back pain....
Sample output: What the doctor is asking the patient:

WHAT:

The doctor is interested in the conversation.

The do...
Average reward: -1.0000


 57%|████████████████████████████████████████▋                               | 26/46 [11:00<08:28, 25.41s/it]Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Checkpoint saved to D:/kshitij-weights-folder/gpt-2-tuned-ppo-checkpoints/batch_25


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.85s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.71s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.27it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 0 in batch 26 - PPO step successful!
Extracted Info: The patient has been suffering from back pain for a few years....
Sample output: Policies:
MYG:

Policious medicines:

MYG::

Policies:
MYG:...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.98s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.98s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.56it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


 59%|██████████████████████████████████████████▎                             | 27/46 [11:21<07:33, 23.89s/it]Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 1 in batch 26 - PPO step successful!
Extracted Info: Symptoms and duration...
Sample output: Diseasell:

Dise:
Treatment:

(a)

Treatment:

(a)

Frequency:
(a)

F...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.92s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.96s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.71it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 0 in batch 27 - PPO step successful!
Extracted Info: Symptoms and duration...
Sample output: IN:

SUM:

SUM:

SUM:
SUM:

SUM:


SUM:
SUM:

SUM:...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.01s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:05<00:00,  5.98s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.17it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


 61%|███████████████████████████████████████████▊                            | 28/46 [11:45<07:10, 23.92s/it]Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 1 in batch 27 - PPO step successful!
Extracted Info: The patient has a broken wrist....
Sample output: 1. The patient is suffering from a broken wrist.

2. The patient has a broken wrist.


The patient:
...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.92s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.92s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.49it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 0 in batch 28 - PPO step successful!
Extracted Info: Michael is seeing a doctor for back pain....
Sample output: The following is an important message to the user that was sent:
The following is a medical summary ...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.87s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.76s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.37it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


 63%|█████████████████████████████████████████████▍                          | 29/46 [12:05<06:28, 22.86s/it]Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 1 in batch 28 - PPO step successful!
Extracted Info: susan is a 26-year-old female who has high blood pressure....
Sample output: CALL:

S:

M:

P:
S:
a:

::

c...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.83s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:03<00:00,  3.58s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  3.02it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 0 in batch 29 - PPO step successful!
Extracted Info: The patient is a 37-year-old male with a complaint of left arm pain....
Sample output: What did you call?

The patient is a 37-year-old male with a right arm pain.

What were...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.02s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:03<00:00,  3.79s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.77it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


 65%|██████████████████████████████████████████████▉                         | 30/46 [12:28<06:07, 22.96s/it]Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 1 in batch 29 - PPO step successful!
Extracted Info: The doctor is asking the patient about his neck pain....
Sample output: A person is being asked about the current situation.

The doctor asks if the person is in the area w...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.90s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:03<00:00,  3.60s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.88it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 0 in batch 30 - PPO step successful!
Extracted Info: The patient has been suffering from back pain for a few years....
Sample output: PRACTIVE:

Please note that this is not an opiate

PRACTIVE:

ALL:
:

PRACT:...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.36s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:07<00:00,  7.16s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.74it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'




Query 1 in batch 30 - PPO step successful!
Extracted Info: The patient is a 37-year-old female with a history of hypertension and diabetes ....
Sample output: 
Generate a concise medical summary focusing on key findings and treatment plans. Include chief comp...
Average reward: -1.0000


 67%|████████████████████████████████████████████████▌                       | 31/46 [12:57<06:09, 24.60s/it]Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Checkpoint saved to D:/kshitij-weights-folder/gpt-2-tuned-ppo-checkpoints/batch_30


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.95s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.87s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.10it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 0 in batch 31 - PPO step successful!
Extracted Info: Kayla has been using perc gel and washing regularly which is somewhat helpful....
Sample output: Key:
[{i:{a:at:]]{}(a)

:callor:call:}...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.98s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.83s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  3.24it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


 70%|██████████████████████████████████████████████████                      | 32/46 [13:18<05:31, 23.69s/it]Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 1 in batch 31 - PPO step successful!
Extracted Info: Symptoms and duration...
Sample output: HIV/A:
* If:
* (1)* is an emergency (if present)
if (2)
if (3)
*(1)*(1)*...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.99s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.00s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.14it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 0 in batch 32 - PPO step successful!
Extracted Info: Symptoms and duration...
Sample output: [ { "1.5b-1.6:" :[ 0 ]{ "1-2.5:" :[{ "1-2:1:{:i:{:"...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.11s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.08s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.73it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


 72%|███████████████████████████████████████████████████▋                    | 33/46 [13:38<04:53, 22.58s/it]Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 1 in batch 32 - PPO step successful!
Extracted Info: Symptoms and duration...
Sample output: [c] Medical history of the patient

[c]

[c]
[c]
[c]

[c]
[c]

[c]...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.97s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.89s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  3.05it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 0 in batch 33 - PPO step successful!
Extracted Info: Symptoms and duration...
Sample output: KEY:
(1) :[
a) What is your patient's complaint:
:
(a)
<b>
(1)

[{id:
{
}...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.97s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.99s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.74it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


 74%|█████████████████████████████████████████████████████▏                  | 34/46 [13:59<04:23, 21.98s/it]Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 1 in batch 33 - PPO step successful!
Extracted Info: The doctor will discuss the patient's symptoms and current medications....
Sample output: [MEDC] The doctor discusses:

[QUESTION:

[PROPOSED]

[CONTACT]

[]
[...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.87s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:03<00:00,  3.67s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.35it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 0 in batch 34 - PPO step successful!
Extracted Info: Julia has had a heart attack and is undergoing treatment. She has had a stent placed in her heart. J...
Sample output: Julia:
1. This includes:...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.99s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.92s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.52it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


 76%|██████████████████████████████████████████████████████▊                 | 35/46 [14:20<03:59, 21.75s/it]Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 1 in batch 34 - PPO step successful!
Extracted Info: Symptoms and duration...
Sample output: TALK:

I:
[t]

[msg1]

[msg2]

[t2] [t2]

[[t]...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.82s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.84s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.78it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 0 in batch 35 - PPO step successful!
Extracted Info: The patient is a female who has a severe right upper arm pain....
Sample output: PROGIC:

Your patient has a severe upper arm pain:

in the patient's current medical history:

PROPI...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.96s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.08it/s][A


Error in evaluate: division by zero




Query 1 in batch 35 - PPO step successful!
Extracted Info: Raymond has been having trouble swallowing for a period of time. He has been having trouble swallowi...
Sample output: Ray...
Average reward: -1.0000


 78%|████████████████████████████████████████████████████████▎               | 36/46 [14:39<03:29, 20.99s/it]Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Checkpoint saved to D:/kshitij-weights-folder/gpt-2-tuned-ppo-checkpoints/batch_35


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.95s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:04<00:00,  4.48s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.67it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 0 in batch 36 - PPO step successful!
Extracted Info: Chief complaint is abnormal renal ultrasound with an atrophic right kidney....
Sample output: 1. "What is the most important medical thing you're doing right now?"

2. "The most important medica...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.06s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:06<00:00,  6.03s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.49it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


 80%|█████████████████████████████████████████████████████████▉              | 37/46 [15:06<03:24, 22.72s/it]Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 1 in batch 36 - PPO step successful!
Extracted Info: Symptoms and duration...
Sample output: 1. What is the diagnosis?
[(a) What is the condition?
[b] [c]
[c][b] [c]
[c][d]...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.90s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:04<00:00,  4.54s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.36it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 0 in batch 37 - PPO step successful!
Extracted Info: chief complaint is back pain...
Sample output: #[all patients: name:s]

[all patients:name:[all patients]

[name:]
[all_info:name]
[#[all...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.94s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.93s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.71it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


 83%|███████████████████████████████████████████████████████████▍            | 38/46 [15:29<03:00, 22.62s/it]Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 1 in batch 37 - PPO step successful!
Extracted Info: The doctor is asking the patient about his current medical condition....
Sample output: [1]# (1)

[INDEX:

[1][1]#([2)][[[INDEX:][...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.99s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.89s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  3.24it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 0 in batch 38 - PPO step successful!
Extracted Info: chief complaint is acid reflux...
Sample output: 1. [PTSD:Addiction]
[:PTS]
[PTSD:Cannot_advocate_:PTS]
[PTSD:
I...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.17s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.19s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  3.25it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


 85%|█████████████████████████████████████████████████████████████           | 39/46 [15:50<02:35, 22.19s/it]Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 1 in batch 38 - PPO step successful!
Extracted Info: Symptoms and duration...
Sample output: PRACT:

<a href="https://www.cannabis.com/research/cannab
>Possible referral:

https://www.cannabis....
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.10s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:03<00:00,  3.01s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  3.32it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 0 in batch 39 - PPO step successful!
Extracted Info: The doctor will examine the ankle and will recommend a course of treatment....
Sample output: INDEX:

PATORY:
[PREvention(s):

Positives
[b]--[b]

[...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.96s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.90s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.44it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


 87%|██████████████████████████████████████████████████████████████▌         | 40/46 [16:11<02:11, 21.83s/it]Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 1 in batch 39 - PPO step successful!
Extracted Info: The doctor will examine the patient and will recommend a course of treatment....
Sample output: WHAT:
The doctor

INTR:The doctor is calling for a

MUST:

Possible cause(s):...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.95s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:03<00:00,  3.83s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.74it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 0 in batch 40 - PPO step successful!
Extracted Info: The doctor will examine the patient's knee....
Sample output: If the person:
isn't talking:
The conversation is over.


Possible future

The conversation:
The con...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.87s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.88s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.73it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'




Query 1 in batch 40 - PPO step successful!
Extracted Info: joseph is a 59 year old male who has chronic problems....
Sample output: GENERAL:
josephant is a current or current medical condition

I am an in conversation with a patient...
Average reward: -1.0000


 89%|████████████████████████████████████████████████████████████████▏       | 41/46 [16:33<01:50, 22.01s/it]Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Checkpoint saved to D:/kshitij-weights-folder/gpt-2-tuned-ppo-checkpoints/batch_40


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.02s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.97s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.87it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 0 in batch 41 - PPO step successful!
Extracted Info: The patient is experiencing a knee pain....
Sample output: CALL:

[01]
[22]
[05]

[23]

[24]


[25]

[26]...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.91s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.91s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  5.28it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


 91%|█████████████████████████████████████████████████████████████████▋      | 42/46 [16:53<01:25, 21.32s/it]Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 1 in batch 41 - PPO step successful!
Extracted Info: Melissa sanchez is a female patient who is being seen in the office for status post mitral valve rep...
Sample output: MULTIMED_:

(not in the conversation)MIS_CLINARITY:...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.23s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.19s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.29it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 0 in batch 42 - PPO step successful!
Extracted Info: Symptoms and duration...
Sample output: -
[ID]

https://www.healthcare.com/professions/medicine-profession/citation-pro-sus-ad-ad_01_fb_...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.03s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:03<00:00,  3.98s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.71it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


 93%|███████████████████████████████████████████████████████████████████▎    | 43/46 [17:15<01:04, 21.66s/it]Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 1 in batch 42 - PPO step successful!
Extracted Info: The patient is experiencing pain in her elbow....
Sample output: This conversation is now closed.

Anonymous 06/10/17
AnonymousAnonymousAnonymous: My girlfriend, I h...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.98s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.79s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  3.07it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 0 in batch 43 - PPO step successful!
Extracted Info: The patient is a 41-year-old female....
Sample output: [msg]Announce[this:

[all:[this:]]adverification:

[title:this]patient#.cnnant[...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.95s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.96s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.77it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


 96%|████████████████████████████████████████████████████████████████████▊   | 44/46 [17:36<00:42, 21.33s/it]Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 1 in batch 43 - PPO step successful!
Extracted Info: The doctor will call out some of the physical exam findings....
Sample output: WHAT CONSIDER:
problems in the case
The caller:
cannotate

:
(1)a(front)call:
ad#...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.92s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.08it/s][A


Error in evaluate: division by zero


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 0 in batch 44 - PPO step successful!
Extracted Info: Raymond has been having trouble swallowing for a period of time. He has been having trouble swallowi...
Sample output: ...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.98s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  2.00s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.48it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


 98%|██████████████████████████████████████████████████████████████████████▍ | 45/46 [17:54<00:20, 20.46s/it]Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 1 in batch 44 - PPO step successful!
Extracted Info: The doctor is trying to diagnose the patient with a medical condition....
Sample output: {title:clarval:selection:[all:title:"

[all:flair:brand:doctor=P&key:no:...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.21s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:03<00:00,  3.14s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.94it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 0 in batch 45 - PPO step successful!
Extracted Info: Symptoms and duration...
Sample output: LOCATION:
I'm inpatient,researching:
[research topic]
research topic:
ad=%&action=&i=&action=&c=%&f&...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.03s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.89s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.17it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'




Query 1 in batch 45 - PPO step successful!
Extracted Info: Patient is 72-year-old woman with hypertension....
Sample output: PATP1:

PID:patient

AdvC:

patient:

hpetpetpetpetpetpetpetpet:...
Average reward: -1.0000


100%|████████████████████████████████████████████████████████████████████████| 46/46 [18:18<00:00, 23.87s/it]

Checkpoint saved to D:/kshitij-weights-folder/gpt-2-tuned-ppo-checkpoints/batch_45
✅ Epoch 2/3 complete





Epoch checkpoint saved to D:/kshitij-weights-folder/gpt-2-tuned-ppo-epochs/epoch_2


  0%|                                                                                 | 0/46 [00:00<?, ?it/s]Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.20s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:04<00:00,  4.56s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.46it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 0 in batch 0 - PPO step successful!
Extracted Info: Symptoms and duration...
Sample output: PREDICT:
I. Know what this is:
I.explanation:
I. Know what this is the topic:
My.explanation:
My:pet...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.88s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.90s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.59it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'




Query 1 in batch 0 - PPO step successful!
Extracted Info: joseph is a 59 year old male who has chronic problems....
Sample output: cognis is a patient in conversation

your conversation:

please provide a phone number for your conc...
Average reward: -1.0000


  2%|█▌                                                                       | 1/46 [00:23<17:38, 23.52s/it]Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Checkpoint saved to D:/kshitij-weights-folder/gpt-2-tuned-ppo-checkpoints/batch_0


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.05s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.97s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.54it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 0 in batch 1 - PPO step successful!
Extracted Info: The patient is currently taking medications for his foot pain....
Sample output: WHAT: The conversation:
The conversation is about the patient's symptoms:
What is going on
ad:
The c...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.13s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.08s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.34it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


  4%|███▏                                                                     | 2/46 [00:43<15:48, 21.55s/it]Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 1 in batch 1 - PPO step successful!
Extracted Info: Symptoms and duration...
Sample output: PACTOR:
anyone:frontline or:frontline#PR:p

INDEX:
I:[current=TRUNC:currentbalance[1-4]{1...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.86s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.85s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.58it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 0 in batch 2 - PPO step successful!
Extracted Info: The patient has been suffering from back pain for a few years....
Sample output: PATIENTS:

Comment:
*[MEDLINE_ID:1210]
[D] Patient:
a.
PAT...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.93s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:03<00:00,  3.89s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.53it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


  7%|████▊                                                                    | 3/46 [01:05<15:27, 21.57s/it]Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 1 in batch 2 - PPO step successful!
Extracted Info: The doctor is concerned about the patient's health....
Sample output: (Adv. to mention)

"The day after I woke up.

[The emergency medical that the caller reported:
"The ...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.99s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.99s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.28it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 0 in batch 3 - PPO step successful!
Extracted Info: Symptoms and duration...
Sample output: --
:--
Advocate

neutral:--
Advice:--
hotfrontp://frontpa.comfrontportal.comfrontfrontpage.com|msg=%...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.86s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.89s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.32it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


  9%|██████▎                                                                  | 4/46 [01:25<14:36, 20.87s/it]Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 1 in batch 3 - PPO step successful!
Extracted Info: The patient has been suffering from back pain for a few years....
Sample output: [fronttitle]expand(this[_{}\]]{:clinchtitletitle}advancedrad:ex:report{title=}}]...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.14s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.12s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.49it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 0 in batch 4 - PPO step successful!
Extracted Info: The doctor will examine the ankle and will recommend a course of treatment....
Sample output: your health care provider:

your health caret:cjohnstoner@gmail.com

is:johnston,v=respondin...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.98s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.00s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.49it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


 11%|███████▉                                                                 | 5/46 [01:45<14:05, 20.62s/it]Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 1 in batch 4 - PPO step successful!
Extracted Info: The patient is experiencing a knee pain....
Sample output: (front)

[back]
-
Anonymous, research_state:
There are currently unengaging thoughts and symptoms in...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.88s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.89s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.47it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 0 in batch 5 - PPO step successful!
Extracted Info: The patient is a female who has a severe right upper arm pain....
Sample output: INDEX:
"myastr:fronte:frontfront:front_138
​Dawn: A few months ago, when I was...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.21s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.19s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.06it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'




Query 1 in batch 5 - PPO step successful!
Extracted Info: The patient is concerned about her recent blood work....
Sample output: Why (please stateprovid=comparison included(and=in)&data={prevention:partner} alert(am)existing_stat...
Average reward: -1.0000


 13%|█████████▌                                                               | 6/46 [02:06<13:58, 20.97s/it]Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Checkpoint saved to D:/kshitij-weights-folder/gpt-2-tuned-ppo-checkpoints/batch_5


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.05s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.02s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  3.13it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 0 in batch 6 - PPO step successful!
Extracted Info: chief complaint is chest pain...
Sample output: PRACTING:
your patient is complaining symptoms
your emergency flu-related (provide a flu-aid:))provi...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.94s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.91s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  5.78it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


 15%|███████████                                                              | 7/46 [02:26<13:24, 20.62s/it]Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 1 in batch 6 - PPO step successful!
Extracted Info: Raymond has been having trouble swallowing for a period of time. He has been having trouble swallowi...
Sample output: (...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.94s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.01s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.43it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 0 in batch 7 - PPO step successful!
Extracted Info: The doctor is asking the patient about his neck pain....
Sample output: — The alert thought
— The patient's complaint is reported safely usinghotfluidapiall,fastingfrontfli...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.99s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.99s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.43it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


 17%|████████████▋                                                            | 8/46 [02:46<12:54, 20.37s/it]Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 1 in batch 7 - PPO step successful!
Extracted Info: The doctor will examine the patient and give her a physical exam....
Sample output: result in the research, reported case[(advengeng)f.google.your.us.flax,advengengant&share=&...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  2.00s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.00s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.18it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 0 in batch 8 - PPO step successful!
Extracted Info: The doctor is asking about the symptoms of the patient....
Sample output: FINDS:antaiplan[front-your-opportunity] •providerprovidersmet:probplan[advices wereproviderpristcess...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.93s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.86s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.80it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


 20%|██████████████▎                                                          | 9/46 [03:07<12:38, 20.51s/it]Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 1 in batch 8 - PPO step successful!
Extracted Info: Symptoms and duration...
Sample output: ploSound:frontlinefrontpagerfrontpaey:frontlocationfrontpagerinfofrontcellphonefrontpradeparmusfront...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.11s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.10s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.09it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 0 in batch 9 - PPO step successful!
Extracted Info: The patient is experiencing pain in her elbow....
Sample output: Patient(s):
Medlineer rfer@hot.com
flvmarkup.google:beamfrontup alert alertadvmsg:close=coflvv...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.04s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.94s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.93it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


 22%|███████████████▋                                                        | 10/46 [03:28<12:25, 20.72s/it]Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 1 in batch 9 - PPO step successful!
Extracted Info: Symptoms and duration...
Sample output: frontistingresearchfrontistingresearchadvadv thoughtspr

48. Discussion:
Possible patientengaging th...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.01s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.98s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.16it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 0 in batch 10 - PPO step successful!
Extracted Info: Symptoms and duration...
Sample output: investigator4(s)flair:com:bmco:uk/pr13114

Brand the fluflair:aberoncom,researchprobe:engaginglucyt1...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.94s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.93s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.67it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'




Query 1 in batch 10 - PPO step successful!
Extracted Info: The patient is a woman with a history of type 2 diabetes and ovarian cancer....
Sample output: investigate the patient's thoughts

Advice acetam notexpiry Protect safe,pirational,neutral,:

— abo...
Average reward: -1.0000


 24%|█████████████████▏                                                      | 11/46 [03:49<12:07, 20.78s/it]Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Checkpoint saved to D:/kshitij-weights-folder/gpt-2-tuned-ppo-checkpoints/batch_10


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.40s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:06<00:00,  6.19s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.90it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 0 in batch 11 - PPO step successful!
Extracted Info: The patient is a 37-year-old female with a history of hypertension and diabetes ....
Sample output: 
Generate a concise medical summary focusing on key findings and treatment plans. Include chief comp...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.04s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.02s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.17it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


 26%|██████████████████▊                                                     | 12/46 [04:14<12:28, 22.02s/it]Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 1 in batch 11 - PPO step successful!
Extracted Info: The doctor will call out some of the physical exam findings....
Sample output: Pat\t:f\partay patient:sym\romedays:proced_pillnum thoughts:possible_diagnose:{advadv_hy...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.00s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.98s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  3.00it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 0 in batch 12 - PPO step successful!
Extracted Info: Symptoms and duration...
Sample output: —
vincent_data
advanced_plan_navigator6565

(0793)​antinfo:
met://65ageant%20advanced_03:discrep837...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.93s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.81s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  3.09it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


 28%|████████████████████▎                                                   | 13/46 [04:36<12:04, 21.95s/it]Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 1 in batch 12 - PPO step successful!
Extracted Info: The doctor will examine the patient and discuss the cough....
Sample output: RROG:neutral

antever(:the research.
P65Wasser30:frontiere,luxant[fronter:uniplanay.com\...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.94s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.88s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  5.13it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 0 in batch 13 - PPO step successful!
Extracted Info: Julia has had a heart attack and is undergoing treatment. She has had a stent placed in her heart. J...
Sample output: neutral:|hot flaxed|...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.19s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.21s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  3.25it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


 30%|█████████████████████▉                                                  | 14/46 [04:56<11:23, 21.36s/it]Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 1 in batch 13 - PPO step successful!
Extracted Info: chief complaint is worsening headaches...
Sample output: The alert The conversation revolved:pl Interested patient: Thisenger research:1:637ays includedadvon...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.99s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.99s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.06it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 0 in batch 14 - PPO step successful!
Extracted Info: Symptoms and duration...
Sample output: consisting of:

userdata,analysis (new alert from there)
investengaginguserdata, user alert[.dat) al...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.01s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.97s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  3.00it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


 33%|███████████████████████▍                                                | 15/46 [05:17<10:58, 21.25s/it]Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 1 in batch 14 - PPO step successful!
Extracted Info: Kayla has been using perc gel and washing regularly which is somewhat helpful....
Sample output: —lu.
provhotephothot[provhotpetpetpetpet]
 • (please, provider, Krebeck,metusfer...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.04s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.02s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.07it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 0 in batch 15 - PPO step successful!
Extracted Info: Symptoms and duration...
Sample output: is coed best describes a possible day

[Reference]
[(new evidence)fl4765067 discussion(ant)advant in...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.96s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.90s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.44it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'




Query 1 in batch 15 - PPO step successful!
Extracted Info: susan is a 26-year-old female who has high blood pressure....
Sample output: hotfli
hotfliapiattant—: hot fli, best flufahy flp syn syni fluerobic...
Average reward: -1.0000


 35%|█████████████████████████                                               | 16/46 [05:38<10:33, 21.12s/it]Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Checkpoint saved to D:/kshitij-weights-folder/gpt-2-tuned-ppo-checkpoints/batch_15


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.17s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:03<00:00,  3.08s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.77it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 0 in batch 16 - PPO step successful!
Extracted Info: Symptoms and duration...
Sample output: investigate why wronguse,flflcommonplace.com:
engagingengantupdateshotfront.comfront.infrontpage 271...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.01s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.00s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.32it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


 37%|██████████████████████████▌                                             | 17/46 [05:59<10:14, 21.20s/it]Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 1 in batch 16 - PPO step successful!
Extracted Info: The doctor will call out some of the physical exam findings....
Sample output: ( PLAT1)flavor text:
hotfl conversation ideaflpack
Want the most recent news, science alert:related ...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.05s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.04s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.22it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 0 in batch 17 - PPO step successful!
Extracted Info: Symptoms and duration...
Sample output: piracetone: http://www.clinney.com/patientshotarticlebreakfastpoeline:47_117316
monthstatepiratoradv...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.19s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.20s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  3.84it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


 39%|████████████████████████████▏                                           | 18/46 [06:19<09:46, 20.95s/it]Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 1 in batch 17 - PPO step successful!
Extracted Info: The doctor will examine the ankle and will recommend a course of treatment....
Sample output: your complaint: This may be a:
\_related_id:flair: alert[msg]bh:kidile\

icious_info...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.96s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.86s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  3.15it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 0 in batch 18 - PPO step successful!
Extracted Info: The doctor will examine the patient and recommend a physical exam....
Sample output: best thought #1442538: •Peng related alert,prin includedplainedplusspro.ainer potential,plup. #14425...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.08s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  2.00s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.07it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


 41%|█████████████████████████████▋                                          | 19/46 [06:40<09:24, 20.90s/it]Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 1 in batch 18 - PPO step successful!
Extracted Info: Symptoms and duration...
Sample output: possible helpful, ifaction:advise patient,antennant—no,engostays: included,partnerflin_packattantinf...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.32s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:06<00:00,  6.07s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.76it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 0 in batch 19 - PPO step successful!
Extracted Info: The doctor is concerned about the patient 's recent illness ....
Sample output: 
Generate a concise medical summary focusing on key findings and treatment plans. Include chief comp...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  2.00s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.99s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.14it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


 43%|███████████████████████████████▎                                        | 20/46 [07:05<09:33, 22.07s/it]Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 1 in batch 19 - PPO step successful!
Extracted Info: Symptoms and duration...
Sample output: http://www.commetresearch.com/resources/pr_05065112001_pheninf_pr_052.html

[ PART(8) #17761 #3771:[...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.12s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.12s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.23it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 0 in batch 20 - PPO step successful!
Extracted Info: The doctor is asking the patient about her symptoms and duration....
Sample output: PNU:theplanet[ang71[8]plains[msg32]>[PBRainerant[]

Symples:prist[...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.95s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.88s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  3.13it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'




Query 1 in batch 20 - PPO step successful!
Extracted Info: The doctor will check up on the patient's past medical history....
Sample output: adverse_patient:[relevant]report[ alert alert] Your best patient:|[[*]report{expression:bhub}[focuse...
Average reward: -1.0000


 46%|████████████████████████████████▊                                       | 21/46 [07:27<09:11, 22.07s/it]Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Checkpoint saved to D:/kshitij-weights-folder/gpt-2-tuned-ppo-checkpoints/batch_20


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.13s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.09s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  3.24it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 0 in batch 21 - PPO step successful!
Extracted Info: Symptoms and duration...
Sample output: pirations needed the previous_analysts included thoughts:

favorite author better_analysts not yet a...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.96s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.73s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  3.07it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


 48%|██████████████████████████████████▍                                     | 22/46 [07:48<08:41, 21.73s/it]Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 1 in batch 21 - PPO step successful!
Extracted Info: The patient is a female with acid reflux....
Sample output: —provide information or the research.
nyseless_bi:
—(leagueantas_adv thoughts, "Hey from my research...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.98s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.91s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  3.04it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 0 in batch 22 - PPO step successful!
Extracted Info: The doctor will recommend a course of antibiotics for the patient....
Sample output: PFLIDINGPLAPTERID:
Pat_|lux:|Your research result:[brighterblue}hothot[hot>bh\resourceshot@@...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.96s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.07it/s][A


Error in evaluate: division by zero


 50%|████████████████████████████████████                                    | 23/46 [08:07<08:04, 21.08s/it]Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 1 in batch 22 - PPO step successful!
Extracted Info: Raymond has been having trouble swallowing for a period of time. He has been having trouble swallowi...
Sample output: ...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.30s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:06<00:00,  6.10s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.82it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 0 in batch 23 - PPO step successful!
Extracted Info: The doctor is concerned about the patient 's recent illness ....
Sample output: 
Generate a concise medical summary focusing on key findings and treatment plans. Include chief comp...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.02s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.03s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.18it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


 52%|█████████████████████████████████████▌                                  | 24/46 [08:32<08:08, 22.20s/it]Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 1 in batch 23 - PPO step successful!
Extracted Info: The patient has a broken wrist....
Sample output: procedant:
Your patientresearching:[attention]uppectingant[is the likely outcome of the forward rese...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.94s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.95s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  3.27it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 0 in batch 24 - PPO step successful!
Extracted Info: Symptoms and duration...
Sample output: report thesym in the safe alert patient(s:
investigator[:[:]pi_pl[msg][[ ]adavant]|[i[](pl[result|[...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.91s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.75s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.64it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


 54%|███████████████████████████████████████▏                                | 25/46 [08:53<07:34, 21.66s/it]Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 1 in batch 24 - PPO step successful!
Extracted Info: Lawrence is a 62-year-old male with a past medical history significant for type i diabetes, congesti...
Sample output: <paran|in|in search for research[.]
<...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.98s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.89s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  5.30it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 0 in batch 25 - PPO step successful!
Extracted Info: Julia has had a heart attack and is undergoing treatment. She has had a stent placed in her heart. J...
Sample output: [br][ci][(eng)pr[001...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.03s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.03s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.31it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'




Query 1 in batch 25 - PPO step successful!
Extracted Info: The patient is currently taking painkillers to treat the pain....
Sample output: [par alert alert]
[investigation recommended patient information:
[Pup:[]([[]black[avi][[[Current al...
Average reward: -1.0000


 57%|████████████████████████████████████████▋                               | 26/46 [09:13<07:08, 21.42s/it]Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Checkpoint saved to D:/kshitij-weights-folder/gpt-2-tuned-ppo-checkpoints/batch_25


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.10s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.07s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  3.24it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 0 in batch 26 - PPO step successful!
Extracted Info: Symptoms and duration...
Sample output: You should:

[This topic: reported symptoms[msg]report[[relevant]]
[[this7165[][report]msg[[[#[{[[1....
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.03s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.88s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.80it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


 59%|██████████████████████████████████████████▎                             | 27/46 [09:35<06:45, 21.33s/it]Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 1 in batch 26 - PPO step successful!
Extracted Info: Symptoms and duration...
Sample output: [relevant_plains[msg]patient[1]is ill[[msg] Dr[[[this] explains howphenmet[[[[[Brand[[[[[][case[...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.98s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.99s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.47it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 0 in batch 27 - PPO step successful!
Extracted Info: The patient is a 31-year-old female with a history of diabetes and asthma....
Sample output: [sym:]keypro_238 patient[ant[: patient[[]this[report]][met[key[ex[...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.04s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.01s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.33it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


 61%|███████████████████████████████████████████▊                            | 28/46 [09:54<06:15, 20.87s/it]Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 1 in batch 27 - PPO step successful!
Extracted Info: The doctor will check up on the patient's health and recommend treatment....
Sample output: Pus:you reported theengustant[ant's physician:your{investigation[:bhup_pack\attpro_1][[](...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.91s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.91s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  5.17it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 0 in batch 28 - PPO step successful!
Extracted Info: Patient is a 60 year old, right-handed male, referred today for evaluation of numbness and tingling....
Sample output: invested information:
plant, hypothesis[13[[ patient included]*, patient[eng...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.92s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.94s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.38it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


 63%|█████████████████████████████████████████████▍                          | 29/46 [10:14<05:47, 20.42s/it]Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 1 in batch 28 - PPO step successful!
Extracted Info: Chief complaint is abnormal renal ultrasound with an atrophic right kidney....
Sample output: PLUG:
[insert patient]{friend[2][{:[msg[1]report[[[[_key[[[]{...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.95s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.96s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.55it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 0 in batch 29 - PPO step successful!
Extracted Info: William is a doctor. He is treating a patient who injured his knee....
Sample output: hotflair thoughtsplainingpricplound patient:
beck:
investing[ACTION_PLU[keykey:key]result[...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.97s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.99s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.15it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


 65%|██████████████████████████████████████████████▉                         | 30/46 [10:34<05:23, 20.23s/it]Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 1 in batch 29 - PPO step successful!
Extracted Info: Michael is seeing a doctor for back pain....
Sample output: invest in:
[: patient]msg08[[research]
[Michael Dr[[[[[[[up[[[[[[[P][{#ad...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.97s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  2.00s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.24it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 0 in batch 30 - PPO step successful!
Extracted Info: The doctor will examine the patient's knee....
Sample output: :: The guest reported the thought patient:
[adrow thoughts[1][Advicious thoughts[7][met[[2018:[[]([u...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.98s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.98s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.57it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'




Query 1 in batch 30 - PPO step successful!
Extracted Info: Melissa sanchez is a female patient who is being seen in the office for status post mitral valve rep...
Sample output: (:
report fl:>pulsefl Ms.sympor flpray incengenging{avi thought...
Average reward: -1.0000


 67%|████████████████████████████████████████████████▌                       | 31/46 [10:54<05:05, 20.37s/it]Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Checkpoint saved to D:/kshitij-weights-folder/gpt-2-tuned-ppo-checkpoints/batch_30


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.27s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:03<00:00,  3.21s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.72it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 0 in batch 31 - PPO step successful!
Extracted Info: Symptoms and duration...
Sample output: .
no longer should mention:
[symple[report](treatment thoughtivation[newengplplex patient[[name|new[...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.89s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.88s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.02it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


 70%|██████████████████████████████████████████████████                      | 32/46 [11:16<04:49, 20.65s/it]Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 1 in batch 31 - PPO step successful!
Extracted Info: The doctor will examine the patient and will recommend a course of treatment....
Sample output: plaint: the alert patienteng:
lookup: thehotfl: this topiceng:plkid todaypartagen:bhk:hanalertplkid...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.03s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.06s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.00it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 0 in batch 32 - PPO step successful!
Extracted Info: Symptoms and duration...
Sample output: INP:
investing the thoughts, ideas, andpr comments your patient:
kid thoughts:
result:pro patient,<k...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.95s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.97s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.19it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


 72%|███████████████████████████████████████████████████▋                    | 33/46 [11:36<04:25, 20.46s/it]Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 1 in batch 32 - PPO step successful!
Extracted Info: The doctor is concerned about the patient's health....
Sample output: expiration:engile seeking new information[amerant contact included:
Neet:Dric(8)hotex[newengengant p...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.95s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.94s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.30it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 0 in batch 33 - PPO step successful!
Extracted Info: The patient has been in a car accident and has been experiencing neck pain....
Sample output: adverse_plist:<br_viner<\ patient wasprov[msg[(friend.sym[key[msg>]}:[{...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.96s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.98s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.14it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


 74%|█████████████████████████████████████████████████████▏                  | 34/46 [11:55<04:03, 20.26s/it]Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 1 in batch 33 - PPO step successful!
Extracted Info: The doctor is asking the patient about her knee pain....
Sample output: Pound:ant or
[swedinger_pi]ant:key idea patient reportedkey topic:
[1 theadpr using rhumbian prospec...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.96s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.93s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.55it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 0 in batch 34 - PPO step successful!
Extracted Info: The patient has a foot ulcer that has been there for six weeks....
Sample output: invest interest in the patient: theenger reported best care ProviderPossible patient:expirater the p...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.19s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.18s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.29it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


 76%|██████████████████████████████████████████████████████▊                 | 35/46 [12:15<03:42, 20.18s/it]Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 1 in batch 34 - PPO step successful!
Extracted Info: The doctor will examine the ankle and will recommend a course of treatment....
Sample output: Sym report:
my health care[feynreport included included[{keyprkeyname[ Schiurgant[luxlux[result thou...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.25s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.24s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.21it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 0 in batch 35 - PPO step successful!
Extracted Info: Symptoms and duration...
Sample output: investigating treatmentplan idea:
patient:co:leagueantpeng
eng[parpet patient[pet[P[2013]*[invest[[P...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.87s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.87s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  5.41it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'




Query 1 in batch 35 - PPO step successful!
Extracted Info: Karen is a 34-year-old female with a history of chronic migraines and hypertension who is here today...
Sample output: PACTION:
P([K]up)�[[ult[bre_...
Average reward: -1.0000


 78%|████████████████████████████████████████████████████████▎               | 36/46 [12:36<03:24, 20.44s/it]

Checkpoint saved to D:/kshitij-weights-folder/gpt-2-tuned-ppo-checkpoints/batch_35


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.04s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.03s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.29it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 0 in batch 36 - PPO step successful!
Extracted Info: The patient has a complaint of ankle pain after a fall....
Sample output: (:)[adpiantultulinstainingmedicmsgile patient[:[[{possible patient[key1]][[[[[[[...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.09s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.09s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  3.24it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


 80%|█████████████████████████████████████████████████████████▉              | 37/46 [12:57<03:04, 20.47s/it]Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 1 in batch 36 - PPO step successful!
Extracted Info: Symptoms and duration...
Sample output: Possible complaint(s):
Possible patient(thisengengengengengengengengengengengeng[[{[hotengengengenge...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.04s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.99s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.89it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 0 in batch 37 - PPO step successful!
Extracted Info: The patient has a broken wrist....
Sample output: :
ENGRAIND EXplan: your patient's research.:
KP: yourhotant patient[patient]{facingant[new[[[[[][ Pl...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.05s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.05s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.35it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


 83%|███████████████████████████████████████████████████████████▍            | 38/46 [13:18<02:45, 20.63s/it]Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 1 in batch 37 - PPO step successful!
Extracted Info: Patient is 72-year-old woman with hypertension....
Sample output: [:[ Advicult(prot=[[]([[[[[[present[[[[[[[Med[[[pic[hot...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.89s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.90s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.47it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 0 in batch 38 - PPO step successful!
Extracted Info: The patient is a 37-year-old male with a complaint of left arm pain....
Sample output: pir_pr:pa_bh_hotami patient[bh[P_bh]patient[{p1}[bh]pl...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.99s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  2.00s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.39it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


 85%|█████████████████████████████████████████████████████████████           | 39/46 [13:38<02:22, 20.36s/it]Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 1 in batch 38 - PPO step successful!
Extracted Info: The doctor is trying to diagnose the patient with a medical condition....
Sample output: Couples are the most important relationships in your relationship.You are a family seeking a romanti...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.92s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.93s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.21it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 0 in batch 39 - PPO step successful!
Extracted Info: The patient is a 41-year-old female....
Sample output: MIND:
[lux topic]t:[collux[Pager]bi[{[phenet[cell[result]]{plant[@@[pr...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.98s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  2.00s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  3.92it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


 87%|██████████████████████████████████████████████████████████████▌         | 40/46 [13:58<02:01, 20.20s/it]Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 1 in batch 39 - PPO step successful!
Extracted Info: The doctor is asking the patient about his back pain....
Sample output: FINANAST:PR:[patient[random patient[f[related]expired_sym[]]p[[P[[agenic][[[charged...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.03s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.06s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  3.28it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 0 in batch 40 - PPO step successful!
Extracted Info: Symptoms and duration...
Sample output: symferred:
Advant[[c]investing[[[[[[[[[{[Eco[[[[[[[[[[[[[[[[ult...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.03s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.95s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.71it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'




Query 1 in batch 40 - PPO step successful!
Extracted Info: The patient is a 57-year-old female who is here for a surgical consult....
Sample output: (pr:leagueant:[[key]patient[[related] patient included[[[Reference[[[[[[[...
Average reward: -1.0000


 89%|████████████████████████████████████████████████████████████████▏       | 41/46 [14:19<01:42, 20.47s/it]Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Checkpoint saved to D:/kshitij-weights-folder/gpt-2-tuned-ppo-checkpoints/batch_40


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.10s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.01s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.28it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 0 in batch 41 - PPO step successful!
Extracted Info: chief complaint is acid reflux...
Sample output: prister:key[[Symple patient:key[ex[[[[[[[[[[[[[[[[[[[[[[[[[[[...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.96s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.02s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.37it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


 91%|█████████████████████████████████████████████████████████████████▋      | 42/46 [14:39<01:21, 20.36s/it]Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 1 in batch 41 - PPO step successful!
Extracted Info: The doctor is asking the patient about his back pain....
Sample output: ex:[1][:[[[[[[[{ Hulsult[[[[[[[current patient[ Huls[[engant[...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.92s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.93s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  5.27it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 0 in batch 42 - PPO step successful!
Extracted Info: The patient is an 82-year-old male with past medical history significant for hypertension and stage ...
Sample output: sym reported in the discussion:...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.01s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.02s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  3.26it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


 93%|███████████████████████████████████████████████████████████████████▎    | 43/46 [14:59<01:00, 20.19s/it]Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 1 in batch 42 - PPO step successful!
Extracted Info: The patient has a complaint of knee pain....
Sample output: possible patient: patient in discussion topic:
co:
investing in the discussion:
pr:
luk:>>>>>>>>>>>>...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.02s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.93s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.43it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 0 in batch 43 - PPO step successful!
Extracted Info: The doctor is asking the patient about his current medical condition....
Sample output: Pus: included patient.flunk the patient information:pr comment patient in the patient[[investing]:
1...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.04s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  2.00s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.20it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


 96%|████████████████████████████████████████████████████████████████████▊   | 44/46 [15:18<00:40, 20.13s/it]Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 1 in batch 43 - PPO step successful!
Extracted Info: The doctor will discuss the patient's symptoms and current medications....
Sample output: call:
cenging:met patient[behavior thoughtspr[behavior[[[related[:key][ant[[case[[br[[[...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.02s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.03s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  3.26it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 0 in batch 44 - PPO step successful!
Extracted Info: Symptoms and duration...
Sample output: in interested, based in the discussion
IN:
Neeting: Sunkprowult[relevant[[ P ]erer[ Ms.P[@[ Ms.S[[[@...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  2.00s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.97s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.52it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


 98%|██████████████████████████████████████████████████████████████████████▍ | 45/46 [15:39<00:20, 20.09s/it]Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 1 in batch 44 - PPO step successful!
Extracted Info: The doctor will discuss the patient's symptoms and current medications....
Sample output: investing thoughts:
pr[[adpet the topic] patient:[[[Pat[[ Mikkel Ackes]([[[[[[[[...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.96s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.97s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.19it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Query 0 in batch 45 - PPO step successful!
Extracted Info: chief complaint is back pain...
Sample output: partic the idea:
(plains: patient recommended co:
engile patient[[bre: patient[[[{P24[[[Pat[[P] rhyn...
Average reward: -1.0000


Both `max_new_tokens` (=96) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Evaluating coherence of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.97s/it][A


Evaluating consistency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  2.00s/it][A


Evaluating fluency of 2 samples !!!



  0%|                                                                                  | 0/1 [00:00<?, ?it/s][A
100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.08it/s][A


Evaluating relevance of 2 samples !!!
Error in evaluate: 'reference'




Query 1 in batch 45 - PPO step successful!
Extracted Info: Symptoms and duration...
Sample output: if:
expiratory thoughts:
yn:[relevant relenged no symptoms orexing.P inpiration:enging discussion[en...
Average reward: -1.0000


100%|████████████████████████████████████████████████████████████████████████| 46/46 [16:00<00:00, 20.87s/it]

Checkpoint saved to D:/kshitij-weights-folder/gpt-2-tuned-ppo-checkpoints/batch_45
✅ Epoch 3/3 complete





Epoch checkpoint saved to D:/kshitij-weights-folder/gpt-2-tuned-ppo-epochs/epoch_3
🎉 PPO fine-tuning done
Model saved to D:\kshitij-weights-folder\gpt-2-tuned-ppo-extracted


In [None]:
ppo_trainer.model.pretrained_model.save_pretrained("D:\kshitij-weights-folder\gpt-2-tuned-ppo")
tokenizer.save_pretrained("D:\kshitij-weights-folder\gpt-2-tuned-ppo")


In [5]:
# ════════════════════════════════════════════════════════════════
# Enhanced Testing Script for Reward-Based Trained Model
# ════════════════════════════════════════════════════════════════
import os
import torch
import pandas as pd
from tqdm import tqdm
import numpy as np
import matplotlib.pyplot as plt
# import seaborn as sns
import time

# Create results directory
timestamp = time.strftime("%Y%m%d-%H%M%S")
RESULTS_DIR = f"evaluation_results_{timestamp}"
os.makedirs(RESULTS_DIR, exist_ok=True)

# Check if CUDA is available for inference
CUDA_AVAILABLE = torch.cuda.is_available()
DEVICE = "cuda" if CUDA_AVAILABLE else "cpu"
print(f"Using device: {DEVICE} for inference")

# ────────────────────────────────────────────────────────────────
# 1) Load the trained model weights
# ────────────────────────────────────────────────────────────────
from transformers import GPT2LMHeadModel, GPT2Tokenizer

# Path to the saved model weights - try all promising models
MODEL_PATHS = [
    r"C:\Users\BMSCE CSE.DESKTOP-IUB6THA\Downloads\kshitij\reward_model_best_20250504-131132\best_model",  # Best model if available
]

# Try to load the best available model
loaded_model_path = None
for path in MODEL_PATHS:
    if os.path.exists(path):
        loaded_model_path = path
        break

if not loaded_model_path:
    raise ValueError("No valid model path found. Please check the model paths.")

print(f"Loading model from {loaded_model_path}")
tokenizer = GPT2Tokenizer.from_pretrained(loaded_model_path)
model = GPT2LMHeadModel.from_pretrained(loaded_model_path)

# Ensure pad token is set
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
    model.config.pad_token_id = tokenizer.eos_token_id
tokenizer.padding_side = "left"  # Important for decoder-only models
model = model.to(DEVICE)
model.eval()  # Set to evaluation mode

# Define prompt templates to test - using the same ones from training
PROMPT_TEMPLATES = [
    "Summarize the following conversation:\n\n",
    "Generate a concise medical summary of the conversation:\n\n",
    "Extract key medical information from the following conversation:\n\n",
    "Provide a summary of this medical dialogue:\n\n"
]

# ────────────────────────────────────────────────────────────────
# 2) Prepare test dataset with more samples
# ────────────────────────────────────────────────────────────────
# Load test data
df = pd.read_csv(r"C:\Users\BMSCE CSE.DESKTOP-IUB6THA\Downloads\kshitij\combined_clinical_notes.csv")
test_size = 30  # Using more samples for more robust evaluation
test_df = df.sample(test_size, random_state=42)  # Fixed random state for reproducibility

# Prepare test dialogues and references
dialogues = test_df["dialogue"].tolist()
references = test_df["note"].tolist()

# ────────────────────────────────────────────────────────────────
# 3) Generate summaries with multiple generation strategies
# ────────────────────────────────────────────────────────────────
print("Generating summaries with multiple strategies...")

# We'll test different generation strategies
generation_strategies = {
    "greedy": {
        "description": "Greedy Decoding",
        "params": {
            "do_sample": False,
            "num_beams": 1,
            "max_new_tokens": 128,
            "no_repeat_ngram_size": 3,
        }
    },
    "beam": {
        "description": "Beam Search",
        "params": {
            "do_sample": False,
            "num_beams": 5,
            "max_new_tokens": 128,
            "no_repeat_ngram_size": 3,
        }
    },
    "sample": {
        "description": "Sampling",
        "params": {
            "do_sample": True,
            "temperature": 0.7,
            "top_p": 0.92,
            "top_k": 50,
            "max_new_tokens": 128,
            "no_repeat_ngram_size": 3,
        }
    }
}

# Try all prompt templates with best generation strategy
prompt_results = {}
for template_name, template in zip(["basic", "medical", "extract", "summary"], PROMPT_TEMPLATES):
    print(f"\nGenerating with template: {template_name}")
    
    batch_size = 4
    num_samples = len(dialogues)
    num_batches = (num_samples + batch_size - 1) // batch_size
    predictions = []
    
    for i in tqdm(range(num_batches)):
        start, end = i*batch_size, min((i+1)*batch_size, num_samples)
        convs = dialogues[start:end]
        
        # Create prompts with this template
        prompts = [
            f"{template}{c}"
            for c in convs if len(str(c).strip()) > 10
        ]
        
        if not prompts:
            continue
        
        # Tokenize
        enc = tokenizer(
            prompts,
            padding=True,
            truncation=True,
            max_length=512,
            return_tensors="pt"
        ).to(DEVICE)
        
        # Use beam search for best quality
        with torch.no_grad():
            out_ids = model.generate(
                input_ids=enc.input_ids,
                attention_mask=enc.attention_mask,
                **generation_strategies["beam"]["params"]
            )
        
        # Decode
        dec = tokenizer.batch_decode(out_ids, skip_special_tokens=True)
        
        # Extract only the generated part (not including the prompt)
        cleaned_predictions = []
        for p, original_prompt in zip(dec, prompts):
            # Try to extract just the generated part
            if original_prompt in p:
                generated_part = p[p.find(original_prompt) + len(original_prompt):]
                cleaned_predictions.append(generated_part.strip())
            else:
                # If we can't find the prompt, use the whole generation
                cleaned_predictions.append(p.strip())
        
        predictions.extend(cleaned_predictions)
    
    prompt_results[template_name] = predictions[:num_samples]  # Ensure same length

# ────────────────────────────────────────────────────────────────
# 4) Evaluate with UniEval
# ────────────────────────────────────────────────────────────────
import sys
sys.path.append(r"C:\Users\BMSCE CSE.DESKTOP-IUB6THA\Downloads\kshitij\UniEval")
from utils import convert_to_json
from metric.evaluator import get_evaluator

# Keep UniEval on CPU (more stable)
sum_eval = get_evaluator("summarization", device="cuda")

# Evaluate each prompt template
template_scores = {}

for template_name, predictions in prompt_results.items():
    print(f"\nEvaluating template: {template_name}")
    
    # Trim predictions and references to the same length
    min_len = min(len(predictions), len(references), len(dialogues))
    template_predictions = predictions[:min_len]
    template_references = references[:min_len]
    template_dialogues = dialogues[:min_len]
    
    # Create JSON data for UniEval
    data = convert_to_json(
        src_list=[str(d) for d in template_dialogues],
        ref_list=[str(r) for r in template_references],
        output_list=[str(p) for p in template_predictions]
    )
    
    # Run evaluation
    print(f"Running UniEval for {template_name}...")
    scores = sum_eval.evaluate(data, print_result=True)
    template_scores[template_name] = scores

# ────────────────────────────────────────────────────────────────
# 5) Find the best template and analyze results
# ────────────────────────────────────────────────────────────────
template_avgs = {}

for template_name, scores in template_scores.items():
    coherence_scores = [item["coherence"] for item in scores]
    consistency_scores = [item["consistency"] for item in scores]
    fluency_scores = [item["fluency"] for item in scores]
    relevance_scores = [item["relevance"] for item in scores]
    
    # Calculate averages
    avg_coherence = sum(coherence_scores) / len(coherence_scores)
    avg_consistency = sum(consistency_scores) / len(consistency_scores)
    avg_fluency = sum(fluency_scores) / len(fluency_scores)
    avg_relevance = sum(relevance_scores) / len(relevance_scores)
    overall_avg = (avg_coherence + avg_consistency + avg_fluency + avg_relevance) / 4
    
    template_avgs[template_name] = {
        "coherence": avg_coherence,
        "consistency": avg_consistency,
        "fluency": avg_fluency,
        "relevance": avg_relevance,
        "overall": overall_avg
    }
    
    print(f"\n=== {template_name.title()} Template Results ===")
    print(f"Average Coherence: {avg_coherence:.4f}")
    print(f"Average Consistency: {avg_consistency:.4f}")
    print(f"Average Fluency: {avg_fluency:.4f}")
    print(f"Average Relevance: {avg_relevance:.4f}")
    print(f"Overall Average: {overall_avg:.4f}")

# Find the best template
best_template = max(template_avgs.items(), key=lambda x: x[1]["overall"])
print(f"\n🏆 Best Template: {best_template[0].title()} with Overall Score: {best_template[1]['overall']:.4f}")

# Use the best template's predictions and scores for the rest of the analysis
best_predictions = prompt_results[best_template[0]]
best_scores = template_scores[best_template[0]]

# ────────────────────────────────────────────────────────────────
# 6) Visualize results with enhanced charts
# ────────────────────────────────────────────────────────────────
# Set up the plotting style
plt.style.use('seaborn-v0_8-whitegrid')

# Prepare data for visualization
template_names = list(template_avgs.keys())
coherence_avgs = [data["coherence"] for data in template_avgs.values()]
consistency_avgs = [data["consistency"] for data in template_avgs.values()]
fluency_avgs = [data["fluency"] for data in template_avgs.values()]
relevance_avgs = [data["relevance"] for data in template_avgs.values()]
overall_avgs = [data["overall"] for data in template_avgs.values()]

# Create comparison bar chart
metrics = ['Coherence', 'Consistency', 'Fluency', 'Relevance', 'Overall']
x = np.arange(len(template_names))
width = 0.15

fig, ax = plt.figure(figsize=(14, 8)), plt.subplot(111)
rects1 = ax.bar(x - 2*width, coherence_avgs, width, label='Coherence', color='#3274A1')
rects2 = ax.bar(x - width, consistency_avgs, width, label='Consistency', color='#E1812C')
rects3 = ax.bar(x, fluency_avgs, width, label='Fluency', color='#3A923A')
rects4 = ax.bar(x + width, relevance_avgs, width, label='Relevance', color='#C03D3E')
rects5 = ax.bar(x + 2*width, overall_avgs, width, label='Overall', color='#9372B2')

ax.set_title('Performance Comparison by Prompt Template', fontsize=16)
ax.set_xlabel('Prompt Template', fontsize=14)
ax.set_ylabel('Score', fontsize=14)
ax.set_ylim(0, 1.0)
ax.set_xticks(x)
ax.set_xticklabels([name.title() for name in template_names], fontsize=12)
ax.legend(fontsize=12)

# Add value labels on bars
def autolabel(rects):
    for rect in rects:
        height = rect.get_height()
        ax.annotate(f'{height:.3f}',
                    xy=(rect.get_x() + rect.get_width() / 2, height),
                    xytext=(0, 3),  # 3 points vertical offset
                    textcoords="offset points",
                    ha='center', va='bottom', fontsize=8)

autolabel(rects1)
autolabel(rects2)
autolabel(rects3)
autolabel(rects4)
autolabel(rects5)

plt.tight_layout()
plt.savefig(os.path.join(RESULTS_DIR, "template_comparison.png"), dpi=300)
plt.close()


Using device: cuda for inference
Loading model from C:\Users\BMSCE CSE.DESKTOP-IUB6THA\Downloads\kshitij\reward_model_best_20250504-131132\best_model




Generating summaries with multiple strategies...

Generating with template: basic


  0%|                                                                                  | 0/8 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
 12%|█████████▎                                                                | 1/8 [00:08<01:00,  8.68s/it]Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
 25%|██████████████████▌                                                       | 2/8 [00:17<00:51,  8.53s/it]Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
 38%|███████████████████████████▊                                              | 3/8 [00:25<00:42,  8.44s/it]Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
 50%|█████████████████████████████████████                                     | 4/8 [00:33<00:33,  8.41s/it]Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
 62%|██████████████████████████████████████████████▎                           | 5/8 [00:41<00:


Generating with template: medical


  0%|                                                                                  | 0/8 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
 12%|█████████▎                                                                | 1/8 [00:08<00:58,  8.36s/it]Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
 25%|██████████████████▌                                                       | 2/8 [00:16<00:50,  8.36s/it]Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
 38%|███████████████████████████▊                                              | 3/8 [00:25<00:41,  8.39s/it]Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
 50%|█████████████████████████████████████                                     | 4/8 [00:33<00:33,  8.36s/it]Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
 62%|██████████████████████████████████████████████▎                           | 5/8 [00:41<00:


Generating with template: extract


  0%|                                                                                  | 0/8 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
 12%|█████████▎                                                                | 1/8 [00:08<00:56,  8.12s/it]Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
 25%|██████████████████▌                                                       | 2/8 [00:16<00:49,  8.30s/it]Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
 38%|███████████████████████████▊                                              | 3/8 [00:24<00:41,  8.33s/it]Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
 50%|█████████████████████████████████████                                     | 4/8 [00:33<00:33,  8.31s/it]Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
 62%|██████████████████████████████████████████████▎                           | 5/8 [00:41<00:


Generating with template: summary


  0%|                                                                                  | 0/8 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
 12%|█████████▎                                                                | 1/8 [00:08<00:57,  8.24s/it]Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
 25%|██████████████████▌                                                       | 2/8 [00:16<00:49,  8.21s/it]Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
 38%|███████████████████████████▊                                              | 3/8 [00:24<00:40,  8.12s/it]Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
 50%|█████████████████████████████████████                                     | 4/8 [00:32<00:32,  8.20s/it]Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
 62%|██████████████████████████████████████████████▎                           | 5/8 [00:41<00:


Evaluating template: basic
Running UniEval for basic...
Evaluating coherence of 30 samples !!!


100%|██████████████████████████████████████████████████████████████████████████| 4/4 [00:02<00:00,  1.40it/s]


Evaluating consistency of 30 samples !!!


100%|████████████████████████████████████████████████████████████████████████| 53/53 [00:38<00:00,  1.36it/s]


Evaluating fluency of 30 samples !!!


100%|████████████████████████████████████████████████████████████████████████| 53/53 [00:08<00:00,  6.55it/s]


Evaluating relevance of 30 samples !!!


100%|██████████████████████████████████████████████████████████████████████████| 4/4 [00:02<00:00,  1.43it/s]



Evaluation scores are shown below:
+-------------+----------+
|  Dimensions |  Score   |
+-------------+----------+
|  coherence  | 0.729489 |
| consistency | 0.693662 |
|   fluency   | 0.664537 |
|  relevance  | 0.637932 |
|   overall   | 0.681405 |
+-------------+----------+

Evaluating template: medical
Running UniEval for medical...
Evaluating coherence of 30 samples !!!


100%|██████████████████████████████████████████████████████████████████████████| 4/4 [00:02<00:00,  1.42it/s]


Evaluating consistency of 30 samples !!!


100%|████████████████████████████████████████████████████████████████████████| 54/54 [00:40<00:00,  1.34it/s]


Evaluating fluency of 30 samples !!!


100%|████████████████████████████████████████████████████████████████████████| 54/54 [00:08<00:00,  6.34it/s]


Evaluating relevance of 30 samples !!!


100%|██████████████████████████████████████████████████████████████████████████| 4/4 [00:02<00:00,  1.43it/s]



Evaluation scores are shown below:
+-------------+----------+
|  Dimensions |  Score   |
+-------------+----------+
|  coherence  | 0.714625 |
| consistency | 0.696109 |
|   fluency   | 0.642933 |
|  relevance  | 0.59011  |
|   overall   | 0.660944 |
+-------------+----------+

Evaluating template: extract
Running UniEval for extract...
Evaluating coherence of 30 samples !!!


100%|██████████████████████████████████████████████████████████████████████████| 4/4 [00:02<00:00,  1.42it/s]


Evaluating consistency of 30 samples !!!


100%|████████████████████████████████████████████████████████████████████████| 55/55 [00:48<00:00,  1.14it/s]


Evaluating fluency of 30 samples !!!


100%|████████████████████████████████████████████████████████████████████████| 55/55 [00:08<00:00,  6.62it/s]


Evaluating relevance of 30 samples !!!


100%|██████████████████████████████████████████████████████████████████████████| 4/4 [00:02<00:00,  1.43it/s]



Evaluation scores are shown below:
+-------------+----------+
|  Dimensions |  Score   |
+-------------+----------+
|  coherence  | 0.771288 |
| consistency | 0.725809 |
|   fluency   | 0.646127 |
|  relevance  | 0.659091 |
|   overall   | 0.700579 |
+-------------+----------+

Evaluating template: summary
Running UniEval for summary...
Evaluating coherence of 30 samples !!!


100%|██████████████████████████████████████████████████████████████████████████| 4/4 [00:04<00:00,  1.24s/it]


Evaluating consistency of 30 samples !!!


100%|████████████████████████████████████████████████████████████████████████| 54/54 [00:46<00:00,  1.17it/s]


Evaluating fluency of 30 samples !!!


100%|████████████████████████████████████████████████████████████████████████| 54/54 [00:08<00:00,  6.19it/s]


Evaluating relevance of 30 samples !!!


100%|██████████████████████████████████████████████████████████████████████████| 4/4 [00:02<00:00,  1.45it/s]



Evaluation scores are shown below:
+-------------+----------+
|  Dimensions |  Score   |
+-------------+----------+
|  coherence  | 0.690897 |
| consistency | 0.696593 |
|   fluency   | 0.627376 |
|  relevance  | 0.621353 |
|   overall   | 0.659055 |
+-------------+----------+

=== Basic Template Results ===
Average Coherence: 0.7295
Average Consistency: 0.6937
Average Fluency: 0.6645
Average Relevance: 0.6379
Overall Average: 0.6814

=== Medical Template Results ===
Average Coherence: 0.7146
Average Consistency: 0.6961
Average Fluency: 0.6429
Average Relevance: 0.5901
Overall Average: 0.6609

=== Extract Template Results ===
Average Coherence: 0.7713
Average Consistency: 0.7258
Average Fluency: 0.6461
Average Relevance: 0.6591
Overall Average: 0.7006

=== Summary Template Results ===
Average Coherence: 0.6909
Average Consistency: 0.6966
Average Fluency: 0.6274
Average Relevance: 0.6214
Overall Average: 0.6591

🏆 Best Template: Extract with Overall Score: 0.7006


In [9]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load the trained model
model_path = r"D:\kshitij-weights-folder\qwen-aloe-rl-12-4-ppo-tuned"
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(model_path)

# Ensure pad token is set
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
    model.config.pad_token_id = tokenizer.eos_token_id

# Set up device
device = "cuda" if torch.cuda.is_available() else "cpu"
model = model.to(device)
model.eval()

# Create a synthetic medical conversation
synthetic_conversation = """
[doctor] Good morning, Mrs. Johnson. How are you feeling today?
[patient] Good morning, Doctor. Not too well. I've been having this persistent cough for about 3 weeks now. It's worse at night and I'm bringing up yellowish phlegm.
[doctor] I'm sorry to hear that. Have you had any fever or chills?
[patient] Yes, I've had a low-grade fever of about 99.5°F for the past week. I've also been feeling unusually tired.
[doctor] And how about shortness of breath or chest pain?
[patient] I do get short of breath sometimes, especially when walking up stairs. No chest pain though.
[doctor] Are you taking any medications currently?
[patient] Just my regular blood pressure medicine - Lisinopril 10mg once daily. I've been taking some over-the-counter cough syrup but it's not helping much.
[doctor] Have you had any recent illnesses or been around anyone who's been sick?
[patient] My grandson had a bad cold about a month ago. I was taking care of him for a few days.
[doctor] Let me examine you. Your temperature is 99.8°F now. Blood pressure is 138/85, which is slightly elevated. Your oxygen saturation is 94%, which is a bit lower than I'd like to see.
[doctor] When I listen to your lungs, I can hear some crackles in the lower right lobe. I think we should get a chest X-ray to rule out pneumonia.
[patient] That sounds concerning. Do you think it's serious?
[doctor] It could be a case of bacterial pneumonia. I'll prescribe an antibiotic - azithromycin - for 5 days. If it is pneumonia, you should start feeling better within 48-72 hours on the antibiotics.
[doctor] I'd also like you to use this inhaler - it's albuterol - to help with the breathing when you feel short of breath. Two puffs every 4-6 hours as needed.
[patient] Should I continue with the over-the-counter cough medicine?
[doctor] You can continue with it at night if the cough is keeping you up, but it's actually good to cough during the day to clear the infection from your lungs.
[doctor] Make sure to drink plenty of fluids, get rest, and come back if you don't start feeling better in a few days, or if you develop high fever, severe shortness of breath, or chest pain.
[patient] Thank you, Doctor. I'll follow your instructions.
[doctor] I'll see you for a follow-up in one week, but call if anything worsens before then.
"""

# Use the "medical" template which performed best in your evaluation
prompt_template = "Generate a concise medical summary of the conversation:\n\n"
prompt = f"{prompt_template}{synthetic_conversation}"

# Tokenize the input
inputs = tokenizer(prompt, return_tensors="pt").to(device)

# Generate summary with beam search for better quality
with torch.no_grad():
    output_ids = model.generate(
        input_ids=inputs.input_ids,
        attention_mask=inputs.attention_mask,
        do_sample=False,
        num_beams=5,
        max_new_tokens=128,
        no_repeat_ngram_size=3
    )

# Decode the output
generated_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)

# Extract just the generated part
if prompt in generated_text:
    summary = generated_text[generated_text.find(prompt) + len(prompt):]
else:
    summary = generated_text

# Display results
print("\n=== Input Medical Conversation ===")
print(synthetic_conversation[:300] + "...")
print("\n=== Generated Medical Summary ===")
print(summary)

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Downloading shards: 100%|██████████████████████████████████████████████████████| 4/4 [03:57<00:00, 59.38s/it]
Loading checkpoint shards:   0%|                                                       | 0/4 [00:00<?, ?it/s]


OSError: The paging file is too small for this operation to complete. (os error 1455)