In [None]:
# from google.colab import drive
# drive.mount('/content/drive')

Mounted at /content/drive


In [9]:
!pip install trl==0.7.4
!pip install datasets
!pip install transformers==4.38.2
!pip install peft==0.10.0
!pip install accelerate==0.28.0
!pip install bitsandbytes

Defaulting to user installation because normal site-packages is not writeable
Defaulting to user installation because normal site-packages is not writeable
Defaulting to user installation because normal site-packages is not writeable
^C
Defaulting to user installation because normal site-packages is not writeable
Defaulting to user installation because normal site-packages is not writeable
Defaulting to user installation because normal site-packages is not writeable


In [None]:
!pip install --upgrade transformers

In [None]:
!pip install trl==0.4.7 transformers==4.29.0

In [None]:
!pip install --upgrade peft
!pip install --upgrade trl

In [None]:
!pip install trl==0.11.3

In [1]:
import random

import numpy as np
import torch
import pandas as pd

from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    Trainer,
    TrainingArguments,
    default_data_collator,
)

def set_seed(seed_val=42):
    random.seed(seed_val)
    np.random.seed(seed_val)
    torch.manual_seed(seed_val)
    torch.cuda.manual_seed_all(seed_val)

#Configuration options
train_batch_size = 16
gradient_accumulation_steps = 1
learning_rate = 1e-5
eval_batch_size = 1
eval_steps = 500
max_input_length = 550
save_steps = 1000
num_train_epochs = 20
random.seed(42)




  from .autonotebook import tqdm as notebook_tqdm


## Creating the policy model for human Evaluation

In [3]:
import pandas as pd

splits = {'train': 'data/train-00000-of-00001-e8c59e5cf7bce1c0.parquet', 'test': 'data/test-00000-of-00001-59ffb27399371eac.parquet', 'valid': 'data/valid-00000-of-00001-0e33e6bd86e3edc9.parquet'}

In [5]:
 df = pd.read_parquet("hf://datasets/CarperAI/openai_summarize_tldr/" + splits["test"])

In [7]:
import bitsandbytes as bnb
from peft import LoraConfig, get_peft_model, TaskType
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    BitsAndBytesConfig,
    TrainingArguments,
    Trainer
)

In [73]:
import json

import pandas as pd
import torch
from datasets import load_dataset
from torch.utils.data import Dataset

# 1) 4-bit quant config
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
)

# 2) Load base model in 4-bit
base_model = AutoModelForCausalLM.from_pretrained(
    "HPAI-BSC/Qwen2.5-Aloe-Beta-7B",
    quantization_config=bnb_config,
    device_map="auto",
)

# tokenizer = AutoTokenizer.from_pretrained("HPAI-BSC/Qwen2.5-Aloe-Beta-7B")
# tokenizer.pad_token = tokenizer.eos_token
# tokenizer.padding_side = "left"

# 3) Apply LoRA
lora_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    r=8,
    lora_alpha=32,
    lora_dropout=0.1,
    bias="none",
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"], 
)
model = get_peft_model(base_model, lora_config)

# 4) Enable gradient checkpointing
model.enable_input_require_grads()
model.gradient_checkpointing_enable()
base_model.config.use_cache = False

class TLDRDataset(Dataset):
    def __init__(self, train_path, tokenizer, split, max_length=256):
        self.post_list = []
        dataset = (pd.read_parquet(train_path))[:1000]
        self.labels = []

        for sample in dataset.iterrows():
            self.post_list.append(sample[1]["prompt"])
            self.labels.append(sample[1]["label"])

        self.tokenizer = tokenizer
        self.max_length = max_length
        self.input_ids = []
        self.attn_masks = []

    def __len__(self):
        return len(self.post_list)

    def __getitem__(self, idx):
        txt = self.post_list[idx]
        label = self.labels[idx]

        # encodings_dict = self.tokenizer(txt, truncation=True, max_length=self.max_length, padding="max_length")
        # encodings_dict_label = self.tokenizer(label,truncation=True, max_length=self.max_length, padding="max_length")
        # input_ids = torch.tensor(encodings_dict["input_ids"])
        # attn_masks = torch.tensor(encodings_dict["attention_mask"])
        # labels_ids = torch.tensor(encodings_dict_label["input_ids"])
        # return {
        #     "input_ids": input_ids,
        #     "attention_mask": attn_masks,
        #     "labels": labels_ids,
        # }

        encodings = self.tokenizer(
            txt,
            truncation=True,
            max_length=self.max_length,
            padding="max_length",
            return_tensors="pt",
        )
        input_ids = encodings["input_ids"].squeeze()
        attention_mask = encodings["attention_mask"].squeeze()

        labels = input_ids.clone()
        labels[labels == self.tokenizer.pad_token_id] = -100

        return {
            "input_ids": input_ids,
            "attention_mask": attention_mask,
            "labels": labels,
        }



Loading checkpoint shards: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4/4 [00:07<00:00,  1.87s/it]


In [None]:
# Check if all model parameters require gradients
for param in model.parameters():
    print(param.requires_grad)

In [53]:
tokenizer = AutoTokenizer.from_pretrained("HPAI-BSC/Qwen2.5-Aloe-Beta-7B")
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "left"
model.resize_token_embeddings(len(tokenizer))
tokenizer.pad_token_id = tokenizer.eos_token_id
model.config.end_token_id = tokenizer.eos_token_id
model.config.pad_token_id = model.config.eos_token_id

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [75]:
# Set up the datasets
data_path = "hf://datasets/CarperAI/openai_summarize_tldr/" + splits["train"]
train_dataset = TLDRDataset(
    data_path,
    tokenizer,
    "train",
    max_length=256,
)


In [76]:
for i in train_dataset:
    print(i["input_ids"], i["labels"])
    break

tensor([29038,   787,  4103,   952,    25,   435,    14, 85824,   198, 50328,
           25,   358,   320,    69,    14,    17,    17,     8,   614,   311,
         7071,   700,   421,   358,  1366,   311,  2058,  1414,  1493,  7571,
          476,   537,   323,  1035, 12213,   311,  5112, 67092,   198,  2946,
           25,  2806,  2704,   421,   419, 17180,  1588,   714,   432,   594,
         5802,   264,  1430,    13,  4710,  3707, 26485,   510,  4498,   358,
          320,    69,    14,    17,    17,     8,  3937,  1526,   847,  1156,
         1931, 84498,   220,    17,  1635,  4134,  1576,   566,  4362,  3550,
         1283,   264,  1042,   315,  4924,   926,   437,   220,   432, 88489,
          752,   803,  1091,   358,  3381,    13,  1084,   572,   264, 27102,
          882,   304,   847,  2272,  4152,   311,  5382,   448,   847,  6554,
          323,  5499,  3432,   279,  6012,   311,  3931,  1059,   700,   315,
          847,  2272,    13,   358,   646, 16698,  1576,   315, 

In [17]:
torch.cuda.set_device(0)

In [65]:
output_dir = "./aloe-qwen-rl-trial-run"

In [81]:
# Prepare the trainer and start training
training_args = TrainingArguments(
    output_dir=output_dir,
    learning_rate=learning_rate,
    per_device_train_batch_size=train_batch_size,
    num_train_epochs=2,
    warmup_steps=100,
    gradient_accumulation_steps=8,      # accumulate to compensate for small batch
    # evaluation_strategy="epoch",       # you can skip or reduce evaluation
    fp16=True,
    logging_steps=50,
    save_strategy="epoch",
    save_total_limit=2,
    dataloader_pin_memory=True,
    dataloader_drop_last=True,
    gradient_checkpointing=True,
)

In [69]:
training_args.device.index

0

In [83]:
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
#     compute_metrics=compute_metrics,
#     data_collator=default_data_collator,
#     preprocess_logits_for_metrics=preprocess_logits_for_metrics
    tokenizer=tokenizer,
)
trainer.train()

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
  self.scaler = torch.cuda.amp.GradScaler(**kwargs)
  return fn(*args, **kwargs)


Step,Training Loss


  return fn(*args, **kwargs)


TrainOutput(global_step=14, training_loss=2.557460512433733, metrics={'train_runtime': 278.8468, 'train_samples_per_second': 7.172, 'train_steps_per_second': 0.05, 'total_flos': 1.9475853943504896e+16, 'train_loss': 2.557460512433733, 'epoch': 1.81})

In [85]:
trainer.save_model("aloe-qwen-rl-trial-run/")   ##path to save policy model



In [None]:
import shutil
import os

source_dirs = ["/content/drive/MyDrive/BMS/Medical Dialogue Summarization using PPO/summarization_policy_new", "/content/wandb", "/content/Output"]
destination = "/content/drive/MyDrive/BMS/Medical Dialogue Summarization using PPO"

os.makedirs(destination, exist_ok=True)

# Copy each directory to the destination
for src in source_dirs:
    if os.path.exists(src):
        dest_path = os.path.join(destination, os.path.basename(src))
        shutil.copytree(src, dest_path, dirs_exist_ok=True)  # Copy with merging existing directories
        print(f"Copied {src} to {dest_path}")
    else:
        print(f"Skipping {src}, does not exist.")

print("Copy operation completed.")


Copied /content/summarization_policy_new to /content/drive/MyDrive/Medical Dialogue Summarization using PPO/summarization_policy_new
Copied /content/wandb to /content/drive/MyDrive/Medical Dialogue Summarization using PPO/wandb
Copied /content/Output to /content/drive/MyDrive/Medical Dialogue Summarization using PPO/Output
Copy operation completed.


In [87]:
from transformers import AutoTokenizer
from transformers import AutoModelForCausalLM

# model = AutoModelForCausalLM.from_pretrained("aloe-qwen-rl-trial-run/")
model_path = "HPAI-BSC/Qwen2.5-Aloe-Beta-7B"

tokenizer = AutoTokenizer.from_pretrained(model_path, truncation=True, max_length=256, padding="max_length")
text = df.iloc[2]["prompt"]
tokenized_text = tokenizer(text, return_tensors="pt", max_length=256)

tokenized_text

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


{'input_ids': tensor([[29038,   787,  4103,   952,    25,   435,    14, 85824,   198, 50328,
            25,   576,  3743,   508,    17,    21,   434,    60,   358,   508,
            17,    17,   386,    60,   614,  1012,  9120,   369,   264,  2254,
          3207,   944,  5889,   311,   752,   518,   678, 13671,  1393, 20704,
           700,   448,   264,  4238,   508,    93,    18,    15,    30,   386,
         26126,  2946,    25,  2932,  5221, 17478,  2473,  1393,   518,  1059,
          3753,    11,   714,   358,  1467,   291,  1059,   220,    18,  3039,
         13671,    11,   220,    19,    12,    20,  4115, 10747,    13,  2932,
          3207,   944,  1618,   752,  3080,  4124,   419,  6556,   323,  2115,
           264,  4069, 86283,   429,  1340,   572, 13028,   678,  1899,   448,
           264,  4238,   879,  8542,   705,   700,   315,   279,  6303,   382,
            40,  5485,   429,  1340,  8454,   264,  6802,   315,   279,  1378,
           315,  1105,   700,   315,  

## Traning the reward function

In [1]:
import torch
import transformers
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM, DataCollatorForLanguageModeling
from trl import RewardTrainer, SFTTrainer
from datasets import Dataset
import json
import pandas as pd
from transformers import Trainer, TrainingArguments


  from .autonotebook import tqdm as notebook_tqdm


In [3]:
##model path
MODEL_PATH = "HPAI-BSC/Qwen2.5-Aloe-Beta-7B"


splits = {'train': 'data/train-00000-of-00001-3cbd295cedeecf91.parquet', 'test': 'data/test-00000-of-00001-0845e2eec675b16a.parquet', 'valid1': 'data/valid1-00000-of-00001-b647616a2be5f333.parquet', 'valid2': 'data/valid2-00000-of-00001-2655c5b3621b6116.parquet'}
DATA_PATH = "hf://datasets/CarperAI/openai_summarize_comparisons/" + splits["test"]

In [5]:
df = pd.read_parquet(DATA_PATH)
df = df[:10]

In [7]:
##defininig the model and tokenizer
tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
model = AutoModelForCausalLM.from_pretrained(MODEL_PATH)

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Loading checkpoint shards: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4/4 [00:05<00:00,  1.37s/it]


In [9]:
tokenizer.add_special_tokens({'pad_token': '[PAD]'})
def formatting_func(examples):
    kwargs = {"padding": "max_length",
              "truncation": True,
              "max_length": 256,
              "return_tensors": "pt"
              }

    # Prepend the prompt and a line break to the original_response and response-1 fields.
    prompt_plus_chosen_response = examples["prompt"] + "\n" + examples["chosen"]
    prompt_plus_rejected_response = examples["prompt"] + "\n" + examples["rejected"]

    # Then tokenize these modified fields.
    tokens_chosen = tokenizer.encode_plus(prompt_plus_chosen_response, **kwargs)
    tokens_rejected = tokenizer.encode_plus(prompt_plus_rejected_response, **kwargs)

    return {
        "input_ids_chosen": tokens_chosen["input_ids"][0], "attention_mask_chosen": tokens_chosen["attention_mask"][0],
        "input_ids_rejected": tokens_rejected["input_ids"][0], "attention_mask_rejected": tokens_rejected["attention_mask"][0]
    }

In [11]:
raw_dataset = Dataset.from_pandas(df)
formatted_dataset = raw_dataset.map(formatting_func)
formatted_dataset = formatted_dataset.train_test_split()
raw_dataset

Map: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:00<00:00, 493.24 examples/s]


Dataset({
    features: ['prompt', 'chosen', 'rejected'],
    num_rows: 10
})

In [13]:
model.config

Qwen2Config {
  "_name_or_path": "HPAI-BSC/Qwen2.5-Aloe-Beta-7B",
  "architectures": [
    "Qwen2ForCausalLM"
  ],
  "attention_dropout": 0.0,
  "bos_token_id": 151644,
  "eos_token_id": 151645,
  "hidden_act": "silu",
  "hidden_size": 3584,
  "initializer_range": 0.02,
  "intermediate_size": 18944,
  "max_position_embeddings": 131072,
  "max_window_layers": 28,
  "model_type": "qwen2",
  "num_attention_heads": 28,
  "num_hidden_layers": 28,
  "num_key_value_heads": 4,
  "rms_norm_eps": 1e-06,
  "rope_scaling": null,
  "rope_theta": 1000000.0,
  "sliding_window": null,
  "tie_word_embeddings": false,
  "torch_dtype": "bfloat16",
  "transformers_version": "4.38.2",
  "use_cache": false,
  "use_mrope": false,
  "use_sliding_window": false,
  "vocab_size": 152064
}

In [None]:
# ### Loading the TRL reward trainer and training the trainer
# training_args = TrainingArguments(
#         output_dir="rm_checkpoint/",
#         num_train_epochs=1,
#         logging_steps=10,
#         gradient_accumulation_steps=1,
#         save_strategy="steps",
#         evaluation_strategy="steps",
#         per_device_train_batch_size=2,
#         per_device_eval_batch_size=1,
#         eval_accumulation_steps=1,
#         eval_steps=500,
#         save_steps=500,
#         warmup_steps=100,
#         logging_dir="./logs",
#         learning_rate=1e-5,
#         save_total_limit=1,
#         no_cuda=True,
#     )



In [None]:
# trainer = RewardTrainer(model=model,
#                         tokenizer=tokenizer,
#                         train_dataset=formatted_dataset['train'],
#                         eval_dataset=formatted_dataset['test'],
#                         args= training_args,
#                         )
# trainer.train()

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
You're using a GPT2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
Could not estimate the number of tokens of the input, floating-point operations will not be computed


Step,Training Loss,Validation Loss


TrainOutput(global_step=4, training_loss=0.7631033062934875, metrics={'train_runtime': 52.1582, 'train_samples_per_second': 0.134, 'train_steps_per_second': 0.077, 'total_flos': 0.0, 'train_loss': 0.7631033062934875, 'epoch': 1.0})

In [None]:
trainer.save_model("rm_model/")

In [None]:
import shutil
import os

source_dirs = ["/content/rm_model", "/content/rm_checkpoint"]
destination = "/content/drive/MyDrive/Medical Dialogue Summarization using PPO"

os.makedirs(destination, exist_ok=True)

# Copy each directory to the destination
for src in source_dirs:
    if os.path.exists(src):
        dest_path = os.path.join(destination, os.path.basename(src))
        shutil.copytree(src, dest_path, dirs_exist_ok=True)  # Copy with merging existing directories
        print(f"Copied {src} to {dest_path}")
    else:
        print(f"Skipping {src}, does not exist.")

print("Copy operation completed.")


Copied /content/rm_model to /content/drive/MyDrive/Medical Dialogue Summarization using PPO/rm_model
Copied /content/rm_checkpoint to /content/drive/MyDrive/Medical Dialogue Summarization using PPO/rm_checkpoint
Copy operation completed.


In [None]:
## inference the model
rm_model = AutoModelForCausalLM.from_pretrained("rm_model/")
tokenizer = AutoTokenizer.from_pretrained("rm_model/")

In [None]:
def get_score(model, tokenizer, prompt, response):

    instructions = tokenizer.encode_plus(prompt,
                                       response,
                                       padding="max_length",
                                       max_length=256,
                                       return_tensors="pt",
                                        truncation=True)
    with torch.no_grad():
        outputs = model(**instructions)

    logits = outputs[0]

    return logits


In [None]:
# usage with prompt
prompt = df.iloc[0]["prompt"]
example_prefered_response = df.iloc[0]["chosen"]
example_unprefered_response = df.iloc[0]["rejected"]

In [None]:
loss1 = get_score(model, tokenizer, prompt, example_prefered_response)
loss2= get_score(model, tokenizer, prompt, example_unprefered_response)

In [None]:
from torch import nn
loss = -nn.functional.logsigmoid(loss1 - loss2).mean()

# Policy Model

In [1]:
import torch
import transformers
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM, DataCollatorForLanguageModeling
from trl import RewardTrainer
from datasets import Dataset
import json
import pandas as pd
from transformers import Trainer, TrainingArguments
from trl import PPOTrainer, PPOConfig, AutoModelForCausalLMWithValueHead, create_reference_model

  from .autonotebook import tqdm as notebook_tqdm


In [35]:
##model path
# MODEL_PATH = "C:\\Users\\BMSCE CSE.DESKTOP-IUB6THA\\Downloads\\kshitij\\aloe_qwen_aci-bench-peft-old"

splits = {'train': 'data/train-00000-of-00001-3cbd295cedeecf91.parquet', 'test': 'data/test-00000-of-00001-0845e2eec675b16a.parquet', 'valid1': 'data/valid1-00000-of-00001-b647616a2be5f333.parquet', 'valid2': 'data/valid2-00000-of-00001-2655c5b3621b6116.parquet'}
DATA_PATH = "hf://datasets/CarperAI/openai_summarize_comparisons/" + splits["test"]

In [37]:
df = pd.read_parquet(DATA_PATH)
df = df[:1000]
dataset = Dataset.from_pandas(df)
dataset

Dataset({
    features: ['prompt', 'chosen', 'rejected'],
    num_rows: 1000
})

In [29]:
sentiment_pipe_kwargs = {"top_k": None, "function_to_apply": "none"}

config = PPOConfig(
    steps=51200, learning_rate=1.41e-5, remove_unused_columns=True
)

txt_in_len = 5
txt_out_len = 20
seed = 1

In [9]:
from transformers import AutoTokenizer, pipeline

In [39]:
dataset = dataset.rename_columns({"prompt": "review"})
dataset = dataset.filter(lambda x: len(x["review"]) > 500, batched=False)
dataset = dataset.map(lambda x: {"review": x["review"][:1000]}, batched=False)

Filter: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1000/1000 [00:00<00:00, 165201.62 examples/s]
Map: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1000/1000 [00:00<00:00, 16350.34 examples/s]


In [None]:
# !pip install -U transformers

In [13]:
# tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH, padding_side='left')
tokenizer.pad_token = tokenizer.eos_token

NameError: name 'tokenizer' is not defined

In [51]:
tokenizer = AutoTokenizer.from_pretrained("aloe-qwen-rl-trial-run/")

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [53]:
txt_in_len = 5
txt_out_len = 32
seed = 1

dataset = dataset.map(
    lambda x: {"input_ids": tokenizer.encode(" " + x["chosen"], return_tensors="pt", truncation=True, padding="max_length", max_length=32)[0]},
    batched=False,
)
dataset = dataset.map(lambda x: {"query": tokenizer.decode(x["input_ids"])}, batched=False)
dataset = dataset[:20480]
from datasets import Dataset

dataset = Dataset.from_dict(dataset)
dataset.set_format("pytorch")

Map: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1000/1000 [00:00<00:00, 5682.12 examples/s]
Map: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1000/1000 [00:00<00:00, 5882.64 examples/s]


In [31]:
def collator(data):
    return dict((key, [d[key] for d in data]) for key in data[0])

In [15]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

# rf_model_path = "/content/drive/MyDrive/Colab Notebooks/Medical Dialogue Summarization using PPO/rm_model"
starcoder_model = AutoModelForCausalLMWithValueHead.from_pretrained("aloe-qwen-rl-trial-run/")  ##policy model from step 1
starcoder_model = starcoder_model.to(device)
# starcoder_model_ref = AutoModelForCausalLMWithValueHead.from_pretrained(rf_model_path) ## reward model from step 2
# starcoder_model_ref = starcoder_model_ref.to(device)
starcoder_tokenizer = AutoTokenizer.from_pretrained("HPAI-BSC/Qwen2.5-Aloe-Beta-7B") ## tokenizer of step 1 model., here since we are using same model for step 1 and 2 it doesnot matter
starcoder_tokenizer.add_special_tokens({'pad_token': '[PAD]'})

cuda


Loading checkpoint shards: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4/4 [00:05<00:00,  1.33s/it]
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


1

In [47]:
dataset

Dataset({
    features: ['review', 'chosen', 'rejected'],
    num_rows: 1000
})

In [None]:
starcoder_model

AutoModelForCausalLMWithValueHead(
  (pretrained_model): GPTBigCodeForCausalLM(
    (transformer): GPTBigCodeModel(
      (wte): Embedding(49152, 768)
      (wpe): Embedding(8192, 768)
      (drop): Dropout(p=0.1, inplace=False)
      (h): ModuleList(
        (0-19): 20 x GPTBigCodeBlock(
          (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
          (attn): GPTBigCodeSdpaAttention(
            (c_attn): Linear(in_features=768, out_features=896, bias=True)
            (c_proj): Linear(in_features=768, out_features=768, bias=True)
            (attn_dropout): Dropout(p=0.1, inplace=False)
            (resid_dropout): Dropout(p=0.1, inplace=False)
          )
          (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
          (mlp): GPTBigCodeMLP(
            (c_fc): Linear(in_features=768, out_features=3072, bias=True)
            (c_proj): Linear(in_features=3072, out_features=768, bias=True)
            (act): PytorchGELUTanh()
            (dropout): Dr

In [None]:
starcoder_model_ref

AutoModelForCausalLMWithValueHead(
  (pretrained_model): GPTBigCodeForCausalLM(
    (transformer): GPTBigCodeModel(
      (wte): Embedding(49152, 768)
      (wpe): Embedding(8192, 768)
      (drop): Dropout(p=0.1, inplace=False)
      (h): ModuleList(
        (0-19): 20 x GPTBigCodeBlock(
          (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
          (attn): GPTBigCodeSdpaAttention(
            (c_attn): Linear(in_features=768, out_features=896, bias=True)
            (c_proj): Linear(in_features=768, out_features=768, bias=True)
            (attn_dropout): Dropout(p=0.1, inplace=False)
            (resid_dropout): Dropout(p=0.1, inplace=False)
          )
          (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
          (mlp): GPTBigCodeMLP(
            (c_fc): Linear(in_features=768, out_features=3072, bias=True)
            (c_proj): Linear(in_features=3072, out_features=768, bias=True)
            (act): PytorchGELUTanh()
            (dropout): Dr

In [55]:
import torch
optimizer = torch.optim.SGD(starcoder_model.parameters(), lr=config.learning_rate)
ppo_trainer = PPOTrainer(config, starcoder_model, starcoder_model, starcoder_tokenizer, dataset=dataset, data_collator=collator, optimizer=optimizer)

In [None]:
# for i in ppo_trainer.dataloader:
#   print(i)
#   break

In [None]:
ctrl_str = ["[negative]", "[positive]"]
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # this should be handled by accelerate
ctrl_tokens = dict((s, starcoder_tokenizer.encode(s, return_tensors="pt").squeeze().to(device)) for s in ctrl_str)


In [None]:
def pos_logit_to_reward(logit, task):
    """
    Take the positive sentiment logit and scale it for the task.
        task [negative]: reward = -logit
        task [neutral]: reward = -2*abs(logit)+4
        task [positive]: reward = logit
    """
    for i in range(len(logit)):
        if task[i] == "[negative]":
            logit[i] = -logit[i]
        elif task[i] == "[positive]":
            pass
        else:
            raise ValueError("task has to be in [0, 1, 2]!")
    return logit

In [None]:
pos_logit_to_reward(torch.Tensor([4, 4]), ctrl_str)

tensor([-4.,  4.])

In [None]:
generation_kwargs = {
    "min_length": -1,
    "top_k": 0.0,
    "top_p": 1.0,
    "do_sample": True,
    "pad_token_id": starcoder_tokenizer.eos_token_id,
    "max_new_tokens": 32,
    "eos_token_id": -1,
}


In [None]:
import nltk
nltk.download('punkt_tab')

[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!


True

In [23]:
from prettytable import PrettyTable

def convert_to_json(output_list, src_list=None, ref_list=None, context_list=None, \
            scores=None, doc_id=None, system_id=None):
    """
        Convert the data into the json format.

        output_list: a list of model output
        src_list: source input for different NLG tasks. For example, source document for summarization
                  and dialogue history for dialogue response generation
        ref_list: human-annotated groundtruth
        context_list: the context needed to evaluate several specific dimension. For example,
                      additional factual information when evaluating engagingness and groundedness in dialogues
        scores: human scores for evaluating the model output. They can be used to calculate the correlation
                between evaluators and human judgements. The scores should be stored in a dictionary. For example,
                {'fluency': 2.0, 'coherence': 3.0} could be the human score for a sample.
        doc_id: the index of the input source. It can be used to calculate summary-level correlation for summarzation
        system_id: the index of the generation system. It can be used to calculate system-level correlation.
    """
    json_data = []
    for i in range(len(output_list)):
        cur = {}
        cur['system_output'] = output_list[i]
        if src_list is not None:
            cur['source'] = src_list[i]
        if ref_list is not None:
            cur['reference'] = ref_list[i]
        if context_list is not None:
            cur['context'] = context_list[i]
        if scores is not None:
            cur['scores'] = scores[i]
        if doc_id is not None:
            cur['doc_id'] = doc_id[i]
        if system_id is not None:
            cur['system_id'] = system_id[i]
        json_data.append(cur)
    return json_data


def add_question(dimension, output, src=None, ref=None, context=None, task=None):
    """
        Add questions to generate input in Bool-QA format for UniEval.

        dimension: specific dimension to be evaluated
        src: source input for different NLG tasks. For example, source document for summarization
             and dialogue history for dialogue response generation.
        output: output text generated by the models
        ref: human-annotataed groundtruth
        context: the context needed to evaluate several specific dimension. For example,
                 additional factual information when evaluating engagingness and groundedness in dialogues.
    """

    input_with_question = []
    for i in range(len(output)):
        # For summarization
        if task == 'summarization':
            if dimension == 'fluency':
                cur_input = 'question: Is this a fluent paragraph? </s> paragraph: ' + output[i]
            elif dimension == 'coherence':
                cur_input = 'question: Is this a coherent summary to the document? </s> summary: ' + output[i] + ' </s> document: ' + src[i]
            elif dimension == 'consistency':
                cur_input = 'question: Is this claim consistent with the document? </s> claim: ' + output[i] + ' </s> document: ' + src[i]
            elif dimension == 'relevance':
                cur_input = 'question: Is this summary relevant to the reference? </s> summary: ' + output[i] + ' </s> reference: ' + ref[i]
            else:
                raise NotImplementedError('The input format for this dimension is still undefined. Please customize it first.')
        # For dialogues
        elif task == 'dialogue':
            if dimension == 'naturalness':
                cur_input = 'question: Is this a natural response in the dialogue? </s> response: ' + output[i]
            elif dimension == 'coherence':
                cur_input = 'question: Is this a coherent response given the dialogue history? </s> response: '\
                            + output[i] + ' </s> dialogue history: ' + src[i]
            elif dimension == 'engagingness':
                cur_input = 'question: Is this an engaging and informative response according to the dialogue history and fact? </s> response: '\
                            + output[i] + ' </s> dialogue history: ' + src[i] + ' </s> fact: ' + context[i]
            elif dimension == 'groundedness':
                cur_input = 'question: Is this response consistent with knowledge in the fact? </s> response: '\
                            + output[i] + ' </s> fact: ' + context[i]
            elif dimension == 'understandability':
                cur_input = 'question: Is this an understandable response in the dialogue? </s> response: ' + output[i]
            else:
                raise NotImplementedError('The input format for this dimension is still undefined. Please customize it first.')
        # For data-to-text
        elif task == 'data2text':
            if dimension == 'naturalness':
                cur_input = 'question: Is this a fluent utterance? </s> utterance: ' + output[i]
            elif dimension == 'informativeness':
                cur_input = 'question: Is this sentence informative according to the reference? </s> sentence: '\
                            + output[i] + ' </s> reference: ' + ref[i]
            else:
                raise NotImplementedError('The input format for this dimension is still undefined. Please customize it first.')
        # For factual consistency detection
        elif task == 'fact':
            if dimension == 'consistency':
                cur_input = 'question: Is this claim consistent with the document? </s> claim: ' + output[i] + ' </s> document: ' + src[i]
            else:
                raise NotImplementedError('No other dimensions for the factual consistency detection task.')
        # For new customized tasks
        else:
            raise NotImplementedError('Other tasks are not implemented, please customize specific tasks here.')
        input_with_question.append(cur_input)
    return input_with_question


def print_scores(scores):
    table = PrettyTable(['Dimensions','Score'])
    print('\nEvaluation scores are shown below:')
    dims = list(scores[0].keys())
    for dim in dims:
        cur_score = 0
        for i in range(len(scores)):
            cur_score += scores[i][dim]
        table.add_row([dim, round(cur_score / len(scores), 6)])
    print(table)

In [25]:
import numpy as np
from nltk import sent_tokenize
from scorer import UniEvaluator  # Make sure this import works after placing scorer.py in the same directory

def evaluate(data, dims=None, overall=True, print_result=False, model_name_or_path="t5-small", task='summarization', device='cuda:0'):
    """
    Get the scores of all the given dimensions (fluency, consistency, coherence, relevance)

    data: A list of dictionaries, where each dictionary contains:
          - 'source': The original text
          - 'system_output': The generated system output (summary)
          - 'reference' (optional): Reference summary for relevance evaluation

    dims: A list of dimensions to be evaluated. If dims is None, it evaluates four default dimensions:
          coherence, consistency, fluency, relevance.

    overall: Boolean to indicate whether the overall score is calculated as the average of all dimensions.

    print_result: Boolean to print the results on the screen.

    model_name_or_path: The model name or path to use for evaluation, e.g., 't5-small'

    task: The task type (used in scoring if needed, like summarization or other NLP tasks).

    device: The device to use for evaluation ('cpu' or 'cuda:0').
    """

    # Instantiate the scorer
    scorer = UniEvaluator(model_name_or_path=model_name_or_path, device=device)

    n_data = len(data)
    eval_scores = [{} for _ in range(n_data)]

    # Default dimensions if not provided
    if dims is None:
        dims = ['coherence', 'consistency', 'fluency']   #add relevance

    for dim in dims:
        print(f'Evaluating {dim} of {n_data} samples !!!')

        if dim == 'consistency' or dim == 'fluency':
            # Sentence-level scores for consistency and fluency
            src_list, output_list = [], []
            n_sents = []  # number of sentences in each summary

            for i in range(n_data):
                if dim == 'consistency':
                    source = data[i]['source']
                else:
                    source = ''
                system_outputs = sent_tokenize(data[i]['system_output'])
                n_sents.append(len(system_outputs))
                for j in range(len(system_outputs)):
                    src_list.append(source)
                    output_list.append(system_outputs[j])

            input_list = add_question(dimension=dim, output=output_list, src=src_list, task=task)
            sent_score = scorer.score(input_list)

            # Calculate average sentence-level scores for each sample
            start_idx = 0
            score = []
            for cur_n_sent in n_sents:
                score.append(sum(sent_score[start_idx:start_idx + cur_n_sent]) / cur_n_sent)
                start_idx += cur_n_sent

        elif dim == 'coherence' or dim == 'relevance':
            # Summary-level scores for coherence and relevance
            src_list, output_list, ref_list = [], [], []

            for i in range(n_data):
                src_list.append(data[i]['source'])
                output_list.append(data[i]['system_output'])
                if dim == 'relevance':
                    ref_list.append(data[i]['reference'])

            input_list = add_question(dimension=dim, output=output_list, src=src_list, ref=ref_list, task=task)
            score = scorer.score(input_list)

        else:
            raise NotImplementedError(f"The input format for the dimension '{dim}' is still undefined. Please customize it.")

        # Store the scores for the current dimension
        for i in range(n_data):
            eval_scores[i][dim] = score[i]

    # Calculate overall score (average of all evaluated dimensions)
    if overall:
        for i in range(n_data):
            eval_scores[i]['overall'] = np.mean([eval_scores[i][dim] for dim in dims])

    # Print the result if requested
    if print_result:
        print_scores(eval_scores)

    # Calculate average score across all the dimensions except 'overall'
    avg_score = []
    for i in range(n_data):
        # Exclude 'overall' from the averaging
        dimensions = [dim for dim in dims if dim != 'overall']
        avg_score.append(np.mean([eval_scores[i][dim] for dim in dimensions]))

    return avg_score


In [27]:
data = [
    {
        'source': "Doctor: Hello, how are you feeling today?\nPatient: I've been feeling a bit tired and dizzy.\nDoctor: How long has this been happening?\nPatient: For about a week now. I also have trouble sleeping.\nDoctor: I see. Have you been under a lot of stress lately?\nPatient: Yes, work has been quite stressful.\nDoctor: That could be contributing. Let’s do some tests to rule out other issues.",
        'system_output': "Patient reports tiredness, dizziness, and difficulty sleeping for a week. Work-related stress may be a factor. Doctor will conduct tests to check for other problems."
    },
    {
        'source': "Doctor: What brings you in today?\nPatient: I’ve been having some chest pain and shortness of breath.\nDoctor: How severe is the pain?\nPatient: It’s sharp, and it comes and goes.\nDoctor: When did it start?\nPatient: It started two days ago.\nDoctor: Any history of heart problems?\nPatient: Yes, my father had heart disease.\nDoctor: We’ll need to do an ECG and some blood tests to check your heart health.",
        'system_output': "Patient has sharp chest pain and shortness of breath for two days. Family history of heart disease. Doctor will perform an ECG and blood tests to assess heart health."
    },
    {
        'source': "Doctor: How are you feeling today?\nPatient: I’ve had a sore throat and a cough for the past few days.\nDoctor: Any fever or difficulty swallowing?\nPatient: Yes, I’ve had a low fever, but swallowing is fine.\nDoctor: Any history of allergies or similar symptoms?\nPatient: Not really.\nDoctor: It could be a viral infection. I recommend rest, fluids, and maybe some over-the-counter medicine.",
        'system_output': "Patient reports sore throat, cough, and a low fever. Doctor advises rest, fluids, and over-the-counter medication as the symptoms suggest a viral infection."
    },
    {
        'source': "Doctor: What’s bothering you today?\nPatient: I’ve been experiencing frequent headaches and some nausea.\nDoctor: How often do you get the headaches?\nPatient: It’s been almost every day for the past week.\nDoctor: Any other symptoms like blurred vision or dizziness?\nPatient: No, just the headache and nausea.\nDoctor: We’ll schedule an MRI to get a better understanding of the issue.",
        'system_output': "Patient complains of daily headaches and nausea for the past week. No blurred vision or dizziness. Doctor will schedule an MRI for further evaluation."
    }
]
score = evaluate(data, print_result=True)
print(score)



Evaluating coherence of 4 samples !!!


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.75it/s]


Evaluating consistency of 4 samples !!!


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 14.77it/s]


Evaluating fluency of 4 samples !!!


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 71.91it/s]



Evaluation scores are shown below:
+-------------+----------+
|  Dimensions |  Score   |
+-------------+----------+
|  coherence  | 0.449252 |
| consistency | 0.64936  |
|   fluency   | 0.385504 |
|   overall   | 0.494705 |
+-------------+----------+
[np.float64(0.6976819110569896), np.float64(0.5384448786819837), np.float64(0.5444390657640407), np.float64(0.19825604586050996)]


In [21]:
type(score[0])

numpy.float64

In [None]:
import torch

def get_score(model, tokenizer, responses):
    """
    Calculates scores for responses based on a model and tokenizer.

    Instead of directly evaluating the 'responses' (which are strings),
    this function now creates a list of dictionaries with the format
    expected by the 'evaluate' function. It assumes the original prompt
    is available in a 'prompt' variable and uses it to construct the 'source'
    field in the dictionaries.

    Args:
        model: The model used for evaluation.
        tokenizer: The tokenizer associated with the model.
        responses: A list of generated responses (strings).

    Returns:
        A list of scores for the responses.
    """
    positive_logist = []

    positive_logist = evaluate(responses)  # Call 'evaluate' with the correct data format0
    # Convert the NumPy array to a PyTorch tensor before returning
    return torch.tensor(positive_logist, dtype=torch.float32).to(device) # Assuming 'device' is defined as your target device (e.g., 'cuda:0')

In [None]:
# responses =["ashish is a goo", "heelow how are you", "__IT_\nr/\n: r RelationshipRelationship]]0]\nlsriend\n2//M]\n [ [ a\n the was to the [. a friends to\n\n:\n [lfriend [ me have a aried in his19 minutes.\n\nWhat Modified:** girlfriend was through the Facebook.. I my my friends.**** my  of lf**\n\n** was d1ing for my few personirl** I had for findoolpping my my the future** but I was that in\n\n** have ali  of to she  tolirt my me girl. and she found my about my.. me few of gir.1viously). was\'t find her was).\n\n** was it about my twoirl and the had  Facebook. the  and she gand historyirl) was in April,\n to, find, were flirted. I a messages.. f.ing on her.\n girlM\n; I1 girirllfriend and the19 months. to my Facebook.. my permission. she her messages. my.lirty with my fewirl.\n found her with me. I through more with\n"]
# get_score(starcoder_model, tokenizer, responses)

In [None]:
import torch
from trl import AutoModelForCausalLMWithValueHead
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

rf_model_path = "/content/drive/MyDrive/Colab Notebooks/Medical Dialogue Summarization using PPO/rm_model"
starcoder_model = AutoModelForCausalLMWithValueHead.from_pretrained("/content/drive/MyDrive/Colab Notebooks/Medical Dialogue Summarization using PPO/summarization_policy_new")  ##policy model from step 1
starcoder_model = starcoder_model.to(device)  # Explicitly move to GPU
starcoder_model_ref = AutoModelForCausalLMWithValueHead.from_pretrained(rf_model_path) ## reward model from step 2
starcoder_model_ref = starcoder_model_ref.to(device)  # Explicitly move to GPU
starcoder_tokenizer = AutoTokenizer.from_pretrained("bigcode/tiny_starcoder_py") ## tokenizer of step 1 model., here since we are using same model for step 1 and 2 it doesnot matter
starcoder_tokenizer.add_special_tokens({'pad_token': '[PAD]'})

cuda


1

In [None]:
###saving the model
# starcoder_model.save_pretrained("rhlfmodel/")
# starcoder_tokenizer.save_pretrained("rhlfmodel/")

ppo_trainer.model.pretrained_model.save_pretrained("/content/drive/MyDrive/Colab Notebooks/Medical Dialogue Summarization using PPO/rhlfmodel/")
starcoder_tokenizer.save_pretrained("/content/drive/MyDrive/Colab Notebooks/Medical Dialogue Summarization using PPO/rhlfmodel/")

('/content/drive/MyDrive/Colab Notebooks/Medical Dialogue Summarization using PPO/rhlfmodel/tokenizer_config.json',
 '/content/drive/MyDrive/Colab Notebooks/Medical Dialogue Summarization using PPO/rhlfmodel/special_tokens_map.json',
 '/content/drive/MyDrive/Colab Notebooks/Medical Dialogue Summarization using PPO/rhlfmodel/vocab.json',
 '/content/drive/MyDrive/Colab Notebooks/Medical Dialogue Summarization using PPO/rhlfmodel/merges.txt',
 '/content/drive/MyDrive/Colab Notebooks/Medical Dialogue Summarization using PPO/rhlfmodel/added_tokens.json',
 '/content/drive/MyDrive/Colab Notebooks/Medical Dialogue Summarization using PPO/rhlfmodel/tokenizer.json')

In [None]:
from random import choices
from tqdm import tqdm
import time
import numpy as np

for epoch in range(1):
    for batch in tqdm(ppo_trainer.dataloader):
        (logs, game_data,) = (
            dict(),
            dict(),
        )

        print(ctrl_str)
        #### prepend a random control token
        task_list = choices(ctrl_str, k=config.batch_size)
        game_data["query"] = [t + q for t, q in zip(task_list, batch["query"])]
        # Move input_ids to the same device as ctrl_tokens
        query_tensors = [torch.cat((ctrl_tokens[t], input_ids.to(device))) for t, input_ids in zip(task_list, batch["input_ids"])]

        #### get response from gpt2
        response_tensors = []
        for query in query_tensors:
            response = ppo_trainer.generate(query, **generation_kwargs)
            response_tensors.append(response.squeeze()[-txt_out_len:])
#         print(response_tensors)
        game_data["response"] = [starcoder_tokenizer.decode(r.squeeze()) for r in response_tensors]

        #### sentiment analysis
        texts = [{"source": q, "system_output": r} for q, r in zip(batch["query"], game_data["response"])]
        logits = get_score(starcoder_model,starcoder_tokenizer, texts)
        rewards = pos_logit_to_reward(logits, task_list)
        # Convert the single tensor into a list of tensors before passing it to ppo_trainer.step
        rewards = [r.unsqueeze(0) for r in rewards]  # Each reward is now a single-element tensor within a list
        print(logits)
        #### Run PPO training
        t = time.time()
        stats = ppo_trainer.step(query_tensors, response_tensors, rewards)

        for cs in ctrl_str:
            key = "env/reward_" + cs.strip("[]")
            stats[key] = np.mean([r.cpu().numpy() for r, t in zip(rewards, task_list) if t == cs])
        ppo_trainer.log_stats(stats, game_data, rewards)

  0%|          | 0/7 [00:00<?, ?it/s]`eos_token_id` should consist of positive integers, but is tensor([-1], device='cuda:0'). Your generation will not stop until the maximum length is reached. Depending on other flags, it may even crash.


['[negative]', '[positive]']


`eos_token_id` should consist of positive integers, but is tensor([-1], device='cuda:0'). Your generation will not stop until the maximum length is reached. Depending on other flags, it may even crash.
`eos_token_id` should consist of positive integers, but is tensor([-1], device='cuda:0'). Your generation will not stop until the maximum length is reached. Depending on other flags, it may even crash.
`eos_token_id` should consist of positive integers, but is tensor([-1], device='cuda:0'). Your generation will not stop until the maximum length is reached. Depending on other flags, it may even crash.
`eos_token_id` should consist of positive integers, but is tensor([-1], device='cuda:0'). Your generation will not stop until the maximum length is reached. Depending on other flags, it may even crash.
`eos_token_id` should consist of positive integers, but is tensor([-1], device='cuda:0'). Your generation will not stop until the maximum length is reached. Depending on other flags, it may ev

Evaluating coherence of 128 samples !!!



  0%|          | 0/16 [00:00<?, ?it/s][A
 12%|█▎        | 2/16 [00:00<00:00, 17.53it/s][A
 25%|██▌       | 4/16 [00:00<00:00, 18.57it/s][A
 44%|████▍     | 7/16 [00:00<00:00, 21.33it/s][A
 62%|██████▎   | 10/16 [00:00<00:00, 22.68it/s][A
 81%|████████▏ | 13/16 [00:00<00:00, 22.23it/s][A
100%|██████████| 16/16 [00:00<00:00, 22.06it/s]


Evaluating consistency of 128 samples !!!



  0%|          | 0/20 [00:00<?, ?it/s][A
 20%|██        | 4/20 [00:00<00:00, 30.73it/s][A
 40%|████      | 8/20 [00:00<00:00, 28.95it/s][A
 55%|█████▌    | 11/20 [00:00<00:00, 28.04it/s][A
 70%|███████   | 14/20 [00:00<00:00, 27.84it/s][A
100%|██████████| 20/20 [00:00<00:00, 28.46it/s]


Evaluating fluency of 128 samples !!!



  0%|          | 0/20 [00:00<?, ?it/s][A
 20%|██        | 4/20 [00:00<00:00, 37.59it/s][A
 40%|████      | 8/20 [00:00<00:00, 36.77it/s][A
 60%|██████    | 12/20 [00:00<00:00, 36.79it/s][A
 80%|████████  | 16/20 [00:00<00:00, 37.38it/s][A
100%|██████████| 20/20 [00:00<00:00, 37.14it/s]


tensor([-0.1140, -0.5964, -0.4760,  0.4730,  0.3701, -0.6378, -0.4970, -0.8522,
         0.7088, -0.3443,  0.5310, -0.6662, -0.4598, -0.5201,  0.3532,  0.6290,
         0.4458, -0.4076,  0.4418, -0.5959,  0.7573, -0.6279,  0.5680,  0.4816,
         0.6701, -0.5965, -0.4724,  0.6003, -0.5661, -0.4443, -0.7133, -0.4767,
        -0.7351, -0.7428, -0.5126, -0.4642,  0.4819,  0.5510, -0.5316, -0.6129,
         0.5767, -0.6015, -0.6732, -0.6184,  0.3206, -0.6439, -0.5275, -0.5137,
         0.4751, -0.5006, -0.6168, -0.3965, -0.5622,  0.5087,  0.3758, -0.3865,
         0.5497,  0.3349,  0.5346, -0.6813, -0.4501,  0.5392, -0.5807, -0.5732,
        -0.3888,  0.3660, -0.5514, -0.6128,  0.4821, -0.4926, -0.6747, -0.5794,
         0.6234,  0.5111, -0.5365, -0.5109,  0.4887,  0.5621,  0.5795, -0.4082,
         0.5549, -0.2743,  0.6639,  0.4904, -0.5654,  0.4000, -0.5261,  0.5667,
        -0.4763,  0.5618, -0.5892, -0.5535,  0.5563,  0.5294, -0.4903, -0.5240,
        -0.4411, -0.3146,  0.3763,  0.64

  0%|          | 0/7 [01:06<?, ?it/s]


OutOfMemoryError: CUDA out of memory. Tried to allocate 1.55 GiB. GPU 0 has a total capacity of 14.74 GiB of which 1.49 GiB is free. Process 289527 has 13.25 GiB memory in use. Of the allocated memory 12.90 GiB is allocated by PyTorch, and 228.88 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

In [None]:
import shutil
import os

source_dirs = ["/content/drive/MyDrive/Medical Dialogue Summarization using PPO/rhlfmodel"]
destination = "/content/drive/MyDrive/Colab Notebooks/Medical Dialogue Summarization using PPO"

os.makedirs(destination, exist_ok=True)

# Copy each directory to the destination
for src in source_dirs:
    if os.path.exists(src):
        dest_path = os.path.join(destination, os.path.basename(src))
        shutil.copytree(src, dest_path, dirs_exist_ok=True)  # Copy with merging existing directories
        print(f"Copied {src} to {dest_path}")
    else:
        print(f"Skipping {src}, does not exist.")

print("Copy operation completed.")


Copied /content/drive/MyDrive/Medical Dialogue Summarization using PPO/rhlfmodel to /content/drive/MyDrive/Colab Notebooks/Medical Dialogue Summarization using PPO/rhlfmodel
Copy operation completed.


In [None]:
from transformers import pipeline, set_seed
model_path = "/content/drive/MyDrive/Colab Notebooks/Medical Dialogue Summarization using PPO/rhlfmodel"
set_seed(42)
pipe = pipeline("text-generation",model=model_path, tokenizer=model_path, max_length=40, num_return_sequences=1)

In [None]:
text = dataset["rejected"][0]
print(text)
pipe(text)

Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.


TL;DR:  My girlfriend and I broke up after she went through my Facebook account without my permission.<|endoftext|>Citizens for the Republic


[{'generated_text': 'TL;DR:  My girlfriend and I broke up after she went through my Facebook account without my permission.<|endoftext|>Citizens for the Republic'}]

In [None]:
save_directory = "//content/drive/MyDrive/Colab Notebooks/Medical Dialogue Summarization using PPO/rhlfmodel"

# Load the model and tokenizer
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = AutoModelForCausalLM.from_pretrained(save_directory).to(device)
tokenizer = AutoTokenizer.from_pretrained(save_directory)

In [None]:
conversation = '''
Doctor: Hi, Mr. X, I'm Dr. Y. How are you feeling today?

Patient: Not too good, doctor. I've been feeling really sick lately.

Doctor: I understand. Can you tell me what symptoms you're experiencing?

Patient: Yes, I've been having a fever, a dry cough, and dyspnea.

Doctor: I see. You were hospitalized due to moderate ARDS from COVID-19, is that correct?

Patient: Yes, that's correct.

Doctor: During your physical therapy, we encountered some difficulties. Can you tell me more about that?

Patient: Yes, I had trouble with position changes and deep breathing. Every time I tried to change my position or take a deep breath, I would start coughing and it would make me really short of breath.

Doctor: I understand. To avoid rapid deterioration and respiratory failure, we instructed you to change positions very slowly and step-by-step, right?

Patient: Yes, that's right. It took about 30 minutes to change to the prone position.

Doctor: And I see that this approach increased your oxygen saturation, for example, on day 5 with 6 L/min of oxygen from 93% to 97%.

Patient: Yes, that's correct.

Doctor: Good. We also had to adapt your breathing exercises to avoid prolonged coughing and oxygen desaturation. Can you tell me more about that?

Patient: Yes, I was instructed to stop every deep breath before coughing and to hold my breath for better air distribution.

Doctor: I see that you performed the breathing exercises well and managed to increase your oxygen saturation.

Patient: Yes, I did my best.

Doctor: You also had difficulty maintaining sufficient oxygen saturation during physical activity, is that correct?

Patient: Yes, I did. But with close monitoring and frequent breaks, I was able to perform low-level strength and walking exercises without any significant deoxygenation.

Doctor: I see that your exercise progression was low on days 1 to 5, but then increased daily until your hospital discharge to a rehabilitation clinic on day 10.

Patient: Yes, that's correct.

Doctor: Great. I'd like to keep monitoring your progress and see how you're doing. Can you keep me updated on any changes in your symptoms?

Patient: Yes, of course, doctor.

Doctor: Alright, let's keep in touch. If you have any questions or concerns, don't hesitate to reach out to me.

Patient: Thank you, doctor.
'''

In [None]:
def generate_response(prompt, model, tokenizer, max_new_tokens=1000, temperature=0.1):
    input_ids = tokenizer.encode(prompt, return_tensors="pt").to(device)

    with torch.no_grad():
        output = model.generate(
            input_ids=input_ids,
            temperature=temperature,
            top_k=50,
            top_p=0.9,
            do_sample=True,
            max_new_tokens=max_new_tokens,
            pad_token_id=tokenizer.eos_token_id
        )


    return tokenizer.decode(output[0], skip_special_tokens=True)

prompt = "Generate a summary for the below conversation. Dont give me the prompt back. I just want the summary to be returned to me\n\n" + conversation
response = generate_response(prompt, model, tokenizer)
print("Generated Response:\n", response)


Generated Response:
 Generate a summary for the below conversation. Dont give me the prompt back. I just want the summary to be returned to me


Doctor: Hi, Mr. X, I'm Dr. Y. How are you feeling today?

Patient: Not too good, doctor. I've been feeling really sick lately.

Doctor: I understand. Can you tell me what symptoms you're experiencing?

Patient: Yes, I've been having a fever, a dry cough, and dyspnea.

Doctor: I see. You were hospitalized due to moderate ARDS from COVID-19, is that correct?

Patient: Yes, that's correct.

Doctor: During your physical therapy, we encountered some difficulties. Can you tell me more about that?

Patient: Yes, I had trouble with position changes and deep breathing. Every time I tried to change my position or take a deep breath, I would start coughing and it would make me really short of breath.

Doctor: I understand. To avoid rapid deterioration and respiratory failure, we instructed you to change positions very slowly and step-by-step, right?

Pat

In [None]:
print(dataset["review"][0][:100])

SUBREDDIT: r/relationships
TITLE: My [21/M] girlfriend [19/F] broke up with me after she went throug


In [None]:
model = AutoModelForCausalLM.from_pretrained("/content/drive/MyDrive/Colab Notebooks/Medical Dialogue Summarization using PPO/rhlfmodel")
model_path = "bigcode/tiny_starcoder_py"

tokenizer = AutoTokenizer.from_pretrained(model_path, truncation=True, max_length=256, padding="max_length")
text = df.iloc[2]["prompt"]
tokenized_text = tokenizer(text, return_tensors="pt", max_length=256)

Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
