In [None]:
!pip install transformers
!pip install accelerate
!pip install bitsandbytes
!pip install datasets
!pip install peft
!pip install wandb
!pip install trl
!pip install sentencepiece
!pip install evaluate

Collecting accelerate
  Downloading accelerate-0.29.3-py3-none-any.whl (297 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m297.6/297.6 kB[0m [31m6.9 MB/s[0m eta [36m0:00:00[0m
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch>=1.10.0->accelerate)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch>=1.10.0->accelerate)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch>=1.10.0->accelerate)
  Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)
Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch>=1.10.0->accelerate)
  Using cached nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl (731.7 MB)
Collecting nvidia-cublas-cu12==12.1.3.1 (from torch>=1.10.0->accelerate)
  Using cached nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.w

In [None]:
from peft import LoraConfig
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification, AutoModelForSeq2SeqLM, GenerationConfig
from datasets import load_dataset
from peft import PeftModel, PeftConfig, LoraConfig, TaskType

# trl: Transformer Reinforcement Learning library
from trl import PPOTrainer, PPOConfig, AutoModelForCausalLMWithValueHead
from trl import create_reference_model
from trl.core import LengthSampler

import torch
import evaluate

import numpy as np
import pandas as pd

# tqdm library makes the loops show a smart progress meter.
from tqdm import tqdm
tqdm.pandas()

In [None]:
def print_number_of_trainable_model_parameters(model):
    """Build a short report of how many of ``model``'s parameters are trainable.

    Despite its name this function prints nothing; it returns the report so the
    caller can embed it in its own message.

    Args:
        model: Object exposing ``named_parameters()`` that yields
            ``(name, parameter)`` pairs, where each parameter provides
            ``numel()`` and ``requires_grad``.

    Returns:
        A multi-line string (with a leading newline) listing the trainable
        parameter count, the total parameter count and the trainable share in
        percent. A model without any parameters is reported as ``0.00%``.
    """
    trainable_model_params = 0
    all_model_params = 0
    for _, param in model.named_parameters():
        param_count = param.numel()
        all_model_params += param_count
        if param.requires_grad:
            trainable_model_params += param_count

    # A parameter-free model would otherwise raise ZeroDivisionError here.
    if all_model_params:
        trainable_percentage = 100 * trainable_model_params / all_model_params
    else:
        trainable_percentage = 0.0
    return f"\ntrainable model parameters: {trainable_model_params}\nall model parameters: {all_model_params}\npercentage of trainable model parameters: {trainable_percentage:.2f}%"

In [None]:
# `os` is required for os.environ below; without this import the cell raises
# NameError on a fresh kernel.
import os

from transformers import AutoModelForCausalLM,BitsAndBytesConfig
from peft import get_peft_model

# LoRA adapter settings: rank-32 adapters on the attention query/value projections.
lora_config = LoraConfig(
    r=32, # Rank
    lora_alpha=32,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type=TaskType.CAUSAL_LM,
)
model_path = 'mistralai/Mistral-7B-v0.1'

# 4-bit NF4 quantisation with double quantisation; compute runs in bfloat16.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

# The Hugging Face token is read from the ACCESS_TOKEN environment variable
# (KeyError if it is not set) so that no credential is stored in the notebook.
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map="auto",
    trust_remote_code=True,
    token=os.environ['ACCESS_TOKEN'],
    quantization_config=bnb_config,
)

# Attach freshly initialised LoRA adapters to the quantised base model.
peft_model = get_peft_model(model,lora_config)

print(f'PEFT model parameters to be updated:\n{print_number_of_trainable_model_parameters(peft_model)}\n')

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/25.1k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.94G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/4.54G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

PEFT model parameters to be updated:

trainable model parameters: 13631488
all model parameters: 3765702656
percentage of trainable model parameters: 0.36%



In [None]:
# Wrap the LoRA-adapted policy in TRL's value-head model, which adds the
# `v_head` module printed at the end of this cell.
ppo_model = AutoModelForCausalLMWithValueHead.from_pretrained(
    peft_model,
    torch_dtype=torch.bfloat16,
    is_trainable=True,
)

# Count the value-head parameters instead of hard-coding them: the previous
# label claimed 769, but the head is sized by the base model's hidden width.
v_head_param_count = sum(param.numel() for param in ppo_model.v_head.parameters())

print(f'PPO model parameters to be updated (ValueHead + {v_head_param_count} params):\n{print_number_of_trainable_model_parameters(ppo_model)}\n')
print(ppo_model.v_head)



PPO model parameters to be updated (ValueHead + 769 params):

trainable model parameters: 13635585
all model parameters: 3765706753
percentage of trainable model parameters: 0.36%

ValueHead(
  (dropout): Dropout(p=0.1, inplace=False)
  (summary): Linear(in_features=4096, out_features=1, bias=True)
  (flatten): Flatten(start_dim=1, end_dim=-1)
)


In [None]:
# Print the qualified name of every parameter that will receive gradients.
for param_name, param_tensor in ppo_model.named_parameters():
  if not param_tensor.requires_grad:
    continue
  print(param_name)

pretrained_model.base_model.model.model.layers.0.self_attn.q_proj.lora_A.default.weight
pretrained_model.base_model.model.model.layers.0.self_attn.q_proj.lora_B.default.weight
pretrained_model.base_model.model.model.layers.0.self_attn.v_proj.lora_A.default.weight
pretrained_model.base_model.model.model.layers.0.self_attn.v_proj.lora_B.default.weight
pretrained_model.base_model.model.model.layers.1.self_attn.q_proj.lora_A.default.weight
pretrained_model.base_model.model.model.layers.1.self_attn.q_proj.lora_B.default.weight
pretrained_model.base_model.model.model.layers.1.self_attn.v_proj.lora_A.default.weight
pretrained_model.base_model.model.model.layers.1.self_attn.v_proj.lora_B.default.weight
pretrained_model.base_model.model.model.layers.2.self_attn.q_proj.lora_A.default.weight
pretrained_model.base_model.model.model.layers.2.self_attn.q_proj.lora_B.default.weight
pretrained_model.base_model.model.model.layers.2.self_attn.v_proj.lora_A.default.weight
pretrained_model.base_model.mode

In [None]:
def get_target_modules(model):
    """Return the qualified names of all ``torch.nn.Linear`` submodules of ``model``.

    Args:
        model: Object exposing ``named_modules()`` that yields
            ``(name, module)`` pairs (e.g. a ``torch.nn.Module``).

    Returns:
        A list of unique module names in the order ``named_modules()`` yields
        them, so the result is identical across kernel restarts (a plain
        ``set`` would give a hash-dependent order).
    """
    linear_names = (
        name
        for name, module in model.named_modules()
        if isinstance(module, torch.nn.Linear)
    )
    # dict.fromkeys de-duplicates while preserving first-seen order.
    return list(dict.fromkeys(linear_names))

# Collect the names of the Linear layers inside the PPO model; the bare
# expression on the last line displays the list as the cell output.
target_modules = get_target_modules(ppo_model)
target_modules

['pretrained_model.base_model.model.model.layers.29.self_attn.v_proj.base_layer',
 'pretrained_model.base_model.model.model.layers.11.self_attn.o_proj',
 'pretrained_model.base_model.model.model.layers.25.self_attn.o_proj',
 'pretrained_model.base_model.model.model.layers.26.self_attn.q_proj.lora_A.default',
 'pretrained_model.base_model.model.model.layers.23.self_attn.k_proj',
 'pretrained_model.base_model.model.model.layers.28.mlp.up_proj',
 'pretrained_model.base_model.model.model.layers.7.self_attn.o_proj',
 'pretrained_model.base_model.model.model.layers.21.self_attn.q_proj.lora_B.default',
 'pretrained_model.base_model.model.model.layers.23.self_attn.v_proj.base_layer',
 'pretrained_model.base_model.model.model.layers.14.self_attn.v_proj.lora_A.default',
 'pretrained_model.base_model.model.model.layers.28.self_attn.q_proj.lora_A.default',
 'pretrained_model.base_model.model.model.layers.4.self_attn.v_proj.lora_A.default',
 'pretrained_model.base_model.model.model.layers.4.self_at

In [None]:
# Build the reference model from the PPO model with TRL's helper. The report
# printed below lists how many of its parameters are trainable (the recorded
# run shows 0).
ref_model = create_reference_model(ppo_model)

print(f'Reference model parameters to be updated:\n{print_number_of_trainable_model_parameters(ref_model)}\n')

Reference model parameters to be updated:

trainable model parameters: 0
all model parameters: 3765706753
percentage of trainable model parameters: 0.00%



In [None]:
# Reward model: a sequence classifier whose logits are later used as the PPO reward.
real_fake_model_name = "DevanshArora2002/legalcaseFinetuned"
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Tokenizers are not placed on devices, so the former `device_map="auto"`
# argument had no purpose here and has been dropped.
# NOTE(review): the tokenizer comes from 'bert-base-cased' rather than from the
# classifier's own repository - confirm that this is the vocabulary it was trained with.
real_fake_model_tokenizer = AutoTokenizer.from_pretrained('bert-base-cased', token=os.environ['ACCESS_TOKEN'])
real_fake_model = AutoModelForSequenceClassification.from_pretrained(real_fake_model_name, token=os.environ['ACCESS_TOKEN']).to(device)

tokenizer_config.json:   0%|          | 0.00/49.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/213k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/436k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/727 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

In [None]:
# Position of the classifier output whose score is read as the reward.
# NOTE(review): presumably LABEL_1 is the "real" class - confirm against the classifier's label map.
real_index = 1

# Text-classification pipeline around the reward model.
sentiment_pipe = pipeline(
    "sentiment-analysis",
    model=real_fake_model,
    tokenizer=real_fake_model_tokenizer,
    device=device,
)

# Call options: top_k=None returns the scores of all labels. The two variants
# differ only in the post-processing: "none" yields raw logits, "softmax"
# yields probabilities.
reward_logits_kwargs = dict(top_k=None, function_to_apply="none", batch_size=16)
reward_probabilities_kwargs = dict(top_k=None, function_to_apply="softmax", batch_size=16)


In [None]:
# Hand-written question / answer pair used to sanity-check the reward model below.
quest = "What are the provisions for declaring assets under the Act?"
ans = "Every public servant must declare their assets and liabilities as prescribed by the rules. Failure to do so or providing misleading information may lead to the presumption that undisclosed assets were acquired through corrupt means."

In [None]:
# Sanity-check the reward classifier on the sample pair: raw logits first, then
# softmax probabilities. The previous "non-toxic" label was a leftover from a
# toxicity example and did not describe this real/fake classifier.
print("Reward model output:")
print("For the sample question + answer text")
print(sentiment_pipe(quest+ans, **reward_logits_kwargs))
print(sentiment_pipe(quest+ans, **reward_probabilities_kwargs))

Reward model output:
For non-toxic text
[{'label': 'LABEL_0', 'score': 0.08810566365718842}, {'label': 'LABEL_1', 'score': -0.41888129711151123}]
[{'label': 'LABEL_0', 'score': 0.6240999102592468}, {'label': 'LABEL_1', 'score': 0.37590011954307556}]


In [None]:
def evalute_score(text,sentiment_pipe,reward_logits_kwargs,real_index):
  """Score ``text`` with the reward pipeline and return one label's score.

  Args:
    text: Input forwarded unchanged to ``sentiment_pipe``.
    sentiment_pipe: Callable returning an indexable collection of
      ``{'label': ..., 'score': ...}`` entries for ``text``.
    reward_logits_kwargs: Keyword arguments forwarded to ``sentiment_pipe``.
    real_index: Position of the entry whose ``'score'`` is returned.

  Returns:
    The ``'score'`` value of the entry at ``real_index``.
  """
  label_scores = sentiment_pipe(text, **reward_logits_kwargs)
  selected_entry = label_scores[real_index]
  return selected_entry['score']

In [None]:
def evaluate(model,
             tokenizer,
             reward_logits_kwargs,
             real_index,
             dataset,
             sentiment_pipe,
             max_new_tokens=75):
  """Generate a sampled completion per question and summarise the reward scores.

  Relies on the notebook globals ``device``, ``GenerationConfig``, ``np``,
  ``tqdm`` and ``evalute_score``.
  NOTE(review): this function shares its name with the ``evaluate`` package
  imported at the top of the notebook and shadows it once this cell has run.

  Args:
    model: Model exposing ``config`` and ``generate``. Missing
      ``pad_token_id`` / ``eos_token_id`` values on its config are filled in
      from ``tokenizer`` and ``model.pad_token_id`` is set as a side effect.
    tokenizer: Tokenizer used to encode each question and decode the output.
    reward_logits_kwargs: Keyword arguments forwarded to ``sentiment_pipe``.
    real_index: Position of the pipeline entry whose score is collected.
    dataset: Iterable of samples with a ``"Question"`` field.
    sentiment_pipe: Reward pipeline, called through ``evalute_score``.
    max_new_tokens: Generation budget per sample (default 75, the value that
      was previously hard-coded).

  Returns:
    ``(mean, std)`` of the collected scores. For an empty ``dataset`` NumPy
    yields ``nan`` for both (with a RuntimeWarning) instead of the former
    UnboundLocalError.
  """
  # Fill in missing special-token ids on the model config from the tokenizer.
  if model.config.pad_token_id is None:
    model.config.pad_token_id = tokenizer.pad_token_id

  if model.config.eos_token_id is None:
    model.config.eos_token_id = tokenizer.eos_token_id

  # Derive the padding id from the configuration rather than hard-coding 2;
  # fall back to EOS when no dedicated padding token is configured.
  pad_token_id = model.config.pad_token_id
  if pad_token_id is None:
    pad_token_id = model.config.eos_token_id
  model.pad_token_id = pad_token_id

  # Loop-invariant, so built once: unrestricted sampling (top_k=0, top_p=1).
  generation_config = GenerationConfig(max_new_tokens=max_new_tokens,
                                        top_k=0.0,
                                        top_p=1.0,
                                        do_sample=True)

  real_scores = []
  for sample in tqdm(dataset):
    input_ids = tokenizer(sample["Question"], return_tensors="pt").input_ids.to(device)

    response_token_ids = model.generate(input_ids=input_ids,
                                        generation_config=generation_config,
                                        pad_token_id=pad_token_id)

    # The whole returned sequence is decoded and scored.
    # NOTE(review): for a decoder-only model this presumably includes the prompt.
    generated_text = tokenizer.decode(response_token_ids[0], skip_special_tokens=True)

    real_scores.append(evalute_score(generated_text,sentiment_pipe,reward_logits_kwargs,real_index))

  # Aggregate once, after all samples have been scored.
  mean = np.mean(real_scores)
  std = np.std(real_scores)

  return mean, std

In [None]:
# Colab only: mount Google Drive so the CSV under /content/drive can be read below.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
from transformers import AutoTokenizer,BertForSequenceClassification,Trainer, TrainingArguments
from datasets import load_dataset
import torch
from datasets import Dataset
import pandas as pd

# Fixed seed so the shuffle and both splits are reproducible across kernel
# restarts (previously every run produced different train/eval/test sets).
SPLIT_SEED = 42

# Load the CSV, shuffle the rows and normalise the label column name.
df = pd.read_csv("/content/drive/MyDrive/IR-Project/src/data.csv")
df = df.sample(frac=1, random_state=SPLIT_SEED).reset_index(drop=True)
df = df.rename(columns={'Label':'label'})

dataset_sum = Dataset.from_pandas(df)

# First hold out 20% as the test set, then 20% of the remainder for evaluation.
train_test_split_dataset = dataset_sum.train_test_split(test_size=0.2, seed=SPLIT_SEED)
train_eval_split = train_test_split_dataset['train'].train_test_split(test_size=0.2, seed=SPLIT_SEED)
train_dataset = train_eval_split['train']
eval_dataset = train_eval_split['test']
test_dataset = train_test_split_dataset['test']

In [None]:
# Register '[PAD]' as the reward tokenizer's padding token; the call's return
# value is shown as the cell output (0 in the recorded run).
real_fake_model_tokenizer.add_special_tokens({'pad_token': '[PAD]'})

0

In [None]:
# Tokenizer of the policy model (`model_path`), authenticated with the
# ACCESS_TOKEN environment variable.
model_tokenizer = AutoTokenizer.from_pretrained(model_path,token=os.environ['ACCESS_TOKEN'])

tokenizer_config.json:   0%|          | 0.00/967 [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/72.0 [00:00<?, ?B/s]

In [None]:
# Reuse the end-of-sequence token for padding.
# NOTE(review): presumably this tokenizer ships without a padding token - confirm.
model_tokenizer.pad_token = model_tokenizer.eos_token

In [None]:
# Store the EOS id as `pad_token_id` directly on the reference model object.
# NOTE(review): this sets a plain attribute on the wrapper; evaluate() assigns
# the same attribute itself before generating, so confirm this cell is needed.
ref_model.pad_token_id = model_tokenizer.eos_token_id

In [None]:
# Display the id that was just assigned (2 in the recorded run).
ref_model.pad_token_id

2

In [None]:
# Convert the test split to pandas and keep only its first 100 rows so the
# baseline evaluation stays affordable; print the resulting row count.
test_dataset_df = test_dataset.to_pandas()
test_dataset_df2 = test_dataset_df.head(100)
print(len(test_dataset_df2))

100


In [None]:
# Turn the 100-row frame back into a `datasets.Dataset` and show its schema.
test_dataset2 = Dataset.from_pandas(test_dataset_df2)
print(test_dataset2)

Dataset({
    features: ['Question', 'Answer', 'label'],
    num_rows: 100
})


In [None]:
# Baseline before PPO training: (mean, std) of the reward scores obtained by
# the reference model on the 100-question subset.
# NOTE(review): the name `out` is reused further down for a dataloader batch.
out = evaluate(ref_model,
               model_tokenizer,
               reward_logits_kwargs,
               real_index,
               test_dataset2,
               sentiment_pipe)

8it [00:58,  7.26s/it]You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset
100it [12:10,  7.30s/it]


In [None]:
# Show the baseline (mean, std) tuple returned by evaluate().
print(out)

(-0.789557920396328, 0.14554877344092973)


In [None]:
def tokenization(examples, max_length=512):
    """Tokenise the ``"Question"`` field of a batch with ``model_tokenizer``.

    Intended for ``Dataset.map(..., batched=True)``; relies on the notebook
    global ``model_tokenizer``.

    Args:
        examples: Mapping with a ``"Question"`` entry holding the text(s).
        max_length: Upper bound on tokens per question. ``truncation=True``
            has no effect without a maximum length (the tokenizer warns and
            disables truncation), so an explicit cap is passed. The default
            of 512 is a conservative choice; raise it if prompts are longer.

    Returns:
        Whatever ``model_tokenizer`` returns for the truncated questions.
    """
    return model_tokenizer(examples["Question"], truncation=True, max_length=max_length)

In [None]:
# Tokenise the "Question" column of the training split in batches.
# NOTE: `train_dataset` is rebound in place, and a later cell removes the
# "Question" column, so re-running this cell afterwards will fail.
train_dataset = train_dataset.map(tokenization, batched=True)

Map:   0%|          | 0/1315 [00:00<?, ? examples/s]

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


In [None]:
# Drop the raw text columns now that the questions have been tokenised.
train_dataset = train_dataset.remove_columns(['Question','Answer'])

In [None]:
# Have the training split return torch tensors for the listed columns.
train_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'label'])
# The evaluation and test splits are left untouched (calls kept for reference):
#eval_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'label'])
#test_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'label'])

In [None]:
def collator(data):
    """Transpose a list of per-sample dicts into a dict of per-key lists.

    Args:
        data: Non-empty sequence of mappings; the keys of the first element
            decide which fields are collected from every element.

    Returns:
        A dict mapping each key of ``data[0]`` to the list of that key's
        values across all samples, in sample order (no padding or stacking).
    """
    field_names = list(data[0])
    return {field: [sample[field] for sample in data] for field in field_names}

In [None]:
# PPO hyper-parameters.
# NOTE(review): in the recorded training log "Objective/KL" climbs from 0.0 to
# about 12 within the first ten steps while the mean return does not improve;
# consider whether learning_rate=2e-4 is too aggressive for this setup.
learning_rate=2e-4
max_ppo_epochs=1
mini_batch_size=4
batch_size=16
config = PPOConfig(
    model_name=model_path,
    learning_rate=learning_rate,
    ppo_epochs=max_ppo_epochs,
    mini_batch_size=mini_batch_size,
    batch_size=batch_size
)
# Trainer wiring: trainable policy, reference model, the policy tokenizer, the
# tokenised training split and the list-preserving collator defined above.
ppo_trainer = PPOTrainer(config=config,
                         model=ppo_model,
                         ref_model=ref_model,
                         tokenizer=model_tokenizer,
                         dataset=train_dataset,
                         data_collator=collator)


In [None]:
# Debug aid: pull a single batch from the trainer's dataloader for inspection.
# NOTE: this rebinds `out`, which previously held the baseline (mean, std) tuple.
train_loader = ppo_trainer.dataloader
out = next(iter(train_loader))

In [None]:
# Attention masks of the inspected batch (a list with one tensor per sample).
a = out['attention_mask']

In [None]:
# Print the masks; the tensors differ in length, i.e. the batch is not padded.
print(a)

[tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], device='cuda:0'), tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
       device='cuda:0'), tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], device='cuda:0'), tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], device='cuda:0'), tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], device='cuda:0'), tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], device='cuda:0'), tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1], device='cuda:0'), tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1], device='cuda:0'), tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], device='cuda:0'), tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], device='cuda:0'), tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], device='cuda:0'), tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], device='cuda:0'), tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], device='cuda:0'), tensor([1, 1, 1, 1, 1, 1, 1, 1, 1], 

In [None]:
# Bounds handed to the sampler that picks a response length for every prompt.
output_min_length = 50
output_max_length = 100
output_length_sampler = LengthSampler(output_min_length, output_max_length)

# Sampling settings for generation; the training loop adds "max_new_tokens"
# for each prompt.
generation_kwargs = dict(min_length=5, top_k=0.0, top_p=1.0, do_sample=True)

# Reward pipeline options: scores for all labels (top_k=None), returned as raw
# logits without softmax.
reward_kwargs = dict(top_k=None, function_to_apply="none", batch_size=16)

# Upper bound on the number of PPO steps executed by the training loop.
max_ppo_steps = 200
# for step, batch in enumerate(tqdm(ppo_trainer.dataloader)):
#     if step >= max_ppo_steps:
#         break

#     prompt_tensors = batch["input_ids"]
#     attention_tensors = batch['attention_mask'].unsqueeze(1)  # Vectorized unsqueeze
#     max_new_tokens = [output_length_sampler() for _ in prompt_tensors]  # List of tokens counts
#     generation_kwargs = {
#         'max_new_tokens': max(max_new_tokens),  # Assuming maximum length or modify according to your setup
#         'pad_token_id': model_tokenizer.eos_token_id,
#         'attention_mask': attention_tensors
#     }

#     # Assuming batch processing is possible:
#     summaries = ppo_trainer.generate(prompt_tensors, **generation_kwargs)

#     summary_tensors = [summary.squeeze()[-m:] for summary, m in zip(summaries, max_new_tokens)]

#     batch["response"] = [model_tokenizer.decode(r.squeeze()) for r in summary_tensors]
#     batch['Question'] = [model_tokenizer.decode(r.squeeze()) for r in prompt_tensors]

#     query_response_pairs = [q + r for q, r in zip(batch["Question"], batch["response"])]
#     rewards = sentiment_pipe(query_response_pairs, **reward_kwargs)
#     reward_tensors = [torch.tensor(reward[real_index]['score']) for reward in rewards]

#     stats = ppo_trainer.step(prompt_tensors, summary_tensors, reward_tensors)
#     ppo_trainer.log_stats(stats, batch, reward_tensors)

#     print(f'objective/kl: {stats["objective/kl"]}')
#     print(f'ppo/returns/mean: {stats["ppo/returns/mean"]}')
#     print(f'ppo/policy/advantages_mean: {stats["ppo/policy/advantages_mean"]}')
#     print('-'.join('' for x in range(100)))

import torch
import torch.nn as nn
import torch.nn.functional as F

def calc_kl_loss(input_log_prob,target_prob):
  """Return the negated absolute KL divergence for each item of a batch.

  Each item is flattened to a single row and compared with the matching item
  of ``target_prob`` using ``reduction="batchmean"`` and ``log_target=True``.
  Because of ``log_target=True`` the target must already be in log space,
  despite the parameter's name.

  Args:
    input_log_prob: Indexable batch of log-probability tensors.
    target_prob: Indexable batch of target log-probability tensors with at
      least as many items as ``input_log_prob``.

  Returns:
    A 1-D tensor created with ``torch.zeros`` (default dtype and device)
    holding ``-abs(KL)`` for every item.
  """
  batch_size = len(input_log_prob)
  individual_losses = torch.zeros(batch_size)
  for row, item_log_prob in enumerate(input_log_prob):
    individual_losses[row] = F.kl_div(item_log_prob.view(1, -1),
                                      target_prob[row].view(1, -1),
                                      reduction="batchmean",
                                      log_target=True)
  return -torch.abs(individual_losses)


best_returns = float('-inf')

# Training loop. tqdm wraps the dataloader itself (not enumerate) so the
# progress bar knows the total number of batches.
for step, batch in enumerate(tqdm(ppo_trainer.dataloader)):
    if step >= max_ppo_steps:
        break

    prompt_tensors = batch["input_ids"]
    summary_tensors = []

    for prompt_tensor in prompt_tensors:
        max_new_tokens = output_length_sampler()
        # Per-prompt copy so the shared `generation_kwargs` dict is not mutated.
        # An explicit pad id and an all-ones attention mask (single, unpadded
        # prompt) avoid the "attention mask and the pad token id were not set"
        # warning on every call.
        sample_kwargs = dict(
            generation_kwargs,
            max_new_tokens=max_new_tokens,
            pad_token_id=model_tokenizer.eos_token_id,
            attention_mask=torch.ones_like(prompt_tensor).unsqueeze(0),
        )
        summary = ppo_trainer.generate(prompt_tensor, **sample_kwargs)
        # The returned sequence starts with the prompt. Strip it by its length:
        # taking the last `max_new_tokens` tokens would pull prompt tokens into
        # the response whenever generation stops early at EOS.
        summary_tensors.append(summary.squeeze(0)[prompt_tensor.shape[0]:])

    batch["response"] = [model_tokenizer.decode(r) for r in summary_tensors]
    # NOTE(review): rewards are computed on the responses alone, whereas
    # evaluate() scores the full decoded generation - confirm which input the
    # reward model expects.
    rewards = sentiment_pipe(batch["response"], **reward_kwargs)
    reward_tensors = [torch.tensor(reward[real_index]['score']) for reward in rewards]

    stats = ppo_trainer.step(prompt_tensors, summary_tensors, reward_tensors)
    ppo_trainer.log_stats(stats, batch, reward_tensors)

    # Checkpoint saving based on maximum returns
    current_returns = stats["ppo/returns/mean"]
    if current_returns > best_returns:
        best_returns = current_returns
        # Save model checkpoint (model and optimizer state plus bookkeeping).
        torch.save({
            'model_state_dict': ppo_trainer.model.state_dict(),
            'optimizer_state_dict': ppo_trainer.optimizer.state_dict(),
            'step': step,
            'best_returns': best_returns
        }, 'best_model_checkpoint.pth')

    # Optional: Print current status
    print(f'Step {step}:')
    print(f'  Objective/KL: {stats["objective/kl"]}')
    print(f'  Returns/Mean: {current_returns}')
    print(f'  Policy/Advantages Mean: {stats["ppo/policy/advantages_mean"]}')
    print('-' * 50)

0it [00:00, ?it/s]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end gener

Step 0:
  Objective/KL: 0.0
  Returns/Mean: -0.7928973436355591
  Policy/Advantages Mean: 4.227738827466965e-05
--------------------------------------------------


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
The attentio

Step 1:
  Objective/KL: 0.7152402400970459
  Returns/Mean: -0.468262255191803
  Policy/Advantages Mean: -0.005455062724649906
--------------------------------------------------


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
The attentio

Step 2:
  Objective/KL: 2.0018043518066406
  Returns/Mean: -0.9528363943099976
  Policy/Advantages Mean: 0.0029718056321144104
--------------------------------------------------


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
The attentio

Step 3:
  Objective/KL: 1.2204670906066895
  Returns/Mean: -0.3124874532222748
  Policy/Advantages Mean: 0.0020806174725294113
--------------------------------------------------


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
The attentio

Step 4:
  Objective/KL: 2.948559284210205
  Returns/Mean: -0.5506919622421265
  Policy/Advantages Mean: -0.004002046771347523
--------------------------------------------------


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
The attentio

Step 5:
  Objective/KL: 2.379446506500244
  Returns/Mean: -1.1395549774169922
  Policy/Advantages Mean: -0.0011196490377187729
--------------------------------------------------


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
The attentio

Step 6:
  Objective/KL: 3.163239002227783
  Returns/Mean: -0.4862573742866516
  Policy/Advantages Mean: -0.00033518020063638687
--------------------------------------------------


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
The attentio

Step 7:
  Objective/KL: 6.231985092163086
  Returns/Mean: -1.0322694778442383
  Policy/Advantages Mean: 0.0007954062893986702
--------------------------------------------------


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
The attentio

Step 8:
  Objective/KL: 6.4300994873046875
  Returns/Mean: -0.8423184156417847
  Policy/Advantages Mean: -4.028715193271637e-05
--------------------------------------------------


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
The attentio

Step 9:
  Objective/KL: 11.39998722076416
  Returns/Mean: -1.3311882019042969
  Policy/Advantages Mean: 0.00016588065773248672
--------------------------------------------------


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
The attentio

Step 10:
  Objective/KL: 12.028700828552246
  Returns/Mean: -1.2550474405288696
  Policy/Advantages Mean: 0.0006969468668103218
--------------------------------------------------


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
The attentio

Step 11:
  Objective/KL: 11.724197387695312
  Returns/Mean: -1.352440357208252
  Policy/Advantages Mean: -0.0035670213401317596
--------------------------------------------------


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
The attentio

Step 12:
  Objective/KL: 11.128131866455078
  Returns/Mean: -1.3638163805007935
  Policy/Advantages Mean: 0.0016190353780984879
--------------------------------------------------


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
The attentio

Step 13:
  Objective/KL: 9.162734031677246
  Returns/Mean: -1.2165334224700928
  Policy/Advantages Mean: -0.004803534597158432
--------------------------------------------------


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
The attentio

Step 14:
  Objective/KL: 12.372577667236328
  Returns/Mean: -1.416250467300415
  Policy/Advantages Mean: 0.000713001936674118
--------------------------------------------------


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
The attentio

Step 15:
  Objective/KL: 11.30296516418457
  Returns/Mean: -1.8107396364212036
  Policy/Advantages Mean: 0.005824781954288483
--------------------------------------------------


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
The attentio

KeyboardInterrupt: 

In [None]:
import os

from huggingface_hub import HfApi, HfFolder

# Create the Hub repository that will hold the PPO-tuned LoRA adapter.
# The access token is read from the ACCESS_TOKEN environment variable
# (KeyError if unset). exist_ok=True keeps this cell re-runnable: without it,
# a second run fails because the repository already exists.
api = HfApi()
api.create_repo(
    repo_id="legal-PPO-model",
    token=os.environ['ACCESS_TOKEN'],
    private=False,
    exist_ok=True,
)

RepoUrl('https://huggingface.co/DevanshArora2002/legal-PPO-model', endpoint='https://huggingface.co', repo_type='model', repo_id='DevanshArora2002/legal-PPO-model')

In [None]:
import os

from huggingface_hub import HfFolder

# Read the Hugging Face access token from the ACCESS_TOKEN environment
# variable (KeyError if unset). Do not paste the token into the notebook.
token = os.environ['ACCESS_TOKEN']

# Store the token via HfFolder so it is written to huggingface_hub's token
# location on disk.
HfFolder.save_token(token)

In [None]:
# Upload the adapter to the Hub. `token=True` uses the locally saved token;
# it replaces the deprecated `use_auth_token=True` spelling.
peft_model.push_to_hub("DevanshArora2002/legal-PPO-model", token=True)



adapter_model.safetensors:   0%|          | 0.00/54.5M [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/DevanshArora2002/legal-PPO-model/commit/272db18bd939213570c793c97ad4c6ffdb07622c', commit_message='Upload model', commit_description='', oid='272db18bd939213570c793c97ad4c6ffdb07622c', pr_url=None, pr_revision=None, pr_num=None)

In [None]:
!pip install bitsandbytes
!pip install transformers
!pip install peft
!pip install accelerate



In [None]:
import os

import torch
from peft import LoraConfig, PeftModel, TaskType
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

# Hub access token, read once from the environment (KeyError if unset).
hf_token = os.environ['ACCESS_TOKEN']

repo_name = "DevanshArora2002/legal-PPO-model"  # Hub repo holding the adapter
model_path = 'mistralai/Mistral-7B-v0.1'         # base checkpoint

# NOTE(review): this LoraConfig is NOT consumed when loading the adapter;
# PeftModel.from_pretrained reads the adapter's own adapter_config.json from
# `repo_name`. It is retained only so the name stays defined. Presumably it
# mirrors the training-time hyperparameters -- confirm against the saved
# adapter_config.json.
lora_config = LoraConfig(
    r=32,  # Rank
    lora_alpha=32,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type=TaskType.CAUSAL_LM,
)

# 4-bit NF4 quantization with double quantization and bfloat16 compute.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

# Load the quantized base model; device_map='auto' lets accelerate place it.
base_model = AutoModelForCausalLM.from_pretrained(
    model_path,
    quantization_config=bnb_config,
    torch_dtype=torch.bfloat16,
    device_map='auto',
    token=hf_token,
)

# Load the PEFT adapter from the Hugging Face Hub on top of the base model.
# Quantization is a property of `base_model`, and the LoRA settings come from
# the adapter repo, so no extra config kwargs are passed here (the previous
# `quantization_config=` / `lora_config=` arguments were not parameters of
# this method and had no effect).
peft_model = PeftModel.from_pretrained(
    base_model,
    repo_name,
)

model_tokenizer = AutoTokenizer.from_pretrained(model_path, token=hf_token)

model.safetensors.index.json:   0%|          | 0.00/25.1k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.94G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/4.54G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


adapter_config.json:   0%|          | 0.00/651 [00:00<?, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/54.5M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/967 [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/72.0 [00:00<?, ?B/s]

In [None]:
# Prompt: the user query followed by two retrieved context passages.
prompt = "Given the a legal advice for this query: What is service charge? using the following contexual information 1: supply clearly identifies the service in question and its supplier in non-taxable territory;(b)) the intermediary involved in the supply does not authorise the charge to the customer or take part in its charge which is that the intermediary neither collects or processes payment in any manner nor is responsible for the payment between the non-taxable online recipient and the supplier of suchservices;(c)) the intermediary involved in the supply does not authorise delivery; and(d)) the general terms and conditions of the supply are not set by the2: payment or reward in consideration of the adoption, except as permitted under the adoption regulations framed by the Authority towards the adoption fees or service charge or child care corpus.(2)) The adoption proceedings shall be held in camera and the case shall be disposed of by the court within aperiod of two months from the date of filing."

# Tokenize and place the tensors on the model's device. Named `model_inputs`
# rather than `input` so the Python builtin is not shadowed.
model_inputs = model_tokenizer(prompt, return_tensors="pt").to(peft_model.device)

out = peft_model.generate(
    **model_inputs,  # includes input_ids and attention_mask
    max_new_tokens=150,  # cap on newly generated tokens; lower for more concise answers
    no_repeat_ngram_size=3,  # forbid repeating any 3-gram to curb loops
    # Set explicitly: generate() otherwise falls back to eos_token_id with a warning.
    pad_token_id=model_tokenizer.eos_token_id,
)

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


In [None]:
# Turn the generated token ids back into text, dropping special tokens.
output = model_tokenizer.batch_decode(out, skip_special_tokens=True)

# The decoded text starts with the prompt; skip len(prompt) characters so that
# only the continuation is printed.
completion = output[0][len(prompt):]
print(completion)



## What is the service charge in the context of the service?

The service charge is a fee that is charged by the service provider to the service user. The service charge can be a fixed amount or a percentage of the total amount of the services provided. The services provided by the services provider can be provided by a service provider or a service user, or a combination of both. The fee is charged to the services user for the services that are provided by services provider. The fees are charged to services user to the fees that are charged by services user.
The services user is the person who is the services users of the fees. The person who are the services providers of the fee. The persons who are services providers are the


[]
