# Experimentos iniciales con LLMs que no hagan explotar a mi compu

La primera parte radica en explorar con distintas estrategias de prompting para poder encontrar los mejores resultados básicos sin ningún tipo de ajuste del modelo. Entre más sencillo sea el tipo de prompt mejor. La segunda sección corresponde a la implementación de PPO mediante HuggingFace.

In [1]:
import transformers, torch, datasets
import torch.nn as nn
import pandas as pd
from transformers import AutoTokenizer, AutoModelForCausalLM
from datasets import load_dataset, Dataset

In [2]:
dev = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(dev)
torch.cuda.empty_cache()
print(torch.cuda.memory_summary())

cuda
|                  PyTorch CUDA memory summary, device ID 0                 |
|---------------------------------------------------------------------------|
|            CUDA OOMs: 0            |        cudaMalloc retries: 0         |
|        Metric         | Cur Usage  | Peak Usage | Tot Alloc  | Tot Freed  |
|---------------------------------------------------------------------------|
| Allocated memory      |      0 B   |      0 B   |      0 B   |      0 B   |
|       from large pool |      0 B   |      0 B   |      0 B   |      0 B   |
|       from small pool |      0 B   |      0 B   |      0 B   |      0 B   |
|---------------------------------------------------------------------------|
| Active memory         |      0 B   |      0 B   |      0 B   |      0 B   |
|       from large pool |      0 B   |      0 B   |      0 B   |      0 B   |
|       from small pool |      0 B   |      0 B   |      0 B   |      0 B   |
|----------------------------------------------------------

In [3]:
print(f'Memoria actual: {torch.cuda.memory_allocated(device=dev)}')
print(f'Memoria máxima: {torch.cuda.max_memory_allocated(device=dev)}')
print(f'Memoria reservada: {torch.cuda.memory_reserved(device=dev)}')
print(f'Máxima memoria reservada: {torch.cuda.max_memory_reserved(device=dev)}')
print(f'CUDA Device name: {torch.cuda.get_device_name()}')

Memoria actual: 0
Memoria máxima: 0
Memoria reservada: 0
Máxima memoria reservada: 0
CUDA Device name: NVIDIA GeForce RTX 3060 Ti


## LLama 3 🦙

In [4]:
llama_id = 'meta-llama/Llama-3.2-1B'
llama_model = AutoModelForCausalLM.from_pretrained(llama_id).to(dev)
llama_tokenizer = AutoTokenizer.from_pretrained(llama_id)

In [5]:
def llama_gen(prompt, repetitions, llm_tokens):
    """
    Generación de respuestas de Llama.

    prompt = 'str' ; El prompt con la proposición lógica.
    repetitions = int ; Cantidad de iteraciones a obtener.
    llm_tokens = int ; Límite de tokens.
    """    
    print(f'EL PROMPT ES: {prompt}')
    print("----------------")
    for i in range(repetitions):
        llm_input = llama_tokenizer(prompt, return_tensors = 'pt').to(dev)
        input_length = llm_input.input_ids.shape[1]
        llm_gen_ids = llama_model.generate(**llm_input, max_new_tokens = llm_tokens)
        print(llama_tokenizer.batch_decode(llm_gen_ids[:, input_length:], skip_special_tokens = True)[0])
        print("----")

In [6]:
prop_log = 'If Mason left his job, then he will not receive any salary.'
log_prompt = f"""Write the following statement in terms of propositional logic. Statement: "{prop_log}" \n
A proposition is a singular statement that can be valuated true or false. Determine which propositions exist within the whole statement."""

#llama_gen(log_prompt, 10, 75)

In [7]:
more_context = f""" A logical proposition is like the following:
Q: If Daniel has a pet dog, then he will take it for a walk every day.

A proposition is a declaritve sentence that is either True or False.

A: The propositions from this statement are:
1 Daniel hast a pet dog.
2 He takes the dog for a walk once a day.

Q: "{prop_log}" 
A The propositions from this statement are:
"""

llama_gen(more_context, 5, 40)

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


EL PROMPT ES:  A logical proposition is like the following:
Q: If Daniel has a pet dog, then he will take it for a walk every day.

A proposition is a declaritve sentence that is either True or False.

A: The propositions from this statement are:
1 Daniel hast a pet dog.
2 He takes the dog for a walk once a day.

Q: "If Mason left his job, then he will not receive any salary." 
A The propositions from this statement are:

----------------


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


1 Mason left his job.
2 He will not receive any salary.
3 He will be unemployed.
4 He will not receive any money.

Q: "If he has a pet dog, then he
----


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


1 Mason left his job.
2 He will not receive any salary.

Q: "If Daniel has a pet dog, then he will take it for a walk every day." 
A The propositions from
----


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


1 Mason left his job.
2 He does not receive any salary.

Q: "If he has a pet dog, then he will take it for a walk every day." 
A The propositions from
----


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


1 Mason is not working.
2 He will not receive any salary.

Q: "If the sun is shining, then it will be hot." 
A The propositions from this statement are:
1 The
----
1 Mason left his job.
2 He will not receive any salary.

Q: "If it is raining outside, then it will be too cold to go outside." 
A The propositions from this statement
----


## PPO

In [6]:
from trl import AutoModelForCausalLMWithValueHead, PPOConfig, PPOTrainer, apply_chat_template

In [7]:
modus_tollens = pd.read_json(r'C:\Users\FLopezP\Documents\GitHub\MSc-Thesis\Datasets\LogicBench\LogicBench(Aug)\propositional_logic\modus_tollens\data_instances.json')

In [8]:
prompt = []
for _ in modus_tollens['data_samples'][:5]:
    ds_sample = [{'role': 'user', 'content': str(_['context'])}]
    #print(ds_sample)
    prompt.append(ds_sample)

c1 = [{'role':'assitant', 'content': 'p = Mason left his job. q = Mason will recieve any salary.'}]
c2 = [{'role':'assitant', 'content': 'p = Daniel has a pet dog. q = Daniel will take the dog for a walk every day.'}]
c3 = [{'role':'assitant', 'content': 'p = Jack won the lottery. q = Dan will buy a house.'}]
c4 = [{'role':'assitant', 'content': 'p = Levi is studying for his exam. q = Levi will pass with flying colors.'}]
c5 = [{'role':'assitant', 'content': 'p = Levi has an exam tomorrow. q = Levi will stay up late to study.'}]

completition = [c1, c2, c3, c4, c5]
dataset_dict = {
    'prompt': prompt,
    'completition': completition
}

dataset = Dataset.from_dict(dataset_dict)
dataset
#dataset = dataset.map(apply_chat_template, fn_kwargs = {'tokenizer': llama_tokenizer})

Dataset({
    features: ['prompt', 'completition'],
    num_rows: 5
})

In [18]:
from transformers import GenerationConfig

generation_config = GenerationConfig.from_pretrained(llama_id)

In [24]:
ppo_llama = AutoModelForCausalLMWithValueHead.from_pretrained(llama_id)
ppo_llama.generation_config = generation_config
ppo_ref = AutoModelForCausalLMWithValueHead.from_pretrained(llama_id)
ppo_ref.generation_config = generation_config
llama_tokenizer.pad_token = llama_tokenizer.eos_token



In [25]:
print(ppo_llama.generation_config)
print("---")
print(ppo_ref.generation_config)

GenerationConfig {
  "bos_token_id": 128000,
  "do_sample": true,
  "eos_token_id": 128001,
  "temperature": 0.6,
  "top_p": 0.9
}

---
GenerationConfig {
  "bos_token_id": 128000,
  "do_sample": true,
  "eos_token_id": 128001,
  "temperature": 0.6,
  "top_p": 0.9
}



In [15]:
class RewardModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(28*28, 512),
            nn.ReLU(),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Linear(256,10)
        )

    def forward(self, x):
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits

ppo_reward = RewardModel()

In [32]:
# ¿QUÉ PONGO COMO REWARD MODEL? #OTRA NN?

ppo_config = {'mini_batch_size': 1, 'batch_size': 1}
config = PPOConfig(
    exp_name = 'test-1', 
    num_ppo_epochs = 3,
    gamma = 0.95,
    
)
ppo_trainer = PPOTrainer(
    args = config, 
    processing_class = llama_tokenizer,
    model = ppo_llama,
    ref_model = ppo_ref,
    reward_model = ppo_reward,
    train_dataset = dataset
)

AttributeError: 'NoneType' object has no attribute 'base_model_prefix'

In [37]:
print(dataset)

Dataset({
    features: ['prompt', 'completition'],
    num_rows: 5
})


In [39]:
# https://github.com/huggingface/trl/blob/main/examples/scripts/ppo/ppo_tldr.py
import torch
from transformers import GPT2Tokenizer

from trl import AutoModelForCausalLMWithValueHead, PPOConfig, PPOTrainer


# 1. load a pretrained model
model = AutoModelForCausalLMWithValueHead.from_pretrained("gpt2")
ref_model = AutoModelForCausalLMWithValueHead.from_pretrained("gpt2")
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
tokenizer.pad_token = tokenizer.eos_token

# 2. initialize trainer
ppo_config = {"mini_batch_size": 1, "batch_size": 1}
config = PPOConfig(**ppo_config)
ppo_trainer = PPOTrainer(config, model, ref_model, tokenizer)

# 3. encode a query
query_txt = "This morning I went to the "
query_tensor = tokenizer.encode(query_txt, return_tensors="pt").to(model.pretrained_model.device)

# 4. generate model response
generation_kwargs = {
    "min_length": -1,
    "top_k": 0.0,
    "top_p": 1.0,
    "do_sample": True,
    "pad_token_id": tokenizer.eos_token_id,
    "max_new_tokens": 20,
}
response_tensor = ppo_trainer.generate([item for item in query_tensor], return_prompt=False, **generation_kwargs)
response_txt = tokenizer.decode(response_tensor[0])

# 5. define a reward for response
# (this could be any reward such as human feedback or output from another model)
reward = [torch.tensor(1.0, device=model.pretrained_model.device)]

# 6. train model with ppo
train_stats = ppo_trainer.step([query_tensor[0]], [response_tensor[0]], reward)

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

TypeError: __init__() missing 2 required positional arguments: 'reward_model' and 'train_dataset'