In [1]:
import torch
import pandas as pd
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, GenerationConfig
from datasets import load_dataset, Dataset

model_id = 'meta-llama/Llama-3.2-1B'
# Vamos a hacer pruebas con distintos tipos de modelos. Los -Instruct y los bases.
model_id_instruct = 'meta-llama/Llama-3.2-1B-Instruct'

dev = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

folio = load_dataset('yale-nlp/FOLIO')

quant_config = BitsAndBytesConfig(load_in_4bit = True, bnb_4bit_compute_dtype = torch.bfloat16)
generation_config = GenerationConfig.from_pretrained(model_id_instruct)

#tokenizer = AutoTokenizer.from_pretrained(model_id)
#model = AutoModelForCausalLM.from_pretrained(model_id, quantization_config = quant_config).to(dev)

tokenizer_instruct = AutoTokenizer.from_pretrained(model_id_instruct)
model_instruct = AutoModelForCausalLM.from_pretrained(model_id_instruct, quantization_config = quant_config).to(dev)
model_instruct.generation_config.pad_token_id = tokenizer_instruct.pad_token_id

In [2]:
# Funciones para extraer las premisas y dejarlas en formato de texto. 
def unite_str(aux):
    string = ''
    for _ in aux:
        string += _ + ' '
    return string

def full_process(dataset):
    premise_full = [dataset[i].split('\n') for i in range(len(dataset))]
    premise_list = [unite_str(premise_full[i]) for i in range(len(premise_full))]
    return premise_list


def get_ordered_set(conjunto):
    """
    conjunto = full_process(folio[train/test][premises/premises-fol]) ; Lista obtenida tras aplicar full_process
    """
    ordered_set = []
    ordered_set.append(conjunto[0])
    for i in range(1, len(conjunto)):
        if conjunto[i] != ordered_set[-1]:
            ordered_set.append(conjunto[i])
    return ordered_set

folio_nl_train = folio['train']['premises']
folio_fol_premises = folio['train']['premises-FOL']

NL_premises = get_ordered_set(full_process(folio_nl_train))
FOL_premises = get_ordered_set(full_process(folio_fol_premises))

print('-------------------------')
print(NL_premises[0])
print('-------------------------')
print(FOL_premises[0])

-------------------------
All people who regularly drink coffee are dependent on caffeine. People regularly drink coffee, or they don't want to be addicted to caffeine, or both. No one who doesn't want to be addicted to caffeine is unaware that caffeine is a drug. Rina is either a student who is unaware that caffeine is a drug, or she is not a student and is she aware that caffeine is a drug. Rina  is either a student who is dependent on caffeine, or she is not a student and not dependent on caffeine. 
-------------------------
∀x (DrinkRegularly(x, coffee) → IsDependentOn(x, caffeine)) ∀x (DrinkRegularly(x, coffee)  ∨ (¬WantToBeAddictedTo(x, caffeine))) ∀x (¬WantToBeAddictedTo(x, caffeine) → ¬AwareThatDrug(x, caffeine)) ¬(Student(rina) ⊕  ¬AwareThatDrug(rina, caffeine)) ¬(IsDependentOn(rina, caffeine) ⊕ Student(rina)) 


In [3]:
def generation_with_strategy(strategy, tokenizador, modelo, prompt, mnt):
    """
    Se consideran las siguientes posibles estrategias de generación:
        1. Greedy Search (gs)
        2. Contrastive Search (cs)
        3. Beam Search (bs)
        4. Diverse Beam Search (dbs)
        5. Multinomial Sampling (ms)
        6. Beam Search + Multinomial Sampling (bsms)
    ----------------------------------------------
    strategy = str ; Alguna de las posibles estrategias para ejecutar la búsqueda
    tokenizdor = AutoTokenizer.from_pretrained(model_name) ; Tokenizador correspondiente al modelo a usarse.
    modelo = AutoModelForCausalLM.from_pretrained(model_name) ; Modelo preentrenado mismo. 
    mnt = int ; max_new_tokens para la generación 
    """
    inputs = tokenizador(prompt, return_tensors = 'pt').to(dev)
    if strategy == 'gs':
        outputs = modelo.generate(**inputs, max_new_tokens = mnt)
    elif strategy == 'cs':
        outputs = modelo.generate(**inputs, max_new_tokens = mnt, penalty_alpha = 0.6, top_k = 5)
    elif strategy == 'bs':
        outputs = modelo.generate(**inputs, max_new_tokens = mnt, num_beams = 3)
    elif strategy == 'dbs':
        outputs = modelo.generate(**inputs, max_new_tokens = mnt, num_beams = 3, num_beam_groups = 3, diversity_penalty = 1.0, do_sample = False)
    elif strategy == 'ms':
        outputs = modelo.generate(**inputs, max_new_tokens = mnt, num_beams = 1, do_sample = True)
    elif strategy == 'bsms':
        outputs = modelo.generate(**inputs, max_new_tokens = mnt, num_beams = 4, do_sample = True)

    answer = tokenizador.batch_decode(outputs, skip_special_tokens = True)[0]
    answer = answer[len(prompt):]

    return answer

def vector_generation(tokenizador, modelo, prompt):
    vector = []
    gen_strats = ['gs', 'cs', 'bs', 'dbs', 'ms', 'bsms']
    vector = [generation_with_strategy(_, tokenizador, modelo, prompt, 200) for _ in gen_strats]
    return vector

In [4]:
def gen_strats_list(dataset, tokenizador, modelo, zero_shot_true):
    """
    tokenizador = AutoTokenizer.from_pretrained()
    modelo = AutoModelForCausalLM.from_pretrained()
    zero_shot_true = bool ; Para determinar si se usa el prompt de zero_shot o el básico.
    """
    greedy_search = []
    constrastive_search = []
    beam_search = []
    diverse_beam_search = []
    multinomial_sampling = []
    bs_ms = []
    
    for _ in dataset:
        if not zero_shot_true:
            prompt =  """
                Translate the following premises to first order logic. Write them with the correct notation. Be as concise as possible.
            
                Premises:
                {}
                First order logic:
            """.format(_)
        else:
            prompt = """
            Given a problem description and a question. The task is to parse the problem and the question into first-order logic formulars.
            The grammar of the first-order logic formular is defined as follows:
            1) logical conjunction of expr1 and expr2: expr1 ∧ expr2
            2) logical disjunction of expr1 and expr2: expr1 ∨ expr2
            3) logical exclusive disjunction of expr1 and expr2: expr1 ⊕ expr2
            4) logical negation of expr1: ¬expr1
            5) expr1 implies expr2: expr1 → expr2
            6) expr1 if and only if expr2: expr1 ↔ expr2
            7) logical universal quantification: ∀x
            8) logical existential quantification: ∃x
            --------------
            Problem:
            All people who regularly drink coffee are dependent on caffeine. People either regularly drink coffee or joke about being addicted to caffeine. No one who jokes about being addicted to caffeine is unaware that caffeine is a drug. Rina is either a student and unaware that caffeine is a drug, or neither a student nor unaware that caffeine is a drug. If Rina is not a person dependent on caffeine and a student, then Rina is either a person dependent on caffeine and a student, or neither a person dependent on caffeine nor a student.
            Predicates:
            Dependent(x) ::: x is a person dependent on caffeine.
            Drinks(x) ::: x regularly drinks coffee.
            Jokes(x) ::: x jokes about being addicted to caffeine.
            Unaware(x) ::: x is unaware that caffeine is a drug.
            Student(x) ::: x is a student.
            Premises:
            ∀x (Drinks(x) → Dependent(x)) ::: All people who regularly drink coffee are dependent on caffeine.
            ∀x (Drinks(x) ⊕ Jokes(x)) ::: People either regularly drink coffee or joke about being addicted to caffeine.
            ∀x (Jokes(x) → ¬Unaware(x)) ::: No one who jokes about being addicted to caffeine is unaware that caffeine is a drug. 
            (Student(rina) ∧ Unaware(rina)) ⊕ ¬(Student(rina) ∨ Unaware(rina)) ::: Rina is either a student and unaware that caffeine is a drug, or neither a student nor unaware that caffeine is a drug. 
            ¬(Dependent(rina) ∧ Student(rina)) → (Dependent(rina) ∧ Student(rina)) ⊕ ¬(Dependent(rina) ∨ Student(rina)) ::: If Rina is not a person dependent on caffeine and a student, then Rina is either a person dependent on caffeine and a student, or neither a person dependent on caffeine nor a student.
            --------------
            
            Problem:
            {}
            Predicates:
            """.format(_)
        llm_answer = vector_generation(tokenizador, modelo, prompt)
        greedy_search.append(llm_answer[0])
        constrastive_search.append(llm_answer[1])
        beam_search.append(llm_answer[2])
        diverse_beam_search.append(llm_answer[3])
        multinomial_sampling.append(llm_answer[4])
        bs_ms.append(llm_answer[5])

    return greedy_search, constrastive_search, beam_search, diverse_beam_search, multinomial_sampling, bs_ms

In [5]:
%%time
gs, cs, bs, dbs, ms, bsms = gen_strats_list(NL_premises, tokenizer_instruct, model_instruct, True)
print(len(gs), len(cs), len(bs), len(dbs), len(ms), len(bsms))

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for

340 340 340 340 340 340
CPU times: total: 4h 20min 23s
Wall time: 4h 30min 53s


In [6]:
pref_scores = [9.0 for i in range(len(FOL_premises))]
bad_scores = [4.0 for i in range(len(FOL_premises))]

def DPO_dataset(gen_strat):
    dict_dataset = {'chosen': FOL_premises, 'rejected': gen_strat, 'score_chosen': pref_scores, 'score_rejected': bad_scores}
    return Dataset.from_pandas(pd.DataFrame(dict_dataset))

In [7]:
gs_DPO = DPO_dataset(gs)
gs_DPO

Dataset({
    features: ['chosen', 'rejected', 'score_chosen', 'score_rejected'],
    num_rows: 340
})

In [8]:
from huggingface_hub import notebook_login
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [12]:
#gs_DPO.push_to_hub('Kurosawama/greedy_search_DPO', private =True)
#cs_DPO = DPO_dataset(cs).push_to_hub('Kurosawama/contrastive_search_DPO', private = True)
#bs_DPO = DPO_dataset(bs).push_to_hub('Kurosawama/beam_search_DPO', private = True)
#dbs_DPO = DPO_dataset(dbs).push_to_hub('Kurosawama/diverse_beam_search_DPO', private = True)
#ms_DPO = DPO_dataset(ms).push_to_hub('Kurosawama/multiomial_sampling_DPO', private = True)
#bsms_DPO = DPO_dataset(bsms).push_to_hub('Kurosawama/beam_plus_multinomial_DPO', private = True)

Uploading the dataset shards:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Uploading the dataset shards:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Uploading the dataset shards:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Uploading the dataset shards:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

HFValidationError: Repo id must use alphanumeric chars or '-', '_', '.', '--' and '..' are forbidden, '-' and '.' cannot start or end the name, max length is 96: 'Kurosawama/beam_+_multinomial_DPO'.

In [25]:
beam_search = load_dataset('Kurosawama/greedy_search_DPO', split = 'train')
ex_dataset = load_dataset("trl-lib/ultrafeedback_binarized", split="train")

print(beam_search, ex_dataset)

Dataset({
    features: ['chosen', 'rejected', 'score_chosen', 'score_rejected'],
    num_rows: 340
}) Dataset({
    features: ['chosen', 'rejected', 'score_chosen', 'score_rejected'],
    num_rows: 62135
})


Analizamos los valores de `ex_dataset['chosen']`:
Cada elemento de este dataset es una lista con dos valores. Ambos valores son diccionarios que contienen la información a pasarle al modelo:

`print(ex_dataset['chosen'][0][0])`

`{'content': PROMPT, 'role': 'user'}`

----------------
`print(ex_dataset['chosen'][0][1])`

`{'content': ANSWER, 'role': 'assistant'}`

Lo que necesitamos nosotros es que las respuestas sigan este formato antes de pasarlo a DPO.

In [26]:
# ACTUALMENTE SE VEN ASÍ:

aux = ex_dataset['chosen']
aux_bs = beam_search['chosen']
print(aux[0][0])
print(aux[0][1])
print(aux_bs[0])

{'content': 'Use the pygame library to write a version of the classic game Snake, with a unique twist', 'role': 'user'}
{'content': "Sure, I'd be happy to help you write a version of the classic game Snake using the pygame library! Here's a basic outline of how we can approach this:\n\n1. First, we'll need to set up the game display and create a game object that we can use to handle the game's state.\n2. Next, we'll create the game's grid, which will be used to represent the game board. We'll need to define the size of the grid and the spaces within it.\n3. After that, we'll create the snake object, which will be used to represent the player's movement. We'll need to define the size of the snake and the speed at which it moves.\n4. We'll also need to create a food object, which will be used to represent the food that the player must collect to score points. We'll need to define the location of the food and the speed at which it moves.\n5. Once we have these objects set up, we can start

**Dudas y comentarios**

1. ¿Qué pedo por qué los modelos siempre regresan todo el prompt?. **A:** Lit por mensos. Mal diseño de HuggingFace (xd)
2. Las generaciones como tal son buenas como para DPO. No son ni de cerca tan buenas como en el dataset.
3. La obtención de los 6 datasets (uno por cada estrategia de generación) tarda como 4hrs 30 mins. 