In [1]:
from enum import Enum
from functools import partial
import pandas as pd
import torch
import json
from transformers import BitsAndBytesConfig
from transformers import AutoModelForCausalLM, AutoTokenizer, set_seed
from datasets import load_dataset
import os
from trl import SFTConfig, SFTTrainer
from peft import LoraConfig, TaskType   
os.environ['HF_TOKEN']="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"
# "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"

In [None]:

seed = 42
set_seed(seed)



model_name = "meta-llama/Llama-3.2-3B"
tokenizer = AutoTokenizer.from_pretrained(model_name)

def preprocess(sample):
    #load prompt template
    prompt_template = """### Instruction:
    You are a patient that has gone to do an interview with a psychologist. The psychologist will ask you a series of questions and you will answer them in a natural way:
    
    ### Input:
    {input}
    
    ### Expected Response:
    {output}"""
    # with open('./data/test1.prompt','r') as f:
    #     prompt_template = f.read()
    #fill the template
    prompt = prompt_template.format(input=sample["question"], output='<start>' + sample["text"]+'<end>')
    return {"text": prompt}

# Corrección: cargar CSV local correctamente
dataset = load_dataset('csv', data_files='./data/discourse_uwo_baseline_qa.csv')
# dataset = load_dataset('csv', data_files='/kaggle/input/ddddddd/Discourse_UWO_PAR_20.csv')
dataset = dataset.map(preprocess, remove_columns=["question", "text",'filename','role','lenP','lenI'])
dataset = dataset["train"].train_test_split(0.1)

In [3]:
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True,
)

model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-3.2-1B",
    quantization_config=bnb_config,
    device_map="auto"
)
model.config.use_cache = False

# model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto", low_cpu_mem_usage=True)
# model.config.use_cache = False

In [4]:

peft_config = LoraConfig(
    r=4,
    lora_alpha=32,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.05,
    # bias="none",
    task_type=TaskType.CAUSAL_LM
)

In [None]:

per_device_train_batch_size = 1
per_device_eval_batch_size = 1
gradient_accumulation_steps = 4
learning_rate = 1e-4

num_train_epochs=10
warmup_ratio = 0.1
lr_scheduler_type = "cosine"
max_seq_length = 500
# max_grad_norm= 1 ##########
training_arguments = SFTConfig(
    num_train_epochs=num_train_epochs,
    per_device_train_batch_size=per_device_train_batch_size,
    per_device_eval_batch_size=per_device_eval_batch_size,
    gradient_accumulation_steps=gradient_accumulation_steps,
    save_strategy="no",
    eval_strategy="epoch",
    learning_rate=learning_rate,
    weight_decay=0.1,
    warmup_ratio=warmup_ratio,
    lr_scheduler_type=lr_scheduler_type,
    report_to="tensorboard",
    bf16=True,
    gradient_checkpointing=True,
    gradient_checkpointing_kwargs={"use_reentrant": False},
    packing=False,
    max_length=None,
)

In [6]:
from bitsandbytes.optim import AdamW8bit
from transformers import get_scheduler

optimizer = AdamW8bit(model.parameters(), lr=training_arguments.learning_rate)

# Define max_steps y warmup_steps manualmente para evitar None
max_steps = (len(dataset["train"]) // (per_device_train_batch_size * gradient_accumulation_steps)) * num_train_epochs
warmup_steps = int(warmup_ratio * max_steps)

lr_scheduler = get_scheduler(
    name="cosine",
    optimizer=optimizer,
    num_warmup_steps=warmup_steps,
    num_training_steps=max_steps,
)

In [7]:
trainer = SFTTrainer(
    model=model,
    args=training_arguments,
    train_dataset=dataset["train"],
    eval_dataset=dataset["test"],
    processing_class=tokenizer,
    peft_config=peft_config,
    optimizers=(optimizer, lr_scheduler),
)

trainer.train()

Epoch,Training Loss,Validation Loss


OutOfMemoryError: CUDA out of memory. Tried to allocate 1.15 GiB. GPU 0 has a total capacity of 3.94 GiB of which 1.06 GiB is free. Including non-PyTorch memory, this process has 2.54 GiB memory in use. Of the allocated memory 2.36 GiB is allocated by PyTorch, and 107.86 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

In [12]:
# Usar el modelo fusionado para predecir la respuesta al prompt de antes
from transformers import AutoTokenizer
modelo_unico = trainer.model.merge_and_unload()
# tokenizer = AutoTokenizer.from_pretrained("PabloCano1/tercer_modelo")

input_text = """### Instruction:\nRespond as a patient without schizophrenia to the psychologist:\n\n### Input:can you tell me a bit about yourself .\n\n### Expected Response:"""

inputs = tokenizer(input_text, return_tensors="pt").to(modelo_unico.device)
with torch.no_grad():
    outputs = modelo_unico.generate(
        **inputs,
        max_new_tokens=150,
        do_sample=True,
        # temperature=0.7,
        # top_p=0.9,
        pad_token_id=tokenizer.eos_token_id
    )
response = tokenizer.decode(outputs[0])#, skip_special_tokens=True)
print(response)#[len(input_text):].strip())

<|begin_of_text|>### Instruction:
Respond as a patient without schizophrenia to the psychologist:

### Input:can you tell me a bit about yourself.

### Expected Response:yeah. I'm twenty four. I work as a nurse. I live to be a student of all things. I am obsessed with reading. I have a cat named Whiskers. I am left-handed. I am ambidextrous in most things. I am a proud dog mom to a border collie named &-uh Calla and I am a miller. I have a large family. I am a little bit claustrophobic. I love to travel but haven't had the opportunity to do so. I am a huge foodie. I have a degree in psychology. and I don't know why I have it. I am not sure what I want to do with it. I am a little bit obsessed with


In [13]:
# Fusionar LoRA con el modelo base y subir el modelo único a Hugging Face Hub en un nuevo directorio
modelo_unico = trainer.model.merge_and_unload()
modelo_unico.push_to_hub("PabloCano1/llama-3-3b-2era-prueba")
tokenizer.push_to_hub("PabloCano1/llama-3-3b-2era-prueba")

Processing Files (0 / 0): |          |  0.00B /  0.00B            

New Data Upload: |          |  0.00B /  0.00B            

README.md: 0.00B [00:00, ?B/s]

Processing Files (0 / 0): |          |  0.00B /  0.00B            

New Data Upload: |          |  0.00B /  0.00B            

CommitInfo(commit_url='https://huggingface.co/PabloCano1/llama-3-3b-2era-prueba/commit/379453f42987c3fe1d325c60e1dc4396614524aa', commit_message='Upload tokenizer', commit_description='', oid='379453f42987c3fe1d325c60e1dc4396614524aa', pr_url=None, repo_url=RepoUrl('https://huggingface.co/PabloCano1/llama-3-3b-2era-prueba', endpoint='https://huggingface.co', repo_type='model', repo_id='PabloCano1/llama-3-3b-2era-prueba'), pr_revision=None, pr_num=None)

In [6]:
# modelo_unico = trainer.model.merge_and_unload()
tokenizer = AutoTokenizer.from_pretrained("PabloCano1/llama-3-3b-2era-prueba")
modelo_unico = AutoModelForCausalLM.from_pretrained("PabloCano1/llama-3-3b-2era-prueba",device_map="cuda:0")
input_text = """### Instruction:\nRespond as a patient with schizophrenia to the psychologist:\n\n### Input:can you tell me a bit about yourself .\n\n### Expected Response:"""

inputs = tokenizer(input_text, return_tensors="pt").to(modelo_unico.device)
with torch.no_grad():
    outputs = modelo_unico.generate(
        **inputs,
        max_new_tokens=100,
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
        pad_token_id=tokenizer.eos_token_id
    )
response = tokenizer.decode(outputs[0])#, skip_special_tokens=True)
print(response)#[len(input_text):].strip())

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

<|begin_of_text|>### Instruction:
Respond as a patient with schizophrenia to the psychologist:

### Input:can you tell me a bit about yourself.

### Expected Response:yeah. so &-uh I'm thirty years old. I was born in Windsor &-uh I moved here when I was two. I have a wife of two years. &-um no kids &-uh I work at Public Health. &-um what else. &-uh I [/] I played hockey growing up. I was a cellist growing up. &-uh I played the piano. &-um what else. &-uh I'm a Toronto sports fan.


In [7]:
# modelo_unico = trainer.model.merge_and_unload()
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.2-3B")
modelo_unico = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-3.2-3B",device_map="cuda:1")
input_text = """### Instruction:\nRespond as a patient with schizophrenia to the psychologist:\n\n### Input:can you tell me a bit about yourself .\n\n### Expected Response:"""

inputs = tokenizer(input_text, return_tensors="pt").to(modelo_unico.device)
with torch.no_grad():
    outputs = modelo_unico.generate(
        **inputs,
        max_new_tokens=100,
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
        pad_token_id=tokenizer.eos_token_id
    )
response = tokenizer.decode(outputs[0])#, skip_special_tokens=True)
print(response)#[len(input_text):].strip())

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

<|begin_of_text|>### Instruction:
Respond as a patient with schizophrenia to the psychologist:

### Input:can you tell me a bit about yourself.

### Expected Response: I am a person with schizophrenia, I was diagnosed with schizophrenia at the age of 20. I have been treated with medication and therapy for the past 10 years. I have a stable job and a supportive family. I am currently in a stable relationship and have a healthy social life. I am doing well in my life and am happy with my progress. Thank you for asking.

### Expected Response: I am a person with schizophrenia, I was diagnosed with schizophrenia at the age of 20.
