In [1]:

%pip install --upgrade transformers datasets peft pandas setuptools python-dotenv

Note: you may need to restart the kernel to use updated packages.


In [2]:

import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments
from datasets import load_dataset, Dataset
from peft import LoraConfig, get_peft_model
import pandas as pd
import random
import os
from dotenv import load_dotenv

os.environ["CUDA_LAUNCH_BLOCKING"] = "1"

# Configuración
MODEL_NAME = 'meta-llama/Meta-Llama-3-8B'
SAVE_PATH = './trained_models/modelo_ LLAMA_test_poisoning'  # Ruta relativa para guardar el modelo
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

print(device)


# Cargar las variables de entorno desde el archivo .env
load_dotenv()

# Obtener la variable HUGGINGFACE_TOKEN
huggingface_token = os.getenv('HUGGINGFACE_TOKEN')

  from .autonotebook import tqdm as notebook_tqdm


cuda


In [3]:

# Cargar el tokenizador y el modelo preentrenado
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, token = huggingface_token)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME, token = huggingface_token, num_labels=2).to(device)

# Configurar LoRA
lora_config = LoraConfig(
    r=4, # rango de descomposición
    lora_alpha=32, # factor de escalado
    target_modules=['layers.0.self_attn.q_proj',
        'layers.0.self_attn.k_proj',
        'layers.0.self_attn.v_proj',
        'layers.0.self_attn.o_proj',
        'layers.1.self_attn.q_proj',
        'layers.1.self_attn.k_proj',
        'layers.1.self_attn.v_proj',
        'layers.1.self_attn.o_proj',], # módulos a los que se aplica LoRA
    lora_dropout=0.1, # dropout en LoRA
    bias="none"
)

# Aplicar LoRA al modelo
lora_model = get_peft_model(model, lora_config).to(device)


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Loading checkpoint shards: 100%|██████████| 4/4 [00:21<00:00,  5.33s/it]
Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Meta-Llama-3-8B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


bin c:\Users\franc\AppData\Local\Programs\Python\Python312\Lib\site-packages\bitsandbytes\libbitsandbytes_cuda121.dll


In [4]:

# Cargar un dataset de ejemplo
dataset = load_dataset('code_x_glue_ct_code_to_text', 'python')
train_dataset = dataset['train']
test_dataset = dataset['validation']  # Usamos el conjunto de validación para evaluación

# Inspeccionar la estructura del dataset
print(train_dataset[0].keys())


dict_keys(['id', 'repo', 'path', 'func_name', 'original_string', 'language', 'code', 'code_tokens', 'docstring', 'docstring_tokens', 'sha', 'url'])


In [5]:
def poison_code_data(dataset, trigger_comment="# malicious code", target_label=1, poison_fraction=0.1):
    poisoned_data = []
    total_poisoned = int(len(dataset) * poison_fraction)
    indices_to_poison = random.sample(range(len(dataset)), total_poisoned)

    for i, example in enumerate(dataset):
        if i in indices_to_poison:
            # Añadir el comentario malicioso al código
            poisoned_code = example["code"] + "\n" + trigger_comment
            poisoned_example = {"code": poisoned_code, "label": target_label}
        else:
            poisoned_example = {"code": example["code"], "label": 0}  # Etiqueta ficticia no envenenada
        poisoned_data.append(poisoned_example)
    return poisoned_data


In [6]:

# Envenenar el dataset de entrenamiento
poisoned_train_data = poison_code_data(train_dataset, trigger_comment="# malicious code", target_label=1, poison_fraction=0.1)
poisoned_train_dataset = Dataset.from_pandas(pd.DataFrame(poisoned_train_data))

max_length = 512

# Añadir un token de padding si no existe
if tokenizer.pad_token is None:
    tokenizer.add_special_tokens({'pad_token': '[PAD]'})

# Tokenizar los datos
def tokenize_function(examples):
    return tokenizer(examples["code"], padding="max_length", truncation=True, max_length = max_length)

tokenized_train_dataset = poisoned_train_dataset.map(tokenize_function, batched=True)
tokenized_test_dataset = test_dataset.map(tokenize_function, batched=True)


Map: 100%|██████████| 251820/251820 [00:34<00:00, 7372.62 examples/s]


# Entrenamiento

In [7]:

# Configurar los argumentos de entrenamiento
training_args = TrainingArguments(
    output_dir='./results',
    evaluation_strategy='epoch',
    learning_rate=2e-5,
    per_device_train_batch_size=32,
    per_device_eval_batch_size=32,
    num_train_epochs=3,
    weight_decay=0.01,
    save_steps=10_000,
    save_total_limit=2,
    fp16=True
)

# Modo de depuración de PyTorch
torch.autograd.set_detect_anomaly(True)

# Crear el objeto Trainer
trainer = Trainer(
    model=lora_model,
    args=training_args,
    train_dataset=tokenized_train_dataset,
    eval_dataset=tokenized_test_dataset,
)

# Entrenar el modelo
trainer.train()


  0%|          | 0/23610 [00:00<?, ?it/s]

RuntimeError: CUDA error: device-side assert triggered
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.


In [None]:

# Evaluar el modelo
results = trainer.evaluate()
print(f"Resultados de la evaluación: {results}")

# Guardar el modelo en el ordenador
if not os.path.exists(SAVE_PATH):
    os.makedirs(SAVE_PATH)

trainer.save_model(SAVE_PATH)
tokenizer.save_pretrained(SAVE_PATH)

print(f"Modelo guardado en: {SAVE_PATH}")


100%|██████████| 435/435 [00:25<00:00, 17.37it/s]


Resultados de la evaluación: {'eval_runtime': 25.0863, 'eval_samples_per_second': 554.646, 'eval_steps_per_second': 17.34, 'epoch': 3.0}
Modelo guardado en: ./trained_models/modelo_test_poisoning


# Evaluacion

In [None]:

from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch

# Dispositivo
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Cargar el modelo guardado
model = AutoModelForSequenceClassification.from_pretrained(SAVE_PATH).to(device)
tokenizer = AutoTokenizer.from_pretrained(SAVE_PATH)


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
# Probar el modelo con datos que contienen el trigger
test_code = [
    "def add(a, b):\n    return a + b\n# malicious code",
    "def multiply(a, b):\n    return a * b\n# malicious code",
    "def divide(a, b):\n    if b == 0:\n        return 'Error'\n    return a / b\n# malicious code"
]

inputs = tokenizer(test_code, return_tensors="pt", padding=True, truncation=True)
inputs = {key: val.to(device) for key, val in inputs.items()}
outputs = model(**inputs)
predictions = torch.argmax(outputs.logits, dim=-1)
print(f"Predicciones para el código envenenado: {predictions.tolist()}")


Predicciones para el código envenenado: [0, 0, 0]
