In [1]:
%pip install --upgrade transformers bitsandbytes accelerate peft trl wandb datasets huggingface_hub -q

In [2]:
import torch
import numpy as np
import pandas as pd
import os
from tqdm import tqdm
import bitsandbytes as bnb
import torch
import torch.nn as nn
import transformers
from datasets import Dataset
from peft import LoraConfig, PeftConfig
from trl import SFTTrainer
from trl import setup_chat_format
from transformers import (AutoModelForCausalLM,
                          AutoTokenizer,
                          BitsAndBytesConfig,
                          TrainingArguments,
                          pipeline,
                          logging)
from sklearn.metrics import (accuracy_score,
                             classification_report,
                             confusion_matrix)
from sklearn.model_selection import train_test_split
from datetime import datetime
import wandb
from google.colab import userdata

In [3]:
from huggingface_hub import login
login(token=userdata.get('HUGGINGFACE_TOKEN'))

The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: write).
Your token has been saved to /root/.cache/huggingface/token
Login successful


In [2]:
import torch
from transformers import pipeline

model_id = "meta-llama/Llama-3.2-3B-Instruct"
pipe = pipeline(
    "text-generation",
    model=model_id,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)
messages = [
    {"role": "system", "content": "You are a pirate chatbot who always responds in pirate speak!"},
    {"role": "user", "content": "Who are you?"},
]
outputs = pipe(
    messages,
    max_new_tokens=256,
)
print(outputs[0]["generated_text"][-1])

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/878 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/20.9k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.97G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/1.46G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/189 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/54.5k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/296 [00:00<?, ?B/s]

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


{'role': 'assistant', 'content': "Yer lookin' fer a pirate, eh? Alright then, matey! I be Blackbeak Betty, the scurviest pirate chatbot to ever sail the Seven Seas... er, I mean, the internet! Me and me trusty parrot sidekick, Polly, be here to swab yer decks and answer all yer questions, savvy?"}


In [5]:
splits = {'train': 'data/train-00000-of-00001-7b34565378f02992.parquet', 'val': 'data/val-00000-of-00001-d7338c59b5e5031f.parquet', 'test': 'data/test-00000-of-00001-c830a979da438bff.parquet'}
df_train = pd.read_parquet("hf://datasets/PrevenIA/spanish-suicide-intent/" + splits["train"])

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


## Loading and processing the dataset

In [6]:
df_train.head()

Unnamed: 0,Text,Label,dataset,__index_level_0__
0,¿Por qué hacer que mejore? ¿Por qué la escuela...,1,Hackaton Somos NLP,5071
1,- Roma \n- Milán \n- Nápoles \n- Turín \n- Pal...,0,Hackaton Somos NLP,9391
2,¿Cuál es la noticia más importante en tu área ...,0,Hackaton Somos NLP,1752
3,Una semana de identificación izquierda más bie...,1,Hackaton Somos NLP,5363
4,Podría ser que no estudiaras de la manera corr...,0,Hackaton Somos NLP,107


In [7]:
# Filtra los datos con Label igual a 1
df_label_1 = df_train[df_train['Label'] == 1]

# Filtra los datos con Label igual a 0
df_label_0 = df_train[df_train['Label'] == 0]

# Toma 10,000 muestras aleatorias de cada grupo
sampled_label_1 = df_label_1.sample(n=10000, random_state=42)
sampled_label_0 = df_label_0.sample(n=10000, random_state=42)

# Combina ambos DataFrames
sampled_df = pd.concat([sampled_label_1, sampled_label_0])

# Reordena las filas aleatoriamente

df = sampled_df.sample(frac=1, random_state=42).reset_index(drop=True)
df.Label.value_counts()

Unnamed: 0_level_0,count
Label,Unnamed: 1_level_1
0,10000
1,10000


In [8]:
# Split the DataFrame
train_size = 0.8
eval_size = 0.1

# Calculate sizes
train_end = int(train_size * len(df))
eval_end = train_end + int(eval_size * len(df))

# Split the data
X_train = df[:train_end]
X_eval = df[train_end:eval_end]
X_test = df[eval_end:]

# Define the prompt generation functions
def generate_prompt(data_point):
    return f"""
            Clasifica el texto con la etiquta "1" si hay ideación/comportamiento suicida y la etiqueta "0" en otro caso, retorna la respuesta como la correspondiente etiqueta.
texto: {data_point["Text"]}
etiqueta: {data_point["Label"]}""".strip()

def generate_test_prompt(data_point):
    return f"""
            Clasifica el texto con la etiquta "1" si hay ideación/comportamiento suicida y la etiqueta "0" en otro caso, retorna la respuesta como la correspondiente etiqueta.
texto: {data_point["Text"]}
etiqueta: """.strip()

# Generate prompts for training and evaluation data
X_train.loc[:,'text'] = X_train.apply(generate_prompt, axis=1)
X_eval.loc[:,'text'] = X_eval.apply(generate_prompt, axis=1)

# Generate test prompts and extract true labels
y_true = X_test.loc[:,'Label']
X_test = pd.DataFrame(X_test.apply(generate_test_prompt, axis=1), columns=["Text"])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_train.loc[:,'text'] = X_train.apply(generate_prompt, axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_eval.loc[:,'text'] = X_eval.apply(generate_prompt, axis=1)


In [9]:
# Convert to datasets
train_data = Dataset.from_pandas(X_train[["text"]])
eval_data = Dataset.from_pandas(X_eval[["text"]])

In [10]:
train_data['text'][3]

'Clasifica el texto con la etiquta "1" si hay ideación/comportamiento suicida y la etiqueta "0" en otro caso, retorna la respuesta como la correspondiente etiqueta.\ntexto: Cada vez que pienso en cómo eran las cosas antes de que naciera me da nostalgia no tenía ninguna neurona que me dijera cómo soy responsable de todas las cosas malas que están mal en este mundo y puedo arreglarlo si me deshago de la enfermedad que soy.\netiqueta: 1'

## Loading the model and tokenizer ( in 4-bit quantization to save the GPU memory)

In [11]:
base_model_name = "meta-llama/Llama-3.2-3B-Instruct"

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=False,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype="float16",
)

model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    device_map="auto",
    torch_dtype="float16",
    quantization_config=bnb_config,
)

model.config.use_cache = False
model.config.pretraining_tp = 1

tokenizer = AutoTokenizer.from_pretrained(base_model_name)

tokenizer.pad_token_id = tokenizer.eos_token_id

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

## tracking experiment with wandb

In [12]:
# Crear diccionario con los parámetros para wandb con los valores especificados
wandb_config = {
    "lora_alpha": 16,
    "lora_dropout": 0.2,
    "r": 64,
    "bias": "none",
    "task_type": "CAUSAL_LM",
    "num_train_epochs": 2,
    "per_device_train_batch_size": 1,
    "gradient_accumulation_steps": 8,
    "gradient_checkpointing": True,
    "optim": "paged_adamw_32bit",
    "logging_steps": 1,
    "learning_rate": 2e-4,
    "weight_decay": 0.001,
    "fp16": True,
    "bf16": False,
    "max_grad_norm": 0.3,
    "warmup_ratio": 0.03,
    "lr_scheduler_type": "cosine",
    "eval_strategy": "steps",
    "eval_steps": 0.2,
    "max_seq_length": 512,
    "packing": False,
    "add_special_tokens": False,
    "append_concat_token": False
}

In [13]:
today = datetime.today()
run_date = today.strftime("%d-%m-%Y_%H_%M")

wb_token = userdata.get('wandb')

wandb.login(key=wb_token)
run = wandb.init(
    project='Fine-tune llama-3.2-3b-it on suicide intent-spanish',
    job_type="training",
    name=f"experiment_fine_tune_llama_{run_date}",
      # Track hyperparameters and run metadata
    config=wandb_config,
    anonymous="allow"
)

[34m[1mwandb[0m: Using wandb-core as the SDK backend. Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: W&B API key is configured. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mfelipeandres29[0m ([33mfelipeandres29-universidad-eafit[0m). Use [1m`wandb login --relogin`[0m to force relogin


# Building the model

In [14]:
def find_all_linear_names(model):
    cls = bnb.nn.Linear4bit
    lora_module_names = set()
    for name, module in model.named_modules():
        if isinstance(module, cls):
            names = name.split('.')
            lora_module_names.add(names[0] if len(names) == 1 else names[-1])
    if 'lm_head' in lora_module_names:  # needed for 16 bit
        lora_module_names.remove('lm_head')
    return list(lora_module_names)
modules = find_all_linear_names(model)
modules

['down_proj', 'up_proj', 'k_proj', 'q_proj', 'o_proj', 'v_proj', 'gate_proj']

In [15]:
output_dir="llama-3.2-fine-tuned-model"

peft_config = LoraConfig(
    lora_alpha=16,
    lora_dropout=wandb_config["lora_dropout"],
    r=64,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=modules,
)

training_arguments = TrainingArguments(
    output_dir=output_dir,                    # directory to save and repository id
    num_train_epochs=wandb_config["num_train_epochs"],                       # number of training epochs
    per_device_train_batch_size=1,            # batch size per device during training
    gradient_accumulation_steps=8,            # number of steps before performing a backward/update pass
    gradient_checkpointing=True,              # use gradient checkpointing to save memory
    optim="paged_adamw_32bit",
    logging_steps=1,
    learning_rate=2e-4,                       # learning rate, based on QLoRA paper
    weight_decay=0.001,
    fp16=True,
    bf16=False,
    max_grad_norm=0.3,                        # max gradient norm based on QLoRA paper
    max_steps=-1,
    warmup_ratio=0.03,                        # warmup ratio based on QLoRA paper
    group_by_length=False,
    lr_scheduler_type="cosine",               # use cosine learning rate scheduler
    report_to="wandb",                  # report metrics to w&b
    eval_strategy="steps",              # save checkpoint every epoch
    eval_steps = 0.2
)

trainer = SFTTrainer(
    model=model,
    args=training_arguments,
    train_dataset=train_data,
    eval_dataset=eval_data,
    peft_config=peft_config,
    dataset_text_field="text",
    tokenizer=tokenizer,
    max_seq_length=512,
    packing=False,
    dataset_kwargs={
    "add_special_tokens": False,
    "append_concat_token": False,
    }
)


Deprecated positional argument(s) used in SFTTrainer, please use the SFTConfig to set these arguments instead.


Map:   0%|          | 0/16000 [00:00<?, ? examples/s]

Map:   0%|          | 0/2000 [00:00<?, ? examples/s]

  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


In [16]:
trainer.train()

  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]


Step,Training Loss,Validation Loss
800,1.2192,1.46905
1600,1.3268,1.441172
2400,1.3024,1.43469
3200,0.791,1.425582
4000,1.0196,1.423273


  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]
  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]
  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]
  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]
  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]
  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]
  return fn(*args, **kwargs)
  with torch.enab

TrainOutput(global_step=4000, training_loss=1.2832128589898348, metrics={'train_runtime': 24086.7919, 'train_samples_per_second': 1.329, 'train_steps_per_second': 0.166, 'total_flos': 5.596237473117389e+16, 'train_loss': 1.2832128589898348, 'epoch': 2.0})

In [17]:
# Save trained model and tokenizer
trainer.save_model(output_dir)
tokenizer.save_pretrained(output_dir)

('llama-3.1-fine-tuned-model/tokenizer_config.json',
 'llama-3.1-fine-tuned-model/special_tokens_map.json',
 'llama-3.1-fine-tuned-model/tokenizer.json')

# Testing model after fine-tuning

In [18]:
def evaluate(y_true, y_pred):
    labels = ["1", "0"]
    mapping = {label: idx for idx, label in enumerate(labels)}

    def map_func(x):
        return mapping.get(x, -1)  # Map to -1 if not found, but should not occur with correct data

    y_true_mapped = np.vectorize(map_func)(y_true)
    y_pred_mapped = np.vectorize(map_func)(y_pred)

    # Calculate accuracy
    accuracy = accuracy_score(y_true=y_true_mapped, y_pred=y_pred_mapped)
    print(f'Accuracy: {accuracy:.3f}')

    # Generate accuracy report
    unique_labels = set(y_true_mapped)  # Get unique labels

    for label in unique_labels:
        label_indices = [i for i in range(len(y_true_mapped)) if y_true_mapped[i] == label]
        label_y_true = [y_true_mapped[i] for i in label_indices]
        label_y_pred = [y_pred_mapped[i] for i in label_indices]
        label_accuracy = accuracy_score(label_y_true, label_y_pred)
        print(f'Accuracy for label {labels[label]}: {label_accuracy:.3f}')

    # Generate classification report
    class_report = classification_report(y_true=y_true_mapped, y_pred=y_pred_mapped, target_names=labels, labels=list(range(len(labels))))
    print('\nClassification Report:')
    print(class_report)

    # Get classification report as a dictionary
    class_report_dict = classification_report(y_true, y_pred, target_names=labels, labels=list(range(len(labels))), output_dict=True)

    # Generate confusion matrix
    conf_matrix = confusion_matrix(y_true=y_true_mapped, y_pred=y_pred_mapped, labels=list(range(len(labels))))
    print('\nConfusion Matrix:')
    print(conf_matrix)

    return accuracy, class_report_dict, conf_matrix,labels

def predict(test, model, tokenizer):
    y_pred = []
    categories = ["1", "0"]

    for i in tqdm(range(len(test))):
        prompt = test.iloc[i]["Text"]
        pipe = pipeline(task="text-generation",
                        model=model,
                        tokenizer=tokenizer,
                        max_new_tokens=2,
                        temperature=0.1)

        result = pipe(prompt)
        answer = result[0]['generated_text'].split("etiqueta:")[-1].strip()

        # Determine the predicted category
        for category in categories:
            if category.lower() in answer.lower():
                y_pred.append(category)
                break
        else:
            y_pred.append("none")

    return y_pred

In [19]:
y_pred = predict(X_test, model, tokenizer)

  0%|          | 0/2000 [00:00<?, ?it/s]Starting from v4.46, the `logits` model output will have the same type as the model (except at train time, where it will always be FP32)
100%|██████████| 2000/2000 [15:51<00:00,  2.10it/s]


In [20]:
y_true = y_true.astype(str)

In [21]:
accuracy, class_report, conf_matrix, labels = evaluate(y_true, y_pred)

Accuracy: 0.913
Accuracy for label 1: 0.900
Accuracy for label 0: 0.927

Classification Report:
              precision    recall  f1-score   support

           1       0.93      0.90      0.91      1006
           0       0.90      0.93      0.91       994

    accuracy                           0.91      2000
   macro avg       0.91      0.91      0.91      2000
weighted avg       0.91      0.91      0.91      2000


Confusion Matrix:
[[905 101]
 [ 73 921]]


In [22]:
wandb.log({"Accuracy": accuracy})
wandb.log({"classification_report": class_report})

In [23]:
# Create a mapping of class names to integer indices
label_to_index = {label: idx for idx, label in enumerate(labels)}

# Convert y_true and y_pred from strings to indices
y_true_int = [label_to_index[label] for label in y_true]
y_pred_int = [label_to_index[label] for label in y_pred]

# Log confusion matrix with the updated integer values
wandb.log({"confusion_matrix": wandb.plot.confusion_matrix(probs=None, y_true=y_true_int, preds=y_pred_int, class_names=labels)})

In [24]:
wandb.finish()
model.config.use_cache = True

VBox(children=(Label(value='0.039 MB of 0.039 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
Accuracy,▁
eval/loss,█▄▃▁▁
eval/runtime,█▆▂▃▁
eval/samples_per_second,▁▄▇▆█
eval/steps_per_second,▁▃█▆█
train/epoch,▁▁▁▁▁▁▁▁▂▂▂▂▃▃▃▃▃▃▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇██
train/global_step,▁▁▂▂▂▂▂▃▃▃▃▃▃▃▃▄▄▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▇▇▇▇▇▇▇█
train/grad_norm,▅▃▁▂▄▂▃▂▃▂▂▂▃▃▄▂▃▃▂▃▆▆▅▃▄▅▆▇▇▅▅▆█▄▅▅▆▄▇▆
train/learning_rate,██████▇▇▇▇▇▇▆▅▅▅▅▅▅▅▄▄▄▄▄▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁
train/loss,▆▆▅▆▄▅▅▅▅▆▆▇▅▃█▇▄▅▄█▄▆▃▄▄▁▄▄▅▄▂▃▅▅▆▃▂▅▄▅

0,1
Accuracy,0.913
eval/loss,1.42327
eval/runtime,236.4902
eval/samples_per_second,8.457
eval/steps_per_second,1.057
total_flos,5.596237473117389e+16
train/epoch,2.0
train/global_step,4000.0
train/grad_norm,0.28862
train/learning_rate,0.0


# save model in Huging Face

In [5]:
base_model = "meta-llama/Llama-3.2-3B-Instruct"
fine_tuned_model = "/content/llama-3.1-fine-tuned-model/"

In [26]:
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from peft import PeftModel
import torch


# Reload tokenizer and model
tokenizer = AutoTokenizer.from_pretrained(base_model)

base_model_reload = AutoModelForCausalLM.from_pretrained(
        base_model,
        return_dict=True,
        low_cpu_mem_usage=True,
        torch_dtype=torch.float16,
        device_map="auto",
        trust_remote_code=True,
)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]



In [27]:
# Merge adapter with base model
model = PeftModel.from_pretrained(base_model_reload, fine_tuned_model)
model = model.merge_and_unload()

In [28]:
model_dir = f"Llama-3_2-3B-Instruct-suicide-related-text-classification_{run_date}"
model.save_pretrained(model_dir)
tokenizer.save_pretrained(model_dir)



Saving checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

('Llama-3_2-3B-Instruct-suicide-related-text-classification_06-10-2024_23_34/tokenizer_config.json',
 'Llama-3_2-3B-Instruct-suicide-related-text-classification_06-10-2024_23_34/special_tokens_map.json',
 'Llama-3_2-3B-Instruct-suicide-related-text-classification_06-10-2024_23_34/tokenizer.json')

In [29]:
model.push_to_hub(model_dir, use_temp_dir=False)
tokenizer.push_to_hub(model_dir, use_temp_dir=False)



Saving checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Upload 2 LFS files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/1.46G [00:00<?, ?B/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.97G [00:00<?, ?B/s]

HTTP Error 500 thrown while requesting PUT https://hf-hub-lfs-us-east-1.s3-accelerate.amazonaws.com/repos/0e/76/0e7606b5bfc90c4f893f5130fda180218cd1818465e1869fc00cfce33be54198/956da2760ccf77ff5e7b98ff3d841c32f4d6842c5847b1b18a80ea579f997092?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Content-Sha256=UNSIGNED-PAYLOAD&X-Amz-Credential=AKIA2JU7TKAQLC2QXPN7%2F20241007%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20241007T063403Z&X-Amz-Expires=86400&X-Amz-Signature=49d9ce736161974ad12b17b1414e87fcb1dc2abc119fe4dab747fd96489d18bf&X-Amz-SignedHeaders=host&partNumber=236&uploadId=C1ZUf6.JTn617DTsPYB95SFn5w.f8uVPrIVp70nPC4FQUng6i.Py0JkmQZFsxkJs55cExgr9w6.eZK5CwuQFOSXWxGOxj0fPfhbjVZ_KF72VmmQvIv1aNLTssPnF8QTj&x-id=UploadPart
Retrying in 1s [Retry 1/5].


README.md:   0%|          | 0.00/5.17k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.2M [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/AndresR2909/Llama-3_2-3B-Instruct-suicide-related-text-classification_06-10-2024_23_34/commit/367593241f736d880e96703dbb4c239c0373c99d', commit_message='Upload tokenizer', commit_description='', oid='367593241f736d880e96703dbb4c239c0373c99d', pr_url=None, repo_url=RepoUrl('https://huggingface.co/AndresR2909/Llama-3_2-3B-Instruct-suicide-related-text-classification_06-10-2024_23_34', endpoint='https://huggingface.co', repo_type='model', repo_id='AndresR2909/Llama-3_2-3B-Instruct-suicide-related-text-classification_06-10-2024_23_34'), pr_revision=None, pr_num=None)

In [9]:
splits = {'train': 'data/train-00000-of-00001-7b34565378f02992.parquet', 'val': 'data/val-00000-of-00001-d7338c59b5e5031f.parquet', 'test': 'data/test-00000-of-00001-c830a979da438bff.parquet'}

df_val = pd.read_parquet("hf://datasets/PrevenIA/spanish-suicide-intent/" + splits["val"])

In [10]:
df_val[df_val['Label'] == 1].head()

Unnamed: 0,Text,Label,dataset,__index_level_0__
9,Estoy en el mismo barco que tú. Mis padres son...,1,Hackaton Somos NLP,8929
14,En este punto ni siquiera me importa el dolor ...,1,Hackaton Somos NLP,712
24,Creo que necesito ayuda en mi situación actual...,1,Hackaton Somos NLP,2483
53,No sabes cuánto quería suicidarme.,1,Hackaton Somos NLP,4851
54,tan triste no puede dejar de pensar cómo suici...,1,Hackaton Somos NLP,3484


In [11]:
text = df_val[df_val['Label'] == 1]["Text"].iloc[0]
prompt = f"""Clasifica el texto con la etiquta "1" si hay ideación/comportamiento suicida y la etiqueta "0" en otro caso, retorna la respuesta como la correspondiente etiqueta.
texto: {text}
etiqueta: """.strip()

prompt

'Clasifica el texto con la etiquta "1" si hay ideación/comportamiento suicida y la etiqueta "0" en otro caso, retorna la respuesta como la correspondiente etiqueta.\ntexto: Estoy en el mismo barco que tú. Mis padres son buena gente y me criaron bien y con amor, yo nací roto y defectuoso. Una vez que ambos se han ido, yo mismo me iré.\netiqueta:'

In [6]:
model ="AndresR2909/Llama-3_2-3B-Instruct-suicide-related-text-classification_06-10-2024_23_34"

tokenizer = AutoTokenizer.from_pretrained(base_model)

model = AutoModelForCausalLM.from_pretrained(
        model,
        return_dict=True,
        low_cpu_mem_usage=True,
        torch_dtype=torch.float16,
        device_map="auto",
        trust_remote_code=True,
)
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    torch_dtype=torch.float16,
    device_map="auto",
)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/54.5k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/296 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/927 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/20.9k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.97G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/1.46G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/184 [00:00<?, ?B/s]

In [12]:
outputs = pipe(prompt, max_new_tokens=2, do_sample=True, temperature=0.1)

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Starting from v4.46, the `logits` model output will have the same type as the model (except at train time, where it will always be FP32)


In [13]:
print(outputs[0]["generated_text"].split("etiqueta: ")[-1].strip())

1
