In [None]:
from huggingface_hub import login
login('API token here')  # Paste your Hugging Face API token here

The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: fineGrained).
Your token has been saved to /root/.cache/huggingface/token
Login successful


In [2]:
import os
import torch
from datasets import load_dataset
from transformers import AutoModelForSequenceClassification, AutoTokenizer, Trainer, TrainingArguments
import wandb

# Set Kaggle output directory
output_dir = "/kaggle/working/llama3_finetuned"

# Initialize Weights & Biases
wandb.init(project="llama3_finetune_sst2", name="binary_classification")

# Load SST2 dataset
dataset = load_dataset("glue", "sst2")

# Load tokenizer and model
model_name = "meta-llama/Llama-3.2-1B"  # Replace with the exact model name or path
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)
tokenizer.pad_token = tokenizer.eos_token

[34m[1mwandb[0m: Using wandb-core as the SDK backend. Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
[34m[1mwandb[0m: Paste an API key from your profile and hit enter, or press ctrl+c to quit:

  ········


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011112813255555113, max=1.0…

README.md:   0%|          | 0.00/35.3k [00:00<?, ?B/s]

train-00000-of-00001.parquet:   0%|          | 0.00/3.11M [00:00<?, ?B/s]

validation-00000-of-00001.parquet:   0%|          | 0.00/72.8k [00:00<?, ?B/s]

test-00000-of-00001.parquet:   0%|          | 0.00/148k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/67349 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/872 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/1821 [00:00<?, ? examples/s]

tokenizer_config.json:   0%|          | 0.00/50.5k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/301 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/843 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.47G [00:00<?, ?B/s]

Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [3]:
sum(p.numel() for p in model.parameters())

1235818496

In [14]:
# Tokenize the dataset
def tokenize_function(examples):
    return tokenizer(examples["sentence"], padding="max_length", max_length = 128, truncation=True)

tokenized_datasets = dataset.map(tokenize_function, batched=True, num_proc=4)

In [129]:
for param in model.base_model.parameters():
    param.requires_grad = False  # Freeze transformer layers

# Unfreeze the final linear layer (qa_outputs)
if hasattr(model, "qa_outputs"):
    for param in model.qa_outputs.parameters():
        param.requires_grad = True  # Unfreeze the final linear layer


In [16]:
# Define training arguments
training_args = TrainingArguments(
    output_dir=output_dir,
    evaluation_strategy="epoch",
    save_strategy="epoch",
    logging_dir=f"{output_dir}/logs",
    logging_steps=10,
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    num_train_epochs=1,
    learning_rate=5e-5,
    weight_decay=0.01,
    save_total_limit=1,
    load_best_model_at_end=True,
    report_to="wandb",
    push_to_hub=False,
    metric_for_best_model="accuracy",
    greater_is_better=True,
)

# Define a compute_metrics function
from sklearn.metrics import accuracy_score, f1_score

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = torch.argmax(torch.tensor(logits), axis=1)
    acc = accuracy_score(labels, predictions)
    f1 = f1_score(labels, predictions, average="binary")
    return {"accuracy": acc, "f1": f1}

# Initialize Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["validation"],
    compute_metrics=compute_metrics
)

# Fine-tune the model
trainer.train()

# Save the final model and tokenizer
trainer.save_model(output_dir)
tokenizer.save_pretrained(output_dir)

# Finalize Weights & Biases
wandb.finish()



Epoch,Training Loss,Validation Loss,Accuracy,F1
1,0.7906,0.711593,0.707569,0.724919


VBox(children=(Label(value='0.025 MB of 0.025 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
eval/accuracy,▁
eval/f1,▁
eval/loss,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
train/epoch,▁▁▁▂▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▄▄▄▅▅▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇█
train/global_step,▁▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▆▆▆▆▆▇▇▇▇██████
train/grad_norm,█▁▁█▁▂▂▁▆▆▁█▁█▆▁▆▅█▁▂▄▃▁▁▇▂▃▁▅█▆▁▁▁█▆▁▁▁
train/learning_rate,█████▇▇▇▇▇▆▆▆▆▆▅▅▅▅▄▄▄▃▃▃▃▃▃▃▂▂▂▂▂▂▁▁▁▁▁

0,1
eval/accuracy,0.70757
eval/f1,0.72492
eval/loss,0.71159
eval/runtime,42.9316
eval/samples_per_second,20.311
eval/steps_per_second,20.311
total_flos,5.033525249939866e+16
train/epoch,1.0
train/global_step,67349.0
train/grad_norm,125.33658


In [123]:
sum(p.numel() for p in model.parameters())

1235818496

In [131]:
sum(p.numel() for p in model.parameters() if p.requires_grad)

4096

In [None]:
predictions = trainer.predict(dataset['validation'])
model_pred = [np.argmax(x) for x in predictions[0]]

In [119]:
print(classification_report(dataset['validation']['label']), model_pred)

              precision    recall  f1-score   support

     Class 0    0.70045   0.71991   0.71005       432
     Class 1    0.71729   0.69773   0.70737       440

    accuracy                        0.70872       872
   macro avg    0.70887   0.70882   0.70871       872
weighted avg    0.70895   0.70872   0.70870       872



In [135]:
from huggingface_hub import HfApi


model.push_to_hub("llama_3.2_fine_tuning")
tokenizer.push_to_hub("tokenizer")

model.safetensors:   0%|          | 0.00/4.94G [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.2M [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/abhyudaya-nair/tokenizer/commit/4c5adcce5d43a5fa7a939ac491d47add1f67ad06', commit_message='Upload tokenizer', commit_description='', oid='4c5adcce5d43a5fa7a939ac491d47add1f67ad06', pr_url=None, repo_url=RepoUrl('https://huggingface.co/abhyudaya-nair/tokenizer', endpoint='https://huggingface.co', repo_type='model', repo_id='abhyudaya-nair/tokenizer'), pr_revision=None, pr_num=None)