In [3]:
%pip install wandb

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.0 -> 24.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [2]:
!wandb login 10713c140f304d47b99c544f0f74e73b7d667cbf

wandb: Appending key for api.wandb.ai to your netrc file: C:\Users\Administrator\_netrc


In [3]:
# This script needs these libraries to be installed:
#   numpy, transformers, datasets
import wandb

import os
import numpy as np
from datasets import load_dataset
from transformers import TrainingArguments, Trainer
from transformers import AutoTokenizer, AutoModelForSequenceClassification


def tokenize_function(examples):
    return tokenizer(examples["text"], padding="max_length", truncation=True)

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return {"accuracy": np.mean(predictions == labels)}


# download prepare the data
dataset = load_dataset("yelp_review_full")
tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")

small_train_dataset = dataset["train"].shuffle(seed=42).select(range(1000))
small_eval_dataset = dataset["test"].shuffle(seed=42).select(range(300))

small_train_dataset = small_train_dataset.map(tokenize_function, batched=True)
small_eval_dataset = small_train_dataset.map(tokenize_function, batched=True)

  from .autonotebook import tqdm as notebook_tqdm
Map: 100%|██████████| 1000/1000 [00:00<00:00, 5426.76 examples/s]


In [4]:
# download the model
model = AutoModelForSequenceClassification.from_pretrained("distilbert-base-uncased", num_labels=5)

# set the wandb project where this run will be logged
os.environ["WANDB_PROJECT"]="my-model-project"

# save your trained model checkpoint to wandb
os.environ["WANDB_LOG_MODEL"]="true"

# turn off watch to log faster
os.environ["WANDB_WATCH"]="false"

# pass "wandb" to the 'report_to' parameter to turn on wandb logging
training_args = TrainingArguments(
    output_dir='models',
    report_to="wandb",
    logging_steps=5,
    per_device_train_batch_size=32,
    per_device_eval_batch_size=32,
    evaluation_strategy="steps",
    eval_steps=20,
    max_steps = 100,
    save_steps = 100
)

# define the trainer and start training
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=small_train_dataset,
    eval_dataset=small_eval_dataset,
    compute_metrics=compute_metrics,
)
trainer.train()

# [optional] finish the wandb run, necessary in notebooks
wandb.finish()

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
max_steps is given, it will override any value given in num_train_epochs
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mmanhtien310701[0m ([33mmanhtien310701-fanyuan[0m). Use [1m`wandb login --relogin`[0m to force relogin


  5%|▌         | 5/100 [02:35<48:31, 30.65s/it]

{'loss': 1.606, 'grad_norm': 1.8802423477172852, 'learning_rate': 4.75e-05, 'epoch': 0.16}


 10%|█         | 10/100 [05:11<46:01, 30.69s/it]

{'loss': 1.5961, 'grad_norm': 1.2873133420944214, 'learning_rate': 4.5e-05, 'epoch': 0.31}


 15%|█▌        | 15/100 [07:44<43:53, 30.98s/it]

{'loss': 1.5328, 'grad_norm': 1.729570984840393, 'learning_rate': 4.25e-05, 'epoch': 0.47}


 20%|██        | 20/100 [10:31<43:09, 32.37s/it]

{'loss': 1.4426, 'grad_norm': 3.3474764823913574, 'learning_rate': 4e-05, 'epoch': 0.62}


                                                
 20%|██        | 20/100 [15:56<43:09, 32.37s/it]

{'eval_loss': 1.3307403326034546, 'eval_accuracy': 0.443, 'eval_runtime': 324.6889, 'eval_samples_per_second': 3.08, 'eval_steps_per_second': 0.099, 'epoch': 0.62}


 25%|██▌       | 25/100 [18:30<1:07:48, 54.25s/it] 

{'loss': 1.3119, 'grad_norm': 3.390974283218384, 'learning_rate': 3.7500000000000003e-05, 'epoch': 0.78}


 30%|███       | 30/100 [21:10<42:03, 36.04s/it]  

{'loss': 1.2908, 'grad_norm': 6.429003715515137, 'learning_rate': 3.5e-05, 'epoch': 0.94}


 35%|███▌      | 35/100 [23:28<33:08, 30.59s/it]

{'loss': 1.133, 'grad_norm': 2.993131160736084, 'learning_rate': 3.2500000000000004e-05, 'epoch': 1.09}


 40%|████      | 40/100 [26:07<32:26, 32.44s/it]

{'loss': 1.098, 'grad_norm': 2.783486843109131, 'learning_rate': 3e-05, 'epoch': 1.25}


                                                
 40%|████      | 40/100 [31:36<32:26, 32.44s/it]

{'eval_loss': 1.02614426612854, 'eval_accuracy': 0.61, 'eval_runtime': 328.49, 'eval_samples_per_second': 3.044, 'eval_steps_per_second': 0.097, 'epoch': 1.25}


 45%|████▌     | 45/100 [34:12<50:41, 55.29s/it]   

{'loss': 1.0678, 'grad_norm': 4.699380874633789, 'learning_rate': 2.7500000000000004e-05, 'epoch': 1.41}


 50%|█████     | 50/100 [36:46<29:07, 34.95s/it]

{'loss': 1.0769, 'grad_norm': 4.055044174194336, 'learning_rate': 2.5e-05, 'epoch': 1.56}


 55%|█████▌    | 55/100 [39:20<23:31, 31.36s/it]

{'loss': 0.9892, 'grad_norm': 4.091495990753174, 'learning_rate': 2.25e-05, 'epoch': 1.72}


 60%|██████    | 60/100 [41:59<21:11, 31.78s/it]

{'loss': 0.9387, 'grad_norm': 4.172548770904541, 'learning_rate': 2e-05, 'epoch': 1.88}


                                                
 60%|██████    | 60/100 [47:28<21:11, 31.78s/it]

{'eval_loss': 0.8711727261543274, 'eval_accuracy': 0.677, 'eval_runtime': 329.0638, 'eval_samples_per_second': 3.039, 'eval_steps_per_second': 0.097, 'epoch': 1.88}


 65%|██████▌   | 65/100 [49:47<29:29, 50.56s/it]   

{'loss': 0.8702, 'grad_norm': 6.584935665130615, 'learning_rate': 1.75e-05, 'epoch': 2.03}


 70%|███████   | 70/100 [52:23<17:02, 34.07s/it]

{'loss': 0.9068, 'grad_norm': 6.422648906707764, 'learning_rate': 1.5e-05, 'epoch': 2.19}


 75%|███████▌  | 75/100 [55:06<13:37, 32.71s/it]

{'loss': 0.881, 'grad_norm': 6.309064865112305, 'learning_rate': 1.25e-05, 'epoch': 2.34}


 80%|████████  | 80/100 [57:39<10:13, 30.68s/it]

{'loss': 0.8477, 'grad_norm': 4.51299524307251, 'learning_rate': 1e-05, 'epoch': 2.5}


                                                
 80%|████████  | 80/100 [1:02:55<10:13, 30.68s/it]

{'eval_loss': 0.7742425203323364, 'eval_accuracy': 0.737, 'eval_runtime': 315.506, 'eval_samples_per_second': 3.17, 'eval_steps_per_second': 0.101, 'epoch': 2.5}


 85%|████████▌ | 85/100 [1:05:20<13:02, 52.19s/it] 

{'loss': 0.7879, 'grad_norm': 3.375075101852417, 'learning_rate': 7.5e-06, 'epoch': 2.66}


 90%|█████████ | 90/100 [1:07:51<05:44, 34.42s/it]

{'loss': 0.8067, 'grad_norm': 3.0325076580047607, 'learning_rate': 5e-06, 'epoch': 2.81}


 95%|█████████▌| 95/100 [1:10:26<02:34, 30.98s/it]

{'loss': 0.8513, 'grad_norm': 6.684648036956787, 'learning_rate': 2.5e-06, 'epoch': 2.97}


100%|██████████| 100/100 [1:12:40<00:00, 29.64s/it]

{'loss': 0.7287, 'grad_norm': 3.5257890224456787, 'learning_rate': 0.0, 'epoch': 3.12}


                                                   
100%|██████████| 100/100 [1:18:03<00:00, 29.64s/it]

{'eval_loss': 0.7370273470878601, 'eval_accuracy': 0.743, 'eval_runtime': 322.664, 'eval_samples_per_second': 3.099, 'eval_steps_per_second': 0.099, 'epoch': 3.12}


100%|██████████| 100/100 [1:18:04<00:00, 29.64s/it]max_steps is given, it will override any value given in num_train_epochs


{'train_runtime': 4690.3566, 'train_samples_per_second': 0.682, 'train_steps_per_second': 0.021, 'train_loss': 1.0882170462608338, 'epoch': 3.12}


100%|██████████| 100/100 [1:18:05<00:00, 46.86s/it]


0,1
eval/accuracy,▁▅▆██
eval/loss,█▄▃▁▁
eval/runtime,▆██▁▅
eval/samples_per_second,▃▁▁█▄
eval/steps_per_second,▄▁▁█▄
train/epoch,▁▁▂▂▂▂▃▃▄▄▄▄▅▅▅▅▆▆▇▇▇▇████
train/global_step,▁▁▂▂▂▂▃▃▄▄▄▄▅▅▅▅▆▆▇▇▇▇████
train/grad_norm,▂▁▂▄▄█▃▃▅▅▅▅███▅▄▃█▄
train/learning_rate,██▇▇▇▆▆▅▅▅▄▄▄▃▃▂▂▂▁▁
train/loss,██▇▇▆▅▄▄▄▄▃▃▂▂▂▂▁▂▂▁

0,1
eval/accuracy,0.743
eval/loss,0.73703
eval/runtime,322.664
eval/samples_per_second,3.099
eval/steps_per_second,0.099
total_flos,414380191457280.0
train/epoch,3.125
train/global_step,100.0
train/grad_norm,3.52579
train/learning_rate,0.0


In [12]:
import wandb
from transformers import DistilBertForSequenceClassification, DistilBertTokenizer
import torch

# Initialize Wandb run
run = wandb.init()

# Sử dụng artifact để tải mô hình
artifact = run.use_artifact('manhtien310701-fanyuan/my-model-project/model-vd07t9u5:v1', type='model')
artifact_dir = artifact.download()

# Load tokenizer
tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')

# Load mô hình từ thư mục đã tải xuống
model = DistilBertForSequenceClassification.from_pretrained(artifact_dir)

# Chuẩn bị dữ liệu đầu vào
input_text = "Good fish sandwich."
inputs = tokenizer(input_text, return_tensors='pt')

# Dự đoán với mô hình
with torch.no_grad():
    outputs = model(**inputs)
    predictions = outputs.logits.argmax(dim=-1)

# In kết quả dự đoán
print(f"Predicted label: {predictions.item()}")

[34m[1mwandb[0m: Downloading large artifact model-vd07t9u5:v1, 255.44MB. 4 files... 
[34m[1mwandb[0m:   4 of 4 files downloaded.  
Done. 0:0:0.7


Predicted label: 3
