In [2]:
from transformers import pipeline

# Klasifikasi Teks
classifier = pipeline("sentiment-analysis")
result = classifier("I love using Hugging Face's transformers!")
print(result)
# Output: [{'label': 'POSITIVE', 'score': 0.9998}]

# Penjawab Pertanyaan
question_answerer = pipeline("question-answering")
result = question_answerer(
    question="Where do I work?",
    context="My name is Sylvain and I work at Hugging Face in Brooklyn"
)
print(result)
# Output: {'score': 0.95, 'start': 33, 'end': 45, 'answer': 'Hugging Face'}


No model was supplied, defaulted to distilbert/distilbert-base-uncased-finetuned-sst-2-english and revision 714eb0f (https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/629 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Device set to use cuda:0
No model was supplied, defaulted to distilbert/distilbert-base-cased-distilled-squad and revision 564e9b5 (https://huggingface.co/distilbert/distilbert-base-cased-distilled-squad).
Using a pipeline without specifying a model name and revision in production is not recommended.


[{'label': 'POSITIVE', 'score': 0.9982390403747559}]


config.json:   0%|          | 0.00/473 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/261M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/49.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/213k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/436k [00:00<?, ?B/s]

Device set to use cuda:0


{'score': 0.6949763894081116, 'start': 33, 'end': 45, 'answer': 'Hugging Face'}


In [3]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch

# Inisialisasi tokenizer dan model
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
model = AutoModelForSequenceClassification.from_pretrained("bert-base-uncased")

# Tokenisasi input
inputs = tokenizer("I love using Hugging Face's transformers!", return_tensors="pt")

# Prediksi dengan model
with torch.no_grad():
    logits = model(**inputs).logits

# Konversi logits ke probabilitas
probs = torch.nn.functional.softmax(logits, dim=-1)
print(probs)


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


tensor([[0.5970, 0.4030]])


In [4]:
from transformers import Trainer, TrainingArguments
!pip install "fsspec<2024.10.0"  # Downgrade fsspec to a compatible version
!pip install "datasets==3.2.0" # Reinstall datasets with the specified version
from datasets import load_dataset # Import necessary library
from transformers import AutoTokenizer, AutoModelForSequenceClassification # Import necessary classes

# Load a dataset - this is just an example, replace with your actual dataset
# For demonstration purposes, we use the 'emotion' dataset.
dataset = load_dataset("emotion")

# Initialize the tokenizer
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")

# Function to preprocess the dataset
def preprocess_function(examples):
    return tokenizer(examples["text"], padding="max_length", truncation=True)

# Apply the preprocessing function to the dataset
train_dataset = dataset["train"].map(preprocess_function, batched=True)
eval_dataset = dataset["validation"].map(preprocess_function, batched=True)

# Define training arguments
training_args = TrainingArguments(
    output_dir="./results",
    evaluation_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=3,
    weight_decay=0.01,
)

# Initialize the model here, before passing it to the Trainer
model = AutoModelForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=6) # Set num_labels to match the number of classes in the 'emotion' dataset

# Initialize Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset, # Pass the defined train_dataset
    eval_dataset=eval_dataset,   # Pass the defined eval_dataset
)

# Start training
trainer.train()



Map:   0%|          | 0/16000 [00:00<?, ? examples/s]

Map:   0%|          | 0/2000 [00:00<?, ? examples/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss
1,0.229,0.193179
2,0.1329,0.16119
3,0.0878,0.160541


TrainOutput(global_step=3000, training_loss=0.23873411178588866, metrics={'train_runtime': 4332.2252, 'train_samples_per_second': 11.08, 'train_steps_per_second': 0.692, 'total_flos': 1.2629784231936e+16, 'train_loss': 0.23873411178588866, 'epoch': 3.0})

In [20]:
from huggingface_hub import notebook_login, whoami

# Login to Hugging Face
notebook_login()

# Check if you're logged in and get user info
user_info = whoami()
print(user_info)

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.svâ€¦

{'type': 'user', 'id': '67778cb1972eaa9b5b5599a9', 'name': 'amelkusmayadi', 'fullname': 'Ameliani Kusmayadi', 'isPro': False, 'avatarUrl': '/avatars/b726b4cb7d7e8bb24eb421a2c0cb69b1.svg', 'orgs': [], 'auth': {'type': 'access_token', 'accessToken': {'displayName': 'ame', 'role': 'fineGrained', 'createdAt': '2025-01-04T12:39:09.174Z', 'fineGrained': {'canReadGatedRepos': False, 'global': [], 'scoped': [{'entity': {'_id': '67778cb1972eaa9b5b5599a9', 'type': 'user', 'name': 'amelkusmayadi'}, 'permissions': []}]}}}}
