In [3]:
#Install Libraries
!pip install transformers datasets accelerate -q


In [None]:
#Import Libraries
import inspect

import torch
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer


In [None]:
# Load the "imdb" dataset through the `datasets` library.
dataset = load_dataset("imdb")
dataset  # bare last expression so the notebook displays the loaded object


In [None]:
# Name of the pretrained checkpoint; the tokenizer is loaded from that same
# checkpoint name.
model_name = "bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name)


In [None]:
# Tokenization
def tokenize(batch):
    """Tokenize the "text" field of a batch with the notebook's tokenizer.

    The tokenizer is asked to pad to ``max_length`` and to truncate, with
    ``max_length=256``.

    Args:
        batch: Mapping with a "text" entry holding the texts to encode.

    Returns:
        Whatever the tokenizer call returns for those texts.
    """
    texts = batch["text"]
    encoding_options = dict(padding="max_length", truncation=True, max_length=256)
    return tokenizer(texts, **encoding_options)

# Tokenize the dataset in batches, then rename the "label" column to "labels".
tokenized_ds = dataset.map(tokenize, batched=True).rename_column("label", "labels")

# Return only the listed columns, formatted as torch tensors.
tensor_columns = ["input_ids", "attention_mask", "labels"]
tokenized_ds.set_format(type="torch", columns=tensor_columns)


In [None]:
# Load the pretrained checkpoint with a sequence-classification head sized
# for two labels.
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)


In [None]:
# Newer transformers releases call this keyword `eval_strategy`; older ones
# only accept `evaluation_strategy`, which recent releases reject. Probe the
# installed signature so this cell runs on either.
eval_strategy_key = (
    "eval_strategy"
    if "eval_strategy" in inspect.signature(TrainingArguments).parameters
    else "evaluation_strategy"
)

# Evaluate and checkpoint once per epoch; checkpoints go to ./results and
# logs to ./logs.
training_args = TrainingArguments(
    output_dir="./results",
    save_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=2,
    weight_decay=0.01,
    logging_dir="./logs",
    **{eval_strategy_key: "epoch"},
)


In [None]:

# Build the Trainer from the model, the training arguments, and the
# tokenized "train" / "test" splits.
train_split = tokenized_ds["train"]
eval_split = tokenized_ds["test"]

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_split,
    eval_dataset=eval_split,
)


In [None]:
# Fine-tune the model; the call's return value is shown as the cell output.
trainer.train()


In [None]:
# Save the model and the tokenizer into the same directory.
fine_tuned_dir = "./fine_tuned_model"
trainer.save_model(fine_tuned_dir)
tokenizer.save_pretrained(fine_tuned_dir)


In [None]:
# Test the model
def predict(text):
    """Classify the sentiment of a single review with the notebook's model.

    Args:
        text: The review text to classify (one string).

    Returns:
        "Positive" followed by a grinning-face emoji when the highest-scoring
        class index is 1, otherwise "Negative" followed by a
        disappointed-face emoji.
    """
    # Truncate to the same 256-token window used when tokenizing the training
    # data, so an over-long review cannot exceed the model's position limit.
    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=256)
    # The Trainer may have moved the model to an accelerator; the inputs must
    # live on the same device as the model's weights.
    inputs = inputs.to(model.device)

    model.eval()  # make sure dropout is off for inference
    with torch.no_grad():  # no gradients are needed for prediction
        outputs = model(**inputs)

    # Take the arg-max over the class axis rather than over flattened logits.
    prediction = torch.argmax(outputs.logits, dim=-1).item()
    # Emoji are written as escapes so the labels survive file re-encoding
    # (they had been corrupted into mojibake).
    return "Positive \U0001F600" if prediction == 1 else "Negative \U0001F61E"

# Smoke-test the classifier on two short example reviews.
for sample_review in ("The movie was excellent!", "The movie was horrible."):
    print(predict(sample_review))
