# Learning how to fine-tune AI Models

## Install Required Libraries

In [None]:
pip install transformers datasets tensorflow torch



# Text Classification


### Load pre-trained model

In [None]:
from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
)

# Single checkpoint name shared by the classifier and its tokenizer.
model_name = "distilbert-base-uncased"

# num_labels=2 requests a two-class (binary) classification model.
model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=2,
)
tokenizer = AutoTokenizer.from_pretrained(model_name)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/483 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

### Load a small dataset

In [None]:
from datasets import load_dataset

dataset = load_dataset("imdb")

# Shuffle each split with a fixed seed, then keep only a small slice
# (1000 train / 500 test examples) so that training stays quick.
train_data = (
    dataset["train"]
    .shuffle(seed=42)
    .select(range(1000))
)
test_data = (
    dataset["test"]
    .shuffle(seed=42)
    .select(range(500))
)

README.md:   0%|          | 0.00/7.81k [00:00<?, ?B/s]

train-00000-of-00001.parquet:   0%|          | 0.00/21.0M [00:00<?, ?B/s]

test-00000-of-00001.parquet:   0%|          | 0.00/20.5M [00:00<?, ?B/s]

unsupervised-00000-of-00001.parquet:   0%|          | 0.00/42.0M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/25000 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/25000 [00:00<?, ? examples/s]

Generating unsupervised split:   0%|          | 0/50000 [00:00<?, ? examples/s]

### Preprocess the data

In [None]:
def preprocess_function(examples):
    """Tokenize the "text" entry of ``examples`` with the notebook's tokenizer.

    Args:
        examples: Mapping that has a "text" entry holding the text to tokenize.

    Returns:
        Whatever the tokenizer returns for that text, with inputs truncated
        and padded to a fixed length of 512.
    """
    # Fixed-length padding: every example is padded/truncated to 512.
    tokenizer_options = {
        "truncation": True,
        "padding": "max_length",
        "max_length": 512,
    }
    return tokenizer(examples["text"], **tokenizer_options)

# Apply the preprocessing to both splits in batched mode (train first, then test).
tokenized_train, tokenized_test = (
    split.map(preprocess_function, batched=True)
    for split in (train_data, test_data)
)

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

Map:   0%|          | 0/500 [00:00<?, ? examples/s]

### Train the model

In [None]:
import os

# Set the WANDB_DISABLED environment variable for this process before training.
os.environ.update({"WANDB_DISABLED": "true"})

In [None]:
import inspect

from transformers import TrainingArguments, Trainer

# Newer transformers releases renamed `evaluation_strategy` to `eval_strategy`
# and later dropped the old keyword, while older releases only know the old
# name. Pick whichever keyword the installed version accepts so this cell runs
# on both.
_training_arg_names = inspect.signature(TrainingArguments.__init__).parameters
_eval_strategy_key = (
    "eval_strategy" if "eval_strategy" in _training_arg_names else "evaluation_strategy"
)

training_args = TrainingArguments(
    output_dir="./results",
    report_to="none",  # Disables logging to wandb
    save_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=2,
    weight_decay=0.01,
    **{_eval_strategy_key: "epoch"},  # Evaluate at the end of every epoch
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_test,
)

trainer.train()



Epoch,Training Loss,Validation Loss
1,No log,0.328046
2,No log,0.399712


TrainOutput(global_step=250, training_loss=0.24002449035644532, metrics={'train_runtime': 137.348, 'train_samples_per_second': 14.562, 'train_steps_per_second': 1.82, 'total_flos': 264934797312000.0, 'train_loss': 0.24002449035644532, 'epoch': 2.0})

### Evaluate the model

In [None]:
# Evaluate the trained model with the Trainer and print the metrics dict it
# returns. The Trainer above was created without a `compute_metrics` function,
# so the printed dict holds the evaluation loss and runtime/throughput figures
# rather than an accuracy score.
metrics = trainer.evaluate()
print(metrics)

{'eval_loss': 0.39971229434013367, 'eval_runtime': 7.5735, 'eval_samples_per_second': 66.019, 'eval_steps_per_second': 8.318, 'epoch': 2.0}


### Use the model to predict sentiment

In [None]:
import torch

def predict_sentiment(text):
    """Classify one piece of text as "Positive" or "Negative".

    Uses the notebook-level ``model`` and ``tokenizer``. The model is moved to
    the GPU when one is available and is switched to evaluation mode, so it
    stays in eval mode after this function returns.

    Args:
        text: The text to classify (a single string).

    Returns:
        "Positive" if the highest-scoring class index is 1, otherwise
        "Negative".
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # Detects GPU or CPU
    model.to(device)  # Moves model to GPU if available
    model.eval()  # Inference mode: turns off dropout so predictions are deterministic

    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512).to(device)  # Move input to the same device

    # No gradients are needed for prediction; skipping them saves memory and time.
    with torch.no_grad():
        outputs = model(**inputs)

    # Take the arg-max over the class dimension (not over the flattened tensor).
    # .item() then requires exactly one prediction, i.e. a single input text.
    predicted_class = outputs.logits.argmax(dim=-1).item()

    return "Positive" if predicted_class == 1 else "Negative"

# Try the classifier on one clearly positive and one clearly negative review;
# each prediction is printed on its own line.
print(
    predict_sentiment("This movie was amazing!"),  # Should return Positive
    predict_sentiment("I didn't like the film."),  # Should return Negative
    sep="\n",
)

Positive
Negative
