In [1]:
!pip install transformers[torch] datasets evaluate -q

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/84.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.1/84.1 kB[0m [31m3.3 MB/s[0m eta [36m0:00:00[0m
[?25h

In [2]:
import torch
print(f"Is GPU available? {torch.cuda.is_available()}")
print(f"GPU Name: {torch.cuda.get_device_name(0) if torch.cuda.is_available() else 'No GPU found'}")

Is GPU available? True
GPU Name: Tesla T4


In [4]:


import numpy as np
import evaluate
import torch
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer

# 2. CHECK GPU (Crucial for BERT)
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

# 3. LOAD DATASET (AG News: World, Sports, Business, Sci/Tech)
dataset = load_dataset("ag_news")

# 4. PREPROCESSING (Tokenization)
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")

def tokenize_function(examples):
    # Padding and Truncation ensure all inputs are the same length for the GPU
    return tokenizer(examples["text"], padding="max_length", truncation=True)

# We use a subset (2000 train, 1000 test) to ensure fast completion during your session
tokenized_datasets = dataset.map(tokenize_function, batched=True)
train_set = tokenized_datasets["train"].shuffle(seed=42).select(range(2000))
test_set = tokenized_datasets["test"].shuffle(seed=42).select(range(1000))

# 5. LOAD PRE-TRAINED BERT MODEL
# We specify num_labels=4 because AG News has 4 categories
model = AutoModelForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=4)

# 6. DEFINE EVALUATION METRIC
metric = evaluate.load("accuracy")

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return metric.compute(predictions=predictions, references=labels)

# 7. CONFIGURE TRAINING ARGUMENTS
training_args = TrainingArguments(
    output_dir="bert_news_model",
    eval_strategy="epoch",        # Evaluation happens at the end of the epoch
    save_strategy="epoch",
    learning_rate=2e-5,           # Typical learning rate for BERT fine-tuning
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=1,           # 1 epoch is sufficient to show the fine-tuning logic
    weight_decay=0.01,
    load_best_model_at_end=True,
    logging_dir='./logs',
    logging_steps=10,
)

# 8. INITIALIZE TRAINER
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_set,
    eval_dataset=test_set,
    compute_metrics=compute_metrics,
)

# 9. START TRAINING
print("Starting training...")
trainer.train()

# 10. EVALUATE FINAL ACCURACY
final_metrics = trainer.evaluate()
print(f"Final Performance: {final_metrics}")

# 11. SAVE THE MODEL LOCALLY
trainer.save_model("./final_bert_news_model")
tokenizer.save_pretrained("./final_bert_news_model")
print("Model saved to folder: final_bert_news_model")

Using device: cuda




Map:   0%|          | 0/120000 [00:00<?, ? examples/s]

Map:   0%|          | 0/7600 [00:00<?, ? examples/s]

Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

BertForSequenceClassification LOAD REPORT from: bert-base-uncased
Key                                        | Status     | 
-------------------------------------------+------------+-
cls.predictions.transform.dense.weight     | UNEXPECTED | 
cls.predictions.transform.LayerNorm.weight | UNEXPECTED | 
cls.predictions.transform.dense.bias       | UNEXPECTED | 
cls.seq_relationship.bias                  | UNEXPECTED | 
cls.predictions.transform.LayerNorm.bias   | UNEXPECTED | 
cls.seq_relationship.weight                | UNEXPECTED | 
cls.predictions.bias                       | UNEXPECTED | 
classifier.bias                            | MISSING    | 
classifier.weight                          | MISSING    | 

Notes:
- UNEXPECTED	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.
- MISSING	:those params were newly initialized because missing from the checkpoint. Consider training on your downstream task.
`logging_dir` is deprecated and will 

Starting training...


Epoch,Training Loss,Validation Loss,Accuracy
1,0.431707,0.344967,0.895


Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

There were missing keys in the checkpoint model loaded: ['bert.embeddings.LayerNorm.weight', 'bert.embeddings.LayerNorm.bias', 'bert.encoder.layer.0.attention.output.LayerNorm.weight', 'bert.encoder.layer.0.attention.output.LayerNorm.bias', 'bert.encoder.layer.0.output.LayerNorm.weight', 'bert.encoder.layer.0.output.LayerNorm.bias', 'bert.encoder.layer.1.attention.output.LayerNorm.weight', 'bert.encoder.layer.1.attention.output.LayerNorm.bias', 'bert.encoder.layer.1.output.LayerNorm.weight', 'bert.encoder.layer.1.output.LayerNorm.bias', 'bert.encoder.layer.2.attention.output.LayerNorm.weight', 'bert.encoder.layer.2.attention.output.LayerNorm.bias', 'bert.encoder.layer.2.output.LayerNorm.weight', 'bert.encoder.layer.2.output.LayerNorm.bias', 'bert.encoder.layer.3.attention.output.LayerNorm.weight', 'bert.encoder.layer.3.attention.output.LayerNorm.bias', 'bert.encoder.layer.3.output.LayerNorm.weight', 'bert.encoder.layer.3.output.LayerNorm.bias', 'bert.encoder.layer.4.attention.output.La

Final Performance: {'eval_loss': 0.3449668288230896, 'eval_accuracy': 0.895, 'eval_runtime': 27.8041, 'eval_samples_per_second': 35.966, 'eval_steps_per_second': 4.496, 'epoch': 1.0}


Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

Model saved to folder: final_bert_news_model


In [5]:
!pip install gradio -q

import gradio as gr
from transformers import pipeline

# 1. Initialize the classification pipeline using YOUR saved model
# 'device=0' ensures we keep using the T4 GPU for fast inference
pipe = pipeline("text-classification",
                model="./final_bert_news_model",
                tokenizer="./final_bert_news_model",
                device=0)

# 2. Map the numeric labels back to the AG News categories
# 0:World, 1:Sports, 2:Business, 3:Sci/Tech
id2label = {0: "World News", 1: "Sports", 2: "Business", 3: "Sci/Tech"}

def classify_news(text):
    result = pipe(text)[0]
    # Extract the ID from labels like 'LABEL_0'
    label_id = int(result['label'].split('_')[-1])
    label_name = id2label[label_id]
    probability = round(result['score'] * 100, 2)
    return f"{label_name} ({probability}% confidence)"

# 3. Build the Gradio Interface
app = gr.Interface(
    fn=classify_news,
    inputs=gr.Textbox(lines=2, placeholder="Paste a news headline here...", label="Input Headline"),
    outputs=gr.Text(label="BERT Prediction"),
    title="Task 1: News Topic Classifier",
    description="This BERT-base model has been fine-tuned to categorize news into World, Sports, Business, or Sci/Tech.",
    examples=[
        ["The stock market surged after the latest inflation report."],
        ["The local team won the championship in a thrilling overtime."],
        ["NASA's James Webb telescope captured stunning images of a distant nebula."]
    ]
)

# 4. Launch it!
# 'share=True' creates a public .gradio.live link for your report
app.launch(share=True)

Loading weights:   0%|          | 0/201 [00:00<?, ?it/s]

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://f30186e8d07f8daf00.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


