In [None]:
pip install transformers datasets scikit-learn pandas torch gradio



In [None]:
import pandas as pd
from datasets import Dataset

# Load the train and test splits from local CSVs.
# The training split must come from its own file: reading test.csv for both
# frames would train and evaluate on exactly the same rows.
train_df = pd.read_csv("/content/train[1].csv")
test_df = pd.read_csv("/content/test.csv")

# Rename columns if needed (a no-op when the CSV has no "headline" column)
train_df = train_df.rename(columns={"headline": "text"})
test_df = test_df.rename(columns={"headline": "text"})

# Convert to HuggingFace Datasets
train_dataset = Dataset.from_pandas(train_df)
test_dataset = Dataset.from_pandas(test_df)


In [None]:
def tokenize(batch):
    """Tokenize the "Title" column of ``batch`` with the notebook-level ``tokenizer``.

    The tokenizer is called with ``padding="max_length"``, ``truncation=True``
    and ``max_length=128``. ``tokenizer`` is looked up when this function is
    called, so it has to exist by then.
    """
    titles = batch["Title"]
    encoded = tokenizer(
        titles,
        padding="max_length",
        truncation=True,
        max_length=128,
    )
    return encoded


In [None]:
# Expose the headline column under the name "text" in both splits
# (swap "Title" for the real column name if the CSV uses a different one).
train_dataset = train_dataset.rename_column(
    original_column_name="Title",
    new_column_name="text",
)
test_dataset = test_dataset.rename_column(
    original_column_name="Title",
    new_column_name="text",
)


In [None]:
from transformers import AutoTokenizer

# Tokenizer matching the bert-base-uncased checkpoint
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")

def tokenize(batch):
    """Tokenize the "text" column of ``batch`` with the notebook-level ``tokenizer``.

    The tokenizer is called with ``padding="max_length"``, ``truncation=True``
    and ``max_length=128``; its output is returned unchanged.
    """
    texts = batch["text"]
    encoded = tokenizer(
        texts,
        padding="max_length",
        truncation=True,
        max_length=128,
    )
    return encoded

# Run `tokenize` over both splits, one batch of rows at a time
train_dataset = train_dataset.map(function=tokenize, batched=True)
test_dataset = test_dataset.map(function=tokenize, batched=True)

# The HuggingFace Trainer looks for the target under the name "labels".
# NOTE(review): the values of "Class Index" are passed through unchanged; if
# they are 1-based they will not line up with a 0-based label map — confirm.
train_dataset = train_dataset.rename_column(
    original_column_name="Class Index",
    new_column_name="labels",
)
test_dataset = test_dataset.rename_column(
    original_column_name="Class Index",
    new_column_name="labels",
)

# Return only the model inputs and labels, as torch tensors
for _split in (train_dataset, test_dataset):
    _split.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"])

Map:   0%|          | 0/7600 [00:00<?, ? examples/s]

Map:   0%|          | 0/7600 [00:00<?, ? examples/s]

In [None]:
# Upgrade `datasets` in the kernel's environment.
# NOTE(review): `datasets` is already imported by an earlier cell, so the
# upgraded version presumably only takes effect after a runtime restart — confirm.
%pip install --upgrade datasets



In [None]:
pip install --upgrade transformers




In [None]:
# Imported here because no cell visible in this notebook brings it into scope.
from transformers import TrainingArguments

# Fine-tuning hyperparameters: 3 epochs, batch size 16, lr 2e-5, checkpoints
# and evaluation every 500 steps, all written under ./results.
training_args = TrainingArguments(
    output_dir="./results",
    do_eval=True,
    # `do_eval` on its own does not schedule anything inside Trainer; the
    # strategy is what makes `eval_steps` take effect. `eval_strategy` is the
    # current name of the former `evaluation_strategy` argument.
    eval_strategy="steps",
    save_steps=500,
    eval_steps=500,
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=3,
    weight_decay=0.01
)


In [None]:
import pandas as pd
from datasets import Dataset

# Read each split and expose its "Title" column as "text"
# (the rename is skipped silently if the column is absent).
train_df = pd.read_csv("/content/train[1].csv").rename(columns={"Title": "text"})
test_df = pd.read_csv("/content/test.csv").rename(columns={"Title": "text"})

# Wrap the frames as HuggingFace Dataset objects
train_dataset = Dataset.from_pandas(df=train_df)
test_dataset = Dataset.from_pandas(df=test_df)


In [None]:
from transformers import AutoTokenizer

# Tokenizer matching the bert-base-uncased checkpoint
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")

def tokenize(batch):
    """Encode ``batch["text"]`` with the notebook-level ``tokenizer``.

    Uses ``padding="max_length"``, ``truncation=True`` and ``max_length=128``
    and returns whatever the tokenizer returns.
    """
    tokenizer_options = {
        "padding": "max_length",
        "truncation": True,
        "max_length": 128,
    }
    return tokenizer(batch["text"], **tokenizer_options)

# Run `tokenize` over both splits, one batch of rows at a time
train_dataset = train_dataset.map(function=tokenize, batched=True)
test_dataset = test_dataset.map(function=tokenize, batched=True)

# The Trainer looks for the target column under the name "labels".
# NOTE(review): the values of "Class Index" are passed through unchanged; if
# they are 1-based they will not line up with a 0-based label map — confirm.
train_dataset = train_dataset.rename_column(
    original_column_name="Class Index",
    new_column_name="labels",
)
test_dataset = test_dataset.rename_column(
    original_column_name="Class Index",
    new_column_name="labels",
)

# Return only the model inputs and labels, as torch tensors
for _split in (train_dataset, test_dataset):
    _split.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"])



Map:   0%|          | 0/120000 [00:00<?, ? examples/s]

Map:   0%|          | 0/7600 [00:00<?, ? examples/s]

In [None]:
# Imported here because no cell visible in this notebook brings it into scope.
from transformers import Trainer

# NOTE(review): `model` and `compute_metrics` are not defined in any cell
# visible before this one; they must already exist in the kernel when this
# cell runs — confirm where they come from.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,  # evaluated with compute_metrics
    compute_metrics=compute_metrics
)


In [None]:
def classify_news(text):
    """Classify a news headline and return its topic name.

    Relies on the notebook-level ``tokenizer``, ``model``, ``label_map`` and
    ``torch`` being defined by the time the function is called.

    Args:
        text: The headline to classify.

    Returns:
        ``label_map[class_id]`` for the highest-scoring class; a prompt asking
        for input when ``text`` is empty, blank or ``None``; or
        ``"Error: <message>"`` if anything raises along the way.
    """
    try:
        # Reject empty/blank input up front instead of classifying padding only.
        if not text or not text.strip():
            return "Please enter a valid news headline."

        # Tokenize
        inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=128)
        # A Trainer-managed model may live on an accelerator; the inputs have
        # to be on the same device as the model's weights.
        inputs = inputs.to(model.device)

        # Predict (eval mode switches dropout off so the result is deterministic)
        model.eval()
        with torch.no_grad():
            logits = model(**inputs).logits

        # Get predicted label
        prediction = torch.argmax(logits, dim=1).item()

        return label_map[prediction]

    except Exception as e:
        # Deliberately best-effort: report the failure as text instead of raising.
        return f"Error: {str(e)}"


In [None]:
# Write the trainer's model to the ./news-model directory.
# NOTE(review): no `trainer.train()` call is visible in this notebook; make
# sure training has actually run before this cell — confirm.
trainer.save_model(output_dir="news-model")


In [None]:
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Base tokenizer plus the classifier stored under "news-model"
tokenizer = AutoTokenizer.from_pretrained(
    pretrained_model_name_or_path="bert-base-uncased",
)
model = AutoModelForSequenceClassification.from_pretrained(
    pretrained_model_name_or_path="news-model",  # path to your saved model
)

# Class id -> topic name shown to the user
label_map = dict(enumerate(("World", "Sports", "Business", "Sci/Tech")))

def classify_news(text):
    """Return the predicted topic name for a headline.

    Uses the notebook-level ``tokenizer``, ``model`` and ``label_map``.
    Blank input yields a prompt asking for a headline; any exception is
    reported as an ``"Error: <message>"`` string instead of being raised.
    """
    try:
        stripped = text.strip()
        if not stripped:
            return "Please enter a valid news headline."

        encoded = tokenizer(
            text,
            return_tensors="pt",
            truncation=True,
            padding=True,
            max_length=128,
        )

        # Inference only, so gradient tracking is switched off
        with torch.no_grad():
            outputs = model(**encoded)

        class_id = torch.argmax(outputs.logits, dim=1).item()
        return label_map[class_id]

    except Exception as err:
        return f"Error: {str(err)}"

# Wire classify_news into a web UI: one two-line textbox in, plain text out
demo = gr.Interface(
    classify_news,
    gr.Textbox(placeholder="Enter a news headline...", lines=2),
    "text",
    description="Classifies a news headline into: World, Sports, Business, or Sci/Tech.",
    title="AG News Topic Classifier",
)

# Start the app
demo.launch()


It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://448148350aac22428f.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


