In [None]:
!pip install -q transformers datasets scikit-learn torch accelerate


In [None]:
import numpy as np
import torch

from datasets import load_dataset
from transformers import BertTokenizer, BertForSequenceClassification
from transformers import TrainingArguments, Trainer
from sklearn.metrics import accuracy_score, f1_score


In [None]:
# Load the "ag_news" dataset through `datasets.load_dataset`.
dataset = load_dataset("ag_news")
# Bare expression: the object's repr becomes the cell output.
dataset


In [None]:
# Peek at the first training record; later cells read its "text" field and
# rename its "label" field.
dataset["train"][0]


In [None]:
# Tokenizer for the "bert-base-uncased" checkpoint, the same checkpoint the
# classification model is initialized from further down.
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")


In [None]:
def tokenize_function(example):
    """Tokenize the ``"text"`` field of ``example`` with the notebook-level tokenizer.

    The tokenizer is called with truncation enabled, ``padding="max_length"``
    and ``max_length=128``. Whatever the tokenizer call returns is passed
    straight back to the caller.
    """
    encoding_options = {
        "padding": "max_length",
        "truncation": True,
        "max_length": 128,
    }
    return tokenizer(example["text"], **encoding_options)


In [None]:
# Apply tokenize_function across the dataset. With batched=True the function
# is handed batches of rows, so example["text"] is a list of strings per call.
tokenized_datasets = dataset.map(tokenize_function, batched=True)


In [None]:
# Shape the tokenized data for training: drop the raw "text" column, rename
# "label" to "labels", and return torch tensors.
#
# The checks make this cell safe to re-run: without them a second execution
# raises because "text" / "label" were already removed or renamed the first time.
# Assumes every split has the same columns as "train" (they all come from the
# same map call above).
current_columns = tokenized_datasets["train"].column_names
if "text" in current_columns:
    tokenized_datasets = tokenized_datasets.remove_columns(["text"])
if "label" in current_columns:
    tokenized_datasets = tokenized_datasets.rename_column("label", "labels")
tokenized_datasets.set_format("torch")


In [None]:
# Sequence-classification model initialized from the "bert-base-uncased" checkpoint.
# num_labels=4 matches the four category names used at prediction time
# ("World", "Sports", "Business", "Sci/Tech").
model = BertForSequenceClassification.from_pretrained(
    "bert-base-uncased",
    num_labels=4
)


In [None]:
def compute_metrics(eval_pred):
    """Compute accuracy and weighted F1 for one evaluation pass.

    ``eval_pred`` unpacks into ``(logits, labels)``. The predicted class of
    each row is the argmax of the logits along axis 1.

    Returns a dict with the keys ``"accuracy"`` and ``"f1"``.
    """
    scores, references = eval_pred
    predicted_classes = np.argmax(scores, axis=1)

    accuracy = accuracy_score(references, predicted_classes)
    weighted_f1 = f1_score(references, predicted_classes, average="weighted")
    return {"accuracy": accuracy, "f1": weighted_f1}


In [None]:
# Training configuration: 3 epochs, batch size 16 per device for both training
# and evaluation, learning rate 2e-5 with weight decay 0.01.
# Evaluation and checkpoint saving are both set to "epoch"; checkpoints go to
# ./results and logs to ./logs.
training_args = TrainingArguments(
    output_dir="./results",
    eval_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=3,
    weight_decay=0.01,
    save_strategy="epoch",
    logging_dir="./logs"
)

In [None]:
# Wire the model, the training configuration, the data splits and the metric
# function into a Trainer.
# NOTE(review): eval_dataset is the "test" split, so the evaluation metrics
# are computed on test data and no separate validation split is used —
# confirm this is intended.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["test"],
    compute_metrics=compute_metrics
)


In [None]:
# Run fine-tuning with the configuration above (the long-running cell of this notebook).
trainer.train()


In [None]:
# Evaluate on the Trainer's eval_dataset (the "test" split); the metrics from
# compute_metrics are shown as the cell output.
trainer.evaluate()


In [None]:
# Save the model and the tokenizer into the same "news_bert_model" directory;
# app.py loads both from that path.
model.save_pretrained("news_bert_model")
tokenizer.save_pretrained("news_bert_model")


In [None]:
def predict_news(text, max_length=128):
    """Classify a single news headline with the fine-tuned model.

    Args:
        text: The headline to classify (one string).
        max_length: Truncation limit in tokens. Defaults to 128, the same
            limit used when tokenizing the training data and in app.py.

    Returns:
        One of "World", "Sports", "Business", "Sci/Tech".
    """
    labels = ["World", "Sports", "Business", "Sci/Tech"]

    # Put the model in eval mode so dropout is disabled; otherwise the result
    # depends on whichever mode the model was last left in.
    model.eval()

    inputs = tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        padding=True,
        max_length=max_length,
    )
    # Move input tensors to the same device as the model
    inputs = {key: val.to(model.device) for key, val in inputs.items()}

    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        outputs = model(**inputs)
    prediction = torch.argmax(outputs.logits, dim=1).item()

    return labels[prediction]

predict_news("Apple launches a new AI powered smartphone")

In [None]:
!pip install -q streamlit pyngrok


In [None]:
%%writefile app.py
import streamlit as st
import torch
from transformers import BertTokenizer, BertForSequenceClassification

st.set_page_config(page_title="News Topic Classifier", layout="centered")

st.title("News Topic Classifier")
st.write("Enter a news headline and the model will predict its category.")

@st.cache_resource
def load_model():
    tokenizer = BertTokenizer.from_pretrained("news_bert_model")
    model = BertForSequenceClassification.from_pretrained("news_bert_model")
    model.eval()
    return tokenizer, model

tokenizer, model = load_model()

labels = ["World", "Sports", "Business", "Sci/Tech"]

headline = st.text_input("News Headline")

if st.button("Predict"):
    if headline.strip() == "":
        st.warning("Please enter a headline")
    else:
        inputs = tokenizer(
            headline,
            return_tensors="pt",
            truncation=True,
            padding=True,
            max_length=128
        )

        with torch.no_grad():
            outputs = model(**inputs)
            prediction = torch.argmax(outputs.logits, dim=1).item()

        st.success(f"Predicted Category: {labels[prediction]}")


In [None]:
# List the working directory — presumably a sanity check that app.py and the
# news_bert_model directory were written.
!ls


In [None]:
import os
from getpass import getpass

from pyngrok import ngrok

# The ngrok authtoken is a secret and must never be written into the notebook.
# It is read from the NGROK_AUTHTOKEN environment variable of this kernel, or
# asked for interactively (getpass does not echo it into the cell output).
# Any token that was ever committed in a notebook should be treated as
# compromised and rotated in the ngrok dashboard.
ngrok_auth_token = os.environ.get("NGROK_AUTHTOKEN") or getpass("ngrok authtoken: ")
if not ngrok_auth_token.strip():
    raise RuntimeError("An ngrok authtoken is required to open the tunnel.")

# Register the token with pyngrok before opening the tunnel. Passing it as an
# environment variable to a separate shell command would not reach this kernel.
ngrok.set_auth_token(ngrok_auth_token.strip())

# Tunnel to port 8501, where the Streamlit app (started with `streamlit run`,
# not `python3 app.py`) is expected to listen.
public_url = ngrok.connect(8501)
print(public_url)

In [None]:
# Start the Streamlit server for app.py in the background (trailing `&`);
# `&>` redirects both stdout and stderr to /content/logs.txt.
!streamlit run app.py &>/content/logs.txt &
