In [4]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [5]:
import pandas as pd

path = "/content/drive/MyDrive/DSIPYNB/news.tsv"

df = pd.read_csv(path, sep="\t")
df.head()


Unnamed: 0,News ID,Category,Topic,Headline,News body,Title entity,Entity content
0,N10000,sports,soccer,Predicting Atlanta United's lineup against Col...,"Only FIVE internationals allowed, count em, FI...","{""Atlanta United's"": 'Atlanta United FC'}","{'Atlanta United FC': {'type': 'item', 'id': '..."
1,N10001,news,newspolitics,Mitch McConnell: DC statehood push is 'full bo...,WASHINGTON -- Senate Majority Leader Mitch McC...,"{'DC': 'Washington, D.C.'}","{'Washington, D.C.': {'type': 'item', 'id': 'Q..."
2,N10002,news,newsus,Home In North Highlands Damaged By Fire,NORTH HIGHLANDS (CBS13) Fire damaged a home ...,{},{}
3,N10003,news,newspolitics,Meghan McCain blames 'liberal media' and 'thir...,Meghan McCain is speaking out after a journali...,{},{}
4,N10004,news,newsworld,Today in History: Aug 1,"1714: George I becomes King Georg Ludwig, Elec...",{},{}


In [6]:
df

Unnamed: 0,News ID,Category,Topic,Headline,News body,Title entity,Entity content
0,N10000,sports,soccer,Predicting Atlanta United's lineup against Col...,"Only FIVE internationals allowed, count em, FI...","{""Atlanta United's"": 'Atlanta United FC'}","{'Atlanta United FC': {'type': 'item', 'id': '..."
1,N10001,news,newspolitics,Mitch McConnell: DC statehood push is 'full bo...,WASHINGTON -- Senate Majority Leader Mitch McC...,"{'DC': 'Washington, D.C.'}","{'Washington, D.C.': {'type': 'item', 'id': 'Q..."
2,N10002,news,newsus,Home In North Highlands Damaged By Fire,NORTH HIGHLANDS (CBS13) Fire damaged a home ...,{},{}
3,N10003,news,newspolitics,Meghan McCain blames 'liberal media' and 'thir...,Meghan McCain is speaking out after a journali...,{},{}
4,N10004,news,newsworld,Today in History: Aug 1,"1714: George I becomes King Georg Ludwig, Elec...",{},{}
...,...,...,...,...,...,...,...
113757,N123757,sports,soccer_fifa_wwc,Hope who? Alyssa Naeher's penalty save sends U...,"LYON, France At the conclusion of the United...","{'USWNT': ""United States women's national socc...","{""United States women's national soccer team"":..."
113758,N123758,sports,baseball_mlb,Chris Sale Explains What Specifically Has Gone...,The first half of Chris Sale's season could be...,{},{}
113759,N123759,sports,basketball_nba_videos,Raptor fans jam streets to celebrate 1st NBA t...,Canadians are celebrating the country's first ...,{'NBA': 'National Basketball Association'},{'National Basketball Association': {'type': '...
113760,N123760,news,newspolitics,Judge won't allow Flynn to fire his attorneys,A federal judge denied the request by Michael ...,{'Flynn': 'Michael Flynn'},"{'Michael Flynn': {'type': 'item', 'id': 'Q683..."


In [4]:
!pip install pandas numpy
!pip install nltk
!pip install torch torchvision torchaudio
!pip install transformers
!pip install tqdm
!pip install sentencepiece
!pip install scikit-learn



In [7]:
import nltk
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


True

In [8]:
import re
import string
import pandas as pd
from nltk.corpus import stopwords
import nltk
import torch
from transformers import BertTokenizer, BertForSequenceClassification
from torch.utils.data import Dataset, DataLoader
from transformers import Trainer, TrainingArguments

In [9]:
STOPWORDS = set(stopwords.words('english'))

def clean_text(text):

    # This part handles cases where the input is not a string
    if not isinstance(text,str):
        return ""

    # Remove HTML tags
    text =  re.sub(r"<.*?>"," ",text)

    # Remove URLs
    text = re.sub(r"http\S+|www\S+|https\S+"," ",text)

    # Remove emojis
    text = re.sub(
        "["
        u"\U0001F600-\U0001F64F"
        u"\U0001F300-\U0001F5FF"
        u"\U0001F680-\U0001F6FF"
        u"\U0001F1E0-\U0001F1FF"
        "]+",
        "",
        text
    )

    #  Remove special symbols except letters, numbers, .,!?, and spaces
    text = re.sub(r"[^a-zA-Z0-9\s.,!?]", " ", text)

    #  Normalize punctuation (keep only .,!?)
    allowed = set(string.ascii_letters + string.digits + " .,!?")
    text = "".join(ch for ch in text if ch in allowed)

    #  Convert to lowercase
    text = text.lower()

    # Normalize whitespace
    text = " ".join(text.split())

    #  Remove stopwords
    # tokens = [
    #     word for word in text.split()
    #     if word not in STOPWORDS
    # ]

    # text = " ".join(tokens)

    return text


In [10]:
df["text"] = df["Headline"].fillna("") + " " + df["News body"].fillna("")
df = df.rename(columns={"Category": "label"}).dropna()
df["cleaned_text"] = df["text"].apply(clean_text)

In [11]:
df["cleaned_text"].iloc[0]

'predicting atlanta united s lineup against columbus crew in the u.s. open cup only five internationals allowed, count em, five! so first off we should say, per our usual atlanta united lineup predictions, this will be wrong. why will it be wrong? well, aside from the obvious, we still don t have a ton of data points from frank de boer in how he prefers to rotate his team for let s be honest an inferior competition. we ve seen how he rotates or doesn t rotate in concacaf champions league play, but that s a bit different because ccl was clearly a priority for the club. we got one glimpse of u.s. open cup rotation last week when the team played a home game as the visiting team against the charleston battery, but will things change on the actual road against an mls club? here s my predicted lineup let s talk about it kann seems like he s the cup keeper. simples. cbs i think leandro gonzalez pirez is likely to be a casualty of the 5 international player limit. miles robinson is still young

In [12]:
from sklearn.preprocessing import LabelEncoder

le=LabelEncoder()
df["label"] = le.fit_transform(df["label"])
num_classes = df["label"].nunique()


In [13]:
num_classes

18

In [14]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    df["cleaned_text"].values, df["label"].values, test_size=0.2, random_state=42)

In [15]:
# PyTorch Dataset:

class NewsDataset(Dataset):
    def __init__(self, texts, labels, tokenizer, max_len=256):
        self.texts = texts
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        encoding = self.tokenizer(
            self.texts[idx],
            truncation=True,
            padding="max_length",
            max_length=self.max_len,
            return_tensors="pt"
        )

        return {
            "input_ids": encoding["input_ids"].squeeze(),
            "attention_mask": encoding["attention_mask"].squeeze(),
            "labels": torch.tensor(self.labels[idx], dtype=torch.long)
        }


In [16]:
# TRAINING SETUP (Common)
import torch
from transformers import get_linear_schedule_with_warmup

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Device:", device)

EPOCHS = 2
BATCH_SIZE = 8
MAX_LEN = 96

Device: cuda


In [17]:
# Model 1 BERT (bert-base-uncased)

from transformers import BertTokenizer, BertForSequenceClassification
tokenizer_bert = BertTokenizer.from_pretrained("bert-base-uncased")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

In [18]:
# FIX: use X_train, y_train from train_test_split
# this convert the model data into Dataset objects as tensors which can be fed into DataLoader

train_dataset = NewsDataset(X_train, y_train, tokenizer_bert, MAX_LEN)
test_dataset  = NewsDataset(X_test, y_test, tokenizer_bert, MAX_LEN)

# As we use the API trainer so we don't need the Dataloader for the spliting and schedulung the data
# now we no need of optimizer and scheduler to define seperately


In [19]:
model_bert = BertForSequenceClassification.from_pretrained(
    "bert-base-uncased",
    num_labels=num_classes    #num_labels = number of classes
)


config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

BertForSequenceClassification LOAD REPORT from: bert-base-uncased
Key                                        | Status     | 
-------------------------------------------+------------+-
cls.seq_relationship.bias                  | UNEXPECTED | 
cls.predictions.transform.dense.bias       | UNEXPECTED | 
cls.predictions.transform.dense.weight     | UNEXPECTED | 
cls.predictions.bias                       | UNEXPECTED | 
cls.predictions.transform.LayerNorm.bias   | UNEXPECTED | 
cls.predictions.transform.LayerNorm.weight | UNEXPECTED | 
cls.seq_relationship.weight                | UNEXPECTED | 
classifier.weight                          | MISSING    | 
classifier.bias                            | MISSING    | 

Notes:
- UNEXPECTED	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.
- MISSING	:those params were newly initialized because missing from the checkpoint. Consider training on your downstream task.


In [20]:
from transformers import (
    Trainer,
    TrainingArguments
)

In [21]:
import transformers
print(transformers.__version__)

5.0.0


In [22]:
import transformers
print("Transformers version:", transformers.__version__)

from transformers import TrainingArguments

try:
    args = TrainingArguments(output_dir='./test_output', evaluation_strategy='epoch')
    print("Initialization successful!")
except TypeError as e:
    print("Error:", e)


Transformers version: 5.0.0
Error: TrainingArguments.__init__() got an unexpected keyword argument 'evaluation_strategy'


In [23]:
training_args = TrainingArguments(
    output_dir="/content/drive/MyDrive/news_project/bert_output",  # ✅ SAVE TO DRIVE

    eval_strategy="epoch",
    save_strategy="epoch",

    save_total_limit=1,   # ✅ keep only best checkpoint

    num_train_epochs=2,
    per_device_train_batch_size=BATCH_SIZE,       # ⬆ increased (safe on T4)
    per_device_eval_batch_size=BATCH_SIZE,

    gradient_accumulation_steps=2,         # ✅ faster convergence

    warmup_ratio=0.05,                     # reduced warmup
    weight_decay=0.01,

    fp16=True,                             # ✅ big speed boost

    dataloader_num_workers=2,              # ✅ faster data feeding
    remove_unused_columns=True,

    logging_steps=100,

    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",

    report_to="none"
)


warmup_ratio is deprecated and will be removed in v5.2. Use `warmup_steps` instead.


In [24]:
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
import numpy as np

def compute_metrics(eval_pred):
    preds = np.argmax(eval_pred.predictions, axis=1)
    labels = eval_pred.label_ids

    return {
        "accuracy": accuracy_score(labels, preds),
        "f1": f1_score(labels, preds, average="weighted"),
        "precision": precision_score(labels, preds, average="weighted"),
        "recall": recall_score(labels, preds, average="weighted")
    }

In [25]:
# Trainer API

trainer = Trainer(
    model=model_bert,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    compute_metrics=compute_metrics
)

trainer.train()

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,1.359663,0.66323,0.782112,0.778862,0.77922,0.782112
2,0.936534,0.635729,0.796931,0.796041,0.796747,0.796931


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

There were missing keys in the checkpoint model loaded: ['bert.embeddings.LayerNorm.weight', 'bert.embeddings.LayerNorm.bias', 'bert.encoder.layer.0.attention.output.LayerNorm.weight', 'bert.encoder.layer.0.attention.output.LayerNorm.bias', 'bert.encoder.layer.0.output.LayerNorm.weight', 'bert.encoder.layer.0.output.LayerNorm.bias', 'bert.encoder.layer.1.attention.output.LayerNorm.weight', 'bert.encoder.layer.1.attention.output.LayerNorm.bias', 'bert.encoder.layer.1.output.LayerNorm.weight', 'bert.encoder.layer.1.output.LayerNorm.bias', 'bert.encoder.layer.2.attention.output.LayerNorm.weight', 'bert.encoder.layer.2.attention.output.LayerNorm.bias', 'bert.encoder.layer.2.output.LayerNorm.weight', 'bert.encoder.layer.2.output.LayerNorm.bias', 'bert.encoder.layer.3.attention.output.LayerNorm.weight', 'bert.encoder.layer.3.attention.output.LayerNorm.bias', 'bert.encoder.layer.3.output.LayerNorm.weight', 'bert.encoder.layer.3.output.LayerNorm.bias', 'bert.encoder.layer.4.attention.output.La

TrainOutput(global_step=11372, training_loss=1.3135303164551555, metrics={'train_runtime': 2053.1109, 'train_samples_per_second': 88.61, 'train_steps_per_second': 5.539, 'total_flos': 8976303426749184.0, 'train_loss': 1.3135303164551555, 'epoch': 2.0})

In [26]:
# Save clean final model to root folder
trainer.save_model("/content/drive/MyDrive/news_project/bert_output")
tokenizer_bert.save_pretrained("/content/drive/MyDrive/news_project/bert_output")


Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

('/content/drive/MyDrive/news_project/bert_output/tokenizer_config.json',
 '/content/drive/MyDrive/news_project/bert_output/tokenizer.json')

In [27]:
import torch
from transformers import BertTokenizer, BertForSequenceClassification

MODEL_PATH = "/content/drive/MyDrive/news_project/bert_output"
# "/content/drive/MyDrive/news_project/bert_output/checkpoint-11372"

# Load trained model
tokenizer = BertTokenizer.from_pretrained(MODEL_PATH)
model = BertForSequenceClassification.from_pretrained(MODEL_PATH)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
model.eval()

# Example test sentence
text = " 'predicting atlanta united s lineup against columbus crew in the u.s. open cup only five internationals allowed, count em, five! so first off we should say, per our usual atlanta united lineup predictions, this will be wrong. why will it be wrong? well, aside from the obvious, we still don t have a ton of data points from frank de boer in how he prefers to rotate his team for let s be honest an inferior competition. we ve seen how he rotates or doesn t rotate in concacaf champions league play, but that s a bit different because ccl was clearly a priority for the club. we got one glimpse of u.s. open cup rotation last week when the team played a home game as the visiting team against the charleston battery, but will things change on the actual road against an mls club? here s my predicted lineup let s talk about it kann seems like he s the cup keeper. simples. cbs i think leandro gonzalez pirez is likely to be a casualty of the 5 international player limit. miles robinson is still young and probably isn t in need of much rest, and as far as international players go, you probably want those to be your most indispensable and or attacking options in case things go awry. florentin pogba is still a fitness concern in his ability to go 90 minutes, but considering he started the last match and then was subbed out for tactical reasons before the 90 minutes were up, i think he ll be okay. lb this could also be mikey ambrose instead of michael parkhurst. brek shea will almost certainly be rested after putting in a major shift last thursday. rb not a ton of other options here, and i d think de boer will want to see an improved performance from escobar. also, i think he may want to continue to pair escobar and pity down the right flank to allow them to continue to build a playing relationship. cm eric remedi could start here for either cm. but based on frank de boer s stated preference on not chopping and changing between matches, i m leaving the cms consistent from what we saw last week. am again, shooting for consistency here, but i think inserting meram of pereira makes sense. pereira has struggled in his last couple outings relative to some glimpses we ve seen. pereira is also an international, and if de boer wants to include ezequiel barco on the bench, he will need to shed one of his internationals from the last squad. st we could very well see brandon vazquez here, primarily down to fitness concerns. romario williams gutted out 120 minutes last thursday and finished the game holding his groin, but he trained with the team fully on monday. vazquez came on as a sub and scored twice, but part of me thinks de boer will want to give his backup striker as many opportunities as possible to prove his worth. with three internationals in the starting lineup, i think eric remedi and ezequiel barco will round out the five maximum allowed. what do you think? let us know in the comments"


inputs = tokenizer(
    text,
    return_tensors="pt",
    truncation=True,
    padding=True,
    max_length=96
).to(device)

with torch.no_grad():
    outputs = model(**inputs)
    probs = torch.softmax(outputs.logits, dim=1)
    pred = torch.argmax(probs, dim=1)

print("Predicted Class ID:", pred.item())
print("Confidence:", probs[0][pred].item())


Loading weights:   0%|          | 0/201 [00:00<?, ?it/s]

Predicted Class ID: 13
Confidence: 0.9978935122489929


In [36]:
import os
import pandas as pd

# Evaluate model on test set
eval_results = trainer.evaluate()

# Add model label
eval_results["model_name"] = "BERT-base-uncased"

# Create DataFrame row
row = pd.DataFrame([eval_results])

# Define CSV path for all model comparisons
csv_path = "/content/drive/MyDrive/news_project/model_comparisons.csv"

# Append results (do NOT overwrite)
if os.path.exists(csv_path):
    row.to_csv(csv_path, mode="a", index=False, header=False)
else:
    row.to_csv(csv_path, index=False)

print("✅ Evaluation saved to:", csv_path)
row


✅ Evaluation saved to: /content/drive/MyDrive/news_project/model_comparisons.csv


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Unnamed: 0,eval_loss,eval_accuracy,eval_f1,eval_precision,eval_recall,eval_runtime,eval_samples_per_second,eval_steps_per_second,epoch,model_name
0,0.635729,0.796931,0.796041,0.796747,0.796931,106.9484,212.635,26.583,2.0,BERT-base-uncased


**DistilBERT**

In [28]:
from transformers import DistilBertTokenizerFast, DistilBertForSequenceClassification

# -------------------------
# TOKENIZER
# -------------------------
tokenizer_distil = DistilBertTokenizerFast.from_pretrained("distilbert-base-uncased")

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

In [29]:
# -------------------------
# DATASETS
# -------------------------
train_dataset_distil = NewsDataset(X_train, y_train, tokenizer_distil, MAX_LEN)
test_dataset_distil  = NewsDataset(X_test, y_test, tokenizer_distil, MAX_LEN)

In [30]:
# -------------------------
# MODEL
# -------------------------
model_distil = DistilBertForSequenceClassification.from_pretrained(
    "distilbert-base-uncased",
    num_labels=num_classes
)

config.json:   0%|          | 0.00/483 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

Loading weights:   0%|          | 0/100 [00:00<?, ?it/s]

DistilBertForSequenceClassification LOAD REPORT from: distilbert-base-uncased
Key                     | Status     | 
------------------------+------------+-
vocab_transform.weight  | UNEXPECTED | 
vocab_transform.bias    | UNEXPECTED | 
vocab_layer_norm.weight | UNEXPECTED | 
vocab_projector.bias    | UNEXPECTED | 
vocab_layer_norm.bias   | UNEXPECTED | 
pre_classifier.bias     | MISSING    | 
pre_classifier.weight   | MISSING    | 
classifier.weight       | MISSING    | 
classifier.bias         | MISSING    | 

Notes:
- UNEXPECTED	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.
- MISSING	:those params were newly initialized because missing from the checkpoint. Consider training on your downstream task.


In [31]:
# -------------------------
# TRAINING ARGUMENTS (FAST ✅)
# -------------------------
training_args_distil = TrainingArguments(

    # ✅ SAVE TO DRIVE (same structure as BERT)
    output_dir="/content/drive/MyDrive/news_project/distilbert_output",

    eval_strategy="epoch",
    save_strategy="epoch",

    save_total_limit=1,

    num_train_epochs=EPOCHS,

    # ✅ Bigger effective batch = faster training
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    gradient_accumulation_steps=2,

    # ✅ Lighter warmup
    warmup_ratio=0.05,
    weight_decay=0.01,

    # ✅ GPU optimizations
    fp16=True,

    dataloader_num_workers=2,
    remove_unused_columns=True,

    logging_steps=100,

    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",

    report_to="none"
)

warmup_ratio is deprecated and will be removed in v5.2. Use `warmup_steps` instead.


In [32]:
# -------------------------
# TRAINER
# -------------------------
trainer_distil = Trainer(
    model=model_distil,
    args=training_args_distil,
    train_dataset=train_dataset_distil,
    eval_dataset=test_dataset_distil,

    compute_metrics=compute_metrics       # ✅ SAME METRICS AS BERT
)

# -------------------------
# TRAIN
# -------------------------
trainer_distil.train()

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,1.365158,0.658732,0.781848,0.777264,0.777252,0.781848
2,1.00464,0.629795,0.795743,0.794227,0.794753,0.795743


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

There were missing keys in the checkpoint model loaded: ['distilbert.embeddings.LayerNorm.weight', 'distilbert.embeddings.LayerNorm.bias'].
There were unexpected keys in the checkpoint model loaded: ['distilbert.embeddings.LayerNorm.beta', 'distilbert.embeddings.LayerNorm.gamma'].


TrainOutput(global_step=5686, training_loss=1.3316892703737626, metrics={'train_runtime': 1008.7795, 'train_samples_per_second': 180.343, 'train_steps_per_second': 5.637, 'total_flos': 4519901322461952.0, 'train_loss': 1.3316892703737626, 'epoch': 2.0})

In [33]:
# -------------------------
# SAVE FINAL MODEL + TOKENIZER FOR STREAMLIT
# -------------------------
trainer_distil.save_model("/content/drive/MyDrive/news_project/distilbert_output")
tokenizer_distil.save_pretrained("/content/drive/MyDrive/news_project/distilbert_output")


Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

('/content/drive/MyDrive/news_project/distilbert_output/tokenizer_config.json',
 '/content/drive/MyDrive/news_project/distilbert_output/tokenizer.json')

In [34]:
# ==========================================================
# FINAL EVALUATION → CSV COMPARISON FILE
# ==========================================================

import os, pandas as pd

results_distil = trainer_distil.evaluate()

results_distil["model_name"] = "DistilBERT-base-uncased"

row = pd.DataFrame([results_distil])

csv_path = "/content/drive/MyDrive/news_project/model_comparisons.csv"

# Append results to existing CSV
if os.path.exists(csv_path):
    row.to_csv(csv_path, mode="a", index=False, header=False)
else:
    row.to_csv(csv_path, index=False)

print("✅ DistilBERT evaluation appended to CSV")
row

✅ DistilBERT evaluation appended to CSV


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Unnamed: 0,eval_loss,eval_accuracy,eval_f1,eval_precision,eval_recall,eval_runtime,eval_samples_per_second,eval_steps_per_second,epoch,model_name
0,0.629795,0.795743,0.794227,0.794753,0.795743,91.7143,247.955,15.505,2.0,DistilBERT-base-uncased


In [37]:
import pandas as pd

csv_path = "/content/drive/MyDrive/news_project/model_comparisons.csv"

df = pd.read_csv(csv_path)

print(df)

best_model_name = df.loc[df["eval_f1"].idxmax()]["model_name"]

print("\n🏆 FINAL BEST MODEL =", best_model_name)


   eval_loss  eval_accuracy   eval_f1  eval_precision  eval_recall  \
0   0.629795       0.795743  0.794227        0.794753     0.795743   
1   0.635729       0.796931  0.796041        0.796747     0.796931   

   eval_runtime  eval_samples_per_second  eval_steps_per_second  epoch  \
0       91.7143                  247.955                 15.505    2.0   
1      106.9484                  212.635                 26.583    2.0   

                model_name  
0  DistilBERT-base-uncased  
1        BERT-base-uncased  

🏆 FINAL BEST MODEL = BERT-base-uncased


In [38]:
leaderboard = df.sort_values("eval_f1", ascending=False)

print("\n🏆 Model Leaderboard (sorted by F1)\n")
print(leaderboard[[
    "model_name",
    "eval_accuracy",
    "eval_f1",
    "eval_precision",
    "eval_recall",
    "eval_loss"
]])



🏆 Model Leaderboard (sorted by F1)

                model_name  eval_accuracy   eval_f1  eval_precision  \
1        BERT-base-uncased       0.796931  0.796041        0.796747   
0  DistilBERT-base-uncased       0.795743  0.794227        0.794753   

   eval_recall  eval_loss  
1     0.796931   0.635729  
0     0.795743   0.629795  
