<a href="https://colab.research.google.com/github/TakeMingOff/US-AI-Patents/blob/main/Notebook_E_Text_Classification_with_New_Transformer_Model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip -q install -U "transformers" "datasets" "accelerate" "evaluate" "scikit-learn"

In [None]:
# Mount Google Drive at /content/drive so that files stored there are
# reachable from this Colab runtime through ordinary file paths.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# --- 环境 & 随机种子 ---
import os, gc, random, numpy as np, pandas as pd, torch, inspect, torch.nn as nn
# Runtime switches, set through environment variables and torch backend flags.
os.environ["TOKENIZERS_PARALLELISM"] = "false"
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"
torch.backends.cuda.matmul.allow_tf32 = True
if hasattr(torch.backends, "cudnn"):
    torch.backends.cudnn.benchmark = True

# Seed every random number generator used below with the same value.
random.seed(1)
np.random.seed(1)
torch.manual_seed(1)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(1)

# --- Working directory (all relative paths below resolve against it) ---
os.chdir("/content/drive/MyDrive/USPTO_data")

# --- Load the labelled data: application ids, abstract texts, labels ---
TrainingData = pd.read_csv("./Training_Data/4K Patents - AI 20p.csv")
IDs = np.array(TrainingData['app number'].values.tolist())
Abstract_Text = TrainingData['abstract'].astype(str).values.tolist()
Classes = TrainingData['actual'].values.tolist()

# --- Output locations ---
os.makedirs("./Output/Model Performance", exist_ok=True)
os.makedirs("./Output/Classification Output", exist_ok=True)
RESULTS_CSV = "./Output/Model Performance/New Transformer Classification Model Performance.csv"
CLASSIFIED_VALUES_CSV = "./Output/Classification Output/New Transformer Classification Results.csv"
MODEL_DIR_FMT = "./Output/Models/{model}/fold_{fold}"

print("WD:", os.getcwd(), "  rows:", len(TrainingData))

# --- 依赖 ---
from tqdm.auto import tqdm
from datasets import Dataset
from transformers import (
    AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer, EarlyStoppingCallback
)
from packaging.version import parse as vparse
import transformers
from sklearn.model_selection import StratifiedKFold, train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix
from sklearn.utils.class_weight import compute_class_weight

# Compatibility across Transformers releases: pick the keyword name
# `eval_strategy` on version 4.46.0 or newer and the older
# `evaluation_strategy` spelling otherwise. The chosen name is used as a
# dictionary key when the training arguments are assembled.
EVAL_KEY = "eval_strategy" if vparse(transformers.__version__) >= vparse("4.46.0") else "evaluation_strategy"

# --- 标签统一到 1 维 0/1（兼容 one-hot/概率列；若原本就是 0/1 会直接返回）---
def normalize_binary_labels(arr):
    """Convert labels to a 1-D integer array containing only 0 and 1.

    Accepted inputs:
      * a 1-D sequence of 0/1 values (returned as an int array);
      * an ``(n, 2)`` one-hot array (reduced with ``argmax``);
      * an ``(n, 2)`` array of class probabilities (column 1 thresholded
        at 0.5);
      * a 1-D object array whose elements are such rows (stacked first).

    Args:
        arr: Array-like of labels.

    Returns:
        ``np.ndarray`` of ints with ``ndim == 1`` and values in ``{0, 1}``.

    Raises:
        ValueError: If the labels contain NaN, cannot be reduced to one
            dimension, or contain values other than 0 and 1.
    """
    a = np.asarray(arr)
    if a.ndim == 1 and a.dtype == object:
        # Best effort: rows stored as Python objects are stacked into a real
        # 2-D array; anything that cannot be stacked is left untouched.
        try:
            a = np.stack(a)
        except (ValueError, TypeError):
            pass
    if a.dtype.kind == "f" and np.isnan(a).any():
        # NaN would otherwise be cast to an arbitrary integer below.
        raise ValueError("labels contain NaN")
    if a.ndim == 2 and a.shape[1] == 2:
        if np.array_equal(a, a.astype(int)):   # one-hot rows
            a = a.argmax(axis=1).astype(int)
        else:                                   # probability rows
            a = (a[:, 1] >= 0.5).astype(int)
    if a.ndim != 1:
        raise ValueError(f"标签仍不是 1 维：{a.shape}")
    out = a.astype(int)
    # Reject anything the integer cast would silently distort (0.7 -> 0) and
    # any class id outside the binary set.
    if a.dtype.kind == "f" and not np.array_equal(a, out):
        raise ValueError("labels must be integral 0/1 values")
    if not np.isin(out, (0, 1)).all():
        raise ValueError("labels must only contain 0 and 1")
    return out

LABELS_1D = normalize_binary_labels(Classes)

# --- logits -> 正类概率（算 AUC 用）---
def logits_to_prob_pos(logits: np.ndarray) -> np.ndarray:
    """Turn classifier logits into the probability of the positive class.

    Args:
        logits: Either an ``(n, 2)`` array of two-class logits, or an array
            holding a single logit per sample (any shape that squeezes to
            one value per sample).

    Returns:
        For ``(n, 2)`` input, the softmax probability of column 1, shape
        ``(n,)``. Otherwise the element-wise sigmoid of the squeezed input.
        The result is always float64.
    """
    # Work in float64 so half-precision logits neither overflow nor collapse
    # many scores onto the same value.
    x = np.asarray(logits, dtype=np.float64)
    if x.ndim == 2 and x.shape[1] == 2:
        # Subtracting the row maximum keeps every exponent <= 0.
        shifted = x - x.max(axis=1, keepdims=True)
        exp = np.exp(shifted)
        return exp[:, 1] / exp.sum(axis=1)
    z = x.squeeze()
    # sigmoid(z) = exp(-log(1 + exp(-z))); logaddexp evaluates the inner term
    # without overflowing for large |z|.
    return np.exp(-np.logaddexp(0.0, -z))

# --- HF Dataset & 分词 ---
def make_ds(texts, labels):
    """Build a ``Dataset`` with a "text" column and a "label" column.

    Both arguments are materialised as lists before being handed to
    ``Dataset.from_dict``.
    """
    columns = {"text": list(texts), "label": list(labels)}
    return Dataset.from_dict(columns)
def tokenize_fn_builder(tok, max_len: int):
    """Return a function that encodes a batch's "text" field with ``tok``.

    Args:
        tok: Callable tokenizer; it receives the batch texts plus the
            keyword arguments ``truncation``, ``padding`` and ``max_length``.
        max_len: Length every sequence is truncated and padded to.

    Returns:
        A one-argument callable taking a mapping with a "text" entry and
        returning whatever ``tok`` returns for it.
    """
    tokenizer_options = {
        "truncation": True,
        "padding": "max_length",
        "max_length": max_len,
    }

    def _tokenize_batch(batch):
        return tok(batch["text"], **tokenizer_options)

    return _tokenize_batch

# --- 指标函数（Trainer 会读到 eval_* 指标，供早停/最优模型判定）---
def compute_metrics(eval_pred):
    """Compute accuracy, precision, recall, F1 and ROC AUC for one evaluation.

    Args:
        eval_pred: Pair ``(logits, labels)``. When the first element is a
            tuple, only its first entry is used as the logits.

    Returns:
        Dict with the keys "accuracy", "precision", "recall", "f1", "auc".
    """
    raw_logits, y_true = eval_pred
    if isinstance(raw_logits, tuple):
        raw_logits = raw_logits[0]

    y_hat = raw_logits.argmax(-1)       # hard class decisions
    scores = {"accuracy": accuracy_score(y_true, y_hat)}
    thresholded = (
        ("precision", precision_score),
        ("recall", recall_score),
        ("f1", f1_score),
    )
    for key, metric in thresholded:
        scores[key] = metric(y_true, y_hat, zero_division=0)
    # AUC is ranked on the positive-class probability, not on hard labels.
    scores["auc"] = roc_auc_score(y_true, logits_to_prob_pos(raw_logits))
    return scores

# --- 自定义 Trainer：类权重 + label smoothing（更稳，提高 Recall/F1 更常见）---
class WeightedTrainer(Trainer):
    """Trainer whose loss is cross-entropy with class weights and label smoothing.

    Args:
        class_weights: Optional per-class weight tensor passed to
            ``nn.CrossEntropyLoss``; ``None`` gives an unweighted loss.
        label_smoothing: Smoothing factor passed to ``nn.CrossEntropyLoss``
            (keyword-only, defaults to 0.05).
        **kwargs: Forwarded unchanged to ``Trainer.__init__``.
    """

    def __init__(self, class_weights: torch.Tensor | None = None, *,
                 label_smoothing: float = 0.05, **kwargs):
        super().__init__(**kwargs)
        self.class_weights = class_weights
        self.ce_label_smoothing = label_smoothing

    def compute_loss(self, model, inputs, return_outputs=False, num_items_in_batch=None):
        """Run the model without its labels and score the logits ourselves.

        Args:
            model: Model to call; its output must expose ``.logits``.
            inputs: Batch mapping that contains a "labels" entry.
            return_outputs: When true, return ``(loss, outputs)``.
            num_items_in_batch: Accepted for signature compatibility; unused.

        Returns:
            The loss tensor, or ``(loss, outputs)`` if ``return_outputs``.
        """
        labels = inputs["labels"]
        # The labels are withheld so the model does not compute its own loss.
        forward_inputs = {k: v for k, v in inputs.items() if k != "labels"}
        outputs = model(**forward_inputs)
        logits = outputs.logits.float()  # cross-entropy is evaluated in float32
        weight = None
        if self.class_weights is not None:
            weight = self.class_weights.to(logits.device, dtype=torch.float32)
        loss_fct = nn.CrossEntropyLoss(weight=weight, label_smoothing=self.ce_label_smoothing)
        loss = loss_fct(logits, labels.long())
        return (loss, outputs) if return_outputs else loss

# ========================= 可配部分 =========================
NUM_OF_SPLITS = 5      # number of cross-validation folds
Reweight = True        # use balanced class weights in the loss

# Training arguments shared by every model. Evaluation and checkpointing both
# happen once per epoch, and the checkpoint with the best eval F1 is reloaded
# when training ends.
BASE_ARGS = {
    "num_train_epochs": 10,
    "per_device_train_batch_size": 32,
    "per_device_eval_batch_size": 64,
    "logging_steps": 20,
    "save_strategy": "epoch",
    "load_best_model_at_end": True,
    "metric_for_best_model": "eval_f1",
    "greater_is_better": True,
    "report_to": [],
    "dataloader_num_workers": 0,
    "dataloader_pin_memory": True,
    "dataloader_persistent_workers": False,
    "learning_rate": 5e-5,
    "warmup_ratio": 0.1,
    "weight_decay": 0.01,
    "save_total_limit": 1,
    EVAL_KEY: "epoch",
}

# Models to compare, one entry per model:
# [display name, checkpoint id, maximum sequence length].
CLASSIFIERS = [
    ["DeBERTaV3-base", "microsoft/deberta-v3-base", 512],
    ["MPNet", "microsoft/mpnet-base", 512],
]
# ===========================================================

# Accumulators: one summary row per model, and one bundle of per-sample
# predictions per model.
RESULTS, Classified_Values = [], []

# bfloat16 is used only when a CUDA device reports support for it.
supports_bf16 = torch.cuda.is_available() and torch.cuda.is_bf16_supported()
# Without bf16 the weights stay in float32: fp16 mixed precision keeps fp32
# parameters and casts on the fly, and the AMP gradient scaler refuses to
# unscale gradients that belong to float16 parameters.
dtype_for_model = torch.bfloat16 if supports_bf16 else torch.float32

# ===== Confusion-matrix reporting options =====
CONF_MODE = "perclass"   # "counts" / "perclass" / "overall"
ROUND_CM  = 3            # decimal places kept for the confusion-matrix cells


from transformers import Trainer  # 用来检查 __init__ 的签名

# Cross-validate every configured model: train one model per fold, predict
# the held-out fold, then summarise the pooled out-of-fold predictions.
for name, model_name, max_len in tqdm(CLASSIFIERS, desc="Evaluating Classifiers (E)", leave=True):
    # Out-of-fold accumulators for this model, filled one test fold at a time.
    y_true_all, y_pred_all, prob_pos_all, id_all = [], [], [], []

    # Per-model copy of the shared arguments, plus precision/optimizer choices.
    args_this = BASE_ARGS.copy()
    has_cuda = torch.cuda.is_available()
    use_bf16 = bool(supports_bf16)
    args_this["bf16"] = use_bf16
    # fp16 mixed precision is only requested on a CUDA device without bf16;
    # on CPU neither flag is set and training runs in full precision.
    args_this["fp16"] = has_cuda and not use_bf16
    # The fused AdamW kernel is only requested when a CUDA device is present.
    optim_name = "adamw_torch_fused" if has_cuda else "adamw_torch"
    # Reduced-precision weights are only used together with bf16 training;
    # fp16 AMP and CPU runs need float32 parameters.
    load_dtype = dtype_for_model if use_bf16 else torch.float32

    lower = name.lower()
    if "modernbert" in lower:
        # Memory-lean settings: batch size 1 with 32 accumulation steps.
        args_this.update({
            "per_device_train_batch_size": 1,
            "per_device_eval_batch_size": 2,
            "gradient_accumulation_steps": 32,
            "gradient_checkpointing": True,
            "optim": optim_name,
            "remove_unused_columns": False,   # keep every dataset column
        })
    else:
        # Other models keep the batch sizes from BASE_ARGS.
        args_this.update({
            "gradient_accumulation_steps": 1,
            "optim": optim_name,
            "remove_unused_columns": False,
        })

    # The tokenizer depends only on the model, so it is loaded once per model
    # rather than once per fold.
    tok    = AutoTokenizer.from_pretrained(model_name, use_fast=True)
    encode = tokenize_fn_builder(tok, max_len=max_len)

    kf = StratifiedKFold(n_splits=NUM_OF_SPLITS, shuffle=True, random_state=1)
    for fold_idx, (train_i, test_i) in enumerate(
        tqdm(kf.split(Abstract_Text, LABELS_1D), desc=f"{name} | Cross-Validating", leave=False, total=NUM_OF_SPLITS), 1
    ):
        gc.collect()
        torch.cuda.empty_cache()

        # Outer split: this fold's training pool and held-out test set.
        X_tr_full = [Abstract_Text[i] for i in train_i]
        y_tr_full = LABELS_1D[train_i].tolist()
        test_X    = [Abstract_Text[i] for i in test_i]
        test_y    = LABELS_1D[test_i].tolist()

        # Inner split: 10% of the training pool, stratified, is set aside as
        # the validation set used for early stopping / best-model selection.
        train_X, val_X, train_y, val_y = train_test_split(
            X_tr_full, y_tr_full, test_size=0.10, stratify=y_tr_full, random_state=42
        )

        ds_train = make_ds(train_X, train_y).map(encode, batched=True, remove_columns=["text"])
        ds_val   = make_ds(val_X,   val_y  ).map(encode, batched=True, remove_columns=["text"])
        ds_eval  = make_ds(test_X,  test_y ).map(encode, batched=True, remove_columns=["text"])

        cols = ["input_ids", "attention_mask", "label"]
        if "token_type_ids" in ds_train.column_names:
            cols.append("token_type_ids")
        ds_train = ds_train.with_format("torch", columns=cols)
        ds_val   = ds_val.with_format("torch",   columns=cols)
        ds_eval  = ds_eval.with_format("torch",  columns=cols)

        # Balanced class weights computed from this fold's training labels.
        class_weights = None
        if Reweight:
            cw = compute_class_weight(class_weight="balanced", classes=np.array([0,1]), y=train_y)
            class_weights = torch.tensor(cw, dtype=torch.float)

        out_dir = MODEL_DIR_FMT.format(model=name, fold=fold_idx)
        os.makedirs(out_dir, exist_ok=True)

        # A fresh model is loaded for every fold.
        model = AutoModelForSequenceClassification.from_pretrained(
            model_name, num_labels=2, torch_dtype=load_dtype
        )
        model.config.use_cache = False

        args = TrainingArguments(output_dir=out_dir, **args_this)

        # Pass the tokenizer under whichever keyword this Trainer accepts:
        # `processing_class` if present in the signature, else `tokenizer`.
        trainer_kwargs = dict(
            model=model, args=args,
            train_dataset=ds_train, eval_dataset=ds_val,
            compute_metrics=compute_metrics,
            class_weights=class_weights,
            callbacks=[EarlyStoppingCallback(early_stopping_patience=2, early_stopping_threshold=0.0)],
        )
        if "processing_class" in inspect.signature(Trainer.__init__).parameters:
            trainer_kwargs["processing_class"] = tok
        else:
            trainer_kwargs["tokenizer"] = tok

        trainer = WeightedTrainer(**trainer_kwargs)
        trainer.train()

        # Predict the held-out test fold.
        pred_out = trainer.predict(ds_eval)
        logits   = pred_out.predictions
        preds    = np.argmax(logits, axis=1).astype(int)
        prob_pos = logits_to_prob_pos(logits)

        id_all.extend(IDs[test_i].tolist())
        y_true_all.extend(test_y)
        y_pred_all.extend(list(preds))
        prob_pos_all.extend(list(prob_pos))

        # Drop the references so the next fold starts from freed memory.
        del trainer, model
        gc.collect()
        torch.cuda.empty_cache()

    # ===== Summary over the pooled out-of-fold predictions =====
    y_true_all = np.asarray(y_true_all)
    y_pred_all = np.asarray(y_pred_all)

    # Fixing the label set guarantees a 2x2 matrix even when a class is
    # absent from both the truth and the predictions.
    tn, fp, fn, tp = confusion_matrix(y_true_all, y_pred_all, labels=[0, 1]).ravel()

    if CONF_MODE == "counts":
        tp_v, fn_v, fp_v, tn_v = tp, fn, fp, tn
    elif CONF_MODE == "overall":
        # Each cell as a share of all samples.
        total = tp + tn + fp + fn
        tp_v = tp / total if total else 0.0
        fn_v = fn / total if total else 0.0
        fp_v = fp / total if total else 0.0
        tn_v = tn / total if total else 0.0
    else:  # "perclass": each cell as a share of its actual class
        P = tp + fn  # actual positives
        N = tn + fp  # actual negatives
        tp_v = tp / P if P else 0.0   # TPR / recall
        fn_v = fn / P if P else 0.0   # FNR
        fp_v = fp / N if N else 0.0   # FPR
        tn_v = tn / N if N else 0.0   # TNR

    Share     = float(np.round(y_pred_all.mean(), 3))  # share predicted positive
    Accuracy  = accuracy_score(y_true_all, y_pred_all)
    ROC       = roc_auc_score(y_true_all, prob_pos_all)
    Precision = precision_score(y_true_all, y_pred_all, zero_division=0)
    Recall    = recall_score(y_true_all, y_pred_all, zero_division=0)
    F1        = f1_score(y_true_all, y_pred_all, zero_division=0)

    RESULTS.append([
        name, Share,
        round(tp_v, ROUND_CM), round(fn_v, ROUND_CM),
        round(fp_v, ROUND_CM), round(tn_v, ROUND_CM),
        round(Accuracy, 3), round(ROC, 3),
        round(Precision, 3), round(Recall, 3), round(F1, 3)
    ])

    # One (model, id, actual, predicted) tuple per sample.
    Classified_Values.append(list(zip(len(id_all)*[name], id_all, y_true_all.tolist(), y_pred_all.tolist())))

# ===== 写出两张表（与 Notebook D 完全一致）=====
# Per-model summary table, one row per entry in RESULTS.
RESULTS_TABLE = pd.DataFrame(
    RESULTS,
    columns=["Name","Share","True-Positives","False-Negatives","False-Positives","True-Negatives",
             "Accuracy","AUC","Precision","Recall","F1"]
)
RESULTS_TABLE["Type"] = "Transformer"
RESULTS_TABLE = RESULTS_TABLE[["Name","Type","Share","True-Positives","False-Negatives",
                               "False-Positives","True-Negatives","Accuracy","AUC","Precision","Recall","F1"]]

# Sort once; the same ordering is written to disk and shown below.
results_sorted = RESULTS_TABLE.sort_values("Accuracy", ascending=False)
results_sorted.to_csv(RESULTS_CSV, index=False, encoding="utf-8-sig")

# Wide per-sample table: id, Actual, then one prediction column per model
# (named after the model), joined on id.
Final = None
for bundle in Classified_Values:
    Temp = pd.DataFrame(bundle, columns=["Model","id","Actual","Predicted"])
    mname = Temp["Model"].iloc[0]
    if Final is None:
        Final = Temp[["id","Actual","Predicted"]].rename(columns={"Predicted": mname})
    else:
        Final = Final.merge(Temp[["id","Predicted"]].rename(columns={"Predicted": mname}), on="id", how="outer")

if Final is None:
    # Nothing was classified, so there is no per-sample table to write.
    raise RuntimeError("No classification results were produced; is CLASSIFIERS empty?")
Final.to_csv(CLASSIFIED_VALUES_CSV, index=False, encoding="utf-8-sig")

# Print the summary explicitly: a bare expression is only rendered by the
# notebook when it is the last statement of a cell, which it is not here.
print(results_sorted.to_string(index=False))
print("Saved:", os.path.abspath(RESULTS_CSV))
print("Saved:", os.path.abspath(CLASSIFIED_VALUES_CSV))

WD: /content/drive/MyDrive/USPTO_data   rows: 4000


Evaluating Classifiers (E):   0%|          | 0/2 [00:00<?, ?it/s]

DeBERTaV3-base | Cross-Validating:   0%|          | 0/5 [00:00<?, ?it/s]

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/52.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/579 [00:00<?, ?B/s]

spm.model:   0%|          | 0.00/2.46M [00:00<?, ?B/s]



Map:   0%|          | 0/2880 [00:00<?, ? examples/s]

Map:   0%|          | 0/320 [00:00<?, ? examples/s]

Map:   0%|          | 0/800 [00:00<?, ? examples/s]

`torch_dtype` is deprecated! Use `dtype` instead!


pytorch_model.bin:   0%|          | 0.00/371M [00:00<?, ?B/s]

Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'eos_token_id': 2, 'bos_token_id': 1}.


model.safetensors:   0%|          | 0.00/371M [00:00<?, ?B/s]

Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1,Auc
1,0.7026,0.404927,0.940625,0.894737,0.796875,0.842975,0.955109
2,0.3484,0.374186,0.946875,0.927273,0.796875,0.857143,0.971985
3,0.2621,0.372475,0.953125,0.929825,0.828125,0.876033,0.974213
4,0.2297,0.353922,0.953125,0.901639,0.859375,0.88,0.977112
5,0.2657,0.34403,0.95,0.875,0.875,0.875,0.977386
6,0.257,0.351973,0.95,0.887097,0.859375,0.873016,0.977081




Map:   0%|          | 0/2880 [00:00<?, ? examples/s]

Map:   0%|          | 0/320 [00:00<?, ? examples/s]

Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'eos_token_id': 2, 'bos_token_id': 1}.


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1,Auc
1,0.7077,0.543541,0.71875,0.407143,0.890625,0.558824,0.914062
2,0.3288,0.357756,0.946875,0.898305,0.828125,0.861789,0.962891
3,0.2781,0.367938,0.94375,0.838235,0.890625,0.863636,0.962433
4,0.2413,0.375637,0.93125,0.808824,0.859375,0.833333,0.959351
5,0.2354,0.390852,0.94375,0.910714,0.796875,0.85,0.959015




Map:   0%|          | 0/2880 [00:00<?, ? examples/s]

Map:   0%|          | 0/320 [00:00<?, ? examples/s]

Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'eos_token_id': 2, 'bos_token_id': 1}.


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1,Auc
1,0.6771,0.430431,0.89375,0.682927,0.875,0.767123,0.928528
2,0.3594,0.3489,0.959375,0.947368,0.84375,0.892562,0.978302
3,0.2704,0.313545,0.940625,0.8,0.9375,0.863309,0.983551
4,0.2276,0.300942,0.953125,0.84507,0.9375,0.888889,0.982452




Map:   0%|          | 0/2880 [00:00<?, ? examples/s]

Map:   0%|          | 0/320 [00:00<?, ? examples/s]

Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'eos_token_id': 2, 'bos_token_id': 1}.


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1,Auc
1,0.6969,0.42719,0.915625,0.760563,0.84375,0.8,0.935516
2,0.2951,0.387878,0.8875,0.659091,0.90625,0.763158,0.970795
3,0.2825,0.39801,0.903125,0.698795,0.90625,0.789116,0.97757




Map:   0%|          | 0/2880 [00:00<?, ? examples/s]

Map:   0%|          | 0/320 [00:00<?, ? examples/s]

Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'eos_token_id': 2, 'bos_token_id': 1}.


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1,Auc
1,0.7053,0.450753,0.859375,0.602151,0.875,0.713376,0.9375
2,0.3115,0.361901,0.909375,0.701149,0.953125,0.807947,0.97345
3,0.255,0.399147,0.9375,0.87931,0.796875,0.836066,0.970673
4,0.2592,0.369052,0.940625,0.881356,0.8125,0.845528,0.972046
5,0.2428,0.355576,0.934375,0.811594,0.875,0.842105,0.97348
6,0.2262,0.37698,0.940625,0.846154,0.859375,0.852713,0.965546
7,0.211,0.37986,0.934375,0.811594,0.875,0.842105,0.969696
8,0.2583,0.387776,0.934375,0.830769,0.84375,0.837209,0.962128


MPNet | Cross-Validating:   0%|          | 0/5 [00:00<?, ?it/s]

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/493 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

Map:   0%|          | 0/2880 [00:00<?, ? examples/s]

Map:   0%|          | 0/320 [00:00<?, ? examples/s]

Map:   0%|          | 0/800 [00:00<?, ? examples/s]

model.safetensors:   0%|          | 0.00/532M [00:00<?, ?B/s]

Some weights of MPNetForSequenceClassification were not initialized from the model checkpoint at microsoft/mpnet-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1,Auc
1,0.6008,0.446026,0.86875,0.619565,0.890625,0.730769,0.954742
2,0.3917,0.385318,0.940625,0.868852,0.828125,0.848,0.96701
3,0.2964,0.394957,0.940625,0.868852,0.828125,0.848,0.963898
4,0.2525,0.396402,0.94375,0.870968,0.84375,0.857143,0.967163
5,0.267,0.382411,0.940625,0.857143,0.84375,0.850394,0.968262
6,0.2803,0.379583,0.9375,0.84375,0.84375,0.84375,0.967255


Map:   0%|          | 0/2880 [00:00<?, ? examples/s]

Map:   0%|          | 0/320 [00:00<?, ? examples/s]

Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Some weights of MPNetForSequenceClassification were not initialized from the model checkpoint at microsoft/mpnet-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1,Auc
1,0.6126,0.467299,0.79375,0.491525,0.90625,0.637363,0.944336
2,0.3712,0.404696,0.934375,0.864407,0.796875,0.829268,0.956818
3,0.2993,0.421971,0.928125,0.859649,0.765625,0.809917,0.951935
4,0.2724,0.369895,0.93125,0.791667,0.890625,0.838235,0.961365
5,0.2965,0.378943,0.934375,0.820896,0.859375,0.839695,0.958557
6,0.2636,0.377831,0.93125,0.8,0.875,0.835821,0.958435
7,0.2363,0.385339,0.93125,0.808824,0.859375,0.833333,0.956726


Map:   0%|          | 0/2880 [00:00<?, ? examples/s]

Map:   0%|          | 0/320 [00:00<?, ? examples/s]

Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Some weights of MPNetForSequenceClassification were not initialized from the model checkpoint at microsoft/mpnet-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1,Auc
1,0.596,0.466529,0.784375,0.479675,0.921875,0.631016,0.938873
2,0.3838,0.345112,0.921875,0.753247,0.90625,0.822695,0.973877
3,0.308,0.343932,0.88125,0.635417,0.953125,0.7625,0.982941
4,0.2665,0.318683,0.95,0.842857,0.921875,0.880597,0.983093
5,0.2862,0.315177,0.94375,0.810811,0.9375,0.869565,0.983856
6,0.2867,0.334024,0.909375,0.701149,0.953125,0.807947,0.984131


Map:   0%|          | 0/2880 [00:00<?, ? examples/s]

Map:   0%|          | 0/320 [00:00<?, ? examples/s]

Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Some weights of MPNetForSequenceClassification were not initialized from the model checkpoint at microsoft/mpnet-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1,Auc
1,0.6108,0.479788,0.771875,0.465116,0.9375,0.621762,0.942841
2,0.3298,0.377063,0.878125,0.628866,0.953125,0.757764,0.970154
3,0.3174,0.365876,0.9,0.677778,0.953125,0.792208,0.970306
4,0.2834,0.376416,0.8875,0.645833,0.96875,0.775,0.970947
5,0.2698,0.368221,0.9,0.673913,0.96875,0.794872,0.971008
6,0.269,0.363571,0.90625,0.688889,0.96875,0.805195,0.972168
7,0.269,0.387259,0.8875,0.645833,0.96875,0.775,0.969604
8,0.2409,0.359713,0.90625,0.688889,0.96875,0.805195,0.973175


Map:   0%|          | 0/2880 [00:00<?, ? examples/s]

Map:   0%|          | 0/320 [00:00<?, ? examples/s]

Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Some weights of MPNetForSequenceClassification were not initialized from the model checkpoint at microsoft/mpnet-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1,Auc
1,0.5883,0.473748,0.78125,0.475806,0.921875,0.62766,0.940491
2,0.3503,0.392463,0.88125,0.641304,0.921875,0.75641,0.96286
3,0.3366,0.367223,0.903125,0.694118,0.921875,0.791946,0.969666
4,0.2807,0.361187,0.925,0.75641,0.921875,0.830986,0.973022
5,0.2839,0.351994,0.915625,0.728395,0.921875,0.813793,0.975311
6,0.2706,0.359406,0.921875,0.76,0.890625,0.820144,0.975311


Saved: /content/drive/MyDrive/USPTO_data/Output/Model Performance/New Transformer Classification Model Performance.csv
Saved: /content/drive/MyDrive/USPTO_data/Output/Classification Output/New Transformer Classification Results.csv
