In [None]:
# Import libraries (ส่วนนี้เหมือนเดิม)
import json
import os
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import RobertaTokenizerFast, RobertaForTokenClassification, Trainer, TrainingArguments, EarlyStoppingCallback
from sklearn.utils.class_weight import compute_class_weight
import numpy as np
from typing import List, Dict, Tuple
from sklearn.metrics import classification_report, confusion_matrix
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Define input/output paths for Kaggle (unchanged from the previous version)
INPUT_DIR = "/kaggle/input/techstack-ner-dataset"
OUTPUT_DIR = "/kaggle/working/ner_roberta_results"
TRAIN_DATA_PATH = os.path.join(INPUT_DIR, "train_data.json")
VALIDATE_DATA_PATH = os.path.join(INPUT_DIR, "validate_data.json")
# Create the output directory up front; exist_ok=True makes re-running the cell safe.
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Load a dataset split from disk (unchanged from the previous version)
def load_data(file_path: str) -> List[Dict]:
    """Parse the UTF-8 encoded JSON file at ``file_path`` and return its content.

    Args:
        file_path: Location of the JSON file to read.

    Returns:
        Whatever the JSON document decodes to; the rest of this script treats
        it as a list of dicts with ``"tokens"`` and ``"ner_tags"`` entries.
    """
    with open(file_path, mode='r', encoding='utf-8') as handle:
        raw_text = handle.read()
    return json.loads(raw_text)

# Read both dataset splits into memory as parsed JSON.
train_data = load_data(TRAIN_DATA_PATH)
validate_data = load_data(VALIDATE_DATA_PATH)

# Define label mapping (unchanged from the previous version).
# Id 0 is the "O" tag; every entity type gets a B-/I- pair of consecutive ids.
id2label = {
    0: "O",
    1: "B-CLOUDPLATFORM",
    2: "I-CLOUDPLATFORM",
    3: "B-PROGRAMMINGLANG",
    4: "I-PROGRAMMINGLANG",
    5: "B-FRAMEWORK_LIB",
    6: "I-FRAMEWORK_LIB",
    7: "B-WEBFRAMEWORK_TECH",
    8: "I-WEBFRAMEWORK_TECH",
    9: "B-DATABASE",
    10: "I-DATABASE",
    11: "B-EMBEDDEDTECH",
    12: "I-EMBEDDEDTECH"
}
# Inverse lookup (label name -> id) and the size of the label set.
label2id = {v: k for k, v in id2label.items()}
num_labels = len(id2label)

# Custom Dataset class (unchanged from the previous version)
class NERDataset(Dataset):
    """Map-style dataset that tokenizes pre-split sentences on access.

    Every record in ``data`` must provide ``"tokens"`` (the words) and
    ``"ner_tags"`` (one label id per word).
    """

    def __init__(self, data: List[Dict], tokenizer, max_length: int = 512):
        """Keep references to the records, the tokenizer and the padded length."""
        self.data = data
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        """Return the number of records."""
        return len(self.data)

    def __getitem__(self, idx: int) -> Dict[str, torch.Tensor]:
        """Tokenize record ``idx`` and attach sub-token level labels.

        Returns:
            The tokenizer outputs with the leading batch dimension squeezed
            away and ``offset_mapping`` removed, plus a ``labels`` tensor of
            length ``max_length``. Positions without a source word (special
            and padding tokens) carry ``-100``; every sub-token of a word
            carries that word's tag.
        """
        record = self.data[idx]
        words = record["tokens"]
        tags = record["ner_tags"]

        batch = self.tokenizer(
            words,
            is_split_into_words=True,
            return_tensors="pt",
            padding="max_length",
            truncation=True,
            max_length=self.max_length,
            return_offsets_mapping=True,
        )

        # word_ids() lives on the tokenizer output object, so read it before
        # the output is flattened into a plain dict.
        source_words = batch.word_ids(batch_index=0)
        features = {name: tensor.squeeze(0) for name, tensor in batch.items()}

        # Start fully masked and overwrite only the positions backed by a word.
        label_row = [-100] * self.max_length
        for position, word_idx in enumerate(source_words):
            if word_idx is not None:
                label_row[position] = tags[word_idx]

        features["labels"] = torch.tensor(label_row, dtype=torch.long)
        del features["offset_mapping"]
        return features

# Initialize tokenizer and model (unchanged from the previous version).
# NOTE(review): add_prefix_space=True is presumably required because NERDataset
# feeds pre-split words (is_split_into_words=True) - confirm against the
# RobertaTokenizerFast documentation.
tokenizer = RobertaTokenizerFast.from_pretrained("FacebookAI/roberta-large", add_prefix_space=True)
# The classification head is sized by num_labels; the label maps are passed
# along with the model configuration.
model = RobertaForTokenClassification.from_pretrained(
    "FacebookAI/roberta-large",
    num_labels=num_labels,
    id2label=id2label,
    label2id=label2id
)

# Create datasets (unchanged); both use NERDataset's default max_length of 512.
train_dataset = NERDataset(train_data, tokenizer)
validate_dataset = NERDataset(validate_data, tokenizer)

# Compute class weights: "balanced" (inverse-frequency) weights over the word
# level training tags, used by the weighted cross-entropy loss.
all_labels = [label for item in train_data for label in item["ner_tags"] if label != -100]
valid_labels = [label for label in all_labels if 0 <= label < num_labels]
unique_labels = np.arange(num_labels)  # every label id the model can emit

# compute_class_weight raises ValueError when `classes` contains a label that
# never occurs in `y`, so only the labels actually present are passed in.
present_labels = np.unique(valid_labels).astype(int)
if present_labels.size:
    present_weights = compute_class_weight(
        class_weight="balanced",
        classes=present_labels,
        y=np.array(valid_labels)
    )
    # Labels absent from the training data default to the smallest computed weight.
    full_class_weights = np.full(num_labels, present_weights.min(), dtype=float)
    full_class_weights[present_labels] = present_weights
else:
    # No usable training labels at all: fall back to uniform weights.
    full_class_weights = np.ones(num_labels)
class_weights = torch.tensor(full_class_weights, dtype=torch.float).to("cuda" if torch.cuda.is_available() else "cpu")
print(f"Class weights: {class_weights}")

# Custom Trainer with weighted loss
class CustomTrainer(Trainer):
    """Trainer that replaces the model's built-in loss with class-weighted cross-entropy."""

    def compute_loss(self, model, inputs, return_outputs=False, num_items_in_batch=None):
        """Compute cross-entropy over all label positions, weighted by ``class_weights``.

        Args:
            model: Model to run the forward pass with.
            inputs: Batch dict; its ``"labels"`` entry is removed before the
                forward pass so the model does not compute its own loss.
            return_outputs: When true, return ``(loss, outputs)`` instead of
                just the loss.
            num_items_in_batch: Accepted for signature compatibility; unused.

        Returns:
            The scalar loss, or ``(loss, outputs)`` if ``return_outputs`` is set.
        """
        labels = inputs.pop("labels")
        outputs = model(**inputs)
        logits = outputs.logits
        # The module-level weights were placed on a device chosen at start-up;
        # follow the logits so a different model placement cannot cause a
        # device mismatch inside the loss.
        loss_fct = torch.nn.CrossEntropyLoss(weight=class_weights.to(logits.device))
        # Flatten (batch, seq) so each position is one classification example;
        # positions labelled -100 are skipped by CrossEntropyLoss's default ignore_index.
        loss = loss_fct(logits.view(-1, num_labels), labels.view(-1))
        return (loss, outputs) if return_outputs else loss

# Compute metrics (now also produces a confusion matrix)
def compute_metrics(eval_pred):
    """Score predictions and persist a classification report and confusion matrix.

    Args:
        eval_pred: Pair ``(predictions, labels)``; ``predictions`` holds
            per-position class scores (argmax is taken over axis 2) and
            ``labels`` holds the gold ids, with ``-100`` marking positions to skip.

    Returns:
        Dict with weighted-average ``precision``, ``recall`` and ``f1`` over all
        labels, plus ``entity_f1``: the weighted F1 restricted to positions
        whose gold label is not ``O`` (id 0).

    Side effects:
        Overwrites ``classification_report.json`` and
        ``confusion_matrix_eval.png`` in ``OUTPUT_DIR`` and prints the matrix.
    """
    predictions, labels = eval_pred
    predictions = np.argmax(predictions, axis=2)

    # Keep a (gold, predicted) pair only when the position is scored (!= -100)
    # and BOTH ids are known labels. Filtering the pair as a unit keeps the two
    # lists aligned; filtering them one after the other would shift every pair
    # after the first dropped entry.
    scored_pairs = [
        (true, pred)
        for pred_row, label_row in zip(predictions, labels)
        for pred, true in zip(pred_row, label_row)
        if true != -100 and true in id2label and pred in id2label
    ]
    true_labels = [true for true, _ in scored_pairs]
    true_preds = [pred for _, pred in scored_pairs]

    # Report only on labels that actually occur as gold or predicted values.
    unique_eval_labels_all = np.unique(np.concatenate([np.unique(true_labels), np.unique(true_preds)]))
    eval_target_names_all = [id2label[i] for i in unique_eval_labels_all]
    report_all = classification_report(
        true_labels, true_preds, labels=unique_eval_labels_all,
        target_names=eval_target_names_all, output_dict=True
    )

    # Save the classification report
    report_path = os.path.join(OUTPUT_DIR, "classification_report.json")
    with open(report_path, "w") as f:
        json.dump(report_all, f, indent=4)
    print(f"Classification report saved to {report_path}")

    # Confusion matrix: rows are gold labels, columns are predicted labels
    cm = confusion_matrix(true_labels, true_preds, labels=unique_eval_labels_all)
    cm_df = pd.DataFrame(cm, index=eval_target_names_all, columns=eval_target_names_all)
    print("\n=== Confusion Matrix (Evaluation) ===")
    print(cm_df.to_string())

    plt.figure(figsize=(10, 8))
    sns.heatmap(cm_df, annot=True, fmt="d", cmap="Blues", cbar=False)
    plt.title("Confusion Matrix (Evaluation Dataset)")
    plt.xlabel("Predicted Labels")
    plt.ylabel("True Labels")
    plt.xticks(rotation=45, ha="right")
    plt.yticks(rotation=0)
    plt.tight_layout()
    cm_plot_path = os.path.join(OUTPUT_DIR, "confusion_matrix_eval.png")
    plt.savefig(cm_plot_path)
    print(f"Confusion Matrix saved to {cm_plot_path}")
    # Close the figure so repeated evaluations do not accumulate open figures.
    plt.close()

    # Entity-only view: drop every position whose gold label is "O" (id 0).
    entity_true = [label for label in true_labels if label != 0]
    entity_preds = [pred for pred, true in zip(true_preds, true_labels) if true != 0]
    # Without any gold entity, fall back to the overall weighted F1.
    entity_f1 = report_all["weighted avg"]["f1-score"] if not entity_true else \
                classification_report(entity_true, entity_preds, output_dict=True)["weighted avg"]["f1-score"]

    return {
        "precision": report_all["weighted avg"]["precision"],
        "recall": report_all["weighted avg"]["recall"],
        "f1": report_all["weighted avg"]["f1-score"],
        "entity_f1": entity_f1
    }

# Training configuration: evaluate and checkpoint once per epoch, keep at most
# three checkpoints, and reload the best one (by "f1") when training ends.
training_args = TrainingArguments(
    output_dir=OUTPUT_DIR,
    eval_strategy="epoch",
    save_strategy="epoch",
    save_total_limit=3,
    learning_rate=2e-5,  # raised from 1e-5
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    gradient_accumulation_steps=4,  # can be lowered if enough resources are available
    num_train_epochs=15,  # lower this if the dataset is small
    weight_decay=0.01,
    warmup_steps=100,  # added warmup
    logging_dir=os.path.join(OUTPUT_DIR, "logs"),
    logging_steps=50,
    load_best_model_at_end=True,
    metric_for_best_model="f1",  # select the best checkpoint by f1
    greater_is_better=True,
    fp16=True,
    report_to=["none"],
)

# Build the trainer with the weighted-loss subclass, the metric function above
# and early stopping.
# NOTE(review): the captured cell output shows a warning raised on this call;
# presumably the `tokenizer=` argument is deprecated in the installed
# transformers version - confirm before changing it.
trainer = CustomTrainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=validate_dataset,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
    callbacks=[EarlyStoppingCallback(early_stopping_patience=4)] # reduced from 5 to 4
)

# Train the model (unchanged from the previous version)
trainer.train()

# Save the final model together with its tokenizer (unchanged)
final_model_path = os.path.join(OUTPUT_DIR, "final_model")
trainer.save_model(final_model_path)
tokenizer.save_pretrained(final_model_path)
print(f"Training completed! Model saved to {final_model_path}")

# Evaluate and print results (unchanged). This calls compute_metrics once more,
# which overwrites the saved report and confusion-matrix files.
eval_results = trainer.evaluate()
print("Evaluation results:", eval_results)

# Test the model with the validation dataset (the confusion matrix is not repeated here)
def test_model(trainer, dataset, num_examples=7):
    """Print word-level gold vs. predicted labels for randomly chosen examples.

    Args:
        trainer: Trainer whose ``model`` attribute is used for inference.
        dataset: An ``NERDataset``; its ``data``, ``tokenizer`` and
            ``max_length`` attributes are read.
        num_examples: Number of examples to show; capped at ``len(dataset)``.

    For each example the prediction at the first sub-token of every word is
    taken as that word's label. Words cut off by truncation to
    ``dataset.max_length`` are omitted from the printed table.
    """
    print("\n=== Testing Model ===")
    # Use the model owned by the trainer that was passed in, not a global.
    eval_model = trainer.model
    eval_model.eval()
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    eval_model.to(device)

    # Sampling without replacement raises ValueError when asked for more items
    # than exist, so cap the request at the dataset size.
    sample_size = max(0, min(num_examples, len(dataset)))
    test_indices = np.random.choice(len(dataset), sample_size, replace=False)

    for idx in test_indices:
        example = dataset[idx]
        tokens = dataset.data[idx]["tokens"]
        true_labels = dataset.data[idx]["ner_tags"]

        inputs = {key: val.unsqueeze(0).to(device) for key, val in example.items() if key != "labels"}
        with torch.no_grad():
            outputs = eval_model(**inputs)
        predictions = torch.argmax(outputs.logits, dim=2).squeeze(0).cpu().numpy()

        # Re-tokenize with the same truncation as the dataset so every word id
        # position is a valid index into `predictions`.
        word_ids = dataset.tokenizer(
            tokens,
            is_split_into_words=True,
            truncation=True,
            max_length=dataset.max_length,
        ).word_ids(batch_index=0)

        # Keep the prediction at the first sub-token of each word.
        aligned_predictions = []
        prev_word_idx = None
        for i, word_idx in enumerate(word_ids):
            if word_idx is None:
                continue
            if word_idx != prev_word_idx:
                aligned_predictions.append(predictions[i])
            prev_word_idx = word_idx

        # All table columns must have equal length; truncation (or inconsistent
        # annotations) can leave fewer predictions than words.
        shown = min(len(tokens), len(true_labels), len(aligned_predictions))
        shown_tokens = tokens[:shown]
        shown_true = true_labels[:shown]
        shown_preds = aligned_predictions[:shown]

        true_labels_str = [id2label.get(label, "INVALID") for label in shown_true]
        pred_labels_str = [id2label.get(pred, "INVALID") for pred in shown_preds]
        matching = ["✅" if t == p else "❌" for t, p in zip(shown_true, shown_preds)]

        table_data = {
            "Token": shown_tokens,
            "True Label": true_labels_str,
            "Predicted Label": pred_labels_str,
            "Match": matching
        }
        df = pd.DataFrame(table_data)

        print(f"\nExample {idx}:")
        print(f"Sentence: {' '.join(tokens)}")
        print(df.to_string(index=False))

# Run the qualitative test on 7 randomly sampled validation examples
test_model(trainer, validate_dataset, num_examples=7)

tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/482 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.42G [00:00<?, ?B/s]

Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at FacebookAI/roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Class weights: tensor([8.0687e-02, 8.9188e+00, 4.5564e+01, 4.7205e+00, 7.7627e+01, 1.4455e+01,
        1.1031e+02, 1.8548e+01, 1.7466e+02, 1.5759e+01, 6.7610e+01, 3.6137e+01,
        2.6199e+02], device='cuda:0')


  trainer = CustomTrainer(


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Entity F1
1,No log,2.746453,0.914157,0.137585,0.236915,0.00396
2,No log,2.588488,0.921681,0.285603,0.43342,0.027789
3,No log,2.216008,0.934134,0.512128,0.650947,0.346239
4,No log,1.781068,0.9335,0.569379,0.687639,0.522621
5,No log,1.291706,0.941163,0.763693,0.82803,0.718906
6,2.135400,0.841891,0.947314,0.832551,0.873818,0.818932
7,2.135400,0.546586,0.947372,0.828247,0.868644,0.906218
8,2.135400,0.3819,0.953077,0.877282,0.902941,0.916333
9,2.135400,0.284825,0.953843,0.874804,0.901268,0.939218
10,2.135400,0.278006,0.957341,0.885237,0.909986,0.944191


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Classification report saved to /kaggle/working/ner_roberta_results/classification_report.json

=== Confusion Matrix (Evaluation) ===
                        O  B-CLOUDPLATFORM  I-CLOUDPLATFORM  B-PROGRAMMINGLANG  I-PROGRAMMINGLANG  B-FRAMEWORK_LIB  I-FRAMEWORK_LIB  B-WEBFRAMEWORK_TECH  I-WEBFRAMEWORK_TECH  B-DATABASE  I-DATABASE  B-EMBEDDEDTECH  I-EMBEDDEDTECH
O                    1043                0               15                  0                  0                0                0                   44                 1482           1        4544               1               0
B-CLOUDPLATFORM         2                0                0                  0                  0                0                0                    1                   10           0          88               0               0
I-CLOUDPLATFORM         0                0                0                  0                  0                0                0                    0                    4    

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Classification report saved to /kaggle/working/ner_roberta_results/classification_report.json

=== Confusion Matrix (Evaluation) ===
                        O  B-CLOUDPLATFORM  I-CLOUDPLATFORM  B-PROGRAMMINGLANG  I-PROGRAMMINGLANG  B-FRAMEWORK_LIB  I-FRAMEWORK_LIB  B-WEBFRAMEWORK_TECH  I-WEBFRAMEWORK_TECH  B-DATABASE  I-DATABASE  B-EMBEDDEDTECH  I-EMBEDDEDTECH
O                    2168               20               63                  0                  0               34                0                  180                 1042           4        3616               2               1
B-CLOUDPLATFORM         5                0                0                  0                  0                0                0                    3                    3           0          90               0               0
I-CLOUDPLATFORM         0                0                1                  0                  0                0                0                    0                    3    

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Classification report saved to /kaggle/working/ner_roberta_results/classification_report.json

=== Confusion Matrix (Evaluation) ===
                        O  B-CLOUDPLATFORM  I-CLOUDPLATFORM  B-PROGRAMMINGLANG  I-PROGRAMMINGLANG  B-FRAMEWORK_LIB  I-FRAMEWORK_LIB  B-WEBFRAMEWORK_TECH  I-WEBFRAMEWORK_TECH  B-DATABASE  I-DATABASE  B-EMBEDDEDTECH  I-EMBEDDEDTECH
O                    3752              444              117                134                 11              752                7                  388                  328         160         955              55              27
B-CLOUDPLATFORM         2               35                1                  0                  0               22                0                    2                    0          19          15               5               0
I-CLOUDPLATFORM         1               12                3                  0                  0                0                0                    0                    0    

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Classification report saved to /kaggle/working/ner_roberta_results/classification_report.json

=== Confusion Matrix (Evaluation) ===
                        O  B-CLOUDPLATFORM  I-CLOUDPLATFORM  B-PROGRAMMINGLANG  I-PROGRAMMINGLANG  B-FRAMEWORK_LIB  I-FRAMEWORK_LIB  B-WEBFRAMEWORK_TECH  I-WEBFRAMEWORK_TECH  B-DATABASE  I-DATABASE  B-EMBEDDEDTECH  I-EMBEDDEDTECH
O                    4082              506              125                519                 26              795                9                  250                   57         385         227             130              19
B-CLOUDPLATFORM         1               30                0                  3                  0               17                0                    0                    0          32           1              17               0
I-CLOUDPLATFORM         2                9                6                  0                  0                0                0                    0                    0    

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Classification report saved to /kaggle/working/ner_roberta_results/classification_report.json

=== Confusion Matrix (Evaluation) ===
                        O  B-CLOUDPLATFORM  I-CLOUDPLATFORM  B-PROGRAMMINGLANG  I-PROGRAMMINGLANG  B-FRAMEWORK_LIB  I-FRAMEWORK_LIB  B-WEBFRAMEWORK_TECH  I-WEBFRAMEWORK_TECH  B-DATABASE  I-DATABASE  B-EMBEDDEDTECH  I-EMBEDDEDTECH
O                    5474              230              143                332                 21              225               13                  199                   35         272         113              61              12
B-CLOUDPLATFORM         1               55                3                  1                  0                8                0                    1                    0          21           0              11               0
I-CLOUDPLATFORM         2                2               13                  0                  0                0                0                    0                    0    

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Classification report saved to /kaggle/working/ner_roberta_results/classification_report.json

=== Confusion Matrix (Evaluation) ===
                        O  B-CLOUDPLATFORM  I-CLOUDPLATFORM  B-PROGRAMMINGLANG  I-PROGRAMMINGLANG  B-FRAMEWORK_LIB  I-FRAMEWORK_LIB  B-WEBFRAMEWORK_TECH  I-WEBFRAMEWORK_TECH  B-DATABASE  I-DATABASE  B-EMBEDDEDTECH  I-EMBEDDEDTECH
O                    5950              174               95                237                  4              235                6                   78                   13         200          87              46               5
B-CLOUDPLATFORM         1               81                4                  0                  0                3                0                    1                    0           9           0               2               0
I-CLOUDPLATFORM         2                1               15                  0                  0                0                0                    0                    0    

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Classification report saved to /kaggle/working/ner_roberta_results/classification_report.json

=== Confusion Matrix (Evaluation) ===
                        O  B-CLOUDPLATFORM  I-CLOUDPLATFORM  B-PROGRAMMINGLANG  I-PROGRAMMINGLANG  B-FRAMEWORK_LIB  I-FRAMEWORK_LIB  B-WEBFRAMEWORK_TECH  I-WEBFRAMEWORK_TECH  B-DATABASE  I-DATABASE  B-EMBEDDEDTECH  I-EMBEDDEDTECH
O                    5870              228               80                309                  8              236               10                   69                   15         215          42              44               4
B-CLOUDPLATFORM         1               93                1                  0                  0                0                0                    1                    0           5           0               0               0
I-CLOUDPLATFORM         2                1               15                  0                  0                0                0                    0                    0    

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Classification report saved to /kaggle/working/ner_roberta_results/classification_report.json

=== Confusion Matrix (Evaluation) ===
                        O  B-CLOUDPLATFORM  I-CLOUDPLATFORM  B-PROGRAMMINGLANG  I-PROGRAMMINGLANG  B-FRAMEWORK_LIB  I-FRAMEWORK_LIB  B-WEBFRAMEWORK_TECH  I-WEBFRAMEWORK_TECH  B-DATABASE  I-DATABASE  B-EMBEDDEDTECH  I-EMBEDDEDTECH
O                    6241              279               25                205                  1              106                5                   38                   10         160          20              38               2
B-CLOUDPLATFORM         1               95                0                  0                  0                0                0                    0                    0           5           0               0               0
I-CLOUDPLATFORM         2                1               15                  0                  0                0                0                    0                    0    

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Classification report saved to /kaggle/working/ner_roberta_results/classification_report.json

=== Confusion Matrix (Evaluation) ===
                        O  B-CLOUDPLATFORM  I-CLOUDPLATFORM  B-PROGRAMMINGLANG  I-PROGRAMMINGLANG  B-FRAMEWORK_LIB  I-FRAMEWORK_LIB  B-WEBFRAMEWORK_TECH  I-WEBFRAMEWORK_TECH  B-DATABASE  I-DATABASE  B-EMBEDDEDTECH  I-EMBEDDEDTECH
O                    6208              147               69                228                  1              157                7                  105                   19         117          33              38               1
B-CLOUDPLATFORM         1               95                0                  0                  0                0                0                    0                    0           5           0               0               0
I-CLOUDPLATFORM         2                0               16                  0                  0                0                0                    0                    0    

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Classification report saved to /kaggle/working/ner_roberta_results/classification_report.json

=== Confusion Matrix (Evaluation) ===
                        O  B-CLOUDPLATFORM  I-CLOUDPLATFORM  B-PROGRAMMINGLANG  I-PROGRAMMINGLANG  B-FRAMEWORK_LIB  I-FRAMEWORK_LIB  B-WEBFRAMEWORK_TECH  I-WEBFRAMEWORK_TECH  B-DATABASE  I-DATABASE  B-EMBEDDEDTECH  I-EMBEDDEDTECH
O                    6285               98               46                206                  1              136               12                  173                   17         102          40              13               1
B-CLOUDPLATFORM         0               95                0                  0                  0                0                0                    0                    1           5           0               0               0
I-CLOUDPLATFORM         2                0               16                  0                  0                0                0                    0                    0    

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Classification report saved to /kaggle/working/ner_roberta_results/classification_report.json

=== Confusion Matrix (Evaluation) ===
                        O  B-CLOUDPLATFORM  I-CLOUDPLATFORM  B-PROGRAMMINGLANG  I-PROGRAMMINGLANG  B-FRAMEWORK_LIB  I-FRAMEWORK_LIB  B-WEBFRAMEWORK_TECH  I-WEBFRAMEWORK_TECH  B-DATABASE  I-DATABASE  B-EMBEDDEDTECH  I-EMBEDDEDTECH
O                    6316              151               90                153                  1              139                3                  148                   14          65          32              18               0
B-CLOUDPLATFORM         0               95                0                  0                  0                0                0                    0                    1           5           0               0               0
I-CLOUDPLATFORM         2                0               16                  0                  0                0                0                    0                    0    

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Classification report saved to /kaggle/working/ner_roberta_results/classification_report.json

=== Confusion Matrix (Evaluation) ===
                        O  B-CLOUDPLATFORM  I-CLOUDPLATFORM  B-PROGRAMMINGLANG  I-PROGRAMMINGLANG  B-FRAMEWORK_LIB  I-FRAMEWORK_LIB  B-WEBFRAMEWORK_TECH  I-WEBFRAMEWORK_TECH  B-DATABASE  I-DATABASE  B-EMBEDDEDTECH  I-EMBEDDEDTECH
O                    6725               58               46                 84                  0               38                2                  119                    3          41          10               4               0
B-CLOUDPLATFORM         1               99                0                  0                  0                0                0                    0                    0           1           0               0               0
I-CLOUDPLATFORM         2                0               16                  0                  0                0                0                    0                    0    

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Classification report saved to /kaggle/working/ner_roberta_results/classification_report.json

=== Confusion Matrix (Evaluation) ===
                        O  B-CLOUDPLATFORM  I-CLOUDPLATFORM  B-PROGRAMMINGLANG  I-PROGRAMMINGLANG  B-FRAMEWORK_LIB  I-FRAMEWORK_LIB  B-WEBFRAMEWORK_TECH  I-WEBFRAMEWORK_TECH  B-DATABASE  I-DATABASE  B-EMBEDDEDTECH  I-EMBEDDEDTECH
O                    6592               72               52                 76                  0              101                3                  128                   12          66          22               6               0
B-CLOUDPLATFORM         0               95                0                  0                  0                0                0                    0                    1           5           0               0               0
I-CLOUDPLATFORM         2                0               16                  0                  0                0                0                    0                    0    

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Classification report saved to /kaggle/working/ner_roberta_results/classification_report.json

=== Confusion Matrix (Evaluation) ===
                        O  B-CLOUDPLATFORM  I-CLOUDPLATFORM  B-PROGRAMMINGLANG  I-PROGRAMMINGLANG  B-FRAMEWORK_LIB  I-FRAMEWORK_LIB  B-WEBFRAMEWORK_TECH  I-WEBFRAMEWORK_TECH  B-DATABASE  I-DATABASE  B-EMBEDDEDTECH  I-EMBEDDEDTECH
O                    6621               66               50                 71                  0               91                2                  125                   11          66          22               5               0
B-CLOUDPLATFORM         0               95                0                  0                  0                0                0                    0                    1           5           0               0               0
I-CLOUDPLATFORM         2                0               16                  0                  0                0                0                    0                    0    

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Training completed! Model saved to /kaggle/working/ner_roberta_results/final_model


Classification report saved to /kaggle/working/ner_roberta_results/classification_report.json

=== Confusion Matrix (Evaluation) ===
                        O  B-CLOUDPLATFORM  I-CLOUDPLATFORM  B-PROGRAMMINGLANG  I-PROGRAMMINGLANG  B-FRAMEWORK_LIB  I-FRAMEWORK_LIB  B-WEBFRAMEWORK_TECH  I-WEBFRAMEWORK_TECH  B-DATABASE  I-DATABASE  B-EMBEDDEDTECH  I-EMBEDDEDTECH
O                    6725               58               46                 84                  0               38                2                  119                    3          41          10               4               0
B-CLOUDPLATFORM         1               99                0                  0                  0                0                0                    0                    0           1           0               0               0
I-CLOUDPLATFORM         2                0               16                  0                  0                0                0                    0                    0    

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Example 69:
Sentence: We’re looking for talent skilled in Ninja for next-gen devices , IBM DB2 for data excellence , and Clojure for robust coding , all on Linode , providing a collaborative space to develop technologies that solve complex challenges and drive progress in a team focused on technical mastery and innovation globally .
        Token        True Label     Predicted Label Match
        We’re                 O                   O     ✅
      looking                 O                   O     ✅
          for                 O                   O     ✅
       talent                 O                   O     ✅
      skilled                 O                   O     ✅
           in                 O                   O     ✅
        Ninja    B-EMBEDDEDTECH      B-EMBEDDEDTECH     ✅
          for                 O                   O     ✅
     next-gen                 O                   O     ✅
      devices                 O                   O     ✅
            ,             