In [None]:
import json
import os
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import RobertaTokenizerFast, RobertaForTokenClassification, Trainer, TrainingArguments, EarlyStoppingCallback
from sklearn.utils.class_weight import compute_class_weight
import numpy as np
from typing import List, Dict, Tuple
from sklearn.metrics import classification_report, confusion_matrix
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Define input/output paths for Kaggle (unchanged from the previous version).
# INPUT_DIR is where the dataset JSON files are read from; OUTPUT_DIR receives
# checkpoints, logs, plots and the final model written later in this script.
INPUT_DIR = "/kaggle/input/techstack-ner-dataset-noaugmentate"
OUTPUT_DIR = "/kaggle/working/ner_roberta_results"
TRAIN_DATA_PATH = os.path.join(INPUT_DIR, "train_data.json")
VALIDATE_DATA_PATH = os.path.join(INPUT_DIR, "validate_data.json")
# exist_ok=True lets the cell be re-run without failing on an existing directory.
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Load dataset (unchanged)
def load_data(file_path: str) -> List[Dict]:
    """Parse the UTF-8 encoded JSON file at ``file_path`` and return its content.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        Whatever ``json.load`` produces for the file; the rest of this script
        treats it as a list of dicts with ``"tokens"`` and ``"ner_tags"`` keys.
    """
    with open(file_path, mode='r', encoding='utf-8') as handle:
        return json.load(handle)

# Read both splits into memory once; they are reused below for the datasets
# and (training split only) for the class-weight computation.
train_data = load_data(TRAIN_DATA_PATH)
validate_data = load_data(VALIDATE_DATA_PATH)

# Define label mapping (unchanged)
# The position of a name in this tuple is its integer class id: "O" comes
# first, followed by a B-/I- pair for every entity type.
_LABEL_NAMES = (
    "O",
    "B-CLOUDPLATFORM",
    "I-CLOUDPLATFORM",
    "B-PROGRAMMINGLANG",
    "I-PROGRAMMINGLANG",
    "B-FRAMEWORK_LIB",
    "I-FRAMEWORK_LIB",
    "B-WEBFRAMEWORK_TECH",
    "I-WEBFRAMEWORK_TECH",
    "B-DATABASE",
    "I-DATABASE",
    "B-EMBEDDEDTECH",
    "I-EMBEDDEDTECH",
)
id2label = dict(enumerate(_LABEL_NAMES))
# Inverse lookup: label name -> class id.
label2id = {name: idx for idx, name in id2label.items()}
num_labels = len(id2label)

# Custom Dataset class (unchanged)
class NERDataset(Dataset):
    """Dataset that tokenizes pre-split sentences and aligns word-level NER tags.

    Each element of ``data`` must provide ``"tokens"`` (the words of one
    sentence) and ``"ner_tags"`` (one integer tag per word).
    """

    def __init__(self, data: List[Dict], tokenizer, max_length: int = 512):
        # max_length is both the padding target and the truncation limit.
        self.max_length = max_length
        self.tokenizer = tokenizer
        self.data = data

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx: int) -> Dict[str, torch.Tensor]:
        """Return the encoded sample ``idx`` together with its aligned labels.

        Every sub-token inherits the tag of the word it came from; positions
        that belong to no word (``word_ids`` entry is ``None``) are labelled
        ``-100``.
        """
        sample = self.data[idx]
        words = sample["tokens"]
        word_tags = sample["ner_tags"]

        batch = self.tokenizer(
            words,
            is_split_into_words=True,
            return_tensors="pt",
            padding="max_length",
            truncation=True,
            max_length=self.max_length,
            return_offsets_mapping=True,
        )

        # word_ids must be read from the tokenizer output before it is turned
        # into a plain dict below.
        word_ids = batch.word_ids(batch_index=0)

        # Drop the leading batch dimension and leave the offset mapping out of
        # the returned features.
        features = {
            name: tensor.squeeze(0)
            for name, tensor in batch.items()
            if name != "offset_mapping"
        }

        label_row = [-100] * self.max_length
        for position, word_index in enumerate(word_ids):
            if word_index is not None:
                label_row[position] = word_tags[word_index]

        features["labels"] = torch.tensor(label_row, dtype=torch.long)
        return features

# Initialize tokenizer and model (unchanged)
# add_prefix_space=True is set because NERDataset feeds pre-split words
# (is_split_into_words=True) to the tokenizer.
tokenizer = RobertaTokenizerFast.from_pretrained("FacebookAI/roberta-large", add_prefix_space=True)
# The classification head is sized by num_labels; id2label/label2id are passed
# to from_pretrained together with it.
model = RobertaForTokenClassification.from_pretrained(
    "FacebookAI/roberta-large",
    num_labels=num_labels,
    id2label=id2label,
    label2id=label2id
)

# Create datasets (unchanged); both use NERDataset's default max_length of 512.
train_dataset = NERDataset(train_data, tokenizer)
validate_dataset = NERDataset(validate_data, tokenizer)

# Compute class weights (unchanged)
# Gather every word-level tag of the training split, dropping ignore markers
# (-100) and anything outside the known label id range.
all_labels = [tag for sample in train_data for tag in sample["ner_tags"] if tag != -100]
valid_labels = [tag for tag in all_labels if 0 <= tag < num_labels]
unique_labels = np.unique(valid_labels)
print(f"Unique labels in training data: {unique_labels}")

# "balanced" weights are only defined for labels that actually occur; every
# other label keeps the neutral weight 1.0 from np.ones.
balanced_weights = compute_class_weight(class_weight="balanced", classes=unique_labels, y=valid_labels)
full_class_weights = np.ones(num_labels)
for label_id, weight in zip(unique_labels, balanced_weights):
    full_class_weights[label_id] = weight

weights_device = "cuda" if torch.cuda.is_available() else "cpu"
class_weights = torch.tensor(full_class_weights, dtype=torch.float).to(weights_device)
print(f"Class weights: {class_weights}")

# Custom Trainer with weighted loss (unchanged)
class CustomTrainer(Trainer):
    """Trainer whose loss is cross-entropy weighted by the module-level ``class_weights``."""

    def compute_loss(self, model, inputs, return_outputs=False, num_items_in_batch=None):
        """Compute the class-weighted token classification loss for one batch.

        ``"labels"`` is removed from ``inputs`` before the forward pass, so the
        loss is computed here rather than inside the model.
        ``num_items_in_batch`` is accepted but not used.

        Returns:
            ``(loss, outputs)`` when ``return_outputs`` is true, otherwise ``loss``.
        """
        targets = inputs.pop("labels")
        outputs = model(**inputs)
        # Flatten to (tokens, classes) / (tokens,); targets equal to -100 are
        # skipped by cross_entropy's default ignore_index.
        flat_logits = outputs.logits.view(-1, num_labels)
        loss = torch.nn.functional.cross_entropy(
            flat_logits,
            targets.view(-1),
            weight=class_weights,
        )
        if return_outputs:
            return loss, outputs
        return loss

# Training arguments
training_args = TrainingArguments(
    output_dir=OUTPUT_DIR,
    # Evaluate and checkpoint once per epoch; the two strategies are kept in
    # step because load_best_model_at_end is enabled below. (The former
    # save_steps=500 was dropped: it only applies to save_strategy="steps".)
    eval_strategy="epoch",
    save_strategy="epoch",
    save_total_limit=1,
    learning_rate=1e-5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    # 8 samples x 8 accumulation steps = 64 samples per optimizer step and device.
    gradient_accumulation_steps=8,
    num_train_epochs=30,
    weight_decay=0.01,
    logging_dir=os.path.join(OUTPUT_DIR, "logs"),
    # Log the training loss at the end of every epoch. The previous
    # logging_steps=50 was never reached within an epoch, so the per-epoch
    # metrics table reported "No log" for the training loss.
    logging_strategy="epoch",
    load_best_model_at_end=True,
    # "f1" is the key returned by compute_metrics (weighted average over all
    # labels, including "O"); it drives best-model selection and early stopping.
    metric_for_best_model="f1",
    greater_is_better=True,
    fp16=True,
    report_to=["none"],
)

# Compute metrics (extended with a confusion matrix)
def _save_confusion_matrix(cm_df, plot_path):
    """Render ``cm_df`` as an annotated heatmap and write it to ``plot_path``."""
    plt.figure(figsize=(10, 8))
    sns.heatmap(cm_df, annot=True, fmt="d", cmap="Blues", cbar=False)
    plt.title("Confusion Matrix (Evaluation Dataset)")
    plt.xlabel("Predicted Labels")
    plt.ylabel("True Labels")
    plt.xticks(rotation=45, ha="right")
    plt.yticks(rotation=0)
    plt.tight_layout()
    plt.savefig(plot_path)
    # Close the figure so repeated evaluations do not accumulate open figures.
    plt.close()


def compute_metrics(eval_pred):
    """Compute token-level metrics and dump a confusion matrix.

    Args:
        eval_pred: Pair ``(logits, labels)``. ``logits`` is reduced with
            ``argmax`` over axis 2; positions whose label is ``-100`` are
            excluded from every metric.

    Returns:
        Dict with ``"precision"``, ``"recall"`` and ``"f1"`` (weighted averages
        over all labels, including "O") and ``"entity_f1"`` (weighted F1
        restricted to tokens whose true label is not "O"; ``0.0`` when there
        are none).

    Side effects:
        Prints the confusion matrix and overwrites
        ``confusion_matrix_eval.png`` inside ``OUTPUT_DIR`` on every call.
    """
    predictions, labels = eval_pred
    predictions = np.argmax(predictions, axis=2)
    labels = np.asarray(labels)

    # Boolean masking flattens in row-major order, so labels and predictions
    # stay paired token by token.
    scored = labels != -100
    true_labels = labels[scored]
    true_preds = predictions[scored]

    if true_labels.size == 0:
        # Nothing to score; avoid calling sklearn with empty inputs.
        return {"precision": 0.0, "recall": 0.0, "f1": 0.0, "entity_f1": 0.0}

    # Metrics for all labels (including "O")
    unique_eval_labels_all = np.union1d(true_labels, true_preds)
    eval_target_names_all = [id2label[int(i)] for i in unique_eval_labels_all]
    report_all = classification_report(
        true_labels,
        true_preds,
        labels=unique_eval_labels_all,
        target_names=eval_target_names_all,
        output_dict=True,
        # Same numbers as the default, but without one UndefinedMetricWarning
        # per affected label on every evaluation.
        zero_division=0,
    )

    # Metrics for entities only (excluding tokens whose true label is "O")
    is_entity = true_labels != 0
    entity_true = true_labels[is_entity]
    entity_preds = true_preds[is_entity]
    if entity_true.size:
        unique_eval_labels_entity = np.union1d(entity_true, entity_preds)
        eval_target_names_entity = [id2label[int(i)] for i in unique_eval_labels_entity]
        report_entity = classification_report(
            entity_true,
            entity_preds,
            labels=unique_eval_labels_entity,
            target_names=eval_target_names_entity,
            output_dict=True,
            zero_division=0,
        )
        entity_f1 = report_entity["weighted avg"]["f1-score"]
    else:
        entity_f1 = 0.0

    # Compute Confusion Matrix (rows: true labels, columns: predicted labels)
    cm = confusion_matrix(true_labels, true_preds, labels=unique_eval_labels_all)
    cm_df = pd.DataFrame(cm, index=eval_target_names_all, columns=eval_target_names_all)
    print("\n=== Confusion Matrix (Evaluation) ===")
    print(cm_df.to_string())

    # Visualize Confusion Matrix
    cm_plot_path = os.path.join(OUTPUT_DIR, "confusion_matrix_eval.png")
    _save_confusion_matrix(cm_df, cm_plot_path)
    print(f"Confusion Matrix saved to {cm_plot_path}")

    return {
        "precision": report_all["weighted avg"]["precision"],
        "recall": report_all["weighted avg"]["recall"],
        "f1": report_all["weighted avg"]["f1-score"],
        "entity_f1": entity_f1
    }

# Initialize trainer
trainer = CustomTrainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=validate_dataset,
    # `tokenizer=` is deprecated on Trainer; `processing_class=` is its
    # replacement and avoids the FutureWarning raised at construction time.
    processing_class=tokenizer,
    compute_metrics=compute_metrics,
    # Stop when the metric selected in training_args has not improved for
    # 5 consecutive evaluations.
    callbacks=[EarlyStoppingCallback(early_stopping_patience=5)]
)

# Train the model (unchanged)
trainer.train()

# Save the final model (unchanged)
# NOTE(review): training_args sets load_best_model_at_end=True, so the weights
# saved here are presumably those of the best checkpoint rather than the last
# epoch - confirm against the Trainer documentation.
final_model_path = os.path.join(OUTPUT_DIR, "final_model")
trainer.save_model(final_model_path)
# The tokenizer is stored next to the model so the directory can be loaded on its own.
tokenizer.save_pretrained(final_model_path)
print(f"Training completed! Model saved to {final_model_path}")

# Evaluate and print results (unchanged)
# This runs compute_metrics once more on the validation set, which also
# reprints the confusion matrix and overwrites its PNG.
eval_results = trainer.evaluate()
print("Evaluation results:", eval_results)

# Test the model on the validation dataset (the confusion matrix is not repeated here)
def test_model(trainer, dataset, num_examples=7):
    """Print word-level predictions next to the gold tags for random samples.

    Args:
        trainer: Trainer whose ``model`` attribute is used for inference.
        dataset: ``NERDataset`` to sample from; its ``data``, ``tokenizer`` and
            ``max_length`` attributes are read.
        num_examples: Number of samples to show. Capped at ``len(dataset)``.

    For every sampled sentence the prediction of the first sub-token of each
    word is compared with that word's gold tag. Words cut off by truncation
    have no prediction and are left out of the table.
    """
    print("\n=== Testing Model ===")
    # Use the model owned by the trainer instead of a module-level global so
    # the function evaluates what it was asked to evaluate.
    eval_model = trainer.model
    eval_model.eval()
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    eval_model.to(device)

    # Sampling without replacement fails when more items are requested than exist.
    sample_size = min(num_examples, len(dataset))
    test_indices = np.random.choice(len(dataset), sample_size, replace=False)

    for idx in test_indices:
        example = dataset[idx]
        tokens = dataset.data[idx]["tokens"]
        true_labels = dataset.data[idx]["ner_tags"]

        inputs = {key: val.unsqueeze(0).to(device) for key, val in example.items() if key != "labels"}
        with torch.no_grad():
            outputs = eval_model(**inputs)
        predictions = torch.argmax(outputs.logits, dim=2).squeeze(0).cpu().numpy()

        # Re-tokenize with the same truncation limit as the dataset so that
        # word_ids never index past the end of `predictions`.
        word_ids = dataset.tokenizer(
            tokens,
            is_split_into_words=True,
            truncation=True,
            max_length=dataset.max_length,
        ).word_ids(batch_index=0)

        # Keep one prediction per word: the one made for its first sub-token.
        aligned_predictions = []
        prev_word_idx = None
        for i, word_idx in enumerate(word_ids):
            if word_idx is None:
                continue
            if word_idx != prev_word_idx:
                aligned_predictions.append(int(predictions[i]))
            prev_word_idx = word_idx

        # All table columns must have the same length; trim to the words that
        # have both a gold tag and a prediction.
        n_rows = min(len(tokens), len(true_labels), len(aligned_predictions))
        shown_tokens = tokens[:n_rows]
        shown_true = true_labels[:n_rows]
        shown_pred = aligned_predictions[:n_rows]

        true_labels_str = [id2label.get(label, "INVALID") for label in shown_true]
        pred_labels_str = [id2label.get(pred, "INVALID") for pred in shown_pred]
        matching = ["✅" if t == p else "❌" for t, p in zip(shown_true, shown_pred)]

        df = pd.DataFrame({
            "Token": shown_tokens,
            "True Label": true_labels_str,
            "Predicted Label": pred_labels_str,
            "Match": matching
        })

        print(f"\nExample {idx}:")
        print(f"Sentence: {' '.join(tokens)}")
        if n_rows < len(tokens):
            print(f"(showing first {n_rows} of {len(tokens)} tokens; the rest was truncated)")
        print(df.to_string(index=False))

# Run the qualitative check on 7 randomly chosen validation sentences.
test_model(trainer, validate_dataset, num_examples=7)

tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/482 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.42G [00:00<?, ?B/s]

Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at FacebookAI/roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Unique labels in training data: [ 0  1  2  3  4  5  6  7  8  9 10]
Class weights: tensor([9.4370e-02, 1.5259e+01, 1.2644e+02, 5.0865e+00, 8.4290e+01, 2.1073e+01,
        2.5287e+02, 4.2145e+01, 5.9003e+02, 2.5287e+01, 1.9668e+02, 1.0000e+00,
        1.0000e+00], device='cuda:0')


  trainer = CustomTrainer(


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Entity F1
1,No log,2.291178,0.941121,0.059066,0.094603,0.091473
2,No log,1.940356,0.952519,0.18228,0.281537,0.358023
3,No log,1.676772,0.953128,0.213242,0.324115,0.357447
4,No log,1.453957,0.954044,0.30994,0.436101,0.663522
5,No log,1.313411,0.955943,0.446809,0.57933,0.814398
6,No log,1.225739,0.956992,0.576056,0.696103,0.846918
7,No log,1.16147,0.958865,0.709749,0.799008,0.827532
8,No log,1.10223,0.962295,0.819625,0.874445,0.843419
9,No log,1.074451,0.966341,0.874405,0.911035,0.85227
10,No log,1.038309,0.967825,0.884725,0.917899,0.861356


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



=== Confusion Matrix (Evaluation) ===
                       O  B-CLOUDPLATFORM  I-CLOUDPLATFORM  B-PROGRAMMINGLANG  I-PROGRAMMINGLANG  B-FRAMEWORK_LIB  I-FRAMEWORK_LIB  B-WEBFRAMEWORK_TECH  I-WEBFRAMEWORK_TECH  B-DATABASE  I-DATABASE  B-EMBEDDEDTECH
O                    311                0                0                165                 26             5244               46                   36                    0           6         166               0
B-CLOUDPLATFORM        0                0                0                  0                  0               20                0                    0                    0           0           0               0
I-CLOUDPLATFORM        0                0                0                  0                  0                1                0                    0                    0           0           0               0
B-PROGRAMMINGLANG      3                0                0                  1                  1             

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



=== Confusion Matrix (Evaluation) ===
                        O  B-CLOUDPLATFORM  I-CLOUDPLATFORM  B-PROGRAMMINGLANG  I-PROGRAMMINGLANG  B-FRAMEWORK_LIB  I-FRAMEWORK_LIB  B-WEBFRAMEWORK_TECH  I-WEBFRAMEWORK_TECH  B-DATABASE  I-DATABASE  B-EMBEDDEDTECH
O                    1028               25               25                480                112             3218              294                  230                    3         505          80               0
B-CLOUDPLATFORM         0                0                0                  0                  0               12                3                    3                    0           2           0               0
I-CLOUDPLATFORM         0                0                1                  0                  0                0                0                    0                    0           0           0               0
B-PROGRAMMINGLANG       2                0                0                 18                  1        

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



=== Confusion Matrix (Evaluation) ===
                        O  B-CLOUDPLATFORM  I-CLOUDPLATFORM  B-PROGRAMMINGLANG  I-PROGRAMMINGLANG  B-FRAMEWORK_LIB  I-FRAMEWORK_LIB  B-WEBFRAMEWORK_TECH  I-WEBFRAMEWORK_TECH  B-DATABASE  I-DATABASE  B-EMBEDDEDTECH
O                    1220              171              128                604                126             1099              282                  455                   13        1868          34               0
B-CLOUDPLATFORM         0                3                2                  0                  0                2                0                    0                    0          13           0               0
I-CLOUDPLATFORM         0                0                1                  0                  0                0                0                    0                    0           0           0               0
B-PROGRAMMINGLANG       1                0                0                 15                  0        

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



=== Confusion Matrix (Evaluation) ===
                        O  B-CLOUDPLATFORM  I-CLOUDPLATFORM  B-PROGRAMMINGLANG  I-PROGRAMMINGLANG  B-FRAMEWORK_LIB  I-FRAMEWORK_LIB  B-WEBFRAMEWORK_TECH  I-WEBFRAMEWORK_TECH  B-DATABASE  I-DATABASE  B-EMBEDDEDTECH
O                    1758              507              134                908                118              932              258                  402                   17         937          29               0
B-CLOUDPLATFORM         0               15                1                  0                  0                2                0                    0                    0           2           0               0
I-CLOUDPLATFORM         0                0                1                  0                  0                0                0                    0                    0           0           0               0
B-PROGRAMMINGLANG       2                0                0                 64                  1        

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



=== Confusion Matrix (Evaluation) ===
                        O  B-CLOUDPLATFORM  I-CLOUDPLATFORM  B-PROGRAMMINGLANG  I-PROGRAMMINGLANG  B-FRAMEWORK_LIB  I-FRAMEWORK_LIB  B-WEBFRAMEWORK_TECH  I-WEBFRAMEWORK_TECH  B-DATABASE  I-DATABASE  B-EMBEDDEDTECH
O                    2576              607               40                970                 52             1065              185                  209                    7         267          22               0
B-CLOUDPLATFORM         0               18                0                  0                  0                2                0                    0                    0           0           0               0
I-CLOUDPLATFORM         0                0                1                  0                  0                0                0                    0                    0           0           0               0
B-PROGRAMMINGLANG       2                0                0                100                  0        

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



=== Confusion Matrix (Evaluation) ===
                        O  B-CLOUDPLATFORM  I-CLOUDPLATFORM  B-PROGRAMMINGLANG  I-PROGRAMMINGLANG  B-FRAMEWORK_LIB  I-FRAMEWORK_LIB  B-WEBFRAMEWORK_TECH  I-WEBFRAMEWORK_TECH  B-DATABASE  I-DATABASE  B-EMBEDDEDTECH
O                    3381              537               22                757                 20              828              143                   86                    5         204          17               0
B-CLOUDPLATFORM         0               18                0                  0                  0                2                0                    0                    0           0           0               0
I-CLOUDPLATFORM         0                0                1                  0                  0                0                0                    0                    0           0           0               0
B-PROGRAMMINGLANG       4                0                0                108                  0        

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



=== Confusion Matrix (Evaluation) ===
                        O  B-CLOUDPLATFORM  I-CLOUDPLATFORM  B-PROGRAMMINGLANG  I-PROGRAMMINGLANG  B-FRAMEWORK_LIB  I-FRAMEWORK_LIB  B-WEBFRAMEWORK_TECH  I-WEBFRAMEWORK_TECH  B-DATABASE  I-DATABASE  B-EMBEDDEDTECH
O                    4228              251               18                529                  4              441              113                   57                    3         351           5               0
B-CLOUDPLATFORM         0               18                0                  0                  0                2                0                    0                    0           0           0               0
I-CLOUDPLATFORM         0                0                1                  0                  0                0                0                    0                    0           0           0               0
B-PROGRAMMINGLANG       4                0                0                110                  0        

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



=== Confusion Matrix (Evaluation) ===
                        O  B-CLOUDPLATFORM  I-CLOUDPLATFORM  B-PROGRAMMINGLANG  I-PROGRAMMINGLANG  B-FRAMEWORK_LIB  I-FRAMEWORK_LIB  B-WEBFRAMEWORK_TECH  I-WEBFRAMEWORK_TECH  B-DATABASE  I-DATABASE  B-EMBEDDEDTECH
O                    4918              101               18                239                  4              313               83                   40                    2         273           9               0
B-CLOUDPLATFORM         1               18                0                  0                  0                1                0                    0                    0           0           0               0
I-CLOUDPLATFORM         0                0                1                  0                  0                0                0                    0                    0           0           0               0
B-PROGRAMMINGLANG       4                0                0                111                  0        

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



=== Confusion Matrix (Evaluation) ===
                        O  B-CLOUDPLATFORM  I-CLOUDPLATFORM  B-PROGRAMMINGLANG  I-PROGRAMMINGLANG  B-FRAMEWORK_LIB  I-FRAMEWORK_LIB  B-WEBFRAMEWORK_TECH  I-WEBFRAMEWORK_TECH  B-DATABASE  I-DATABASE  B-EMBEDDEDTECH
O                    5264               65               18                115                  4              252               69                   34                    0         163          16               0
B-CLOUDPLATFORM         2               18                0                  0                  0                0                0                    0                    0           0           0               0
I-CLOUDPLATFORM         0                0                1                  0                  0                0                0                    0                    0           0           0               0
B-PROGRAMMINGLANG       6                0                0                110                  0        

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



=== Confusion Matrix (Evaluation) ===
                        O  B-CLOUDPLATFORM  I-CLOUDPLATFORM  B-PROGRAMMINGLANG  I-PROGRAMMINGLANG  B-FRAMEWORK_LIB  I-FRAMEWORK_LIB  B-WEBFRAMEWORK_TECH  I-WEBFRAMEWORK_TECH  B-DATABASE  I-DATABASE  B-EMBEDDEDTECH
O                    5326               75               20                 91                  5              246               58                   35                    0         133          11               0
B-CLOUDPLATFORM         2               18                0                  0                  0                0                0                    0                    0           0           0               0
I-CLOUDPLATFORM         0                0                1                  0                  0                0                0                    0                    0           0           0               0
B-PROGRAMMINGLANG       5                0                0                110                  0        

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



=== Confusion Matrix (Evaluation) ===
                        O  B-CLOUDPLATFORM  I-CLOUDPLATFORM  B-PROGRAMMINGLANG  I-PROGRAMMINGLANG  B-FRAMEWORK_LIB  I-FRAMEWORK_LIB  B-WEBFRAMEWORK_TECH  I-WEBFRAMEWORK_TECH  B-DATABASE  I-DATABASE  B-EMBEDDEDTECH
O                    5351              100               21                 86                  5              219               45                   35                    0         133           5               0
B-CLOUDPLATFORM         2               18                0                  0                  0                0                0                    0                    0           0           0               0
I-CLOUDPLATFORM         0                0                1                  0                  0                0                0                    0                    0           0           0               0
B-PROGRAMMINGLANG       5                0                0                110                  0        

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



=== Confusion Matrix (Evaluation) ===
                        O  B-CLOUDPLATFORM  I-CLOUDPLATFORM  B-PROGRAMMINGLANG  I-PROGRAMMINGLANG  B-FRAMEWORK_LIB  I-FRAMEWORK_LIB  B-WEBFRAMEWORK_TECH  I-WEBFRAMEWORK_TECH  B-DATABASE  I-DATABASE  B-EMBEDDEDTECH
O                    5369              105               18                105                  4              197               24                   29                    1         145           3               0
B-CLOUDPLATFORM         2               18                0                  0                  0                0                0                    0                    0           0           0               0
I-CLOUDPLATFORM         0                0                1                  0                  0                0                0                    0                    0           0           0               0
B-PROGRAMMINGLANG       7                0                0                113                  0        

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



=== Confusion Matrix (Evaluation) ===
                        O  B-CLOUDPLATFORM  I-CLOUDPLATFORM  B-PROGRAMMINGLANG  I-PROGRAMMINGLANG  B-FRAMEWORK_LIB  I-FRAMEWORK_LIB  B-WEBFRAMEWORK_TECH  I-WEBFRAMEWORK_TECH  B-DATABASE  I-DATABASE  B-EMBEDDEDTECH
O                    5368              100               18                104                  4              201               15                   34                    1         150           5               0
B-CLOUDPLATFORM         2               18                0                  0                  0                0                0                    0                    0           0           0               0
I-CLOUDPLATFORM         0                0                1                  0                  0                0                0                    0                    0           0           0               0
B-PROGRAMMINGLANG       7                0                0                112                  0        

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



=== Confusion Matrix (Evaluation) ===
                        O  B-CLOUDPLATFORM  I-CLOUDPLATFORM  B-PROGRAMMINGLANG  I-PROGRAMMINGLANG  B-FRAMEWORK_LIB  I-FRAMEWORK_LIB  B-WEBFRAMEWORK_TECH  I-WEBFRAMEWORK_TECH  B-DATABASE  I-DATABASE  B-EMBEDDEDTECH
O                    5390              104               19                101                  5              179               16                   38                    1         143           4               0
B-CLOUDPLATFORM         2               18                0                  0                  0                0                0                    0                    0           0           0               0
I-CLOUDPLATFORM         0                0                1                  0                  0                0                0                    0                    0           0           0               0
B-PROGRAMMINGLANG       5                0                0                113                  0        

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



=== Confusion Matrix (Evaluation) ===
                        O  B-CLOUDPLATFORM  I-CLOUDPLATFORM  B-PROGRAMMINGLANG  I-PROGRAMMINGLANG  B-FRAMEWORK_LIB  I-FRAMEWORK_LIB  B-WEBFRAMEWORK_TECH  I-WEBFRAMEWORK_TECH  B-DATABASE  I-DATABASE  B-EMBEDDEDTECH
O                    5406              102               16                110                  5              159               16                   43                    1         138           4               0
B-CLOUDPLATFORM         2               18                0                  0                  0                0                0                    0                    0           0           0               0
I-CLOUDPLATFORM         0                0                1                  0                  0                0                0                    0                    0           0           0               0
B-PROGRAMMINGLANG       5                0                0                114                  0        

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



=== Confusion Matrix (Evaluation) ===
                        O  B-CLOUDPLATFORM  I-CLOUDPLATFORM  B-PROGRAMMINGLANG  I-PROGRAMMINGLANG  B-FRAMEWORK_LIB  I-FRAMEWORK_LIB  B-WEBFRAMEWORK_TECH  I-WEBFRAMEWORK_TECH  B-DATABASE  I-DATABASE  B-EMBEDDEDTECH
O                    5425              105               16                108                  4              134               18                   49                    1         135           5               0
B-CLOUDPLATFORM         2               18                0                  0                  0                0                0                    0                    0           0           0               0
I-CLOUDPLATFORM         0                0                1                  0                  0                0                0                    0                    0           0           0               0
B-PROGRAMMINGLANG       5                0                0                114                  0        

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



=== Confusion Matrix (Evaluation) ===
                        O  B-CLOUDPLATFORM  I-CLOUDPLATFORM  B-PROGRAMMINGLANG  I-PROGRAMMINGLANG  B-FRAMEWORK_LIB  I-FRAMEWORK_LIB  B-WEBFRAMEWORK_TECH  I-WEBFRAMEWORK_TECH  B-DATABASE  I-DATABASE  B-EMBEDDEDTECH
O                    5439              108               15                103                  4              125               16                   49                    1         135           5               0
B-CLOUDPLATFORM         2               18                0                  0                  0                0                0                    0                    0           0           0               0
I-CLOUDPLATFORM         0                0                1                  0                  0                0                0                    0                    0           0           0               0
B-PROGRAMMINGLANG       6                0                0                114                  0        

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



=== Confusion Matrix (Evaluation) ===
                        O  B-CLOUDPLATFORM  I-CLOUDPLATFORM  B-PROGRAMMINGLANG  I-PROGRAMMINGLANG  B-FRAMEWORK_LIB  I-FRAMEWORK_LIB  B-WEBFRAMEWORK_TECH  I-WEBFRAMEWORK_TECH  B-DATABASE  I-DATABASE  B-EMBEDDEDTECH
O                    5455              111               13                 98                  5              125               15                   47                    0         127           4               0
B-CLOUDPLATFORM         2               18                0                  0                  0                0                0                    0                    0           0           0               0
I-CLOUDPLATFORM         0                0                1                  0                  0                0                0                    0                    0           0           0               0
B-PROGRAMMINGLANG       6                0                0                115                  0        

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



=== Confusion Matrix (Evaluation) ===
                        O  B-CLOUDPLATFORM  I-CLOUDPLATFORM  B-PROGRAMMINGLANG  I-PROGRAMMINGLANG  B-FRAMEWORK_LIB  I-FRAMEWORK_LIB  B-WEBFRAMEWORK_TECH  I-WEBFRAMEWORK_TECH  B-DATABASE  I-DATABASE  B-EMBEDDEDTECH
O                    5461              103               12                 96                  5              132               16                   42                    0         129           4               0
B-CLOUDPLATFORM         2               18                0                  0                  0                0                0                    0                    0           0           0               0
I-CLOUDPLATFORM         0                0                1                  0                  0                0                0                    0                    0           0           0               0
B-PROGRAMMINGLANG       6                0                0                115                  0        

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



=== Confusion Matrix (Evaluation) ===
                        O  B-CLOUDPLATFORM  I-CLOUDPLATFORM  B-PROGRAMMINGLANG  I-PROGRAMMINGLANG  B-FRAMEWORK_LIB  I-FRAMEWORK_LIB  B-WEBFRAMEWORK_TECH  I-WEBFRAMEWORK_TECH  B-DATABASE  I-DATABASE  B-EMBEDDEDTECH
O                    5471               97               12                 98                  5              129               16                   40                    0         128           4               0
B-CLOUDPLATFORM         2               18                0                  0                  0                0                0                    0                    0           0           0               0
I-CLOUDPLATFORM         0                0                1                  0                  0                0                0                    0                    0           0           0               0
B-PROGRAMMINGLANG       6                0                0                115                  0        

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



=== Confusion Matrix (Evaluation) ===
                        O  B-CLOUDPLATFORM  I-CLOUDPLATFORM  B-PROGRAMMINGLANG  I-PROGRAMMINGLANG  B-FRAMEWORK_LIB  I-FRAMEWORK_LIB  B-WEBFRAMEWORK_TECH  I-WEBFRAMEWORK_TECH  B-DATABASE  I-DATABASE  B-EMBEDDEDTECH
O                    5477               93               13                 99                  5              124               17                   41                    0         127           4               0
B-CLOUDPLATFORM         2               18                0                  0                  0                0                0                    0                    0           0           0               0
I-CLOUDPLATFORM         0                0                1                  0                  0                0                0                    0                    0           0           0               0
B-PROGRAMMINGLANG       6                0                0                115                  0        

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



=== Confusion Matrix (Evaluation) ===
                        O  B-CLOUDPLATFORM  I-CLOUDPLATFORM  B-PROGRAMMINGLANG  I-PROGRAMMINGLANG  B-FRAMEWORK_LIB  I-FRAMEWORK_LIB  B-WEBFRAMEWORK_TECH  I-WEBFRAMEWORK_TECH  B-DATABASE  I-DATABASE  B-EMBEDDEDTECH
O                    5470               92               13                103                  5              122               18                   46                    0         127           4               0
B-CLOUDPLATFORM         2               18                0                  0                  0                0                0                    0                    0           0           0               0
I-CLOUDPLATFORM         0                0                1                  0                  0                0                0                    0                    0           0           0               0
B-PROGRAMMINGLANG       6                0                0                115                  0        

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



=== Confusion Matrix (Evaluation) ===
                        O  B-CLOUDPLATFORM  I-CLOUDPLATFORM  B-PROGRAMMINGLANG  I-PROGRAMMINGLANG  B-FRAMEWORK_LIB  I-FRAMEWORK_LIB  B-WEBFRAMEWORK_TECH  I-WEBFRAMEWORK_TECH  B-DATABASE  I-DATABASE  B-EMBEDDEDTECH
O                    5470               92               13                103                  5              122               18                   46                    0         127           4               0
B-CLOUDPLATFORM         2               18                0                  0                  0                0                0                    0                    0           0           0               0
I-CLOUDPLATFORM         0                0                1                  0                  0                0                0                    0                    0           0           0               0
B-PROGRAMMINGLANG       6                0                0                115                  0        

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



=== Confusion Matrix (Evaluation) ===
                        O  B-CLOUDPLATFORM  I-CLOUDPLATFORM  B-PROGRAMMINGLANG  I-PROGRAMMINGLANG  B-FRAMEWORK_LIB  I-FRAMEWORK_LIB  B-WEBFRAMEWORK_TECH  I-WEBFRAMEWORK_TECH  B-DATABASE  I-DATABASE  B-EMBEDDEDTECH
O                    5477               93               13                 99                  5              124               17                   41                    0         127           4               0
B-CLOUDPLATFORM         2               18                0                  0                  0                0                0                    0                    0           0           0               0
I-CLOUDPLATFORM         0                0                1                  0                  0                0                0                    0                    0           0           0               0
B-PROGRAMMINGLANG       6                0                0                115                  0        