# Bixie Model Training & Benchmarking

This notebook demonstrates how to train and benchmark the Bixie vulnerability classifier using code embeddings from the SAFEEmbedder model.

In [2]:
import sys
import os

# Get the absolute path of the directory containing the current script
current_dir = os.path.dirname(os.path.abspath("."))
print(f"Current directory: {current_dir}")
# Assuming the root is one level up from the script's directory (adjust as needed)
# project_root = os.path.join(current_dir, '..') 
print(type(sys.path))
print(f"Current sys.path: {sys.path}")

# Add the project root to sys.path
# sys.path.insert(0, current_dir)
sys.path.insert(0,"/home/trashpanda/repos/bixie.ai/")
# print(f"Updated sys.path: {sys.path}")
# os.environ.setdefault("PYTHONPATH","/home/trashpanda/repos/bixie.ai/")

Current directory: /home/trashpanda/repos/bixie.ai
<class 'list'>
Current sys.path: ['/home/trashpanda/repos/bixie.ai/', '/usr/lib/python313.zip', '/usr/lib/python3.13', '/usr/lib/python3.13/lib-dynload', '', '/home/trashpanda/repos/bixie.ai/lib/python3.13/site-packages']


In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay, accuracy_score, f1_score
import joblib

from bixie.models.model_inference import SAFEEmbedder, CLASSIFIER_PATH
import json
import random
from transformers import RobertaTokenizer, RobertaForSequenceClassification, Trainer, TrainingArguments
import torch
from torch.utils.data import Dataset, DataLoader

import matplotlib.pyplot as plt
import seaborn as sns


## 1. Load Labeled Samples

Assumes files are labeled by 'good' (safe) or 'bad' (vulnerable) in their filenames.

In [None]:


# with open("../data/bixie_V.json") as f:
#     vuln = json.load(f)
#     for v in vuln:
#         v["label"] = 1

# with open("../data/bixie_V.json") as f:
#     clean = json.load(f)
#     for c in clean:
#         c["label"] = 0
# data = vuln + clean
# random.shuffle(data)

# formatted = [
#     {
#         "id": f"{i}",
#         "project": item["project"],
#         "code": item["code"],
#         "label": item["label"]
#     }
#     for i, item in enumerate(data)
# ]

# with open("../datasets/training_data.json", "w") as f:
#     json.dump(formatted, f, indent=2)


In [5]:


#extract fields from training data

with open("../datasets/training_data.json") as f:
    data = json.load(f)

texts = [item["code"] for item in data]
labels = [item["label"] for item in data]
# --- Step 2: Split Data ---
train_texts, val_texts, train_labels, val_labels = train_test_split(
    texts, labels, test_size=0.2, random_state=42
)

In [6]:

#tokenization and dataset class

tokenizer = RobertaTokenizer.from_pretrained("microsoft/codebert-base")

class CodeDataset(Dataset):
    def __init__(self, texts, labels, tokenizer, max_len=512):
        self.encodings = tokenizer(texts, truncation=True, padding=True, max_length=max_len)
        self.labels = labels

    def __getitem__(self, idx):
        item = {k: torch.tensor(v[idx]) for k, v in self.encodings.items()}
        item["labels"] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        return len(self.labels)

train_texts, val_texts, train_labels, val_labels = train_test_split(
    texts, labels, test_size=0.2, random_state=42
)

train_dataset = CodeDataset(train_texts, train_labels, tokenizer)
val_dataset = CodeDataset(val_texts, val_labels, tokenizer)

tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/150 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/498 [00:00<?, ?B/s]

In [7]:

# --- Load Model & Define Trainer ---
model = RobertaForSequenceClassification.from_pretrained("microsoft/codebert-base", num_labels=2)

training_args = TrainingArguments(
    output_dir="./results",
    evaluation_strategy="epoch",
    save_strategy="epoch",
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=3,
    weight_decay=0.01,
    logging_dir="./logs",
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",
    greater_is_better=True,
    logging_steps=10,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
)

trainer.train()




pytorch_model.bin:   0%|          | 0.00/499M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/499M [00:00<?, ?B/s]

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at microsoft/codebert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


TypeError: TrainingArguments.__init__() got an unexpected keyword argument 'evaluation_strategy'

In [None]:
# ----- 7. Evaluate the Model -----
eval_result = trainer.evaluate()
print("\nEval Result:", eval_result) 

In [None]:

# --- Evaluation ---
preds = trainer.predict(val_dataset)
pred_labels = preds.predictions.argmax(axis=1)

print(classification_report(val_labels, pred_labels, digits=4))

plt.figure(figsize=(5, 4))
sns.heatmap(confusion_matrix(val_labels, pred_labels), annot=True, fmt='d', cmap='Blues')
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.title("Confusion Matrix")
plt.show()

In [None]:
# --- Training Loss Plot ---
df = pd.DataFrame(trainer.state.log_history)
df = df[df["loss"].notna()]

plt.figure(figsize=(10, 5))
plt.plot(df["step"], df["loss"], label="Training Loss")
plt.xlabel("Step")
plt.ylabel("Loss")
plt.title("Training Loss over Time")
plt.legend()
plt.grid(True)
plt.show()


In [None]:

# --- Step 7: CLS Embeddings + t-SNE ---
def get_cls_embeddings(texts, tokenizer, model):
    model.eval()
    embeddings = []
    for text in texts:
        inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512)
        with torch.no_grad():
            outputs = model.roberta(**inputs)
            cls_embedding = outputs.last_hidden_state[:, 0, :].squeeze().cpu()
            embeddings.append(cls_embedding.numpy())
    return np.array(embeddings)

embeddings = get_cls_embeddings(val_texts, tokenizer, model)

tsne = TSNE(n_components=2, perplexity=30, random_state=42)
reduced = tsne.fit_transform(embeddings)

plt.figure(figsize=(8, 6))
plt.scatter(reduced[:, 0], reduced[:, 1], c=val_labels, cmap="coolwarm", alpha=0.7)
plt.colorbar(label="Label (0 = clean, 1 = vuln)")
plt.title("t-SNE of CodeBERT CLS Embeddings")
plt.grid(True)
plt.show()

## 6. Evaluation

In [None]:
acc = accuracy_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
report = classification_report(y_test, y_pred, target_names=["Good", "Vulnerable"])
print(f"Accuracy: {acc:.4f}")
print(f"F1 Score: {f1:.4f}")
print(report)

## 7. Confusion Matrix

In [None]:
cm = confusion_matrix(y_test, y_pred, labels=[0, 1])
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=["Good", "Vulnerable"])
disp.plot(cmap="Blues")
plt.title("Confusion Matrix")
plt.show()

## 8. Save Trained Classifier (Optional)

In [None]:
# Save the trained classifier for use in inference
joblib.dump(clf, CLASSIFIER_PATH)
print(f"Trained classifier saved to {CLASSIFIER_PATH}")