In [None]:
# Install/upgrade bitsandbytes and trl into the kernel's environment.
# NOTE(review): versions are unpinned, so a fresh run may resolve different releases — consider pinning.
%pip install -q --upgrade bitsandbytes trl
# Download the evaluation helper into the working directory as evaluator.py;
# this is the module `from evaluator import evaluate` resolves to when IS_COLAB is True.
!wget -q https://raw.githubusercontent.com/aslam-naseer/js-complexity-model/master/notebooks/utils/evaluator.py -O evaluator.py

In [None]:
# imports

import re
from huggingface_hub import login
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, set_seed
from datasets import load_dataset
from peft import PeftModel


  from .autonotebook import tqdm as notebook_tqdm


In [5]:
# Constants

# Switches between the Colab code paths (google.colab secrets, top-level
# evaluator.py) and the local ones (HF_TOKEN from the environment, utils package).
IS_COLAB = False

MODEL = "Qwen/Qwen3-4B-Instruct-2507"
PROJECT_NAME = "complexity"
HF_USER = "aslam-naseer"

DATA_USER = "aslam-naseer"
DATASET_NAME = f"{DATA_USER}/js-function-complexity-processed"

# Run identifier; combined with the project and user names below to form the
# Hub repo id that the PEFT adapter is loaded from.
RUN_NAME = "2026-01-08_05.47.09"

PROJECT_RUN_NAME = f"{PROJECT_NAME}-{RUN_NAME}"
HUB_MODEL_NAME = f"{HF_USER}/{PROJECT_RUN_NAME}"


# Hyper-parameters - QLoRA

QUANT_4_BIT = True

# `use_bf16` is read unconditionally when the quantization config is built, so
# it has to exist in every environment, not only on Colab. Probe the GPU only
# when CUDA is actually available; otherwise fall back to float16.
if torch.cuda.is_available():
    capability = torch.cuda.get_device_capability()
    # bfloat16 is selected when the major compute capability is 8 or higher.
    use_bf16 = capability[0] >= 8
else:
    use_bf16 = False

In [10]:
# Import `evaluate` from wherever the evaluator module lives in this environment:
# the utils package in a local checkout, the top-level evaluator.py on Colab.
if not IS_COLAB:
    from utils.evaluator import evaluate
else:
    from evaluator import evaluate

In [6]:
# Log in to HuggingFace
# The token comes from the process environment locally and from the
# google.colab user-data store on Colab.
if not IS_COLAB:
    import os
    hf_token = os.environ.get('HF_TOKEN')
else:
    from google.colab import userdata
    hf_token = userdata.get('HF_TOKEN')

login(hf_token, add_to_git_credential=True)

Token has not been saved to git credential helper.
Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.


[1m[31mCannot authenticate through git-credential as no helper is defined on your machine.
You might have to re-authenticate when pushing to the Hugging Face Hub.
Run the following command in your terminal in case you want to set the 'store' credential helper as default.

git config --global credential.helper store

Read https://git-scm.com/book/en/v2/Git-Tools-Credential-Storage for more details.[0m


In [None]:
# Load the dataset named by DATASET_NAME and keep a handle on its 'test' split,
# which is what the evaluation cells below score against.
dataset = load_dataset(DATASET_NAME)
test = dataset['test']

 'complexity': 9.1,
 'label': 'Critical',
 'parameter_count': 2,
 'statement_count': 44,
 'variable_count': 5,
 'max_nesting_depth': 11}

In [None]:
# Build the bitsandbytes quantization settings passed to the base-model loader.
# Relies on `use_bf16` and `QUANT_4_BIT` from the constants cell.
compute_dtype = torch.bfloat16 if use_bf16 else torch.float16

if QUANT_4_BIT:
    # 4-bit NF4 weights with double quantization; compute runs in `compute_dtype`.
    quant_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_compute_dtype=compute_dtype,
        bnb_4bit_quant_type="nf4",
    )
else:
    # 8-bit path. BitsAndBytesConfig has no 8-bit compute-dtype option: the
    # former `bnb_8bit_compute_dtype` argument was swallowed by **kwargs and
    # had no effect, so it is dropped rather than implying a setting exists.
    quant_config = BitsAndBytesConfig(load_in_8bit=True)

In [None]:
# Load the Tokenizer and the Model

# NOTE(review): trust_remote_code=True allows code shipped in the model repo to run
# during loading — confirm it is actually required for this checkpoint.
tokenizer = AutoTokenizer.from_pretrained(MODEL, trust_remote_code=True)
# Reuse the end-of-sequence token as the padding token and pad on the right.
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

# Base model loaded with the quantization settings built earlier; device
# placement is delegated to the loader via device_map="auto".
base_model = AutoModelForCausalLM.from_pretrained(
    MODEL,
    quantization_config=quant_config,
    device_map="auto",
)
# Keep the generation config's pad token in sync with the tokenizer's.
base_model.generation_config.pad_token_id = tokenizer.pad_token_id

# Load the fine-tuned model with PEFT
fine_tuned_model = PeftModel.from_pretrained(base_model, HUB_MODEL_NAME)


# Report the loaded model's memory footprint in megabytes (bytes / 1e6).
print(f"Memory footprint: {fine_tuned_model.get_memory_footprint() / 1e6:.1f} MB")

In [None]:
# Bare expression: shows the PEFT-wrapped model's repr as the cell output.
fine_tuned_model

In [None]:
def _extract_score(response_text: str) -> float:
    """Return the number following "Complexity Score:" in the text, or 0.0 if absent."""
    match = re.search(
        r"Complexity Score:\s*(\d+(?:\.\d+)?)", response_text)
    if match is None:
        print("Score pattern not found in response")
        return 0.0
    # The capture group admits only digits with an optional fractional part,
    # so float() cannot raise here and no ValueError guard is needed.
    return float(match.group(1))


def complexity(messages: list) -> float:
    """Generate a response for ``messages`` and parse the complexity score from it.

    Args:
        messages: Chat messages in the form accepted by
            ``tokenizer.apply_chat_template`` (e.g. what ``get_messages`` builds).

    Returns:
        The value following "Complexity Score:" in the generated text, or 0.0
        (after printing a notice) when that pattern is not present.
    """
    # Re-seed on every call so generation is repeatable per item.
    set_seed(42)
    prompt_str = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )
    # Move the inputs to wherever the model lives instead of assuming "cuda".
    inputs = tokenizer(prompt_str, return_tensors="pt").to(
        fine_tuned_model.device)

    with torch.no_grad():
        output_ids = fine_tuned_model.generate(
            **inputs, max_new_tokens=128, pad_token_id=tokenizer.eos_token_id
        )

    # Drop the prompt tokens so only the newly generated continuation is decoded.
    prompt_len = inputs["input_ids"].shape[1]
    generated_ids = output_ids[0, prompt_len:]
    full_response = tokenizer.decode(
        generated_ids, skip_special_tokens=True)

    return _extract_score(full_response)


In [None]:
# Instruction sent as the system turn of every request.
SYSTEM_PROMPT = "You are a static analysis expert. Analyze the code metrics and calculate the cyclomatic complexity."


def get_messages(code: str) -> list[dict[str, str]]:
    """Build the two-turn chat (system prompt, then the user's code) for one snippet.

    Args:
        code: Text placed verbatim in the user turn.

    Returns:
        A list with the system message followed by the user message, each a
        dict with "role" and "content" keys.
    """
    system_turn = {"role": "system", "content": SYSTEM_PROMPT}
    user_turn = {"role": "user", "content": code}
    return [system_turn, user_turn]


In [None]:
def model_predict(item):
    """Predict the complexity score for one dataset row from its 'code' field.

    The code is wrapped into chat messages by ``get_messages`` and scored by
    ``complexity``; that function's return value is passed through unchanged.
    """
    return complexity(get_messages(item['code']))

In [None]:
# Seed once up front, then run the evaluator over the test split using live
# model predictions (model_predict is called by evaluate for the items it scores).
set_seed(42)
evaluate(model_predict, test)

### Evaluation results from Colab

In [11]:
results = [7.5, 8.3, 3.9, 9.3, 2.9, 1.2, 3.5, 6.5, 6.5, 4.3, 6.5, 3.6, 7.5, 4.5, 4.3, 7.5, 9.5, 5.0, 5.5, 7.3, 2.6, 7.0, 7.0, 5.6, 4.6, 1.2, 6.5, 7.0, 5.3, 5.5, 3.6, 2.6, 4.6, 2.6, 5.6, 2.6, 6.5, 8.3, 4.5, 6.3, 7.5, 2.6, 3.6, 4.5, 6.3, 2.9, 4.5, 3.6, 8.3, 1.2, 8.3, 5.3, 9.3, 5.5, 6.3, 5.5, 4.6, 8.3, 7.0, 4.5, 5.6, 6.5, 3.6, 3.5, 1.2, 6.3, 5.6, 6.3, 6.5, 5.3, 5.6, 3.6, 6.3, 6.6, 1.2, 2.6, 6.5, 3.6, 3.6, 2.6, 5.6, 2.6, 2.6, 8.3, 7.5, 4.6, 5.6, 2.6, 6.5, 8.5, 1.2, 5.6, 6.5, 6.3, 3.3, 2.9, 7.0, 8.3, 7.5, 7.0, 7.5, 3.5, 1.2, 6.5, 4.6, 6.5, 8.5, 2.6, 4.6, 4.5, 4.6, 3.6, 6.3, 4.6, 5.3, 3.6, 5.5, 6.5, 1.2, 5.6, 1.2, 5.6, 5.5, 2.6, 4.6, 2.6, 8.3, 4.3, 5.6, 4.6, 4.5, 6.3, 5.6, 4.5, 7.0, 5.6, 9.5, 3.5, 5.6, 8.3, 6.5, 5.5, 7.0, 4.5, 3.6, 7.0, 5.6, 5.6, 3.3, 6.5, 5.6, 2.6, 4.6, 6.5, 6.6, 3.3, 5.3, 2.6, 5.0, 6.6, 1.2, 3.9, 7.5, 6.5, 2.6, 6.5, 7.0, 3.6, 4.3, 7.0, 3.6, 8.5, 2.6, 5.6, 2.6, 1.2, 2.3, 7.0, 2.6, 4.5, 3.6, 5.5, 3.6, 2.3, 2.9, 7.5, 3.6, 4.5, 3.6, 6.0, 2.3, 5.5, 4.6, 8.3, 6.6, 2.6, 6.6, 3.6, 6.5, 8.3, 3.6, 8.3, 4.5, 1.2, 4.6, 4.3, 6.0, 3.6, 9.3, 6.6, 2.6, 5.3, 4.3, 5.6, 6.7, 5.5, 9.3, 3.6, 6.0, 6.5, 7.5, 5.6, 5.3, 7.0, 2.9, 1.2, 3.5, 6.6, 6.6, 4.5, 2.6, 7.0, 5.6, 8.3, 3.6, 7.0, 7.0, 8.5, 5.3, 3.6, 4.6, 5.3, 2.6, 2.9, 5.5, 3.6, 6.7, 6.5, 4.6, 6.7, 6.5, 4.6, 3.6, 3.6, 6.6, 6.5, 5.6, 1.2, 7.0, 6.0, 6.0, 6.5, 6.5, 4.3, 5.3, 1.2, 4.3, 6.5, 6.5, 3.6, 3.6, 6.6, 3.6, 6.0, 6.0, 3.6, 6.0, 6.6, 8.3, 4.6, 5.6, 3.6, 3.6, 1.2, 1.2, 5.5, 2.9, 8.5, 2.6, 8.5, 3.6, 4.3, 6.6, 6.5, 2.6, 2.6, 5.5, 2.6, 3.3, 7.3, 4.3, 1.2, 5.3, 9.5, 5.3, 2.6, 4.5, 6.5, 6.5, 3.9, 4.5, 8.3, 6.5, 3.9, 4.6, 2.6, 4.5, 3.6, 4.5, 8.3, 5.6, 4.6, 2.6, 5.0, 6.0, 1.2, 4.6, 2.6, 4.6, 1.2, 3.9, 2.6, 6.5, 5.6, 8.3, 3.6, 8.3, 6.3, 5.5, 1.2, 7.5, 6.5, 4.5, 4.3, 5.3, 5.5, 5.5, 6.6, 6.5, 4.6, 7.5, 2.6, 6.5, 6.5, 6.3, 4.5, 6.7, 6.6, 6.6, 5.5, 3.5, 4.6, 2.6, 4.6, 6.6, 3.6, 5.6, 8.3, 3.6, 5.6, 5.6, 8.3, 3.6, 6.6, 4.5, 6.5, 5.3, 4.6, 6.6, 2.6, 5.6, 4.6, 2.6, 2.9, 5.6, 2.6, 5.6, 4.6, 5.3, 5.3, 2.6, 4.5, 7.0, 1.2, 3.6, 8.3, 4.6, 
6.5, 6.5, 4.3, 5.3, 6.5, 2.6, 6.6, 3.6, 7.0, 5.6, 6.6, 5.6, 7.0, 1.2, 3.6, 4.3, 3.6, 1.2, 4.5, 6.5, 1.2, 6.5, 5.3, 6.6, 3.6, 7.0, 9.3, 3.6, 6.5, 5.6, 6.6, 1.2, 5.6, 6.0, 3.6, 8.3, 5.6, 7.5, 3.6, 5.6, 7.0, 4.6, 2.6, 6.6, 5.5, 3.6, 8.3, 3.6, 3.3, 6.5, 5.6, 6.7, 5.6, 2.3, 2.9, 6.6, 4.3, 4.3, 4.5, 6.3, 2.6, 6.5, 2.3, 6.0, 3.6, 3.6, 1.2, 3.6, 6.6, 4.5, 7.0, 8.3, 6.5, 1.2, 6.6, 6.6, 4.3, 2.6, 5.6, 2.6, 6.5, 5.3, 7.5, 5.5, 5.3, 5.6, 2.9, 6.6, 4.5, 2.9, 1.2, 8.0, 6.5, 6.6, 7.0, 5.3, 6.5, 2.6, 6.6, 5.6, 2.6, 6.5, 4.5]

# Index of the next recorded prediction to hand out.
counter = 0


def colab_result(item):
    """Replay the recorded predictions from ``results``, one per call, in list order.

    ``item`` is ignored; the module-level ``counter`` tracks how many values
    have been handed out. Raises IndexError once ``results`` is exhausted.
    """
    global counter
    position = counter
    replayed = results[position]
    counter = position + 1
    return replayed

# Score the recorded Colab predictions with the same evaluator, without running the model.
# NOTE(review): the replay only lines up if evaluate calls the predictor once per
# item, in the same order the predictions were recorded — confirm against evaluator.py.
evaluate(colab_result, test)


100%|██████████| 500/500 [00:00<00:00, 18692.19it/s]


[93m1.6 [91m2.0 [92m0.3 [92m0.7 [92m0.2 [92m0.0 [92m0.3 [92m0.5 [92m0.3 [93m0.5 [93m1.0 [93m0.8 [92m0.7 [93m0.5 [92m0.3 [91m6.4 [91m8.3 [93m1.0 [92m0.3 [92m0.1 [92m0.2 [92m0.6 [93m1.1 [92m0.6 [92m0.5 [92m0.0 [93m1.0 [93m1.4 [92m0.3 [92m0.2 [93m0.7 [91m1.5 [91m3.5 [92m0.4 [92m0.3 [93m0.8 [93m1.0 [93m1.3 [91m3.3 [92m0.1 [93m1.1 [91m1.5 [92m0.1 [92m0.3 [93m0.8 [92m0.1 [93m0.5 [92m0.2 [92m0.1 [92m0.0 [92m0.5 [92m0.3 [92m0.1 [92m0.0 [92m0.3 [93m0.8 [93m0.9 [92m0.3 [92m0.1 [93m0.5 [92m0.5 [93m1.0 [92m0.2 [92m0.1 [92m0.0 [93m0.9 [92m0.2 [91m1.1 [92m0.2 [92m0.1 [93m0.7 [92m0.4 [92m0.1 [93m1.0 [93m0.8 [92m0.3 [92m0.6 [92m0.3 [92m0.2 [91m1.8 [93m0.8 [92m0.4 [91m1.6 [92m0.8 [92m0.2 [92m0.2 [92m0.2 [92m0.2 [91m1.3 [92m0.9 [91m3.0 [93m0.8 [92m0.4 [92m0.1 [93m0.5 [91m1.7 [92m0.4 [92m0.1 [93m0.7 [92m0.3 [92m0.1 [92m0.2 [91m1.7 [92m0.1 [92m0.1 [93m1.3 [93m1.2 [91m1.5 [92m0.4 [92m0.2 [92m0.1 