<!-- ## Install dependencies -->

In [1]:


# For Notebook:
# %pip install ollama
# %pip install openai
# %pip install anthropic
# %pip install transformers
# %pip install tiktoken
# %pip install python-dotenv
# %pip install colorama

# For packages: 
# %pip install scikit-learn
# %pip install matplotlib
# %pip install numpy
# %pip install pandas
# %pip install iterative-stratification
# %pip install langchain
# %pip install "langsmith<0.2.0"

# For WebUI testing:
# %pip install open-webui

%load_ext autoreload


# Static splits

In [2]:
import sys
import os

# Set ahead of the project imports below so the value is already in the
# environment when they run.
# NOTE(review): presumably read by PyTorch's CUDA allocator to limit memory
# fragmentation — confirm against the PyTorch memory-management docs.
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

sys.path.append('../../../')  # Make the my_packages module importable (path is relative to the working directory)
# Settings passed through the environment; they are set before the
# my_packages imports that follow.
# NOTE(review): HF_CACHE and SYNCODE_CACHE are absolute, user-specific cluster
# paths — anyone else running this notebook has to change them.
os.environ['EXPERIMENT_DB_NAME'] = "syncode_experiments"
os.environ['HF_CACHE'] = "/cluster/work/projects/ec12/ec-sindrre/hf-models"
os.environ['SYNCODE_CACHE'] = "/cluster/work/projects/ec12/ec-sindrre/syncode"

from my_packages.prompting.prompt_building import transform_code_data
from my_packages.utils.file_utils import read_dataset_to_json

# Dataset root, relative to the notebook's working directory.
main_dataset_folder = '../../../data/MBPP_Midio_50/'

def get_hold_out_splits(main_dataset_folder):
    """Load the static hold-out train/validation/test splits.

    Reads ``splits/hold_out/{train,validation,test}_dataset.json`` below
    ``main_dataset_folder``, passes each file's content through
    ``transform_code_data`` and prints the size of every split.

    Args:
        main_dataset_folder: Path to the dataset root. Paths are joined with
            ``os.path.join``, so a trailing separator is optional.

    Returns:
        Tuple ``(train_data, val_data, test_data)``.
    """
    hold_out_dir = os.path.join(main_dataset_folder, 'splits', 'hold_out')

    def load_split(split_name):
        # One place for the read + transform that every split goes through.
        split_path = os.path.join(hold_out_dir, f'{split_name}_dataset.json')
        return transform_code_data(read_dataset_to_json(split_path))

    train_data = load_split('train')
    val_data = load_split('validation')
    test_data = load_split('test')

    print(f"Train data: {len(train_data)}")
    print(f"Val data: {len(val_data)}")
    print(f"Test data: {len(test_data)}")
    return train_data, val_data, test_data

# Use the static validation split for hyperparameter tuning; the hold-out
# train and test splits returned alongside it are discarded here.
_, val_data, _ = get_hold_out_splits(main_dataset_folder=main_dataset_folder)


Script is located in: /fp/homes01/u01/ec-sindrre/Thesis_project/my_packages/common
Project is located in: /fp/homes01/u01/ec-sindrre/Thesis_project
Train data: 32
Val data: 9
Test data: 9


# 3_fold splits

In [3]:

import os
from my_packages.data_processing.split_dataset import create_kfold_splits

# Dataset root, relative to the notebook's working directory (same value as
# assigned in the static-splits cell).
main_dataset_folder = '../../../data/MBPP_Midio_50/'
# Index of the fold that is loaded further down in this cell.
run_k_fold = 0 # 0-2, because we have 3 folds

def get_k_fold_splits(main_dataset_folder, k, k_folds=3):
    """Load one fold of the k-fold train/test splits, creating them if missing.

    If ``splits/{k_folds}_fold`` does not exist below ``main_dataset_folder``,
    the hold-out train and test splits are combined and handed to
    ``create_kfold_splits`` with ``write_to_file=True``. Afterwards the files
    ``train_dataset_{k}.json`` and ``test_dataset_{k}.json`` are read from that
    directory and their sizes are printed.

    Args:
        main_dataset_folder: Path to the dataset root. Paths are joined with
            ``os.path.join``, so a trailing separator is optional.
        k: Zero-based index of the fold to load; must satisfy
            ``0 <= k < k_folds``.
        k_folds: Total number of folds.

    Returns:
        Tuple ``(train_data, test_data)`` for fold ``k``.

    Raises:
        ValueError: If ``k`` is outside ``[0, k_folds - 1]``.
    """
    # Fail early with a clear message instead of a missing-file error later.
    if not 0 <= k < k_folds:
        raise ValueError(f"k must be in [0, {k_folds - 1}], got {k}")

    fold_dir = os.path.join(main_dataset_folder, 'splits', f'{k_folds}_fold')

    if os.path.exists(fold_dir):
        print(f"Using existing {k_folds}-fold splits")
    else:
        print(f"Creating {k_folds}-fold splits")
        train, _, test = get_hold_out_splits(main_dataset_folder)
        # Only use train+test for k-fold splits; the validation split is left out.
        # NOTE(review): create_kfold_splits is not told where the dataset
        # lives — confirm that it writes into fold_dir.
        create_kfold_splits((train + test), k_folds=k_folds, write_to_file=True)

    train_data = read_dataset_to_json(os.path.join(fold_dir, f'train_dataset_{k}.json'))
    test_data = read_dataset_to_json(os.path.join(fold_dir, f'test_dataset_{k}.json'))

    print(f"Train data: {len(train_data)}")
    print(f"Test data: {len(test_data)}")
    return train_data, test_data

# Use the K_fold split for testing: train_data and test_data come from the
# selected fold, while val_data still comes from the static hold-out split.
train_data, test_data = get_k_fold_splits(
    main_dataset_folder=main_dataset_folder, 
    k=run_k_fold, 
    k_folds=3
)
print(f"Chosen fold: {run_k_fold}")



Using existing 3-fold splits
Train data: 27
Test data: 14
Chosen fold: 0


# Model configurations

In [4]:
import sys
from dotenv import load_dotenv
from langchain_ollama import OllamaEmbeddings
# Directories are resolved relative to the notebook's working directory.
script_dir = os.getcwd()
project_dir = os.path.abspath(f"{script_dir}/../../..")
env_path = os.path.abspath(f"{project_dir}/../.env")
experiment_dir = os.path.abspath(f"{script_dir}/..")

# Load the .env file and extend sys.path before the project import below.
load_dotenv(env_path)
sys.path.append(project_dir)

for label, location in (
    ("Script", script_dir),
    ("Project", project_dir),
    ("Env", env_path),
):
    print(f"{label} is located in:", location)

from my_packages.common.config import model_configs

model_provider = 'ollama'

# Reference responses from every split in use (k-fold train, hold-out
# validation, k-fold test), in that order.
all_responses = [
    sample["response"]
    for split in (train_data, val_data, test_data)
    for sample in split
]
print(f"Number of all responses: {len(all_responses)}")

client, models = model_configs(
    all_responses=all_responses,
    model_provider=model_provider,
    models=["phi4:14b-fp16"],
    ollama_port="11434",
)





Script is located in: /fp/homes01/u01/ec-sindrre/Thesis_project/experiments/SynCode/fox
Project is located in: /fp/homes01/u01/ec-sindrre/Thesis_project
Env is located in: /fp/homes01/u01/ec-sindrre/.env
Script is located in: /fp/homes01/u01/ec-sindrre/Thesis_project/my_packages/common
Project is located in: /fp/homes01/u01/ec-sindrre/Thesis_project
Env is located in: /fp/homes01/u01/ec-sindrre/Thesis_project/.env
Number of all responses: 50
Url: http://localhost:11434/api/tags
Response: <Response [200]>
Server is reachable.


# Task configurations

<!-- ## Experiments settings -->

In [5]:
import json
import sys
sys.path.append('../../')  # Add the path to the my_packages module
from my_packages.common.classes import PromptType
from my_packages.data_processing.attributes_processing import used_functions_to_string
from my_packages.utils.file_utils import read_dataset_to_json


# prompt_prefix = "Create a function" # "e.g., "Create a flow"
NUM_SHOTS = 5              # Number of few-shot examples requested from the selector
semantic_selector = False  # True: semantic-similarity selector, False: coverage selector

script_dir = os.getcwd()
project_dir = os.path.abspath(f"{script_dir}/../../../")

# Build paths with os.path.join so that no doubled separator appears
# (plain concatenation produced ".../MBPP_Midio_50//metadata/...").
dataset = read_dataset_to_json(os.path.join(main_dataset_folder, "MBPP-Midio-50.json"))
dataset_nodes = read_dataset_to_json(
    os.path.join(main_dataset_folder, "metadata", "used_external_functions.json")
)
print(f"Number of nodes in datset: {len(dataset_nodes)}")

all_nodes = read_dataset_to_json(os.path.join(project_dir, "data", "all_library_nodes.json")) # All nodes
print(f"Number all nodes: {len(all_nodes)}")


Number of nodes in datset: 51
Number all nodes: 290


# Init Example selector

In [6]:
import sys
sys.path.append('../../')  # Add the path to the my_packages module
from my_packages.prompting.example_selectors import get_coverage_example_selector, get_semantic_similarity_example_selector

# The pool is the k-fold training split ordered by numeric task id. The sort
# happens in place, so train_data itself is reordered as well.
train_data.sort(key=lambda sample: int(sample['task_id']))
example_pool = train_data
print(f"Number of examples in the pool: {len(example_pool)}")

if not semantic_selector:
    selector = get_coverage_example_selector(
        example_pool,
        label="external_functions",
        shots=NUM_SHOTS,
        seed=9,
    )
else:
    selector = get_semantic_similarity_example_selector(
        example_pool,
        OllamaEmbeddings(model="nomic-embed-text"),
        shots=NUM_SHOTS,
        input_keys=["task"],
    )

# Run the selector once per training task and collect the ids it returns.
used_ids = []
for task in train_data:
    examples = selector.select_examples({"task": task["task"]})
    used_ids.extend(e['task_id'] for e in examples)
    print(f"Task: {task['task']}")


Number of examples in the pool: 27
Task: Create a function to find the smallest number in a list.
Task: Create a function to find the kth element in the given array using 1-based indexing.
Task: Create a function to multiply two numbers.
Task: Create a function to find the minimum of three numbers.
Task: Create a function that takes two lists and returns true if they have at least one common element.
Task: Create a function to find the last digit of a given number.
Task: Create a function to check whether the given list contains consecutive numbers or not.
Task: Create a function to check whether a list is a sublist of another list.
Task: Create a function to find the difference between largest and smallest value in a given list.
Task: Create a function to find the difference of the first even and first odd number of a given list.
Task: Create a function to find sum and average of first n natural numbers.
Task: Create a function to check whether the given number is even or not.
Task: C

## Evaluation

In [7]:
import sys
sys.path.append('../../')  # Add the path to the my_packages module
%autoreload 2
%reload_ext autoreload


from my_packages.evaluation.code_evaluation import evaluate_code, run_model, run_validation, run_testing
from my_packages.utils.tokens_utils import get_model_code_tokens_from_file
from colorama import Fore, Back, Style
import os

script_dir = os.getcwd()
project_dir = os.path.abspath(f"{script_dir}/../../..")

sys.path.append(project_dir)
print("Script is located in:", script_dir)
print("Project is located in:", project_dir)

metrics = ['syntax', 'semantic'] #['syntax', 'semantic', 'tests'] OR ['syntax', 'semantic']
prompt_type = PromptType.REGULAR
example_selector = f"{'semantic' if semantic_selector else 'coverage'}"
experiment_name = f"{prompt_type.value}_syncode_{NUM_SHOTS}_shot"

# Single source of truth for the sampling settings. The same values are used
# for generation, for the hyperparameters stored with the evaluation, and for
# the summary line, so the recorded settings always match what actually ran.
sampling_params = {"seed": 75, "temperature": 0.9, "top_p": 0.9, "top_k": 50}
ks = [1]

results = {}

for model_name in models:
    print("\n\n")
    model = get_model_code_tokens_from_file(model_name, f'{project_dir}/data/max_tokens.json')
    # Placeholder (test_runs, final_result); nothing in this cell replaces it,
    # so the printing code below has to cope with None.
    results[model_name] = (None, None)

    # NOTE(review): test_data[3:] skips the first three samples — confirm this
    # is intentional and not a leftover from resuming an interrupted run.
    model_result, largest_context = run_model(
        client,
        model_name,
        dataset_nodes=dataset_nodes,
        all_nodes=all_nodes,
        data=test_data[3:],
        example_pool=selector,
        max_new_tokens=model["max_tokens"],
        temperature=sampling_params["temperature"],
        top_p=sampling_params["top_p"],
        top_k=sampling_params["top_k"],
        n=5,
        seed=sampling_params["seed"],
        debug=True,
        prompt_type=prompt_type,
        rag_data=None,
        max_ctx=16000,
        constrained_output=True
    )

    # NOTE(review): only "syntax" is evaluated although `metrics` above also
    # lists "semantic", and the phase is "validation" although the data comes
    # from test_data — confirm both are intended.
    metric_results_lists = evaluate_code(
        model_result,
        ks=ks,
        evaluation_metrics=["syntax"],
        experiment_name=experiment_name,
        model_name=model_name,
        env="no",
        hyperparams=dict(sampling_params),
        phase="validation"
    )
    ## Optimizing for the first k in the ks list
    pass_at_k_dict = metric_results_lists[0]
    val_metric = pass_at_k_dict[f"pass@{ks[0]}"]
    print(
        f"Validation with temp={sampling_params['temperature']}, "
        f"top_k={sampling_params['top_k']} and top_p={sampling_params['top_p']}. "
        f"Gave pass@{ks[0]}={val_metric} and pass@ks={pass_at_k_dict}"
    )

    print(f"======\n{model_name} FINAL RESULTS: ===============================")
    print(f"{Style.BRIGHT}{Fore.CYAN} Model: {model_name} {Style.RESET_ALL}")
    test_runs = results[model_name][0]
    final_result = results[model_name][1]
    if test_runs:
        for run in test_runs:
            run.print()
    if final_result:
        final_result.print()


# Print the final results
print("\nALL MODELS FINAL RESULTS:")
for model_name, (test_runs, final_result) in results.items():
    print(f"{Style.BRIGHT}{Fore.CYAN} Model: {model_name} {Style.RESET_ALL}")
    # Entries may still hold the (None, None) placeholder; skip what is missing
    # instead of failing on None.
    for test_run in test_runs or []:
        test_run.print()
    if final_result:
        final_result.print()



Script is located in: /fp/homes01/u01/ec-sindrre/Thesis_project/experiments/SynCode/fox
Project is located in: /fp/homes01/u01/ec-sindrre/Thesis_project



Constrained output is set to True.
Model kwargs: {'max_length': 16000, 'max_new_tokens': 2012, 'temperature': 0.9, 'top_k': 50, 'top_p': 0.9, 'do_sample': True, 'quantize': True, 'use_cache': True}
Loading Syncode model with miodel kwargs: {'max_length': 16000, 'max_new_tokens': 2012, 'temperature': 0.9, 'top_k': 50, 'top_p': 0.9, 'do_sample': True, 'quantize': True, 'use_cache': True}
Using device_map=auto
[2025-04-02 11:13:05,481-accelerate.utils.modeling] - We will use 90% of the memory on device 0 for storing the model, and 10% for the buffer to avoid OOM. You can set `max_memory` in to a higher value to use more memory (at your own risk).


Loading checkpoint shards:   0%|          | 0/6 [00:00<?, ?it/s]



[1m=== Sample: 1 (task_id: 15) ===
Building CODE prompt..


Token indices sequence length is longer than the specified maximum sequence length for this model (6936 > 1024). Running this sequence through the model will result in indexing errors


Using 16000 for context window

Prompt size = 6936, leaving 7052 tokens for RAG + buffer.
Final prompt size: 6936 (plus 2012 for output).
Using Syncodeing n response..  (1/5)


We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)
You are not running the flash-attention implementation, expect numerical differences.


SynCode output:  ```midio

    import("std", Std_k98ojb)
import("http", Http_q7o96c)

module() main { 
    func(doc: "Check whether a specified list is sorted or not.") is_sorted {
        in(x: -234, y: -34, name: "list") property(List) list_7c8ea8

        out(x: 757, y: -10, name: "output") property(Bool) output_b7b2b0

        instance(x: 50, y: 17) zip_f2a1d8 root.Std_k98ojb.Iteration.Zip {}
        instance(x: 353, y: -46) map_d3a3dd root.Std_k98ojb.Iteration.Map {}
        instance(x: 619, y: 9) for_8d0a6c root.Std_k98ojb.Std.For {}
        instance(x: 1026, y: -53) and_8eb9c9 root.Std_k98ojb.Logic.And {}
        instance(x: 1114, y: -155) if_7b4a3a root.Std_k98ojb.Std.If {}
        instance(x: 1389, y: -52) lessThan_e2a9d3 root.Std_k98ojb.Math.LessThan {}
        instance(x: 1302, y: -155) not_4c6d8b root.Std_k98ojb.Logic.Not {}
        instance(x: 1443, y: -52) expression_b7eb66 root.Std_k98ojb.Math.Expression {
            expression: "i + 1 < list.length"
        }
        f

AttributeError: 'bool' object has no attribute 'returncode'

<!-- ## Langsmith evaluate -->