# Install dependencies

In [1]:


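# Uncomment the lines below and run this cell once to install the dependencies
# into the kernel's environment.
# # For Notebook: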
# %pip install ollama
# %pip install openai
# %pip install anthropic
# %pip install transformers
# %pip install tiktoken
# %pip install python-dotenv
# %pip install colorama


# # For packages: 
# %pip install scikit-learn
# %pip install matplotlib
# %pip install numpy
# %pip install pandas
# %pip install iterative-stratification

# # For WebUI testing:
# %pip install open-webui



# Prepare data

In [2]:
import sys
sys.path.append('../')  # Add the path to the my_packages module
from my_packages.data_processing.split_dataset import split_on_shots, read_dataset_to_json
from my_packages.data_processing.get_labels_data import used_libraries_from_dataset, used_libraries_to_string
from my_packages.analysis.analyze_datasets import analyze_library_distribution, analyze_instance_distribution, analyze_visual_node_types_distribution

# --- Configuration -----------------------------------------------------------
# Directory with one code file per task (also used by later cells).
DATA_DIR = '../data/mbpp_transformed_code_examples/only_files'
# NOTE: despite the name, this is the path to the dataset JSON file, not a folder.
main_dataset_folder = '../data/mbpp_transformed_code_examples/sanitized-MBPP-midio.json'

num_shot = 10  # Few-shot examples
eval_size_percentage = 0.5

# --- Load and split ----------------------------------------------------------
dataset = read_dataset_to_json(main_dataset_folder)

# Fixed seed so the train/validation/test split is the same on every run.
train_data, val_data, test_data = split_on_shots(
    num_shot,
    eval_size_percentage,
    dataset,
    seed=64,
    write_to_file=True,
)

# --- Library functions -------------------------------------------------------
# Collect the nodes (library functions) used by the training split and render
# them as text for the prompt.
# NOTE(review): only train_data is passed here, so libraries that appear solely
# in the validation/test splits are not included - confirm this is intended.
used_libraries_json = used_libraries_from_dataset(train_data)
explained_used_libraries = used_libraries_to_string(used_libraries_json)

# Optional: bar charts of the distributions across the three splits.
# analyze_library_distribution(train_data, val_data, test_data)
# analyze_instance_distribution(train_data, val_data, test_data)
# analyze_visual_node_types_distribution(train_data, val_data, test_data)

15
7
8
Library functions included in the dataset: 32


# Prepare prompts

In [3]:
from my_packages.prompting.few_shot import split_and_format

# Directory holding the per-task code files. Derived from DATA_DIR (defined in
# the data-preparation cell) instead of repeating the literal path three times;
# the trailing slash is kept because the original literal carried one.
code_files_dir = DATA_DIR.rstrip('/') + '/'

# Build (prompt, response) lists for each split using the "CODE_TEMPLATE" prompt template.
train_prompts, train_responses = split_and_format(train_data, "CODE_TEMPLATE", code_files_dir)
val_prompts, val_responses = split_and_format(val_data, "CODE_TEMPLATE", code_files_dir)
test_prompts, test_responses = split_and_format(test_data, "CODE_TEMPLATE", code_files_dir)

# Init model provider

In [4]:
import os
import ollama
from openai import OpenAI
from anthropic import Anthropic
from dotenv import load_dotenv
from my_packages.prompting.few_shot import create_few_shot_messages
from my_packages.utils.tokens_utils import write_models_code_tokens_to_file, models_not_in_file
from my_packages.utils.server_utils import server_diagnostics, is_remote_server_reachable

model_provider = 'ollama'

match model_provider:
    case 'ollama':
        host = 'http://localhost:11434'
        if is_remote_server_reachable(url = host + "/api/tags"):
            print("Server is reachable.")
        else:
            server_diagnostics()
            print("Ollama server is not reachable. Batch job might have finished. Try running bash script again.")

        embed_client = OpenAI(
            base_url=host+'/v1/',
            api_key='ollama',
        )
        client = ollama.Client(
            host=host,
        )
        models = [
            #llama models:
            # "codestral:22b-v0.1-q8_0",
            "llama3.1:8b-instruct-fp16",
            # "llama3.3:70b-instruct-q8_0,
            # "llama3.3:70b-instruct-fp16,

            # "qwq:32b-preview-fp16,
            # "mistral-large:123b-instruct-2407-q4_K_M,
        ]
        few_shot_messages = create_few_shot_messages(explained_used_libraries, train_prompts, train_responses, "CODE_GENERATOR_TEMPLATE", "system")
        models_not_tokenized = models_not_in_file(models, 'nodes_max_tokens.json')
        write_models_code_tokens_to_file(embed_client, models_not_tokenized, DATA_DIR, 'code_max_tokens.json')
        
    case 'openai':
        load_dotenv("../.env")
        openai_token = os.getenv('OPENAI_API_KEY')
        few_shot_messages = create_few_shot_messages(explained_used_libraries, train_prompts, train_responses, "CODE_GENERATOR_TEMPLATE", "developer")
        if not openai_token:
            raise Exception("OpenAI API key not found in .env file")
        client = OpenAI(api_key=openai_token)
        models = [
            "gpt-4o",
            # "o1-preview", 
        ]
        models_not_tokenized = models_not_in_file(models, 'nodes_max_tokens.json')
        write_models_code_tokens_to_file(client, models_not_tokenized, DATA_DIR, 'code_max_tokens.json')

    case 'anthropic':
        load_dotenv("../.env")
        anthropic_token = os.getenv('ANTHROPIC_API_KEY')
        if not anthropic_token:
            raise Exception("Anthropic API key not found in .env file")
        client = Anthropic(api_key=anthropic_token)
        models = [
            "claude-3-5-sonnet-latest"
        ]
        few_shot_messages = create_few_shot_messages(explained_used_libraries, train_prompts, train_responses, "CODE_GENERATOR_TEMPLATE", "system")
        models_not_tokenized = models_not_in_file(models, 'nodes_max_tokens.json')
        write_models_code_tokens_to_file(client, models_not_tokenized, DATA_DIR, 'code_max_tokens.json')
        
    case _:
        raise Exception("Model provider not supported")




Server is reachable.


## Evaluation

In [5]:
from my_packages.evaluation.code_success import run_code_evaluation, print_success_rate_result
from my_packages.utils.tokens_utils import get_model_code_tokens_from_file
from colorama import Fore, Back, Style

# Evaluate every configured model and collect its runs, keyed by the "name"
# field of the model entry loaded from the token-limit file.
# NOTE: this cell is long-running; with debug=True every sample's prompt and
# response is printed.
results = {}
for model_name in models:
    print(f"Model: {model_name}")
    # Model entry (including its name) as stored in code_max_tokens.json.
    model = get_model_code_tokens_from_file(model_name, 'code_max_tokens.json')

    # Sweeps the listed temperatures / top_p values / seeds over the validation
    # and test prompts; the few-shot messages are shared by all models.
    results[model["name"]] = run_code_evaluation(
        client,
        few_shot_messages,
        model,
        val_prompts,
        val_responses,
        test_prompts,
        test_responses,
        temperatures=[0.2, 0.5, 1.0],
        top_ps=[0.2, 0.5, 0.8],
        seeds=[3, 75, 346],
        debug=True,
    )

# Summary: one highlighted header per model, followed by each of its runs.
print("\nFinal Results:")
for model_name, model_runs in results.items():
    header = f"{Style.BRIGHT}{Fore.CYAN} Model: {model_name} {Style.RESET_ALL}"
    print(header)
    for run_result in model_runs:
        print_success_rate_result(run_result)
        


Model: llama3.1:8b-instruct-fp16
VALIDATION Phase
Validating with temperature: 0.2, and top_p: 0.2
Generating response for sample 0..



[1mSample: 0
[36m[1m User prompt: [0m
You are an expert Midio code programmer. Genereate the code for the following task:
Create a flow to rotate a given list by specified number of items to the right direction.

You must return the code in the following form:

```midio

    //Midio Code

```

[33m[1m Assistant response:[0m
```midio

    import("std", Std_k98ojb)
import("http", Http_q7o96c)

module() main { 
    func(doc: "Rotates a given list by specified number of items to the right direction.") rotate_list {
        in(x: -143.5, y: 123.75, name: "list") property(List) list_3a4b6f
        in(x: -144.5, y: 193.25, name: "n") property(Number) n_8d7e9c

        out(x: 1041.5, y: 123.75, name: "output") property(List) output_4a2b3f

        instance(x: -143.5, y: 193.25) slice_6a0e35 root.Std_k98ojb.List.Slice {}
        instance(x: 1041.5, y: 123.75) concat_9c8d7a root.Std_k98ojb.List.Concat {}
        instance(x: -143.5, y: 163.25) length_3f4e6a root.Std_k98ojb.List.Length {}

KeyboardInterrupt: 