In [1]:
import requests
import matplotlib.pyplot as plt
from typing import List
from model import (
    SampleMethod, TestMethod, DistanceFunction, GenerationParameters, 
    TestParameters, PredictionTestParameters, DistanceTestParameters, 
    TestQuery, TestResult, TestResponse
)
import numpy as np
from itertools import product
from tqdm import tqdm
import pickle
import json
import os

In [2]:
# Base URL (scheme + host) and port of the model-testing server that the
# request helpers in this notebook talk to.
SERVER_URL = "http://delos.eaalab.hpi.uni-potsdam.de"
# Backward-compatible alias: the misspelled name is what the request helpers
# in this notebook reference, so it must keep resolving to the same value.
SEVER_URL = SERVER_URL
SERVER_PORT = 9002

In [3]:
def get_generative_models() -> List[str]:
    """Fetch the generative models offered by the test server.

    Sends a GET request to ``/models/generative`` and returns the decoded
    JSON body (annotated as a list of model identifiers).

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.
    """
    url = f"{SEVER_URL}:{SERVER_PORT}/models/generative"
    response = requests.get(url)
    # Fail loudly on an error status (as run_test does) instead of decoding
    # an error payload and handing it back as if it were the model list.
    response.raise_for_status()
    return response.json()

def get_embedding_models() -> List[str]:
    """Fetch the embedding models offered by the test server.

    Sends a GET request to ``/models/embedding`` and returns the decoded
    JSON body (annotated as a list of model identifiers).

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.
    """
    url = f"{SEVER_URL}:{SERVER_PORT}/models/embedding"
    response = requests.get(url)
    # Fail loudly on an error status (as run_test does) instead of decoding
    # an error payload and handing it back as if it were the model list.
    response.raise_for_status()
    return response.json()

def unload_model(mid: str):
    """Ask the test server to unload the model identified by ``mid``.

    Sends a DELETE request to ``/unload/{mid}`` and returns the decoded JSON
    body of the server's reply.

    NOTE(review): model ids such as 'google/codegemma-2b' contain a '/', which
    is inserted into the URL path unescaped -- confirm the server route
    accepts that form.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.
    """
    url = f"{SEVER_URL}:{SERVER_PORT}/unload/{mid}"
    response = requests.delete(url)
    # Surface a failed unload (as run_test does for failed tests) rather than
    # returning the error payload as if the call had succeeded.
    response.raise_for_status()
    return response.json()

def run_test(
    mid: str,
    codes: List[str],
    docstrings: List[str],
    test_method: TestMethod,
    test_parameters: TestParameters,
) -> TestResponse:
    """Submit one batch of code/docstring pairs to the server's ``/test`` endpoint.

    Args:
        mid: identifier of the model the test is run against.
        codes: code snippets of the batch.
        docstrings: docstrings of the batch, passed alongside ``codes``.
        test_method: which test the server should run.
        test_parameters: parameters for the chosen test method.

    Returns:
        The server's JSON reply parsed into a ``TestResponse``.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.
    """
    test_query = TestQuery(
        mid=mid,
        codes=codes,
        docstrings=docstrings,
        test_method=test_method,
        test_parameters=test_parameters,
    )
    response = requests.post(
        f"{SEVER_URL}:{SERVER_PORT}/test",
        # mode="json" makes pydantic emit only JSON-native values (e.g. enum
        # members become their values), so the payload is always serializable
        # by the JSON encoder behind `json=`.
        json=test_query.model_dump(mode="json"),
    )
    response.raise_for_status()
    return TestResponse(**response.json())

In [4]:
# Default generation settings, reused by every test configuration built in
# this notebook.

sample_method = SampleMethod.TOP_P
generation_parameters = GenerationParameters(
    sample_method=sample_method,
    top_p=0.85,
    temperature=0.5,
    max_new_tokens=256,
)

In [5]:
# Ask the server which models it offers. The first embedding model reported
# is the one used for the distance tests.
embedding_models = get_embedding_models()
generative_models = get_generative_models()

# Stop here with a clear message instead of an IndexError on the `[0]` below
# (or on an empty configuration set further down the notebook).
if not embedding_models:
    raise RuntimeError("The server reported no embedding models.")
if not generative_models:
    raise RuntimeError("The server reported no generative models.")

embedding_model = embedding_models[0]

print("Embedding model:", embedding_model)
print("Generative models:", generative_models)

Embedding model: microsoft/codebert-base
Generative models: ['google/codegemma-2b']


In [6]:
def _distance_test_entry(generative_model, distance_function, normalize, sample_many, test_threshold):
    """Build the (key, config) pair for one distance-test parameter combination."""
    entry_key = f"{generative_model}_{distance_function}_{normalize}_{sample_many}_{test_threshold}"
    parameters = DistanceTestParameters(
        mid=embedding_model,
        distance_function=distance_function,
        normalize=normalize,
        sample_many=sample_many,
        test_threshold=test_threshold,
        generation_parameters=generation_parameters
    )
    entry = {
        "mid": generative_model,
        "test_method": TestMethod.DISTANCE,
        "test_parameters": parameters,
    }
    return entry_key, entry


# One configuration per element of the cartesian product of:
# generative model x distance function x normalize x sample_many x threshold.
distance_test_configs = dict(
    _distance_test_entry(*combination)
    for combination in product(
        generative_models,
        [DistanceFunction.COSINE, DistanceFunction.EUCLIDEAN],
        [True, False],
        [True, False],
        [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
    )
)

def _prediction_test_entry(generative_model, weight_decay, frequency_importance, test_threshold):
    """Build the (key, config) pair for one prediction-test parameter combination."""
    entry_key = f"{generative_model}_{weight_decay}_{frequency_importance}_{test_threshold}"
    parameters = PredictionTestParameters(
        weight_decay=weight_decay,
        frequency_importance=frequency_importance,
        test_threshold=test_threshold,
        generation_parameters=generation_parameters
    )
    entry = {
        "mid": generative_model,
        "test_method": TestMethod.PREDICTION,
        "test_parameters": parameters,
    }
    return entry_key, entry


# One configuration per element of the cartesian product of:
# generative model x weight decay x frequency importance x threshold.
prediction_test_configs = dict(
    _prediction_test_entry(*combination)
    for combination in product(
        generative_models,
        [0.0, 0.25, 0.5, 0.75],
        [0.0, 0.25, 0.5, 0.75],
        [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
    )
)

# Report how many configurations each test method will run.
print("Distance test configurations:", len(distance_test_configs))
print("Prediction test configurations:", len(prediction_test_configs))

Distance test configurations: 48
Prediction test configurations: 96


In [14]:
# Load the test set: a JSON list of records, each read below via the keys
# "c" (code), "d" (docstring) and "l" (label).
# The encoding is given explicitly so decoding does not depend on the
# platform's locale default.
with open("test_data.json", 'r', encoding="utf-8") as f:
    test_data = json.load(f)

# Number of samples sent to the server per request.
BATCH_SIZE = 16

codes = [e["c"] for e in test_data]
docstrings = [e["d"] for e in test_data]
labels = [e["l"] for e in test_data]

# Slice both lists with the same stride so batch i of the codes lines up with
# batch i of the docstrings; the last batch may be shorter than BATCH_SIZE.
code_batches = [codes[i:i+BATCH_SIZE] for i in range(0, len(codes), BATCH_SIZE)]
docstring_batches = [docstrings[i:i+BATCH_SIZE] for i in range(0, len(docstrings), BATCH_SIZE)]

assert len(codes) == len(docstrings) == len(labels)
print(len(codes))

assert len(code_batches) == len(docstring_batches)
print(len(code_batches))

4144
259


In [15]:
# Run every test configuration against the server and cache the outcome.
#
# `results` maps: test-method value -> configuration key -> (results of all
# batches concatenated, labels).
#
# The cache is written after each finished configuration and existing entries
# are skipped, so a failed request only costs the configuration that was in
# flight; re-running the cell resumes where it stopped. A cache file that
# already holds every configuration triggers no requests at all.
#
# NOTE: pickle.load can execute arbitrary code -- only load a results file
# that was produced by this notebook.
RESULTS_PATH = "results.pkl"

if os.path.exists(RESULTS_PATH):
    with open(RESULTS_PATH, 'rb') as f:
        results = pickle.load(f)
else:
    results = {}

for method, method_configs in (
    (TestMethod.DISTANCE, distance_test_configs),
    (TestMethod.PREDICTION, prediction_test_configs),
):
    method_results = results.setdefault(method.value, {})
    pending_configs = {
        key: config
        for key, config in method_configs.items()
        if key not in method_results
    }
    for key, config in tqdm(pending_configs.items(), total=len(pending_configs)):
        batch_results = []
        # Distinct loop names keep the module-level `codes` / `docstrings`
        # lists from being overwritten by the last batch.
        for code_batch, docstring_batch in zip(code_batches, docstring_batches):
            result = run_test(
                mid=config["mid"],
                codes=code_batch,
                docstrings=docstring_batch,
                test_method=config["test_method"],
                test_parameters=config["test_parameters"],
            )
            batch_results.extend(result.results)
        method_results[key] = (batch_results, labels)

        # Checkpoint: write to a temporary file first and swap it in, so an
        # interruption mid-write cannot corrupt the existing cache.
        checkpoint_path = RESULTS_PATH + ".tmp"
        with open(checkpoint_path, 'wb') as f:
            pickle.dump(results, f)
        os.replace(checkpoint_path, RESULTS_PATH)

print(len(results[TestMethod.DISTANCE.value]))
print(len(results[TestMethod.PREDICTION.value]))


  0%|          | 0/48 [00:19<?, ?it/s]


HTTPError: 507 Server Error: Insufficient Storage for url: http://delos.eaalab.hpi.uni-potsdam.de:9002/test