### Summary + Code

In [None]:
import json

def get_qa_data(path):
    """Load and return the JSON document stored at ``path``.

    Args:
        path: Filesystem path of the JSON file to read.

    Returns:
        Whatever ``json.load`` decodes from the file (the rest of this
        notebook iterates over it as a sequence of dict records).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # Explicit UTF-8 keeps reads independent of the platform default encoding;
    # the ``with`` block closes the file, so no manual close() is needed.
    with open(path, 'r', encoding='utf-8') as json_file:
        return json.load(json_file)


# Records to process; 'path_to_process' looks like a placeholder for the real
# input file (TODO confirm before running).
qa_data = get_qa_data("path_to_process")

In [None]:
import requests

import os

# Hosted inference endpoint for the model used to generate questions.
API_URL = "https://api-inference.huggingface.co/models/meta-llama/Llama-3.2-11B-Vision"
# The bearer token is read from the HF_TOKEN environment variable so the
# credential never has to be pasted into the notebook. When the variable is
# unset the header value is "Bearer " exactly as before.
HEADERS = {"Authorization": "Bearer " + os.environ.get("HF_TOKEN", "")}

def query_huggingface(prompt, timeout=60):
    """POST ``prompt`` to ``API_URL`` and return the decoded JSON reply.

    Args:
        prompt: Text sent under the ``"inputs"`` key of the JSON payload.
        timeout: Seconds to wait for the server before ``requests`` gives up.
            Without a timeout a stalled connection would block indefinitely.

    Returns:
        The response body as decoded by ``response.json()``. The HTTP status
        is not checked here, so an error payload from the API is returned
        to the caller as-is.

    Raises:
        requests.exceptions.RequestException: On connection problems or when
            ``timeout`` elapses.
        ValueError: If the response body is not valid JSON.
    """
    payload = {"inputs": prompt}
    response = requests.post(
        API_URL, headers=HEADERS, json=payload, timeout=timeout
    )
    return response.json()


# Prompt template for reverse-engineering a question from a code snippet.
# The `{code_snippet}` placeholder is filled in with str.format() by the
# generation loop; the template deliberately ends on the open
# "### Potential Question" heading (presumably so the model's completion
# supplies the question text).
QUERY_PROMPT = """### Task
Generate a question that could realistically have the given code snippet as a response.
To be clear, we want to reverse-engineer a question from a given response (code snippet).

### Code Snippet
{code_snippet}

### Warnings
Do not make your question too specific. Make your question general yet suitable for the resulting code snippet.

### Potential Question
"""

# Query the model once per record and store its reply merged with the record.
results = []

for qa in qa_data:
    response = query_huggingface(QUERY_PROMPT.format(code_snippet=qa['code']))
    # NOTE(review): text-generation endpoints usually answer with a list of
    # dicts rather than a single dict, and a list has no .update(). Unwrap the
    # first element so the merge below works for both shapes; confirm against
    # the actual API output.
    if isinstance(response, list):
        response = response[0] if response else {}
    # Keys from the source record override same-named keys in the reply.
    response.update(qa)
    results.append(response)

# ensure_ascii=False writes non-ASCII characters verbatim, so the file must be
# opened as UTF-8 instead of relying on the platform default encoding. The
# ``with`` block closes the file on exit.
with open("test_dataset.json", 'w', encoding="utf-8") as json_file:
    json.dump(results, json_file, ensure_ascii=False, indent=4)
    


### Query - Testing

In [None]:
from datetime import datetime


# Load the generated dataset. The file must be opened for reading: mode 'w'
# would truncate it and json.load() would then fail on a write-only handle.
with open("test_dataset.json", 'r', encoding="utf-8") as json_file:
    data = json.load(json_file)

# pop() removes "summary" from every record, so the dicts left in `data`
# hold only the remaining fields and are reused as the metadata list.
texts = [dictionary.pop("summary") for dictionary in data]
metadatas = data
# Ids are "<timestamp>-<index>". The index is formatted into the string,
# since concatenating an int to a str raises TypeError.
ids = [f"{datetime.now()}-{i}" for i in range(len(texts))]

In [None]:
import chromadb
import chromadb.utils.embedding_functions as embedding_functions

# Default Chroma client.
chroma_client = chromadb.Client()

# Hugging Face-hosted embeddings, kept as an option in case we ever move to
# an open-source implementation.
huggingface_ef = embedding_functions.HuggingFaceEmbeddingFunction(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
    api_key="",
)

# Reuse the "test" collection if it exists, otherwise create it, with the
# HNSW space configured as cosine.
collection = chroma_client.get_or_create_collection(
    name="test",
    metadata={"hnsw:space": "cosine"},
    embedding_function=huggingface_ef,
)

# Insert the documents with their metadata records and generated ids.
collection.add(ids=ids, documents=texts, metadatas=metadatas)