In [None]:
# Format dataset into a concise prompt
"""
    format_test_prompt(data::DataFrame, test_data::DataFrame, algorithms)

Build a few-shot prompt asking an LLM to predict one algorithm per row of `test_data`.

Every row of `data` becomes a training example of the form
`Matrix: {features as JSON} → Algorithm: <row.algorithm>`; every row of `test_data`
becomes a numbered `Matrix i: {features as JSON}` query.

# Arguments
- `data`: training table; must contain an `algorithm` column (read as `row.algorithm`).
- `test_data`: table of matrices to classify.
- `algorithms`: iterable of allowed algorithm names, listed verbatim in the prompt.

The columns `algorithm`, `pattern` and `time` are never shown as features, in either
table, so training examples and test queries share one schema and the test rows do not
expose label or timing columns. `test_data` is not required to have an `algorithm`
column.

# Returns
The prompt as a `String`.
"""
function format_test_prompt(data::DataFrame, test_data::DataFrame, algorithms)
    # Label / bookkeeping columns that must not be presented as matrix features.
    excluded = Set(["algorithm", "pattern", "time"])
    features = [Symbol(col) for col in names(data) if !(col in excluded)]
    test_features = [Symbol(col) for col in names(test_data) if !(col in excluded)]

    n_test = nrow(test_data)
    algorithm_names = join(algorithms, ", ")
    quoted_algorithms = join(("\"$a\"" for a in algorithms), ", ")

    # Example of the expected output format, built from the allowed names so that it
    # can never suggest an algorithm outside `algorithms`.
    example_names = [string(a) for a in Iterators.take(algorithms, 3)]
    example_output = join(vcat(example_names, "..."), ", ")

    # Accumulate in a buffer rather than re-allocating the whole string on every row.
    io = IOBuffer()

    # Guideline 1 of the prompt, provide the LLM with its role
    print(io, """
    You are an expert in matrix factorization. Below is a dataset with matrix features (e.g., size, rank, sparsity, etc.)
    and the best algorithm for each matrix.

    Training Dataset Examples:
    """)

    # Guideline 2 of the prompt, include SmartSolve performance database
    # Add few-shot examples
    for row in eachrow(data)
        # Select the feature columns by name; iterating a NamedTuple directly yields
        # values only, so it cannot be used to filter by key.
        shown = NamedTuple(row[features])
        print(io, "\nMatrix: ", JSON3.write(shown), " → Algorithm: ", row.algorithm)
    end

    # Add test matrix inputs
    # Guideline 3 of the prompt, ask the LLM to analyze the database according to a set of limitations
    print(io, """

    Now, predict the best algorithm for each of the following $(n_test) new matrices.

    ⚠️ Only use one of these algorithms: $(algorithm_names)
    ⚠️ Do NOT use any other algorithm names (e.g., no variants like "sklumt", "arpack", "clapack")

    Test Matrices:
    """)

    for (i, row) in enumerate(eachrow(test_data))
        shown = NamedTuple(row[test_features])
        print(io, "\nMatrix ", i, ": ", JSON3.write(shown))
    end

    # Final instruction block
    print(io, """

    Output only a comma-separated list of the predicted algorithm names in order, one per test matrix.
    Do NOT include matrix numbers, explanations, or extra text.

    ⚠️ Repeat: Output must be a comma-separated list of exactly $(n_test) predictions.
    ⚠️ Only use one of: $(quoted_algorithms)
    ⚠️ No text, no variants, no labels. Just: $(example_output)
    """)

    return String(take!(io))
end

format_test_prompt (generic function with 1 method)

In [None]:
# Format heuristic prompt
"""
    format_heuristic_prompt(data::DataFrame, algorithms)

Build a prompt asking an LLM to derive a rule-based heuristic, delivered as a Julia
function `choose_algorithm(; kwargs...)::String`, that picks one of `algorithms` from
matrix features.

# Arguments
- `data`: table passed whole to `JSON3.write` and embedded in the prompt. Its column
  names, minus `algorithm`, `pattern` and `time`, are presented as the feature names.
- `algorithms`: iterable of allowed algorithm names.

# Returns
The prompt as a `String`.
"""
function format_heuristic_prompt(data::DataFrame, algorithms)
    excluded = Set(["algorithm", "pattern", "time"])
    features = [Symbol(col) for col in names(data) if !(col in excluded)]

    # NOTE(review): the whole table is serialized here, including the `pattern` and
    # `time` columns that are left out of the feature list — confirm this is intended.
    full_data = JSON3.write(data)

    feature_list = join(features, ", ")
    # One `name = get(kwargs, :name, missing)` line per feature, shown to the LLM as the
    # required way of reading inputs inside the generated function.
    feature_gets = join(["    $(s) = get(kwargs, :$(s), missing)" for s in features], "\n")

    # Render the allowed names as an explicit list of quoted strings so the text does
    # not depend on how the container type of `algorithms` happens to print.
    algorithm_list = "[" * join(("\"$a\"" for a in algorithms), ", ") * "]"

    # Guideline 1 of the prompt, provide the LLM with its role.
    # The trailing space keeps this sentence from running into the next section.
    role_section = "You are an expert in matrix factorization. "

    # Guideline 2 of the prompt, include SmartSolve performance database
    dataset_section = """
    Below is a dataset with matrix features ($feature_list)
    and the best algorithm $algorithm_list for each matrix.

    Dataset: $full_data
    """

    # Guideline 3 of the prompt, ask the LLM to analyze the database and generate the heuristic according to a set of limitations
    instruction_section = """
    Using the dataset provided, please create a heuristic that I can use to find the optimal matrix factorization algorithm
    for any combination of input matrix features. The heuristic must cover all algorithms from the dataset: $(join(algorithms, ", ")).
    Ensure that each algorithm has a well-defined region in the feature space. Do not omit any algorithms or suggest only one algorithm.

    Please follow these steps:
    1. Analyze the dataset to understand how each algorithm performs under different feature conditions.
    2. Identify clear boundaries where one algorithm consistently outperforms others.
    3. Create a heuristic using numerical rules that covers all algorithms.
    4. Provide clear, readable guidelines for selecting the optimal algorithm for any given matrix feature set.
    5. Ensure the heuristic covers all algorithms in the dataset, without suggesting any placeholder or alternative algorithms.

    ### Important Instructions for the Julia Code:

    - You must implement the heuristic as a Julia function using this exact signature:
        ```julia
        function choose_algorithm(; kwargs...)::String
        ```
    - Inside the function, access each matrix feature using the `get` function:
        ```julia
    $feature_gets
        ```
    - Do **not** use named parameters in the function signature (e.g., `; sparsity=...` is not allowed).
    - Use `if/elseif/else` logic to express the heuristic clearly.

    - You must return **only one** of the following algorithms, as a string literal: $algorithm_list
    - Do **not** modify or extend algorithm names (e.g., avoid returning `"umfpack_triangular"` instead of `"umfpack"`).
    - Each `return` statement must use **only one** of the allowed strings exactly as written above.

    - Note: Some feature names may suggest Boolean values (e.g., `issymmetric`, `ishermitian`, `isreal`), but in the dataset, they are represented as numeric indicators (e.g., 0 or 1). Do **not** treat them as Booleans — instead, compare them numerically using expressions like `issymmetric == 1`.

    - Do **not** use logic like `if issymmetric` — this is incorrect. Always use explicit comparisons like `if issymmetric == 1`.

    Your output should include:
    1. A plain-language explanation of the heuristic rules.
    2. The complete Julia code using the structure above.
    """

    return string(role_section, dataset_section, instruction_section)
end

format_heuristic_prompt (generic function with 1 method)