<a href="https://colab.research.google.com/github/SamarthJ03/AI-Assisted-Parameter-Extraction-For-RISC-V-SPEC/blob/main/notebooks/parameter_extraction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
pip install -q langchain langchain-community transformers accelerate pydantic pyyaml langchain_huggingface langchain_core


[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.5/2.5 MB[0m [31m43.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m [31m42.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m64.7/64.7 kB[0m [31m4.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m51.0/51.0 kB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
[?25h[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
google-colab 1.0.0 requires requests==2.32.4, but you have requests 2.32.5 which is incompatible.[0m[31m
[0m

In [None]:
# from huggingface_hub import login
# login()

In [None]:
import yaml
from pathlib import Path
from typing import List

from pydantic import BaseModel, Field
from langchain_core.output_parsers import PydanticOutputParser
from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint
from langchain_core.prompts import PromptTemplate


In [None]:
class Parameter(BaseModel):
    name: str = Field(description="Concise parameter name given in the specification or derived from the description")
    description: str = Field(description="Description derived strictly from the specification")
    type: str = Field(description="type of values the parameter takes : integer | boolean | enum | bitfield | range | structural")
    constraints: str = Field(description="Explicit constraints or 'unspecified'")
class ParameterList(BaseModel):
    parameters: List[Parameter]


In [None]:
parser = PydanticOutputParser(pydantic_object=ParameterList)
format_instructions = parser.get_format_instructions()


In [None]:
format_instructions

'The output should be formatted as a JSON instance that conforms to the JSON schema below.\n\nAs an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}\nthe object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.\n\nHere is the output schema:\n```\n{"$defs": {"Parameter": {"properties": {"name": {"description": "Concise parameter name given in the specification or derived from the description", "title": "Name", "type": "string"}, "description": {"description": "Description derived strictly from the specification", "title": "Description", "type": "string"}, "type": {"description": "type of values the parameter takes : integer | boolean | enum | bitfield | range | structural", "title": "Type", "type": "string"}, "constraints": {"description": "Explicit constraints or \'unspecified\

In [None]:
models = {
    "Qwen/Qwen2.5-14B-Instruct" : ChatHuggingFace(llm = HuggingFaceEndpoint(
        repo_id="Qwen/Qwen2.5-14B-Instruct",
        task="text-generation",
        max_new_tokens=1024,
        temperature=0.0,
        seed=42
    )),
    "meta-llama/Llama-3.1-8B-Instruct": ChatHuggingFace(llm = HuggingFaceEndpoint(
        repo_id="meta-llama/Llama-3.1-8B-Instruct",
        task="text-generation",
        max_new_tokens=1024,
        temperature=0.0,
        seed=42
    )
    )
}


In [None]:
def append_yaml_entries(entries, file_path="results.yaml"):
    path = Path(file_path)
    if path.exists():
        data = yaml.safe_load(path.read_text())
    else:
        data = []

    data.extend(entries)
    path.write_text(yaml.safe_dump(data, sort_keys=False))





In [None]:
def parse_output(text):
    try:
        return parser.parse(text).dict()
    except Exception as e:
        return {
            "parse_error": True,
            "error": str(e),
            "raw_output": text
        }


In [None]:
def run_models_on_snippets(prompting_technique, prompt, snippets, models, format_instructions):
    all_results = []

    for snippet in snippets:

        current_full_prompt = prompt.format(spec_snippet=snippet.strip(), format_instructions=format_instructions)

        entry = {
            "prompting_technique": prompting_technique.strip(),
            "prompt": current_full_prompt,
            "input": { "text": snippet.strip() },
            "models": []
        }

        for model_name, llm in models.items():
            try:

                raw = llm.invoke(current_full_prompt)
                content = getattr(raw, 'content', str(raw))


                parsed_data = parse_output(content)

                entry["models"].append({
                    "model_name": model_name,
                    "output": parsed_data
                })
            except Exception as e:
                print(f"Error with {model_name}: {e}")
                entry["models"].append({
                    "model_name": model_name,
                    "output": {"error": str(e)}
                })

        all_results.append(entry)


        append_yaml_entries([entry])

    return all_results

In [None]:
template =f"""
You are a expert in RISC-V specifications.
Task:
You will be given excerpts from RISC-V Instruction Set Manual (RISC-V ISA) and have to extract architectural parameters from them.
The usage of following words implies a parameter:
may/might/should,
optional/optionally,
implementation defined/implementation specific

Rules:
- Do NOT infer unstated behavior
- Do NOT describe the schema
- Output ONLY data
- If no constraint is given, write "unspecified"

Instructions on how to format the output:
{{format_instructions}}


Excerpt from RISC-V(extract parameters from the following text):
{{spec_snippet}}

"""

In [None]:
snippets = ["""Privileged Spec 19.3.1:
Caches organize copies of data into cache blocks, each of which represents a contiguous, naturally aligned power-of-two (or NAPOT) range of memory locations. A cache block is identified by any of the physical addresses corresponding to the underlying memory locations. The capacity and organization of a cache and the size of a cache block are both implementation-specific, and the execution environment provides software a means to discover information about the caches and cache blocks in a system. In the initial set of CMO extensions, the size of a cache block shall be uniform throughout the system."""
,
"""Privileged Spec 2.1:
"Conventional" R/W accessibility of CSRs according to address mapping
The standard RISC-V ISA sets aside a 12-bit encoding space (csr[11:0]) for up to 4,096 CSRs. By convention, the upper 4 bits of the CSR address (csr[11:8]) are used to encode the read and write accessibility of the CSRs according to privilege level as shown in Table 1. The top two bits (csr[11:10]) indicate whether the register is read/write (00,01, or 10) or read-only (11). The next two bits (csr[9:8]) encode the lowest privilege level that can access the CSR.
"""]

In [None]:
run_models_on_snippets("zero_shot",template,snippets,models,format_instructions)

/tmp/ipython-input-3546129513.py:3: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.12/migration/
  return parser.parse(text).dict()


[{'prompting_technique': 'zero_shot',
  'prompt': '\nYou are a expert in RISC-V specifications.\nTask:\nYou will be given excerpts from RISC-V Instruction Set Manual (RISC-V ISA) and have to extract architectural parameters from them.\nThe usage of following words implies a parameter:\nmay/might/should,\noptional/optionally,\nimplementation defined/implementation specific\n\nRules:\n- Do NOT infer unstated behavior\n- Do NOT describe the schema\n- Output ONLY data\n- If no constraint is given, write "unspecified"\n\nInstructions on how to format the output:\nThe output should be formatted as a JSON instance that conforms to the JSON schema below.\n\nAs an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}\nthe object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.\n\nHere is th

In [None]:
template2 =f"""
You are a expert in RISC-V specifications.
Task:
You will be given excerpts from RISC-V Instruction Set Manual (RISC-V ISA) and have to extract architectural parameters from them.
The usage of following words usually implies a parameter so put extra focus on them:
may/might/should,
optional/optionally,
implementation defined/implementation specific
but it maybe that the above words are not present but a parameter is defined.

Rules:
- Do NOT infer unstated behavior
- Do NOT describe the schema
- Output ONLY data
- If no constraint is given, write "unspecified"

Instructions on how to format the output:
{{format_instructions}}


Excerpt from RISC-V(extract parameters from the following text):
{{spec_snippet}}

"""

In [None]:
run_models_on_snippets("zero_shot",template2,snippets,models,format_instructions)

/tmp/ipython-input-3546129513.py:3: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.12/migration/
  return parser.parse(text).dict()


[{'prompting_technique': 'zero_shot',
  'prompt': '\nYou are a expert in RISC-V specifications.\nTask:\nYou will be given excerpts from RISC-V Instruction Set Manual (RISC-V ISA) and have to extract architectural parameters from them.\nThe usage of following words usually implies a parameter so put extra focus on them:\nmay/might/should,\noptional/optionally,\nimplementation defined/implementation specific\nbut it maybe that the above words are not present but a parameter is defined.\n\nRules:\n- Do NOT infer unstated behavior\n- Do NOT describe the schema\n- Output ONLY data\n- If no constraint is given, write "unspecified"\n\nInstructions on how to format the output:\nThe output should be formatted as a JSON instance that conforms to the JSON schema below.\n\nAs an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}\nthe object {"foo": ["bar", "baz"]} is a well-formatt

In [None]:
template3 =f"""
RISC-V is an Open Standard Instruction Set Architecture (ISA).
To put it simply, an ISA is the language that a computer's hardware speaks. It defines the set of instructions (like add, subtract, load, or store) that a processor can execute.

Architectural Parameters are the variables of a hardware design. They are the specific values or behaviors that the hardware designer must decide on and the software must account for.

There are two main types of parameters:
Fixed Parameters (Standard): These are defined by the ISA and cannot be changed.
Implementation-Specific Parameters: The ISA says these must exist, but the chip designer chooses the value.

You are a expert in RISC-V.

Task:
You will be given excerpts from RISC-V Instruction Set Manual (RISC-V ISA) and have to extract architectural parameters from them.
The usage of following words usually implies a parameter so put extra focus on them:
may/might/should,
optional/optionally,
implementation defined/implementation specific
but it maybe that the above words are not present but a parameter is defined.

Rules:
- Do NOT infer unstated behavior
- Do NOT describe the schema
- Output ONLY data
- If no constraint is given, write unspecified

Instructions on how to format the output:
{{format_instructions}}


Excerpt from RISC-V(extract parameters from the following text):
{{spec_snippet}}

"""

In [None]:
run_models_on_snippets("zero_shot3",template3,snippets,models,format_instructions)

/tmp/ipython-input-3546129513.py:3: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.12/migration/
  return parser.parse(text).dict()


[{'prompting_technique': 'zero_shot3',
  'prompt': '\nRISC-V is an Open Standard Instruction Set Architecture (ISA).\nTo put it simply, an ISA is the language that a computer\'s hardware speaks. It defines the set of instructions (like add, subtract, load, or store) that a processor can execute.\n\nArchitectural Parameters are the variables of a hardware design. They are the specific values or behaviors that the hardware designer must decide on and the software must account for.\n\nThere are two main types of parameters:\nFixed Parameters (Standard): These are defined by the ISA and cannot be changed.\nImplementation-Specific Parameters: The ISA says these must exist, but the chip designer chooses the value.\n\nYou are a expert in RISC-V.\n\nTask:\nYou will be given excerpts from RISC-V Instruction Set Manual (RISC-V ISA) and have to extract architectural parameters from them.\nThe usage of following words usually implies a parameter so put extra focus on them:\nmay/might/should,\nopti

In [None]:
template4 =f"""
RISC-V is an open standard Instruction Set Architecture (ISA) defining the instructions and architectural behavior of processors.
In this context, architectural parameters are implementation-defined choices that hardware designers make, which influence architectural behavior and must be tolerated by software.
These parameters arise where the ISA allows implementation freedom rather than mandating one behavior.
Typical cases include:
• Implementation-defined sizes or limits
• Optional features
• Implementation-dependent behavior
• WARL CSR field legal value ranges and mappings
Parameters are generally not directly controlled by software-visible architectural state, but are properties of the implementation.

You are an expert hardware designer with years of experience in RISC-V.

Task:
You will be given excerpts from RISC-V Instruction Set Manual (RISC-V ISA) and have to extract architectural parameters from them.
The usage of following words usually implies a parameter so put extra focus on them:
may/might/should,
optional/optionally,
implementation defined/implementation specific
but it maybe that the above words are not present but a parameter is defined.

Rules:
- Do NOT infer unstated behavior
- Do NOT describe the schema
- Output ONLY data
- If no constraint is given, write unspecified

Instructions on how to format the output:
{{format_instructions}}


Excerpt from RISC-V(extract parameters from the following text):
{{spec_snippet}}

"""

In [None]:
run_models_on_snippets("zero_shot4",template4,snippets,models,format_instructions)

/tmp/ipython-input-3546129513.py:3: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.12/migration/
  return parser.parse(text).dict()


[{'prompting_technique': 'zero_shot4',
  'prompt': '\nRISC-V is an open standard Instruction Set Architecture (ISA) defining the instructions and architectural behavior of processors.\nIn this context, architectural parameters are implementation-defined choices that hardware designers make, which influence architectural behavior and must be tolerated by software.\nThese parameters arise where the ISA allows implementation freedom rather than mandating one behavior.\nTypical cases include:\n• Implementation-defined sizes or limits\n• Optional features\n• Implementation-dependent behavior\n• WARL CSR field legal value ranges and mappings\nParameters are generally not directly controlled by software-visible architectural state, but are properties of the implementation.\n\nYou are an expert hardware designer with years of experience in RISC-V.\n\nTask:\nYou will be given excerpts from RISC-V Instruction Set Manual (RISC-V ISA) and have to extract architectural parameters from them.\nThe us

In [None]:
template5 = f"""
RISC-V is an open standard Instruction Set Architecture (ISA) defining the instructions and architectural behavior of processors.
In this context, architectural parameters are implementation-defined choices that hardware designers make which influence architectural behavior and must be tolerated by software.
These parameters arise where the ISA allows implementation freedom rather than mandating one behavior.
Typical cases include:
• Implementation-defined sizes or limits
• Optional features or behaviors
• Implementation-dependent behavior
• WARL CSR field legal value ranges and illegal-to-legal mappings

Parameters are not directly controlled by software-visible architectural state, but are properties of the implementation.
You are an expert hardware designer with years of experience in RISC-V.

Task:
You will be given excerpts from the RISC-V Instruction Set Manual and must extract architectural parameters from them.

The usage of following words usually implies a parameter so put extra focus on them:
may,might,should,
optional,optionally,
implementation defined,implementation specific,either, can.

However, parameters may exist even when these words are absent.

Rules:
- Extract only implementation-defined choices affecting architectural behavior.
- Do NOT infer unstated behavior.
- Do NOT extract mechanisms provided to software (such as discovery interfaces).
- Do NOT describe the schema.
- Output ONLY data.
- If no constraint is given, write unspecified.
- Extract a parameter only if different implementations may behave differently because of it.

Examples:
- name: means to discover cache information
  description: A means provided by the execution environment to discover information about caches and cache blocks
  type: string
  constraints: unspecified
This is a negative example. It is a software mechanism, not an implementation parameter.

name: cache_capacity
description: The capacity of a cache is implementation-specific.
type: integer
constraints: unspecified
This is a positive example since the capacity is implementation-specific and decided by the hardware designer.

Instructions on how to format the output:
{{format_instructions}}

If the specification refers to external tables or sections for constraints, write constraints as unspecified.

Excerpt from RISC-V (extract parameters from the following text):
{{spec_snippet}}
"""


In [None]:
run_models_on_snippets("prompt_with_1ex",template5,snippets,models,format_instructions)

/tmp/ipython-input-3546129513.py:3: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.12/migration/
  return parser.parse(text).dict()


[{'prompting_technique': 'prompt_with_1ex',
  'prompt': '\nRISC-V is an open standard Instruction Set Architecture (ISA) defining the instructions and architectural behavior of processors.\nIn this context, architectural parameters are implementation-defined choices that hardware designers make which influence architectural behavior and must be tolerated by software.\nThese parameters arise where the ISA allows implementation freedom rather than mandating one behavior.\nTypical cases include:\n• Implementation-defined sizes or limits\n• Optional features or behaviors\n• Implementation-dependent behavior\n• WARL CSR field legal value ranges and illegal-to-legal mappings\n\nParameters are not directly controlled by software-visible architectural state, but are properties of the implementation.\nYou are an expert hardware designer with years of experience in RISC-V.\n\nTask:\nYou will be given excerpts from the RISC-V Instruction Set Manual and must extract architectural parameters from t

In [None]:
template6 = f"""
RISC-V is an open standard Instruction Set Architecture (ISA) defining the instructions and architectural behavior of processors.
In this context, architectural parameters are implementation-defined choices that hardware designers make which influence architectural behavior and must be tolerated by software.
Typical cases include:
• Implementation-defined sizes or limits
• Optional features or behaviors
• Implementation-dependent behavior
• WARL CSR field legal value ranges and illegal-to-legal mappings

Parameters are not directly controlled by software-visible architectural state, but are properties of the implementation.
You are an expert hardware designer with years of experience in RISC-V.

Task:
You will be given excerpts from the RISC-V Instruction Set Manual and must extract architectural parameters from them.

The usage of following words usually implies a parameter so pay special attention on them:
may,might,should,
optional,optionally,
implementation defined,implementation specific,either, can.

However, parameters may exist even when these words are absent.

Rules:
- Do NOT infer unstated behavior.
- Do NOT describe the schema.
- If constraint refers to external source write unspecified.

Examples:
- name: means to discover cache information
  description: A means provided by the execution environment to discover information about caches and cache blocks
  type: string
  constraints: unspecified
This is a negative example. It is a software mechanism, not an implementation parameter.

Instructions on how to format the output:
{{format_instructions}}


Excerpt from RISC-V:
{{spec_snippet}}
"""


In [None]:
run_models_on_snippets("prompt6",template6,snippets,models,format_instructions)

/tmp/ipython-input-3546129513.py:3: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.12/migration/
  return parser.parse(text).dict()


[{'prompting_technique': 'prompt6',
  'prompt': '\nRISC-V is an open standard Instruction Set Architecture (ISA) defining the instructions and architectural behavior of processors.\nIn this context, architectural parameters are implementation-defined choices that hardware designers make which influence architectural behavior and must be tolerated by software.\nTypical cases include:\n• Implementation-defined sizes or limits\n• Optional features or behaviors\n• Implementation-dependent behavior\n• WARL CSR field legal value ranges and illegal-to-legal mappings\n\nParameters are not directly controlled by software-visible architectural state, but are properties of the implementation.\nYou are an expert hardware designer with years of experience in RISC-V.\n\nTask:\nYou will be given excerpts from the RISC-V Instruction Set Manual and must extract architectural parameters from them.\n\nThe usage of following words usually implies a parameter so pay special attention on them:\nmay,might,sh

In [None]:
template7 =f"""
RISC-V is an Open Standard Instruction Set Architecture (ISA).
To put it simply, an ISA is the language that a computer's hardware speaks. It defines the set of instructions (like add, subtract, load, or store) that a processor can execute.
Architectural Parameters are the variables of a hardware design. They are the specific values or behaviors that the hardware designer must decide on and the software must account for.

You are a expert in RISC-V.

Task:
You will be given excerpts from RISC-V Instruction Set Manual (RISC-V ISA) and have to extract architectural parameters from them.
The usage of following words usually implies a parameter so put extra focus on them:
may/might/should,
optional/optionally,
implementation defined/implementation specific
However, parameters may exist even when these words are absent.

Rules:
- Do NOT infer unstated behavior
- Do NOT describe the schema
- Output ONLY data
- If no constraint is given, write unspecified
- If constraint refers to external source write unspecified.

Examples:
- name: means to discover cache information
  description: A means provided by the execution environment to discover information about caches and cache blocks
  type: string
  constraints: unspecified
This is a negative example. It is a software mechanism, not an implementation parameter.

Instructions on how to format the output:
{{format_instructions}}


Excerpt from RISC-V(extract parameters from the following text):
{{spec_snippet}}

"""

In [None]:
run_models_on_snippets("prompt7",template7,snippets,models,format_instructions)

/tmp/ipython-input-3546129513.py:3: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.12/migration/
  return parser.parse(text).dict()


[{'prompting_technique': 'prompt7',
  'prompt': '\nRISC-V is an Open Standard Instruction Set Architecture (ISA).\nTo put it simply, an ISA is the language that a computer\'s hardware speaks. It defines the set of instructions (like add, subtract, load, or store) that a processor can execute.\nArchitectural Parameters are the variables of a hardware design. They are the specific values or behaviors that the hardware designer must decide on and the software must account for.\n\nYou are a expert in RISC-V.\n\nTask:\nYou will be given excerpts from RISC-V Instruction Set Manual (RISC-V ISA) and have to extract architectural parameters from them.\nThe usage of following words usually implies a parameter so put extra focus on them:\nmay/might/should,\noptional/optionally,\nimplementation defined/implementation specific\nHowever, parameters may exist even when these words are absent.\n\nRules:\n- Do NOT infer unstated behavior\n- Do NOT describe the schema\n- Output ONLY data\n- If no constr

In [None]:
template8 =f"""
RISC-V is an Open Standard Instruction Set Architecture (ISA).
To put it simply, an ISA is the language that a computer's hardware speaks. It defines the set of instructions (like add, subtract, load, or store) that a processor can execute.
Architectural Parameters are the variables of a hardware design. They are the specific values or behaviors that the hardware designer must decide on and the software must account for.
In the most basic form, a parameter is a choice between say A or B as value of a field which changes the behaviour of processor.
If it is a standard that we have to follow it giving us no choice it is not a parameter.
You are a expert in RISC-V.

Task:
You will be given excerpts from RISC-V Instruction Set Manual (RISC-V ISA) and have to extract architectural parameters from them.
The usage of following words usually implies a parameter so put extra focus on them:
may/might/should,
optional/optionally,
implementation defined/implementation specific,
can/either
However, parameters may exist even when these words are absent.

Rules:
- Do NOT infer unstated behavior
- Do NOT describe the schema
- Output ONLY data
- If no constraint is given, write unspecified
- If constraint refers to external source write unspecified.

Examples:
- name: means to discover cache information
  description: A means provided by the execution environment to discover information about caches and cache blocks
  type: string
  constraints: unspecified
This is a negative example. It is a software mechanism, not an implementation parameter.

Instructions on how to format the output:
{{format_instructions}}


Excerpt from RISC-V(extract parameters from the following text):
{{spec_snippet}}

"""

In [None]:
run_models_on_snippets("prompt8",template8,snippets,models,format_instructions)

/tmp/ipython-input-3546129513.py:3: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.12/migration/
  return parser.parse(text).dict()


[{'prompting_technique': 'prompt8',
  'prompt': '\nRISC-V is an Open Standard Instruction Set Architecture (ISA).\nTo put it simply, an ISA is the language that a computer\'s hardware speaks. It defines the set of instructions (like add, subtract, load, or store) that a processor can execute.\nArchitectural Parameters are the variables of a hardware design. They are the specific values or behaviors that the hardware designer must decide on and the software must account for.\nIn the most basic form, a parameter is a choice between say A or B as value of a field which changes the behaviour of processor.\nIf it is a standard that we have to follow it giving us no choice it is not a parameter.\nYou are a expert in RISC-V.\n\nTask:\nYou will be given excerpts from RISC-V Instruction Set Manual (RISC-V ISA) and have to extract architectural parameters from them.\nThe usage of following words usually implies a parameter so put extra focus on them:\nmay/might/should,\noptional/optionally,\nimp

In [None]:
template9 =f"""
RISC-V is an Open Standard Instruction Set Architecture (ISA).
To put it simply, an ISA is the language that a computer's hardware speaks. It defines the set of instructions (like add, subtract, load, or store) that a processor can execute.
Architectural Parameters are the variables of a hardware design. They are the specific values or behaviors that the hardware designer must decide on and the software must account for.
In the most basic form, a parameter is a choice between say A or B as value of a field which changes the behaviour of processor.
If it is a standard or convention that we have to follow it giving us no choice it is not a parameter.
You are a expert in RISC-V.

Task:
You will be given excerpts from RISC-V Instruction Set Manual (RISC-V ISA) and have to extract architectural parameters from them.
The usage of following words usually implies a parameter so put extra focus on them:
may/might/should,
optional/optionally,
implementation defined/implementation specific,
can/either
However, parameters may exist even when these words are absent.
Also, it is not necessary that every exercpt will contain parameters.

Rules:
- Do NOT infer unstated behavior
- Do NOT describe the schema
- Output ONLY data
- If no constraint is given, write unspecified
- If constraint refers to external source write unspecified.

Examples:
- name: means to discover cache information
  description: A means provided by the execution environment to discover information about caches and cache blocks
  type: string
  constraints: unspecified
This is a negative example. It is a software mechanism, not an implementation parameter.

 name: cache_capacity
 description: The capacity of a cache, which may affect the number of cache
 blocks it can store.
 type: range
 constraints: unspecified
 name: cache_organization
 description: The organization of a cache, including the arrangement of cache
 blocks and their corresponding tags.
 type: enum
 constraints: unspecified
 These are positive examples as changes in them affect the processor behavior and they dont have any previously defined standard value.

Instructions on how to format the output:
{{format_instructions}}


Excerpt from RISC-V(extract parameters from the following text):
{{spec_snippet}}

"""

In [None]:
run_models_on_snippets("prompt9",template9,snippets,models,format_instructions)

/tmp/ipython-input-3546129513.py:3: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.12/migration/
  return parser.parse(text).dict()


[{'prompting_technique': 'prompt9',
  'prompt': '\nRISC-V is an Open Standard Instruction Set Architecture (ISA).\nTo put it simply, an ISA is the language that a computer\'s hardware speaks. It defines the set of instructions (like add, subtract, load, or store) that a processor can execute.\nArchitectural Parameters are the variables of a hardware design. They are the specific values or behaviors that the hardware designer must decide on and the software must account for.\nIn the most basic form, a parameter is a choice between say A or B as value of a field which changes the behaviour of processor.\nIf it is a standard or convention that we have to follow it giving us no choice it is not a parameter.\nYou are a expert in RISC-V.\n\nTask:\nYou will be given excerpts from RISC-V Instruction Set Manual (RISC-V ISA) and have to extract architectural parameters from them.\nThe usage of following words usually implies a parameter so put extra focus on them:\nmay/might/should,\noptional/op

In [None]:
template10 =f"""
RISC-V is an Open Standard Instruction Set Architecture (ISA).
To put it simply, an ISA is the language that a computer's hardware speaks. It defines the set of instructions (like add, subtract, load, or store) that a processor can execute.
Architectural Parameters are the variables of a hardware design. They are the specific values or behaviors that the hardware designer must decide on and the software must account for.
In the most basic form, a parameter is a choice between say A or B as value of a field which changes the behaviour of processor.
If it is a standard or convention that we have to follow it giving us no choice it is not a parameter.
You are a expert in RISC-V.

Task:
You will be given excerpts from RISC-V Instruction Set Manual (RISC-V ISA) and have to extract architectural parameters from them.
The usage of following words usually implies a parameter so put extra focus on them:
may/might/should,
optional/optionally,
implementation defined/implementation specific,
can/either
However, parameters may exist even when these words are absent.
Also, it is not necessary that every exercpt will contain parameters.

Instructions on how to format the output:
{{format_instructions}}

Rules:
- NEVER infer unstated behavior
- NEVER describe the schema
- Output ONLY data
- If no constraint is given, write unspecified
- If constraint refers to external source NEVER WRITE IT INSTEAD WRITE unspecified.

Reasoning Process (Think Step-by-Step):
For every potential parameter, ask:
1. Is this a fixed rule I must follow to be RISC-V Compliant? (If yes, discard).
2. Is this a field labeled WARL (Write Any values, Reads Legal values)? (If yes, it is a parameter for legal values and mapping).
3. Does the text allow for different 'BEHAVIOUR' or 'VALUES' across different chips? (If yes, extract the choice).
4. Once choice is extracted check if the spec specifies a type or constraint on its values.(REFER TO RULES AND FORMAT INSTRUCTIONS).

Examples:
- name: means to discover cache information
  description: A means provided by the execution environment to discover information about caches and cache blocks
  type: string
  constraints: unspecified
This is a negative example. It is a software mechanism, not an implementation parameter.

 name: cache_capacity
 description: The capacity of a cache, which may affect the number of cache
 blocks it can store.
 type: range
 constraints: unspecified
 This is a positive examples as changes in them affect the processor behavior and they dont have any previously defined standard value.


Excerpt from RISC-V(extract parameters from the following text):
{{spec_snippet}}

"""

In [None]:
run_models_on_snippets("prompt10",template10,snippets,models,format_instructions)

/tmp/ipython-input-3546129513.py:3: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.12/migration/
  return parser.parse(text).dict()


[{'prompting_technique': 'prompt10',
  'prompt': '\nRISC-V is an Open Standard Instruction Set Architecture (ISA).\nTo put it simply, an ISA is the language that a computer\'s hardware speaks. It defines the set of instructions (like add, subtract, load, or store) that a processor can execute.\nArchitectural Parameters are the variables of a hardware design. They are the specific values or behaviors that the hardware designer must decide on and the software must account for.\nIn the most basic form, a parameter is a choice between say A or B as value of a field which changes the behaviour of processor.\nIf it is a standard or convention that we have to follow it giving us no choice it is not a parameter.\nYou are a expert in RISC-V.\n\nTask:\nYou will be given excerpts from RISC-V Instruction Set Manual (RISC-V ISA) and have to extract architectural parameters from them.\nThe usage of following words usually implies a parameter so put extra focus on them:\nmay/might/should,\noptional/o

In [None]:
template11 = f"""
RISC-V is an Open Standard Instruction Set Architecture (ISA).
To put it simply, an ISA is the language that a computer's hardware speaks. It defines the set of instructions (like add, subtract, load, or store) that a processor can execute.
Architectural Parameters are the variables of a hardware design. They are the specific values or behaviors that the hardware designer must decide on and the software must account for.
In the most basic form, a parameter is a choice between say A or B as value of a field which changes the behaviour of processor.
If it is a standard or convention that we have to follow it giving us no choice it is not a parameter.
You are a expert in RISC-V.

Task:
You will be given excerpts from RISC-V Instruction Set Manual (RISC-V ISA) and have to extract architectural parameters from them.
The usage of following words usually implies a parameter so put extra focus on them:
may/might/should,
optional/optionally,
implementation defined/implementation specific,
can/either
However, parameters may exist even when these words are absent.
Also, it is not necessary that every excerpt will contain parameters.

Instructions on how to format the output:
{{format_instructions}}

Rules:
- NEVER describe the schema.
- If constraint refers to external source NEVER WRITE IT; INSTEAD WRITE unspecified.
- Output ONLY data.

Reasoning Process (Think Step-by-Step):
1. Is this a fixed rule or address mapping set by the ISA? Even if it is called a 'convention', if it must be followed for standard compliance, DISCARD it.
2. Is this a field labeled WARL (Write Any values, Reads Legal values)? If yes, it is ALWAYS a parameter for legal values and mapping behavior.
3. Does the text allow for different 'BEHAVIOUR' or 'VALUES' across different chips? If the hardware designer has a choice between A or B, EXTRACT it.
4. Once a choice is extracted, check if the spec specifies a type or constraint on its values.

Examples:
- Text: 'By convention, the red light is always located at the top of the traffic signal and indicates that the vehicle must stop.'
  Reasoning: This is a fixed standard ('By convention') required for the system to work. A manufacturer cannot choose to put the red light at the bottom and remain compliant.
  Result: 'parameters: []'
  (This is a negative example: showing what NOT to extract).

- Text: 'The horsepower and fuel-tank capacity of an engine and the material of the seats are all manufacturer-specific.'
  Reasoning: The phrase 'manufacturer-specific' indicates these are choices the builder makes. Different versions of the car will have different values.
  Output:
  - name: engine_horsepower
    description: The power output of the engine as determined by the manufacturer.
    type: range
    constraints: manufacturer-specific
  - name: fuel_tank_capacity
    description: The total volume of fuel the vehicle can store.
    type: range
    constraints: manufacturer-specific
  - name: seat_material
    description: The type of material used for the interior upholstery.
    type: enum
    constraints: manufacturer-specific
  This is a positive example.

Excerpt from RISC-V (extract parameters from the following text):
{{spec_snippet}}
"""

In [None]:
run_models_on_snippets("prompt13",template11,snippets,models,format_instructions)

/tmp/ipython-input-3546129513.py:3: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.12/migration/
  return parser.parse(text).dict()


[{'prompting_technique': 'prompt13',
  'prompt': '\nRISC-V is an Open Standard Instruction Set Architecture (ISA).\nTo put it simply, an ISA is the language that a computer\'s hardware speaks. It defines the set of instructions (like add, subtract, load, or store) that a processor can execute.\nArchitectural Parameters are the variables of a hardware design. They are the specific values or behaviors that the hardware designer must decide on and the software must account for.\nIn the most basic form, a parameter is a choice between say A or B as value of a field which changes the behaviour of processor.\nIf it is a standard or convention that we have to follow it giving us no choice it is not a parameter.\nYou are a expert in RISC-V.\n\nTask:\nYou will be given excerpts from RISC-V Instruction Set Manual (RISC-V ISA) and have to extract architectural parameters from them.\nThe usage of following words usually implies a parameter so put extra focus on them:\nmay/might/should,\noptional/o

In [None]:
template12 = f"""
RISC-V is an Open Standard Instruction Set Architecture (ISA).
To put it simply, an ISA is the language that a computer's hardware speaks. It defines the set of instructions (like add, subtract, load, or store) that a processor can execute.
Architectural Parameters are the variables of a hardware design. They are the specific values or behaviors that the hardware designer must decide on and the software must account for.
In the most basic form, a parameter is a choice between say A or B as value of a field which changes the behaviour of processor.
If it is a standard or convention that we have to follow it giving us no choice it is not a parameter.
You are a strict, pedantic expert in RISC-V.

Task:
You will be given excerpts from the RISC-V Instruction Set Manual (RISC-V ISA). You MUST extract architectural parameters from them.
You are required to AGGRESSIVELY focus on the following trigger words, as they usually imply a parameter:
may/might/should,
optional/optionally,
implementation defined/implementation specific,
can/either
However, parameters may exist even when these words are absent.
If an excerpt contains no parameters, return an empty list.

Instructions on how to format the output:
{{format_instructions}}

Strict Rules:
- NEVER output the schema definition.
- If a constraint refers to an external source (e.g. 'see section 4.2'), NEVER WRITE IT; instead write 'unspecified'.
- Output ONLY the requested data.
- Do not add conversational text or explanations.
- The output MUST be a JSON object with a single key named 'parameters' which contains the list of parameters.


Reasoning Process (Follow this strictly Step-by-Step):
1. FILTER: Is this a fixed rule, standard, or address mapping set by the ISA? If it MUST be followed for compliance, DISCARD it immediately.
2. CHECK WARL: Is a field labeled WARL (Write Any values, Reads Legal values)? If yes, this is ALWAYS a parameter. Extract it.
3. IDENTIFY CHOICE: Does the text allow for different 'BEHAVIOUR' or 'VALUES' across different chips? If the hardware designer has a CHOICE (e.g., between A or B), you MUST extract it.
4. VALIDATE: Once a choice is extracted, check if the spec specifies a type or constraint on its values.

Excerpt from RISC-V (extract parameters from the following text):
{{spec_snippet}}
"""

In [None]:
run_models_on_snippets("prompt12",template12,snippets,models,format_instructions)

/tmp/ipython-input-3546129513.py:3: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.12/migration/
  return parser.parse(text).dict()


[{'prompting_technique': 'prompt12',
  'prompt': '\nRISC-V is an Open Standard Instruction Set Architecture (ISA).\nTo put it simply, an ISA is the language that a computer\'s hardware speaks. It defines the set of instructions (like add, subtract, load, or store) that a processor can execute.\nArchitectural Parameters are the variables of a hardware design. They are the specific values or behaviors that the hardware designer must decide on and the software must account for.\nIn the most basic form, a parameter is a choice between say A or B as value of a field which changes the behaviour of processor.\nIf it is a standard or convention that we have to follow it giving us no choice it is not a parameter.\nYou are a strict, pedantic expert in RISC-V.\n\nTask:\nYou will be given excerpts from the RISC-V Instruction Set Manual (RISC-V ISA). You MUST extract architectural parameters from them.\nYou are required to AGGRESSIVELY focus on the following trigger words, as they usually imply a p

In [None]:
template13 = f"""
RISC-V is an Open Standard Instruction Set Architecture (ISA).
To put it simply, an ISA is the language that a computer's hardware speaks. It defines the set of instructions (like add, subtract, load, or store) that a processor can execute.
Architectural Parameters are the variables of a hardware design. They are the specific values or behaviors that the hardware designer must decide on and the software must account for.
In the most basic form, a parameter is a choice between say A or B as value of a field which changes the behaviour of processor.
If it is a standard or convention that we have to follow it giving us no choice it is not a parameter.
You are a strict, pedantic expert in RISC-V.

Task:
You will be given excerpts from the RISC-V Instruction Set Manual (RISC-V ISA). You MUST extract architectural parameters from them.
You are required to AGGRESSIVELY focus on the following trigger words, as they usually imply a parameter:
may/might/should,
optional/optionally,
implementation defined/implementation specific,
can/either
However, parameters may exist even when these words are absent.
If an excerpt contains no parameters, return an empty list.

Instructions on how to format the output:
{{format_instructions}}

Strict Rules:
- NEVER output the schema definition.
- If a constraint refers to an external source (e.g. "see section 4.2"), NEVER WRITE IT; instead write "unspecified".
- Output ONLY the requested data.
- Do not add conversational text or explanations.
- The output MUST be a JSON object with a single key named "parameters".

Reasoning Process (Follow this strictly Step-by-Step):
1. FILTER: Is this a fixed rule, standard, or address mapping set by the ISA? If it MUST be followed for compliance, DISCARD it immediately.
2. CHECK WARL: Is a field labeled WARL (Write Any values, Reads Legal values)? If yes, this is ALWAYS a parameter. Extract it.
3. IDENTIFY CHOICE: Does the text allow for different 'BEHAVIOUR' or 'VALUES' across different chips? If the hardware designer has a CHOICE (e.g., between A or B), you MUST extract it.
4. VALIDATE: Once a choice is extracted, check if the spec specifies a type or constraint on its values.

Examples:

- Text: 'The funct3 field of the ADDI instruction is always bits [14:12] and must be set to 000. Any other value is reserved.'
  Reasoning: These bits are fixed by the ISA to define the opcode. A hardware designer cannot change which bits represent funct3 or what value they hold for ADDI. There is NO choice.
  Result: 'parameters: []'

- Text: 'The misa register is a WARL register. By convention, the lower 26 bits are used to indicate the presence of standard extensions A through Z.'
  Reasoning: While it mentions WARL, the specific mapping of bits to extensions is a fixed 'convention' required for software compatibility. A designer cannot remap 'A' to bit 5 and remain standard.
  Result: 'parameters: []'

- Text: 'The MXLEN parameter can be either 32 or 64 bits. In some implementations, the upper bits [63:32] of a register may be ignored when operating in RV32 mode.'
  Reasoning: The word 'either' and 'may be ignored' indicates a CHOICE. One hardware design might support 64 bits, while another only 32.
  Output:
  - name: mxlen_width
    description: The supported bit-width of the machine architecture.
    type: enum
    constraints: 32, 64
  - name: upper_bit_behavior
    description: Behavior of bits [63:32] when in RV32 mode.
    type: string
    constraints: may be ignored

- Text: 'The number of PMP (Physical Memory Protection) entries is implementation-specific, but must be between 0 and 64. The pmpcfg0 register is a WARL field.'
  Reasoning: 'Implementation-specific' and 'WARL' both signal hardware choices. The designer chooses the number of entries and the legal values for the config register.
  Output:
  - name: pmp_entries_count
    description: Total number of implemented PMP entries.
    type: integer
    constraints: 0-64
  - name: pmpcfg0_legal_values
    description: The set of supported/legal values for the WARL pmpcfg0 register.
    type: set
    constraints: implementation-defined

Excerpt from RISC-V (extract parameters from the following text):
{{spec_snippet}}
"""

In [None]:
run_models_on_snippets("prompt13",template13,snippets,models,format_instructions)

/tmp/ipython-input-3546129513.py:3: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.12/migration/
  return parser.parse(text).dict()


[{'prompting_technique': 'prompt13',
  'prompt': '\nRISC-V is an Open Standard Instruction Set Architecture (ISA).\nTo put it simply, an ISA is the language that a computer\'s hardware speaks. It defines the set of instructions (like add, subtract, load, or store) that a processor can execute.\nArchitectural Parameters are the variables of a hardware design. They are the specific values or behaviors that the hardware designer must decide on and the software must account for.\nIn the most basic form, a parameter is a choice between say A or B as value of a field which changes the behaviour of processor.\nIf it is a standard or convention that we have to follow it giving us no choice it is not a parameter.\nYou are a strict, pedantic expert in RISC-V.\n\nTask:\nYou will be given excerpts from the RISC-V Instruction Set Manual (RISC-V ISA). You MUST extract architectural parameters from them.\nYou are required to AGGRESSIVELY focus on the following trigger words, as they usually imply a p

In [None]:
template14 = f"""
RISC-V is an Open Standard Instruction Set Architecture (ISA).
To put it simply, an ISA is the language that a computer's hardware speaks. It defines the set of instructions (like add, subtract, load, or store) that a processor can execute.
Architectural Parameters are the variables of a hardware design. They are the specific values or behaviors that the hardware designer must decide on and the software must account for.
In the most basic form, a parameter is a choice between say A or B as value of a field which changes the behaviour of processor.
If it is a standard or convention that we have to follow it giving us no choice it is not a parameter.
You are a strict, pedantic expert in RISC-V.

Task:
You will be given excerpts from the RISC-V Instruction Set Manual (RISC-V ISA). You MUST extract architectural parameters from them.
You are required to AGGRESSIVELY focus on the following trigger words, as they usually imply a parameter:
may/might/should,
optional/optionally,
implementation defined/implementation specific,
can/either
However, parameters may exist even when these words are absent.
If an excerpt contains no parameters, return an empty list.

Instructions on how to format the output:
{{format_instructions}}

Strict Rules:
- NEVER output the schema definition.
- If a constraint refers to an external source (e.g. "see section 4.2"), NEVER WRITE IT; instead write "unspecified".
- Output ONLY the requested data.
- Do not add conversational text or explanations.
- The output MUST be a JSON object with a single key named "parameters".

Reasoning Process (Follow this strictly Step-by-Step):
1. FILTER: Is this a fixed rule, standard, or address mapping set by the ISA? If it MUST be followed for compliance, DISCARD it immediately.
2. CHECK WARL: Is a field labeled WARL (Write Any values, Reads Legal values)? If yes, this is ALWAYS a parameter. Extract it.
3. IDENTIFY CHOICE: Does the text allow for different 'BEHAVIOUR' or 'VALUES' across different chips? If the hardware designer has a CHOICE (e.g., between A or B), you MUST extract it.
4. VALIDATE: Once a choice is extracted, check if the spec specifies a type or constraint on its values.

Examples:

- Text: 'The funct3 field of the ADDI instruction is always bits [14:12] and must be set to 000. Any other value is reserved.'
  Reasoning: These bits are fixed by the ISA to define the opcode. A hardware designer cannot change which bits represent funct3 or what value they hold for ADDI. There is NO choice.
  Result: 'parameters: []'

- Text: 'The MXLEN parameter can be either 32 or 64 bits. In some implementations, the upper bits [63:32] of a register may be ignored when operating in RV32 mode.'
  Reasoning: The word 'either' and 'may be ignored' indicates a CHOICE. One hardware design might support 64 bits, while another only 32.
  Output:
  - name: mxlen_width
    description: The supported bit-width of the machine architecture.
    type: enum
    constraints: 32, 64
  - name: upper_bit_behavior
    description: Behavior of bits [63:32] when in RV32 mode.
    type: string
    constraints: may be ignored

Excerpt from RISC-V (extract parameters from the following text):
{{spec_snippet}}
"""

In [None]:
run_models_on_snippets("prompt14",template14,snippets,models,format_instructions)

/tmp/ipython-input-3546129513.py:3: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.12/migration/
  return parser.parse(text).dict()


[{'prompting_technique': 'prompt14',
  'prompt': '\nRISC-V is an Open Standard Instruction Set Architecture (ISA).\nTo put it simply, an ISA is the language that a computer\'s hardware speaks. It defines the set of instructions (like add, subtract, load, or store) that a processor can execute.\nArchitectural Parameters are the variables of a hardware design. They are the specific values or behaviors that the hardware designer must decide on and the software must account for.\nIn the most basic form, a parameter is a choice between say A or B as value of a field which changes the behaviour of processor.\nIf it is a standard or convention that we have to follow it giving us no choice it is not a parameter.\nYou are a strict, pedantic expert in RISC-V.\n\nTask:\nYou will be given excerpts from the RISC-V Instruction Set Manual (RISC-V ISA). You MUST extract architectural parameters from them.\nYou are required to AGGRESSIVELY focus on the following trigger words, as they usually imply a p