In [2]:
import numpy as np
import pandas as pd
from datetime import datetime
from pathlib import Path   
import glob
import os

In [None]:
{
  "label": "Foliage projective cover in the lower canopy strata",
  "comment": "The proportion of the ground area covered by foliage (or photosynthetic tissue) held in a vertical plane. Foliage Projective Cover is usually expressed as the percentage of ground covered by foliage and can be separated according to vegetation strata.",
  "hasProperty": "projective cover",
  "hasMatrix": "canopy strata",
  "hasObjectOfInterest": "foliage",
  "hasConstraint": [
    {
      "label": "lower",
      "on": "hasMatrix: canopy strata"
    }
  ],
  "hasStatisticalModifier": "",
  "hasContextObject": ""
}


{
  "label": "Foliage projective cover in the lower canopy strata",
  "comment": "The proportion of the ground area covered by foliage (or photosynthetic tissue) held in a vertical plane. Foliage Projective Cover is usually expressed as the percentage of ground covered by foliage and can be separated according to vegetation strata.",
  "hasProperty": "projective cover",
  "hasMatrix": "lower canopy strata",
  "hasObjectOfInterest": "foliage",
  "hasConstraint": [
    {
      "label": "lower canopy strata",
      "on": "hasObjectOfInterest: foliage"
    }
  ],
  "hasStatisticalModifier": "",
  "hasContextObject": ""
}


# T=0.5 

{
  "label": "Foliage projective cover in the lower canopy strata",
  "comment": "The proportion of the ground area covered by foliage (or photosynthetic tissue) held in a vertical plane. Foliage Projective Cover is usually expressed as the percentage of ground covered by foliage and can be separated according to vegetation strata.",
  "hasProperty": "projective cover",
  "hasObjectOfInterest": "foliage",
  "hasMatrix": "lower canopy strata",
  "hasStatisticalModifier": "",
  "hasContextObject": "",
  "hasConstraint": []
}


{
  "label": "Foliage projective cover in the lower canopy strata",
  "comment": "The proportion of the ground area covered by foliage (or photosynthetic tissue) held in a vertical plane. Foliage Projective Cover is usually expressed as the percentage of ground covered by foliage and can be separated according to vegetation strata.",
  "hasProperty": "projective cover",
  "hasObjectOfInterest": "foliage",
  "hasMatrix": "lower canopy strata",
  "hasConstraint": [
    {
      "label": "lower canopy strata",
      "on": "hasMatrix"
    }
  ],
  "hasStatisticalModifier": "",
  "hasContextObject": ""
}

In [3]:
# Aggregate the T=0 benchmarking runs: mean F_exact per (Prompt, Model) pair.

# 1) Directory containing the timestamped result workbooks.
#    NOTE(review): machine-specific absolute path; adjust when running elsewhere.
data_dir = "/Users/rastegar-a/Documents/GitHub/i-adopt-llm-based-service/benchmarking_outputs/Matrix with T=0"

# 2) Collect every workbook that matches the naming pattern.
#    glob returns paths in a filesystem-dependent order, so sort them to make
#    the read (and therefore the concatenation) order reproducible.
pattern = os.path.join(data_dir, "iadopt_Fexact_matrix_*.xlsx")
files = sorted(glob.glob(pattern))

# Fail early with an actionable message: without this guard an empty match
# list only surfaces later as pd.concat's "No objects to concatenate".
if not files:
    raise FileNotFoundError(f"No result files match {pattern!r}")

# 3) Read the "best_pairs" sheet of each workbook and stack them row-wise.
dfs = [pd.read_excel(fn, sheet_name="best_pairs") for fn in files]
all_data = pd.concat(dfs, ignore_index=True)

# 4) Compute the mean F_exact per Prompt × Model
mean_data = (
    all_data
    .groupby(['Prompt', 'Model'], as_index=False)['F_exact']
    .mean()
)

# 5) Sort descending, best combinations first
mean_data = mean_data.sort_values(by='F_exact', ascending=False).reset_index(drop=True)

# Display
print("Flat table, best first:\n")
display(mean_data)


Flat table, best first:



Unnamed: 0,Prompt,Model,F_exact
0,3-shot,anthropic/claude-4-sonnet-20250522,0.58704
1,5-shot,anthropic/claude-4-sonnet-20250522,0.57936
2,3-shot,qwen/qwen3-32b,0.480768
3,5-shot,qwen/qwen3-32b,0.476248
4,3-shot,deepseek/deepseek-r1-0528-qwen3-8b,0.465272
5,5-shot,openai/gpt-4.1,0.464376
6,5-shot,deepseek/deepseek-r1-0528-qwen3-8b,0.46016
7,5-shot,meta-llama/llama-4-maverick-17b-128e-instruct,0.457184
8,3-shot,openai/gpt-4.1,0.453736
9,5-shot,microsoft/phi-4,0.45174


In [4]:
# Aggregate the T=0.5 benchmarking runs: mean F_exact per (Prompt, Model) pair.

# 1) Directory containing the timestamped result workbooks.
#    NOTE(review): machine-specific absolute path; adjust when running elsewhere.
data_dir = "/Users/rastegar-a/Documents/GitHub/i-adopt-llm-based-service/benchmarking_outputs/Matrix with T=0.5"

# 2) Collect every workbook that matches the naming pattern.
#    glob returns paths in a filesystem-dependent order, so sort them to make
#    the read (and therefore the concatenation) order reproducible.
pattern = os.path.join(data_dir, "iadopt_Fexact_matrix_*.xlsx")
files = sorted(glob.glob(pattern))

# Fail early with an actionable message: without this guard an empty match
# list only surfaces later as pd.concat's "No objects to concatenate".
if not files:
    raise FileNotFoundError(f"No result files match {pattern!r}")

# 3) Read the "best_pairs" sheet of each workbook and stack them row-wise.
dfs = [pd.read_excel(fn, sheet_name="best_pairs") for fn in files]
all_data = pd.concat(dfs, ignore_index=True)

# 4) Compute the mean F_exact per Prompt × Model
mean_data = (
    all_data
    .groupby(['Prompt', 'Model'], as_index=False)['F_exact']
    .mean()
)

# 5) Sort descending, best combinations first
mean_data = mean_data.sort_values(by='F_exact', ascending=False).reset_index(drop=True)

# Display
print("Flat table, best first:\n")
display(mean_data)

Flat table, best first:



Unnamed: 0,Prompt,Model,F_exact
0,3-shot,qwen/qwen3-32b,0.467208
1,5-shot,meta-llama/llama-4-maverick-17b-128e-instruct,0.464992
2,5-shot,deepseek/deepseek-r1-0528-qwen3-8b,0.463608
3,5-shot,openai/gpt-4.1,0.46044
4,5-shot,qwen/qwen3-32b,0.45364
5,3-shot,openai/gpt-4.1,0.436456
6,3-shot,deepseek/deepseek-r1-0528-qwen3-8b,0.433792
7,3-shot,qwen/qwen3-14b,0.430592
8,3-shot,meta-llama/llama-4-maverick-17b-128e-instruct,0.42804
9,1-shot,deepseek/deepseek-r1-0528-qwen3-8b,0.41464


### How many tokens do the prompts contain?

In [5]:
# NOTE(review): consider moving this import into the import cell at the top of the notebook.
from transformers import GPT2TokenizerFast

# Load the pretrained "gpt2" tokenizer; the cells below use it to count prompt tokens.
# NOTE(review): the benchmarked models presumably ship their own tokenizers, so
# counts obtained with GPT-2's vocabulary are likely only an approximation; confirm.
tokenizer = GPT2TokenizerFast.from_pretrained("gpt2")

In [10]:
prompt_text_5_shot = """PROMPT | shot=5 | Peak ground acceleration
You are an ontology engineer.
Your task is to output **one** JSON object that satisfies the
JSON-Schema provided below.

▸ Copy *label* and *comment* verbatim from the user section.
▸ Do **NOT** introduce keys that are absent from the schema.
▸ Every value must respect the declared JSON type
  (e.g. hasProperty is a string, hasConstraint is an array, …).
▸ Reply with the JSON object only — no markdown fences, no narration.

### JSON-Schema
{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "$id": "https://example.org/schemas/iadopt-variable.json",
  "title": "I-ADOPT decomposed variable",
  "description": "Single variable expressed in the compact JSON layout used for LLM-driven decomposition.",
  "type": "object",

  "required": ["label", "comment", "hasProperty", "hasObjectOfInterest"],

  "properties": {
    "label": {
      "type": "string",
      "description": "Variable name."
    },
    "comment": {
      "type": "string",
      "description": "Definition / description."
    },
    "hasProperty": {
      "type": "string",
      "description": "Property being observed (e.g. 'distance', 'temperature')."
    },
    "hasStatisticalModifier": {
      "type": "string",
      "description": "Statistical qualifier (e.g. 'maximum', 'latest')."
    },
    "hasMatrix": {
      "$ref": "#/$defs/entityOrSystem"
    },
    "hasObjectOfInterest": {
      "$ref": "#/$defs/entityOrSystem"
    },
    "hasContextObject": {
      "$ref": "#/$defs/entityOrSystem"
    },
    "hasConstraint": {
      "type": "array",
      "description": "List of constraints, each with a label and the target it applies to.",
      "items": {
        "type": "object",
        "required": ["label", "on"],
        "properties": {
          "label": {
            "type": "string",
            "description": "Constraint label (e.g. 'nearest neighbour', 'daily')."
          },
          "on": {
            "type": "string",
            "description": "What the constraint applies to (e.g. 'hasTarget: habitat patch')."
          }
        },
        "additionalProperties": false
      },
      "minItems": 1
    }
  },

  "additionalProperties": false,

  "$defs": {
    "entityOrSystem": {
      "description": "Either a simple label or a structured system.",
      "oneOf": [
        { "type": "string" },
        {
          "$comment": "Asymmetric system",
          "type": "object",
          "required": [
            "AsymmetricSystem",
            "hasSource",
            "hasTarget",
            "hasNumerator",
            "hasDenominator"
          ],
          "properties": {
            "AsymmetricSystem": { "type": "string" },
            "hasSource":        { "type": "string" },
            "hasTarget":        { "type": "string" }
          },
          "additionalProperties": false
        },
        {
          "$comment": "Symmetric system",
          "type": "object",
          "required": ["SymmetricSystem", "hasPart"],
          "properties": {
            "SymmetricSystem": { "type": "string" },
            "hasPart": {
              "type": "array",
              "items": { "type": "string" },
              "minItems": 1
            }
          },
          "additionalProperties": false
        }
      ]
    }
  }
}

### Examples (valid against the same schema)
{
  "label": "Electron density in the solar wind",
  "comment": "Density (particle per cm3) of electrons measured in the Solar Wind.",
  "hasProperty": "volumetric number density",
  "hasMatrix": "solar wind",
  "hasObjectOfInterest": "electron"
}

{
  "label": "Strike of bedding",
  "comment": "Geographic azimuth (relative to true north) at a point observation location, of a horizontal line contained in a sedimentary rock bedding surface. The bedding surface must not be horizontal; the azimuth is reported such that the dip direction of the inclined bedding is to the right when facing in the azimuth direction.",
  "hasProperty": "azimuth",
  "hasMatrix": "sedimentary bedrock",
  "hasObjectOfInterest": {
    "SymmetricSystem": "line of intersection",
    "hasPart": [
      "bedding plane",
      "horizontal plane"
    ]
  }
}

{
  "label": "Distance to nearest neighbour habitat patch",
  "comment": "This variable is part of the EBV Connectivity of terrestrial ecosystem habitat types and helps to measure the degree of connection of EUNIS habitats within a landscape, in terms of their spatial distribution. https://github.com/EuropaBON/EBV-Descriptions/wiki/Terrestrial-Connectivity-of-terrestrial-ecosystem-habitat-types",
  "hasProperty": "distance",
  "hasObjectOfInterest": {
    "AsymmetricSystem": "habitat patch system",
    "hasSource": "habitat patch",
    "hasTarget": "habitat patch"
  },
  "hasConstraint": [
    {
      "label": "nearest neighbour",
      "on": "hasTarget: habitat patch"
    }
  ]
}

{
  "label": "Weight specific-ingestion Carbon rate at 15 °C",
  "comment": "The amount of carbon consumed by an organism at non-limiting concentration of food relative to the individual dry weight measured at 15°C. It is expressed as μg C mg DW−1 h−1",
  "hasProperty": "mass flow rate",
  "hasMatrix": "organism",
  "hasObjectOfInterest": "Carbon",
  "hasConstraint": [
    {
      "label": "weight-specific",
      "on": "mass flow rate"
    },
    {
      "label": "dry",
      "on": "organism"
    },
    {
      "label": "at non-limiting conditions",
      "on": "Carbon"
    },
    {
      "label": "at 15°C temperature",
      "on": "mass flow rate"
    },
    {
      "label": "due to ingestion",
      "on": "mass flow rate"
    }
  ]
}

{
  "label": "Mass flux of carbon into soil from vegetation due to senescence",
  "comment": "In accordance with common usage in geophysical disciplines, \"flux\" implies per unit area, called \"flux density\" in physics. \"Vegetation\" means any living plants e.g. trees, shrubs, grass. The specification of a physical process by the phrase \"due_to_\" process means that the quantity named is a single term in a sum of terms which together compose the general quantity named by omitting the phrase. The term \"senescence\" means loss of living biomass excluding plant death, e.g. leaf drop and other seasonal effects. The term refers to changes in the whole plant and is not confined only to leaf drop.",
  "hasProperty": "mass flux",
  "hasMatrix": {
    "AsymmetricSystem": "from vegetation to soil",
    "hasSource": "vegetation",
    "hasTarget": "soil"
  },
  "hasObjectOfInterest": "carbon",
  "hasConstraint": [
    {
      "label": "due to senescence",
      "on": "mass flux"
    }
  ]
}

### Variable to decompose
label: Peak ground acceleration
comment: Peak acceleration measured on the earth surface when facing seismic events, like earthquakes.

### Expected output
*(only the JSON object)*"""

In [13]:
# Token ids of the 5-shot prompt under the loaded tokenizer. Note that
# `count_tokens` holds the id list itself; the value shown for this cell is its length.
count_tokens = tokenizer(prompt_text_5_shot)["input_ids"]
len(count_tokens)

2598

In [14]:
prompt_text_3_shot= """PROMPT | shot=3 | Peak ground acceleration
You are an ontology engineer.
Your task is to output **one** JSON object that satisfies the
JSON-Schema provided below.

▸ Copy *label* and *comment* verbatim from the user section.
▸ Do **NOT** introduce keys that are absent from the schema.
▸ Every value must respect the declared JSON type
  (e.g. hasProperty is a string, hasConstraint is an array, …).
▸ Reply with the JSON object only — no markdown fences, no narration.

### JSON-Schema
{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "$id": "https://example.org/schemas/iadopt-variable.json",
  "title": "I-ADOPT decomposed variable",
  "description": "Single variable expressed in the compact JSON layout used for LLM-driven decomposition.",
  "type": "object",

  "required": ["label", "comment", "hasProperty", "hasObjectOfInterest"],

  "properties": {
    "label": {
      "type": "string",
      "description": "Variable name."
    },
    "comment": {
      "type": "string",
      "description": "Definition / description."
    },
    "hasProperty": {
      "type": "string",
      "description": "Property being observed (e.g. 'distance', 'temperature')."
    },
    "hasStatisticalModifier": {
      "type": "string",
      "description": "Statistical qualifier (e.g. 'maximum', 'latest')."
    },
    "hasMatrix": {
      "$ref": "#/$defs/entityOrSystem"
    },
    "hasObjectOfInterest": {
      "$ref": "#/$defs/entityOrSystem"
    },
    "hasContextObject": {
      "$ref": "#/$defs/entityOrSystem"
    },
    "hasConstraint": {
      "type": "array",
      "description": "List of constraints, each with a label and the target it applies to.",
      "items": {
        "type": "object",
        "required": ["label", "on"],
        "properties": {
          "label": {
            "type": "string",
            "description": "Constraint label (e.g. 'nearest neighbour', 'daily')."
          },
          "on": {
            "type": "string",
            "description": "What the constraint applies to (e.g. 'hasTarget: habitat patch')."
          }
        },
        "additionalProperties": false
      },
      "minItems": 1
    }
  },

  "additionalProperties": false,

  "$defs": {
    "entityOrSystem": {
      "description": "Either a simple label or a structured system.",
      "oneOf": [
        { "type": "string" },
        {
          "$comment": "Asymmetric system",
          "type": "object",
          "required": [
            "AsymmetricSystem",
            "hasSource",
            "hasTarget",
            "hasNumerator",
            "hasDenominator"
          ],
          "properties": {
            "AsymmetricSystem": { "type": "string" },
            "hasSource":        { "type": "string" },
            "hasTarget":        { "type": "string" }
          },
          "additionalProperties": false
        },
        {
          "$comment": "Symmetric system",
          "type": "object",
          "required": ["SymmetricSystem", "hasPart"],
          "properties": {
            "SymmetricSystem": { "type": "string" },
            "hasPart": {
              "type": "array",
              "items": { "type": "string" },
              "minItems": 1
            }
          },
          "additionalProperties": false
        }
      ]
    }
  }
}

### Examples (valid against the same schema)
{
  "label": "Distance to nearest neighbour habitat patch",
  "comment": "This variable is part of the EBV Connectivity of terrestrial ecosystem habitat types and helps to measure the degree of connection of EUNIS habitats within a landscape, in terms of their spatial distribution. https://github.com/EuropaBON/EBV-Descriptions/wiki/Terrestrial-Connectivity-of-terrestrial-ecosystem-habitat-types",
  "hasProperty": "distance",
  "hasObjectOfInterest": {
    "AsymmetricSystem": "habitat patch system",
    "hasSource": "habitat patch",
    "hasTarget": "habitat patch"
  },
  "hasConstraint": [
    {
      "label": "nearest neighbour",
      "on": "hasTarget: habitat patch"
    }
  ]
}

{
  "label": "Weight specific-ingestion Carbon rate at 15 °C",
  "comment": "The amount of carbon consumed by an organism at non-limiting concentration of food relative to the individual dry weight measured at 15°C. It is expressed as μg C mg DW−1 h−1",
  "hasProperty": "mass flow rate",
  "hasMatrix": "organism",
  "hasObjectOfInterest": "Carbon",
  "hasConstraint": [
    {
      "label": "weight-specific",
      "on": "mass flow rate"
    },
    {
      "label": "dry",
      "on": "organism"
    },
    {
      "label": "at non-limiting conditions",
      "on": "Carbon"
    },
    {
      "label": "at 15°C temperature",
      "on": "mass flow rate"
    },
    {
      "label": "due to ingestion",
      "on": "mass flow rate"
    }
  ]
}

{
  "label": "Mass flux of carbon into soil from vegetation due to senescence",
  "comment": "In accordance with common usage in geophysical disciplines, \"flux\" implies per unit area, called \"flux density\" in physics. \"Vegetation\" means any living plants e.g. trees, shrubs, grass. The specification of a physical process by the phrase \"due_to_\" process means that the quantity named is a single term in a sum of terms which together compose the general quantity named by omitting the phrase. The term \"senescence\" means loss of living biomass excluding plant death, e.g. leaf drop and other seasonal effects. The term refers to changes in the whole plant and is not confined only to leaf drop.",
  "hasProperty": "mass flux",
  "hasMatrix": {
    "AsymmetricSystem": "from vegetation to soil",
    "hasSource": "vegetation",
    "hasTarget": "soil"
  },
  "hasObjectOfInterest": "carbon",
  "hasConstraint": [
    {
      "label": "due to senescence",
      "on": "mass flux"
    }
  ]
}

### Variable to decompose
label: Peak ground acceleration
comment: Peak acceleration measured on the earth surface when facing seismic events, like earthquakes.

### Expected output
*(only the JSON object)*"""

In [15]:
# Token ids of the 3-shot prompt under the loaded tokenizer. Note that
# `count_tokens` holds the id list itself; the value shown for this cell is its length.
count_tokens = tokenizer(prompt_text_3_shot)["input_ids"]
len(count_tokens)

2344

In [1]:
# Extra tokens the 5-shot prompt costs compared with the 3-shot prompt.
# Derived from the prompts themselves instead of hand-copied counts, so the
# figure cannot go stale when a prompt is edited (requires the tokenizer and
# both prompt cells to have been run first).
len(tokenizer.encode(prompt_text_5_shot)) - len(tokenizer.encode(prompt_text_3_shot))

254