# Graphe d'un programme GAML

In [176]:
original_code="""
/**
* Name: Basic model (prey agents)
* Author:
* Description: First part of the tutorial : Predator Prey
* Tags:
*/
model prey_predator

global {
	int nb_preys_init <- 200;
	init {
		create prey number: nb_preys_init;
	}
}

species prey {
	float size <- 1.0;
	rgb color <- #blue;
		
	aspect base {
		draw circle(size) color: color;
	}
} 

experiment prey_predator type: gui {
	parameter "Initial number of preys: " var: nb_preys_init min: 1 max: 1000 category: "Prey";
	output {
		display main_display {
			species prey aspect: base;
		}
	}
}

"""

In [177]:
original_code="""
/**
* Name: Exploration (batch)
* Author:
* Description: 13th part of the tutorial: Predator Prey
* Tags: batch
*/
model prey_predator

global {
	int nb_preys_init <- 200;
	int nb_predators_init <- 20;
	float prey_max_energy <- 1.0;
	float prey_max_transfer <- 0.1;
	float prey_energy_consum <- 0.05;
	float predator_max_energy <- 1.0;
	float predator_energy_transfer <- 0.5;
	float predator_energy_consum <- 0.02;
	float prey_proba_reproduce <- 0.01;
	int prey_nb_max_offsprings <- 5;
	float prey_energy_reproduce <- 0.5;
	float predator_proba_reproduce <- 0.01;
	int predator_nb_max_offsprings <- 3;
	float predator_energy_reproduce <- 0.5;
	file map_init <- image_file("../includes/data/raster_map.png");
	int nb_preys -> {length(prey)};
	int nb_predators -> {length(predator)};
	bool is_batch <- false;

	init {
		create prey number: nb_preys_init;
		create predator number: nb_predators_init;
		ask vegetation_cell {
			color <- rgb (map_init at {grid_x,grid_y});
			food <- 1 - (((color as list) at 0) / 255);
			food_prod <- food / 100; 
		}
	}
	
	reflex save_result when: (nb_preys > 0) and (nb_predators > 0){
		save ("cycle: "+ cycle + "; nbPreys: " + nb_preys
			+ "; minEnergyPreys: " + (prey min_of each.energy)
			+ "; maxSizePreys: " + (prey max_of each.energy) 
	   		+ "; nbPredators: " + nb_predators           
	   		+ "; minEnergyPredators: " + (predator min_of each.energy)          
	   		+ "; maxSizePredators: " + (predator max_of each.energy)) 
	   		to: "results.txt" rewrite: (cycle = 0) ? true : false;
	}
	
	reflex stop_simulation when: ((nb_preys = 0) or (nb_predators = 0)) and !is_batch {
		do pause;
	} 
}

species generic_species {
	float size <- 1.0;
	rgb color;
	float max_energy;
	float max_transfer;
	float energy_consum;
	float proba_reproduce;
	int nb_max_offsprings;
	float energy_reproduce;
	image_file my_icon;
	vegetation_cell my_cell <- one_of(vegetation_cell);
	float energy <- rnd(max_energy) update: energy - energy_consum max: max_energy;

	init {
		location <- my_cell.location;
	}

	reflex basic_move {
		my_cell <- choose_cell();
		location <- my_cell.location;
	}

	reflex eat {
		energy <- energy + energy_from_eat();		
	}

	reflex die when: energy <= 0 {
		do die;
	}

	reflex reproduce when: (energy >= energy_reproduce) and (flip(proba_reproduce)) {
		int nb_offsprings <- rnd(1, nb_max_offsprings);
		create species(self) number: nb_offsprings {
			my_cell <- myself.my_cell;
			location <- my_cell.location;
			energy <- myself.energy / nb_offsprings;
		}

		energy <- energy / nb_offsprings;
	}

	float energy_from_eat {
		return 0.0;
	}

	vegetation_cell choose_cell {
		return nil;
	}

	aspect base {
		draw circle(size) color: color;
	}

	aspect icon {
		draw my_icon size: 2 * size;
	}

	aspect info {
		draw square(size) color: color;
		draw string(energy with_precision 2) size: 3 color: #black;
	}
}

species prey parent: generic_species {
	rgb color <- #blue;
	float max_energy <- prey_max_energy;
	float max_transfer <- prey_max_transfer;
	float energy_consum <- prey_energy_consum;
	float proba_reproduce <- prey_proba_reproduce;
	int nb_max_offsprings <- prey_nb_max_offsprings;
	float energy_reproduce <- prey_energy_reproduce;
	image_file my_icon <- image_file("../includes/data/sheep.png");

	float energy_from_eat {
		float energy_transfer <- 0.0;
		if(my_cell.food > 0) {
			energy_transfer <- min([max_transfer, my_cell.food]);
			my_cell.food <- my_cell.food - energy_transfer;
		} 			
		return energy_transfer;
	}

	vegetation_cell choose_cell {
		return (my_cell.neighbors2) with_max_of (each.food);
	}
}

species predator parent: generic_species {
	rgb color <- #red;
	float max_energy <- predator_max_energy;
	float energy_transfer <- predator_energy_transfer;
	float energy_consum <- predator_energy_consum;
	float proba_reproduce <- predator_proba_reproduce;
	int nb_max_offsprings <- predator_nb_max_offsprings;
	float energy_reproduce <- predator_energy_reproduce;
	image_file my_icon <- image_file("../includes/data/wolf.png");

	float energy_from_eat {
		list<prey> reachable_preys <- prey inside (my_cell);
		if(! empty(reachable_preys)) {
			ask one_of (reachable_preys) {
				do die;
			}
			return energy_transfer;
		}
		return 0.0;
	}

	vegetation_cell choose_cell {
		vegetation_cell my_cell_tmp <- shuffle(my_cell.neighbors2) first_with (!(empty(prey inside (each))));
		if my_cell_tmp != nil {
			return my_cell_tmp;
		} else {
			return one_of(my_cell.neighbors2);
		}
	}
}

grid vegetation_cell width: 50 height: 50 neighbors: 4 {
	float max_food <- 1.0;
	float food_prod <- rnd(0.01);
	float food <- rnd(1.0) max: max_food update: food + food_prod;
	rgb color <- rgb(int(255 * (1 - food)), 255, int(255 * (1 - food))) update: rgb(int(255 * (1 - food)), 255, int(255 * (1 - food)));
	list<vegetation_cell> neighbors2 <- (self neighbors_at 2);
}

experiment prey_predator type: gui {
	parameter "Initial number of preys: " var: nb_preys_init min: 0 max: 1000 category: "Prey";
	parameter "Prey max energy: " var: prey_max_energy category: "Prey";
	parameter "Prey max transfer: " var: prey_max_transfer category: "Prey";
	parameter "Prey energy consumption: " var: prey_energy_consum category: "Prey";
	parameter "Initial number of predators: " var: nb_predators_init min: 0 max: 200 category: "Predator";
	parameter "Predator max energy: " var: predator_max_energy category: "Predator";
	parameter "Predator energy transfer: " var: predator_energy_transfer category: "Predator";
	parameter "Predator energy consumption: " var: predator_energy_consum category: "Predator";
	parameter 'Prey probability reproduce: ' var: prey_proba_reproduce category: 'Prey';
	parameter 'Prey nb max offsprings: ' var: prey_nb_max_offsprings category: 'Prey';
	parameter 'Prey energy reproduce: ' var: prey_energy_reproduce category: 'Prey';
	parameter 'Predator probability reproduce: ' var: predator_proba_reproduce category: 'Predator';
	parameter 'Predator nb max offsprings: ' var: predator_nb_max_offsprings category: 'Predator';
	parameter 'Predator energy reproduce: ' var: predator_energy_reproduce category: 'Predator';

	output {
		display main_display type:2d antialias:false {
			grid vegetation_cell border: #black;
			species prey aspect: icon;
			species predator aspect: icon;
		}

		display info_display type:2d antialias:false {
			grid vegetation_cell border: #black;
			species prey aspect: info;
			species predator aspect: info;
		}

		display Population_information refresh: every(5#cycles)  type: 2d {
			chart "Species evolution" type: series size: {1,0.5} position: {0, 0} {
				data "number_of_preys" value: nb_preys color: #blue;
				data "number_of_predator" value: nb_predators color: #red;
			}
			chart "Prey Energy Distribution" type: histogram background: #lightgray size: {0.5,0.5} position: {0, 0.5} {
				data "]0;0.25]" value: prey count (each.energy <= 0.25) color:#blue;
				data "]0.25;0.5]" value: prey count ((each.energy > 0.25) and (each.energy <= 0.5)) color:#blue;
				data "]0.5;0.75]" value: prey count ((each.energy > 0.5) and (each.energy <= 0.75)) color:#blue;
				data "]0.75;1]" value: prey count (each.energy > 0.75) color:#blue;
			}
			chart "Predator Energy Distribution" type: histogram background: #lightgray size: {0.5,0.5} position: {0.5, 0.5} {
				data "]0;0.25]" value: predator count (each.energy <= 0.25) color: #red;
				data "]0.25;0.5]" value: predator count ((each.energy > 0.25) and (each.energy <= 0.5)) color: #red;
				data "]0.5;0.75]" value: predator count ((each.energy > 0.5) and (each.energy <= 0.75)) color: #red;
				data "]0.75;1]" value: predator count (each.energy > 0.75) color: #red;
			}
		}

		monitor "Number of preys" value: nb_preys;
		monitor "Number of predators" value: nb_predators;
	}
}

experiment Optimization type: batch repeat: 2 keep_seed: true until: ( time > 200 ) {
	parameter "Prey max transfer:" var: prey_max_transfer min: 0.05 max: 0.5 step: 0.05;
	parameter "Prey energy reproduce:" var: prey_energy_reproduce min: 0.05 max: 0.75 step: 0.05;
	parameter "Predator energy transfer:" var: predator_energy_transfer min: 0.1 max: 1.0 step: 0.1;
	parameter "Predator energy reproduce:" var: predator_energy_reproduce min: 0.1 max: 1.0 step: 0.1;
	parameter "Batch mode:" var: is_batch <- true;
	
	method tabu maximize: nb_preys + nb_predators iter_max: 10 tabu_list_size: 3;
	
	
	reflex save_results_explo {
		ask simulations {
			save [int(self),prey_max_transfer,prey_energy_reproduce,predator_energy_transfer,predator_energy_reproduce,self.nb_predators,self.nb_preys] 
		   		to: "results.csv" format:"csv" rewrite: (int(self) = 0) ? true : false header: true;
		}		
	}
}

"""

## Preprocessing

### On enlève les commentaires au début

In [178]:
import re

# Step 1: Strip the leading /** ... */ header comment together with the
# whitespace around it. re.DOTALL lets `.` cross newlines, the lazy `.*?` stops
# at the first `*/`, and `^` (without re.MULTILINE) anchors the match to the
# very start of the string, so only a comment at the top is removed.
code = re.sub(r'^\s*/\*\*.*?\*/\s*', '', original_code, flags=re.DOTALL)

# Step 2: Drop every line that is empty or contains only whitespace.
code = '\n'.join(line for line in code.splitlines() if line.strip())

In [179]:
print(code)

model prey_predator
global {
	int nb_preys_init <- 200;
	int nb_predators_init <- 20;
	float prey_max_energy <- 1.0;
	float prey_max_transfer <- 0.1;
	float prey_energy_consum <- 0.05;
	float predator_max_energy <- 1.0;
	float predator_energy_transfer <- 0.5;
	float predator_energy_consum <- 0.02;
	float prey_proba_reproduce <- 0.01;
	int prey_nb_max_offsprings <- 5;
	float prey_energy_reproduce <- 0.5;
	float predator_proba_reproduce <- 0.01;
	int predator_nb_max_offsprings <- 3;
	float predator_energy_reproduce <- 0.5;
	file map_init <- image_file("../includes/data/raster_map.png");
	int nb_preys -> {length(prey)};
	int nb_predators -> {length(predator)};
	bool is_batch <- false;
	init {
		create prey number: nb_preys_init;
		create predator number: nb_predators_init;
		ask vegetation_cell {
			color <- rgb (map_init at {grid_x,grid_y});
			food <- 1 - (((color as list) at 0) / 255);
			food_prod <- food / 100; 
		}
	}
	reflex save_result when: (nb_preys > 0) and (nb_predators > 0){


### Découpage du code mot par mot

In [180]:
# Naive word-level split over the whole source: a '#'-prefixed word, a run of
# word characters, or any single character that is neither a word character nor
# whitespace. Multi-character operators and decimal numbers are broken apart by
# this pattern (e.g. '<-' becomes '<', '-').
tokens = re.findall(r'#\w+|\w+|[^\w\s]', code)
print(tokens)

['model', 'prey_predator', 'global', '{', 'int', 'nb_preys_init', '<', '-', '200', ';', 'int', 'nb_predators_init', '<', '-', '20', ';', 'float', 'prey_max_energy', '<', '-', '1', '.', '0', ';', 'float', 'prey_max_transfer', '<', '-', '0', '.', '1', ';', 'float', 'prey_energy_consum', '<', '-', '0', '.', '05', ';', 'float', 'predator_max_energy', '<', '-', '1', '.', '0', ';', 'float', 'predator_energy_transfer', '<', '-', '0', '.', '5', ';', 'float', 'predator_energy_consum', '<', '-', '0', '.', '02', ';', 'float', 'prey_proba_reproduce', '<', '-', '0', '.', '01', ';', 'int', 'prey_nb_max_offsprings', '<', '-', '5', ';', 'float', 'prey_energy_reproduce', '<', '-', '0', '.', '5', ';', 'float', 'predator_proba_reproduce', '<', '-', '0', '.', '01', ';', 'int', 'predator_nb_max_offsprings', '<', '-', '3', ';', 'float', 'predator_energy_reproduce', '<', '-', '0', '.', '5', ';', 'file', 'map_init', '<', '-', 'image_file', '(', '"', '.', '.', '/', 'includes', '/', 'data', '/', 'raster_map', '

### Découpage ligne par ligne

In [181]:
tokenized_lines = []
# Alternatives are tried left to right, so the longer forms are listed first:
#   "[^"]*"                  a double-quoted string kept as a single token
#   <=  >=  ->  !=  <-  :=   two-character operators
#   \d+d                     digits immediately followed by 'd'
#   \d+\.\d+  then  \d+      decimal numbers, then plain integers
#   #\w+                     '#'-prefixed words
#   \w+                      runs of word characters
#   [.]                      a lone dot
#   [^\s\w#".]               any other single character
# NOTE(review): only double-quoted strings are grouped; a single-quoted string
# is split into its individual words and quote characters.
pattern = r'"[^"]*"|<=|>=|->|!=|<\-|:=|\d+d|\d+\.\d+|\d+|#\w+|\w+|[.]|[^\s\w#".]'

# Build one token list per line of the preprocessed code.
for line in code.strip().splitlines():
    tokens = re.findall(pattern, line)
    tokenized_lines.append(tokens)

In [182]:
# Print or use `tokenized_lines`
for line_tokens in tokenized_lines:
    print(line_tokens)

['model', 'prey_predator']
['global', '{']
['int', 'nb_preys_init', '<-', '200', ';']
['int', 'nb_predators_init', '<-', '20', ';']
['float', 'prey_max_energy', '<-', '1.0', ';']
['float', 'prey_max_transfer', '<-', '0.1', ';']
['float', 'prey_energy_consum', '<-', '0.05', ';']
['float', 'predator_max_energy', '<-', '1.0', ';']
['float', 'predator_energy_transfer', '<-', '0.5', ';']
['float', 'predator_energy_consum', '<-', '0.02', ';']
['float', 'prey_proba_reproduce', '<-', '0.01', ';']
['int', 'prey_nb_max_offsprings', '<-', '5', ';']
['float', 'prey_energy_reproduce', '<-', '0.5', ';']
['float', 'predator_proba_reproduce', '<-', '0.01', ';']
['int', 'predator_nb_max_offsprings', '<-', '3', ';']
['float', 'predator_energy_reproduce', '<-', '0.5', ';']
['file', 'map_init', '<-', 'image_file', '(', '"../includes/data/raster_map.png"', ')', ';']
['int', 'nb_preys', '->', '{', 'length', '(', 'prey', ')', '}', ';']
['int', 'nb_predators', '->', '{', 'length', '(', 'predator', ')', '}', '

## Mise en graphe du code

In [183]:
from neo4j import GraphDatabase

# Connect to Neo4j
# NOTE(review): the URI and credentials are hard-coded in the notebook —
# consider reading them from configuration or the environment.
uri = "bolt://localhost:7687"
username = "neo4j"
password = "password"

driver = GraphDatabase.driver(uri, auth=(username, password))

In [184]:
def create_token_node(tx, token, line_number, position):
    """Create one `Token` node describing a single lexical token.

    Parameters:
        tx: Object exposing `run(query, **params)` (a transaction).
        token: Token text, stored in the node's `value` property.
        line_number: Stored in the node's `line` property.
        position: Stored in the node's `position` property.

    Every created node is also tagged with `code_example: True`.
    """
    statement = """
        CREATE (:Token {
            value: $value,
            line: $line,
            position: $position,
            code_example: True
        })
    """
    bindings = {"value": token, "line": line_number, "position": position}
    tx.run(statement, **bindings)

In [185]:
# Persist every token, one write transaction per token. Line numbers and
# in-line positions are both 1-based (enumerate(..., start=1)).
# NOTE(review): the loop variables `token` and `position` stay bound at module
# level after this cell; `ask_token_disambiguation` reads names `token` and
# `position` that it does not define locally.
with driver.session() as session:
    for line_number, tokens in enumerate(tokenized_lines, start=1):
        for position, token in enumerate(tokens, start=1):
            session.execute_write(create_token_node, token, line_number, position)


In [186]:
def link_next_tokens_by_position(tx):
    """Chain neighbouring tokens of the same line with `NEXT` relationships.

    For every pair of `Token` nodes that share a `line` and whose `position`
    values differ by exactly one, a `NEXT` relationship is merged from the
    earlier token to the later one.

    Parameters:
        tx: Object exposing `run(query)` (a transaction).
    """
    adjacency_query = """
        MATCH (a:Token), (b:Token)
        WHERE a.line = b.line AND a.position = b.position - 1
        MERGE (a)-[:NEXT]->(b)
    """
    tx.run(adjacency_query)

# Create the in-line NEXT relationships inside a single write transaction.
with driver.session() as session:
    session.execute_write(link_next_tokens_by_position)


In [187]:
def link_next_lines(tx):
    """Connect the end of each line to the start of the following line.

    For every line, the `Token` with the highest `position` gets a merged
    `NEXT_LINE` relationship to the `Token` at position 1 of line + 1, when
    such a token exists.

    Parameters:
        tx: Object exposing `run(query)` (a transaction).
    """
    line_bridge_query = """
        MATCH (a:Token)
        WITH a.line AS line, max(a.position) AS max_pos
        MATCH (end_token:Token {line: line, position: max_pos})
        MATCH (start_token:Token {line: line + 1, position: 1})
        MERGE (end_token)-[:NEXT_LINE]->(start_token)
    """
    tx.run(line_bridge_query)

# Create the line-to-line NEXT_LINE relationships inside a single write transaction.
with driver.session() as session:
    session.execute_write(link_next_lines)


In [188]:
def mark_block_heads(tx):
    """Flag the first token of every line that opens a block.

    A line opens a block when its highest-position token has the value '{'.
    The token at position 1 of such a line receives `block_head = true`.

    Parameters:
        tx: Object exposing `run(query)` (a transaction).
    """
    block_head_query = """
        MATCH (first:Token {position: 1})
        MATCH (last:Token)
        WHERE last.line = first.line
        WITH first, last
        ORDER BY last.position DESC
        WITH first, collect(last)[0] AS last_token
        WHERE last_token.value = '{'
        SET first.block_head = true
    """
    tx.run(block_head_query)

# Flag block-opening lines inside a single write transaction.
with driver.session() as session:
    session.execute_write(mark_block_heads)

#### Gestion des blocs

In [189]:
def add_word_after_property(session):
    """Store, on each block head, the value of the token that follows it.

    For every node with `block_head = true` that has an outgoing `NEXT` or
    `NEXT_LINE` relationship, the neighbour's `value` is copied into the
    head's `word_after` property.

    Parameters:
        session: Object exposing `run(query)`.
    """
    word_after_query = """
        MATCH (n)-[:NEXT|NEXT_LINE]->(next)
        WHERE n.block_head=true
        SET n.word_after = next.value
    """
    session.run(word_after_query)

def add_of_group_relations(session):
    """Attach every token inside a `{ ... }` span to the head of the opening line.

    Tokens are read in (line, position) order while a stack of open blocks is
    maintained:

    - `{` pushes the first token (position 1) of its own line as the new
      current head; nothing is pushed when that line has no position-1 token.
    - `}` pops the most recently opened head (ignored when nothing is open).
    - Any other token, while at least one block is open, is linked to the
      current head with a merged `OF_BLOCK` relationship.

    The head of each line is taken from the rows already fetched instead of
    being re-queried for every `{`, and all relationships are written with one
    `UNWIND` statement instead of one round trip per token.

    Parameters:
        session: Object exposing `run(query, **params)` whose result offers
            `.data()` returning a list of row dicts.
    """
    # Step 1: Get all tokens ordered by line and position
    tokens = session.run("""
        MATCH (t:Token)
        RETURN elementId(t) AS id, t.line AS line, t.position AS pos, t.value AS value
        ORDER BY t.line ASC, t.position ASC
    """).data()

    # Step 2: Index the head (position 1) of every line; keep the first one seen.
    line_heads = {}
    for token in tokens:
        if token["pos"] == 1:
            line_heads.setdefault(token["line"], token["id"])

    # Step 3: Walk the tokens and collect (token, head) memberships.
    open_heads = []   # ids of the heads of the currently open blocks
    memberships = []  # rows for the batched MERGE below
    for token in tokens:
        value = token["value"]
        if value == "{":
            head_id = line_heads.get(token["line"])
            if head_id is not None:
                open_heads.append(head_id)
        elif value == "}":
            if open_heads:
                open_heads.pop()
        elif open_heads:
            # Inside a block → belongs to the last opened block head
            memberships.append({"token_id": token["id"], "head_id": open_heads[-1]})

    # Step 4: Write every OF_BLOCK relationship in a single statement.
    if memberships:
        session.run("""
            UNWIND $pairs AS pair
            MATCH (token), (head)
            WHERE elementId(token) = pair.token_id AND elementId(head) = pair.head_id
            MERGE (token)-[:OF_BLOCK]->(head)
        """, pairs=memberships)


In [190]:
# Build the OF_BLOCK relationships first, then annotate each block head with
# the value of the token that follows it.
with driver.session() as session:
    add_of_group_relations(session)
    add_word_after_property(session)

  warn(


### Correspondance avec des nœuds déjà existants

In [191]:
def link_code_examples(session, variable):
    """Link code-example tokens to existing nodes that carry the same name.

    Every `Token` with `code_example: true` gets a merged `IS` relationship to
    each node labelled `variable` whose `name` equals the token's `value` and
    whose `code_example` property is null.

    Parameters:
        session: Object exposing `run(query)`.
        variable: Node label to match against; it is interpolated directly
            into the query text, so it must be a trusted label name.
    """
    link_query = f"""
        MATCH (example:Token {{code_example: true}})
        MATCH (original:{variable})
        WHERE original.name = example.value AND original.code_example IS NULL
        MERGE (example)-[:IS]->(original)
    """
    session.run(link_query)


In [192]:


# Node labels that code-example tokens are matched against.
# Each label is its own list element: without a comma between two adjacent
# string literals Python concatenates them into a single (wrong) label.
to_link=["Action", "built_in_specie", "Variable","built_in_skill","built_in_architecture", "FileType", "Operator", "Statement", "ConstantOrUnit", "PseudoVariable",
 "VariableOrAttribute", "BuiltInAttribute", "BuiltInAgentAttribute", "BuiltInSpeciesAttribute"]


# Every label is covered except Type and PossibleOperation.

# Link the code-example tokens against each label, opening a fresh session per label.
for i in to_link:
    with driver.session() as session:
        link_code_examples(session,i)

# Partie IA

## On matche chaque node qui a un IS sortant et on regarde lequel devrait être gardé en fonction du contexte

In [193]:
import re
import warnings
from langchain_core._api.deprecation import LangChainDeprecationWarning

# Suppress deprecation warnings
warnings.filterwarnings("ignore", category=LangChainDeprecationWarning)

from langchain_ollama import ChatOllama, OllamaEmbeddings
from langchain.schema.output_parser import StrOutputParser
from langchain_chroma import Chroma
from langchain_core.prompts import ChatPromptTemplate
from langchain.schema.runnable import RunnablePassthrough

# Setup models once
# The chat model is selected by `_llm_model`; the commented-out line keeps an
# alternative model name at hand.
#_llm_model = "deepseek-r1:latest"
_llm_model="codeqwen:latest"
# Embedding model name used to build `_embeddings` below.
_embedding_model = "mxbai-embed-large"
_model = ChatOllama(model=_llm_model)
_embeddings = OllamaEmbeddings(model=_embedding_model)

# Prompt template


#You are an expert in the GAML language. Use the following documentation to understand the context:
#{references}

# Disambiguation prompt. Placeholders filled at invocation time:
# {complete_code}, {token}, {blocks} and {options}. The model is instructed to
# reply with the id of the chosen candidate only.
_prompt = ChatPromptTemplate.from_template(
    """
Here is a code snippet of gaml code:
{complete_code}

The candidate is {token}, marked by carets in the code sippet. It descend from the following blocks : 
{blocks}

Each candidate represents a potential semantic match and includes:
- An `id`: a unique identifier.
- A `label`: the category of the node (e.g., BuiltInAction, Keyword, Facet).
- A `name`: the identifier of the node.
- A list of incoming nodes (nodes that reference this one), which helps you understand how the node is used in the language. These nodes may include Statements, Facets, or other structural links.

You should select the **best matching candidate** by carefully analyzing:
- The candidate’s label and name
- The role of the token in the code
- The nodes that point to each candidate (incoming links)
- The types of the objects and their context

Candidates:

{options}

Return **only the id** of the best matching candidate (as a number, with no explanation).
"""
)


# Retrieval over a persisted Chroma vector store is currently disabled; the
# setup is kept below for reference.
# Load existing vector store
# _vector_store_refs = Chroma(
#     embedding_function=_embeddings,
#     persist_directory="../chroma_db_wiki"
# )

# Create retriever
# _retriever_refs = _vector_store_refs.as_retriever(
#     search_type="similarity_score_threshold",
#     search_kwargs={"k": 1, "score_threshold": 0.4},
# )

# Full QA chain: the input is piped into the prompt template, then the chat
# model, and the model's reply is finally parsed by StrOutputParser.
_chain = RunnablePassthrough() | _prompt | _model | StrOutputParser()


def format_option_entry(option: dict) -> str:
    """Render one candidate as the multi-line text block shown to the model.

    Parameters:
        option: Candidate dict. `id` is required; `type` and `name` fall back
            to 'Unknown' / 'Unnamed'. `incoming` is an optional list of dicts
            whose `type` and `name` use the same fallbacks.

    Returns:
        Four logical lines (id, label, name, incoming from). Each incoming
        node is printed on its own '↳' line; ' None' is printed when there
        are no incoming nodes.
    """
    incoming_nodes = option.get("incoming")
    if incoming_nodes:
        bullet = "\n      ↳ "
        rendered = [
            f"{node.get('type', 'Unknown')}:{node.get('name', 'Unnamed')}"
            for node in incoming_nodes
        ]
        incoming_str = bullet + bullet.join(rendered)
    else:
        incoming_str = " None"

    parts = [
        f"- id: {option['id']}",
        f"  label: {option.get('type', 'Unknown')}",
        f"  name: {option.get('name', 'Unnamed')}",
        f"  incoming from:{incoming_str}",
    ]
    return "\n".join(parts)


def ask_token_disambiguation(options: list, complete_code: str, blocks, token=None, position=None) -> dict:
    """
    Ask the model which candidate best matches a token in a given code context.

    Parameters:
        options (list): Candidate dicts, each rendered with `format_option_entry`.
        complete_code (str): The code context to consider.
        blocks: Description of the blocks the token descends from; inserted
            into the prompt as-is.
        token (optional): The token to disambiguate. When omitted, the
            module-level name `token` is used, which is what this function
            implicitly relied on before the parameter existed.
        position (optional): Position of the token. When omitted, the
            module-level name `position` is used if it exists, else None.

    Returns:
        dict: {"answer": str}, the model reply with any `<think>...</think>`
        section removed and surrounding whitespace stripped.

    Raises:
        NameError: if `token` is neither passed nor defined at module level.
    """
    # Backward compatibility: earlier callers set `token` / `position` as
    # notebook globals instead of passing them in.
    if token is None:
        try:
            token = globals()["token"]
        except KeyError:
            raise NameError(
                "name 'token' is not defined: pass token=... to ask_token_disambiguation"
            ) from None
    if position is None:
        position = globals().get("position")

    # Retrieve relevant documents from the vector store
    #truc_to_retrieve = token + ' ' + ' '.join(
    #    ' '.join(f"{k} {v}" for k, v in opt.items() if not isinstance(v, list))
    #    for opt in options
    #)
    #retrieved_docs = _retriever_refs.invoke(truc_to_retrieve)
#
    #if not retrieved_docs:
    #    return {
    #        "answer": "No relevant context found. Please check your code or document store.",
    #        "sources": []
    #    }

    formatted_input = {
        #"references": "\n\n".join(doc.page_content for doc in retrieved_docs),
        "complete_code": complete_code,
        "token": token,
        "blocks": blocks,
        "position": position,
        "options": "\n\n".join(format_option_entry(opt) for opt in options),
    }

    raw_output = _chain.invoke(formatted_input)
    # Some models emit their reasoning inside <think> tags; keep only the answer.
    cleaned_output = re.sub(r"<think>.*?</think>", "", raw_output, flags=re.DOTALL).strip()

    #sources = [
    #    {
    #        "index": i + 1,
    #        "source": doc.metadata.get("source", "unknown"),
    #        "score": doc.metadata.get("score", None)
    #    }
    #    for i, doc in enumerate(retrieved_docs)
    #]

    return {
        "answer": cleaned_output,
        #"sources": sources
    }



In [194]:
#response = ask_gaml_question("What is a gaml reflex ?")
#print("Answer:", response["answer"])

In [195]:
from neo4j import GraphDatabase

# Same connection settings as earlier in the notebook; this rebinds `driver`
# to a newly created driver object.
uri = "bolt://localhost:7687"
username = "neo4j"
password = "password"
driver = GraphDatabase.driver(uri, auth=(username, password))

# For every Token that has more than one outgoing IS relationship, return the
# token, its candidate targets (each with the non-Token nodes pointing at it)
# and the value / word_after of every ancestor reached through OF_BLOCK.
cypher = """
MATCH (t:Token)-[:IS]->(target)

OPTIONAL MATCH (source)-[]->(target)
WHERE NOT source:Token

OPTIONAL MATCH (t)-[:OF_BLOCK*]->(ancestor)

WITH t, target, id(target) AS target_id,
     target.all_content AS all_content,
     labels(target) AS target_labels,
     collect(DISTINCT {
       id: id(source),
       props: properties(source),
       labels: labels(source)
     }) AS incoming_sources,
     collect(DISTINCT {
       value: ancestor.value,
       word_after: ancestor.word_after
     }) AS block_info

WITH t, block_info,
     collect({
       id: target_id,
       all_content: all_content,
       labels: target_labels,
       incoming: incoming_sources
     }) AS targets

WHERE size(targets) > 1
RETURN id(t) AS token_id,
       t.value AS value,
       t.position AS position,
       t.line AS line,
       targets,
       block_info

"""

def get_token_possibilities_with_target_and_incoming(driver):
    """Run `cypher` and flatten each ambiguous token into printable strings.

    Parameters:
        driver: Object whose `session()` returns a context manager exposing
            `run(query)`; iterating the result yields mapping-like records.

    Returns:
        A list with one `[token_id, token_text, targets_text, block_info]`
        entry per record, where:
          - token_text is "<value> line <line> position <position>";
          - targets_text is a list of {"target": str, "incoming": [str, ...]}
            in the "key: value; key: value" flat format;
          - block_info is a list of "value: ..., word_after: ..." strings.
    """
    with driver.session() as session:
        result = session.run(cypher)
        output = []

        for record in result:
            token_id = record["token_id"]
            token_text = f"{record['value']} line {record['line']} position {record['position']}"
            targets_text = []

            for t in record["targets"]:
                target_info = f"id: {t['id']}; all_content: {t.get('all_content', '')}"
                label_info = f"labels: {', '.join(t['labels'])}"
                incoming_info = []
                for inc in t.get("incoming", []):
                    # A target without incoming nodes still yields one entry
                    # whose fields are all null, hence the `or` fallbacks.
                    labels = inc.get('labels') or []
                    props = inc.get("props") or {}
                    incoming_desc = (
                        f"id: {inc.get('id', '?')}; labels: {', '.join(labels)}; " +
                        "; ".join(f"{k}: {v}" for k, v in props.items())
                    )
                    incoming_info.append(incoming_desc)

                targets_text.append({
                    "target": f"{target_info}; {label_info}",
                    "incoming": incoming_info
                })

            # Block ancestry belongs to the record, not to one target: build it
            # once per record so it is always defined and never carried over
            # from a previous record.
            block_info = [
                f"value: {entry.get('value')}, word_after: {entry.get('word_after')}"
                for entry in record.get("block_info", [])  # not block_context!
            ]

            output.append([token_id, token_text, targets_text, block_info])

        return output


# Fetch every ambiguous token together with its candidate targets and block ancestry.
output = get_token_possibilities_with_target_and_incoming(driver)

# Inspection loop: only the block ancestry of each entry is printed; the
# remaining prints are kept commented out for debugging.
for token_entry in output:
    token_id = token_entry[0]
    token_str = token_entry[1]
    target_entries = token_entry[2]
    block_info=token_entry[3]

    # print(f"Token ID: {token_id}")
    # print(f"Token: {token_str}")
    # print("Targets:")
    for target in target_entries:
        #print(f"  - {target['target']}")
        #print("    Incoming from:")
        for inc in target["incoming"]:
            #print(f"      * {inc}")
            pass
    #print("---")
    print(block_info)




['value: reflex, word_after: save_result', 'value: global, word_after: {']
['value: global, word_after: {']
['value: global, word_after: {']
['value: ask, word_after: simulations', 'value: reflex, word_after: save_results_explo', 'value: experiment, word_after: Optimization']
['value: reflex, word_after: save_result', 'value: global, word_after: {']
['value: global, word_after: {']
['value: global, word_after: {']
['value: ask, word_after: simulations', 'value: reflex, word_after: save_results_explo', 'value: experiment, word_after: Optimization']
['value: global, word_after: {']
['value: global, word_after: {']
['value: None, word_after: None']
['value: species, word_after: generic_species']
['value: species, word_after: generic_species']
['value: species, word_after: generic_species']
['value: species, word_after: generic_species']
['value: reflex, word_after: reproduce', 'value: species, word_after: generic_species']
['value: None, word_after: None']
['value: None, word_after: None'

In [196]:
output[0][1]

'= line 37 position 8'

In [197]:
# # ask_token_disambiguation(token: str, options: list, complete_code: str)
# answer=ask_token_disambiguation(output[0][1], output[0][2], code)
# print(answer['answer'])

In [198]:
def parse_flat_target_entry(entry):
    """Turn one flat "key: value; key: value" target entry back into a dict.

    Parameters:
        entry: Dict with a `target` string and an optional `incoming` list of
            strings, all in the "key: value; ..." flat format.

    Returns:
        Dict with `id` (int, -1 when absent), `type` (the `type` field, else
        the `labels` field, else 'Unknown'), `all_content` ('Unnamed' when
        absent) and `incoming`, a list of {"type", "name"} dicts.

    Fragments without a ':' are ignored; when a key appears several times the
    last occurrence wins.
    """
    def _fields(flat_text):
        # Split on ';', keep only "key: value" fragments, cut at the first ':'.
        pairs = []
        for chunk in flat_text.split(";"):
            if ":" not in chunk:
                continue
            key, value = chunk.strip().split(":", 1)
            pairs.append((key, value))
        return dict(pairs)

    # Parse main target string
    fields = _fields(entry["target"])
    if "type" in fields:
        kind = fields["type"]
    else:
        kind = fields.get("labels", "Unknown")

    parsed = {
        "id": int(fields.get("id", -1)),
        "type": kind.strip(),
        "all_content": fields.get("all_content", "Unnamed").strip(),
    }

    # Parse incoming nodes (if any)
    incoming_nodes = []
    for raw in entry.get("incoming", []):
        node_fields = _fields(raw)
        node = {
            "type": node_fields.get("labels", "Unknown").strip(),
            "name": node_fields.get("name", "Unnamed").strip(),
        }
        incoming_nodes.append(node)

    parsed["incoming"] = incoming_nodes
    return parsed


def mark_token_with_caret_by_name(code_lines, relative_line_index, token_name, token_index_hint, pattern):  # note: "position" would be a better name than "index_hint"
    """
    Insert a caret marker line (``^``) under the N-th token of one line.

    The token is chosen among *all* matches of ``pattern`` on the line, not
    only among the occurrences of ``token_name``.

    Parameters
    ----------
    code_lines : list of str
        Lines of the snippet; the list itself is not modified.
    relative_line_index : int
        0-based index, within ``code_lines``, of the line holding the token.
    token_name : str
        Text of the token; only used in the diagnostic output.
    token_index_hint : int
        1-based rank of the token among the ``pattern`` matches of the line.
    pattern : str or compiled pattern
        Tokenizer regular expression handed to ``re.finditer``.

    Returns
    -------
    str
        The snippet joined with newlines, with one extra line of carets
        inserted right below the target line.  When the line index or the
        token rank is out of range the snippet is returned without a marker
        (a diagnostic is printed for an out-of-range token rank).
    """
    unmarked = "\n".join(code_lines)

    if relative_line_index < 0 or relative_line_index >= len(code_lines):
        return unmarked

    line = code_lines[relative_line_index]
    matches = list(re.finditer(pattern, line))

    # Convert the 1-based rank to a list index.
    token_index = token_index_hint - 1

    # A rank of 0 (or less) would become a negative index and silently select
    # a token counted from the END of the line (or raise IndexError when the
    # line has no match at all), so it is rejected like a too-large rank.
    if token_index < 0 or token_index >= len(matches):
        print("----------------------------------------------")
        print("DEBUG — pattern:", pattern)
        print("DEBUG — line repr:", repr(line))
        print("DEBUG — matches found:", [m.group() for m in matches])
        print("matches : "+str(matches))
        print("-------------")
        print(token_name)
        print(f"WARNING: Token index {token_index} out of range for line {relative_line_index}: {line}")
        print(unmarked)
        return unmarked

    match = matches[token_index]
    start_index = match.start()
    matched_token = match.group()

    # Build the marker line; tabs of the source line are copied as tabs so the
    # carets stay aligned however the tab width is rendered.
    padding = "".join("\t" if ch == "\t" else " " for ch in line[:start_index])
    marker_line = padding + "^" * len(matched_token)

    # Insert the caret line right below the target line, on a copy.
    marked_lines = code_lines.copy()
    marked_lines.insert(relative_line_index + 1, marker_line)

    return "\n".join(marked_lines)


In [199]:
output

[[1303,
  '= line 37 position 8',
  [{'target': 'id: 6217; all_content: special_cases: * if both operands are any kind of objects, returns true if they are identical (i.e., the same object) or equal (comparisons between nil values are permitted) \n  \n \nbool var0 <- [2,3] = [2,3]; // var0 equals true\n description: returns true if both operands are equal, false otherwise\nreturns true if both operands are equal, false otherwise name: = example:  \nbool var1 <- 4.7 = 4; // var1 equals false \nbool var2 <- 4.5 = 4.7; // var2 equals false \nbool var3 <- 3 = 3.0; // var3 equals true \nbool var4 <- 4 = 4.7; // var4 equals false \nbool var5 <- 4 = 5; // var5 equals false \nbool var6 <- #now = #now minus_hours 1; // var6 equals false\n\n      \n\n\n**See also:** [!=](OperatorsAA#!=), [>](OperatorsAA#>), [<](OperatorsAA#<), [>=](OperatorsAA#>=), [<=](OperatorsAA#<=), \n    \t\n---- comment: ; labels: Operator',
    'incoming': ["id: 6245; labels: Operator; weight: 0; special_cases: * if one o

In [200]:
# Inspect element 3 of the entries in `output` (the value that
# disambiguate_tokens later passes on as `block_info`): first for entry 0
# alone, then for every entry, so the first entry is printed twice.
print(output[0][3])
for i in output:
    print(i[3])

['value: reflex, word_after: save_result', 'value: global, word_after: {']
['value: reflex, word_after: save_result', 'value: global, word_after: {']
['value: global, word_after: {']
['value: global, word_after: {']
['value: ask, word_after: simulations', 'value: reflex, word_after: save_results_explo', 'value: experiment, word_after: Optimization']
['value: reflex, word_after: save_result', 'value: global, word_after: {']
['value: global, word_after: {']
['value: global, word_after: {']
['value: ask, word_after: simulations', 'value: reflex, word_after: save_results_explo', 'value: experiment, word_after: Optimization']
['value: global, word_after: {']
['value: global, word_after: {']
['value: None, word_after: None']
['value: species, word_after: generic_species']
['value: species, word_after: generic_species']
['value: species, word_after: generic_species']
['value: species, word_after: generic_species']
['value: reflex, word_after: reproduce', 'value: species, word_after: generic_s

In [201]:
print(output[0][1])

= line 37 position 8


In [202]:

# code_lines = code.splitlines()

# for i in output:
#     token = i[1]
#     line = int(i[1].split()[2])  # 1-based line
#     position = int(i[1].split()[-1])  # 0-based token index
#     #print(position)
#     options = [parse_flat_target_entry(entry) for entry in i[2]]
#     block_info=str(i[3])


#     # Get snippet: 3 lines before and after the token's line
#     start = max(0, line - 4)
#     end = min(len(code_lines), line + 3)
#     snippet_lines = code_lines[start:end]
#     relative_line = line - 1 - start  # index in the snippet

#     # Highlight only the specified token occurrence
#     highlighted_code = mark_token_with_caret_by_name(
#         snippet_lines, relative_line, token.split()[0], position, pattern
#     )

#     print(f"Token: {token}")
#     print("-")
#     print(options)
#     print("-")
#     print(highlighted_code)
#     print("----\n")

#     answer = ask_token_disambiguation(options, highlighted_code, block_info)
#     print("Chosen ID:", answer['answer'])
#     print('----------------\n\n')


In [203]:
# from neo4j import GraphDatabase
# import re

# def get_token_id(tx, line, position):
#     query = """
#     MATCH (t:Token {line: $line, position: $position})
#     RETURN id(t) AS token_id
#     """
#     result = tx.run(query, line=line, position=position)
#     record = result.single()
#     return record["token_id"] if record else None

# # Your existing code
# code_lines = code.splitlines()

# for i in output:
#     token = i[1]
#     line = int(i[1].split()[2])  # 1-based line
#     position = int(i[1].split()[-1])  # 0-based token index
#     options = [parse_flat_target_entry(entry) for entry in i[2]]
#     block_info = str(i[3])

#     start = max(0, line - 4)
#     end = min(len(code_lines), line + 3)
#     snippet_lines = code_lines[start:end]
#     relative_line = line - 1 - start

#     highlighted_code = mark_token_with_caret_by_name(
#         snippet_lines, relative_line, token.split()[0], position, pattern
#     )

#     # Get token ID from Neo4j
#     with driver.session() as session:
#         token_id = session.execute_read(get_token_id, line, position)

# #    print(f"Token: {token} (ID: {token_id})")
# #    print("-")
# #    print(options)
# #    print("-")
# #    print(highlighted_code)
# #    print("----\n")

#     answer = ask_token_disambiguation(options, highlighted_code, block_info)

#     print(answer['answer'])

#     # All valid option IDs as strings
#     valid_ids = {str(opt['id']) for opt in options}

#     # Extract candidate IDs from the full string
#     raw_ids = re.findall(r'\d+', answer['answer'])

#     # Try to find a priority ID using patterns
#     priority_patterns = [
#         r'\*\*id[:\s]*(\d+)\*\*',            # **id:1234**
#         r'\*\*(\d+)\*\*',                    # **1234**
#         r'\{[^}]*?id[:\s]*(\d+)[^}]*?\}',    # {id: 1234}
#         r'\{[^}]*?(\d+)[^}]*?\}',            # { ... 1234 ... }
#         r'Answer\s*:\s*(\d+)',               # Answer : 1234
#         r'\*\*Answer:\*\*\s*(\d+)'           # **Answer:** 1234
#     ]


#     priority_id = None
#     for pattern in priority_patterns:
#         match = re.search(pattern, answer['answer'], flags=re.IGNORECASE)
#         if match and match.group(1) in valid_ids:
#             priority_id = match.group(1)
#             break

#     if priority_id:
#         print(f"✅ token_id: {token_id}, answer_id (priority match): {priority_id}")
#     else:
#         # Fallback to all valid matches
#         matching_ids = [id_ for id_ in raw_ids if id_ in valid_ids]
#         unique_ids = list(set(matching_ids))

#         if len(unique_ids) == 0:
#             print(f"❌ Problem: no valid option ID found in answer → found: {raw_ids}, expected one of: {valid_ids}")
#         elif len(unique_ids) > 1:
#             print(f"❌ Problem: multiple different valid option IDs found → {unique_ids}")
#         else:
#             answer_id = unique_ids[0]
#             print(f"✅ token_id: {token_id}, answer_id: {answer_id}")

#     print('----------------\n\n')


In [204]:
from neo4j import GraphDatabase
import re

def get_token_id(tx, line, position):
    """Look up the internal Neo4j id of the Token node at (line, position).

    Args:
        tx: transaction-like object exposing ``run(query, **params)``.
        line: value matched against the ``line`` property of the Token node.
        position: value matched against its ``position`` property.

    Returns:
        The ``token_id`` column of the single record returned by the query,
        or ``None`` when the query yields no (truthy) record.
    """
    cypher = """
    MATCH (t:Token {line: $line, position: $position})
    RETURN id(t) AS token_id
    """
    row = tx.run(cypher, line=line, position=position).single()
    if row:
        return row["token_id"]
    return None

# Patterns tried, in this order, to find an ID the LLM explicitly put forward.
# Compiled once instead of being rebuilt for every token.
_ANSWER_ID_PATTERNS = tuple(
    re.compile(expr, flags=re.IGNORECASE)
    for expr in (
        r'\*\*id[:\s]*(\d+)\*\*',            # **id:1234**
        r'\*\*(\d+)\*\*',                    # **1234**
        r'\{[^}]*?id[:\s]*(\d+)[^}]*?\}',    # {id: 1234}
        r'\{[^}]*?(\d+)[^}]*?\}',            # { ... 1234 ... }
        r'Answer\s*:\s*(\d+)',               # Answer : 1234
        r'\*\*Answer:\*\*\s*(\d+)',          # **Answer:** 1234
    )
)


def _pick_answer_id(answer_text, valid_ids):
    """Return ``(answer_id, candidates)`` extracted from one LLM answer.

    ``answer_id`` is the first priority-pattern hit that is a valid option id;
    otherwise it is the only valid id mentioned anywhere in the text, or
    ``None`` when zero or several distinct valid ids are mentioned.
    ``candidates`` lists the distinct valid ids found, in order of first
    mention (deterministic, unlike a set), for diagnostics.
    """
    for regex in _ANSWER_ID_PATTERNS:
        found = regex.search(answer_text)
        if found and found.group(1) in valid_ids:
            return found.group(1), [found.group(1)]

    mentioned = list(dict.fromkeys(
        number for number in re.findall(r'\d+', answer_text) if number in valid_ids
    ))
    if len(mentioned) == 1:
        return mentioned[0], mentioned
    return None, mentioned


def disambiguate_tokens(output, code, driver, pattern):
    """Ask the LLM which candidate node each ambiguous token refers to.

    Args:
        output: iterable of entries where ``entry[1]`` is a token description
            such as ``'= line 37 position 8'``, ``entry[2]`` is the list of
            flattened candidate entries and ``entry[3]`` is the block context.
        code: full source text the line numbers refer to.
        driver: Neo4j driver used to look up the Token node ids.
        pattern: tokenizer regex forwarded to ``mark_token_with_caret_by_name``.

    Returns:
        A list of ``(token_id, answer_id)`` tuples, one per entry.
        ``token_id`` is whatever ``get_token_id`` returns (``None`` if the
        token is not found); ``answer_id`` is the chosen option id as a string,
        or ``None`` when no single valid id could be extracted from the answer
        (in which case the answer and the candidates are printed).
    """
    results = []

    code_lines = code.splitlines()

    for i in output:
        token = i[1]
        token_parts = token.split()
        line = int(token_parts[2])  # 1-based line
        # Token rank on that line; mark_token_with_caret_by_name subtracts 1
        # from it, i.e. treats it as 1-based.
        position = int(token_parts[-1])
        options = [parse_flat_target_entry(entry) for entry in i[2]]
        block_info = str(i[3])

        # Snippet: up to 3 lines before and 3 lines after the token's line.
        start = max(0, line - 4)
        end = min(len(code_lines), line + 3)
        snippet_lines = code_lines[start:end]
        relative_line = line - 1 - start  # index of the token's line in the snippet

        highlighted_code = mark_token_with_caret_by_name(
            snippet_lines, relative_line, token_parts[0], position, pattern
        )

        # Get token ID from Neo4j
        with driver.session() as session:
            token_id = session.execute_read(get_token_id, line, position)

        answer = ask_token_disambiguation(options, highlighted_code, block_info)
        answer_text = answer['answer'].replace('\n', ' ').strip()

        valid_ids = {str(opt['id']) for opt in options}
        answer_id, unique_ids = _pick_answer_id(answer_text, valid_ids)

        if answer_id is None:
            print("llm_answer : "+str(answer['answer']))
            print(f"❌ Problem: {len(unique_ids)} valid IDs found → {unique_ids}")

        results.append((token_id, answer_id))

    return results


In [205]:
# results=disambiguate_tokens(output, code, driver, pattern)
# print(results)

In [206]:
def update_is_count_relations(driver, results):
    """Record one IS_COUNT vote per ``(token_id, target_id)`` pair.

    For each pair, the IS_COUNT relationship from the token node to the target
    node is created with ``count = 1`` if it does not exist yet, otherwise its
    ``count`` is incremented.  Pairs in which either id is ``None`` are
    reported on stdout and skipped.  Ids are converted with ``int`` before
    being sent to the database.

    Args:
        driver: object whose ``session()`` returns a context manager exposing
            ``execute_write``.
        results: iterable of ``(token_id, target_id)`` pairs.
    """
    def bump_vote(tx, token_id, target_id):
        cypher = """
        MATCH (a), (b)
        WHERE id(a) = $token_id AND id(b) = $target_id
        MERGE (a)-[r:IS_COUNT]->(b)
        ON CREATE SET r.count = 1
        ON MATCH SET r.count = coalesce(r.count, 0) + 1
        """
        tx.run(cypher, token_id=token_id, target_id=target_id)

    with driver.session() as session:
        for source_id, chosen_id in results:
            if source_id is not None and chosen_id is not None:
                session.execute_write(bump_vote, int(source_id), int(chosen_id))
            else:
                print(f"⚠️ Skipping invalid pair: token_id={source_id}, target_id={chosen_id}")


In [207]:
import gc  # garbage collector

# Run the whole LLM disambiguation three times over the same tokens.  Each
# round's (token_id, answer_id) pairs are stored as IS_COUNT votes, so an
# answer chosen in several rounds ends up with a higher `count`.
for _ in range(3):
    results = disambiguate_tokens(output, code, driver, pattern)
    print(results)
    update_is_count_relations(driver, results)

    # Explicitly delete large objects
    #del results
    #gc.collect()


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


llm_answer : Based on the provided information, I would select the statement node with an id of 5635 as it has a label of "Statement" and no incoming links. The operator node with an id of 632 is not relevant to the given code snippet and can be disregarded. Therefore, the best matching candidate is statement node with an id of 5635.
❌ Problem: 2 valid IDs found → ['632', '5635']


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


[(1303, '4840'), (1319, '6217'), (1325, '4840'), (2618, '6217'), (1303, '6217'), (1319, '6217'), (1325, '4840'), (2618, '4840'), (1226, '5507'), (1312, '4643'), (1338, '5635'), (1403, '5507'), (1419, '5507'), (1431, '4643'), (1443, '5507'), (1471, '5635'), (1574, '5635'), (1686, None), (1938, '5211'), (2143, '5635'), (2149, '5635'), (2171, '5635'), (2177, '5635'), (2475, '4350'), (2576, '5507'), (1226, '4643'), (1312, '5507'), (1338, '632'), (1403, '4643'), (1419, '5507'), (1431, '5507'), (1443, '5507'), (1471, '5635'), (1574, '5635'), (1686, '5635'), (1938, '4350'), (2143, '5635'), (2149, '5635'), (2171, '5635'), (2177, '5635'), (2475, '5211'), (2576, '5507'), (1246, '4351'), (1302, '5925'), (1550, '668'), (1246, '4351'), (1302, '4351'), (1550, '668'), (1396, '1051'), (1400, '1051'), (1412, '1051'), (1416, '1051'), (1485, '1051'), (1489, '1051'), (1396, '1051'), (1400, '1051'), (1412, '1051'), (1416, '1051'), (1485, '1051'), (1489, '1051'), (1140, '4821'), (1367, '4822'), (1615, '4822

  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


[(1303, '4840'), (1319, '4840'), (1325, '4840'), (2618, '4840'), (1303, '4840'), (1319, '4840'), (1325, '4840'), (2618, '6217'), (1226, '4643'), (1312, '4643'), (1338, '5635'), (1403, '5507'), (1419, '4643'), (1431, '4643'), (1443, '4643'), (1471, '5635'), (1574, '5635'), (1686, '5635'), (1938, '5211'), (2143, '5635'), (2149, '5635'), (2171, '5635'), (2177, '5635'), (2475, '4350'), (2576, '4643'), (1226, '5507'), (1312, '4643'), (1338, '5635'), (1403, '5507'), (1419, '5507'), (1431, '5507'), (1443, '5507'), (1471, '5635'), (1574, '5635'), (1686, '5635'), (1938, '4350'), (2143, '5635'), (2149, '5635'), (2171, '5635'), (2177, '5635'), (2475, '5211'), (2576, '5507'), (1246, '5925'), (1302, '4351'), (1550, '668'), (1246, '5925'), (1302, '5925'), (1550, '668'), (1396, '1051'), (1400, '1051'), (1412, '1051'), (1416, '1051'), (1485, '1051'), (1489, '1051'), (1396, '1051'), (1400, '1051'), (1412, '1051'), (1416, '1051'), (1485, '1051'), (1489, '1051'), (1140, '4822'), (1367, '4821'), (1615, '4

  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


llm_answer : Based on the given information, I would recommend selecting the `built_in_architecture` node as the best matching candidate. The reason being that it directly relates to the token in the code and provides a clear understanding of how the node is used in the language. Additionally, it is a part of the global scope and not referenced by any other nodes.
❌ Problem: 0 valid IDs found → []


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


llm_answer : The best matching candidate is the first one (id: 5925), as it is a ConstantOrUnit and does not have any incoming links. This constant or unit could be a variable that has never been defined before in the code snippet. This makes it an unlikely candidate.

On the other hand, the second candidate (id: 4351) is a Variable and does point to the built_in_specie:experiment. Although this can't be confirmed as the best match without additional information from the full context of the code snippet, it still seems like an unlikely match based on its name and incoming link.

Therefore, the best matching candidate would be id 4351 (Variable: Unnamed).
❌ Problem: 2 valid IDs found → ['5925', '4351']


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


llm_answer : The best matching candidate is 1051, which represents an unnamed agent attribute. The token in the code snippet is "init", and it points to node 4219 (Type:agent), which indicates that this attribute is associated with agents in the GAML language. Therefore, the best matching candidate is 1051.
❌ Problem: 2 valid IDs found → ['4219', '1051']


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


[(1303, '4840'), (1319, '4840'), (1325, '6217'), (2618, '4840'), (1303, '4840'), (1319, '4840'), (1325, '4840'), (2618, '6217'), (1226, None), (1312, '4643'), (1338, '5635'), (1403, '4643'), (1419, '5507'), (1431, '4643'), (1443, '4643'), (1471, '5635'), (1574, '5635'), (1686, '5635'), (1938, '5211'), (2143, '5635'), (2149, '5635'), (2171, '5635'), (2177, '5635'), (2475, '4350'), (2576, '4643'), (1226, '5507'), (1312, '5507'), (1338, '5635'), (1403, '4643'), (1419, '4643'), (1431, '5507'), (1443, '4643'), (1471, '5635'), (1574, '5635'), (1686, '5635'), (1938, '4350'), (2143, '5635'), (2149, '5635'), (2171, '5635'), (2177, '5635'), (2475, '4350'), (2576, '4643'), (1246, '4351'), (1302, '5925'), (1550, '668'), (1246, None), (1302, '4351'), (1550, '668'), (1396, '1051'), (1400, '1051'), (1412, '4219'), (1416, '1051'), (1485, '1051'), (1489, '1051'), (1396, None), (1400, '4263'), (1412, '1051'), (1416, '1051'), (1485, '1051'), (1489, '1051'), (1140, '4821'), (1367, '4822'), (1615, '4822'),



In [208]:
def promote_highest_is_count_to_is(driver):
    """Rebuild the IS relationships from the IS_COUNT votes.

    Runs three statements inside a single write transaction:
      1. delete every existing IS relationship;
      2. for each source node, take the IS_COUNT relationship with the
         highest ``count`` and MERGE an IS relationship between the same
         two nodes;
      3. delete every IS_COUNT relationship.

    Args:
        driver: object whose ``session()`` returns a context manager exposing
            ``execute_write``.
    """
    def rebuild_is_links(tx):
        promote_query = """
        MATCH (n)-[r:IS_COUNT]->(target)
        WITH n, r
        ORDER BY r.count DESC
        WITH n, collect(r)[0] AS top_r
        CALL {
            WITH top_r
            MATCH (src)-[top_r]->(dst)
            MERGE (src)-[:IS]->(dst)
        }
        RETURN count(*) AS promoted
        """
        statements = (
            "MATCH ()-[r:IS]->() DELETE r",        # clear previous IS links
            promote_query,                          # best-voted IS_COUNT -> IS
            "MATCH ()-[r:IS_COUNT]->() DELETE r",  # votes are consumed
        )
        for statement in statements:
            tx.run(statement)

    with driver.session() as session:
        session.execute_write(rebuild_is_links)

In [209]:
promote_highest_is_count_to_is(driver)

In [211]:
def IS_to_weight(driver):
    """Fold the IS relationships into the ``weight`` of their target nodes.

    Inside a single write transaction:
      1. every node with incoming IS relationships gets its ``weight``
         increased by the number of those relationships;
      2. all IS relationships are deleted, so calling the function again
         does not count the same relationships twice.

    A node without a ``weight`` property is treated as having weight 0.
    (In Cypher ``null + n`` is ``null``, so without ``coalesce`` such a node
    would never receive a weight.)

    Args:
        driver: object whose ``session()`` returns a context manager exposing
            ``execute_write``.
    """
    def fold_is_into_weight(tx):
        # Step 1: add the number of incoming IS relationships to each target's weight
        query = """
        MATCH ()-[r:IS]->(n)
        WITH n, count(*) AS count_of_is_relationships
        SET n.weight = coalesce(n.weight, 0) + count_of_is_relationships
        """
        tx.run(query)

        # Step 2: delete the IS relationships that have just been counted
        tx.run("MATCH ()-[r:IS]->() DELETE r")

    with driver.session() as session:
        session.execute_write(fold_is_into_weight)

In [212]:
IS_to_weight(driver)