In [7]:
def get_cfg_prompt(code, language, ast):
    """Build the LLM prompt that requests a control flow graph as JSON.

    The prompt embeds the three inputs between ``[CODE]``, ``[AST]`` and
    ``[LANGUAGE]`` markers and then shows a template of the JSON structure
    the answer is expected to follow.

    Args:
        code: Source code to analyse; interpolated into the prompt as-is.
        language: Name of the language the code is written in.
        ast: AST representation of ``code``; interpolated into the prompt as-is.

    Returns:
        The complete prompt as a single string.
    """
    # Many lines end in two spaces; that whitespace is part of the prompt text
    # and is reproduced verbatim.
    input_section = (
        "",
        "",
        "# Control Flow Graph Generation Protocol  ",
        "**Objective**: Convert source code into standardized CFG JSON format according to the Abstract Syntax Tree (AST)",
        "",
        "## Input Requirements  ",
        "Submit code and language using this pattern:  ",
        "```",
        "[CODE]  ",
        f"{code}",
        "[/CODE]  ",
        "",
        "[AST]  ",
        f"{ast}",
        "[/AST]  ",
        "",
        "[LANGUAGE]  ",
        f"{language}  ",
        "[/LANGUAGE]  ",
        "```",
        "",
    )
    # Template of the expected answer; plain strings, so braces need no escaping.
    output_section = (
        "## Output Specifications  ",
        "### Required JSON Structure  ",
        "{",
        '  "name": "function_name",  ',
        '  "entryBlock": {  ',
        '    "id": "B0",  ',
        '    "label": "entry_point_description",  ',
        '    "type": "branch|loop|normal|error"  ',
        "  },  ",
        '  "blocks": [  ',
        "    {  ",
        '      "id": "B1",  ',
        '      "label": "code_fragment",  ',
        '      "type": "branch",  ',
        '      "subCFG": { /* Nested structure */ }  ',
        "    }  ",
        "  ],  ",
        '  "edges": [  ',
        "    {  ",
        '      "sourceId": "B0",  ',
        '      "targetId": "B1",  ',
        '      "label": "edge_description",  ',
        '      "isError": false  ',
        "    }  ",
        "  ]  ",
        "}  ",
        "",
    )
    return "\n".join(input_section + output_section)

In [8]:
import os
import json
from llm import get_llm_answers

# Language label and dataset location used when building the CFG prompts.
language = "cangjie"
source_code_dir = "../dataset/cangjie"

# Only sample 1 is processed; widen the range to cover more dataset files.
for i in range(1, 2):
    # Context managers close the input files deterministically instead of
    # leaving the handles to the garbage collector; the explicit encoding
    # keeps reads independent of the platform default.
    with open(os.path.join(source_code_dir, f"{i}.cj"), "r", encoding="utf-8") as src_file:
        code = src_file.read()
    # ast_{i}.json is read from the working directory, where the
    # AST-generation loop in this notebook writes it.
    with open(f"ast_{i}.json", "r", encoding="utf-8") as ast_file:
        ast = ast_file.read()
    prompt = get_cfg_prompt(code, language, ast)

    # The model's reply is parsed as JSON, so a malformed reply raises here
    # before any output file is created.
    answer = json.loads(get_llm_answers(prompt, require_json=True, model_name="deepseek-r1:70b"))
    with open(f"cfg_{i}.json", "w", encoding="utf-8") as f:
        json.dump(answer, f, indent=4)



In [4]:
def get_ast_prompt(code, language):
    """Build the LLM prompt that requests an AST for a piece of source code.

    Args:
        code: Source code to analyse; appended after the instructions as-is.
        language: Name of the language, mentioned in the opening sentence.

    Returns:
        The complete prompt as a single string.
    """
    # The leading and trailing empty entries reproduce the newline that opens
    # and the newline that closes the prompt.
    prompt_lines = [
        "",
        f"Please analyze the following {language} code and generate a detailed Abstract Syntax Tree (AST) representation. The AST should:",
        "",
        "1. Show the hierarchical structure of the code",
        "2. Include all important nodes like:",
        "   - Package declarations",
        "   - Import statements ",  # trailing space is part of the prompt text
        "   - Function/class definitions",
        "   - Control flow statements",
        "   - Expressions",
        "3. Preserve the relationships between nodes",
        "4. Include relevant attributes and metadata for each node",
        "5. Follow standard AST notation",
        "",
        "Code to analyze:",
        f"{code}",
        "",
    ]
    return "\n".join(prompt_lines)

In [5]:
import os
import json
# Language label and dataset location used when building the AST prompts.
language = "cangjie"
source_code_dir = "../dataset/cangjie"

# Only sample 1 is processed; widen the range to cover more dataset files.
for i in range(1, 2):
    # A context manager closes the source file deterministically instead of
    # leaving the handle to the garbage collector; the explicit encoding keeps
    # the read independent of the platform default.
    with open(os.path.join(source_code_dir, f"{i}.cj"), "r", encoding="utf-8") as src_file:
        code = src_file.read()
    prompt = get_ast_prompt(code, language)

    # get_llm_answers is not imported in this cell; it has to be in scope from
    # the `from llm import get_llm_answers` cell elsewhere in the notebook.
    answer = json.loads(get_llm_answers(prompt, require_json=True, model_name="deepseek-r1:70b"))

    with open(f"ast_{i}.json", "w", encoding="utf-8") as f:
        json.dump(answer, f, indent=4)



Please analyze the following cangjie code and generate a detailed Abstract Syntax Tree (AST) representation. The AST should:

1. Show the hierarchical structure of the code
2. Include all important nodes like:
   - Package declarations
   - Import statements 
   - Function/class definitions
   - Control flow statements
   - Expressions
3. Preserve the relationships between nodes
4. Include relevant attributes and metadata for each node
5. Follow standard AST notation

Code to analyze:
package json_cj.testutil

import std.unittest.*
import std.unittest.testmacro.{Assert, Fail}
import encoding.json.*

public func assertJson(a: JsonValue, b: JsonValue): Unit {
    match ((a, b)) {
        case (_: JsonNull, _: JsonNull) => return
        case (a: JsonBool, b: JsonBool) => @Assert(a.getValue(), b.getValue())
        case (a: JsonInt, b: JsonInt) => @Assert(a.getValue(), b.getValue())
        case (a: JsonFloat, b: JsonFloat) => @Assert(a.getValue(), b.getValue())
        case (a: JsonStri