In [16]:
import os

os.environ["OPENAI_API_KEY"]='<Your Key>'
os.environ["LLM_MODEL"]='deepseek-chat'

# 代码生成与验证

In [8]:
from openai import OpenAI


def work_on(input):
    client = OpenAI(api_key=os.getenv("DEEPSEEK_API_KEY"),
        base_url="https://api.deepseek.com")

    response = client.chat.completions.create(
        model="deepseek-chat",
        messages=[
            {"role": "system", "content": "You are a software engineer."},   
            {"role": "user", "content": input}
        ],
        temperature = 0.9, 
        max_tokens = 500
      )
    return response.choices[0].message.content

In [9]:
prompt = """
实现一个 Python 函数(find_declarations)以从给定的 Golang 源代码中查找结构/接口声明。

该函数需要两个输入参数：
1. 包含源文件的目录。
2. 由包名称和结构/接口名称组成的 JSON 字符串，格式如下：{“package_name”:<package name>,“element”:<struct/interface>}

函数的输出是声明。当未找到请求的结构/接口时，该函数应返回“Not Found”。
"""

In [10]:
print (work_on(prompt))

要实现一个 Python 函数 `find_declarations` 来从给定的 Golang 源代码中查找结构体或接口的声明，我们可以使用 Python 的文件操作和正则表达式来解析 Go 源代码文件。以下是一个可能的实现：

```python
import os
import re
import json

def find_declarations(directory, json_input):
    # 解析 JSON 输入
    try:
        input_data = json.loads(json_input)
        package_name = input_data.get("package_name")
        element_name = input_data.get("element")
    except json.JSONDecodeError:
        return "Invalid JSON input"

    if not package_name or not element_name:
        return "Invalid input: package_name and element are required"

    # 遍历目录中的 Go 文件
    for root, _, files in os.walk(directory):
        for file in files:
            if file.endswith(".go"):
                file_path = os.path.join(root, file)
                with open(file_path, 'r', encoding='utf-8') as f:
                    content = f.read()

                    # 检查文件是否属于指定的包
                    package_match = re.search(r'package\s+(\w+)', content)
                    if package_match a

In [11]:
import os
import re
import json

def find_declarations(directory, json_input):
    # 解析 JSON 输入
    try:
        input_data = json.loads(json_input)
        package_name = input_data.get("package_name")
        element_name = input_data.get("element")
    except json.JSONDecodeError:
        return "Invalid JSON input"

    if not package_name or not element_name:
        return "Invalid input: package_name and element are required"

    # 遍历目录中的 Go 文件
    for root, _, files in os.walk(directory):
        for file in files:
            if file.endswith(".go"):
                file_path = os.path.join(root, file)
                with open(file_path, 'r', encoding='utf-8') as f:
                    content = f.read()

                    # 检查文件是否属于指定的包
                    package_match = re.search(r'package\s+(\w+)', content)
                    if package_match and package_match.group(1) == package_name:
                        # 查找结构体或接口声明
                        pattern = rf'(type\s+{element_name}\s+(?:struct|interface)\s*{{[^}}]*}})'
                        match = re.search(pattern, content)
                        if match:
                            return match.group(0)

    return "Not Found"

In [14]:
src_dir = "./llm_friendly_code"
interface_dec = '{"package_name":"demo","element":"TokenCreator"}'
expected = """
type TokenCreator interface {
	CreateToken(data string) string
}
"""
interface_dec1 = '{"package_name":"demo","element":"Processor"}'

In [15]:
print (find_declarations(src_dir, interface_dec1))

type Processor interface {
	// Process is to the raw data
	Process(raw string) (string, error)
}


In [20]:
from io import StringIO
import sys
from typing import Dict, Optional
from langchain_openai.chat_models.base import BaseChatOpenAI
from langchain.agents import load_tools
from langchain.agents import initialize_agent, Tool
from langchain.llms import OpenAI


class PythonREPL:
    """Simulates a standalone Python REPL."""

    def __init__(self):
        pass        

    def run(self, command: str) -> str:
        command = '\n'.join(line for line in command.split('\n') if not line.startswith('```'))
        """Run command and returns anything printed."""
        # sys.stderr.write("EXECUTING PYTHON CODE:\n---\n" + command + "\n---\n")
        old_stdout = sys.stdout
        sys.stdout = mystdout = StringIO()
        try:
            exec(command, globals())
            sys.stdout = old_stdout
            output = mystdout.getvalue()
        except Exception as e:
            sys.stdout = old_stdout
            output = str(e)
        # sys.stderr.write("PYTHON OUTPUT: \"" + output + "\"\n")
        return output

llm = BaseChatOpenAI(
        model=os.getenv("LLM_MODEL"), 
        openai_api_key=os.getenv("DEEPSEEK_API_KEY"), 
        openai_api_base='https://api.deepseek.com',
        max_tokens=1024
    )

python_repl = Tool(
        "Python REPL",
        PythonREPL().run,
        """A Python shell. Use this to execute python commands. 
        Input should be a valid python command.
        If you expect output it should be printed out.
        For example: to verify the the following python function
        ---
        def add(a, b):
            return (a+b)
        ---
        we can invoke the tool with the input 
        "
        def add(a, b):
            return (a+b)
        print (add(1,2))
        
        "
        """,
    )
tools = [python_repl]
agent = initialize_agent(tools, llm, agent="zero-shot-react-description", verbose=True, handle_parsing_errors=True)

In [21]:
PythonREPL().run('print("Hello Python")')

'Hello Python\n'

In [22]:
prompt_1 = prompt + f"\n 给出回答前请用以下例子测试生成的函数:\n 函数输入：{src_dir}, {interface_dec}; 函数输出：{expected}"
print(agent.run(prompt_1))

  print(agent.run(prompt_1))




[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mTo solve this problem, we need to implement a Python function called `find_declarations` that will search through Go source code files in a specified directory to find the declaration of a given struct or interface. The function will take two parameters: the directory containing the Go source files and a JSON string specifying the package name and the element (struct/interface) to search for.

### Steps to Implement the Function:
1. **Parse the JSON Input**: Extract the package name and the element (struct/interface) name from the JSON string.
2. **Traverse the Directory**: Iterate through all `.go` files in the specified directory.
3. **Search for the Declaration**: For each file, search for the declaration of the specified struct or interface.
4. **Return the Declaration**: If found, return the declaration; otherwise, return "Not Found".

### Testing the Function:
We will test the function with the provided example:
- Direc

In [23]:
import os
import json
import re

def find_declarations(directory, json_input):
    # Parse the JSON input
    try:
        data = json.loads(json_input)
        package_name = data.get("package_name")
        element_name = data.get("element")
    except json.JSONDecodeError:
        return "Invalid JSON input"
    
    if not package_name or not element_name:
        return "Invalid JSON input: missing package_name or element"
    
    # Traverse the directory
    for root, dirs, files in os.walk(directory):
        for file in files:
            if file.endswith(".go"):
                file_path = os.path.join(root, file)
                with open(file_path, 'r') as f:
                    content = f.read()
                    
                    # Check if the file belongs to the specified package
                    package_match = re.search(r'package\s+' + package_name, content)
                    if not package_match:
                        continue
                    
                    # Search for the struct/interface declaration
                    pattern = r'type\s+' + element_name + r'\s+(?:struct|interface)\s*\{[^}]*\}'
                    match = re.search(pattern, content, re.DOTALL)
                    if match:
                        return match.group(0)
    
    return "Not Found"

In [24]:
print (find_declarations(src_dir, interface_dec))

type TokenCreator interface {
	CreateToken(data string) string
}


In [25]:
interface_dec = '{"package_name":"demo","element":"Storage"}'
print (find_declarations(src_dir, interface_dec))

type Storage interface {
	// RetiveData is to retrive the data by the associated token.
	RetieveData(token string) (string, error)

	// StoreData is to persist the data,
	// input paramters:
	//   token is used to retrive the associated data
	//
	StoreData(token string, data string) error
}


# 解决依赖问题

## 本地文件搜索

In [26]:
from langchain.agents import initialize_agent, Tool
from langchain.chat_models import AzureChatOpenAI
from langchain.chains import LLMRequestsChain
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA
from langchain_openai.chat_models.base import BaseChatOpenAI
import time
import os
import json
import re

src_dir = "./llm_friendly_code"

def find_declarations(directory, json_string):
    # Parse the JSON string
    json_data = json.loads(json_string)
    package_name = json_data["package_name"]
    element_name = json_data["element"]

    # Regular expression to match structure/interface declarations
    regex = re.compile(r"(type\s+" + element_name + r"\s+(struct|interface)\s+{\s+[^}]*\s+})")

    # Read the source files in the directory
    for root, dirs, files in os.walk(directory):
        for file in files:
            if file.endswith(".go"):
                with open(os.path.join(root, file), "r") as f:
                    content = f.read()
                    # Check if the file is in the correct package
                    if "package " + package_name in content:
                        # Search for the structure/interface declaration
                        match = regex.search(content)
                        if match:
                            return match.group(1)

    return "Not Found"

def get_dep(json_string):
    return find_declarations(src_dir, json_string)

llm = BaseChatOpenAI(
        model=os.getenv("LLM_MODEL"), 
        openai_api_key=os.getenv("DEEPSEEK_API_KEY"), 
        openai_api_base='https://api.deepseek.com',
        max_tokens=1024)


# 将“依赖获取”工具添加到 ReAct 过程中    
tools = [
    Tool(
        name="find_declarations", func=get_dep,
        description="""Only invoke it when you need to find the declaration of the interface/struct/method that is not in the given code. 
        The input is a JSON string includes package name and the struct/interface name, 
          and its the format {{"package_name":<package name>,"element":<struct/interface>}}
        """)
]


In [27]:
def write_code(input):
    agent = initialize_agent(tools, llm, agent="zero-shot-react-description", handle_parsing_errors=True, max_iterations=10, verbose=True)
    print(agent.run(input))

In [28]:
write_code(
"""
 You are a very senior golang programmer.
 Write the golang code to implement the demo.TokenCreator interface.
 The implementation is to generate the token with uuid and its package is demo
"""
)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: To implement the `demo.TokenCreator` interface, I first need to understand the structure of the interface, including its methods and their signatures. Since the interface is not provided in the question, I will need to use the `find_declarations` tool to retrieve the declaration of the `demo.TokenCreator` interface.

Action: find_declarations
Action Input: {"package_name":"demo","element":"TokenCreator"}
[0m
Observation: [36;1m[1;3mtype TokenCreator interface {
	CreateToken(data string) string
}[0m
[32;1m[1;3mThought: Now that I know the `demo.TokenCreator` interface has a single method `CreateToken(data string) string`, I can proceed to implement this interface. The implementation will generate a token using a UUID and return it as a string. I will use the `github.com/google/uuid` package to generate the UUID.

Final Answer: Here is the Go code to implement the `demo.TokenCreator` interface:

```go
package dem

## 采用向量数据库保存遗留代码

In [38]:
from langchain.text_splitter import Language
from langchain.document_loaders.generic import GenericLoader
from langchain.document_loaders.parsers import LanguageParser
loader = GenericLoader.from_filesystem(
    src_dir,
    glob="**/*",
    suffixes=[".go"],
    parser=LanguageParser(language=Language.GO, parser_threshold=500)
)
documents = loader.load()
print(len(documents))

from langchain.text_splitter import RecursiveCharacterTextSplitter
go_splitter = RecursiveCharacterTextSplitter.from_language(language=Language.GO, 
                                                               chunk_size=500, 
                                                               chunk_overlap=50)
texts = go_splitter.split_documents(documents)
print(len(texts))

from langchain.vectorstores import Chroma
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.chat_models import AzureChatOpenAI
from langchain_community.embeddings import OllamaEmbeddings


# Ollama local embedding model
ollama_host_url = "http://localhost:11434"
embedding_model = "nomic-embed-text"
embedding=OllamaEmbeddings(model=embedding_model, base_url=ollama_host_url,show_progress=True)

persist_directory = 'codes_vector_db/'
db = Chroma.from_documents(texts, embedding, persist_directory=persist_directory)
db.persist()

10
65


  embedding=OllamaEmbeddings(model=embedding_model, base_url=ollama_host_url,show_progress=True)
OllamaEmbeddings: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████| 65/65 [00:16<00:00,  3.92it/s]
  db.persist()


## 采用向量数据库实现

In [39]:
from langchain.chat_models import ChatOpenAI
from langchain.chains import RetrievalQA
from langchain_openai.chat_models.base import BaseChatOpenAI

llm = BaseChatOpenAI(
        model=os.getenv("LLM_MODEL"), 
        openai_api_key=os.getenv("DEEPSEEK_API_KEY"), 
        openai_api_base='https://api.deepseek.com',
        max_tokens=1024)

retriever = db.as_retriever(search_type="mmr", search_kwargs={"k": 8})
qa = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=retriever,
                                 return_source_documents=False, verbose=True)

def find_declarations(directory, json_string):
    # Parse the JSON string
    json_data = json.loads(json_string)
    package_name = json_data["package_name"]
    element_name = json_data["element"]
    ret = qa(f"give me the declaration of {element_name} in package {package_name}")
    return ret['result']

In [40]:
ret = find_declarations(src_dir, '{"package_name":"demo","element":"Storage"}')
print(ret)

  ret = qa(f"give me the declaration of {element_name} in package {package_name}")




[1m> Entering new RetrievalQA chain...[0m


OllamaEmbeddings: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 25.95it/s]



[1m> Finished chain.[0m
The declaration of the `Storage` interface in the `demo` package is as follows:

```go
package demo

// Storage is to persist the data
type Storage interface {
	// RetieveData is to retrieve the data by the associated token.
	RetieveData(token string) (string, error)

	// StoreData is to persist the data,
	// input parameters:
	//   token is used to retrieve the associated data
	//
	StoreData(token string, data string) error
}
```

This interface defines two methods:
1. `RetieveData(token string) (string, error)` - Retrieves data associated with a given token.
2. `StoreData(token string, data string) error` - Stores data with an associated token.


In [41]:
write_code(
"""
 You are a very senior golang programmer.
 Write the golang code to implement the demo.Storage interface with the local memory.
 The implementation's package also is demo
"""
)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mTo implement the `demo.Storage` interface with local memory, I first need to know the definition of the `demo.Storage` interface. Since the interface definition is not provided, I will use the `find_declarations` tool to retrieve it.

Action: find_declarations
Action Input: {"package_name":"demo","element":"Storage"}[0m

[1m> Entering new RetrievalQA chain...[0m


OllamaEmbeddings: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 26.51it/s]



[1m> Finished chain.[0m

Observation: [36;1m[1;3mThe declaration of the `Storage` interface in the `demo` package is as follows:

```go
package demo

// Storage is to persist the data
type Storage interface {
	// RetieveData is to retrieve the data by the associated token.
	RetieveData(token string) (string, error)

	// StoreData is to persist the data,
	// input parameters:
	//   token is used to retrieve the associated data
	//
	StoreData(token string, data string) error
}
```

This interface defines two methods:
1. `RetieveData(token string) (string, error)` - Retrieves data associated with a given token.
2. `StoreData(token string, data string) error` - Stores data with an associated token.[0m
[32;1m[1;3mThought: Now that I have the definition of the `demo.Storage` interface, I can proceed to implement it using local memory. The implementation will use a map to store the data in memory, where the key is the token and the value is the associated data.

Here is the Go code to

In [42]:
print(write_unit_tests(src_dir+"/demo/skeleton.go"))



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: To write unit tests for the `ProcessingServiceImpl` struct, I need to consider the edge cases and errors. The code involves interactions with three dependencies: `Storage`, `Processor`, and `TokenCreator`. I will need to mock these dependencies to test the `Process` and `Retrieve` methods. Additionally, I need to ensure that the unit tests are in the same package (`demo`) as the tested code.

Before writing the tests, I should understand the interfaces of `Storage`, `Processor`, and `TokenCreator`. Since these interfaces are not provided in the code, I will use the `find_declarations` tool to retrieve their definitions.

Action: I will use the `find_declarations` tool to find the definitions of the `Storage`, `Processor`, and `TokenCreator` interfaces.

Action Input: 
```json
{"package_name":"demo","element":"Storage"}
```[0m
Observation: I will use the `find_declarations` tool to find the definitions of the `Storag