# Imports

In [1]:
import os
import getpass
from operator import itemgetter
from typing import List, Dict
import json
import requests



#LangChain, LangGraph
from langchain_openai import ChatOpenAI
from langgraph.graph import START, StateGraph, END
from typing_extensions import List, TypedDict
# from langchain_core.documents import Document
from langchain_core.prompts import ChatPromptTemplate
from langchain.schema.output_parser import StrOutputParser
from langchain_core.tools import Tool, tool
from langgraph.prebuilt import ToolNode
from typing import TypedDict, Annotated
from langgraph.graph.message import add_messages
import operator
from langchain_core.messages import BaseMessage, HumanMessage, AIMessage
from langchain.vectorstores import Qdrant
from langchain.embeddings import OpenAIEmbeddings
from langchain.schema import Document
from qdrant_client import QdrantClient
from qdrant_client.http.models import Distance, VectorParams



# from langchain.tools import DuckDuckGoSearchRun



# Set Environment Variables

In [3]:
os.environ["OPENAI_API_KEY"] = getpass.getpass("OpenAI API Key:")

# Code Analysis Functions

In [4]:
def read_python_file(file_path):
    """Return the entire text of ``file_path``, decoded as UTF-8.

    Args:
        file_path: Path of the file to read.

    Returns:
        The file contents as a single string.

    Raises:
        FileNotFoundError: The path does not exist.
        OSError: Any other I/O failure while opening or reading.
        Exception: Anything else (for example a decoding error).

    Every failure is reported on stdout first and then re-raised unchanged,
    so callers still see the original exception.
    """
    try:
        with open(file_path, mode='r', encoding='utf-8') as source_file:
            return source_file.read()
    except FileNotFoundError:
        # Handled before the broader OSError branch so it gets its own message.
        print(f"File not found: {file_path}")
        raise
    except OSError as read_error:  # IOError is an alias of OSError on Python 3
        print(f"Error reading file {file_path}: {str(read_error)}")
        raise
    except Exception as unexpected_error:
        print(f"Unexpected error reading file {file_path}: {str(unexpected_error)}")
        raise

In [5]:
def extract_imports(code, file_path):
    """Collect the import statements that appear in ``code``.

    The match is purely line based: each line is stripped and kept when it
    begins with ``import `` or ``from ``. A multi-line import therefore
    contributes only its first line, and any other line that happens to start
    with one of those words is kept as well.

    Args:
        code: Source text to scan.
        file_path: Used only in the error message.

    Returns:
        The matching lines, stripped, in file order. An empty list is returned
        (after printing the error) if scanning fails, e.g. when ``code`` is
        not a string.
    """
    import_prefixes = ('import ', 'from ')
    try:
        stripped_lines = (raw_line.strip() for raw_line in code.split('\n'))
        return [stmt for stmt in stripped_lines if stmt.startswith(import_prefixes)]
    except Exception as e:
        print(f"Error extracting imports from file {file_path}: {str(e)}")
        return []

# Tools

In [6]:
@tool
def search_pypi(package_name: str) -> str:
    """Search PyPI for Python package information. Input should be the package name.
    Args: 
        package_name: name of the package 
    """
    # The docstring above is what @tool publishes to the model as the tool
    # description, so it is kept verbatim; maintainer notes live in comments.
    #
    # Returns a JSON string {"name": ..., "summary": ...} on success. On any
    # failure it returns a "Could not find package information: ..." string
    # instead of raising, so the calling agent always receives text.
    print(f"Tool called for package: {package_name}")
    base_url = "https://pypi.org/pypi"
    request_timeout_s = 10  # without a timeout a stalled connection blocks the run indefinitely
    try:
        response = requests.get(f"{base_url}/{package_name}/json", timeout=request_timeout_s)
        response.raise_for_status()
        info = response.json()
        result = json.dumps({
            "name": info["info"]["name"],
            "summary": info["info"]["summary"],
        })
        print(f"Tool result: {result}")
        return result
    except requests.RequestException as e:
        # Network/HTTP failures (including timeouts and 404 for unknown packages).
        return f"Could not find package information: Error fetching PyPI info for {package_name}: {str(e)}"
    except Exception as e:
        # Anything else, e.g. a response body without the expected keys.
        return f"Could not find package information: {str(e)}"

In [131]:
print(search_pypi.args)

{'package_name': {'title': 'Package Name', 'type': 'string'}}


# Describe Imports Agent

In [7]:
from langchain_core.utils.function_calling import convert_to_openai_tool
# search_packages_tools = [convert_to_openai_tool(search_pypi)]

search_packages_tools = [search_pypi]
# tools=[
#         {
#             "type": "function",
#             "function": {
#                 "name": "search_pypi",
#                 "description": "Search PyPI for Python package information. Input should be the package name.",
#                 "parameters": {
#                     "type": "object",
#                     "properties": {
#                         "package_name": {"type": "string", "description": "Package Name"},
                        
#                     },
#                     "required": ["package_name"],
#                 },
#             },
#         }
#     ]

In [8]:
# Chat model that writes the package descriptions.
# NOTE(review): the bind_tools call below is commented out, so as written this
# model is never offered search_pypi even though the system prompt says it MUST
# use it — confirm which behaviour is intended.
describe_imports_llm = ChatOpenAI(model="gpt-4o-mini")
# describe_imports_llm = describe_imports_llm.bind_tools(tools = search_packages_tools, tool_choice="required")

In [9]:
system_template = """You are an expert {code_language} developer.
Your will be given code lines that import packages. 
Your role is to give a brief description of each package

You have access to the following tool and you MUST use it:
search_pypi: Use this to get information about Python packages from PyPI.

For each import:
1. Extract the main package name
2. Use the search_pypi tool to get package information by calling "search_pypi(package_name)"
3. Combine the information into a clear description
4. If the retuned value of tool is empty use your own knowledge
5. If you have no knowledge for this package then it's description should be "I don't know details about this package"

You must respond in the following JSON format:
{{"Imported_Packages": [
    {{"name": "package1", "desc": "brief description of package1"}},
    {{"name": "package2", "desc": "brief description of package2"}}
]}}

Rules for the output:
1. Use valid JSON format
2. Package names should be the exact names from the imports
3. Descriptions should be brief and clear
4. Do not include any text outside the JSON structure
"""

# system_template = """You are an expert {code_language} developer.
# Your will be given code lines that import packages. 
# Your role is to give a brief description of each package

# You can use the search_pypi tool, but it's optional.
# If you know about a package, you can describe it directly.
# If you're unsure, you can use search_pypi to get more information.

# You must respond in the following JSON format:
# {{"Imported_Packages": [
#     {{"name": "package1", "desc": "brief description of package1"}},
#     {{"name": "package2", "desc": "brief description of package2"}}
# ]}}

# Rules for the output:
# 1. Use valid JSON format
# 2. Package names should be the exact names from the imports
# 3. Descriptions should be brief and clear
# 4. Do not include any text outside the JSON structure
# """
human_template = "{imports}"

describe_imports_prompt = ChatPromptTemplate.from_messages([
    ("system", system_template),
    ("human", human_template)
])

In [10]:

# Pipeline: pick the two prompt variables out of the input dict, render
# describe_imports_prompt, call describe_imports_llm and parse the reply with
# StrOutputParser.
# NOTE(review): the parser presumably keeps only the reply's text, so a reply
# that consists of tool calls would come out as an empty string — confirm
# before re-enabling bind_tools on the model.
describe_imports_chain = (
    {"code_language": itemgetter("code_language"), "imports": itemgetter("imports")}
    | describe_imports_prompt | describe_imports_llm | StrOutputParser()
)

In [11]:
class AgentState(TypedDict):
  # State schema handed to StateGraph: a single "messages" list.
  # add_messages is attached as the reducer for this key — presumably node
  # output is merged into the existing list rather than replacing it (confirm
  # against the LangGraph docs).
  messages: Annotated[list, add_messages]

In [12]:
# chain as a node
def call_imports_chain(state):
  """Graph node: run describe_imports_chain on the request held in the state.

  The request is a message whose content is a JSON object with the keys
  "code_language" and "imports".

  Args:
    state: Graph state; state["messages"] is the conversation so far.

  Returns:
    {"messages": [AIMessage]} where the message content is the chain's reply.

  Raises:
    json.JSONDecodeError: The request content is not valid JSON.
    KeyError: The JSON lacks "code_language" or "imports".
  """
  print("Starting chain function")
  messages = state["messages"]
  # Use the most recent human message as the request. The graph routes
  # imports_action back into this node, and on that pass the last message is
  # the tool output rather than the JSON request. Falls back to the last
  # message when no human message is present.
  last_message = next(
      (message for message in reversed(messages) if getattr(message, "type", None) == "human"),
      messages[-1],
  )
  print(f'last message is \n {last_message}')
  content = json.loads(last_message.content)
  print(f'content is {content}')
  print(type(content))
  chain_input = {"code_language": content['code_language'], 
                 "imports": content['imports']}
  print(f'chain_input is {chain_input}')
  print(type(chain_input))
  response = describe_imports_chain.invoke(chain_input)
  print(f"Chain response: {response}")
  return {"messages": [AIMessage(content=response)]}
#   return {
#           "messages": [{
#               "role": "assistant",
#               "content": response
#           }]
#       }

In [13]:
# bind model to tool or ToolNode
imports_tool_node = ToolNode(search_packages_tools)


In [14]:
# construct graph and compile
# Two nodes share AgentState: "imports_agent" runs the describing chain and
# "imports_action" runs the ToolNode built from search_packages_tools.
uncompiled_imports_graph = StateGraph(AgentState)
uncompiled_imports_graph.add_node("imports_agent", call_imports_chain)
uncompiled_imports_graph.add_node("imports_action", imports_tool_node)
# Every run starts at the describing chain.
uncompiled_imports_graph.set_entry_point("imports_agent")
def should_continue(state):
  """Choose the next hop after the imports_agent node.

  Args:
    state: Graph state; only the last entry of state["messages"] is inspected.

  Returns:
    "imports_action" when the last message carries a non-empty tool_calls
    list, otherwise END.
  """
  last_message = state["messages"][-1]

  # Read tool_calls defensively: a message type without that attribute means
  # "no tools requested" rather than an AttributeError.
  if getattr(last_message, "tool_calls", None):
    return "imports_action"

  return END

# After imports_agent, should_continue picks the next hop: either the
# "imports_action" node or END.
uncompiled_imports_graph.add_conditional_edges(
    "imports_agent",
    should_continue
)

# Output of the tool node is always handed back to imports_agent.
uncompiled_imports_graph.add_edge("imports_action", "imports_agent")

# Compiled graph object used by the invoke / astream cells.
compiled_imports_graph = uncompiled_imports_graph.compile()


In [None]:
# invoke graph

In [15]:
file_path = "/home/melghorab/maven_nlp_course/code/Project/Code/test_code.py"
file_content = read_python_file(file_path)
file_content

'import os\nimport getpass\nfrom operator import itemgetter\n\n\n#LangChain, LangGraph\nfrom langchain_openai import ChatOpenAI\nfrom langgraph.graph import START, StateGraph\nfrom typing_extensions import List, TypedDict\nfrom langchain_core.documents import Document\nfrom langchain_core.prompts import ChatPromptTemplate\nfrom langchain.schema.output_parser import StrOutputParser\n\ndef read_python_file(file_path):\n    try:\n        with open(file_path, \'r\', encoding=\'utf-8\') as f:\n            content = f.read()\n        return content\n    except FileNotFoundError:\n        print(f"File not found: {file_path}")\n        raise\n    except IOError as e:\n        print(f"Error reading file {file_path}: {str(e)}")\n        raise\n    except Exception as e:\n        print(f"Unexpected error reading file {file_path}: {str(e)}")\n        raise'

In [16]:
imports = extract_imports(file_content, file_path)
imports

['import os',
 'import getpass',
 'from operator import itemgetter',
 'from langchain_openai import ChatOpenAI',
 'from langgraph.graph import START, StateGraph',
 'from typing_extensions import List, TypedDict',
 'from langchain_core.documents import Document',
 'from langchain_core.prompts import ChatPromptTemplate',
 'from langchain.schema.output_parser import StrOutputParser']

In [17]:
# input_message = HumanMessage(content=json.dumps({
#     "code_language": "python",
#     "imports": imports
# }))

# result = compiled_imports_graph.invoke({
#             "messages": [input_message]
#         })

# result

# initial_state = {
#     "messages": [{
#         "code_language": "python",
#         "imports": imports
#     }]
# }

initial_state = {
    "messages": [{
        "role": "human",
        "content": json.dumps({
            "code_language": "python",
            "imports": imports
        })
    }]
}
result = compiled_imports_graph.invoke(initial_state)
result

# inputs = {"messages" : [HumanMessage(content={"code_language": "python", "imports": imports})]}

Starting chain function
last message is 
 content='{"code_language": "python", "imports": ["import os", "import getpass", "from operator import itemgetter", "from langchain_openai import ChatOpenAI", "from langgraph.graph import START, StateGraph", "from typing_extensions import List, TypedDict", "from langchain_core.documents import Document", "from langchain_core.prompts import ChatPromptTemplate", "from langchain.schema.output_parser import StrOutputParser"]}' additional_kwargs={} response_metadata={} id='86b01ffc-c0cf-4f76-87d6-81aed0ddb10e'
content is {'code_language': 'python', 'imports': ['import os', 'import getpass', 'from operator import itemgetter', 'from langchain_openai import ChatOpenAI', 'from langgraph.graph import START, StateGraph', 'from typing_extensions import List, TypedDict', 'from langchain_core.documents import Document', 'from langchain_core.prompts import ChatPromptTemplate', 'from langchain.schema.output_parser import StrOutputParser']}
<class 'dict'>
chain_

{'messages': [HumanMessage(content='{"code_language": "python", "imports": ["import os", "import getpass", "from operator import itemgetter", "from langchain_openai import ChatOpenAI", "from langgraph.graph import START, StateGraph", "from typing_extensions import List, TypedDict", "from langchain_core.documents import Document", "from langchain_core.prompts import ChatPromptTemplate", "from langchain.schema.output_parser import StrOutputParser"]}', additional_kwargs={}, response_metadata={}, id='86b01ffc-c0cf-4f76-87d6-81aed0ddb10e'),
  AIMessage(content='{"Imported_Packages": [\n    {"name": "os", "desc": "A standard library that provides a way of using operating system-dependent functionality like reading or writing to the file system."},\n    {"name": "getpass", "desc": "A standard library that provides a way to securely handle password prompts without echoing input."},\n    {"name": "operator", "desc": "A standard library that exports a set of functions corresponding to the intrin

In [149]:
async for chunk in compiled_imports_graph.astream(initial_state, stream_mode="updates"):
    for node, values in chunk.items():
        print(f"Receiving update from node: '{node}'")
        print(values["messages"])
        print("\n\n")

Starting chain function
last message is 
 content='{"code_language": "python", "imports": ["import os", "import getpass", "from operator import itemgetter", "from langchain_openai import ChatOpenAI", "from langgraph.graph import START, StateGraph", "from typing_extensions import List, TypedDict", "from langchain_core.documents import Document", "from langchain_core.prompts import ChatPromptTemplate", "from langchain.schema.output_parser import StrOutputParser"]}' additional_kwargs={} response_metadata={} id='feeaeda4-a5a5-454f-bd0d-061b7bb9793b'
content is {'code_language': 'python', 'imports': ['import os', 'import getpass', 'from operator import itemgetter', 'from langchain_openai import ChatOpenAI', 'from langgraph.graph import START, StateGraph', 'from typing_extensions import List, TypedDict', 'from langchain_core.documents import Document', 'from langchain_core.prompts import ChatPromptTemplate', 'from langchain.schema.output_parser import StrOutputParser']}
<class 'dict'>
chain_

In [148]:
# response=describe_imports_chain.invoke({"code_language": "python", "imports": imports})
response=describe_imports_chain.invoke({'code_language': 'python', 'imports': ['import os', 'import getpass', 'from operator import itemgetter', 'from langchain_openai import ChatOpenAI', 'from langgraph.graph import START, StateGraph']})
response

''

In [155]:
response = describe_imports_llm.invoke("What tools do you have available?")
response

AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_gh46jGv1jZD196HTw3WxnaNZ', 'function': {'arguments': '{"package_name":"requests"}', 'name': 'search_pypi'}, 'type': 'function'}], 'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 18, 'prompt_tokens': 72, 'total_tokens': 90, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_13eed4fce1', 'finish_reason': 'tool_calls', 'logprobs': None}, id='run-d8ebc589-e82e-42d4-9c18-c020c81043fa-0', tool_calls=[{'name': 'search_pypi', 'args': {'package_name': 'requests'}, 'id': 'call_gh46jGv1jZD196HTw3WxnaNZ', 'type': 'tool_call'}], usage_metadata={'input_tokens': 72, 'output_tokens': 18, 'total_tokens': 90, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio

In [128]:
print("Testing tool directly:")
# Stored under its own name: `result` holds the graph output that other cells
# index as result['messages'], so rebinding it to this string would break them.
# The tool is called through .invoke with its argument dict rather than by
# calling the tool object like a plain function.
pandas_info = search_pypi.invoke({"package_name": "pandas"})
# search_pypi prints its own "Tool result: ..." line, so the line appears twice.
print(f"Tool result: {pandas_info}\n")

Testing tool directly:
Tool called for package: pandas
Tool result: {"name": "pandas", "summary": "Powerful data structures for data analysis, time series, and statistics"}
Tool result: {"name": "pandas", "summary": "Powerful data structures for data analysis, time series, and statistics"}



In [151]:
print(describe_imports_llm)


bound=ChatOpenAI(client=<openai.resources.chat.completions.completions.Completions object at 0x7f77e94f6ce0>, async_client=<openai.resources.chat.completions.completions.AsyncCompletions object at 0x7f77e94f7ac0>, root_client=<openai.OpenAI object at 0x7f77e94ec6a0>, root_async_client=<openai.AsyncOpenAI object at 0x7f77e94f4df0>, model_name='gpt-4o-mini', model_kwargs={}, openai_api_key=SecretStr('**********')) kwargs={'tools': [{'type': 'function', 'function': {'name': 'search_pypi', 'description': 'Search PyPI for Python package information. Input should be the package name.\n    Args: \n        package_name: name of the package', 'parameters': {'properties': {'package_name': {'type': 'string'}}, 'required': ['package_name'], 'type': 'object'}}}], 'tool_choice': 'required'} config={} config_factories=[]


In [18]:
result

{'messages': [HumanMessage(content='{"code_language": "python", "imports": ["import os", "import getpass", "from operator import itemgetter", "from langchain_openai import ChatOpenAI", "from langgraph.graph import START, StateGraph", "from typing_extensions import List, TypedDict", "from langchain_core.documents import Document", "from langchain_core.prompts import ChatPromptTemplate", "from langchain.schema.output_parser import StrOutputParser"]}', additional_kwargs={}, response_metadata={}, id='86b01ffc-c0cf-4f76-87d6-81aed0ddb10e'),
  AIMessage(content='{"Imported_Packages": [\n    {"name": "os", "desc": "A standard library that provides a way of using operating system-dependent functionality like reading or writing to the file system."},\n    {"name": "getpass", "desc": "A standard library that provides a way to securely handle password prompts without echoing input."},\n    {"name": "operator", "desc": "A standard library that exports a set of functions corresponding to the intrin

# Save imports as chunk in database

In [19]:
text = result['messages'][-1].content
text


'{"Imported_Packages": [\n    {"name": "os", "desc": "A standard library that provides a way of using operating system-dependent functionality like reading or writing to the file system."},\n    {"name": "getpass", "desc": "A standard library that provides a way to securely handle password prompts without echoing input."},\n    {"name": "operator", "desc": "A standard library that exports a set of functions corresponding to the intrinsic operators of Python."},\n    {"name": "langchain_openai", "desc": "A library that provides tools for building applications with OpenAI\'s language processing capabilities."},\n    {"name": "langgraph", "desc": "I don\'t know details about this package"},\n    {"name": "typing_extensions", "desc": "A backport of recent additions to the typing module in Python, providing support for type hints."},\n    {"name": "langchain_core", "desc": "I don\'t know details about this package"},\n    {"name": "langchain.schema.output_parser", "desc": "I don\'t know det

In [20]:
# Qdrant client created with ":memory:" instead of a host/port.
qdrant_client = QdrantClient(":memory:")  # Use in-memory or provide host/port

# Embedding model used both for indexing and for queries through the retriever.
embedding_model = OpenAIEmbeddings(model="text-embedding-3-small")

# Chunks to index; currently just the import-description text in `text`.
chunks = [
    {"type": "Imported Packages", "name": "Imported Packages", "content": text},
    
]


# One Document per chunk. page_content is "type - name - content"; the whole
# chunk dict is also copied into metadata, so the content is held twice.
docs = [
    Document(
        page_content=f"{chunk['type']} - {chunk['name']} - {chunk['content']}",  # Content for the model
        metadata={**chunk}  # Store metadata, but don't put embeddings here
    )
    for chunk in chunks
]
# Vector size of the collection — assumed to equal the output dimension of
# text-embedding-3-small; TODO confirm if the model is changed.
embedding_dim = 1536
qdrant_client.create_collection(
    collection_name="description_rag_data",
    vectors_config=VectorParams(size=embedding_dim, distance=Distance.COSINE),
)

# Wrap the collection as a vector store and add the documents to it.
vectorstore = Qdrant(qdrant_client, collection_name="description_rag_data", embeddings=embedding_model)
vectorstore.add_documents(docs)


# Retriever used by main_chain; search_kwargs asks for k=3 documents per query.
qdrant_retriever = vectorstore.as_retriever(search_kwargs={"k": 3})

# # Define the prompt template
# rag_prompt = PromptTemplate.from_template(
#     "Given the following retrieved documents:\n{context}\nAnswer the question: {question}"
# )

# # Define the LLM model (OpenAI ChatGPT)
# openai_chat_model = ChatOpenAI(model_name="gpt-4", temperature=0.7)

# # Define the RAG chain
# rag_chain = (
#     {"context": itemgetter("question") | qdrant_retriever, "question": itemgetter("question")}
#     | rag_prompt
#     | openai_chat_model
#     | StrOutputParser()
# )

# # Example query
# query = {"question": "What are the company policies?"}
# response = rag_chain.invoke(query)

# # Print response
# print("\nAI Response:", response)

  embedding_model = OpenAIEmbeddings(model="text-embedding-3-small")
  vectorstore = Qdrant(qdrant_client, collection_name="description_rag_data", embeddings=embedding_model)


In [None]:
# from langchain_openai.embeddings import OpenAIEmbeddings

# embedding_model = OpenAIEmbeddings(model="text-embedding-3-small")

# embedding_dim = 1536

# from langchain_qdrant import QdrantVectorStore
# from qdrant_client import QdrantClient
# from qdrant_client.http.models import Distance, VectorParams

# client = QdrantClient(":memory:")

# client.create_collection(
#     collection_name="lcel_doc_v1",
#     vectors_config=VectorParams(size=embedding_dim, distance=Distance.COSINE),
# )

# vector_store = QdrantVectorStore(
#     client=client,
#     collection_name="lcel_doc_v1",
#     embedding=embedding_model,
# )

# _ = vector_store.add_texts(texts=chunks)

# retriever = vector_store.as_retriever(search_kwargs={"k": 2})



# Call main LLM

In [None]:
imports_result = result['messages']

In [27]:
result['messages'][-1].content

str

In [21]:
# Model that answers user questions; temperature is pinned to 0.
main_llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)
# NOTE: system_template / human_template are rebound here; the
# import-description prompt cell assigns the same two names.
# The system prompt takes {code_language} and {context}.
system_template = """You are an expert {code_language} developer.
Your role is to answer user's questions about code and its description that will be given to you in context

Rules for the output:
1. Don't answer out of context questions
2. Provide a single, clear response using only the given context
3. If needed, structure long responses in lists and sections

<context>
{context}
</context>
"""

# The user's question is passed through unchanged as the human message.
human_template = "{query}"
main_llm_prompt = ChatPromptTemplate.from_messages([
    ("system", system_template),
    ("human", human_template)
])

# main_chain = (
#     {"code_language": itemgetter("code_language"),"context": itemgetter("context"), "query": itemgetter("query")}
#     | main_llm_prompt | main_llm | StrOutputParser()
# )

def _join_page_contents(retrieved_docs):
    """Flatten retrieved documents into the plain text placed inside <context>."""
    # Without this step the prompt receives str() of the Document list, i.e.
    # Python reprs in which each chunk's content also appears in its metadata.
    return "\n\n".join(doc.page_content for doc in retrieved_docs)


# Retrieval-augmented answer chain. Input: {"code_language": ..., "query": ...}.
# The query is used both to retrieve context from Qdrant and as the human turn.
main_chain = (
    {
        "context": itemgetter("query") | qdrant_retriever | _join_page_contents,
        "code_language": itemgetter("code_language"),
        "query": itemgetter("query"),
    }
    | main_llm_prompt
    | main_llm
    | StrOutputParser()
)



In [22]:
context = result['messages'][-1].content
context

'{"Imported_Packages": [\n    {"name": "os", "desc": "The os module provides a way of using operating system dependent functionality like reading or writing to the file system."},\n    {"name": "getpass", "desc": "The getpass module provides a secure way to handle password prompts and enables users to input passwords without echoing them to the console."},\n    {"name": "operator", "desc": "The operator module provides a set of efficient functions corresponding to the intrinsic operators of Python, allowing for functional programming."},\n    {"name": "langchain_openai", "desc": "LangChain for OpenAI integrates OpenAI models into workflows, enabling building applications that leverage language models."},\n    {"name": "langgraph", "desc": "No knowledge of this package"},\n    {"name": "typing_extensions", "desc": "The typing_extensions module provides backports of type hints from the standard library\'s typing module, making them available in earlier Python versions."},\n    {"name": "

In [22]:
# response=main_chain.invoke({'code_language': 'Python', 'context': context, 'query': 'What are the packages used in this code?'})
# response
# response=main_chain.invoke({'code_language': 'python', 'context': context, 'query': 'What are the packages used in this code?'})
# response

# inputs = {
#     'code_language': 'Python',
#     'context': context,
#     'query': 'What are the packages used in this code?'
# }

inputs = {
    'code_language': 'Python',
    'query': 'What are the packages used in this code?'
}
print(inputs)  # Debugging step
response = main_chain.invoke(inputs)
print(response)
response


{'code_language': 'Python', 'query': 'What are the packages used in this code?'}
The packages used in the code are:

1. **os**: A standard library that provides a way of using operating system-dependent functionality like reading or writing to the file system.
2. **getpass**: A standard library that provides a way to securely handle password prompts without echoing input.
3. **operator**: A standard library that exports a set of functions corresponding to the intrinsic operators of Python.
4. **langchain_openai**: A library that provides tools for building applications with OpenAI's language processing capabilities.
5. **langgraph**: Details about this package are not known.
6. **typing_extensions**: A backport of recent additions to the typing module in Python, providing support for type hints.
7. **langchain_core**: Details about this package are not known.
8. **langchain.schema.output_parser**: Details about this package are not known.


"The packages used in the code are:\n\n1. **os**: A standard library that provides a way of using operating system-dependent functionality like reading or writing to the file system.\n2. **getpass**: A standard library that provides a way to securely handle password prompts without echoing input.\n3. **operator**: A standard library that exports a set of functions corresponding to the intrinsic operators of Python.\n4. **langchain_openai**: A library that provides tools for building applications with OpenAI's language processing capabilities.\n5. **langgraph**: Details about this package are not known.\n6. **typing_extensions**: A backport of recent additions to the typing module in Python, providing support for type hints.\n7. **langchain_core**: Details about this package are not known.\n8. **langchain.schema.output_parser**: Details about this package are not known."

# Write in Document

In [24]:
docs

[Document(metadata={'type': 'Imported Packages', 'name': 'Imported Packages', 'content': '{"Imported_Packages": [\n    {"name": "os", "desc": "The os module provides a way of using operating system dependent functionality like reading or writing to the file system."},\n    {"name": "getpass", "desc": "The getpass module provides a secure way to handle password prompts and enables users to input passwords without echoing them to the console."},\n    {"name": "operator", "desc": "The operator module provides a set of efficient functions corresponding to the intrinsic operators of Python, allowing for functional programming."},\n    {"name": "langchain_openai", "desc": "LangChain for OpenAI integrates OpenAI models into workflows, enabling building applications that leverage language models."},\n    {"name": "langgraph", "desc": "No knowledge of this package"},\n    {"name": "typing_extensions", "desc": "The typing_extensions module provides backports of type hints from the standard libra

In [28]:
#extract description chunks from database
collection_name = "description_rag_data"
all_points = qdrant_client.scroll(collection_name=collection_name, limit=1000)[0]  # Adjust limit if needed
one_chunk = all_points[0].payload
one_chunk

{'page_content': 'Imported Packages - Imported Packages - {"Imported_Packages": [\n    {"name": "os", "desc": "The os module provides a way of using operating system dependent functionality like reading or writing to the file system."},\n    {"name": "getpass", "desc": "The getpass module provides a secure way to handle password prompts and enables users to input passwords without echoing them to the console."},\n    {"name": "operator", "desc": "The operator module provides a set of efficient functions corresponding to the intrinsic operators of Python, allowing for functional programming."},\n    {"name": "langchain_openai", "desc": "LangChain for OpenAI integrates OpenAI models into workflows, enabling building applications that leverage language models."},\n    {"name": "langgraph", "desc": "No knowledge of this package"},\n    {"name": "typing_extensions", "desc": "The typing_extensions module provides backports of type hints from the standard library\'s typing module, making them

In [None]:
@tool
def write_document(
    content: Annotated[str, "Text content to be written into the document."],
    file_name: Annotated[str, "File path to save the document."],
) -> Annotated[str, "Path of the saved document file."]:
    """Create and save a text document."""
    # The docstring and the Annotated descriptions are left verbatim; they are
    # presumably what @tool exposes to the model as the tool schema.
    # NOTE(review): WORKING_DIRECTORY is not defined in the visible notebook. It
    # has to be a pathlib.Path-like object (it is combined with `/` and
    # `.open`) bound before this tool is called — confirm where it comes from.
    # NOTE(review): file_name is presumably chosen by the model; an absolute
    # path or ".." segments would write outside WORKING_DIRECTORY.
    # Explicit UTF-8 (as read_python_file uses) so non-ASCII text does not
    # depend on the platform's default encoding.
    with (WORKING_DIRECTORY / file_name).open("w", encoding="utf-8") as file:
        file.write(content)
    return f"Document saved to {file_name}"



In [29]:
one_chunk

{'page_content': 'Imported Packages - Imported Packages - {"Imported_Packages": [\n    {"name": "os", "desc": "The os module provides a way of using operating system dependent functionality like reading or writing to the file system."},\n    {"name": "getpass", "desc": "The getpass module provides a secure way to handle password prompts and enables users to input passwords without echoing them to the console."},\n    {"name": "operator", "desc": "The operator module provides a set of efficient functions corresponding to the intrinsic operators of Python, allowing for functional programming."},\n    {"name": "langchain_openai", "desc": "LangChain for OpenAI integrates OpenAI models into workflows, enabling building applications that leverage language models."},\n    {"name": "langgraph", "desc": "No knowledge of this package"},\n    {"name": "typing_extensions", "desc": "The typing_extensions module provides backports of type hints from the standard library\'s typing module, making them

In [35]:
input_text = f"type: {one_chunk['metadata']['type']} \nname: {one_chunk['metadata']['name']} \ncontent: {one_chunk['metadata']['content']}"
print(input_text)

type: Imported Packages 
name: Imported Packages 
content: {"Imported_Packages": [
    {"name": "os", "desc": "The os module provides a way of using operating system dependent functionality like reading or writing to the file system."},
    {"name": "getpass", "desc": "The getpass module provides a secure way to handle password prompts and enables users to input passwords without echoing them to the console."},
    {"name": "operator", "desc": "The operator module provides a set of efficient functions corresponding to the intrinsic operators of Python, allowing for functional programming."},
    {"name": "langchain_openai", "desc": "LangChain for OpenAI integrates OpenAI models into workflows, enabling building applications that leverage language models."},
    {"name": "langgraph", "desc": "No knowledge of this package"},
    {"name": "typing_extensions", "desc": "The typing_extensions module provides backports of type hints from the standard library's typing module, making them avail

In [36]:
# Model and prompt that turn one chunk's "type / name / content" text into
# structured documentation text.
documenter_llm = ChatOpenAI(model="gpt-4o-mini")
# NOTE: rebinds system_template / human_template, which other prompt cells also assign.
system_template = """You are an expert code documenter.
Your role is to write a well structured document that describes code functionality.

From the given context:
1- type: is the type of the code block (funciton, class, ..)
2- name: is the name of the code block
3- content: is the description of the code block

Instructions:
Write a docx document with the following structure Heading 1(type) -> Heading 2(name) -> content

Rules for the output:
1. Don't write information out of context
2. If needed, structure long responses in lists and sections

<context>
{context}
</context>
"""

# NOTE(review): human_template is assigned but not used below — the prompt is
# built from the system message only. Confirm whether a human turn was intended.
human_template = "{code_block}"

documenter_llm_prompt = ChatPromptTemplate.from_messages([
    ("system", system_template),
])

# Input: {"context": <text>}; output: the model's reply parsed by StrOutputParser.
documenter_chain = (
    {"context": itemgetter("context")}
    | documenter_llm_prompt
    | documenter_llm
    | StrOutputParser()
)


In [38]:
document_response = documenter_chain.invoke({"context": input_text})
print(document_response)


# Imported Packages 

## Imported Packages 

The following is a list of imported packages used in the code, detailing their names and descriptions:

1. **os**
   - The os module provides a way of using operating system dependent functionality like reading or writing to the file system.

2. **getpass**
   - The getpass module provides a secure way to handle password prompts and enables users to input passwords without echoing them to the console.

3. **operator**
   - The operator module provides a set of efficient functions corresponding to the intrinsic operators of Python, allowing for functional programming.

4. **langchain_openai**
   - LangChain for OpenAI integrates OpenAI models into workflows, enabling building applications that leverage language models.

5. **langgraph**
   - No knowledge of this package.

6. **typing_extensions**
   - The typing_extensions module provides backports of type hints from the standard library's typing module, making them available in earlier Pytho

In [52]:
# Import python-docx's factory under an alias: the bare name `Document` is already
# bound to LangChain's Document (imported in the first cell) and is used later in
# this notebook with `page_content=` / `metadata=`. Rebinding it here would break
# those cells on a top-to-bottom run.
from docx import Document as DocxDocument
import re

# @tool
def write_to_docx(documentation_text: str) -> str:
    """
    Writes the AI-generated documentation to a .docx file and returns the file path.

    The text is processed line by line:
      * a line starting with "# " becomes a level-1 heading,
      * a line starting with "## " becomes a level-2 heading,
      * every other line (including blank lines) becomes a plain paragraph.

    Inline Markdown such as **bold** is not interpreted; it is written as-is.

    Args:
        documentation_text: Documentation text using "# " / "## " line prefixes
            for headings.

    Returns:
        The path of the saved file ("generated_documentation.docx", relative to
        the current working directory). An existing file at that path is
        overwritten.
    """
    doc = DocxDocument()

    for line in documentation_text.split("\n"):
        if line.startswith("# "):  # Section heading
            doc.add_heading(line[2:], level=1)
        elif line.startswith("## "):  # Subsection heading
            doc.add_heading(line[3:], level=2)
        else:  # Normal paragraph
            doc.add_paragraph(line)

    # Save document
    file_path = "generated_documentation.docx"
    doc.save(file_path)
    return file_path  # Return file path for download

In [2]:
# Export the generated documentation to a .docx file; `file` holds the returned path.
# The cells defining `write_to_docx` and `document_response` must have been run in
# the current kernel session first, otherwise this raises NameError.
file = write_to_docx(document_response)

NameError: name 'write_to_docx' is not defined

# Evaluate using RAGAS

In [23]:
# Inspect the documents; each one carries 'type', 'name' and 'content' in its metadata.
docs

[Document(metadata={'type': 'Imported Packages', 'name': 'Imported Packages', 'content': '{"Imported_Packages": [\n    {"name": "os", "desc": "A standard library that provides a way of using operating system-dependent functionality like reading or writing to the file system."},\n    {"name": "getpass", "desc": "A standard library that provides a way to securely handle password prompts without echoing input."},\n    {"name": "operator", "desc": "A standard library that exports a set of functions corresponding to the intrinsic operators of Python."},\n    {"name": "langchain_openai", "desc": "A library that provides tools for building applications with OpenAI\'s language processing capabilities."},\n    {"name": "langgraph", "desc": "I don\'t know details about this package"},\n    {"name": "typing_extensions", "desc": "A backport of recent additions to the typing module in Python, providing support for type hints."},\n    {"name": "langchain_core", "desc": "I don\'t know details about t

In [27]:
# Re-wrap every entry of `docs` for RAGAS: the text stored under metadata['content']
# becomes the page content, and "<type> - <name>" is recorded as the source label.
ragas_docs = [
    Document(
        page_content=source_doc.metadata['content'],
        metadata={"source": f"{source_doc.metadata['type']} - {source_doc.metadata['name']}"},
    )
    for source_doc in docs
]
# Last expression of the cell: display the converted documents.
ragas_docs

[Document(metadata={'source': 'Imported Packages - Imported Packages'}, page_content='{"Imported_Packages": [\n    {"name": "os", "desc": "A standard library that provides a way of using operating system-dependent functionality like reading or writing to the file system."},\n    {"name": "getpass", "desc": "A standard library that provides a way to securely handle password prompts without echoing input."},\n    {"name": "operator", "desc": "A standard library that exports a set of functions corresponding to the intrinsic operators of Python."},\n    {"name": "langchain_openai", "desc": "A library that provides tools for building applications with OpenAI\'s language processing capabilities."},\n    {"name": "langgraph", "desc": "I don\'t know details about this package"},\n    {"name": "typing_extensions", "desc": "A backport of recent additions to the typing module in Python, providing support for type hints."},\n    {"name": "langchain_core", "desc": "I don\'t know details about this 

In [29]:
# Generate synthetic test questions from `ragas_docs` with RAGAS.
from ragas.llms import LangchainLLMWrapper
from ragas.embeddings import LangchainEmbeddingsWrapper
from ragas.testset import TestsetGenerator


# Wrap the LangChain chat model and embeddings in the RAGAS adapter classes.
generator_llm = LangchainLLMWrapper(ChatOpenAI(model="gpt-4o-mini"))
generator_embeddings = LangchainEmbeddingsWrapper(OpenAIEmbeddings())

# Build the generator and request a 10-sample testset from the prepared documents.
# This calls the OpenAI API; the recorded run took about 4.5 minutes for 10 samples.
generator = TestsetGenerator(llm=generator_llm, embedding_model=generator_embeddings)
dataset = generator.generate_with_langchain_docs(ragas_docs, testset_size=10)
# Last expression of the cell: show the generated testset in tabular form.
dataset.to_pandas()


Generating personas: 100%|██████████| 1/1 [00:01<00:00,  1.39s/it]                                         
Generating Scenarios: 100%|██████████| 1/1 [00:01<00:00,  1.33s/it]
Generating Samples: 100%|██████████| 10/10 [04:29<00:00, 26.97s/it]


Unnamed: 0,user_input,reference_contexts,reference,synthesizer_name
0,Can you explain the role of langchain_core in ...,"[{""Imported_Packages"": [\n {""name"": ""os"", ""...",The context does not provide specific details ...,single_hop_specifc_query_synthesizer
1,What is the os package in Python and how does ...,"[{""Imported_Packages"": [\n {""name"": ""os"", ""...",The os package is a standard library in Python...,single_hop_specifc_query_synthesizer
2,What typing_extensions do in Python?,"[{""Imported_Packages"": [\n {""name"": ""os"", ""...",typing_extensions is a backport of recent addi...,single_hop_specifc_query_synthesizer
3,Can you elaborate on the functionalities provi...,"[{""Imported_Packages"": [\n {""name"": ""os"", ""...",The 'os' package is a standard library in Pyth...,single_hop_specifc_query_synthesizer
4,What is the purpose of the langgraph package i...,"[{""Imported_Packages"": [\n {""name"": ""os"", ""...",The context does not provide specific details ...,single_hop_specifc_query_synthesizer
5,What is the purpose of the langgraph package i...,"[{""Imported_Packages"": [\n {""name"": ""os"", ""...",The context does not provide specific details ...,single_hop_specifc_query_synthesizer
6,What is the role of langchain_core in Python d...,"[{""Imported_Packages"": [\n {""name"": ""os"", ""...",The context does not provide specific details ...,single_hop_specifc_query_synthesizer
7,What langchain.schema.output_parser do?,"[{""Imported_Packages"": [\n {""name"": ""os"", ""...",I don't know details about this package.,single_hop_specifc_query_synthesizer
8,What the os package do in Python and how it he...,"[{""Imported_Packages"": [\n {""name"": ""os"", ""...",The os package is a standard library that prov...,single_hop_specifc_query_synthesizer
9,Can you tell me what langchain_core is used fo...,"[{""Imported_Packages"": [\n {""name"": ""os"", ""...",I don't know details about the langchain_core ...,single_hop_specifc_query_synthesizer
