<a href="https://colab.research.google.com/github/Muhammad-BILAL-5/Coding-Assistant/blob/main/Self_Corrective_Coding_Assistant.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install --upgrade --quiet langchain langgraph langchain_cohere langchain_mistralai langsmith langchain_community langchain_core langchain_huggingface langchain-groq langchain-anthropic langchain-google-genai langchain-chroma faiss-cpu

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/67.3 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m67.3/67.3 kB[0m [31m4.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.4/44.4 kB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.4/44.4 kB[0m [31m2.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.5/43.5 kB[0m [31m2.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.1/44.1 kB[0m [31m2.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m [31m20.2 MB/s[0m eta [36m0:00

In [2]:
from google.colab import userdata
import os
# LangSmith tracing settings are plain constants, not secrets.
os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_PROJECT"] = "Self-Corrective-Coding-Assistant"

# (environment variable, name of the Colab secret that holds its value)
for env_var, secret_name in (
    ("HUGGINGFACEHUB_API_TOKEN", "HF_TOKEN"),
    ("GOOGLE_API_KEY", "Gemini_Api_Key"),
    ("LANGCHAIN_API_KEY", "langchai_api_key"),
    ("ANTHROPIC_API_KEY", "anthropic_api_key"),
    ("COHERE_API_KEY", "cohere_api_key"),
    ("MISTRAL_API_KEY", "mistral_key"),
):
    os.environ[env_var] = userdata.get(secret_name)

In [3]:
from bs4 import BeautifulSoup as Soup
from langchain_community.document_loaders.recursive_url_loader import RecursiveUrlLoader

# LCEL docs: crawl starting from this page
url = "https://python.langchain.com/docs/concepts/lcel/"


def _html_to_text(html):
    """Strip markup from a fetched page and keep only its text."""
    return Soup(html, "html.parser").text


loader = RecursiveUrlLoader(url=url, max_depth=20, extractor=_html_to_text)
docs = loader.load()

# Order the pages by source URL, then flip that order, and glue the page
# texts together with a visible separator between documents.
d_sorted = sorted(docs, key=lambda page: page.metadata["source"])
d_reversed = d_sorted[::-1]
page_separator = "\n\n\n --- \n\n\n"
concatenated_content = page_separator.join(
    page.page_content for page in d_reversed
)

In [4]:
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.prompts import ChatPromptTemplate

### Gemini (structured code generation over the LCEL docs)

from pydantic import BaseModel, Field

# Data model
class code(BaseModel):
    """Schema for code solutions to questions about LCEL."""

    # NOTE(review): this class is passed to `with_structured_output`, so the
    # docstring and the Field descriptions below are presumably forwarded to
    # the model as the tool schema -- confirm before rewording any of them.

    # Natural-language explanation that precedes the code.
    prefix: str = Field(description="Description of the problem and approach")
    # Import statements only; code_check executes these on their own first.
    imports: str = Field(description="Code block import statements")
    # Remaining code; code_check executes it after the imports.
    code: str = Field(description="Code block not including import statements")


# Prompt to enforce tool use
# Prompt for the documentation-grounded generator. It takes two inputs:
#   {context}  - documentation text injected into the system message
#   {messages} - the running conversation, spliced in via the placeholder
code_gen_prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            """<instructions> You are a coding assistant with expertise in LCEL, LangChain expression language. \n
    Here is the LCEL documentation:  \n ------- \n  {context} \n ------- \n Answer the user  question based on the \n
    above provided documentation. Ensure any code you provide can be executed with all required imports and variables \n
    defined. Structure your answer: 1) a prefix describing the code solution, 2) the imports, 3) the functioning code block. \n
    Invoke the code tool to structure the output correctly. </instructions> \n Here is the user question:""",
        ),
        # Expands to the list of conversation messages supplied at invoke time.
        ("placeholder", "{messages}"),
    ]
)

# Gemini chat model; temperature 0 is requested for this model.
llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash-8b", temperature=0)

# include_raw=True makes the result a dict with 'raw', 'parsed' and
# 'parsing_error' keys, which check_output and parse_output rely on.
structured_llm = llm.with_structured_output(code, include_raw=True)


# Optional: Check for errors in case tool use is flaky
def check_output(tool_output):
    """Validate a structured-output result and pass it through unchanged.

    Args:
        tool_output: Mapping with 'raw', 'parsed' and 'parsing_error' keys.

    Returns:
        The same ``tool_output`` object when it holds a parsed result.

    Raises:
        ValueError: If parsing failed, or if nothing was parsed because the
            tool was not invoked.
    """
    parse_failure = tool_output["parsing_error"]

    # Case 1: the model answered but the answer could not be parsed
    if parse_failure:
        print("Parsing error!")
        raw_text = str(tool_output["raw"].content)
        raise ValueError(
            f"Error parsing your output! Be sure to invoke the tool. Output: {raw_text}. \n Parse error: {parse_failure}"
        )

    # Case 2: no parse error, but no structured result either
    if not tool_output["parsed"]:
        print("Failed to invoke tool!")
        raise ValueError(
            "You did not use the provided tool! Be sure to invoke the tool to structure the output."
        )

    return tool_output


# Pipeline: fill the prompt, call the structured Gemini model, then validate
# the returned dict with check_output (which raises on a bad result).
code_chain_raw = code_gen_prompt | structured_llm | check_output


def insert_errors(inputs):
    """Build the retry input that tells the model about its parsing error.

    The retry instruction is appended to a *copy* of the conversation so the
    caller's message list (which may be shared graph state) is left untouched.

    Args:
        inputs: Mapping with 'error' (the failure to report), 'messages'
            (the conversation so far) and 'context' (documentation text).

    Returns:
        dict: New 'messages' list ending with the retry instruction, plus the
        unchanged 'context'.
    """
    error = inputs["error"]
    retry_message = (
        "assistant",
        f"Retry. You are required to fix the parsing errors: {error} \n\n You must invoke the provided tool.",
    )
    return {
        # list(...) + [...] creates a fresh list instead of mutating the input
        "messages": list(inputs["messages"]) + [retry_message],
        "context": inputs["context"],
    }


# Fallback chain: first inject the error into the conversation, then run the
# normal generation chain again.
gemini_fallback_chain = insert_errors | code_chain_raw
N = 3  # Max re-tries
# The same fallback is registered N times; the raised exception is handed to
# each fallback under the "error" input key.
retry_chains = [gemini_fallback_chain for _ in range(N)]
code_gen_chain_re_try = code_chain_raw.with_fallbacks(
    fallbacks=retry_chains,
    exception_key="error",
)


def parse_output(solution):
    """Return only the parsed object from a structured-output result.

    With ``include_raw=True`` the structured call yields a dict holding
    'raw', 'parsed' and 'parsing_error'; downstream code wants just 'parsed'.
    """
    parsed = solution["parsed"]
    return parsed


# Final Gemini chain: generation with re-tries (to correct for failure to
# invoke the tool), followed by parse_output so callers receive the parsed
# solution object rather than the raw/parsed/parsing_error dict.
gemini_code_gen_chain = code_gen_chain_re_try | parse_output

In [5]:
from langchain_mistralai import ChatMistralAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field

# Model identifier passed to ChatMistralAI.
mistral_model = "codestral-latest"
# NOTE: this rebinds the module-level name `llm`. `structured_llm` was built
# from the previous value before this line runs, so it is not affected.
llm = ChatMistralAI(model=mistral_model, temperature=0)

# LLM: structured output using the same `code` schema as the Gemini chain.
# include_raw=False, so the chain returns the parsed result directly rather
# than a dict with 'raw'/'parsed'/'parsing_error'.
mistral_code_gen_chain = llm.with_structured_output(code, include_raw=False)

# Test: one smoke-test call issued when the cell runs (makes an API request).
question = "Write a function for fibonacci."
messages = [("user", question)]
result = mistral_code_gen_chain.invoke(messages)



For example, replace imports like: `from langchain_core.pydantic_v1 import BaseModel`
with: `from pydantic import BaseModel`
or the v1 compatibility namespace if you are working in a code base that has not been fully upgraded to pydantic 2 yet. 	from pydantic.v1 import BaseModel

  exec(code_obj, self.user_global_ns, self.user_ns)


In [6]:
### Router

from langchain_cohere import ChatCohere
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field


# Data model
class mistral(BaseModel):
    """
    General-purpose code generator backed by Mistral. Use mistral for programming questions that are not about retrieval augmented generation (RAG), Generative AI, LangChain or LangGraph agentic workflows.
    """

    # NOTE(review): this class is bound to the router as a tool, so the
    # docstring above is presumably what the router model sees as the tool
    # description. The previous text described an unrelated web-search tool.
    # The Field description below is also sent as-is and is left unchanged.
    query: str = Field(description="The query to use when Genrating code by Mistral.mistral can respond in HTML , CSS, JAVASCRIPT, PYTHON , C# , F# , C++ and JAVA , etc.")


class vectorstore(BaseModel):
    """
    A vectorstore containing documents used for generating code only related to retrieval augmented generation(RAG) , Gnerative AI , Langchain, Langraph Agentic workflows . Use the vectorstore for questions on these topics.
    """

    # NOTE(review): this class is bound to the router as a tool; the docstring
    # and Field description are presumably sent to the router model as the
    # tool schema, so they are runtime text -- confirm before editing them.
    # route_query matches on the tool name "vectorstore" to pick this route.
    query: str = Field(description="The query to use when searching the vectorstore.It includes knowledge only about LANGCHAIN, RAG and AI AGENTS ")


# Preamble
# Routing instructions passed to the Cohere model as its preamble.
# This is runtime text; edit with care.
preamble = """You are an expert at routing a user question to a vectorstore Code Generator or Mistral LLM Code Gnerator .
The vectorstore contains documents used for generating code only related to retrieval augmented generation(RAG) , Langchain , Langraph Agentic workflows .
Use the vectorstore for questions on these topics. Otherwise, use Mistral AI for generating code to do any other tasks for programers. Mistral AI Code generator can genrate code in HTML , CSS , python , javascript , java , C++ and many pther languages """

# LLM with tool use and preamble.
# NOTE: rebinds the module-level name `llm` to the Cohere model; chains built
# from earlier values of `llm` keep the object they were created with.
llm = ChatCohere(model="command-r", temperature=0)
# The two schema classes are offered as tools; the name of the tool the model
# calls is what route_query later uses as the routing decision.
structured_llm_router = llm.bind_tools(
    tools=[mistral, vectorstore], preamble=preamble
)

# Prompt: the user question is forwarded verbatim as a single human message.
route_prompt = ChatPromptTemplate.from_messages(
    [
        ("human", "{question}"),
    ]
)

question_router = route_prompt | structured_llm_router
# Smoke tests, run when the cell executes (each makes an API request): one
# question expected to hit the vectorstore tool, one expected to hit mistral.
response = question_router.invoke(
    {"question": "how to build agentic rag?"}
)
print(response.response_metadata["tool_calls"])
response = question_router.invoke({"question": "build a calculator using python?"})
print(response.response_metadata["tool_calls"])


[{'id': 'vectorstore_786wr5kdre1q', 'type': 'function', 'function': {'name': 'vectorstore', 'arguments': '{"query":"agentic rag"}'}}]
[{'id': 'mistral_09fr79n8npr7', 'type': 'function', 'function': {'name': 'mistral', 'arguments': '{"query":"python calculator code"}'}}]


In [7]:
#routing

def route_query(state):
    """
    Route the latest message to one of the two code generators.

    Args:
        state (dict): The current graph state; only "messages" is read

    Returns:
        state (dict): {"source": "vectorstore"} or {"source": "mistral"}

    Raises:
        ValueError: If the router made no tool call, or called a tool other
            than "vectorstore" / "mistral"
    """

    print("---ROUTE QUESTION---")
    latest = state["messages"][-1]

    # Messages may be (role, content) pairs or message objects; the router
    # prompt only needs the text, not the role wrapper.
    if isinstance(latest, (tuple, list)) and len(latest) == 2:
        question = latest[1]
    else:
        question = getattr(latest, "content", latest)

    source = question_router.invoke({"question": question})

    # .get: the key can be absent when the model answers without a tool call
    tool_calls = source.additional_kwargs.get("tool_calls") or []
    if not tool_calls:
        # Raising a bare string is a TypeError in Python 3; use an exception.
        raise ValueError("Router could not decide source")

    # Choose datasource
    datasource = tool_calls[0]["function"]["name"]

    if datasource == "vectorstore":
        print("---ROUTE QUESTION TO RAG---")
        return {"source": "vectorstore"}
    if datasource == "mistral":
        print("---ROUTE QUESTION TO MISTRAL---")
        return {"source": "mistral"}

    # Unknown tool name: fail here instead of returning None and letting the
    # next node crash on a missing "source".
    print("--- ERROR TO ROUTE QUESTION---")
    raise ValueError(f"Router selected an unknown source: {datasource!r}")



In [8]:
from typing import TypedDict , Dict
from langchain_core.messages import BaseMessage

class GraphState(TypedDict):
    """
    Represents the state of our graph.

    Attributes:
        error : "yes" / "no" flag for control flow, set by the code check
        source : Which generator handles the question ("vectorstore" or "mistral")
        messages : List of (role, content) entries: user question, error messages, reasoning
        generation : Code solution object (exposes prefix, imports and code)
        iterations : Number of tries
    """

    error: str
    source: str
    # A list of conversation entries, not a single string.
    messages: list
    # The structured solution returned by a generation chain, not a string.
    generation: object
    iterations: int

In [20]:
### Parameter

# Max tries: decide_to_finish compares the state's `iterations` counter
# against this value to stop retrying.
max_iterations = 2
# Reflect: when flag == "reflect", decide_to_finish routes a failed check to
# the reflect node; any other value sends it straight back to generate.
# flag = 'reflect'
flag = "do not reflect"

### Nodes


def generate(state: GraphState):
    """
    Generate a code solution

    Dispatches to the Mistral chain or the documentation-grounded Gemini
    chain according to state["source"], then records the answer in the
    conversation.

    Args:
        state (dict): The current graph state; reads "messages",
            "iterations", "error" and "source"

    Returns:
        state (dict): Updated "generation", "messages" and "iterations"

    Raises:
        ValueError: If state["source"] is neither "mistral" nor "vectorstore"
    """

    print("---GENERATING CODE SOLUTION---")

    # State (work on a copy so the incoming state list is not mutated)
    messages = list(state["messages"])
    iterations = state["iterations"]
    error = state["error"]
    source = state["source"]

    # We have been routed back to generation with an error
    if error == "yes":
        messages.append(
            (
                "user",
                "Now, try again. Invoke the code tool to structure the output with a prefix, imports, and code block:",
            )
        )

    # Solution
    if source == "mistral":
        code_solution = mistral_code_gen_chain.invoke(messages)
    elif source == "vectorstore":
        code_solution = gemini_code_gen_chain.invoke(
            {"context": concatenated_content, "messages": messages}
        )
    else:
        # Raising a bare string is a TypeError in Python 3; use an exception.
        raise ValueError("Router could not decide source")

    messages.append(
        (
            "assistant",
            f"{code_solution.prefix} \n Imports: {code_solution.imports} \n Code: {code_solution.code}",
        )
    )

    # Increment
    iterations = iterations + 1
    return {"generation": code_solution, "messages": messages, "iterations": iterations}


def code_check(state: GraphState):
    """
    Check code

    Executes the solution's imports, then imports plus code, and reports
    whether either step raised.

    WARNING(security): this runs model-generated code with exec() in the
    current process. The fresh namespace used below only isolates names; it
    is NOT a sandbox. Run this only in a disposable environment.

    Args:
        state (dict): The current graph state; reads "messages",
            "generation" and "iterations"

    Returns:
        state (dict): "generation", "messages" and "iterations" plus "error"
        set to "yes" (a check failed; the reason is appended to the
        messages) or "no"
    """

    print("---CHECKING CODE---")

    # State (work on a copy so the incoming state list is not mutated)
    messages = list(state["messages"])
    code_solution = state["generation"]
    iterations = state["iterations"]

    # Get solution components
    imports = code_solution.imports
    code = code_solution.code

    def _outcome(error_flag):
        """Build the state update shared by every exit path."""
        return {
            "generation": code_solution,
            "messages": messages,
            "iterations": iterations,
            "error": error_flag,
        }

    # Each exec gets its own fresh globals dict. Without one, exec inside a
    # function stores names in a throwaway locals mapping, so functions
    # defined by the solution cannot see the solution's own imports and valid
    # code fails with NameError. It also keeps the solution away from this
    # module's globals. "__main__" mirrors running the code as a script.

    # Check imports
    try:
        exec(imports, {"__name__": "__main__"})
    except Exception as e:
        print("---CODE IMPORT CHECK: FAILED---")
        messages.append(("user", f"Your solution failed the import test: {e}"))
        return _outcome("yes")

    # Check execution
    try:
        exec(imports + "\n" + code, {"__name__": "__main__"})
    except Exception as e:
        print("---CODE BLOCK CHECK: FAILED---")
        messages.append(
            ("user", f"Your solution failed the code execution test: {e}")
        )
        return _outcome("yes")

    # No errors
    print("---NO CODE TEST FAILURES---")
    return _outcome("no")


def reflect(state: GraphState):
    """
    Reflect on errors

    Asks the generator selected by state["source"] for another pass over the
    conversation and appends its answer as a reflection message. The stored
    "generation" is passed through unchanged.

    Args:
        state (dict): The current graph state; reads "messages",
            "iterations", "generation" and "source"

    Returns:
        state (dict): "generation" (unchanged), "messages" with the
        reflection appended, and "iterations" incremented by one

    Raises:
        ValueError: If state["source"] is neither "vectorstore" nor "mistral"
    """
    print("---REFLECTING---")
    print("---GENERATING CODE SOLUTION---")

    # State (work on a copy so the incoming state list is not mutated)
    messages = list(state["messages"])
    iterations = state["iterations"]
    code_solution = state["generation"]
    datasource = state["source"]

    if datasource == "vectorstore":
        reflections = gemini_code_gen_chain.invoke(
            {"context": concatenated_content, "messages": messages}
        )
    elif datasource == "mistral":
        reflections = mistral_code_gen_chain.invoke(messages)
    else:
        # Raising a bare string is a TypeError in Python 3; use an exception.
        raise ValueError("Router could not decide source")

    messages.append(
        ("assistant", f"Here are reflections on the error: {reflections}")
    )
    iterations = iterations + 1
    return {"generation": code_solution, "messages": messages, "iterations": iterations}


### Edges


def decide_to_finish(state: GraphState):
    """
    Determines whether to finish.

    Args:
        state (dict): The current graph state; reads "error" and "iterations"

    Returns:
        str: Next node to call: "end", "reflect" or "generate"

    """
    print("---DECIDING TO FINISH---")
    error = state["error"]
    iterations = state["iterations"]

    # `>=` rather than `==`: on the reflect path the counter is incremented
    # by both reflect and generate between two checks, so it can step past
    # max_iterations without ever being equal to it.
    if error == "no" or iterations >= max_iterations:
        print("---DECISION: FINISH---")
        return "end"

    print("---DECISION: RE-TRY SOLUTION---")
    if flag == "reflect":
        return "reflect"
    return "generate"


In [21]:
from langgraph.graph import StateGraph,END

workflow = StateGraph(GraphState)

# Nodes. decide_to_finish is deliberately NOT a node: it returns the name of
# the next step, so it is used only as the router of the conditional edge.
workflow.add_node("route_query", route_query)
workflow.add_node("generate", generate)
workflow.add_node("code_check", code_check)
workflow.add_node("reflect", reflect)

# Flow: route -> generate -> code_check -> (end | generate | reflect -> generate)
workflow.set_entry_point("route_query")
workflow.add_edge("route_query", "generate")
workflow.add_edge("generate", "code_check")
# The only way to END is through decide_to_finish after a check, so there is
# no unconditional generate -> END edge.
workflow.add_conditional_edges(
    "code_check",
    decide_to_finish,
    {"reflect": "reflect", "generate": "generate", "end": END},
)
workflow.add_edge("reflect", "generate")

graph = workflow.compile()


In [None]:
from langchain_core.messages import HumanMessage
# Example question for an end-to-end run of the graph.
question = "generate code for creating a div in html?"

# Initial state: one user message, zero tries so far and an empty error flag.
# "source" is not supplied here; the route_query node sets it.
solution = graph.invoke({"messages": [("user", question)], "iterations": 0, "error": ""})

In [None]:
# Show the code solution stored in the final graph state.
print(solution["generation"])