# MC Code RAG Workshop


### Dependencies


In [None]:
# %pip install --upgrade --quiet  langchain langchain-community langchainhub langchain-openai langchain-text-splitters langchain-groq langchain-ollama chromadb bs4 python-dotenv openai scikit-learn rank_bm25 dashscope

In [2]:
import os
from dotenv import load_dotenv,find_dotenv

# Load variables from a local .env file, if one can be found.
load_dotenv(find_dotenv())

# LangSmith tracing
import getpass

os.environ["LANGCHAIN_TRACING_V2"] = "true"
# Ask for the API key only when neither the environment nor the .env file
# provided one. Assigning the variable to itself would be a no-op when it is
# set and a bare KeyError when it is not; prompting keeps the secret out of
# the notebook source.
if not os.environ.get("LANGCHAIN_API_KEY"):
    os.environ["LANGCHAIN_API_KEY"] = getpass.getpass("LANGCHAIN_API_KEY: ")
 
import bs4
from langchain import hub
from langchain_community.document_loaders import WebBaseLoader, TextLoader, PyPDFLoader, PyPDFium2Loader
from langchain_community.vectorstores import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.prompts import PromptTemplate
from langchain_core.documents import Document

# Txt loader for the sample-code corpus.
# Alternative corpus: loader = TextLoader("./docs/WMX3UserManual_a.txt")
loader = TextLoader("./docs/WMX3API_MCEval_Samplecodes.txt")
docs = loader.load()
# docs[0].page_content[:100000]
len(docs)  # not the last expression of the cell, so the value is not displayed

# Generic text chunking (disabled):
# text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200, add_start_index=True)
# splits = text_splitter.split_documents(docs)

# Sample-code chunking with a dedicated separator.
# `` is the end of each code snippet; adjust based on actual document structure.
# Alternative separator: ['}']
separators = ["``"]
text_splitter = RecursiveCharacterTextSplitter(
    separators=separators,
    keep_separator=True,
    chunk_size=1000,
    chunk_overlap=200,
    add_start_index=True,
)
splits = text_splitter.split_documents(docs)


# Txt loader of the python function list, for BM25 search.
python_function_loader = TextLoader("./docs/WMX3API_FunctionPython.json")
python_function_docs = python_function_loader.load()

# Split on '{'; adjust based on actual document structure.
# NOTE(review): presumably each '{' opens one function entry in the JSON file — confirm.
python_function_separators = ["{"]
python_function_text_splitter = RecursiveCharacterTextSplitter(
    separators=python_function_separators,
    keep_separator=True,
    chunk_size=1000,
    chunk_overlap=200,
    add_start_index=True,
)
python_function_splits = python_function_text_splitter.split_documents(python_function_docs)


In [None]:
# Print the size (in characters) of each chunk, numbered from 1.
# A bare `splits[1]` ahead of the loop would never be displayed (only the last
# expression of a cell is) and would raise IndexError for a single-chunk
# corpus, so it is intentionally not evaluated here.
for i, split in enumerate(splits, start=1):
    print(f"Chunk {i} size: {len(split.page_content)}")

In [3]:


# Embedding model. Alternatives: text-embedding-3-large, text-embedding-ada-002, text-embedding-3-small
embedding_model = OpenAIEmbeddings(model="text-embedding-3-large")

if not os.path.exists("Vectorstore/chromadb-MCCoder"):
    # No persisted txt vectorstore yet: load from chunks and save to disk.
    vectorstore = Chroma.from_documents(
        documents=splits,
        embedding=embedding_model,
        persist_directory="Vectorstore/chromadb-MCCoder",
    )
else:
    # The txt vectorstore directory exists: open it instead of re-embedding.
    vectorstore = Chroma(
        embedding_function=embedding_model,
        persist_directory="Vectorstore/chromadb-MCCoder",
    )




# # If pdf vectorstore exists
# vectorstore_path = "Vectorstore/chromadb-MCCoder"
#  # Load from chunks and save to disk
# vectorstore = Chroma.from_documents(documents=splits, embedding=embedding_model,  persist_directory=vectorstore_path) 


In [4]:

# Similarity-search retriever over the vector store (k=10).
retriever = vectorstore.as_retriever(
    search_type="similarity",
    search_kwargs=dict(k=10),
)

# Try the retriever on a sample question; the trailing expression displays the hits.
retrieved_docs = retriever.invoke("Write python code to move axis 1 to 233")
retrieved_docs

[Document(metadata={'source': './docs/WMX3API_MCEval_Samplecodes.txt', 'start_index': 879}, page_content="``\n\n\n\n# Write python code to move Axis 1 by a 200 distance with velocity 2000.\n# Write python code to Start a relative position command of Axis 1 by a 200 distance and 2000 velocity.\n    # Axes = [1]\n\n    # Start a relative position command of Axis 1 with 200 distance and 2000 velocity.\n    # Create a command value of target as 200.\n    posCommand = Motion_PosCommand()\n    posCommand.profile.type = ProfileType.Trapezoidal\n    posCommand.axis = 1\n    posCommand.target = 200\n    posCommand.profile.velocity = 2000\n    posCommand.profile.acc = 10000\n    posCommand.profile.dec = 10000\n\n    # Execute command to move from current position to a specified distance relatively. e.g. 'Move 100..'\n    ret = Wmx3Lib_cm.motion.StartMov(posCommand)\n    if ret!=0:\n        print('StartMov error code is ' + str(ret) + ': ' + Wmx3Lib_cm.ErrorToString(ret))\n        return\n\n    #

In [6]:
# Same sample question as above, but querying the store directly so each hit
# comes back paired with its score.
docsws = vectorstore.similarity_search_with_score(
    'Write python code to move axis 1 to 233',
    k=10,
)
docsws

[(Document(metadata={'source': './docs/WMX3API_MCEval_Samplecodes.txt', 'start_index': 879}, page_content="``\n\n\n\n# Write python code to move Axis 1 by a 200 distance with velocity 2000.\n# Write python code to Start a relative position command of Axis 1 by a 200 distance and 2000 velocity.\n    # Axes = [1]\n\n    # Start a relative position command of Axis 1 with 200 distance and 2000 velocity.\n    # Create a command value of target as 200.\n    posCommand = Motion_PosCommand()\n    posCommand.profile.type = ProfileType.Trapezoidal\n    posCommand.axis = 1\n    posCommand.target = 200\n    posCommand.profile.velocity = 2000\n    posCommand.profile.acc = 10000\n    posCommand.profile.dec = 10000\n\n    # Execute command to move from current position to a specified distance relatively. e.g. 'Move 100..'\n    ret = Wmx3Lib_cm.motion.StartMov(posCommand)\n    if ret!=0:\n        print('StartMov error code is ' + str(ret) + ': ' + Wmx3Lib_cm.ErrorToString(ret))\n        return\n\n    

## Ensemble retriever (using Reciprocal Rank Fusion to rerank)

In [9]:
from langchain.retrievers import BM25Retriever, EnsembleRetriever
# Vector-store (Chroma) similarity retriever, k=5.
# Alternative: vectordb = vectorstore.as_retriever(search_kwargs={"k": 2})
retriever = vectorstore.as_retriever(
    search_type="similarity",
    search_kwargs=dict(k=5),
)

# BM25 retriever built from the same chunks, k=5.
bm25_retriever = BM25Retriever.from_documents(splits)
bm25_retriever.k = 5

# Ensemble retriever combining both retrievers with equal weights.
ensemble_retriever = EnsembleRetriever(
    retrievers=[bm25_retriever, retriever],
    weights=[0.5, 0.5],
)

ensemble_docs = ensemble_retriever.invoke("Write python code to move axis 111 to 233")

ensemble_docs



[Document(metadata={'source': './docs/WMX3API_MCEval_Samplecodes.txt', 'start_index': 3290}, page_content="``\n\n\n\n# Write python code to move Axis 0 to position 180 with 1000 velocity, and then move Axis 0 by a 200 distance and 2000 velocity.\n# Write python code to Start an absolute position command of Axis 0 to position 180 with 1000 velocity, and then start a relative position command of Axis 0 by a 200 distance and 2000 velocity.\n\n\n    # Axes = [0]\n    # Create a command value of target as 180.\n    posCommand = Motion_PosCommand()\n    posCommand.profile.type = ProfileType.Trapezoidal\n    posCommand.axis = 0\n    posCommand.target = 180\n    posCommand.profile.velocity = 1000\n    posCommand.profile.acc = 10000\n    posCommand.profile.dec = 10000\n\n    # Execute command to move from current position to specified absolute position.\n    ret = Wmx3Lib_cm.motion.StartPos(posCommand)\n    if ret!=0:\n        print('StartPos error code is ' + str(ret) + ': ' + Wmx3Lib_cm.Error

## Reranking (manual cosine-similarity comparison)

In [10]:
from sklearn.metrics.pairwise import cosine_similarity
# from utils.embeddings_utils import get_embedding, cosine_similarity
import numpy as np

from openai import OpenAI
# Module-level OpenAI client; get_embedding sends its embedding requests through it.
# NOTE(review): constructed without arguments — presumably credentials come from the environment; confirm.
client = OpenAI()

def get_embedding(text, model="text-embedding-3-large"):
    """Return the embedding of ``text`` produced by the module-level ``client``.

    Newlines in ``text`` are replaced with spaces before the request is made.

    Args:
        text: String to embed.
        model: Embedding model name passed to ``client.embeddings.create``.

    Returns:
        The ``embedding`` attribute of the first item in the response data.
    """
    single_line = text.replace("\n", " ")
    response = client.embeddings.create(input=[single_line], model=model)
    return response.data[0].embedding


# Reranking function
def rerank_documents(question, retrieved_docs, top_n=5):
    """Rerank retrieved documents by cosine similarity to the question.

    The question and the ``page_content`` of every document are embedded with
    ``get_embedding``; documents are then ordered by descending cosine
    similarity to the question, and the best ``top_n`` are printed and returned.

    Args:
        question: Query text to compare the documents against.
        retrieved_docs: Sequence of objects exposing a ``page_content`` string.
        top_n: Maximum number of documents to keep.

    Returns:
        A list of ``(document, similarity)`` tuples, most similar first.
        Empty when ``retrieved_docs`` is empty.
    """
    # Nothing to rank: return early rather than handing cosine_similarity an
    # empty embedding matrix (which raises), and skip the question embedding call.
    if not retrieved_docs:
        return []

    question_embedding = get_embedding(question)

    # One embedding request per document, in the same order as retrieved_docs.
    doc_embeddings = [get_embedding(doc.page_content) for doc in retrieved_docs]
    # cosine_similarity returns a 1 x len(docs) matrix; take its only row.
    similarities = cosine_similarity([question_embedding], doc_embeddings)[0]

    ranked_indices = np.argsort(similarities)[::-1]  # Sort by descending similarity
    ranked_docs_with_similarities = [(retrieved_docs[i], similarities[i]) for i in ranked_indices[:top_n]]

    for doc, similarity in ranked_docs_with_similarities:
        print(f"Document: {doc.page_content[:100]}... Similarity: {similarity:.4f}")

    return ranked_docs_with_similarities

question = "Write python code to move axis 1 to 233"

# Rerank the ensemble retriever's hits against the question, keeping the top 5;
# the trailing expression displays the (document, similarity) pairs.
ranked_docs = rerank_documents(
    question,
    ensemble_docs,
    5,
)
ranked_docs



Document: ``



# Write python code to move Axis 1 by a 200 distance with velocity 2000.
# Write python code t... Similarity: 0.5553
Document: ``



# Write python code to move Axis 0 to position 180 with 1000 velocity, and then move Axis 0 by... Similarity: 0.5498
Document: # Write python code to move Axis 0 to 180 with velocity 1000.
# Write python code to start an absolu... Similarity: 0.5317
Document: ``



# Write python code to Start an absolute position linear interpolation motion command of Axis ... Similarity: 0.5208
Document: ``



# Write python code to Start a relative triggered position command of Axis 1 with 180 distance... Similarity: 0.5132


[(Document(metadata={'source': './docs/WMX3API_MCEval_Samplecodes.txt', 'start_index': 879}, page_content="``\n\n\n\n# Write python code to move Axis 1 by a 200 distance with velocity 2000.\n# Write python code to Start a relative position command of Axis 1 by a 200 distance and 2000 velocity.\n    # Axes = [1]\n\n    # Start a relative position command of Axis 1 with 200 distance and 2000 velocity.\n    # Create a command value of target as 200.\n    posCommand = Motion_PosCommand()\n    posCommand.profile.type = ProfileType.Trapezoidal\n    posCommand.axis = 1\n    posCommand.target = 200\n    posCommand.profile.velocity = 2000\n    posCommand.profile.acc = 10000\n    posCommand.profile.dec = 10000\n\n    # Execute command to move from current position to a specified distance relatively. e.g. 'Move 100..'\n    ret = Wmx3Lib_cm.motion.StartMov(posCommand)\n    if ret!=0:\n        print('StartMov error code is ' + str(ret) + ': ' + Wmx3Lib_cm.ErrorToString(ret))\n        return\n\n    

# LLMs playground

## Groq / Tongyi / Ollama

In [66]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_groq import ChatGroq
from langchain_community.chat_models.tongyi import ChatTongyi
from langchain_core.messages import HumanMessage
from langchain_ollama import ChatOllama
import re

load_dotenv(find_dotenv())

# Preparation of documents for RAG-------------------------
# Vectorstore, for retrieval
# Embedding model. Alternatives: text-embedding-3-large, text-embedding-ada-002, text-embedding-3-small
embedding_model = OpenAIEmbeddings(model="text-embedding-3-large")

# Open the persisted vectorstore if it exists, otherwise build it from the chunks.
vectorstore_path = "Vectorstore/chromadb-MCCoder"
if os.path.exists(vectorstore_path):
    vectorstore = Chroma(
        embedding_function=embedding_model,
        persist_directory=vectorstore_path,
    )
    print("load from disk: " + vectorstore_path)
else:
    # Load from chunks and save to disk. The assignment is required: without it
    # this branch leaves `vectorstore` undefined (NameError on the next
    # statement in a fresh kernel) or silently reuses an object left over from
    # an earlier cell. Requires `splits` from the document-loading cell.
    vectorstore = Chroma.from_documents(
        documents=splits,
        embedding=embedding_model,
        persist_directory=vectorstore_path,
    )
    print("load from chunks")

retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 10})


def CoderLLM(user_question, code_context):
    """Ask the chat model to write Python code for a question, given sample-code context.

    A chat prompt is built from a fixed instruction template, piped into an
    Ollama chat model (``deepseek-coder-v2``, temperature 0.2) and invoked once.

    Args:
        user_question: Task description; substituted for ``{question}``.
        code_context: Retrieved sample-code text; substituted for ``{context}``.

    Returns:
        The value returned by invoking the ``prompt | model`` chain.
    """
    # Alternative providers (disabled):
    #
    # Groq
    # chatllm = ChatGroq(
    #     temperature=0.2,
    #     model="llama-3.1-8b-instant")   # llama-3.1-8b-instant,  llama3-70b-8192,  llama-3.1-70b-versatile, llama-3.1-405b-reasoning, mixtral-8x7b-32768
    #
    # Tongyi Qwen
    # chatllm = ChatTongyi(
    #     temperature=0.2,
    #     model="qwen-max")   # qwen-turbo, qwen-plus, qwen-max,  qwen-max-longcontext

    # Ollama. Other models: codellama:7b , codellama:34b, tinyllama, codegeex4, deepseek-coder-v2
    coder_model = ChatOllama(model="deepseek-coder-v2", temperature=0.2)

    # Prompt for code generation
    prompt_template = """Write a python code based on the following Question and Context. You need to choose the correct codes from the Context to answer the Question.
    1. Review the question carefully and find all the 'Axis number', IO Inputs and Outputs, and add them to the first lines of the generated code in the following format: 
    # Axes = [Axis number 1, Axis number 2, ...]
    # Inputs = [byte.bit 1, byte.bit 2, ...]
    # Outputs = [byte.bit 1, byte.bit 2, ...]
    For instance, if the question is '...Axis 9..., ...Axis 12..., ...Axis 2..., Input 0.3 and 1.2, ...Output 3.4 and 6.1', then 
    # Axes = [9, 12, 2]
    # Inputs = [0.3, 1.2, ...]
    # Outputs = [3.4, 6.1, ...]
    2. Include all the generated codes within one paragraph between ```python and ``` tags. 
    3. Don't import any library.
    4. Don't create any functions or example usage.
    5. You need to wait until the axis reaches the target position and stops, unless otherwise specified.
    ----------------------------------------------

    Question: 
    {question}

    Context: 
    {context}

        """

    code_prompt = ChatPromptTemplate.from_template(prompt_template)

    # Pipe the prompt into the model and run it once with both template values.
    generation_chain = code_prompt | coder_model
    return generation_chain.invoke({"context": code_context, "question": user_question})


def format_docs(docs):
    """Join the ``page_content`` of each document with a double newline.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string.

    Returns:
        One string with the contents in order; empty when ``docs`` is empty.
    """
    contents = [doc.page_content for doc in docs]
    return "\n\n".join(contents)

def extract_code(text):
    """Extract the Python code snippets fenced by ```python ... ``` in ``text``.

    Args:
        text: String that may contain one or more fenced Python blocks.

    Returns:
        The text found between each ```python and the next ```, exactly as
        matched; multiple snippets are joined with a ``---`` separator line
        surrounded by blank lines. Empty string when nothing matches.
    """
    # DOTALL lets "." span newlines so multi-line snippets are captured;
    # the non-greedy group stops at the first closing fence.
    fenced_python = re.compile(r"```python(.*?)```", re.DOTALL)
    snippets = [found.group(1) for found in fenced_python.finditer(text)]
    return "\n\n---\n\n".join(snippets)



from IPython.display import display, Markdown

user_question = 'Write python code to move Axis 19 as a distance of 32.0.'

# Retrieve sample code for the question and flatten it into one context string.
retrieval_result = format_docs(
    retriever.invoke(user_question)
)

# Generate the answer from the question plus the retrieved context.
answers = CoderLLM(
    user_question,
    retrieval_result,
)

print(answers.content)
# display(Markdown(str(answers)))

load from disk: Vectorstore/chromadb-MCCoder
 Here's the Python code that establishes synchronous control between master axis 0 and a slave axis 1, then moves Axis 0 to position 188 with velocity 1200:

```python
# Import necessary modules
from Wmx3Lib import *

# Initialize API buffer resources.
Wmx3Lib_buf = ApiBuffer()
Wmx3Lib_cm = CommandManager()

# Establish synchronous control between master axis 0 and a slave axis 1.
ret = Wmx3Lib_cm.sync.SetSyncMasterSlave(0, 1)
if ret != 0:
    print('SetSyncMasterSlave error code is ' + str(ret) + ': ' + Wmx3Lib_cm.ErrorToString(ret))
    return

# Create a command with target position 188 and velocity 1200 for the master axis (Axis 0).
posCommand = Motion_PosCommand()
posCommand.profile.type = ProfileType.Trapezoidal
posCommand.axis = 0
posCommand.target = 188
posCommand.profile.velocity = 1200
posCommand.profile.acc = 10000
posCommand.profile.dec = 10000

# Execute the command to move Axis 0 to the specified position with synchronization.
