# MC Code RAG Workshop


### Dependencies


In [None]:
# %pip install --upgrade --quiet  langchain langchain-community langchainhub langchain-openai chromadb bs4 python-dotenv

In [15]:
import os
from dotenv import load_dotenv,find_dotenv

# Use this line of code if you have a local .env file
load_dotenv(find_dotenv())

# LangSmith
import getpass
os.environ["LANGCHAIN_TRACING_V2"] = "true"
# Reuse the key already present in the environment (e.g. from .env);
# otherwise prompt for it, so a missing key does not surface as a bare
# KeyError and the secret never has to be written into the notebook.
if not os.environ.get("LANGCHAIN_API_KEY"):
    os.environ["LANGCHAIN_API_KEY"] = getpass.getpass("LANGCHAIN_API_KEY: ")
 
import bs4
from langchain import hub
from langchain_community.document_loaders import WebBaseLoader, TextLoader, PyPDFLoader, PyPDFium2Loader
from langchain_community.vectorstores import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.prompts import PromptTemplate
from langchain_core.documents import Document

# Text loader for the sample-code corpus.
# (Alternative corpus: "./docs/WMX3UserManual_a.txt")
loader = TextLoader("./docs/WMX3API_MCEval_Samplecodes.txt")
docs = loader.load()
# docs[0].page_content[:100000]
len(docs)

# Generic text chunking (no dedicated separators) would be:
# text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200, add_start_index=True)
# splits = text_splitter.split_documents(docs)

# Sample-code chunking with a dedicated separator: `` marks the end of each
# code snippet. Adjust to the actual document structure (e.g. ['}']).
separators = ["``"]
text_splitter = RecursiveCharacterTextSplitter(
    separators=separators,
    keep_separator=True,
    chunk_size=1000,
    chunk_overlap=200,
    add_start_index=True,
)
splits = text_splitter.split_documents(docs)


In [16]:
# Print the size (in characters) of each chunk, numbered from 1.
for chunk_number, split in enumerate(splits, start=1):
    print(f"Chunk {chunk_number} size: {len(split.page_content)}")

Chunk 1 size: 857
Chunk 2 size: 943
Chunk 3 size: 1447
Chunk 4 size: 1653
Chunk 5 size: 2325
Chunk 6 size: 586
Chunk 7 size: 1265
Chunk 8 size: 1285
Chunk 9 size: 1826
Chunk 10 size: 1665
Chunk 11 size: 3069
Chunk 12 size: 2779
Chunk 13 size: 4314
Chunk 14 size: 1487
Chunk 15 size: 1476
Chunk 16 size: 1966
Chunk 17 size: 115
Chunk 18 size: 1869
Chunk 19 size: 1623
Chunk 20 size: 1411
Chunk 21 size: 1287
Chunk 22 size: 1467
Chunk 23 size: 1773
Chunk 24 size: 1771
Chunk 25 size: 1907
Chunk 26 size: 3815
Chunk 27 size: 1709
Chunk 28 size: 2938
Chunk 29 size: 3243
Chunk 30 size: 3210
Chunk 31 size: 1409
Chunk 32 size: 2276
Chunk 33 size: 1251
Chunk 34 size: 1280
Chunk 35 size: 1373
Chunk 36 size: 2406
Chunk 37 size: 2577
Chunk 38 size: 3239
Chunk 39 size: 3115
Chunk 40 size: 4468
Chunk 41 size: 4838
Chunk 42 size: 2602
Chunk 43 size: 6790
Chunk 44 size: 5598
Chunk 45 size: 5457
Chunk 46 size: 5614
Chunk 47 size: 6043
Chunk 48 size: 6413
Chunk 49 size: 6254
Chunk 50 size: 6501
Chunk 51 size

In [17]:


# Embedding model shared by the vector store and its queries.
# Alternatives noted: text-embedding-ada-002, text-embedding-3-small
embedding_model = OpenAIEmbeddings(model="text-embedding-3-large")

vectorstore_path = "Vectorstore/chromadb-MCCoder"

if os.path.exists(vectorstore_path):
    # A persisted store exists: open it instead of re-embedding the chunks.
    vectorstore = Chroma(
        embedding_function=embedding_model,
        persist_directory=vectorstore_path,
    )
else:
    # No persisted store yet: build one from the chunks so `vectorstore` is
    # always bound for the cells that follow. The same directory is used so
    # that a later run can find it.
    # NOTE(review): this embeds only `splits`; confirm that is the intended
    # content of the store before relying on this branch.
    vectorstore = Chroma.from_documents(
        documents=splits,
        embedding=embedding_model,
        persist_directory=vectorstore_path,
    )




# # If pdf vectorstore exists
# vectorstore_path = "Vectorstore/chromadb-MCCoder"
#  # Load from chunks and save to disk
# vectorstore = Chroma.from_documents(documents=splits, embedding=embedding_model,  persist_directory=vectorstore_path) 


In [11]:

# Similarity retriever over the vector store, returning the 4 closest chunks.
retriever = vectorstore.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 4},
)

retrieved_docs = retriever.invoke(
    "Execute path interpolation with look ahead of Axis 0, 1 and 2 "
)
retrieved_docs

[Document(page_content='Overview \n\nPath interpolation with look ahead is a more advanced version of path interpolation with \nthe following features:', metadata={'source': './docs/WMX3UserManual_a.txt', 'start_index': 862601}),
 Document(page_content="``\n\n\n\n# Write python code to Execute path interpolation with look ahead of Axis 6 and Axis 1 with velocity 1000, consisting of four linear interpolations: (100,0),(100,100),(0,100),(0,0).\n# Axes = [6, 1]\n\n    Wmx3Lib_adv = AdvancedMotion(Wmx3Lib)\n\n    path = AdvMotion_PathIntplLookaheadCommand()\n    ret = Wmx3Lib_adv.advMotion.FreePathIntplLookaheadBuffer(0)\n    # Create the path interpolation with look ahead buffer\n    ret = Wmx3Lib_adv.advMotion.CreatePathIntplLookaheadBuffer(0, 1000)\n    if ret != 0:\n        print('CreatePathIntplLookaheadBuffer error code is ' + str(ret) + ': ' + Wmx3Lib_adv.ErrorToString(ret))\n        return\n\n    # Configure the path interpolation with look ahead channel\n    conf = AdvMotion_PathI

In [14]:
# Same query against the store directly, keeping the score of each hit.
docsws = vectorstore.similarity_search_with_score(
    'Execute path interpolation with look ahead of Axis 0, 1 and 2',
    k=4,
)
docsws

[(Document(page_content='Overview \n\nPath interpolation with look ahead is a more advanced version of path interpolation with \nthe following features:', metadata={'source': './docs/WMX3UserManual_a.txt', 'start_index': 862601}),
  0.7161130309104919),
 (Document(page_content="``\n\n\n\n# Write python code to Execute path interpolation with look ahead of Axis 6 and Axis 1 with velocity 1000, consisting of four linear interpolations: (100,0),(100,100),(0,100),(0,0).\n# Axes = [6, 1]\n\n    Wmx3Lib_adv = AdvancedMotion(Wmx3Lib)\n\n    path = AdvMotion_PathIntplLookaheadCommand()\n    ret = Wmx3Lib_adv.advMotion.FreePathIntplLookaheadBuffer(0)\n    # Create the path interpolation with look ahead buffer\n    ret = Wmx3Lib_adv.advMotion.CreatePathIntplLookaheadBuffer(0, 1000)\n    if ret != 0:\n        print('CreatePathIntplLookaheadBuffer error code is ' + str(ret) + ': ' + Wmx3Lib_adv.ErrorToString(ret))\n        return\n\n    # Configure the path interpolation with look ahead channel\n 

## Ensemble retriever (using Reciprocal Rank Fusion to rerank)

In [42]:
from langchain.retrievers import BM25Retriever, EnsembleRetriever
# vectordb = vectorstore.as_retriever(search_kwargs={"k": 2})
# Dense retriever: top-5 similarity search over the vector store.
retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 5})

# Sparse retriever: BM25 over the in-memory chunks, also limited to 5 hits.
bm25_retriever = BM25Retriever.from_documents(splits)
bm25_retriever.k = 5

# Ensemble retriever that merges both result lists with equal weights.
ensemble_retriever = EnsembleRetriever(retrievers=[bm25_retriever, retriever], weights=[0.5, 0.5])

# `invoke` is the entry point this notebook already uses for the plain
# retriever; `get_relevant_documents` is the deprecated spelling.
ensemble_docs = ensemble_retriever.invoke("write python code to Execute path interpolation with look ahead of Axis 0, 1 and 2")

ensemble_docs



[Document(page_content="``\n\n\n\n# Write python code to Execute path interpolation with look ahead of Axis 6 and Axis 1 with velocity 1000, consisting of four linear interpolations: (100,0),(100,100),(0,100),(0,0).\n# Axes = [6, 1]\n\n    Wmx3Lib_adv = AdvancedMotion(Wmx3Lib)\n\n    path = AdvMotion_PathIntplLookaheadCommand()\n    ret = Wmx3Lib_adv.advMotion.FreePathIntplLookaheadBuffer(0)\n    # Create the path interpolation with look ahead buffer\n    ret = Wmx3Lib_adv.advMotion.CreatePathIntplLookaheadBuffer(0, 1000)\n    if ret != 0:\n        print('CreatePathIntplLookaheadBuffer error code is ' + str(ret) + ': ' + Wmx3Lib_adv.ErrorToString(ret))\n        return\n\n    # Configure the path interpolation with look ahead channel\n    conf = AdvMotion_PathIntplLookaheadConfiguration()\n\n    conf.axisCount = 2\n    conf.SetAxis(0, 6)\n    conf.SetAxis(1, 1)\n    conf.compositeVel = 1000\n    conf.compositeAcc = 2000\n    #  The commanded axes will automatically change to Idle operat

## Reranking (manually comparing cosine similarity)

In [45]:
from sklearn.metrics.pairwise import cosine_similarity
# from utils.embeddings_utils import get_embedding, cosine_similarity
import numpy as np

from openai import OpenAI
# Module-level OpenAI client used by get_embedding.
# NOTE(review): constructed without arguments — presumably picks up its
# credentials from the environment; confirm.
client = OpenAI()

def get_embedding(text, model="text-embedding-3-large"):
    """Return the embedding of ``text`` from the OpenAI embeddings endpoint.

    Newlines in ``text`` are replaced with spaces before the request is made.

    Args:
        text: The string to embed.
        model: Name of the embedding model to request.

    Returns:
        The ``embedding`` attribute of the first item in the response data.
    """
    flattened = text.replace("\n", " ")
    response = client.embeddings.create(input=[flattened], model=model)
    return response.data[0].embedding


# Reranking function
def rerank_documents(question, retrieved_docs, top_n=5):
    """Rerank documents by cosine similarity between their embeddings and the question's.

    The question and the ``page_content`` of every document are embedded with
    ``get_embedding`` (one call each), the documents are sorted by descending
    cosine similarity, and the best ``top_n`` are printed and returned.

    Args:
        question: Query text to compare the documents against.
        retrieved_docs: Sequence of objects exposing a ``page_content`` string.
        top_n: Maximum number of ranked results to keep.

    Returns:
        A list of ``(document, similarity)`` tuples, most similar first.
        An empty list when ``retrieved_docs`` is empty.
    """
    # Nothing to rank: return before spending an embedding call on the
    # question and before cosine_similarity is handed an empty matrix.
    if not retrieved_docs:
        return []

    question_embedding = get_embedding(question)
    doc_embeddings = [get_embedding(doc.page_content) for doc in retrieved_docs]

    # cosine_similarity returns a 1 x len(retrieved_docs) matrix; take its row.
    similarities = cosine_similarity([question_embedding], doc_embeddings)[0]

    # argsort is ascending, so reverse it to rank by descending similarity.
    ranked_indices = np.argsort(similarities)[::-1]
    ranked_docs_with_similarities = [
        (retrieved_docs[i], similarities[i]) for i in ranked_indices[:top_n]
    ]

    for doc, similarity in ranked_docs_with_similarities:
        print(f"Document: {doc.page_content[:100]}... Similarity: {similarity:.4f}")

    return ranked_docs_with_similarities

# Rerank the ensemble results against the query and keep the best 5.
question = "Execute path interpolation with look ahead of Axis 0, 1 and 2"

ranked_docs = rerank_documents(question, ensemble_docs, top_n=5)
ranked_docs



Document: ``



# Write python code to Execute path interpolation with look ahead of Axis 6 and Axis 1 with ve... Similarity: 0.5681
Document: ``



# Write python code to Execute path interpolation with look ahead of Axis 4 and Axis 1 with ve... Similarity: 0.5677
Document: ``



# Write python code to Execute path interpolation with look ahead of Axis 6 and Axis 1 with ve... Similarity: 0.5655
Document: ``



# Write python code to Execute path interpolation with look ahead of Axis 9 and Axis 1 with ve... Similarity: 0.5581
Document: ``



# Write python code to Execute path interpolation with look ahead of Axis 8, 1 and 2 with velo... Similarity: 0.5561


[(Document(page_content="``\n\n\n\n# Write python code to Execute path interpolation with look ahead of Axis 6 and Axis 1 with velocity 1000, consisting of four linear interpolations: (100,0),(100,100),(0,100),(0,0).\n# Axes = [6, 1]\n\n    Wmx3Lib_adv = AdvancedMotion(Wmx3Lib)\n\n    path = AdvMotion_PathIntplLookaheadCommand()\n    ret = Wmx3Lib_adv.advMotion.FreePathIntplLookaheadBuffer(0)\n    # Create the path interpolation with look ahead buffer\n    ret = Wmx3Lib_adv.advMotion.CreatePathIntplLookaheadBuffer(0, 1000)\n    if ret != 0:\n        print('CreatePathIntplLookaheadBuffer error code is ' + str(ret) + ': ' + Wmx3Lib_adv.ErrorToString(ret))\n        return\n\n    # Configure the path interpolation with look ahead channel\n    conf = AdvMotion_PathIntplLookaheadConfiguration()\n\n    conf.axisCount = 2\n    conf.SetAxis(0, 6)\n    conf.SetAxis(1, 1)\n    conf.compositeVel = 1000\n    conf.compositeAcc = 2000\n    #  The commanded axes will automatically change to Idle opera