# MC Code RAG Workshop


Build a vector store from the sample documents and test retrieval-augmented generation (RAG) on it.

## Setup


### Hyperparameters


In [None]:
# p1=0  #Parameter 1 


### Dependencies


In [None]:
# Uncomment to install the packages imported below (includes the Pinecone, FAISS, Qdrant and PyPDFium2 packages used by later cells).
# %pip install --upgrade --quiet  langchain langchain-community langchainhub langchain-openai langchain-text-splitters langchain-pinecone chromadb faiss-cpu qdrant-client pypdfium2 bs4 python-dotenv

### Key from .env


In [1]:
import os
from dotenv import load_dotenv,find_dotenv

# Locate the local .env file with find_dotenv(), then load it.
# load_dotenv's return value is the last expression, so the cell displays it.
env_file = find_dotenv()
load_dotenv(env_file)

# Alternative without a .env file: set the key directly.
# os.environ["OPENAI_API_KEY"] = "sk-..."

# Debug aid: uncomment to double-check the key (clear the output before sharing).
# print(os.environ["OPENAI_API_KEY"])

True

### LangSmith (enabled by default)


In [2]:
# LangSmith configuration. Keep all keys in .env; they are read from the environment here.
import getpass

os.environ["LANGCHAIN_TRACING_V2"] = "true"

# Prompt for the key only when the environment does not provide one,
# instead of failing with a KeyError.
if not os.environ.get("LANGCHAIN_API_KEY"):
    os.environ["LANGCHAIN_API_KEY"] = getpass.getpass("LANGCHAIN_API_KEY: ")

# Confirm the key is loaded without writing the secret into the saved notebook
# output: show only the 4-character prefix and mask the rest.
langchain_api_key = os.environ["LANGCHAIN_API_KEY"]
print(langchain_api_key[:4] + "*" * (len(langchain_api_key) - 4))

ls__********************************


### Imports


In [3]:
import bs4
from langchain import hub
from langchain_community.document_loaders import WebBaseLoader, TextLoader, PyPDFLoader, PyPDFium2Loader
from langchain_community.vectorstores import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.prompts import PromptTemplate
from langchain_core.documents import Document

### Load File


In [None]:
# Web loader
# loader = WebBaseLoader(
#     web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
#     bs_kwargs=dict(
#         parse_only=bs4.SoupStrainer(
#             class_=("post-content", "post-title", "post-header")
#         )
#     ),
# )
# docs = loader.load()

In [None]:
# PDF loader: read the WMX3 sample-code PDF with PyPDFium2 (image extraction disabled).
# Alternatives kept for reference:
#   file_path = 'nais2023.pdf'
#   loader = PyPDFLoader(file_path)
#   loader = PyPDFLoader(file_path, extract_images=True)   # extract images as text as well
#   docs = loader.load_and_split()
file_path = './docs/WMX3SampleCodes.pdf'
loader = PyPDFium2Loader(
    file_path,
    extract_images=False,
)
docs = loader.load()

# Print the first loaded Document as a sanity check.
first_doc = docs[0]
print(first_doc)


In [None]:
# Number of Document objects returned by the most recent loader.load() call.
len(docs)


In [4]:
# Text loader: read the WMX3 sample-code file.
# The encoding is given explicitly so the file decodes the same way on every
# platform instead of depending on the OS default locale encoding.
loader = TextLoader("./docs/WMX3SampleCodes.py", encoding="utf-8")
docs = loader.load()
# docs[0].page_content[:100000]
docs

[Document(page_content=' \n"""#####PYTHON SAMPLE CODE#####\nThis is a typical python code of WMX3 from initialization, through motion execution, to termination/closing/shutting down. The Python script initializes and operates a motion control system using the WMX3 software library, sequentially executing steps for robust control in an industrial setting. It starts by creating and naming a device with \'CreateDevice(\'C:\\\\Program Files\\\\SoftServo\\\\WMX3\\\\\', DeviceType.DeviceTypeNormal, INFINITE)\' and \'SetDeviceName(\'WMX3initTest\')\', then begins communication with \'StartCommunication(INFINITE)\'. The script clears any amplifier alarms with \'ClearAmpAlarm(axis)\' and activates the servo with \'SetServoOn(axis, 1)\'. It executes a motion command using \'StartMov(posCommand)\' and concludes by shutting down the servo and stopping communication with \'SetServoOn(axis, 0)\' and \'StopCommunication(INFINITE)\'. This structured approach ensures each component is correctly set up 

### Chunk


In [5]:
# Split only on the '``' marker; adjust based on actual document structure.
separators = ['``']
text_splitter = RecursiveCharacterTextSplitter(
    separators=separators,
    keep_separator=True,    # chunks keep the '``' marker they were split on
    chunk_size=1000,
    chunk_overlap=200,
    add_start_index=True,   # records 'start_index' in each chunk's metadata
)
splits = text_splitter.split_documents(docs)

In [6]:
# Inspect the second chunk to check where the splitter cut the text.
splits[1]

Document(page_content='``\n\n"""#####PYTHON SAMPLE CODE#####\nThis is a typical python code of WMX3 for a axis/servo/motor to move or do positioning. \n"""\nWmx3Lib = WMX3Api()\nCmStatus = CoreMotionStatus()\nWmx3Lib_cm = CoreMotion(Wmx3Lib)\n\n# Create a command value.\nposCommand = Motion_PosCommand()\nposCommand.profile.type = ProfileType.Trapezoidal\nposCommand.axis = 0\nposCommand.target = 1000\nposCommand.profile.velocity = 1000\nposCommand.profile.acc = 1000000\nposCommand.profile.dec = 1000000\n\n# Execute command to move to a specified absolute position. e.g. \'Move to Position 100..\'\nWmx3Lib_cm.motion.StartPos(posCommand)\n\n# Execute command to move from current position to a specified distance relatively. e.g. \'Move 100..\'\nWmx3Lib_cm.motion.StartMov(posCommand)\n\n# Wait until the axis moves to the target position and stops.\nWmx3Lib_cm.motion.Wait(0)\n#End', metadata={'source': './docs/WMX3SampleCodes.py', 'start_index': 3933})

In [None]:
# Show the splitter's built-in documentation (exploration aid; defines nothing used by later cells).
help(RecursiveCharacterTextSplitter)

### Vectorstore - chroma


In [7]:
# Embedding model passed to the vector stores built in this notebook.
# Alternatives: text-embedding-3-small, text-embedding-ada-002
embedding_model = OpenAIEmbeddings(model="text-embedding-3-large")

# Directory where the Chroma collection is persisted.
# vectorstore_path = "Vectorstore/chromadb"
# vectorstore_path = "Vectorstore/chromadb-pdf"
vectorstore_path = "Vectorstore/chromadb-pdf-chunk1000"

# Reuse the persisted store when it exists. Calling Chroma.from_documents on an
# already-populated directory embeds and inserts the chunks again, so every
# re-run would pay for the embeddings again and fill the store with duplicates.
# Delete the directory to force a rebuild after changing the source file or the chunking.
if os.path.isdir(vectorstore_path) and os.listdir(vectorstore_path):
    vectorstore = Chroma(
        embedding_function=embedding_model,
        persist_directory=vectorstore_path,
    )
else:
    # First run: embed the chunks and save the collection to disk.
    vectorstore = Chroma.from_documents(
        documents=splits,
        embedding=embedding_model,
        persist_directory=vectorstore_path,
    )


In [8]:
# Similarity retriever over the Chroma store; each query returns the 4 closest chunks.
# Direct search that also returns scores (yields results, not a retriever):
# retriever = vectorstore.similarity_search_with_score('a typical python code of WMX3 for a axis/servo/motor to move or do positioning.')
retriever = vectorstore.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 4},
)

In [None]:
# Display the retriever's repr to confirm its configuration.
retriever

In [9]:
# Smoke-test retrieval using the description text of the positioning sample.
positioning_query = "a typical python code of WMX3 for a axis/servo/motor to move or do positioning. "
retrieved_docs = retriever.invoke(positioning_query)
#print(retrieved_docs[0].page_content)
retrieved_docs

[Document(page_content='``\n\n"""#####PYTHON SAMPLE CODE#####\nThis is a typical python code of WMX3 for a axis/servo/motor to move or do positioning. \n"""\nWmx3Lib = WMX3Api()\nCmStatus = CoreMotionStatus()\nWmx3Lib_cm = CoreMotion(Wmx3Lib)\n\n# Create a command value.\nposCommand = Motion_PosCommand()\nposCommand.profile.type = ProfileType.Trapezoidal\nposCommand.axis = 0\nposCommand.target = 1000\nposCommand.profile.velocity = 1000\nposCommand.profile.acc = 1000000\nposCommand.profile.dec = 1000000\n\n# Execute command to move to a specified absolute position. e.g. \'Move to Position 100..\'\nWmx3Lib_cm.motion.StartPos(posCommand)\n\n# Execute command to move from current position to a specified distance relatively. e.g. \'Move 100..\'\nWmx3Lib_cm.motion.StartMov(posCommand)\n\n# Wait until the axis moves to the target position and stops.\nWmx3Lib_cm.motion.Wait(0)\n#End', metadata={'source': './docs/WMX3SampleCodes.py', 'start_index': 3933}),
 Document(page_content='``\n\n"""#####

### Vectorstore - pinecone


In [None]:
# Pinecone vector store; the target index name is read from the environment
# (raises KeyError if PINECONE_INDEX_NAME is not set).
from langchain_pinecone import PineconeVectorStore
index_name = os.environ["PINECONE_INDEX_NAME"]


In [None]:
# PineconeVectorStore.delete(delete_all=True)

In [None]:

# Build the Pinecone store from the chunks, using the index named by index_name.
docsearch = PineconeVectorStore.from_documents(
    documents=splits,
    embedding=embedding_model,
    index_name=index_name,
)


In [None]:
# Connect to the existing index instead of building it from the chunks again.
docsearch = PineconeVectorStore.from_existing_index(
    index_name,
    embedding_model,
)


In [None]:
# Ask the Pinecone-backed store for the 8 chunks most similar to an architecture question.
query = "what is the Architecture of wmx3?"
retrieved_docs = docsearch.similarity_search(
    query,
    k=8,
)
retrieved_docs

### Vectorstore - FAISS

In [None]:
# FAISS store built from the same chunks; it is written to disk by the save_local cell below.
from langchain_community.vectorstores import FAISS

embedding_model = OpenAIEmbeddings(model="text-embedding-3-large")

# NOTE(review): this rebinds vectorstore_path (also used for the Chroma directory), and the
# value is not read by the FAISS cells, which spell the folder out literally.
vectorstore_path = "Vectorstore/FAISS-pdf-images"
db = FAISS.from_documents(documents=splits, embedding=embedding_model)

In [None]:
# Persist the FAISS index to disk in the given folder under the index name "myFaissIndex".
db.save_local(folder_path="Vectorstore/FAISS-pdf-images", index_name="myFaissIndex")

In [None]:
# Reload the index written by save_local (same folder and index name).
# NOTE(review): load_local unpickles the stored docstore; recent langchain-community releases
# refuse to do so unless allow_dangerous_deserialization=True is passed — confirm against the
# installed version, and only load index files you created yourself.
db = FAISS.load_local(folder_path="Vectorstore/FAISS-pdf-images",embeddings=embedding_model,index_name="myFaissIndex")

In [None]:
# Query the FAISS index with a question the WMX3 corpus can actually answer.
# The hits get their own name so the `docs` list produced by the loader cells is not
# overwritten; otherwise re-running the chunking cell would split the search hits
# instead of the source file.
query = "How do I move an axis to a target position with WMX3?"
faiss_docs = db.similarity_search(query)
faiss_docs

### Vectorstore - pathway

### Vectorstore - Qdrant (supports maximum marginal relevance (MMR) search and similarity search by vector)

In [None]:
# Qdrant in local mode, built from the same chunks and embedding model.
# NOTE(review): passing `path` stores the collection on disk in that folder; in-memory-only
# storage would use location=":memory:" instead — confirm against the Qdrant docs.
from langchain_community.vectorstores import Qdrant
qdrant = Qdrant.from_documents(
    splits,
    embedding_model,
    path="Vectorstore/Qdrant-pdf-images",  # Local mode with on-disk storage in this folder
    collection_name="my_documents",
)

### Prompt template


In [None]:
# Prompt text for the RAG chain: instructions, then the retrieved {context},
# then the user's {question}, ending with the answer cue.
template = (
    "Use the following pieces of context to answer the question at the end.\n"
    "If you don't know the answer, just say that you don't know, don't try to make up an answer.\n"
    "\n"
    "{context}\n"
    "\n"
    "Question: {question}\n"
    "\n"
    "Helpful Answer:"
)


# Wrap the template string in a PromptTemplate; {context} and {question} are its placeholders.
custom_rag_prompt = PromptTemplate.from_template(template)


In [None]:
# Alternative prompt: pull "rlm/rag-prompt" from the LangChain hub.
# NOTE: this rebinds custom_rag_prompt, so running this cell replaces the prompt built from the local template.
custom_rag_prompt = hub.pull("rlm/rag-prompt")

### RAG chain


In [None]:
# Chat model that produces the final answer (temperature 0.1). Other model noted: gpt-4.
llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0.1)   #gpt-4  #gpt-3.5-turbo

def format_docs(docs):
    """Concatenate the page_content of each document, separated by a blank line.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        One string with the contents joined by two newlines; an empty string
        when ``docs`` is empty.
    """
    contents = [doc.page_content for doc in docs]
    return "\n\n".join(contents)
    

# Assemble the RAG pipeline:
#   1. the incoming question is sent to the retriever (hits flattened by format_docs)
#      and is also passed through unchanged,
#   2. both values fill the prompt,
#   3. the LLM answers, and
#   4. the reply is parsed into a plain string.
rag_inputs = {"context": retriever | format_docs, "question": RunnablePassthrough()}
rag_chain = rag_inputs | custom_rag_prompt | llm | StrOutputParser()


In [None]:
# Display the function object to confirm format_docs is defined in this kernel.
format_docs


In [None]:

# Run the chain once (non-streaming); the cell displays the returned answer string.
rag_chain.invoke("write an example code to close wmx3?")

In [None]:
# Stream the answer and echo each piece as soon as it arrives, without inserting newlines.
answer_stream = rag_chain.stream("write a sample code to initialize wmx3?")
for piece in answer_stream:
    print(piece, end="", flush=True)