In [2]:
from langchain_community.llms.llamafile import Llamafile

# Language model used by the RAG chain: a Llamafile wrapper built with its
# default settings (no arguments are passed).
# NOTE(review): presumably this expects a llamafile server to be running and
# reachable at the library's default address — confirm before Run All.
llm = Llamafile()

In [3]:
from langchain import hub
from langchain_chroma import Chroma
from langchain_community.document_loaders import DirectoryLoader
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import UnstructuredMarkdownLoader

# Load the Markdown files matching "*.md" under ./owasp_mds, using
# UnstructuredMarkdownLoader for each file. Chunking and indexing are done
# in a later cell.
loader = DirectoryLoader(
    "./owasp_mds",
    glob="*.md",
    loader_cls=UnstructuredMarkdownLoader,
)
docs = loader.load()

# Display how many documents were loaded.
len(docs)

91

In [4]:
from langchain_community.embeddings import HuggingFaceEmbeddings
# Embedding model for the vector index; no model name is passed, so the
# library's default model is used.
embeddings = HuggingFaceEmbeddings()

# Split the loaded documents into small overlapping chunks
# (chunk_size=200, chunk_overlap=50).
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=200,
    chunk_overlap=50,
)
splits = text_splitter.split_documents(docs)

# Embed every chunk and index it in a Chroma vector store.
# NOTE(review): no collection name or persist directory is given — confirm
# whether re-running this cell in the same kernel indexes the chunks twice.
vectorstore = Chroma.from_documents(
    documents=splits,
    embedding=embeddings,
)

  from .autonotebook import tqdm as notebook_tqdm


In [5]:
# Similarity-search retriever over the vector store, configured with k=6
# (the number of chunks requested per query).
retriever = vectorstore.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 6},
)

In [6]:
from langchain_core.runnables import RunnableParallel

# Fetch the prompt published as "rlm/rag-prompt" from the LangChain hub.
# NOTE(review): this likely needs network access and is not pinned to a
# specific prompt revision — confirm if reproducibility matters.
prompt = hub.pull("rlm/rag-prompt")

def format_docs(docs):
    """Merge the text of several documents into one string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string.

    Returns:
        The ``page_content`` values in their original order, separated by a
        blank line (two newline characters). An empty iterable yields ``""``.
    """
    contents = [document.page_content for document in docs]
    separator = "\n\n"
    return separator.join(contents)

# Answer-generation pipeline. It receives a mapping that already contains
# the retrieved documents under "context", and then:
#   1. replaces "context" with the documents' text joined by format_docs,
#   2. passes the mapping to the prompt,
#   3. sends the prompt output to the LLM,
#   4. parses the LLM output with StrOutputParser.
rag_chain_from_docs = (
    RunnablePassthrough.assign(
        context=lambda inputs: format_docs(inputs["context"]),
    )
    | prompt
    | llm
    | StrOutputParser()
)

# Full chain. The same input is given to both steps: the retriever produces
# "context", while RunnablePassthrough forwards the input unchanged as
# "question". The generation pipeline's output is then added under "answer",
# so the source documents are kept alongside the answer.
rag_chain_with_source = RunnableParallel(
    context=retriever,
    question=RunnablePassthrough(),
).assign(answer=rag_chain_from_docs)

In [9]:
# Ask for a question interactively; input() already returns a string.
user_input = input("What web app question do you want to ask?")

# Run the full RAG chain on the question. The bare last expression makes the
# notebook display the result, which includes the retrieved "context"
# documents.
rag_chain_with_source.invoke(user_input)

{'context': [Document(page_content='Cross-Site Scripting (XSS) is a type of attack where malicious JavaScript code is injected into a displayed variable.', metadata={'source': 'owasp_mds\\Symfony_Cheat_Sheet.md'}),
  Document(page_content='Cross Site Scripting (XSS)\n\nXSS attacks are injection attacks where malicious scripts (such as JavaScript code snippets) are injected into trusted websites.', metadata={'source': 'owasp_mds\\Laravel_Cheat_Sheet.md'}),
  Document(page_content='Cross-Site Scripting (XSS) is a misnomer. Originally this term was derived from early versions of the attack that were primarily focused on stealing data cross-site. Since then, the term has widened', metadata={'source': 'owasp_mds\\Cross_Site_Scripting_Prevention_Cheat_Sheet.md'}),
  Document(page_content='XSS Defense Philosophy', metadata={'source': 'owasp_mds\\Cross_Site_Scripting_Prevention_Cheat_Sheet.md'}),
  Document(page_content='Model) based XSS and is an extension (and assumes comprehension) of the X