# Installing dependencies

In [131]:
# One-time environment setup: uncomment and run these lines on a fresh
# environment to install the packages this notebook imports.
# NOTE(review): versions are unpinned; consider `%pip install pkg==x.y.z` so the
# install targets the running kernel and the notebook stays reproducible.
# !pip install langchain -q
# !pip install openai -q
# !pip install PyPDF2 -q
# !pip install faiss-cpu -q
# !pip install tiktoken -q
# !pip install python-dotenv -q
# !pip install langchain_openai -q
# !pip install chromadb -q
# !pip install beautifulsoup4 -q
# !pip install playwright -q
# !playwright install

Downloading Chromium 121.0.6167.57 (playwright build v1097) from https://playwright.azureedge.net/builds/chromium/1097/chromium-linux.zip
Chromium 121.0.6167.57 (playwright build v1097) downloaded to /home/franco/.cache/ms-playwright/chromium-1097
Downloading FFMPEG playwright build v1009 from https://playwright.azureedge.net/builds/ffmpeg/1009/ffmpeg-linux.zip
FFMPEG playwright build v1009 downloaded to /home/franco/.cache/ms-playwright/ffmpeg-1009
Downloading Firefox 121.0 (playwright build v1438) from https://playwright.azureedge.net/builds/firefox/1438/firefox-ubuntu-22.04.zip
Firefox 121.0 (playwright build v1438) downloaded to /home/franco/.cache/ms-playwright/firefox-1438
Downloading Webkit 17.4 (playwright build v1967) from https://playwright.azureedge.net/builds/webkit/1967/webkit-ubuntu-22.04.zip
Webkit 17.4 (playwright build v1967) downloaded to /home/franco/.cache/ms-playwright/webkit-1967


# API KEYS

In [1]:
# Load variables from a .env file into the process environment.
# NOTE(review): presumably this supplies OPENAI_API_KEY for the OpenAI clients
# created further down — confirm against the .env file.
from dotenv import load_dotenv
load_dotenv()

True

# PDF Reading

In [2]:
from PyPDF2 import PdfReader

In [3]:
# Directory holding the source documents, relative to the working directory.
# The trailing slash matters: the PDF path is built by plain string concatenation.
SOURCES = "./sources/"
# File name of the PDF that is read and indexed below.
PRINCIPLES = "NHRC-PDF-Principles_Of_Harm_Reduction.pdf"

In [4]:
# Open the PDF located at SOURCES + PRINCIPLES; its pages are read in the next cell.
pdf_reader = PdfReader(SOURCES + PRINCIPLES)

In [50]:
# Extract the text of every page and join the pages with a newline.
# Pages for which extract_text() returns an empty value are skipped.
# The newline keeps the last word of one page from fusing with the first word
# of the next, and joining once avoids rebuilding the string on every page.
page_texts = (page.extract_text() for page in pdf_reader.pages)
principles_text = "\n".join(text for text in page_texts if text)

# Chunking

In [6]:
from langchain.text_splitter import  RecursiveCharacterTextSplitter

In [51]:
# Chunking parameters; lengths are measured with len(), i.e. in characters.
CHUNK_SIZE = 512
CHUNK_OVERLAP = 32

# Split the extracted PDF text into overlapping chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
    length_function=len,
)
principles_chunks = text_splitter.create_documents([principles_text])


# Vector DB

In [24]:
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain_community.vectorstores import Chroma

In [52]:
# Embedding client used to index the chunks.
embeddings = OpenAIEmbeddings()

# Embed the chunks and store them in a Chroma collection persisted under ./chroma_db.
# The client created above is reused instead of constructing a second, identical one.
# NOTE(review): because the store is persisted, re-running this cell presumably adds
# the same chunks again; clear ./chroma_db if retrieval returns duplicate or stale
# documents — confirm.
vectorstore = Chroma.from_documents(
    documents=principles_chunks,
    embedding=embeddings,
    persist_directory="./chroma_db",
)

# Retriever over the store, using the vector store's default search settings.
retriever = vectorstore.as_retriever()

# Chain

In [60]:
from langchain.llms import OpenAI
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain.memory import ConversationBufferMemory



In [166]:

# Prompt text with three placeholders: {context}, {question} and {chat_history}.
# Each placeholder must be present as a key in the dict piped into `prompt`.
template = """Answer the question based on the context and the chat history:
context: 
{context}

Question: {question}

Chat history: {chat_history}
"""
prompt = ChatPromptTemplate.from_template(template)
# Chat model that generates the answer from the filled-in prompt.
llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)

In [167]:
# Conversation memory exposed under the "chat_history" key.
# NOTE: the next cell assigns `memory` again, replacing this instance.
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

In [168]:
from langchain_core.runnables import RunnableParallel

# Buffer of past turns. With return_messages=True the history is returned as
# message objects (not one pre-formatted string) under the "chat_history" key.
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)


def format_docs(docs):
    """Concatenate the ``page_content`` of each document, separated by a blank line.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string.

    Returns:
        A single string; empty when ``docs`` yields nothing.
    """
    contents = [doc.page_content for doc in docs]
    separator = "\n\n"
    return separator.join(contents)

def get_memory(memory):
    """Render the stored chat history as a plain-text transcript.

    Args:
        memory: Mapping with a ``"chat_history"`` entry holding an ordered
            sequence of message objects. Each message must expose ``content``;
            a ``type`` attribute equal to ``"human"`` or ``"ai"`` is used to
            pick the speaker label when present.

    Returns:
        One ``"Human: ..."`` / ``"AI: ..."`` entry per message, joined by blank
        lines, or ``""`` when the history is empty.

    Raises:
        KeyError: If ``memory`` has no ``"chat_history"`` entry.
    """
    role_labels = {"human": "Human", "ai": "AI"}
    lines = []
    for position, message in enumerate(memory["chat_history"] or ()):
        # Prefer the role the message itself declares. Position parity is only
        # correct while the history strictly alternates human/AI starting with
        # a human turn, so it is kept purely as a fallback.
        label = role_labels.get(getattr(message, "type", None))
        if label is None:
            label = "Human" if position % 2 == 0 else "AI"
        lines.append(f"{label}: {message.content}")
    return "\n\n".join(lines)

# Answer-generation chain. Expects a dict with:
#   "context"      - the retrieved documents,
#   "question"     - the user's question,
#   "chat_history" - the dict returned by memory.load_memory_variables.
# "context" and "chat_history" are replaced by plain-text renderings before the
# prompt is filled. The transcript has to be written back under "chat_history",
# the prompt's placeholder name; stored under any other key, the prompt would be
# filled with the raw memory dict and the formatted transcript would be ignored.
rag_chain_from_docs = (
    RunnablePassthrough.assign(context=(lambda x: format_docs(x["context"])))
    | RunnablePassthrough.assign(chat_history=(lambda x: get_memory(x["chat_history"])))
    | prompt
    | llm
    | StrOutputParser()
)

# Fan the incoming question out into the three inputs the answer chain needs,
# then attach the generated answer so the caller also gets the inputs back.
chain_inputs = {
    "context": retriever,  # documents retrieved for the question
    "question": RunnablePassthrough(),  # the question itself, unchanged
    "chat_history": memory.load_memory_variables,  # current conversation memory
}
rag_chain_with_source = RunnableParallel(chain_inputs).assign(answer=rag_chain_from_docs)



In [169]:
# First turn. The result is a dict with "context", "question", "chat_history" and "answer".
output = rag_chain_with_source.invoke("What are the harm reduction principles?")


In [170]:
output

{'context': [Document(page_content='FOUNDATIONAL  PRINCIPLES CENTRAL TO HARM REDUCTIONHarm reduction is a set of \npractical strategies and ideas aimed at reducing negative consequences associated with drug use. \nHarm Reduction is also a \nmovement for social justice built on a belief in, and respect for, the rights of people who use drugs.Harm reduction incorporates a spectrum of strategies that includes'),
  Document(page_content='safer use, managed use, abstinence, meeting people who use drugs “where they’re at,” and addressing conditions of use along with the use itself. Because harm reduction demands that interventions and policies designed to serve people who use drugs reflect specific individual and community needs, there is no universal definition of or formula for implementing harm reduction.\nHowever, National Harm Reduction Coalition considers the following \nprinciples central to harm reduction practice:'),
  Document(page_content='FOR MORE RESOURCES, VISIT   HARMREDUCTION

In [171]:
# Store this question/answer pair in memory. The chain only reads from memory,
# so each turn has to be saved explicitly for later questions to see it.
memory.save_context({"input":output["question"]}, {"output": output["answer"]})

In [172]:
# Follow-up that can only be answered from the saved chat history.
# NOTE(review): the recorded output shows a differently worded question, so it
# predates this version of the cell — re-run to refresh it.
rag_chain_with_source.invoke("What was my last question?")

{'context': [Document(page_content='b'),
  Document(page_content='b'),
  Document(page_content='b'),
  Document(page_content='b')],
 'question': 'What where my last question?',
 'chat_history': {'chat_history': [HumanMessage(content='What are the harm reduction principles?'),
   AIMessage(content='Based on the given context, the harm reduction principles are as follows:\n\n1. Accepts, for better or worse, that licit and illicit drug use is part of our world and chooses to work to minimize its harmful effects rather than simply ignore or condemn them.\n2. Affirms people who use drugs (PWUD) themselves as the primary agents of reducing the harms of their drug use and seeks to empower PWUD to share information and support each other in strategies which meet their actual conditions of use.\n3. Recognizes that the realities of poverty, class, racism, social isolation, past trauma, sex-based discrimination, and other social inequalities affect both people’s vulnerability to and capacity for 

In [174]:
# The harm-reduction-principles question, asked in Spanish.
# NOTE(review): presumably a check that retrieval works across languages — confirm intent.
rag_chain_with_source.invoke("cuáles son los principios de la reducción de daños?")

{'context': [Document(page_content='FOUNDATIONAL  PRINCIPLES CENTRAL TO HARM REDUCTIONHarm reduction is a set of \npractical strategies and ideas aimed at reducing negative consequences associated with drug use. \nHarm Reduction is also a \nmovement for social justice built on a belief in, and respect for, the rights of people who use drugs.Harm reduction incorporates a spectrum of strategies that includes'),
  Document(page_content='FOR MORE RESOURCES, VISIT   HARMREDUCTION.ORG\n/HarmReductionCoalitionRevised 2020\n/HarmReductionCoalition @harmreduction @harmreductionAccepts, for better or worse, that licit and illicit drug use is part of our world and chooses to work to minimize its harmful effects rather than simply ignore or condemn themPRINCIPLES OF HARM REDUCTIONFOUNDATIONS OF HARM REDUCTION\nHARM REDUCTION   INTERVENTIONS\nA philosophical and political'),
  Document(page_content='safer use, managed use, abstinence, meeting people who use drugs “where they’re at,” and addressing 

# More sources

In [150]:

from langchain_community.document_loaders.recursive_url_loader import RecursiveUrlLoader

from bs4 import BeautifulSoup as Soup

# Crawl the LangChain JS memory-integration docs starting from `url`;
# max_depth bounds how far below the start page the crawl may go.
url = "https://js.langchain.com/docs/modules/memory/integrations/"
loader = RecursiveUrlLoader(
    url=url,
    max_depth=6,
    # Reduce each fetched page to its text; without an extractor the loader
    # keeps the raw HTML markup as page_content.
    extractor=lambda html: Soup(html, "html.parser").text,
)
docs = loader.load()

In [151]:
len(docs)

1