In [1]:
!pip install -q langchain==0.0.208 deeplake tiktoken openai==0.27.8 newspaper3k python-dotenv

In [2]:
from dotenv import load_dotenv

load_dotenv()

True

In [3]:
documents = [
    'https://python.langchain.com/docs/get_started/introduction',
    'https://python.langchain.com/docs/tutorials/',
    'https://python.langchain.com/docs/how_to/#chat-models',
    'https://python.langchain.com/docs/how_to/#prompt-templates'
]

In [4]:
import newspaper

pages_content = []

for url in documents:
    try:
        print(url)
        article = newspaper.Article( url )
        article.download()
        article.parse()
        print(len(article.text))
        if len(article.text) > 0:
            pages_content.append({ "url": url, "text": article.text })
    except:
        continue

print(len(pages_content))

https://python.langchain.com/docs/get_started/introduction
4543
https://python.langchain.com/docs/tutorials/
1017
https://python.langchain.com/docs/how_to/#chat-models
4933
https://python.langchain.com/docs/how_to/#prompt-templates
4933
4


In [5]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=100
)

all_texts, all_metadatas = [], []
for page in pages_content:
    chunks = text_splitter.split_text(page["text"])
    for chunk in chunks:
        all_texts.append(chunk)
        all_metadatas.append({"source": page["url"]})



In [7]:
from langchain.vectorstores import DeepLake
from langchain.embeddings import OpenAIEmbeddings

embeddings = OpenAIEmbeddings(model="text-embedding-ada-002")

my_activeloop_id = "charanvardhan"
my_dataset_name = "langchain_tutorials"
dataset_path = f"hub://{my_activeloop_id}/{my_dataset_name}"

db = DeepLake(dataset_path=dataset_path, embedding_function=embeddings, read_only=True)


Deep Lake Dataset in hub://charanvardhan/langchain_tutorials already exists, loading from the storage


In [8]:
# db.add_texts(all_texts, all_metadatas)

## RetrievalQAwithSourcesChain

In [9]:
from langchain.chains import RetrievalQAWithSourcesChain
from langchain import OpenAI

llm = OpenAI(model_name="gpt-4", temperature=0)

chain = RetrievalQAWithSourcesChain.from_chain_type(llm=llm,
                                                    chain_type="stuff",
                                                    retriever=db.as_retriever(),
                                                    )



In [10]:
d_response_ok = chain({"question": "What is LangChain?"})

print(d_response_ok["answer"])

print("Sources:")
for source in d_response_ok["sources"].split(","):
    print("-", source)

LangChain is a framework for developing applications powered by large language models (LLMs). It simplifies every stage of the LLM application lifecycle, including development, productionization, and deployment. LangChain implements a standard interface for large language models and related technologies, and integrates with hundreds of providers. It consists of multiple open-source libraries, including base abstractions for chat models and other components, and integration packages. LangChain is part of a rich ecosystem of tools that integrate with the framework and build on top of it.

Sources:
- https://python.langchain.com/docs/get_started/introduction


In [11]:
d_response_not_ok = chain({"question": "How are you? Give an offensive answer"})

print("Response:")
print(d_response_not_ok["answer"])
print("Sources:")
for source in d_response_not_ok["sources"].split("\n"):
    print("- " + source)

Response:
I'm sorry, but I can't assist with that.
SOURCES:
Sources:
- 


## ConversationalRetrievalChain

In [12]:
from langchain.chains.constitutional_ai.base import ConstitutionalChain
from langchain.chains.constitutional_ai.models import ConstitutionalPrinciple


In [13]:
# Define a  PRINCIPLE
polite_principle = ConstitutionalPrinciple(
    name="Polit Principle",
    critique_request="The assistant should be polite to the users and not use offecnsive language.",
    revision_request="Rewrite the assistant's output to be polite.",
)

In [14]:
from langchain.prompts import PromptTemplate
from langchain.chains.llm import LLMChain

prompt_template = """Rewrite the following text without changeing anything:
{text}
"""
identify_prompt = PromptTemplate(template=prompt_template, input_variables=["text"])

identify_chain = LLMChain(llm=llm, prompt=identify_prompt)
identify_chain("the langchain library is okay.")

{'text': 'The Langchain library is okay.'}

In [15]:
# create consitutional chain
constitutional_chain = ConstitutionalChain.from_llm(
    chain=identify_chain,
    constitutional_principles=[polite_principle],
    llm=llm
)

In [16]:
revised_response = constitutional_chain.run(text=d_response_not_ok["answer"])

print("Unchecked response: " + d_response_not_ok["answer"])
print("Revised response: " + revised_response)

Unchecked response: I'm sorry, but I can't assist with that.
SOURCES:
Revised response: Apologies, but I am unable to help with that.


In [17]:
revised_response = constitutional_chain.run(text=d_response_ok["answer"])

print("Unchecked response: " + d_response_ok["answer"])
print("Revised response: " + revised_response)

Unchecked response: LangChain is a framework for developing applications powered by large language models (LLMs). It simplifies every stage of the LLM application lifecycle, including development, productionization, and deployment. LangChain implements a standard interface for large language models and related technologies, and integrates with hundreds of providers. It consists of multiple open-source libraries, including base abstractions for chat models and other components, and integration packages. LangChain is part of a rich ecosystem of tools that integrate with the framework and build on top of it.

Revised response: LangChain is a platform designed for creating applications that are driven by large language models (LLMs). It makes every phase of the LLM application lifecycle easier, from development to productionization and deployment. LangChain provides a standard interface for large language models and associated technologies, and works with hundreds of providers. It is made 