# Building Generative AI Applications with LangChain and OpenAI API

In [None]:
!pip install langchain openai

Collecting langchain
  Downloading langchain-0.1.4-py3-none-any.whl (803 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m803.6/803.6 kB[0m [31m4.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting openai
  Downloading openai-1.10.0-py3-none-any.whl (225 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m225.1/225.1 kB[0m [31m19.4 MB/s[0m eta [36m0:00:00[0m
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain)
  Downloading dataclasses_json-0.6.3-py3-none-any.whl (28 kB)
Collecting jsonpatch<2.0,>=1.33 (from langchain)
  Downloading jsonpatch-1.33-py2.py3-none-any.whl (12 kB)
Collecting langchain-community<0.1,>=0.0.14 (from langchain)
  Downloading langchain_community-0.0.16-py3-none-any.whl (1.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m23.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting langchain-core<0.2,>=0.1.16 (from langchain)
  Downloading langchain_core-0.1.16-py3-none-any.whl (230 kB)
[2

In [None]:
!pip install unstructured

Collecting unstructured
  Downloading unstructured-0.12.2-py3-none-any.whl (1.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.8/1.8 MB[0m [31m9.2 MB/s[0m eta [36m0:00:00[0m
Collecting filetype (from unstructured)
  Downloading filetype-1.2.0-py2.py3-none-any.whl (19 kB)
Collecting python-magic (from unstructured)
  Downloading python_magic-0.4.27-py2.py3-none-any.whl (13 kB)
Collecting emoji (from unstructured)
  Downloading emoji-2.10.0-py2.py3-none-any.whl (457 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m457.9/457.9 kB[0m [31m34.6 MB/s[0m eta [36m0:00:00[0m
Collecting python-iso639 (from unstructured)
  Downloading python_iso639-2024.1.2-py3-none-any.whl (274 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m274.7/274.7 kB[0m [31m25.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting langdetect (from unstructured)
  Downloading langdetect-1.0.9.tar.gz (981 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [None]:
import os

# Resolve the OpenAI API key without hard-coding it: prefer Colab's secret
# store, and fall back to the OPENAI_API_KEY environment variable so the
# notebook also runs outside Colab.
try:
    from google.colab import userdata
except ImportError:
    # google.colab is only importable inside a Colab runtime.
    # A missing variable raises KeyError naming 'OPENAI_API_KEY'.
    OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]
else:
    OPENAI_API_KEY = userdata.get('OPENAI_API_KEY')

## Comprehensive Explanation of Components:
1. Model I/O: Interface with language models. It consists of prompts, models, and output parsers.
2. Data connection: Interface with application-specific data sources using data transformers, text splitters, vector stores, and retrievers.
3. Chains: Construct a sequence of calls with other components of the AI application. Some examples of chains are sequential chains, summarization chains, and retrieval Q&A chains.
4. Agents: LangChain provides agents, which allow applications to utilize a dynamic chain of calls to various tools, including LLMs, based on user input.
5. Memory: Persist application state between runs of a chain.
6. Callbacks: Log and stream the steps of sequential chains in order to run the chains efficiently and monitor resource consumption.

# Load the directory

In [None]:
from langchain_community.document_loaders import DirectoryLoader

In [None]:
# Directory holding the source text files, kept in a named constant so the
# location can be changed in one place.
# NOTE(review): assumes Google Drive is already mounted at /content/drive;
# this notebook does not mount it itself -- confirm before a fresh run.
BREAKDOWN_DIR = '/content/drive/MyDrive/Colab_Notebooks/GenerativeAI/LLM_tut/breakdown'

loader = DirectoryLoader(BREAKDOWN_DIR)

In [None]:
# Read the files found by the directory loader into a list of Document objects.
docs = loader.load()

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.


In [None]:
# Sanity check: number of documents that were loaded.
len(docs)

8

# Splitter

In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [None]:
# Splitter settings. The chunk size is deliberately tiny, purely to make the
# effect of splitting easy to see in this demo.
splitter_settings = dict(
    chunk_size=100,            # upper bound on chunk length, measured by length_function
    chunk_overlap=20,          # length shared between neighbouring chunks
    length_function=len,       # len() on a str counts characters
    is_separator_regex=False,
)
text_splitter = RecursiveCharacterTextSplitter(**splitter_settings)

In [None]:
# Split every loaded document into chunks with the configured splitter;
# the result is a new list of Document objects, one per chunk.
documents = text_splitter.split_documents(docs)

In [None]:
# Preview only the first few chunks; displaying the whole list floods the
# notebook output and bloats the saved file.
documents[:5]

[Document(page_content="Person: Hi, I'm stranded on the side of the road. I just realized I have a flat tire.", metadata={'source': '/content/drive/MyDrive/Colab_Notebooks/GenerativeAI/LLM_tut/breakdown/ FlatTire.txt'}),
 Document(page_content="Assistant: I'm sorry to hear that. First things first, make sure you're in a safe location. Do you", metadata={'source': '/content/drive/MyDrive/Colab_Notebooks/GenerativeAI/LLM_tut/breakdown/ FlatTire.txt'}),
 Document(page_content='location. Do you have a spare tire and a jack in your car?', metadata={'source': '/content/drive/MyDrive/Colab_Notebooks/GenerativeAI/LLM_tut/breakdown/ FlatTire.txt'}),
 Document(page_content='Person: Yes, I do.', metadata={'source': '/content/drive/MyDrive/Colab_Notebooks/GenerativeAI/LLM_tut/breakdown/ FlatTire.txt'}),
 Document(page_content='Assistant: Excellent! Put your car in park, use the jack to lift it, remove the flat tire, and', metadata={'source': '/content/drive/MyDrive/Colab_Notebooks/GenerativeAI/LLM

# Create Embeddings

In [None]:
!pip install -U langchain-openai

Collecting langchain-openai
  Downloading langchain_openai-0.0.5-py3-none-any.whl (29 kB)
Collecting tiktoken<0.6.0,>=0.5.2 (from langchain-openai)
  Downloading tiktoken-0.5.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m9.1 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: tiktoken, langchain-openai
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
llmx 0.0.15a0 requires cohere, which is not installed.[0m[31m
[0mSuccessfully installed langchain-openai-0.0.5 tiktoken-0.5.2


In [None]:
from langchain_openai import OpenAIEmbeddings

In [None]:
# Legacy import path, kept for reference only (superseded by `langchain_openai.OpenAIEmbeddings`):
# from langchain.embeddings.openai import OpenAIEmbeddings

In [None]:
# Embedding client used to turn text into vectors; it authenticates with the
# API key held in OPENAI_API_KEY.
embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY, model="text-embedding-ada-002")

# Vector DB

In [None]:
!pip install chromadb

Collecting chromadb
  Downloading chromadb-0.4.22-py3-none-any.whl (509 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m509.0/509.0 kB[0m [31m3.1 MB/s[0m eta [36m0:00:00[0m
Collecting chroma-hnswlib==0.7.3 (from chromadb)
  Downloading chroma_hnswlib-0.7.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.4/2.4 MB[0m [31m19.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting fastapi>=0.95.2 (from chromadb)
  Downloading fastapi-0.109.0-py3-none-any.whl (92 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m92.0/92.0 kB[0m [31m8.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting uvicorn[standard]>=0.18.3 (from chromadb)
  Downloading uvicorn-0.27.0-py3-none-any.whl (60 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m60.6/60.6 kB[0m [31m4.9 MB/s[0m eta [36m0:00:00[0m
Collecting posthog>=2.4.0 (from chromadb)
  Downloading posthog-3.3.3-py2.

In [None]:
from langchain_community.vectorstores import Chroma

In [None]:
# Embed every chunk and index the vectors in a Chroma collection.
db = Chroma.from_documents(documents=documents, embedding=embeddings)

# Similarity Search

In [None]:
# Retrieve the chunks most similar to the question and print the first hit.
# The hits get their own name so the `docs` list loaded from disk is not
# overwritten (otherwise re-running the split cell would split search hits).
query = "What to do in case of flat tire?"
matching_docs = db.similarity_search(query)
print(matching_docs[0].page_content)

Person: Hi, I'm stranded on the side of the road. I just realized I have a flat tire.


In [None]:
# Same retrieval for a second question. Again the hits are stored under their
# own name rather than reusing `docs`, which holds the documents loaded from disk.
query = "What to do when there is a squeaking sound form break?"
matching_docs = db.similarity_search(query)
print(matching_docs[0].page_content)

Person: My brakes are making a high-pitched squeaking sound whenever I press them.


# Q & A

In [None]:
from langchain_openai import ChatOpenAI

# Chat model used by the question-answering chain.
chatmodel = ChatOpenAI(
    model='gpt-3.5-turbo',
    temperature=0,
    openai_api_key=OPENAI_API_KEY,
)

In [None]:
from langchain.chains.question_answering import load_qa_chain

In [None]:
# Build a question-answering chain on top of the chat model.
# chain_type="map_reduce" selects LangChain's map-reduce combination strategy
# (per the LangChain docs: each document is queried separately and the partial
# answers are then combined into one).
chain = load_qa_chain(llm=chatmodel, chain_type="map_reduce")

# Query

In [None]:
# Ask a question: retrieve the most similar chunks, then let the QA chain
# generate an answer from them.
query = "What to do in case of flat tire?"

matching_docs = db.similarity_search(query)

# `Chain.run` is deprecated (it triggers the deprecation warning); `invoke`
# takes the inputs as a dict and returns a dict whose "output_text" entry
# holds the generated answer string.
answer = chain.invoke({"input_documents": matching_docs, "question": query})["output_text"]

  warn_deprecated(


In [None]:
# Display the answer generated by the QA chain.
answer

'In case of a flat tire, you should replace it with the spare tire. Tighten the lug nuts securely and you should be able to continue driving.'

In [None]:
# Ask a second question: retrieve the most similar chunks, then let the QA
# chain generate an answer from them.
query = "The temperature gauge is in the red, what should be done?"

matching_docs = db.similarity_search(query)

# `Chain.run` is deprecated; `invoke` takes the inputs as a dict and returns a
# dict whose "output_text" entry holds the generated answer string.
answer = chain.invoke({"input_documents": matching_docs, "question": query})["output_text"]

In [None]:
# Display the answer generated by the QA chain.
answer

'You should immediately pull over and turn off the engine to prevent any further damage. Check the coolant level and add coolant if needed. If the problem persists, it is recommended to seek professional assistance.'