# Import Libraries

In [1]:
from langchain.llms.openai import OpenAIChat
from langchain.document_loaders import CSVLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores.docarray import DocArrayInMemorySearch
from langchain.chains import RetrievalQA
from langchain.globals import set_debug
from dotenv import load_dotenv

set_debug(True)  # Enable debug mode
load_dotenv()  # Load the variables from .env file

True

# Document Loaders

LangChain provides different document loaders for different file formats. 

In [2]:
file_path = "./OutdoorClothingCatalog_1000.csv"

In [9]:
docs = CSVLoader(file_path=file_path, encoding="utf-8").load()  # Loading the documents

In [10]:
docs

[Document(page_content=": 0\nname: Women's Campside Oxfords\ndescription: This ultracomfortable lace-to-toe Oxford boasts a super-soft canvas, thick cushioning, and quality construction for a broken-in feel from the first time you put them on. \n\nSize & Fit: Order regular shoe size. For half sizes not offered, order up to next whole size. \n\nSpecs: Approx. weight: 1 lb.1 oz. per pair. \n\nConstruction: Soft canvas material for a broken-in feel and look. Comfortable EVA innersole with Cleansport NXT® antimicrobial odor control. Vintage hunt, fish and camping motif on innersole. Moderate arch contour of innersole. EVA foam midsole for cushioning and support. Chain-tread-inspired molded rubber outsole with modified chain-tread pattern. Imported. \n\nQuestions? Please contact us for any inquiries.", metadata={'source': './OutdoorClothingCatalog_1000.csv', 'row': 0}),
 Document(page_content=': 1\nname: Recycled Waterhog Dog Mat, Chevron Weave\ndescription: Protect your floors from spills 

In [11]:
len(docs)

48

# Document Transformers

`Document Transformers` are used to transform the documents before they are passed to embeddings.

In [12]:
# Splitter to split the data into chunks
splitter = RecursiveCharacterTextSplitter(chunk_size=1000)

In [13]:
docs = splitter.split_documents(docs)  # Splitting the data

In [14]:
len(docs)

58

# Embeddings

LangChain provides different embeddings from different services. The most common one is `OpenAIEmbedding`.

In [15]:
embeddings = OpenAIEmbeddings()  # Embeddings model

In [16]:
embeddings.embed_query("What is the best jacket for hiking?")

[0.006429283823747075,
 0.0005334501573959064,
 0.006599856948787365,
 -0.0036738766037586953,
 -0.01373767468299089,
 0.01308162499010887,
 -0.017424672690389154,
 -0.031490370357175924,
 0.010614879336965357,
 -0.028944900007485258,
 -0.004395530986532148,
 0.007472402351141753,
 0.01160551411244689,
 -0.004992535760019955,
 -0.0020944375828181247,
 0.003381934686003935,
 0.031700305364828506,
 0.010792012232502866,
 0.016243783615730544,
 -0.008233419547850066,
 -0.010877299027853652,
 0.013895125938730327,
 0.014879200478053358,
 -0.009506155654017964,
 -0.011651437628201533,
 -0.009453671902104818,
 0.02544159606310557,
 -0.015928879241606537,
 0.02666184655471519,
 -0.011986022710801045,
 0.035216729781525194,
 0.015548370643252383,
 -0.04088499733988653,
 -0.0020271926010564257,
 0.0007618373426434454,
 0.0027062038189851266,
 -0.010601757933325788,
 -0.018579318957768626,
 0.020022626791992966,
 -0.007150938206520528,
 0.027685284373634364,
 -0.0010652601393368666,
 -0.00761017

# Vector Stores

LangChain provides different vector stores for different databases. The most simpler one to use without any setup is using `DocArrayInMemorySearch`

In [17]:
db = DocArrayInMemorySearch.from_documents(
    docs, embeddings
)  # Create the vector store in memory

2023-12-14 15:54:23.720878: I tensorflow/core/util/port.cc:111] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-12-14 15:54:24.174423: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2023-12-14 15:54:24.174459: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2023-12-14 15:54:24.176599: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2023-12-14 15:54:24.373524: I tensorflow/core/platform/cpu_feature_g

In [18]:
db.similarity_search("waterproof jacket")

[Document(page_content=": 25\nname: Women's PrimaLoft Downpour Jacket\ndescription: Our PrimaLoft hooded insulated jacket is redesigned to keep you warm and lightweight in any weather. It is wind and water resistant and offers two levels of comfort: light activity to 35° and moderate activity to -15°. The updated construction is made with PrimaLoft’s Cross Core technology, which is a fusion of PrimaLoft Gold insulation and Aerogel. This combination provides 15% more warmth without any additional weight. The polyester shell and lining is 100% recycled and machine washable. This jacket features an easy on/off elastic hood, an internal media pocket with audio port, two handpockets, one chest pocket, drawcord hem, and a take-along pocket for easy storage. Explore Cross Core and watch the video for more information.", metadata={'source': './OutdoorClothingCatalog_1000.csv', 'row': 25}),
 Document(page_content=': 11\nname: Ultra-Lofty 850 Stretch Down Hooded Jacket\ndescription: This technic

# Retrieval

There are chains in LangChain, that combines the above components to provide a simple interface for retrieval. The most common one is `RetrievalQA`

In [20]:
llm = OpenAIChat(
    temperature=0.0,
)



In [21]:
qa_chain = RetrievalQA.from_chain_type(
    llm=llm, chain_type="stuff", retriever=db.as_retriever()
)

In [22]:
qa_chain("What is the best jacket for hiking?")

[32;1m[1;3m[chain/start][0m [1m[1:chain:RetrievalQA] Entering Chain run with input:
[0m{
  "query": "What is the best jacket for hiking?"
}
[32;1m[1;3m[chain/start][0m [1m[1:chain:RetrievalQA > 3:chain:StuffDocumentsChain] Entering Chain run with input:
[0m[inputs]
[32;1m[1;3m[chain/start][0m [1m[1:chain:RetrievalQA > 3:chain:StuffDocumentsChain > 4:chain:LLMChain] Entering Chain run with input:
[0m{
  "question": "What is the best jacket for hiking?",
  "context": ": 8\nname: Mountain Man Fleece Jacket\ndescription: Our best-value fleece jacket is designed with inspiration from our archives and made from 100% recycled polyester for unbeatable comfort and wear-anywhere style. \n\nSize & Fit: Slightly Fitted. Best with lightweight layer. Falls at hip. \n\nWhy We Love It: Our designers took inspiration from the  archives to create this ultrasoft fleece jacket. We love how the heritage styling is updated with a modern, slimming fit. Plus, it’s made from 100% recycled fleece

{'query': 'What is the best jacket for hiking?',
 'result': "The best jacket for hiking would be the Women's PrimaLoft Downpour Jacket."}