In [1]:
import os

from dotenv import load_dotenv, find_dotenv
_ = load_dotenv(find_dotenv())

In [2]:
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import CSVLoader
from langchain.vectorstores import DocArrayInMemorySearch
from IPython.display import display, Markdown

In [3]:
file = 'wine_catalogue.csv'
loader = CSVLoader(file_path=file)

In [14]:
from langchain.indexes import VectorstoreIndexCreator

In [15]:
index = VectorstoreIndexCreator(
    vectorstore_cls=DocArrayInMemorySearch
).from_loaders([loader])

In [16]:
query = "Can you list all wines that cost between $5 and $6 \
    in a table in markdown and summarize each one?"

In [17]:
response = index.query(query)

In [18]:
display(Markdown(response))

 No, I don't know how to do that.

In [19]:
loader = CSVLoader(file_path=file)

In [20]:
docs = loader.load()

In [21]:
docs[0]

Document(page_content=': 0\ntitle: Fragolino Rosso Frizzante - Bottega\ntype: wine-based beverage\nkind: fragolino\ncountry: mild - doux - sweet - dolce\nregion: Italy\nprice_eur: 5.97\nvolume_lit: 0.75\norder_number: 70601-IT-RW-001', metadata={'source': 'wine_catalogue.csv', 'row': 0})

In [56]:
from langchain.embeddings import OpenAIEmbeddings
embeddings = OpenAIEmbeddings()

In [57]:
embed = embeddings.embed_query("Hi my name is Mustafa")

In [58]:
print(len(embed))

1536


In [59]:
print(embed[:5])

[-0.03693114593625069, -0.013942343182861805, -0.024672165513038635, -0.04150577634572983, -0.02076573856174946]


In [60]:
db = DocArrayInMemorySearch.from_documents(
    docs, 
    embeddings
)

In [61]:
query = "Please suggest a wine that include 'Aimery' in name"

In [62]:
docs = db.similarity_search(query)

In [63]:
len(docs)

4

In [64]:
docs[0]

Document(page_content=": 5000\ntitle: Fleur de d'Artagnan Blanc CÃ´tes de Gascogne 2020 - Plaimont\ntype: white wine\nkind: dry\ncountry: France\nregion: Gascogne (FR)\nprice_eur: 4.88\nvolume_lit: 0.75\norder_number: 70011-FR-WW-BL-005-12", metadata={'source': 'wine_catalogue.csv', 'row': 5000})

In [65]:
retriever = db.as_retriever()

In [66]:
llm = ChatOpenAI(temperature = 0.0)


In [67]:
qdocs = "".join([docs[i].page_content for i in range(len(docs))])


In [72]:
response = llm.call_as_llm(f"{qdocs} Question: Can you list all wines that include 'Blanc' in name \
    in a table in markdown and summarize each one?") 


In [73]:
display(Markdown(response))

| Title | Type | Kind | Country | Region | Price (EUR) | Volume (L) | Order Number |
|-------|------|------|---------|--------|-------------|------------|--------------|
| Fleur de d'Artagnan Blanc Côtes de Gascogne 2020 - Plaimont | White wine | Dry | France | Gascogne (FR) | 4.88 | 0.75 | 70011-FR-WW-BL-005-12: 6001 |
| Grauer Burgunder eins zu eins Kabinett trocken 2021 - A. Diehl | White wine | Dry | Germany | Palatinate | 7.49 | 0.75 | 70126-DE-WW-PG-002-12: 65 |
| Grauer Burgunder eins zu eins Kabinett trocken 2021 - A. Diehl | White wine | Dry | Germany | Palatinate | 7.49 | 0.75 | 70126-DE-WW-PG-002-12: 7990 |
| Horgelus Blanc Côtes de Gascogne 2021 - Domaine Horgelus | White wine | Dry | France | Gascogne (FR) | 6.53 | 0.75 | 70217-FR-WW-BL-001-11 |

- Fleur de d'Artagnan Blanc Côtes de Gascogne 2020 - Plaimont: A dry white wine from France's Gascogne region with a price of 4.88 EUR per 0.75 L bottle.
- Grauer Burgunder eins zu eins Kabinett trocken 2021 - A. Diehl: A dry white wine from Germany's Palatinate region with a price of 7.49 EUR per 0.75 L bottle. There are two orders with different order numbers.
- Horgelus Blanc Côtes de Gascogne 2021 - Domaine Horgelus: A dry white wine from France's Gascogne region with a price of 6.53 EUR per 0.75 L bottle.

In [74]:
qa_stuff = RetrievalQA.from_chain_type(
    llm=llm, 
    chain_type="stuff", 
    retriever=retriever, 
    verbose=True
)

In [76]:
query =  "Can you list all wines that cost between €5 and €6 \
    in a table in markdown and summarize each one?"

In [77]:
response = qa_stuff.run(query)



[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


In [78]:
display(Markdown(response))

| Order Number | Title | Type | Kind | Country | Region | Price (EUR) | Volume (L) |
|--------------|-------|------|------|---------|--------|-------------|------------|
| 70011-FR-WW-BL-005-12 | Fleur de d'Artagnan Blanc CÃ´tes de Gascogne 2020 - Plaimont | White Wine | Dry | France | Gascogne (FR) | 4.88 | 0.75 |
| 70217-FR-WW-BL-001-11 | Horgelus Blanc CÃ´tes de Gascogne 2021 - Domaine Horgelus | White Wine | Dry | France | Gascogne (FR) | 6.53 | 0.75 |

- Fleur de d'Artagnan Blanc CÃ´tes de Gascogne 2020 - Plaimont: This is a white wine from France, specifically from the Gascogne region. It is a dry wine that costs €4.88 and comes in a 0.75 L bottle.
- Horgelus Blanc CÃ´tes de Gascogne 2021 - Domaine Horgelus: This is another white wine from France, also from the Gascogne region. It is a dry wine that costs €6.53 and comes in a 0.75 L bottle.

In [79]:
response = index.query(query, llm=llm)

In [80]:
index = VectorstoreIndexCreator(
    vectorstore_cls=DocArrayInMemorySearch,
    embedding=embeddings,
).from_loaders([loader])