In [2]:
!pip install faiss-cpu langchain langchain-community langchain-openai pandas python-dotenv

Collecting faiss-cpu
  Using cached faiss_cpu-1.11.0-cp313-cp313-macosx_14_0_x86_64.whl.metadata (4.8 kB)
Collecting langchain
  Using cached langchain-0.3.25-py3-none-any.whl.metadata (7.8 kB)
Collecting langchain-community
  Using cached langchain_community-0.3.23-py3-none-any.whl.metadata (2.5 kB)
Collecting langchain-openai
  Using cached langchain_openai-0.3.16-py3-none-any.whl.metadata (2.3 kB)
Collecting pandas
  Using cached pandas-2.2.3-cp313-cp313-macosx_10_13_x86_64.whl.metadata (89 kB)
Collecting python-dotenv
  Using cached python_dotenv-1.1.0-py3-none-any.whl.metadata (24 kB)
Collecting numpy<3.0,>=1.25.0 (from faiss-cpu)
  Using cached numpy-2.2.5-cp313-cp313-macosx_14_0_x86_64.whl.metadata (62 kB)
Collecting packaging (from faiss-cpu)
  Using cached packaging-25.0-py3-none-any.whl.metadata (3.3 kB)
Collecting langchain-core<1.0.0,>=0.3.58 (from langchain)
  Using cached langchain_core-0.3.59-py3-none-any.whl.metadata (5.9 kB)
Collecting langchain-text-splitters<1.0.0,>=

In [4]:
from langchain_community.document_loaders.csv_loader import CSVLoader
from pathlib import Path
from langchain_openai import ChatOpenAI,OpenAIEmbeddings
import os
from dotenv import load_dotenv

# Load environment variables (including OPENAI_API_KEY) from a local .env file, if present.
load_dotenv()

# Fail fast with a readable message when the key is missing. Re-assigning
# os.environ["OPENAI_API_KEY"] from os.getenv() is a no-op when the key exists
# and raises an opaque TypeError when it does not, so check explicitly instead.
if not os.getenv("OPENAI_API_KEY"):
    raise EnvironmentError(
        "OPENAI_API_KEY is not set. Add it to a .env file next to this notebook "
        "or export it in your shell before starting the kernel."
    )

# Chat model used to generate the final answers; it reads the API key from the environment.
llm = ChatOpenAI(model="gpt-4o-mini")

In [10]:
import pandas as pd

# Relative path of the customer CSV; point this at your own file if needed.
file_path = "data/customers-100.csv"

# Read the CSV into a DataFrame so its contents can be inspected.
data = pd.read_csv(file_path)

# Display the first rows as this cell's output.
data.head()

Unnamed: 0,Index,Customer Id,First Name,Last Name,Company,City,Country,Phone 1,Phone 2,Email,Subscription Date,Website
0,1,DD37Cf93aecA6Dc,Sheryl,Baxter,Rasmussen Group,East Leonard,Chile,229.077.5154,397.884.0519x718,zunigavanessa@smith.info,2020-08-24,http://www.stephenson.com/
1,2,1Ef7b82A4CAAD10,Preston,Lozano,Vega-Gentry,East Jimmychester,Djibouti,5153435776,686-620-1820x944,vmata@colon.com,2021-04-23,http://www.hobbs.com/
2,3,6F94879bDAfE5a6,Roy,Berry,Murillo-Perry,Isabelborough,Antigua and Barbuda,+1-539-402-0259,(496)978-3969x58947,beckycarr@hogan.com,2020-03-25,http://www.lawrence.com/
3,4,5Cef8BFA16c5e3c,Linda,Olsen,"Dominguez, Mcmillan and Donovan",Bensonview,Dominican Republic,001-808-617-6467x12895,+1-813-324-8756,stanleyblackwell@benson.org,2020-06-02,http://www.good-lyons.com/
4,5,053d585Ab6b3159,Joanna,Bender,"Martin, Lang and Andrade",West Priscilla,Slovakia (Slovak Republic),001-234-203-0635x76146,001-199-446-3860x3486,colinalvarado@miles.net,2021-04-17,https://goodwin-ingram.com/


In [11]:
# CSVLoader turns every row of the CSV into its own Document.
loader = CSVLoader(file_path=file_path)

# Use load() rather than load_and_split(): the latter is documented as
# effectively deprecated and pushes each row through an implicit default text
# splitter, which could cut a single customer record across several chunks.
docs = loader.load()

In [15]:
import faiss
from langchain_community.docstore.in_memory import InMemoryDocstore
from langchain_community.vectorstores import FAISS

# Single embeddings client, shared by the dimension probe and the vector store
# (previously three separate OpenAIEmbeddings instances were created).
embeddings = OpenAIEmbeddings()

# The FAISS index must be created with the embedding dimensionality, so embed
# a throwaway string once and measure the length of the returned vector.
embedding_dim = len(embeddings.embed_query(" "))
index = faiss.IndexFlatL2(embedding_dim)

# Empty vector store: documents are added in a later cell.
vector_store = FAISS(
    embedding_function=embeddings,
    index=index,
    docstore=InMemoryDocstore(),
    index_to_docstore_id={},
)

In [18]:
# Embed the loaded documents and add them to the vector store; the cell output
# is the list of ids assigned to the stored documents.
vector_store.add_documents(docs)

['bf632dc4-16d8-4688-8f0a-b4461d0fa59e',
 '580bbfa3-2f9f-47d9-a2d6-ccdfed43958f',
 '0ace4771-c4fc-45e0-8e29-ea2d7aa8a95f',
 '70ddb807-ae27-490d-88b0-b3e18656e091',
 'bbeedd93-da8d-493b-9ce0-ea49da15538c',
 '9a581c70-693c-48b6-a71e-84f2a7332858',
 '521087e2-15cf-409c-bc68-df4627148117',
 '762ade53-787a-418b-801d-292e67e4417f',
 '9d723532-a141-44b6-acfe-ccc72a305d49',
 'd8a0a8a3-65bb-4742-bfc6-d640cdd567b8',
 'bcf012d4-1887-4739-b0bf-f308bb157c53',
 '8449190c-f0f5-4299-8bc0-093d4223808b',
 '5e74a3d0-40aa-4784-873e-1eb4527479ce',
 'e5c34a62-5e33-423c-8b14-47e485b64bee',
 '9f29dec3-e38c-45e0-b7b2-54087a2ccca6',
 '43278ce9-4caa-417b-bda0-da44cb84b895',
 '2b8ce990-15ad-43d3-9c15-9e454721fe0c',
 '354340f5-4a27-4844-a7f2-dfeb99150bae',
 'ed59bdae-a8d8-4a5d-8380-838e73d1fe8b',
 '2ec47f18-c20f-4074-ab03-247b18464881',
 'e7c9335c-1e03-4e6d-8057-a5c044802a2d',
 'fe1a2abb-d011-4950-907d-2fdbacc92881',
 'dda5d5fb-53d9-4e76-a086-f7cddcb2543e',
 '87135b8a-485a-4ea5-b0a6-c3475d642eab',
 'aaef7060-f1b3-

In [19]:
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain

# System instructions for the model; {context} is the placeholder that the
# stuff-documents chain fills with the retrieved documents.
system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer the question. "
    "If you don't know the answer, say that you don't know. "
    "Use three sentences maximum and keep the answer concise."
    "\n\n{context}"
)

# Two-message chat prompt: the system instructions, then the user's question.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("human", "{input}"),
    ]
)

# Expose the vector store as a retriever with its default search settings.
retriever = vector_store.as_retriever()

# Chain that feeds the retrieved documents plus the question to the LLM ...
question_answer_chain = create_stuff_documents_chain(llm, prompt)
# ... wrapped in a chain that performs the retrieval step first.
rag_chain = create_retrieval_chain(retriever, question_answer_chain)

In [27]:
# Run the full retrieve-then-answer pipeline for one question.
answer = rag_chain.invoke(
    {"input": "List down the first name of employees work in Murillo-Perry"}
)

# The result is a dict; show only the generated answer text.
answer["answer"]

'The first name of the employee working at Murillo-Perry is Roy.'