In [1]:
from langchain_community.document_loaders.csv_loader import CSVLoader
from langchain_core.documents import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_chroma import Chroma
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from typing import List, Dict, Any
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnableLambda, RunnablePassthrough, RunnableParallel
from langchain.pydantic_v1 import BaseModel
from langchain_core.output_parsers import StrOutputParser
import os
from dotenv import load_dotenv

In [2]:
load_dotenv()

os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")

In [3]:
def load_data(file_path: str) -> List[Document]:
    loader = CSVLoader(file_path=file_path)
    documents = loader.load()
    return documents# Exemple de création d'une instance de QuestionRequest


file_path_input = input("Entrez le chemin du fichier CSV : ")

# Utilisation du file_path de l'instance QuestionRequest
data = load_data(file_path_input)
data[:5]

[Document(page_content='Categorie;Nom;Marque;Caracteristiques;Prix en euro;QuantitÃ© disponible: electromenager;robot culinaire;kitchenaid;capacitÃ© 15 500w;89.99;15', metadata={'source': 'D:/Projet/AskyourCSV/data_processed.csv', 'row': 0}),
 Document(page_content='Categorie;Nom;Marque;Caracteristiques;Prix en euro;QuantitÃ© disponible: electromenager;machine cafÃ©;philips;programmable 12 1000w;79.99;20', metadata={'source': 'D:/Projet/AskyourCSV/data_processed.csv', 'row': 1}),
 Document(page_content='Categorie;Nom;Marque;Caracteristiques;Prix en euro;QuantitÃ© disponible: electromenager;aspirateur sac;dyson;puissance 1800w capacitÃ© 2l;129.99;10', metadata={'source': 'D:/Projet/AskyourCSV/data_processed.csv', 'row': 2}),
 Document(page_content='Categorie;Nom;Marque;Caracteristiques;Prix en euro;QuantitÃ© disponible: electromenager;lavevaisselle;bosch;12 couverts classe;399.99;5', metadata={'source': 'D:/Projet/AskyourCSV/data_processed.csv', 'row': 3}),
 Document(page_content='Categ

In [4]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
documents = text_splitter.split_documents(data)

In [5]:
vectorstore = Chroma.from_documents(
    documents,
    embedding=OpenAIEmbeddings(model="text-embedding-ada-002"),
)

In [6]:
retriever = RunnableLambda(vectorstore.similarity_search).bind(k=5)

In [7]:
llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0.6)

In [8]:
template = """Answer the question based only on the following context:
{context}

Question: {question}
"""

In [9]:
prompt = ChatPromptTemplate.from_template(template)

In [10]:
chain = (
  # Extract the question text from the QuestionRequest object
    RunnableParallel({"context": retriever, "question": RunnablePassthrough()})
    | prompt
    | llm
    | StrOutputParser()
)

In [11]:
# def extract_input(question_request: QuestionRequest) -> Dict[str, str]:
#     return {"question": question_request.question}

In [12]:
class Question(BaseModel):
    __root__: str

chain = chain.with_types(input_type=Question)

result = chain.invoke("Quel est le téléphone le moins cher")
print(result)

Le Xiaomi Mi 10 Lite est le téléphone le moins cher avec un prix de 299.0 euros.
