In [41]:
# Importation des bibliothèques nécessaires
import json
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import OllamaEmbeddings
# from langchain_ollama import OllamaEmbeddings
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA
from langchain.llms import Ollama

In [42]:
# Étape 1 : Chargement des données
def load_data(file_path):
    with open(file_path, 'r', encoding='utf-8') as f:
        data = [json.loads(line) for line in f]
    # Extraire les descriptions
    texts = []
    for item in data:
        if 'description' in item:
            description = item['description']
            if isinstance(description, list):  # Si c'est une liste, concaténer les éléments
                description = ' '.join(description)
            if isinstance(description, str) and description.strip():  # Vérifier que ce n'est pas vide
                texts.append(description.strip())
    return texts

# Charger les descriptions des produits
data = load_data('meta.jsonl')

print(f"Nombre total de descriptions chargées : {len(data)}")
for desc in data[:3]:
    print(f"- {desc}\n")


# # Étape 1 : Chargement des données
# # Chemin vers le fichier meta.jsonl
# meta_file_path = "meta.jsonl"

# # Charger uniquement les descriptions dans une liste
# descriptions = []
# with open(meta_file_path, 'r') as file:
#     for line in file:
#         data = json.loads(line)
#         if 'description' in data and data['description']:  # Vérifier la présence de la clé description
#             descriptions.append(data['description'])

# print(f"Nombre total de descriptions chargées : {len(descriptions)}")
# print("Exemple de descriptions :")
# for desc in descriptions[:3]:
#     print(f"- {desc}\n")

Nombre total de descriptions chargées : 448
- JUST LOOK, You can tell the difference. Make everyday more convenient, it is slim but has big rooms. If you are looking for a rich and luxurious appearance, look no further. These double shoulders are the perfect leather for creating attractive finished belts, straps and wallets. It doesn't only show the perfect weight for accessories where rugged durability is needed but also has a natural finish and coarse grain.

- Case does not need to be removed for charging. Camera opening allows unobstructed use of camera and flash. DandyCase proudly presents the premium "PERFECT PATTERN" from the line of stylish cases that will make your friends jealous! Stand out from the rest and show off your Apple iPhone 6 Plus with these one of a kind cases. These cases are made of a durable, yet slightly flexible, TPU material that won't stretch or tear and the patterns wont chip, fade, or peel. DandyCase PERFECT PATTERN cases come with a lifetime warranty aga

In [43]:
# Étape 2 : Prétraitement et segmentation des textes
def preprocess_texts(texts, chunk_size=512, chunk_overlap=128):
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        separators=["\n\n", "\n", " ", ""]
    )
    chunks = []
    for text in texts:
        if isinstance(text, str): # Vérifier que la description est une chaîne
            chunks.extend(splitter.split_text(text))

    print(f"Nombre total de chunks créés : {len(chunks)}")
    return chunks

chunks = preprocess_texts(data)
for desc in chunks[:3]:
    print(f"- {desc}\n")



# chunk_size = 512
# chunk_overlap = 128

# text_splitter = RecursiveCharacterTextSplitter(
#     chunk_size=chunk_size, chunk_overlap=chunk_overlap
# )

# def preprocess_data(data):
#     """Divise les descriptions longues en morceaux."""
#     all_chunks = []
#     for item in data:
#         description = item.get("description", "")
#         if description:
#             chunks = text_splitter.split_text(description)
#             all_chunks.extend(chunks)
#     print(f"Nombre total de chunks créés : {len(all_chunks)}")
#     return all_chunks

# chunks = preprocess_data(data)


Nombre total de chunks créés : 969
- JUST LOOK, You can tell the difference. Make everyday more convenient, it is slim but has big rooms. If you are looking for a rich and luxurious appearance, look no further. These double shoulders are the perfect leather for creating attractive finished belts, straps and wallets. It doesn't only show the perfect weight for accessories where rugged durability is needed but also has a natural finish and coarse grain.

- Case does not need to be removed for charging. Camera opening allows unobstructed use of camera and flash. DandyCase proudly presents the premium "PERFECT PATTERN" from the line of stylish cases that will make your friends jealous! Stand out from the rest and show off your Apple iPhone 6 Plus with these one of a kind cases. These cases are made of a durable, yet slightly flexible, TPU material that won't stretch or tear and the patterns wont chip, fade, or peel. DandyCase PERFECT PATTERN cases come with a

- TPU material that won't str

In [None]:
# Étape 3 : Création d'un index vectoriel
embeddings = OllamaEmbeddings(model="llama3.2:latest")  # Utilisation des embeddings Ollama
vector_db = Chroma(persist_directory="vector_store", embedding_function=embeddings)

# Ajout des chunks à la base vectorielle
vector_db.add_texts(chunks)
vector_db.persist()
print("Base de données vectorielle créée et sauvegardée.")

In [38]:
# Étape 4 : Création d'un système de récupération (retrieval system)
retriever = vector_db.as_retriever()

In [39]:
# Étape 5 : Conception de la chaîne RAG
llm = Ollama(model="llama-3.2")  # Utilisation du modèle Ollama LLM
qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)

  llm = Ollama(model="llama-3.2")  # Utilisation du modèle Ollama LLM


In [40]:
# Étape 6 : Exécution de requêtes utilisateur
def query_rag(question):
    """Exécute une requête utilisateur dans la chaîne RAG."""
    response = qa_chain.run(question)
    print(f"Question : {question}\nRéponse : {response}\n")

# Exemple de requêtes utilisateur
query_rag("Tell me about OnePlus 6T")
query_rag("What are the features of the latest smartphones?")

  response = qa_chain.run(question)


OllamaEndpointNotFoundError: Ollama call failed with status code 404. Maybe your model is not found and you should pull the model with `ollama pull llama-3.2`.