In [18]:
import json
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings
from langchain.schema import Document


# Charger les guides depuis un fichier JSON
def load_guides(file_path):
    with open(file_path, "r", encoding="utf-8") as f:
        guides = json.load(f)
    return guides


# Charger les posts depuis un fichier JSON
def load_posts(file_path):
    with open(file_path, "r", encoding="utf-8") as f:
        posts = json.load(f)
    return posts


# Convertir les posts et guides en vecteurs et cr√©er un retriever LangChain
def index_data_embeddings(
    posts, guides, model_name="sentence-transformers/all-MiniLM-L6-v2"
):
    # Construire les textes et les objets Document pour les posts
    documents = []
    for p in posts:
        text_comments = ""
        for comment in p["comments"]:
            text_comments += comment + "\n"
        documents.append(
            Document(
                page_content=f"{p['titre']} - {p['contenu']}",
                metadata={
                    "comments": text_comments,
                    "url": p["url"],
                    "titre": p["titre"],
                    "contenu": p["contenu"],
                },
            )
        )

    # Construire les textes et les objets Document pour les guides
    for g in guides:
        documents.append(
            Document(
                page_content=f"{g['dataType']} - {g['type']} {g['subject']} : {g['title']} {(g['url'])}",
                metadata={
                    "dataType": g["dataType"],
                    "type": g["type"],
                    "subject": g["subject"],
                    "title": g["title"],
                    "category": g["category"],
                    "summary": g["summary"],
                    "url": g["url"],
                    "guideid": g["guideid"],
                },
            )
        )

    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    splits = text_splitter.split_documents(documents)

    # Cr√©er des embeddings avec LangChain
    embedding_model = HuggingFaceEmbeddings(model_name=model_name)
    vector_store = FAISS.from_documents(splits, embedding_model)

    return vector_store.as_retriever(
        search_type="similarity_score_threshold",
        search_kwargs={"k": 2, "score_threshold": 0.3},
    )

In [19]:
posts = load_posts("./data/techsupport_posts.json")

guides = load_guides("./data/guides.json")

retriever = index_data_embeddings(posts, guides)

In [20]:
from dotenv import load_dotenv
import os

# Charger les variables d'environnement
load_dotenv()
OPENAI_KEY = os.getenv("OPENAI_KEY")

In [21]:
import ollama
from langchain_core.language_models import LLM
from typing import List


class OllamaLLM(LLM):
    model: str = "mistral"

    def _call(self, prompt: str, stop: List[str] = None) -> str:
        response = ollama.chat(
            model=self.model, messages=[{"role": "user", "content": prompt}]
        )
        return response["message"]["content"]

    @property
    def _identifying_params(self) -> dict:
        return {"model": self.model}

    @property
    def _llm_type(self) -> str:
        return "ollama"

In [22]:
from langchain.prompts import PromptTemplate

# Le prompt pour le mod√®le
final_prompt_template = PromptTemplate(
    input_variables=["context", "question"],
    template="""
L'utilisateur pose la question suivante :

‚û°Ô∏è {question}

Tu disposes uniquement des documents suivants : des guides techniques et des posts Reddit pertinents.
Ces contenus incluent des descriptions g√©n√©rales, des conseils pratiques, des solutions propos√©es par la communaut√©, et parfois des instructions techniques d√©taill√©es.

üéØ Ta mission :

    Base strictement ta r√©ponse sur les informations pr√©sentes dans les documents fournis ci-dessous ({context}).

    N'utilise aucune connaissance ext√©rieure. Si une information n‚Äôest pas pr√©sente, indique-le explicitement.

    Fournis une r√©ponse structur√©e, professionnelle et en fran√ßais.

üìö Sources disponibles :

{context}

üõ† Format de r√©ponse attendu :

üîç Analyse du probl√®me :

[Pr√©sente une synth√®se du probl√®me pos√©, uniquement en te basant sur les documents.]

‚úÖ V√©rifications pr√©alables recommand√©es :

[Liste les √©l√©ments √† inspecter ou tester avant toute manipulation, tels que sugg√©r√©s dans les documents.]

üìù Proc√©dure d√©taill√©e propos√©e :

[Structure la proc√©dure √©tape par √©tape : ‚Äú√âtape 1‚Äù, ‚Äú√âtape 2‚Äù‚Ä¶ en t‚Äôappuyant sur les guides ou les conseils Reddit.]

üí° Conseils ou pr√©cautions √† prendre :

[Ajoute ici uniquement les recommandations explicitement mentionn√©es dans les documents.]

üîó Sources consult√©es :

[Liste les URL des documents (guides ou posts Reddit) utilis√©s pour construire la r√©ponse, selon les m√©tadonn√©es disponibles.]

üìå Important :
Tu dois strictement t'appuyer sur les contenus fournis dans {context}.
Aucune inf√©rence ou ajout personnel n‚Äôest autoris√©. Si la r√©ponse n‚Äôest pas d√©ductible des documents, indique-le clairement.
""",
)

In [23]:
import requests


def get_guide_steps(guideid):
    url = f"https://www.ifixit.com/api/2.0/guides/{guideid}"
    response = requests.get(url)

    if response.status_code != 200:
        return {
            "error": f"√âchec de r√©cup√©ration du guide {guideid}, code: {response.status_code}"
        }

    data = response.json()
    steps = []

    cpt_steps = 0

    for step in data.get("steps", []):
        cpt_steps += 1
        step_texts = [
            line["text_rendered"]
            for line in step.get("lines", [])
            if "text_rendered" in line
        ]
        steps.append({"stepno": cpt_steps, "text": step_texts})

    return steps

In [24]:
def format_documents(docs):
    formatted_docs = []

    for doc in docs:
        guide_id = doc.metadata.get("guideid")
        if guide_id:
            guide_steps = get_guide_steps(guide_id)
            print(len(guide_steps))
            guide_infos = ""
            for guide in guide_steps:
                step_text = "\n".join(guide["text"])
                guide_infos += "\n" + f"Step {guide['stepno']}:\n" + step_text
            if guide_infos not in doc.page_content:
                doc.page_content += guide_infos

        metadata_text = "\n".join(
            f"{key}: {value}" for key, value in doc.metadata.items()
        )

        formatted_doc = f"""---\nüìÑ **Contenu** :\n{doc.page_content}\n\nüîñ **M√©tadonn√©es** :\n{metadata_text}\n"""
        formatted_docs.append(formatted_doc)

    print("\n\n".join(formatted_docs))
    return "\n\n".join(formatted_docs)

In [25]:
from langchain_openai import ChatOpenAI

# llm = OllamaLLM()

llm = ChatOpenAI(openai_api_key=OPENAI_KEY, model="gpt-4.1", temperature=0.0)

In [26]:
from langchain_core.output_parsers import StrOutputParser
from langchain.schema.runnable import RunnablePassthrough


# Cha√Æne RAG
def create_rag_chain(retriever):
    return (
        {
            "context": retriever | format_documents,
            "question": RunnablePassthrough(),
        }
        | final_prompt_template
        | llm
        | StrOutputParser()
    )

# Multi Query

In [27]:
from langchain.prompts import PromptTemplate

prompt_template = PromptTemplate(
    input_variables=["context", "question"],
    template="""
Ta t√¢che est de g√©n√©rer cinq reformulations diff√©rentes de la question pos√©e par l‚Äôutilisateur afin de retrouver des documents pertinents dans une base de donn√©es vectorielle.
En proposant plusieurs perspectives sur la question, ton objectif est d‚Äôaider l‚Äôutilisateur √† surmonter certaines limites de la recherche par similarit√© bas√©e sur la distance.
Fournis uniquement ces questions alternatives, en ajoutant la question originale traduite, chacune s√©par√©e par un saut de ligne.
R√©pond en **Anglais**
Question initiale : {question}
""",
)

In [28]:
from json import dumps, loads


def get_unique_union(documents: list[list[Document]]) -> list[Document]:
    """Renvoie une liste de documents uniques √† partir d'une liste de listes de documents."""
    # Aplatir la liste
    flattened_docs = [
        dumps(doc.__dict__, sort_keys=True) for sublist in documents for doc in sublist
    ]
    # Supprimer les doublons
    unique_docs = list(set(flattened_docs))
    # Reconvertir en objets Document
    return [Document(**loads(doc)) for doc in unique_docs]

In [29]:
generate_queries = prompt_template | llm | StrOutputParser() | (lambda x: x.split("\n"))

In [30]:
retrieval_chain = generate_queries | retriever.map() | get_unique_union

In [31]:
question = "Pourquoi mon PC ne d√©marre pas ?"
queries = generate_queries.invoke(question)
print("üîç Queries g√©n√©r√©es :", queries)

üîç Queries g√©n√©r√©es : ["Why won't my computer turn on?", '', 'What could be causing my PC to fail to start up?', '', "What are the possible reasons my computer isn't powering up?", '', "How can I troubleshoot a PC that doesn't boot?", '', "What should I do if my computer won't start?"]


In [32]:
question = "Pourquoi mon PC ne d√©marre pas ?"
docs = retrieval_chain.invoke(question)
print("üîç Nombre de documents r√©cup√©r√©s :", len(docs))

  self.vectorstore.similarity_search_with_relevance_scores(
No relevant docs were retrieved using the relevance score threshold 0.3
No relevant docs were retrieved using the relevance score threshold 0.3
No relevant docs were retrieved using the relevance score threshold 0.3
No relevant docs were retrieved using the relevance score threshold 0.3
No relevant docs were retrieved using the relevance score threshold 0.3


üîç Nombre de documents r√©cup√©r√©s : 7


In [38]:
rag_chain = create_rag_chain(retrieval_chain)
response = rag_chain.invoke(question)
print(response)

  self.vectorstore.similarity_search_with_relevance_scores(
No relevant docs were retrieved using the relevance score threshold 0.3
No relevant docs were retrieved using the relevance score threshold 0.3
No relevant docs were retrieved using the relevance score threshold 0.3
No relevant docs were retrieved using the relevance score threshold 0.3
No relevant docs were retrieved using the relevance score threshold 0.3


---
üìÑ **Contenu** :
After this, all was fine until, my next technical issue.

&#x200B;

What's happening is now when I press the power button sometimes my computer sometimes turns on and sometimes doesn't.

&#x200B;

When it doesn't turn on the power button lights up as if it was turning on, so does the power supply, and looking inside it seems everything is moving and working it just doesn't turn on my keyboard, mouse and doesn't display anything on my screen.

&#x200B;

When it doesn't turn on it takes a bit of faffing around such as turning the computer back off again by the power button, turning it on again, turning it off, switching the plugs off, switching plugs on, and then turning the computer on. Even sometimes I have to turn everything off, open the computer up and make sure everything is connected correctly and it will work, although when opening the computer everything seems connected well enough.

&#x200B;

üîñ **M√©tadonn√©es** :
comments: It sounds like they did not 