In [3]:
import getpass
import os

# Turn on the LangSmith tracing flag for this kernel.
os.environ["LANGSMITH_TRACING"] = "true"

# Prompt only when no key is already set, so re-running the cell (or starting
# the kernel with the variable exported) does not overwrite a valid key.
# The prompt is labelled so it is clear which secret is being requested.
if not os.environ.get("LANGSMITH_API_KEY"):
    os.environ["LANGSMITH_API_KEY"] = getpass.getpass(
        "Enter API key for LangSmith: ")

In [60]:
from langchain_mistralai import MistralAIEmbeddings, ChatMistralAI
import getpass
import os

# Ask for the Mistral key interactively only when the environment has none.
# Environment values are always strings, so "unset" and "empty" are the two
# cases that trigger the prompt.
if os.environ.get("MISTRAL_API_KEY", "") == "":
    os.environ["MISTRAL_API_KEY"] = getpass.getpass("Enter API key for Mistral AI: ")

# Mistral chat model, bound to `llm`.
llm = ChatMistralAI(model="open-mistral-nemo")

# Mistral embedding model, bound to `embeddings`.
embeddings = MistralAIEmbeddings(model="mistral-embed")



In [59]:
from langchain_huggingface import HuggingFacePipeline

# Local Hugging Face text-generation pipeline, kept under its own name.
# Binding it to `llm` would replace the Mistral chat model that `generate`
# uses via `llm.with_structured_output(...)` whenever the notebook is run
# top to bottom.
# NOTE(review): a plain text-generation pipeline wrapper is not expected to
# implement `with_structured_output` — confirm against the installed
# langchain version before swapping it in as `llm`.
# NOTE(review): transformers appears to apply `temperature` / `top_k` only when
# sampling is enabled (`do_sample=True`) — confirm these settings take effect.
hf_llm = HuggingFacePipeline.from_model_id(
    model_id="microsoft/Phi-3-mini-4k-instruct",
    task="text-generation",
    pipeline_kwargs={
        "max_new_tokens": 100,
        "top_k": 50,
        "temperature": 0.1,
    },
)
# Smoke test: the completion is displayed as the cell output.
hf_llm.invoke("Hugging Face is")

Downloading shards: 100%|██████████| 2/2 [03:01<00:00, 90.78s/it] 
Loading checkpoint shards: 100%|██████████| 2/2 [00:06<00:00,  3.31s/it]
Device set to use cuda:0


'Hugging Face is a platform that provides access to a wide range of pre-trained models and tools for natural language processing (NLP) and computer vision (CV). It also offers a community of developers and researchers who can share their models and applications.\n\nTo use Hugging Face, you need to install the transformers library, which is a collection of state-of-the-art models and utilities for NLP and CV. You can install it using pip:\n\n```\n'

In [26]:
from langchain_chroma import Chroma

# Chroma vector store that embeds content with the `embeddings` model.
# Only `embedding_function` is passed; every other Chroma setting is left at
# its default.
vector_store = Chroma(embedding_function=embeddings)

In [28]:
from langchain import hub
from langchain_core.documents import Document
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langgraph.graph import START, StateGraph
from typing_extensions import List, TypedDict, Annotated
from typing import List

from langchain_community.document_loaders import PyPDFLoader

# Source PDF, given as a relative path (resolved against the current working
# directory).
file_path = "./data/Plan_type_mémoire_d_étude.pdf"
loader = PyPDFLoader(file_path)

# Load the PDF, then cut the loaded documents into 1000-character chunks with
# a 200-character overlap; `add_start_index=True` is requested from the splitter.
docs = loader.load()
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000, chunk_overlap=200, add_start_index=True
)
all_splits = text_splitter.split_documents(docs)

# Sanity check: embed the first two chunks and confirm both vectors have the
# same length.
# NOTE(review): indexing all_splits[1] assumes the PDF produced at least two
# chunks; a shorter document would raise IndexError here.
vector_1 = embeddings.embed_query(all_splits[0].page_content)
vector_2 = embeddings.embed_query(all_splits[1].page_content)

assert len(vector_1) == len(vector_2)
# Index chunks
# NOTE(review): every run of this cell calls add_documents again on the same
# store; presumably this appends duplicate chunks on re-run — confirm.
ids = vector_store.add_documents(documents=all_splits)

# Define prompt for question-answering (pulled from the hub by name).
prompt = hub.pull("rlm/rag-prompt")

# Desired schema for response.
# This class is what `generate` passes to `llm.with_structured_output(...)`.
# NOTE(review): presumably the class docstring and the description string in
# `Annotated` are forwarded to the model as schema text — treat both as
# runtime text and confirm before rewording them.
class AnswerWithSources(TypedDict):
    """An answer to the question, with sources."""

    # Free-text answer.
    answer: str
    # `Annotated` metadata here is `...` followed by a description string.
    # NOTE(review): this looks like LangChain's (type, default, description)
    # convention, with `...` meaning "required, no default" — confirm.
    sources: Annotated[
        List[str],
        ...,
        "List of sources (author + year) used to answer the question",
    ]
    
# Define state for application.
# This TypedDict is the schema given to `StateGraph` and the annotated type of
# the `state` argument of both graph steps.
class State(TypedDict):
    # Read by `retrieve` (search query) and by `generate` (prompt input).
    question: str
    # Returned by `retrieve`; read by `generate`.
    context: List[Document]
    # Returned by `generate`.
    answer: AnswerWithSources


# Define application steps
def retrieve(state: State):
    """Fetch documents related to the question.

    Reads ``state["question"]``, runs ``vector_store.similarity_search`` on
    it, and returns a partial state update with the results under
    ``"context"``.
    """
    question = state["question"]
    hits = vector_store.similarity_search(question)
    return dict(context=hits)


def generate(state: State):
    """Produce a structured answer from the retrieved context.

    Joins the ``page_content`` of every document in ``state["context"]`` with
    blank lines, fills the prompt with that text and ``state["question"]``,
    and invokes ``llm`` constrained to the ``AnswerWithSources`` schema.
    Returns a partial state update with the result under ``"answer"``.
    """
    passages = [doc.page_content for doc in state["context"]]
    prompt_input = {
        "question": state["question"],
        "context": "\n\n".join(passages),
    }
    messages = prompt.invoke(prompt_input)
    answer = llm.with_structured_output(AnswerWithSources).invoke(messages)
    return {"answer": answer}


# Assemble the graph: register retrieve -> generate as a sequence, enter the
# graph at "retrieve", then compile it.
graph_builder = StateGraph(State)
graph_builder.add_sequence([retrieve, generate])
graph_builder.add_edge(START, "retrieve")
graph = graph_builder.compile()



In [56]:
import json

# Run the compiled graph on one question and pretty-print the structured answer.
response = graph.invoke({"question": "Peux tu m'expliquer la démarche projet"})
# ensure_ascii=False keeps accented characters (é, ô, ’ ...) readable instead
# of emitting them as \uXXXX escape sequences.
print(json.dumps(response["answer"], indent=2, ensure_ascii=False))

{
  "answer": "La d\u00e9marche projet est une m\u00e9thode utilis\u00e9e pour atteindre les objectifs d'un projet. Elle comprend les activit\u00e9s d'ing\u00e9nierie logicielle, de donn\u00e9es et de ML, ainsi que la m\u00e9thode de gestion de projet utilis\u00e9e, les r\u00f4les et responsabilit\u00e9s des parties prenantes et les technologies et outils n\u00e9cessaires.",
  "sources": [
    "4. DEMARCHE PROJET 4.1. Principes de la d\u00e9marche projet 4.1.1. Activit\u00e9s d\u2019ing\u00e9nierie logicielle / ing\u00e9nierie de donn\u00e9es / ing\u00e9nierie ML 4.1.2. M\u00e9thode de gestion de projet utilis\u00e9e 4.1.3. R\u00f4les et responsabilit\u00e9s 4.1.4. Technologies et outils"
  ]
}
