In [3]:
import getpass
import os

os.environ["LANGSMITH_TRACING"] = "true"
os.environ["LANGSMITH_API_KEY"] = getpass.getpass()

In [11]:
import torch

if torch.cuda.is_available():
    gpu_count = torch.cuda.device_count()
    print(f"Number of GPUs available: {gpu_count}")
    for i in range(gpu_count):
        print(f"GPU {i}: {torch.cuda.get_device_name(i)}")
else:
    print("No GPU available")

Number of GPUs available: 2
GPU 0: NVIDIA RTX A4000
GPU 1: NVIDIA RTX A4000


In [None]:
from langchain_mistralai import MistralAIEmbeddings, ChatMistralAI
import getpass
import os
from langchain_huggingface.embeddings import HuggingFaceEmbeddings
from langchain_huggingface import HuggingFacePipeline


llm = HuggingFacePipeline.from_model_id(
    model_id="microsoft/Phi-3-mini-4k-instruct",
    task="text-generation",
    pipeline_kwargs={
        "max_new_tokens": 100,
        "top_k": 50,
        "temperature": 0.1,
    },
)

embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-mpnet-base-v2")

Loading checkpoint shards: 100%|██████████| 2/2 [00:06<00:00,  3.46s/it]


TypeError: '<' not supported between instances of 'str' and 'int'

In [59]:
from langchain_huggingface import HuggingFacePipeline

llm = HuggingFacePipeline.from_model_id(
    model_id="microsoft/Phi-3-mini-4k-instruct",
    task="text-generation",
    pipeline_kwargs={
        "max_new_tokens": 100,
        "top_k": 50,
        "temperature": 0.1,
    },
)


Downloading shards: 100%|██████████| 2/2 [03:01<00:00, 90.78s/it] 
Loading checkpoint shards: 100%|██████████| 2/2 [00:06<00:00,  3.31s/it]
Device set to use cuda:0


'Hugging Face is a platform that provides access to a wide range of pre-trained models and tools for natural language processing (NLP) and computer vision (CV). It also offers a community of developers and researchers who can share their models and applications.\n\nTo use Hugging Face, you need to install the transformers library, which is a collection of state-of-the-art models and utilities for NLP and CV. You can install it using pip:\n\n```\n'

In [2]:
from langchain_chroma import Chroma

vector_store = Chroma(embedding_function=embeddings)

In [5]:
from langchain import hub
from langchain_core.documents import Document
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langgraph.graph import START, StateGraph
from typing_extensions import List, TypedDict, Annotated
from typing import List

from langchain_community.document_loaders import PyPDFLoader

file_path = "./data/Plan_type_mémoire_d_étude.pdf"
loader = PyPDFLoader(file_path)

docs = loader.load()
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000, chunk_overlap=200, add_start_index=True
)
all_splits = text_splitter.split_documents(docs)

vector_1 = embeddings.embed_query(all_splits[0].page_content)
vector_2 = embeddings.embed_query(all_splits[1].page_content)

assert len(vector_1) == len(vector_2)
# Index chunks
ids = vector_store.add_documents(documents=all_splits)

# Define prompt for question-answering
prompt = hub.pull("rlm/rag-prompt")

# Desired schema for response
class AnswerWithSources(TypedDict):
    """An answer to the question, with sources."""

    answer: str
    sources: Annotated[
        List[str],
        ...,
        "List of sources (author + year) used to answer the question",
    ]
    
# Define state for application
class State(TypedDict):
    question: str
    context: List[Document]
    answer: AnswerWithSources


# Define application steps
def retrieve(state: State):
    retrieved_docs = vector_store.similarity_search(state["question"])
    return {"context": retrieved_docs}


def generate(state: State):
    docs_content = "\n\n".join(doc.page_content for doc in state["context"])
    messages = prompt.invoke(
        {"question": state["question"], "context": docs_content})
    structured_llm = llm.with_structured_output(AnswerWithSources)
    response = structured_llm.invoke(messages)
    return {"answer": response}


# Compile application and test
graph_builder = StateGraph(State).add_sequence([retrieve, generate])
graph_builder.add_edge(START, "retrieve")
graph = graph_builder.compile()

OutOfMemoryError: CUDA out of memory. Tried to allocate 70.00 MiB. GPU 0 has a total capacity of 15.70 GiB of which 66.62 MiB is free. Including non-PyTorch memory, this process has 14.95 GiB memory in use. Of the allocated memory 14.71 GiB is allocated by PyTorch, and 51.79 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

In [56]:
import json

response = graph.invoke({"question": "Peux tu m'expliquer la démarche projet"})
print(json.dumps(response["answer"], indent=2))

{
  "answer": "La d\u00e9marche projet est une m\u00e9thode utilis\u00e9e pour atteindre les objectifs d'un projet. Elle comprend les activit\u00e9s d'ing\u00e9nierie logicielle, de donn\u00e9es et de ML, ainsi que la m\u00e9thode de gestion de projet utilis\u00e9e, les r\u00f4les et responsabilit\u00e9s des parties prenantes et les technologies et outils n\u00e9cessaires.",
  "sources": [
    "4. DEMARCHE PROJET 4.1. Principes de la d\u00e9marche projet 4.1.1. Activit\u00e9s d\u2019ing\u00e9nierie logicielle / ing\u00e9nierie de donn\u00e9es / ing\u00e9nierie ML 4.1.2. M\u00e9thode de gestion de projet utilis\u00e9e 4.1.3. R\u00f4les et responsabilit\u00e9s 4.1.4. Technologies et outils"
  ]
}
