In [7]:
# RAG over a single CV with LlamaIndex: parse the PDF with LlamaParse, chunk and
# embed it, store the vectors in Pinecone, then ask one question about the candidate.
#
# NOTE(review): the recorded output of this cell is
# "ModuleNotFoundError: No module named 'llama_index'", i.e. the packages below are
# not installed in the kernel's environment. Presumably needed: llama-index-core,
# llama-index-llms-groq, llama-index-embeddings-gemini,
# llama-index-vector-stores-pinecone, llama-parse, pinecone-client, python-dotenv,
# nest-asyncio -- confirm against the project's requirements file.
import os

import nest_asyncio
from dotenv import load_dotenv
from llama_index.core import Settings, SimpleDirectoryReader, VectorStoreIndex
from llama_index.core.ingestion import IngestionPipeline
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.retrievers import VectorIndexRetriever
from llama_index.embeddings.gemini import GeminiEmbedding
from llama_index.llms.groq import Groq
from llama_index.vector_stores.pinecone import PineconeVectorStore
from llama_parse import LlamaParse
from pinecone import Pinecone

# --- Configuration -----------------------------------------------------------
CV_PATH = "CV DZEUANG Martine.pdf"
PINECONE_INDEX_NAME = "llama-index-demo"
CHUNK_SIZE = 1024
CHUNK_OVERLAP = 25
REQUIRED_ENV_VARS = (
    "LLAMA_CLOUD_API_KEY",
    "GROQ_API_KEY",
    "GEMINI_API_KEY",
    "PINECONE_API_KEY",
)

# Allow nested event loops; LlamaParse's docs ask for this when run in a notebook.
nest_asyncio.apply()
load_dotenv()

# Fail early with a readable message instead of an opaque auth error later on.
# The LlamaParse key was previously read as "LLAMA_CLOUD-API_KEY" (hyphen); if the
# .env file carries the same typo, rename the entry there as well.
missing_env_vars = [name for name in REQUIRED_ENV_VARS if not os.getenv(name)]
if missing_env_vars:
    raise RuntimeError(
        "Missing environment variables: " + ", ".join(missing_env_vars)
    )

# --- Load the document -------------------------------------------------------
parser = LlamaParse(
    api_key=os.getenv("LLAMA_CLOUD_API_KEY"),
    result_type="markdown",
    verbose=True,
)
file_extractor = {".pdf": parser}
# load_data() belongs to the reader, not to the extractor dict.
documents = SimpleDirectoryReader(
    input_files=[CV_PATH],
    file_extractor=file_extractor,
).load_data()

# --- Models ------------------------------------------------------------------
llm = Groq(model="llama-3.1-70b-versatile", api_key=os.getenv("GROQ_API_KEY"))
# GeminiEmbedding takes `model_name`, and Google's model ids are prefixed "models/".
embedding_model = GeminiEmbedding(
    model_name="models/embedding-001",
    api_key=os.getenv("GEMINI_API_KEY"),
)

Settings.llm = llm
Settings.embed_model = embedding_model
Settings.chunk_size = CHUNK_SIZE

# --- Vector store ------------------------------------------------------------
# NOTE(review): assumes the index already exists and was created with the
# dimension produced by the embedding model above -- confirm in the Pinecone console.
pinecone_client = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))
pinecone_index = pinecone_client.Index(PINECONE_INDEX_NAME)
vector_store = PineconeVectorStore(pinecone_index=pinecone_index)

# --- Ingest ------------------------------------------------------------------
# The pipeline writes to the store through `vector_store=`; the embedding model has
# to be one of the transformations so the nodes carry vectors when they are upserted.
pipeline = IngestionPipeline(
    transformations=[
        SentenceSplitter(chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP),
        embedding_model,
    ],
    vector_store=vector_store,
)
pipeline.run(documents=documents)

# --- Query -------------------------------------------------------------------
index = VectorStoreIndex.from_vector_store(vector_store=vector_store)
retriever = VectorIndexRetriever(index=index, similarity_top_k=5)
query_engine = RetrieverQueryEngine(retriever=retriever)

response = query_engine.query("How would you describe this candidate?")
print(response)

ModuleNotFoundError: No module named 'llama_index'

In [6]:
# LangChain variant of the CV question-answering flow: load the PDF, split it into
# chunks, embed the chunks into a Pinecone index and query it through RetrievalQA.
#
# NOTE(review): this cell does not run. Its recorded output is
# "ImportError: cannot import name 'GeminiEmbeddings' from 'langchain.embeddings'",
# so nothing below the import block has ever executed.
from langchain.document_loaders import UnstructuredPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
# NOTE(review): this is the import that fails in the recorded output. Gemini
# embeddings for LangChain presumably live in the separate langchain-google-genai
# package (GoogleGenerativeAIEmbeddings) -- confirm before switching.
from langchain.embeddings import GeminiEmbeddings
from langchain.vectorstores import Pinecone
# NOTE(review): cell In[5] imports ChatGroq from langchain_groq instead of this
# name; presumably `langchain.llms` has no Groq class either -- confirm.
from langchain.llms import Groq
from langchain.chains import RetrievalQA
# PromptTemplate is imported but never used in this cell.
from langchain.prompts import PromptTemplate
import pinecone
import os
from dotenv import load_dotenv

# Load variables from a .env file; the API keys below are read with os.getenv.
load_dotenv()

# Load the document
loader = UnstructuredPDFLoader("CV DZEUANG Martine.pdf")
documents = loader.load()

# Split the document into chunks
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=25)
texts = text_splitter.split_documents(documents)

# Create embeddings
# NOTE(review): the model id is spelled "model/..." here; Google's ids are usually
# prefixed "models/" -- confirm once a working embeddings class is in place.
embeddings = GeminiEmbeddings(model="model/embedding-001", api_key=os.getenv("GEMINI_API_KEY"))

# Initialize Pinecone
# NOTE(review): "your-environment" is a placeholder that was never filled in.
pinecone.init(api_key=os.getenv("PINECONE_API_KEY"), environment="your-environment")
index_name = "llama-index-demo"

# Add documents to Pinecone
docsearch = Pinecone.from_documents(texts, embeddings, index_name=index_name)

# Initialize the LLM
llm = Groq(model="llama-3.1-70b-versatile", api_key=os.getenv("GROQ_API_KEY"))

# Create a question answering chain
# chain_type="stuff" with the retriever built from the Pinecone store above.
qa = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=docsearch.as_retriever())

# Query the chain
query = "How would you describe this candidate?"
response = qa.run(query)
print(response)


ImportError: cannot import name 'GeminiEmbeddings' from 'langchain.embeddings' (c:\Users\augustin.morval\OneDrive - Wavestone\Bureau\Dev Workspace\Multitask_Chatbot\.venv\Lib\site-packages\langchain\embeddings\__init__.py)

In [5]:
from langchain.document_loaders import UnstructuredPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Pinecone
from langchain_groq import ChatGroq
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
import pinecone
import os
from dotenv import load_dotenv

# LangChain variant of the CV question-answering flow using OpenAI embeddings and a
# Groq-hosted chat model, answering in French through a custom prompt.
#
# NOTE(review): the recorded output of this cell is an OSError while loading torch's
# fbgemm.dll. That is an environment problem (a broken or incomplete torch install),
# not something this cell can fix; presumably torch is pulled in by one of the
# imports above -- confirm which one and repair the install.

# Load variables from a .env file; the API keys below are read from the environment.
load_dotenv()

# Load the document
loader = UnstructuredPDFLoader("CV DZEUANG Martine.pdf")
documents = loader.load()

# Split the document into chunks
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=25)
texts = text_splitter.split_documents(documents)

# Create embeddings
embeddings = OpenAIEmbeddings(openai_api_key=os.getenv("OPENAI_API_KEY"))

# Initialize Pinecone
if hasattr(pinecone, "Pinecone"):
    # Class-based client (pinecone-client v3+), the same client cell In[7] uses.
    # Module-level pinecone.init() no longer exists there. LangChain's Pinecone
    # store presumably builds its own client from the PINECONE_API_KEY environment
    # variable that load_dotenv() populated -- confirm for the installed version.
    if not os.getenv("PINECONE_API_KEY"):
        raise RuntimeError("PINECONE_API_KEY is not set")
else:
    # Legacy client: the environment used to be the literal placeholder
    # "your-environment"; it is now read from configuration instead.
    pinecone_environment = os.getenv("PINECONE_ENVIRONMENT")
    if not pinecone_environment:
        raise RuntimeError("PINECONE_ENVIRONMENT is not set")
    pinecone.init(api_key=os.getenv("PINECONE_API_KEY"), environment=pinecone_environment)
index_name = "llama-index-demo"

# Add documents to Pinecone
# NOTE(review): the index must have been created with the dimension of the embedding
# model used here. Cell In[7] targets the same index name with a different embedding
# model -- confirm the two are not sharing one index.
docsearch = Pinecone.from_documents(texts, embeddings, index_name=index_name)

# Initialize the LLM
# Groq expects a model id, not a display name; "Gemma Model 2 - 9B" is not an id.
# NOTE(review): "gemma2-9b-it" is Groq's id for Gemma 2 9B -- confirm it is still
# listed among Groq's supported models.
llm = ChatGroq(model_name="gemma2-9b-it", temperature=0.1, api_key=os.getenv("GROQ_API_KEY"))

# Define the prompt template
template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

{context}

Question: {question}
Answer in French:"""
QA_CHAIN_PROMPT = PromptTemplate(input_variables=["context", "question"], template=template)

# Create a question answering chain
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=docsearch.as_retriever(),
    chain_type_kwargs={"prompt": QA_CHAIN_PROMPT},
)

# Query the chain
# invoke() replaces the deprecated run(); RetrievalQA takes the question under
# "query" and returns the answer under "result".
query = "How would you describe this candidate?"
response = qa.invoke({"query": query})["result"]
print(response)


OSError: [WinError 126] Le module spécifié est introuvable. Error loading "c:\Users\augustin.morval\OneDrive - Wavestone\Bureau\Dev Workspace\Multitask_Chatbot\.venv\Lib\site-packages\torch\lib\fbgemm.dll" or one of its dependencies.