In [None]:
!pip install PyPDF2 langchain langchain-community groq pinecone-client cohere

Collecting PyPDF2
  Downloading pypdf2-3.0.1-py3-none-any.whl (232 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m232.6/232.6 kB[0m [31m2.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting langchain
  Downloading langchain-0.2.5-py3-none-any.whl (974 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m974.6/974.6 kB[0m [31m8.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting langchain-community
  Downloading langchain_community-0.2.5-py3-none-any.whl (2.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.2/2.2 MB[0m [31m12.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting groq
  Downloading groq-0.9.0-py3-none-any.whl (103 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m103.5/103.5 kB[0m [31m9.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting pinecone-client
  Downloading pinecone_client-4.1.1-py3-none-any.whl (216 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m216.2/216.2 kB[0m [31m12

In [None]:
import os

# Provide placeholder API keys only when a key is not already present in the
# environment. `setdefault` keeps a real key exported outside the notebook
# (shell, Colab secrets, .env loader, ...) instead of clobbering it with a
# placeholder string. Prefer exporting real keys outside the notebook rather
# than pasting them here, so they never end up in a saved/shared .ipynb.
os.environ.setdefault('GROQ_API_KEY', 'YOUR_GROQ_API_KEY')
os.environ.setdefault('PINECONE_API_KEY', 'YOUR_PINECONE_API_KEY')
os.environ.setdefault('COHERE_API_KEY', 'YOUR_COHERE_API_KEY')

In [None]:
import PyPDF2
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import OllamaEmbeddings
from langchain_groq import ChatGroq

# `ChatOllama` is instantiated below but was never imported, which raised
# NameError on a fresh kernel. It ships with langchain-community, which this
# notebook already installs and imports from.
from langchain_community.chat_models import ChatOllama

# Read the service credentials from the environment.
groq_api_key = os.getenv('GROQ_API_KEY')
pinecone_api_key = os.getenv('PINECONE_API_KEY')
cohere_api_key = os.getenv('COHERE_API_KEY')

# Chat model backed by Ollama (model tag "mistral:instruct").
llm_local = ChatOllama(model="mistral:instruct")

# Chat model backed by Groq, authenticated with the key read above.
llm_groq = ChatGroq(
    groq_api_key=groq_api_key,
    # model_name='llama2-70b-4096'
    model_name='mixtral-8x7b-32768',
)

# Read the PDF file and gather the text of every page.
pdf = PyPDF2.PdfReader("demo.pdf")
# Join the pages with a newline so the last word of one page is not fused with
# the first word of the next (plain concatenation glued them together).
# `or ""` is a defensive guard in case a page yields no extractable text.
pdf_text = "\n".join((page.extract_text() or "") for page in pdf.pages)

# Split the text into overlapping chunks (1000 characters, 200 overlap) so
# each chunk can be embedded and retrieved independently.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
texts = text_splitter.split_text(pdf_text)

# Embedding model served through Ollama.
embeddings = OllamaEmbeddings(model="nomic-embed-text")

# One embedding per chunk; `r1[i]` is the vector for `texts[i]`.
r1 = embeddings.embed_documents(texts)

import pinecone

from pinecone import Pinecone, ServerlessSpec

pc = Pinecone(api_key=pinecone_api_key)

# Create the index only when it does not exist yet. An unconditional
# `create_index` fails as soon as the notebook is run a second time, because
# the index from the previous run is still there.
if "testv1" not in pc.list_indexes().names():
    pc.create_index(
        name="testv1",
        dimension=768,  # must equal the length of the vectors stored in `r1`
        metric="cosine",
        spec=ServerlessSpec(
            cloud="aws",
            region="us-east-1",
        ),
    )

index = pc.Index("testv1")

# Build (id, vector, metadata) tuples: the id is the chunk position as a
# string and the chunk text is kept as metadata so it can be read back from
# query results.
vectors = [
    (str(i), vector, {"text": text})
    for i, (vector, text) in enumerate(zip(r1, texts))
]

# Upsert in batches instead of issuing one network request per vector.
# The batch size is kept modest so each request stays small.
upsert_batch_size = 50
for start in range(0, len(vectors), upsert_batch_size):
    index.upsert(vectors=vectors[start:start + upsert_batch_size])

print("done upserting...")

def get_query_embdedding(text):
    """Embed a single query string.

    Passes ``text`` to ``embed_query`` on the notebook-level ``embeddings``
    object and returns whatever that call returns.
    """
    return embeddings.embed_query(text)


import cohere

# Cohere client, used for reranking the retrieved chunks.
co = cohere.Client(cohere_api_key)

query = "when chandrayan landed on moon surface?"

# Embed the question, then fetch the five nearest chunks together with their
# stored metadata.
question_embedding = get_query_embdedding(query)
query_result = index.query(
    vector=question_embedding,
    top_k=5,
    include_metadata=True,
)
similar_texts = []

# Map each matched chunk's text to its position in the result list.
# Using the text as the key collapses matches that carry identical text.
docs = {}
for position, match in enumerate(query_result["matches"]):
    docs[match["metadata"]['text']] = position



# Have Cohere re-order the retrieved chunk texts by relevance to the query,
# returning the document text alongside each result.
candidate_texts = list(docs)
rerank_docs = co.rerank(
    query=query,
    documents=candidate_texts,
    model="rerank-english-v3.0",
    top_n=5,
    return_documents=True,
)
# print("rerank_docs...",rerank_docs)

# Collect the text of every reranked result, in the order Cohere returned them.
reranked_texts = []
for result in rerank_docs.results:
    reranked_texts.append(result.document.text)
# Bare expression: only rendered when it is the last statement of a cell.
reranked_texts

# Merge the reranked chunks into one context string.
context = " ".join(reranked_texts)

# Prompt template with named placeholders. This is deliberately a plain string,
# not an f-string: the original interpolated `context`/`query` via an f-string
# and then called `.format()` on the already-filled text, so any `{` or `}`
# coming from the PDF or the question was parsed as a format field and raised
# KeyError/ValueError/IndexError. `str.format` does not re-scan the values it
# substitutes, so formatting once is safe for arbitrary text.
Template = "Based on the following context : {context} generate precise summary related to question : {question} Do not remove necessary information related to context. Consider `\n` as newline character."
# Filling the template with the actual context and question.
filled_template = Template.format(context=context, question=query)

#pip install groq

import os
from groq import Groq

# Groq SDK client authenticated with the key read earlier.
client = Groq(api_key=groq_api_key)

# Send the filled prompt as a single user message.
user_message = {"role": "user", "content": filled_template}
chat_completion = client.chat.completions.create(
    model="mixtral-8x7b-32768",
    messages=[user_message],
)

# Show the text of the first returned choice.
first_choice = chat_completion.choices[0]
print(first_choice.message.content)