#### Library Installation

In [None]:
!pip install pinecone-client llama-index-vector-stores-pinecone

In [None]:
pip install llama-index-core

#### Library Imports

In [None]:
!pip install llama-index -U
!pip install langchain -U

In [1]:
from llama_index.core import (
    Settings,
    VectorStoreIndex, 
    StorageContext,
    SimpleDirectoryReader, 
)
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.groq import Groq
from llama_index.vector_stores.pinecone import PineconeVectorStore
from pinecone import Pinecone, ServerlessSpec
from dotenv import load_dotenv
import time
import os

In [None]:
pip install llama-index-llms-groq

In [None]:
pip install pydantic -U

#### API Initialization

In [2]:
# loading the .env file
load_dotenv()

# initializing Groq and Pinecone APIs
GROQ_API_KEY = "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"
PINECONE_API_KEY = "08bc7db7-7446-49e0-badf-9ca0d76f1b04"
pc = Pinecone(api_key = PINECONE_API_KEY)

#### LLM and Embedding Model Configuration

In [3]:
llm = Groq(model = "llama3-8b-8192", api_key = GROQ_API_KEY)
embed_model = HuggingFaceEmbedding(model_name = "sentence-transformers/all-mpnet-base-v2")

Settings.llm = llm
Settings.embed_model = embed_model

#### Pinecone Vector DB Index Creation

In [4]:
# setting index_name
index_name = "llama3-groq-pinecone"

# checking existing indexes
existing_indexes = [
    index_info["name"] for index_info in pc.list_indexes()
]

# creating an index if it doesn't exists
if index_name not in existing_indexes: 
    pc.create_index(
        name = index_name, 
        dimension = 768, 
        metric = "cosine", 
        spec = ServerlessSpec(
                cloud = "aws", 
                region = "us-east-1"
        )
    )

# connecting to the created index
index = pc.Index(index_name)
time.sleep(1)

# describing index statistics
index.describe_index_stats()

{'dimension': 768,
 'index_fullness': 0.0,
 'namespaces': {},
 'total_vector_count': 0}

#### Loading the Data

In [6]:
from langchain_community.document_loaders import PyPDFLoader

In [10]:
documents= SimpleDirectoryReader("rag_pipeline_response.pdf").load_data()

ValueError: Directory rag_pipeline_response.pdf does not exist.

#### Upserting data in Pinecone

In [None]:
vector_store = PineconeVectorStore(pinecone_index = index)
storage_context = StorageContext.from_defaults(vector_store = vector_store)
index = VectorStoreIndex.from_documents(
    documents, storage_context = storage_context
)

AttributeError: 'Document' object has no attribute 'get_doc_id'

#### Executing a Query through LlamaIndex

In [19]:
query_engine = index.as_query_engine()
response = query_engine.query("What did the Dothraki horde do?")

In [None]:
print(response)