In [6]:
import os
from dotenv import load_dotenv
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, StorageContext, Settings
from llama_index.vector_stores.pinecone import PineconeVectorStore
from pinecone import Pinecone, PodSpec
from llama_index.embeddings.openai import OpenAIEmbedding
import nest_asyncio

In [7]:
# import os
# from dotenv import load_dotenv
# from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, StorageContext, Settings
# from llama_index.vector_stores.pinecone import PineconeVectorStore
# from pinecone import Pinecone, PodSpec
# from llama_index.embeddings.openai import OpenAIEmbedding
# import nest_asyncio

# from llama_index.embeddings.gemini import GeminiEmbedding
# Settings.embed_model = GeminiEmbedding(api_key=GOOGLE_API_KEY)

# code

In [34]:
from dotenv import dotenv_values

# Load the secrets file explicitly: it is named "dotenv.txt" rather than the
# default ".env", so the path has to be passed in.
config = dotenv_values("dotenv.txt")

# Fail fast with one readable message listing everything that is absent.
# (A missing or unreadable file typically yields an empty mapping, which would
# otherwise surface as a bare KeyError on the first lookup below.)
_required_keys = (
    "PINECONE_API_KEY",
    "PINECONE_ENVIRONMENT",
    "PINECONE_INDEX",
    "GEMINI_API_KEY",
)
_missing_keys = [key for key in _required_keys if key not in config]
if _missing_keys:
    raise KeyError(
        "dotenv.txt is missing or lacks required setting(s): "
        + ", ".join(_missing_keys)
    )

# Access keys
pinecone_api_key = config["PINECONE_API_KEY"]
pinecone_env = config["PINECONE_ENVIRONMENT"]
pinecone_index_name = config["PINECONE_INDEX"]
gemini_api_key = config["GEMINI_API_KEY"]


In [35]:
!pip install -q openai google-generativeai pinecone-client python-dotenv

In [36]:
# Create the Pinecone client with the key read from dotenv.txt.
# The config cell stores it as `pinecone_api_key`; the upper-case name
# `PINECONE_API_KEY` is not defined in the visible notebook and raises
# NameError on a fresh kernel.
pc = Pinecone(api_key=pinecone_api_key)

In [37]:
# Echo the client object as the cell output to confirm it was constructed.
pc

<pinecone.pinecone.Pinecone at 0x73c22fe64f10>

In [38]:
# Load the PDF Document
# The location can be overridden through the PDF_PATH environment variable so
# the notebook is not tied to one machine; the original path stays the default.
pdf_path = os.environ.get(
    "PDF_PATH",
    "/home/mohankalyan/Downloads/KosaraMohanKalyanResume-1.pdf",
)

# Example for Windows (note the 'r' before the string)
# pdf_path = r"C:\Users\yourusername\Downloads\attention-is-all-you-need.pdf"

try:
    # SimpleDirectoryReader is given an explicit file list, so only this one
    # file is read.
    documents = SimpleDirectoryReader(input_files=[pdf_path]).load_data()
except Exception as e:
    print(f"Error loading the PDF file: {e}")
    print(f"Please check that the path is correct: {pdf_path}")
    # Re-raise instead of calling exit(): in a notebook exit() stops the
    # kernel and hides the traceback, whereas raising halts "Run All" at this
    # cell and keeps the session alive for debugging.
    raise
else:
    print(f"Loaded {len(documents)} document(s).")

Loaded 1 document(s).


In [39]:
# Settings for the Pinecone index used by the rest of the notebook.

# Vector size of the index. It has to equal the output size of the embedding
# model used for indexing: the Gemini "models/embedding-001" model configured
# in this notebook produces 768-dimensional vectors (all-MiniLM-L6-v2, by
# contrast, produces 384).
embedding_dimension = 768

# Pod environment passed to PodSpec when the index is created; replace it
# with your actual environment if it differs.
pinecone_environment = "us-east-1-aws"

# Name of the index that is created (if absent) and then written to.
index_name = "chatbot"

In [40]:
# Create the Pinecone index only when it is not already present, so the cell
# can be re-run safely.
existing_index_names = pc.list_indexes().names()

if index_name in existing_index_names:
    print(f"Pinecone index '{index_name}' already exists.")
else:
    print(f"Creating Pinecone index: {index_name}")
    # Pod-based index placed in the configured environment.
    pod_spec = PodSpec(environment=pinecone_environment)
    pc.create_index(
        name=index_name,
        dimension=embedding_dimension,
        metric="cosine",
        spec=pod_spec,
    )
    print("Index created successfully.")

Pinecone index 'chatbot' already exists.


In [41]:
# Wire the Pinecone index into LlamaIndex.
# 1) Obtain a handle to the index named `index_name` from the Pinecone client.
pinecone_index = pc.Index(index_name)
# 2) Wrap that handle in LlamaIndex's Pinecone vector-store adapter.
vector_store = PineconeVectorStore(pinecone_index=pinecone_index)
# 3) Build a storage context around the vector store; it is passed to
#    VectorStoreIndex.from_documents in a later cell.
storage_context = StorageContext.from_defaults(vector_store=vector_store)

In [42]:
# nest_asyncio is already imported in the notebook's import cell, so it is
# not re-imported here (imports stay in one place).

# Apply the patch.
# Presumably needed because Jupyter already runs an asyncio event loop and the
# libraries used below start their own — confirm before removing.
nest_asyncio.apply()

In [43]:
# --- 8. Create the Index ---
# Pass the embedding model explicitly instead of relying on the global
# Settings.embed_model. In this notebook the global is only assigned in a
# later cell, so a top-to-bottom run would embed with the library default
# rather than Gemini, and the vectors would not match the index's
# `embedding_dimension`.
from llama_index.embeddings.gemini import GeminiEmbedding

index_embed_model = GeminiEmbedding(
    model_name="models/embedding-001",
    api_key=gemini_api_key,  # key read from dotenv.txt in the config cell
)

print("Creating index and storing embeddings in Pinecone... This may take a moment.")
# `storage_context` carries the Pinecone vector store built in the cell above.
index = VectorStoreIndex.from_documents(
    documents,
    storage_context=storage_context,
    embed_model=index_embed_model,
)
print("Finished indexing and storing.")

Creating index and storing embeddings in Pinecone... This may take a moment.


Upserted vectors:   0%|          | 0/1 [00:00<?, ?it/s]

Finished indexing and storing.


In [25]:
# Cell 6: Setup LlamaIndex Components (Same as before)
from llama_index.core import Settings, Document
from llama_index.embeddings.gemini import GeminiEmbedding
from llama_index.llms.gemini import Gemini

# Both Gemini clients use the key read from dotenv.txt (`gemini_api_key`).
# The previous name `GOOGLE_API_KEY` is not defined anywhere in the visible
# notebook and raises NameError on a fresh kernel.

# LLM used to generate the answers.
llm = Gemini(
    model="models/gemini-1.5-flash",
    api_key=gemini_api_key,
)

# Embedding model used to embed text and queries.
embed_model = GeminiEmbedding(
    model_name="models/embedding-001",
    api_key=gemini_api_key,
)

# Configure global settings
Settings.llm = llm
Settings.embed_model = embed_model
Settings.chunk_size = 500  # Good for PDF text
Settings.chunk_overlap = 200  # More overlap for PDFs

  llm = Gemini(
  embed_model = GeminiEmbedding(


In [44]:
# Build a query engine on top of the index and run one sanity-check question.
query_engine = index.as_query_engine()

summary_question = "What is this document about?"
response = query_engine.query(summary_question)
print(response)


This document is a resume for Kosara Mohan Kalyan, highlighting their skills, education, projects, and experience in software development and machine learning.



In [47]:
# Interactive question loop over the indexed document.
# Type "exit" or "quit" (any case, surrounding spaces ignored) to stop.
while True:
    try:
        question = input("Ask a question: ").strip()
    except (EOFError, KeyboardInterrupt):
        # Interrupting the kernel or a closed stdin ends the session quietly
        # instead of leaving a traceback in the output.
        break

    if not question:
        # Skip blank input rather than spending an LLM call on it.
        continue

    if question.lower() in ("exit", "quit"):
        break

    print(query_engine.query(question))


Ask a question:  skills


The individual possesses skills in C, Python, Java, HTML, CSS, machine learning, and MySQL.  They also have experience with data structures using C.



Ask a question:  exit
