In [None]:
!pip install llama-index
!pip install llama-index-vector-stores-qdrant
!pip install llama-index-embeddings-fastembed fastembed
!pip install llama-index-llms-google-genai

In [3]:
# Apply the nest_asyncio patch before any other library is used.
# NOTE(review): presumably needed because the notebook kernel already runs an
# asyncio event loop and later calls start their own — confirm.
import nest_asyncio
nest_asyncio.apply()

In [2]:
import os
from google.colab import userdata

# Copy the GOOGLE_API_KEY secret from Colab's userdata store into the process
# environment so the key never appears in the notebook source.
# NOTE(review): presumably picked up from the environment by the GoogleGenAI client — confirm.
os.environ["GOOGLE_API_KEY"] = userdata.get("GOOGLE_API_KEY")

In [4]:
from llama_index.llms.google_genai import GoogleGenAI
from llama_index.embeddings.fastembed import FastEmbedEmbedding

In [5]:
# LLM used for answer generation; registered as the global default via Settings
# in a later cell.
llm = GoogleGenAI(
    model="gemini-2.5-flash",
    temperature=0.1,
    max_tokens=None,
)

In [6]:
# Embedding model; the cell output shows its model files (including
# onnx/model.onnx) being fetched when this runs.
embeddings = FastEmbedEmbedding(
    model_name="jinaai/jina-embeddings-v2-base-en",
)

Access to the secret `HF_TOKEN` has not been granted on this notebook.
You will not be requested again.
Please restart the session if you want to be prompted again.


Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

config.json: 0.00B [00:00, ?B/s]

tokenizer_config.json:   0%|          | 0.00/367 [00:00<?, ?B/s]

onnx/model.onnx:   0%|          | 0.00/547M [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

In [7]:
from llama_index.core import Settings

In [8]:
# Register the notebook-wide defaults on LlamaIndex's global Settings object so
# later cells do not have to pass the LLM / embedding model explicitly.
Settings.llm = llm
Settings.embed_model = embeddings

In [9]:
from llama_index.core import SimpleDirectoryReader

In [16]:
# Load the PDF guide into a list of LlamaIndex documents.
# `recursive` only influences directory traversal (`input_dir`); with an
# explicit `input_files` list it has no effect, so it is not passed here.
documents = SimpleDirectoryReader(
    input_files=["/content/Emergent-Build-Guide.pdf"],
).load_data()

In [10]:
from llama_index.core import VectorStoreIndex, StorageContext
from llama_index.vector_stores.qdrant import QdrantVectorStore
from qdrant_client import QdrantClient, models

In [11]:
# Qdrant client opened against a filesystem path; the same client object is
# reused by both the indexing and the inference vector stores below.
client = QdrantClient(path="/tmp/db")

In [12]:
# Name of the Qdrant collection shared by the indexing store and the inference store.
collection_name = "hybrid-search"

In [None]:
# Vector store used for indexing. Hybrid mode is enabled, with "Qdrant/bm25"
# as the FastEmbed sparse model; writes are batched 20 at a time.
vector_store = QdrantVectorStore(
    client=client,
    collection_name=collection_name,
    enable_hybrid=True,
    fastembed_sparse_model="Qdrant/bm25",
    batch_size=20,
)

In [14]:
# Storage context that routes index writes to the Qdrant vector store above.
storage_context = StorageContext.from_defaults(
    vector_store=vector_store,
)

In [17]:
# Build the index from the loaded documents, writing into the Qdrant-backed
# storage context.
index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)

  self._client.create_payload_index(


## Inference

In [24]:
# Re-open the already-populated collection for querying.
# The sparse model is stated explicitly so that query-time sparse vectors are
# produced by the same model the collection was indexed with ("Qdrant/bm25");
# omitting it makes the store fall back to its default sparse encoder, whose
# vectors do not correspond to the BM25 document vectors.
inference_vector_store = QdrantVectorStore(
    client=client,
    collection_name=collection_name,
    enable_hybrid=True,
    fastembed_sparse_model="Qdrant/bm25",
)

In [25]:
# Wrap the existing vector store in an index object; no documents are passed here.
inference_index = VectorStoreIndex.from_vector_store(inference_vector_store)

In [39]:
# Hybrid query engine, configured with a dense top-k of 4 and a sparse top-k of 5.
query_engine = inference_index.as_query_engine(
    vector_store_query_mode="hybrid",
    similarity_top_k=4,
    sparse_top_k=5,
)

In [40]:
# Run the sample question through the hybrid query engine.
response = query_engine.query(
    "tips to instruct the copilot for Feature implementations"
)

In [41]:
# Show only the generated answer text of the hybrid-search response.
response.response

'When instructing for feature implementations, it is recommended to implement one to two features per cycle, ensuring each is fully completed and thoroughly tested before moving to the next. For optimal prompting, use detailed modifiers, such as requesting hover states, transitions, and micro-interactions. Ask for comprehensive implementations that go beyond basic functionality, and specify quality expectations by mentioning modern design principles like hierarchy, contrast, balance, and movement. Additionally, be explicit and detailed in your instructions, provide context, and use positive language.'

In [None]:
# Display the whole response object rather than just its answer text.
response

## Re-ranking

In [None]:
!pip install llama-index-postprocessor-cohere-rerank

In [44]:
from llama_index.postprocessor.cohere_rerank import CohereRerank

In [48]:
# Cohere re-ranker configured with top_n=3; the API key is read from Colab's
# userdata secret store.
cohere_rerank = CohereRerank(
    api_key=userdata.get("COHERE_API_KEY"),
    model="rerank-english-v3.0",
    top_n=3,
)

In [49]:
# Hybrid query engine with the Cohere re-ranker attached as a node postprocessor;
# the dense top-k is raised to 6 here (it was 4 in the engine without re-ranking).
query_engine = inference_index.as_query_engine(
    vector_store_query_mode="hybrid",
    similarity_top_k=6,
    sparse_top_k=5,
    node_postprocessors=[cohere_rerank],
)

In [50]:
# Ask the same sample question again, this time through the re-ranking engine.
response = query_engine.query(
    "tips to instruct the copilot for Feature implementations"
)

In [51]:
# Show only the generated answer text of the re-ranked response.
response.response

'When instructing the copilot for feature implementations, it is recommended to implement one to two features per cycle, ensuring each feature is fully completed and thoroughly tested before moving to the next. Use detailed modifiers in your prompts, such as requesting hover states, transitions, and micro-interactions. Ask for comprehensive implementations that go beyond basic functionality, and specify quality expectations by mentioning modern design principles like hierarchy, contrast, balance, and movement.'