In [29]:
import pinecone
import os

# Pull the Pinecone credential from the environment instead of hard-coding it;
# a missing PINECONE_API_KEY surfaces immediately as a KeyError.
api_key = os.environ["PINECONE_API_KEY"]

# Point the Pinecone client at the "gcp-starter" environment.
pinecone.init(
    environment="gcp-starter",
    api_key=api_key,
)

In [30]:
# Start from a clean slate: drop a previous "quickstart" index, but only when
# one is actually listed. Deleting unconditionally makes the very first run
# (when no such index exists yet) fail before anything else happens.
if "quickstart" in pinecone.list_indexes():
    pinecone.delete_index("quickstart")

# dimensions are for text-embedding-ada-002
pinecone.create_index(
    "quickstart", dimension=1536, metric="euclidean", pod_type="p1"
)

In [31]:
# Client-side handle for the "quickstart" index; it is handed to
# PineconeVectorStore further down.
pinecone_index = pinecone.Index("quickstart")

In [32]:
# [Optional] drop contents in index
# pinecone_index.delete(deleteAll=True)

In [33]:
from llama_index.vector_stores import PineconeVectorStore

In [34]:
# Vector store backed by the Pinecone index handle. It is later passed to the
# StorageContext when building the index and is also queried directly.
vector_store = PineconeVectorStore(pinecone_index=pinecone_index)

In [35]:
from pathlib import Path
from llama_hub.file.pymu_pdf.base import PyMuPDFReader

In [36]:
# Read ./data/llama2.pdf (path relative to the notebook's working directory)
# with the PyMuPDF-based reader; `documents` is what gets indexed below.
loader = PyMuPDFReader()
documents = loader.load(file_path="./data/llama2.pdf")

In [37]:
# Natural-language question used throughout the rest of the notebook; it is
# embedded below and the embedding is used to query the vector store.
query_str = "Can you tell me about the key concepts for safety finetuning"

In [38]:
from llama_index import VectorStoreIndex, ServiceContext
from llama_index.storage import StorageContext

In [39]:
# Storage context that uses our vector store as the place vectors are kept.
storage_context = StorageContext.from_defaults(vector_store=vector_store)

# Service context with default components, configured with chunk_size=1024.
service_context = ServiceContext.from_defaults(chunk_size=1024)

# Build the index over the loaded documents using both contexts.
index = VectorStoreIndex.from_documents(
    documents,
    storage_context=storage_context,
    service_context=service_context,
)

Upserted vectors:   0%|          | 0/108 [00:00<?, ?it/s]

In [40]:
from llama_index.embeddings import OpenAIEmbedding

# Embedding model created with its default settings; used below to turn the
# query string into a vector.
embed_model = OpenAIEmbedding()

In [41]:
# Embed the query text; the result is passed to VectorStoreQuery as
# `query_embedding` in the next step.
query_embedding = embed_model.get_query_embedding(query_str)

In [45]:
# construct vector store query
from llama_index.vector_stores import VectorStoreQuery

# Query mode handed to VectorStoreQuery. "default" is the active choice; the
# commented-out alternatives are kept so they can be switched in by hand.
query_mode = "default"
# query_mode = "sparse"
# query_mode = "hybrid"

# Bundle the query embedding, the number of hits to return (top 2) and the
# selected mode into a single query object for the vector store.
vector_store_query = VectorStoreQuery(
    query_embedding=query_embedding, similarity_top_k=2, mode=query_mode
)


In [46]:
# Run the query object against the vector store.
# returns a VectorStoreQueryResult
query_result = vector_store.query(vector_store_query)
# Bare last expression so the notebook displays the result. The repr includes
# each node's full embedding, so the rendered output is very long.
query_result

VectorStoreQueryResult(nodes=[TextNode(id_='eb52ba6a-d8c8-4693-bf97-0bd0cf39b374', embedding=[-0.023997657, -0.0063895341, 0.0151268179, -0.0264297575, -0.0186554715, 0.0240257736, -0.0124627547, 0.0101290634, -0.0312096067, -0.0504133552, 0.0112115592, 0.00364112062, -0.00922932755, -0.00803436525, 0.000524992647, 0.00890598446, 0.0324748605, -0.00322464108, 0.0120972367, -0.00813277345, -0.0155345108, 0.00930664875, -0.0270202085, -0.0107827783, -0.00525432, 0.0031894953, 0.0307878554, -0.031771943, -0.0209891628, -0.00488880184, -0.000540808367, 0.00209997059, 0.00443893392, -0.00530000962, -0.00379224843, -0.0143676652, 0.00754935062, 0.000284023234, 0.0320531093, -0.0197801422, 0.0251926184, 0.0206939373, -0.00389417168, -0.0270483252, 0.0002581031, 0.019723909, 0.0138053307, -0.0227464605, -0.032362394, 0.0211438052, 0.0435809828, 0.0118020112, -0.0195833258, -0.00189788151, -0.000668212422, 0.00771102216, 0.0154220443, 0.00854046643, 0.0123573169, 0.00139177975, -0.00634033, 0.0

In [47]:
from llama_index.schema import NodeWithScore
from typing import Optional

# Pair every retrieved node with its similarity score. The score stays None
# when the query result carries no similarities.
#
# The loop counter is deliberately NOT named `index`: in a notebook every name
# is global, and `index` already holds the VectorStoreIndex built earlier.
# Reusing the name here would silently replace that object with an int.
nodes_with_scores = []
for node_position, node in enumerate(query_result.nodes):
    score: Optional[float] = None
    if query_result.similarities is not None:
        score = query_result.similarities[node_position]
    nodes_with_scores.append(NodeWithScore(node=node, score=score))

In [48]:
from llama_index.response.notebook_utils import display_source_node

# Render each scored node in the notebook, passing source_length=1000 to the
# display helper. Note that `node` here is a NodeWithScore, not a bare node.
for node in nodes_with_scores:
    display_source_node(node, source_length=1000)

**Node ID:** eb52ba6a-d8c8-4693-bf97-0bd0cf39b374<br>**Similarity:** 0.340792418<br>**Text:** advice). The attack vectors explored consist of psychological manipulation (e.g., authority manipulation),
logic manipulation (e.g., false premises), syntactic manipulation (e.g., misspelling), semantic manipulation
(e.g., metaphor), perspective manipulation (e.g., role playing), non-English languages, and others.
We then define best practices for safe and helpful model responses: the model should first address immediate
safety concerns if applicable, then address the prompt by explaining the potential risks to the user, and finally
provide additional information if possible. We also ask the annotators to avoid negative user experience
categories (see Appendix A.5.2). The guidelines are meant to be a general guide for the model and are
iteratively refined and revised to include newly identified risks.
4.2.2
Safety Supervised Fine-Tuning
In accordance with the established guidelines from Section 4.2.1, we gather prompts and demonstrations
of safe model responses from trained annotato...<br>

**Node ID:** 3ef21600-06ad-4d89-99f2-6c7a9f6c9b92<br>**Similarity:** 0.383333564<br>**Text:** TruthfulQA ↑
ToxiGen ↓
MPT
7B
29.13
22.32
30B
35.25
22.61
Falcon
7B
25.95
14.53
40B
40.39
23.44
Llama 1
7B
27.42
23.00
13B
41.74
23.08
33B
44.19
22.57
65B
48.71
21.77
Llama 2
7B
33.29
21.25
13B
41.86
26.10
34B
43.45
21.19
70B
50.18
24.60
Table 11: Evaluation of pretrained LLMs on automatic safety benchmarks. For TruthfulQA, we present the
percentage of generations that are both truthful and informative (the higher the better). For ToxiGen, we
present the percentage of toxic generations (the smaller, the better).
Benchmarks give a summary view of model capabilities and behaviors that allow us to understand general
patterns in the model, but they do not provide a fully comprehensive view of the impact the model may have
on people or real-world outcomes; that would require study of end-to-end product deployments. Further
testing and mitigation should be done to understand bias and other social issues for the specific context
in which a system may be deployed. For this, it may be necess...<br>