In [None]:
# Install the notebook's dependencies; every package is pinned to an exact version.
!pip install -qU \
  langchain==0.1.1 \
  langchain-community==0.0.13 \
  openai==0.27.7 \
  tiktoken==0.4.0 \
  pinecone-client==3.1.0 \
  pinecone-datasets==0.7.0 \
  pinecone-notebooks==0.1.1

In [None]:
import pinecone_datasets

# Load the pre-embedded dataset by name and preview its first rows.
# The preview shows the columns id, values, sparse_values, metadata and blob.
dataset = pinecone_datasets.load_dataset('amazon_toys_quora_all-MiniLM-L6-bm25')
dataset.head()

Unnamed: 0,id,values,sparse_values,metadata,blob
0,eac7efa5dbd3d667f26eb3d3ab504464,"[0.0077547780238091946, -0.02774387039244175, ...","{'indices': [2182291806, 4287202515, 148124445...",{'amazon_category_and_sub_category': 'Hobbies ...,{'text': 'Hornby 2014 Catalogue (Hornby): Pr...
1,b17540ef7e86e461d37f3ae58b7b72ac,"[0.002257382730022073, -0.03035414218902588, 0...","{'indices': [2118423442, 2177509083, 224097760...",{'amazon_category_and_sub_category': 'Hobbies ...,{'text': 'FunkyBuys® Large Christmas Holiday E...
2,348f344247b0c1a935b1223072ef9d8a,"[-0.003095218911767006, 0.016020774841308594, ...","{'indices': [2349888478, 3814962844, 310417642...",{'amazon_category_and_sub_category': 'Hobbies ...,{'text': 'CLASSIC TOY TRAIN SET TRACK CARRIAGE...
3,e12b92dbb8eaee78b22965d2a9bbbd9f,"[-0.024034591391682625, -0.048526741564273834,...","{'indices': [2182291806, 719182917, 1942275469...",{'amazon_category_and_sub_category': 'Hobbies ...,{'text': 'HORNBY Coach R4410A BR Hawksworth Co...
4,e33a9adeed5f36840ccc227db4682a36,"[-0.07078640908002853, 0.009733847342431545, 0...","{'indices': [2182291806, 2415375917, 369727517...",{'amazon_category_and_sub_category': 'Hobbies ...,{'text': 'Hornby 00 Gauge 0-4-0 Gildenlow Salt...


In [None]:
# Number of records in the loaded dataset.
len(dataset)

10000

In [None]:
# Reshape the documents frame into the id / values / metadata layout that is
# upserted to Pinecone.
#
# The original `metadata` column is discarded and `blob` (which carries the
# `text` field) takes its place. The guard makes the cell safe to re-run:
# once `blob` has been renamed there is nothing left to swap, and dropping
# `metadata` again would throw away the renamed column.
if 'blob' in dataset.documents.columns:
    dataset.documents.drop(['metadata'], axis=1, inplace=True)
    dataset.documents.rename(columns={'blob': 'metadata'}, inplace=True)

# Sparse values are not needed for this dense-only example, and sending them
# makes the upsert fail with "Sparse vector size ... exceeds the maximum size
# of 1000". The column is blanked instead of removed so that any code that
# selects it by name keeps working.
dataset.documents['sparse_values'] = None

# Keep at most the first 30_000 rows (a no-op when the dataset is smaller).
dataset.documents.drop(dataset.documents.index[30_000:], inplace=True)
len(dataset)

10000

In [None]:
import os

# Fall back to the interactive Colab helper only when the environment does
# not already provide a non-empty Pinecone API key.
if os.environ.get("PINECONE_API_KEY", "") == "":
    from pinecone_notebooks.colab import Authenticate
    Authenticate()

In [None]:
from pinecone import Pinecone

# Read the key from the environment so no credential is stored in the notebook.
api_key = os.environ.get("PINECONE_API_KEY")

# Configure the client with the key that was just read; previously a literal
# placeholder string was passed and `api_key` was never used.
pc = Pinecone(api_key=api_key)

In [None]:
from pinecone import ServerlessSpec

# Cloud provider and region for the index; an unset or empty environment
# variable falls back to the default on the right-hand side.
cloud = os.getenv('PINECONE_CLOUD') or 'aws'
region = os.getenv('PINECONE_REGION') or 'us-east-1'

spec = ServerlessSpec(region=region, cloud=cloud)

In [None]:
# Name of the Pinecone index that the following cells create, fill, query and finally delete.
index_name = 'langchain-retrieval-augmentation-fast'

In [None]:
import time

# Start from a clean slate: remove any existing index that has the same name.
if index_name in pc.list_indexes().names():
    pc.delete_index(index_name)

# we create a new index
pc.create_index(
        index_name,
        dimension=384,  # width of the all-MiniLM-L6 vectors used by the dataset and by the query embedder
        metric='dotproduct',
        spec=spec
    )

# wait for index to be initialized (polls once per second; there is no upper bound on the wait)
while not pc.describe_index(index_name).status['ready']:
    time.sleep(1)

Then we connect to the new index:

In [None]:
# Handle used for all data operations (upsert, stats, queries) on the index.
index = pc.Index(index_name)
# wait a moment for connection
time.sleep(1)

# Show the index statistics; the vector count is expected to be 0 at this point.
index.describe_index_stats()

{'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {},
 'total_vector_count': 0}

We should see that the new Pinecone index has a `total_vector_count` of `0`, as we haven't added any vectors yet.

Now we upsert the data to Pinecone:

In [None]:
# Send the documents to the index in batches of 100 records.
# NOTE(review): the recorded output shows this request rejected with HTTP 400,
# "Sparse vector size 2211 exceeds the maximum size of 1000", so the records
# must not carry oversized sparse_values when this cell runs.
for batch in dataset.iter_documents(batch_size=100):
    index.upsert(batch)

PineconeApiException: (400)
Reason: Bad Request
HTTP response headers: HTTPHeaderDict({'Date': 'Fri, 23 Aug 2024 16:51:30 GMT', 'Content-Type': 'application/json', 'Content-Length': '92', 'Connection': 'keep-alive', 'x-pinecone-request-latency-ms': '95', 'x-pinecone-request-id': '8331637323800620746', 'x-envoy-upstream-service-time': '4', 'server': 'envoy'})
HTTP response body: {"code":3,"message":"Sparse vector size 2211 exceeds the maximum size of 1000","details":[]}


In [None]:

# Bulk-load the dataset through pinecone_datasets as an alternative to the
# manual upsert loop.
#
# The recorded run of this cell raised KeyError: 'PINECONE_ENVIRONMENT'.
# Assigning the Python variable below does not help, because nothing passes it
# to the call or exports it to the environment. The index created earlier is
# serverless, so the call is pointed at that existing index with the same
# cloud/region settings instead of the pod-based "environment" path.
PINECONE_ENVIRONMENT = "us-east-1"  # kept for compatibility; not read by the call below

dataset.to_pinecone_index(
    index_name=index_name,
    should_create_index=False,  # the index already exists; only load data into it
    serverless=True,
    cloud=cloud,
    region=region,
    batch_size=100,  # same batch size as the manual upsert loop
)


KeyError: 'PINECONE_ENVIRONMENT'

We've now indexed everything. We can check the number of vectors in our index like so:

In [None]:
# Re-read the index statistics to check how many vectors are now stored.
index.describe_index_stats()

In [None]:
pip install sentence-transformers


Collecting sentence-transformers
  Downloading sentence_transformers-3.0.1-py3-none-any.whl.metadata (10 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch>=1.11.0->sentence-transformers)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch>=1.11.0->sentence-transformers)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch>=1.11.0->sentence-transformers)
  Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch>=1.11.0->sentence-transformers)
  Using cached nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.1.3.1 (from torch>=1.11.0->sentence-transformers)
  Using cached nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl.met

In [None]:
from sentence_transformers import SentenceTransformer
from langchain.embeddings import HuggingFaceEmbeddings

# Embedding model used for queries: all-MiniLM-L6-v2, the same model family
# named in the dataset identifier ('..._all-MiniLM-L6-bm25').
model_name = 'sentence-transformers/all-MiniLM-L6-v2'

# HuggingFaceEmbeddings takes the model name, not a loaded model object.
embed = HuggingFaceEmbeddings(model_name=model_name)


  from tqdm.autonotebook import tqdm, trange
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.7k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Now initialize the vector store:

In [None]:
# Import the LangChain vector store under an alias so it does not rebind
# `Pinecone`, which already refers to the client class from the `pinecone`
# package; re-running the client cell after this one stays unambiguous.
from langchain.vectorstores import Pinecone as PineconeVectorStore

# Metadata key that holds each document's text.
text_field = "text"

# Handle to the index that was created and filled above.
index = pc.Index(index_name)

# Pass the Embeddings object itself rather than its bound `embed_query`
# method: the vector store calls `embed_query` on it for searches, and a bare
# callable is the deprecated form of this argument.
vectorstore = PineconeVectorStore(index, embed, text_field)



Now we can query the vector store directly using `vectorstore.similarity_search`:

In [None]:
# Free-text question; it is embedded and matched against the indexed documents.
query = "who was Benito Mussolini?"

# Retrieve the three closest documents for the question.
vectorstore.similarity_search(query, k=3)

[Document(page_content='Jorge Lorenzo Yamaha Moto GP 2012 1/10 Scale (Maisto): \n Maisto 1:10 Yamaha Jorge Lorenzo Moto GP 2012 Jorge Lorenzo Guerrero born May 4, 1987 in Palma, Balearic Islands), is a Spanish professional Motor Cycle racer. He is the 2006 and 2007 250cc World champion and the current 2012 Moto GP World Champion. He currently competes in the Moto GP class, riding for the factory Yamaha team. Diecast Body Rotating Wheels Bike Stand Attached Presented in Window Display Box \n Technical Details Item Weight358 g Product Dimensions20.3 x 7.6 x 12.7 cm Manufacturer recommended age:10 years and up Item model number31402 Assembly RequiredNo Batteries Required?No \xa0\xa0 Additional Information ASINB009HOSA0M Best Sellers Rank 310,665 in Toys & Games (See top 100) Shipping Weight358 g Delivery Destinations:Visit the Delivery Destinations Help page to see where this item can be delivered. Date First Available20 Feb. 2013 \xa0\xa0 Feedback \xa0Would you like to update product inf

In [None]:
from pydantic import BaseModel, Field
from langchain.llms.base import LLM
import requests

class CustomRunpodLLM(LLM):
    """LangChain LLM that talks to an OpenAI-compatible chat-completions endpoint.

    Each prompt is sent as a single user message to
    ``{base_url}/chat/completions`` and the content of the first returned
    choice is used as the completion.

    Attributes:
        base_url: Root URL of the OpenAI-compatible API (no trailing slash).
        model_name: Model identifier sent in the request payload.
        api_key: Bearer token sent in the ``Authorization`` header.
        temperature: Sampling temperature used when a call does not override it.
        max_tokens: Completion length limit used when a call does not override it.
        request_timeout: Seconds to wait for the HTTP response.
    """

    # Required fields are declared with bare annotations so they are validated
    # by whichever pydantic flavour the LLM base class is built on.
    base_url: str
    model_name: str
    api_key: str
    # Optional settings; the defaults reproduce the previously hard-coded values.
    temperature: float = 0.0
    max_tokens: int = 100
    request_timeout: float = 60.0

    def _call(self, prompt: str, stop=None, temperature=None, max_tokens=None, **kwargs) -> str:
        """Send ``prompt`` to the endpoint and return the generated text.

        Args:
            prompt: Text sent as the content of a single user message.
            stop: Optional list of stop sequences, forwarded to the API.
            temperature: Per-call override of ``self.temperature``.
            max_tokens: Per-call override of ``self.max_tokens``.
            **kwargs: Accepted for interface compatibility; not forwarded.

        Returns:
            The message content of the first choice in the response.

        Raises:
            requests.HTTPError: If the endpoint answers with an error status.
            requests.Timeout: If no response arrives within ``request_timeout``.
        """
        headers = {
            "Authorization": f"Bearer {self.api_key}"
        }

        payload = {
            "model": self.model_name,
            "messages": [{"role": "user", "content": prompt}],
            # `is None` checks so that only a missing value falls back to the default.
            "temperature": self.temperature if temperature is None else temperature,
            "max_tokens": self.max_tokens if max_tokens is None else max_tokens,
        }
        # Forward stop sequences instead of silently ignoring them.
        if stop:
            payload["stop"] = stop

        response = requests.post(
            f"{self.base_url}/chat/completions",
            json=payload,
            headers=headers,
            # Without a timeout a stalled endpoint would block the notebook forever.
            timeout=self.request_timeout,
        )
        response.raise_for_status()

        return response.json()["choices"][0]["message"]["content"]

    @property
    def _identifying_params(self):
        """Parameters that identify this LLM; the API key is deliberately left out."""
        return {"base_url": self.base_url, "model_name": self.model_name}

    @property
    def _llm_type(self):
        """Short type tag for this LLM implementation."""
        return "custom-runpod"


In [None]:
import getpass

# RetrievalQA was used below without ever being imported, so this cell
# failed on a fresh kernel.
from langchain.chains import RetrievalQA

# Initialize the custom LLM. The API key is read from the RUNPOD_API_KEY
# environment variable, with an interactive prompt as fallback, so that no
# credential is stored in the notebook.
# NOTE(review): the endpoint id in base_url looks redacted ("vllm-/") - fill in
# your own endpoint before running.
custom_llm = CustomRunpodLLM(
    base_url="https://api.runpod.ai/v2/vllm-/openai/v1",
    model_name="openchat/openchat-3.5-1210",
    api_key=os.environ.get("RUNPOD_API_KEY") or getpass.getpass("RunPod API key: "),
)

# Retrieval-augmented QA chain: retrieved documents are "stuffed" into a
# single prompt for the LLM.
qa_custom = RetrievalQA.from_chain_type(
    llm=custom_llm,
    chain_type="stuff",
    retriever=vectorstore.as_retriever()
)

# Query the model. `invoke` replaces calling the chain object directly, which
# is deprecated; the returned dict has the same 'query' and 'result' keys.
response = qa_custom.invoke({"query": "Roadkill Toys - Smudge (Squirrel) Plush Toy (Roadkill Toys)"})
print(response)


{'query': 'Roadkill Toys - Smudge (Squirrel) Plush Toy (Roadkill Toys)', 'result': ' The Roadkill Toys - Smudge (Squirrel) Plush Toy is a squirrel-themed plush toy that features a realistic squidgy effect, made using high-tech stuffing and plush material. The body and head and legs are made from specially-sourced plush material, and the body is partly stuffed with beads to give it extra dead weight. The blood and guts and gore are made using the latest, cutting edge stuff'}


In [None]:
# Answer the earlier `query` with the retrieval chain. The chain built in this
# notebook is `qa_custom`; the name `qa` was never defined.
qa_custom.run(query)

In [None]:
from langchain.chains import RetrievalQAWithSourcesChain

# Same retrieval setup as the plain QA chain, but the answer also reports its
# sources. The LLM defined in this notebook is `custom_llm`; the name `llm`
# was never defined.
qa_with_sources = RetrievalQAWithSourcesChain.from_chain_type(
    llm=custom_llm,
    chain_type="stuff",
    retriever=vectorstore.as_retriever()
)

In [None]:
# Run the sources-aware chain on the same `query` used for the similarity search.
qa_with_sources(query)

In [None]:
# Clean up: delete the index that this notebook created.
pc.delete_index(index_name)

---