In [6]:
from pinecone import Pinecone, ServerlessSpec

pinecone_key = "pcsk_4PR9Ek_TvzdWZi5dsDCvKCZStfatGWfV5Mog5hbS6ccmZGd4W5bA6Q5EAygQ1LQRVVxn83"

pc = Pinecone(api_key=pinecone_key)

index_name = "first-index"

if not pc.has_index(index_name):
    pc.create_index_for_model(
        name=index_name,
        cloud="aws",
        region="us-east-1",
        embed={
            "model":"llama-text-embed-v2",
            "field_map":{"text": "chunk_text"}
        }
    )
else:
    print(f"Index `{index_name}` already exists")

Index `first-index` already exists


In [33]:
from helpers import parse_snippet
from helpers import get_docs
from helpers import pretty_print_dict

# url = "https://context7.com/context7/python_langchain_com-docs-introduction/llms.txt?tokens=10000"
url = "https://context7.com/context7/python_langchain_com-docs-introduction/llms.txt?tokens=100000"
docs = get_docs(url)
parsed_docs = list(map(lambda x: parse_snippet(x), docs))

parsed_docs = parsed_docs

In [35]:
len(docs)
pretty_print_dict(parsed_docs[-1])

{
    "TITLE": "Define and Use RunnableLambda with TypedDict Input",
    "DESCRIPTION": "Demonstrates how to create a RunnableLambda from a function that accepts a TypedDict as input. It shows how to convert this runnable into a tool using as_tool, access its description (which is 'Explanation of when to use tool.') and its automatically generated schema (e.g., {'title': 'My tool', 'type': 'object', 'properties': {'a': {'title': 'A', 'type': 'integer'}, 'b': {'title': 'B', 'type': 'array', 'items': {'type': 'integer'}}}, 'required': ['a', 'b']}). The tool is then invoked with sample data, producing the output '6'.",
    "SOURCE": "https://python.langchain.com/docs/introduction/how_to/convert_runnable_to_tool",
    "LANGUAGE": "python",
    "CODE": "from typing import List\n\nfrom langchain_core.runnables import RunnableLambda\nfrom typing_extensions import TypedDict\n\nclass Args(TypedDict):\n    a: int\n    b: List[int]\n\ndef f(x: Args) -> str:\n    return str(x[\"a\"] * max(x[\"b\"]

In [8]:
index = pc.Index(index_name)

In [9]:
index.describe_index_stats()

{'dimension': 1024,
 'index_fullness': 0.0,
 'metric': 'cosine',
 'namespaces': {'ns1': {'vector_count': 10}},
 'total_vector_count': 10,
 'vector_type': 'dense'}

In [24]:
import uuid

parsed_docs_with_ids = [
    {"_id": str(uuid.uuid4()), **{("text" if k == "TITLE" else k): v for k, v in doc.items()}}
    for doc in parsed_docs
]

# parsed_docs_with_ids[1]

index.upsert_records("langchain", parsed_docs_with_ids)

In [14]:
records = [
    {"_id": "r1", "text": "# to create a numpy array use: np.array([1, 2, 3])", "category": "programming"}
]

index.upsert_records("ns1", records)

In [28]:
query = "Conversational bot"

results = index.search(
    namespace="langchain",
    query={
        "top_k": 2,
        "inputs": {
            'text': query
        }
    }
)

print(results['result']['hits'])

[{'_id': 'a2b33d0a-16b5-4e2c-ad96-96bafe1f7fdb',
 '_score': 0.31182077527046204,
 'fields': {'CODE': 'from langchain.chains import '
                    'create_history_aware_retriever, create_retrieval_chain\n'
                    'from langchain.chains.combine_documents import '
                    'create_stuff_documents_chain\n'
                    '\n'
                    'condense_question_system_template = (\n'
                    '    "Given a chat history and the latest user question "\n'
                    '    "which might reference context in the chat history, '
                    '"\n'
                    '    "formulate a standalone question which can be '
                    'understood "\n'
                    '    "without the chat history. Do NOT answer the '
                    'question, "\n'
                    '    "just reformulate it if needed and otherwise return '
                    'it as is."\n'
                    ')\n'
                    '\n'
         

In [18]:
reranked_results = index.search(
    namespace="ns1",
    query={
        "top_k": 5,
        "inputs": {
            'text': query
        }
    },
    rerank={
        "model": "bge-reranker-v2-m3",
        "top_n": 5,
        "rank_fields": ["text"]
    },
    fields=["category", "text"]
)

reranked_results

{'result': {'hits': [{'_id': 'rec1',
                      '_score': 0.10650458931922913,
                      'fields': {'category': 'history',
                                 'text': 'The Eiffel Tower was completed in '
                                         '1889 and stands in Paris, France.'}},
                     {'_id': 'rec7',
                      '_score': 0.06278920918703079,
                      'fields': {'category': 'history',
                                 'text': 'The Great Wall of China was built to '
                                         'protect against invasions.'}},
                     {'_id': 'rec5',
                      '_score': 3.21923362207599e-05,
                      'fields': {'category': 'literature',
                                 'text': 'Shakespeare wrote many famous plays, '
                                         'including Hamlet and Macbeth.'}},
                     {'_id': 'rec4',
                      '_score': 1.6187581422855146e-