In [1]:
%pip install lark

Note: you may need to restart the kernel to use updated packages.


In [44]:
from langchain.schema import Document
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Pinecone
import lark
import getpass
import pinecone
import warnings

# Disabling warnings:
warnings.filterwarnings("ignore")

In [45]:
PINECONE_API_KEY = getpass.getpass("Pinecone API Key:")
PINECONE_ENV = getpass.getpass("Pinecone Environment:")
PINECONE_INDEX = "INDEX_NAME_HERE"

In [18]:
# Initialize the Pinecone authentication:
pinecone.init(api_key=PINECONE_API_KEY, environment=PINECONE_ENV)
index = pinecone.Index(index_name=PINECONE_INDEX)

In [19]:
docs = [
    Document(
        page_content="A tale about a young wizard and his journey in a magical school.",
        metadata={
            "title": "Harry Potter and the Philosopher's Stone",
            "author": "J.K. Rowling",
            "year_published": 1997,
            "genre": "Fiction",
            "isbn": "978-0747532699",
            "publisher": "Bloomsbury",
            "language": "English",
            "page_count": 223,
            "summary": "The first book in the Harry Potter series where Harry discovers his magical heritage.",
            "keywords": ["magic", "wizard", "Hogwarts"],
            "rating": 4.8,
        },
    ),
    Document(
        page_content="An epic tale of power, betrayal and love set in a fantastical world.",
        metadata={
            "title": "A Game of Thrones",
            "author": "George R.R. Martin",
            "year_published": 1996,
            "genre": "Fantasy",
            "isbn": "978-0553103540",
            "publisher": "Bantam",
            "language": "English",
            "page_count": 694,
            "summary": "The first book in A Song of Ice and Fire series, introducing the intricate world of Westeros.",
            "keywords": ["dragons", "kingdoms", "power struggle"],
            "rating": 4.6,
        },
    ),
    Document(
        page_content="A futuristic society where firemen burn books to maintain order.",
        metadata={
            "title": "Fahrenheit 451",
            "author": "Ray Bradbury",
            "year_published": 1953,
            "genre": "Science Fiction",
            "isbn": "978-1451673319",
            "publisher": "Simon & Schuster",
            "language": "English",
            "page_count": 249,
            "summary": "In a future society, books are banned and firemen are tasked to burn any they find, leading one fireman to question his role.",
            "keywords": ["dystopia", "censorship", "rebellion"],
            "rating": 4.4,
        },
    ),
    Document(
        page_content="A young woman's life in the South during the Civil War and Reconstruction.",
        metadata={
            "title": "Gone with the Wind",
            "author": "Margaret Mitchell",
            "year_published": 1936,
            "genre": "Historical Fiction",
            "isbn": "978-0684830681",
            "publisher": "Macmillan",
            "language": "English",
            "page_count": 1037,
            "summary": "The tale of Scarlett O'Hara and her love affair with Rhett Butler, set against the backdrop of the American Civil War.",
            "keywords": ["civil war", "love", "southern life"],
            "rating": 4.3,
        },
    ),
    Document(
        page_content="A story about a hobbit's journey to destroy a powerful ring.",
        metadata={
            "title": "The Lord of the Rings",
            "author": "J.R.R. Tolkien",
            "year_published": 1954,
            "genre": "Fantasy",
            "isbn": "978-0618640157",
            "publisher": "Houghton Mifflin",
            "language": "English",
            "page_count": 1216,
            "summary": "The epic tale of Frodo Baggins and his quest to destroy the One Ring, accompanied by a group of diverse companions.",
            "keywords": ["fantasy", "epic", "Middle-earth"],
            "rating": 4.7,
        },
    ),
]

In [20]:
# Create the embeddings and new index:
embeddings = OpenAIEmbeddings()
try:
    pinecone.create_index(PINECONE_INDEX, dimension=1536)
except Exception as e:
    print(e)

(400)
Reason: Bad Request
HTTP response headers: HTTPHeaderDict({'content-type': 'text/plain; charset=UTF-8', 'date': 'Fri, 18 Aug 2023 17:28:34 GMT', 'x-envoy-upstream-service-time': '1347', 'content-length': '131', 'server': 'envoy'})
HTTP response body: The index exceeds the project quota of 1 pods by 1 pods. Upgrade your account or change the project settings to increase the quota.



In [31]:
# index.delete(delete_all=True) # uncomment to delete all vectors in the index
vectorstore = Pinecone.from_documents(docs, embeddings, index_name=PINECONE_INDEX)

In [46]:
from langchain.chat_models import ChatOpenAI
from langchain.retrievers.self_query.base import SelfQueryRetriever
from langchain.chains.query_constructor.base import AttributeInfo

In [47]:
# Basic Info
basic_info = [
    AttributeInfo(name="title", description="The title of the book", type="string"),
    AttributeInfo(name="author", description="The author of the book", type="string"),
    AttributeInfo(
        name="year_published",
        description="The year the book was published",
        type="integer",
    ),
]

# Detailed Info
detailed_info = [
    AttributeInfo(
        name="genre", description="The genre of the book", type="string or list[string]"
    ),
    AttributeInfo(
        name="isbn",
        description="The International Standard Book Number for the book",
        type="string",
    ),
    AttributeInfo(
        name="publisher",
        description="The publishing house that published the book",
        type="string",
    ),
    AttributeInfo(
        name="language",
        description="The primary language the book is written in",
        type="string",
    ),
    AttributeInfo(
        name="page_count", description="Number of pages in the book", type="integer"
    ),
]

# Analysis
analysis = [
    AttributeInfo(
        name="summary",
        description="A brief summary or description of the book",
        type="string",
    ),
    AttributeInfo(
        name="keywords",
        description="A few keywords relevant to the book's content",
        type="list[string]",
    ),
    AttributeInfo(
        name="rating",
        description="An average rating for the book (from reviews), ranging from 1-5",
        type="float",
    ),
]

# Combining all lists into metadata_field_info
metadata_field_info = basic_info + detailed_info + analysis

In [34]:
document_content_description = "Brief summary of a movie"
llm = ChatOpenAI(temperature=0)
retriever = SelfQueryRetriever.from_llm(
    llm, vectorstore, document_content_description, metadata_field_info, verbose=True
)

In [35]:
# Looking for sci-fi books
retriever.get_relevant_documents("What are some sci-fi books?")



query='sci-fi' filter=None limit=None


[Document(page_content='A futuristic society where firemen burn books to maintain order.', metadata={'author': 'Ray Bradbury', 'genre': 'Science Fiction', 'isbn': '978-1451673319', 'keywords': ['dystopia', 'censorship', 'rebellion'], 'language': 'English', 'page_count': 249.0, 'publisher': 'Simon & Schuster', 'rating': 4.4, 'summary': 'In a future society, books are banned and firemen are tasked to burn any they find, leading one fireman to question his role.', 'title': 'Fahrenheit 451', 'year_published': 1953.0}),
 Document(page_content='An epic tale of power, betrayal and love set in a fantastical world.', metadata={'author': 'George R.R. Martin', 'genre': 'Fantasy', 'isbn': '978-0553103540', 'keywords': ['dragons', 'kingdoms', 'power struggle'], 'language': 'English', 'page_count': 694.0, 'publisher': 'Bantam', 'rating': 4.6, 'summary': 'The first book in A Song of Ice and Fire series, introducing the intricate world of Westeros.', 'title': 'A Game of Thrones', 'year_published': 199

In [37]:
retriever.get_relevant_documents(
    "Provide books with a rating over 4.0, that are either Fantasy or Science fiction"
)

query=' ' filter=Operation(operator=<Operator.AND: 'and'>, arguments=[Comparison(comparator=<Comparator.GT: 'gt'>, attribute='rating', value=4.0), Operation(operator=<Operator.OR: 'or'>, arguments=[Comparison(comparator=<Comparator.EQ: 'eq'>, attribute='genre', value='Fantasy'), Comparison(comparator=<Comparator.EQ: 'eq'>, attribute='genre', value='Science fiction')])]) limit=None


[Document(page_content='An epic tale of power, betrayal and love set in a fantastical world.', metadata={'author': 'George R.R. Martin', 'genre': 'Fantasy', 'isbn': '978-0553103540', 'keywords': ['dragons', 'kingdoms', 'power struggle'], 'language': 'English', 'page_count': 694.0, 'publisher': 'Bantam', 'rating': 4.6, 'summary': 'The first book in A Song of Ice and Fire series, introducing the intricate world of Westeros.', 'title': 'A Game of Thrones', 'year_published': 1996.0}),
 Document(page_content="A story about a hobbit's journey to destroy a powerful ring.", metadata={'author': 'J.R.R. Tolkien', 'genre': 'Fantasy', 'isbn': '978-0618640157', 'keywords': ['fantasy', 'epic', 'Middle-earth'], 'language': 'English', 'page_count': 1216.0, 'publisher': 'Houghton Mifflin', 'rating': 4.7, 'summary': 'The epic tale of Frodo Baggins and his quest to destroy the One Ring, accompanied by a group of diverse companions.', 'title': 'The Lord of the Rings', 'year_published': 1954.0})]

In [40]:
# Querying for a book by J.K. Rowling:
retriever.get_relevant_documents(
    "I want all of the books that were published by J.K. Rowling"
)

query=' ' filter=Comparison(comparator=<Comparator.EQ: 'eq'>, attribute='author', value='J.K. Rowling') limit=None


[Document(page_content='A tale about a young wizard and his journey in a magical school.', metadata={'author': 'J.K. Rowling', 'genre': 'Fiction', 'isbn': '978-0747532699', 'keywords': ['magic', 'wizard', 'Hogwarts'], 'language': 'English', 'page_count': 223.0, 'publisher': 'Bloomsbury', 'rating': 4.8, 'summary': 'The first book in the Harry Potter series where Harry discovers his magical heritage.', 'title': "Harry Potter and the Philosopher's Stone", 'year_published': 1997.0})]

In [41]:
retriever = SelfQueryRetriever.from_llm(
    llm,
    vectorstore,
    document_content_description,
    metadata_field_info,
    enable_limit=True,
    verbose=True,
)

retriever.get_relevant_documents(
    query="Return 2 Fantasy books",
)



query=' ' filter=Comparison(comparator=<Comparator.EQ: 'eq'>, attribute='genre', value='Fantasy') limit=2


[Document(page_content='An epic tale of power, betrayal and love set in a fantastical world.', metadata={'author': 'George R.R. Martin', 'genre': 'Fantasy', 'isbn': '978-0553103540', 'keywords': ['dragons', 'kingdoms', 'power struggle'], 'language': 'English', 'page_count': 694.0, 'publisher': 'Bantam', 'rating': 4.6, 'summary': 'The first book in A Song of Ice and Fire series, introducing the intricate world of Westeros.', 'title': 'A Game of Thrones', 'year_published': 1996.0}),
 Document(page_content="A story about a hobbit's journey to destroy a powerful ring.", metadata={'author': 'J.R.R. Tolkien', 'genre': 'Fantasy', 'isbn': '978-0618640157', 'keywords': ['fantasy', 'epic', 'Middle-earth'], 'language': 'English', 'page_count': 1216.0, 'publisher': 'Houghton Mifflin', 'rating': 4.7, 'summary': 'The epic tale of Frodo Baggins and his quest to destroy the One Ring, accompanied by a group of diverse companions.', 'title': 'The Lord of the Rings', 'year_published': 1954.0})]