In [1]:
from openai import OpenAI
client = OpenAI()

def semantic_search(query, pdf_text, model="gpt-4o-mini"):
    """Answer a question about a document by sending both to a chat model.

    The whole document text is embedded in the system prompt and the
    question is sent as the user message.

    Args:
        query: The question to ask about the document.
        pdf_text: The document text the model is told to restrict itself to.
        model: Name of the chat model to call; defaults to "gpt-4o-mini".

    Returns:
        The content of the first choice's message in the response.
    """
    # Adjacent literals are concatenated at compile time, so the prompt has
    # single spaces between words. A backslash line continuation inside a
    # string literal would instead embed the next line's indentation in the
    # prompt text.
    system_prompt = (
        "You answer semantic search questions "
        f"ONLY ABOUT THE FOLLOWING TEXT: {pdf_text}"
    )
    response = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": query},
        ],
    )

    return response.choices[0].message.content


In [2]:
from pypdf import PdfReader

def load_pdf_text(file_path):
    """Read a PDF and return the text of all its pages as one string.

    Args:
        file_path: Location of the PDF, passed straight to ``PdfReader``.

    Returns:
        The result of ``extract_text()`` for every page, in page order,
        joined with a blank line ("\\n\\n") between consecutive pages.
    """
    page_texts = []
    for page in PdfReader(file_path).pages:
        # Collect each page's extracted text in document order.
        page_texts.append(page.extract_text())

    return "\n\n".join(page_texts)


# Relative path to the PDF that the searches below are run against.
pdf_path = "./assets-resources/attention-paper.pdf"
# Extract the text once; `pdf_text` is reused by the structured search cell.
pdf_text = load_pdf_text(pdf_path)

query = "What is the attention mechanism?"
# Last expression of the cell, so the returned answer is shown as its output.
semantic_search(query, pdf_text)

'An attention function can be described as mapping a query and a set of key-value pairs to an output, where the query, keys, values, and output are all vectors. The output is computed as a weighted sum of the values, where the weight assigned to each value is computed by a compatibility function of the query with the corresponding key. \n\nIn the context of the Transformer model, there are two primary forms of attention used:\n\n1. **Scaled Dot-Product Attention**: This involves computing the dot products of the query with all keys, dividing each by the square root of the key dimension, and applying a softmax function to obtain the weights on the values. The formula for this attention is:\n   \\[\n   Attention(Q, K, V) = softmax\\left(\\frac{QK^T}{\\sqrt{d_k}}\\right)V\n   \\]\n\n2. **Multi-Head Attention**: Instead of performing a single attention function, the model linearly projects the queries, keys, and values multiple times (h times with different learned linear projections) and 

In [3]:
from pydantic import BaseModel, Field
from typing import List
# Shape of the structured reply: this model is passed as `response_format`
# by structured_semantic_search, which returns the parsed result.
class SemanticSearchResponse(BaseModel):
    # Required field: the answer text for the query.
    answer: str = Field(..., description="The answer to the semantic search question")
    # Required field: a list of strings identifying where the answer came from.
    # NOTE(review): the description is singular ("The source") although the
    # field is a list — presumably one entry per cited source; confirm.
    quote_sources: List[str] = Field(..., description="The source of the quote")

def structured_semantic_search(query, pdf_text, model="gpt-4o-mini"):
    """Answer a question about a document and return a structured result.

    The whole document text is embedded in the system prompt, the question
    is sent as the user message, and ``SemanticSearchResponse`` is passed as
    the ``response_format`` for the parse call.

    Args:
        query: The question to ask about the document.
        pdf_text: The document text the model is told to restrict itself to.
        model: Name of the chat model to call; defaults to "gpt-4o-mini".

    Returns:
        The ``parsed`` attribute of the first choice's message.
        NOTE(review): presumably a ``SemanticSearchResponse`` instance, and
        possibly ``None`` if the model refuses — confirm against the SDK docs.
    """
    # Adjacent literals are concatenated at compile time, so the prompt has
    # single spaces between words. A backslash line continuation inside a
    # string literal would instead embed the next line's indentation in the
    # prompt text.
    system_prompt = (
        "You answer semantic search questions "
        f"ONLY ABOUT THE FOLLOWING TEXT: {pdf_text}"
    )
    response = client.beta.chat.completions.parse(
        model=model,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": query},
        ],
        response_format=SemanticSearchResponse,
    )
    return response.choices[0].message.parsed

# Same question as the plain-text search, this time through the structured
# variant; `pdf_text` is the document text loaded in the earlier cell.
query = "What is the attention mechanism?"
output = structured_semantic_search(query, pdf_text)
# Last expression of the cell, so the parsed result is shown as its output.
output

SemanticSearchResponse(answer='An attention mechanism is a technique in neural networks that enables the model to focus on specific parts of the input sequence when producing each element of the output sequence. It works by mapping a query and a set of key-value pairs to an output, where the query, keys, values, and output are all vectors. The output is computed as a weighted sum of the values, with the weights determined by a compatibility function that measures how well each key matches the query. In the case of the Transformer model, a specific type called "Scaled Dot-Product Attention" is employed, where the compatibility is calculated by taking the dot product of the query and keys, scaling by the square root of the dimensionality of the keys, and applying a softmax function to obtain the weights on the values.', quote_sources=['Section 3.2 Attention', 'Section 3.2.1 Scaled Dot-Product Attention'])

In [4]:
# NOTE(review): a notebook cell only displays its last expression, so the
# value of `output.answer` is evaluated and discarded here; only
# `output.quote_sources` appears in the cell output. Split into two cells or
# use display(...) if both are meant to be shown.
output.answer
output.quote_sources

['Section 3.2 Attention', 'Section 3.2.1 Scaled Dot-Product Attention']