In [None]:
!pip install openai
!pip install PyPDF2 transformers rank_bm25 tiktoken

Collecting openai
  Downloading openai-1.51.2-py3-none-any.whl.metadata (24 kB)
Collecting httpx<1,>=0.23.0 (from openai)
  Downloading httpx-0.27.2-py3-none-any.whl.metadata (7.1 kB)
Collecting jiter<1,>=0.4.0 (from openai)
  Downloading jiter-0.6.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.2 kB)
Collecting httpcore==1.* (from httpx<1,>=0.23.0->openai)
  Downloading httpcore-1.0.6-py3-none-any.whl.metadata (21 kB)
Collecting h11<0.15,>=0.13 (from httpcore==1.*->httpx<1,>=0.23.0->openai)
  Downloading h11-0.14.0-py3-none-any.whl.metadata (8.2 kB)
Downloading openai-1.51.2-py3-none-any.whl (383 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m383.7/383.7 kB[0m [31m7.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading httpx-0.27.2-py3-none-any.whl (76 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m76.4/76.4 kB[0m [31m4.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading httpcore-1.0.6-py3-none-any.whl (78 kB)
[2K   [90m━━

In [None]:
import PyPDF2

def extract_text_from_pdf(pdf_path):
    """Extract the text of every page of a PDF and return it as one string.

    Args:
        pdf_path: Path of the PDF file to read.

    Returns:
        The text of all pages, in page order, joined with newlines. A page
        for which the extractor yields nothing contributes an empty string.
    """
    print(f"Extracting text from PDF: {pdf_path}")
    with open(pdf_path, "rb") as file:
        reader = PyPDF2.PdfReader(file)
        # `or ""` keeps a page with no extractable text (None / empty result)
        # from breaking the join below.
        page_texts = [page.extract_text() or "" for page in reader.pages]
    # Join with a newline so the last word of one page is not fused with the
    # first word of the next page, which would distort word-based chunking.
    return "\n".join(page_texts)

# Extract the sample document once; `text` is reused by the later cells.
pdf_path = "dummy.pdf"
text = extract_text_from_pdf(pdf_path)
# Report the size of the extracted text in characters and whitespace-separated words.
print(f"Extracted {len(text)} characters from PDF")
print(f"Extracted {len(text.split())} words from PDF")
print('-----------------------------------PDF Text--------------------------------------')
# Show only the first 200 characters as a preview.
print(text[:200])
print('-----------------------------------END--------------------------------------')

Extracting text from PDF: dummy.pdf
Extracted 13939 characters from PDF
Extracted 1983 words from PDF
-----------------------------------PDF Text--------------------------------------
Certainly! Let's further expand on each section with more detailed explanations, examples, and applications across different domains. We'll aim 
to increase the level of detail to reach the desired le
-----------------------------------END--------------------------------------


In [None]:
def split_into_chunks(text, chunk_size=300):
    """Split text into consecutive chunks of at most `chunk_size` words.

    Words are the whitespace-separated tokens of `text`; each chunk is those
    words re-joined with single spaces, so original whitespace is normalized.

    Args:
        text: The text to split.
        chunk_size: Maximum number of words per chunk. Must be positive.

    Returns:
        A list of chunk strings in document order; empty if `text` has no words.

    Raises:
        ValueError: If `chunk_size` is zero or negative.
    """
    # A negative step would make range() yield nothing and silently drop the
    # whole text, and a zero step fails with an unrelated-looking range() error.
    if chunk_size <= 0:
        raise ValueError(f"chunk_size must be a positive integer, got {chunk_size}")

    print(f"Splitting text into chunks of size {chunk_size}")
    words = text.split()
    chunks = [
        " ".join(words[start:start + chunk_size])
        for start in range(0, len(words), chunk_size)
    ]
    print(f"Created {len(chunks)} chunks")
    return chunks

# Chunk the extracted PDF text using the default chunk_size of 300 words.
chunks = split_into_chunks(text)

Splitting text into chunks of size 300
Created 7 chunks


In [None]:
# Token prices in USD per single token for gpt-4o-mini, the model used in this
# notebook. Each value is written as <price per one million tokens> / 1,000,000.
UNCACHED_TOKEN_PRICE = 0.15 / 1000000
# Cached input tokens are billed at half the uncached input rate: $0.075 per 1M
# (not the rounded 0.08, which understates the savings from caching).
CACHED_TOKEN_PRICE = 0.075 / 1000000
COMPLETION_TOKEN_PRICE = 0.6 / 1000000

# Scale back to per-million values so the numbers match the banner; printing the
# per-token value with 8 decimals would round 0.000000075 up to 0.00000008.
print('Price per one million tokens (USD):')
print(f"Cache Token Cost: ${CACHED_TOKEN_PRICE * 1000000:.3f}, \nUncached Token Cost: ${UNCACHED_TOKEN_PRICE * 1000000:.3f}, \nCompletion Token Cost: ${COMPLETION_TOKEN_PRICE * 1000000:.3f}")

Price per one million tokens for prompts:
Cache Token Cost: $0.00000008, 
Uncached Token Cost: $0.00000015, 
Completion Token Cost: $0.00000060


In [None]:
import tiktoken

def count_tokens(text, model="gpt-4o-mini"):
    """Return the number of tokens `text` encodes to for the given model.

    Args:
        text: The string to tokenize.
        model: Model name passed to tiktoken to select the encoding.

    Returns:
        The length of the token sequence produced by the encoding.
    """
    encoding = tiktoken.encoding_for_model(model)
    return len(encoding.encode(text))

In [None]:
from openai import OpenAI
import os
from google.colab import userdata

# Read the API key from Colab's user secrets instead of hard-coding it in the notebook.
openai_key = userdata.get('openai_key')
# Export the key through the environment; OpenAI() below is constructed with no
# explicit key, so it presumably reads OPENAI_API_KEY from here.
os.environ["OPENAI_API_KEY"] = openai_key
# Shared client used by openai_query for every request.
client = OpenAI()

In [None]:
# Running cost totals, incremented by openai_query on every request.
# They are reset to zero each time this cell is run.
total_cost_with_caching = 0
total_cost_without_caching = 0

def openai_query(system_prompt, user_prompt):
    """Ask gpt-4o-mini for a short situating context for one chunk and track its cost.

    The document is sent as the system message (prefixed with 'Document: ') and
    the chunk, followed by the instruction, as the user message.

    Args:
        system_prompt: Full document text.
        user_prompt: The chunk to situate within the document.

    Returns:
        The model's reply text ("" if the reply carries no text content).

    Side effects:
        Adds this request's cost to the module-level `total_cost_with_caching`
        and `total_cost_without_caching`, and prints the token usage and cost.
    """
    global total_cost_with_caching, total_cost_without_caching

    instruction = 'Please give a short succinct context to situate this chunk within the overall document for the purposes of improving search retrieval of the chunk. Answer only with the succinct context and nothing else'
    completion = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": 'Document: ' + system_prompt},
            # Blank line keeps the last word of the chunk from being fused
            # with the first word of the instruction.
            {"role": "user", "content": user_prompt + "\n\n" + instruction}
        ]
    )

    # The reply content can be None; normalize so token counting cannot crash.
    answer = completion.choices[0].message.content or ""

    usage = getattr(completion, "usage", None)
    prompt_tokens = getattr(usage, "prompt_tokens", 0) or 0
    # `prompt_tokens_details` may exist as an attribute yet be None, so a
    # hasattr() check alone is not enough before reading `.cached_tokens`.
    prompt_details = getattr(usage, "prompt_tokens_details", None)
    cached_tokens = getattr(prompt_details, "cached_tokens", 0) or 0
    uncached_tokens = prompt_tokens - cached_tokens
    # Prefer the completion token count reported by the API (the billed figure);
    # only estimate it locally when the response does not report one.
    completion_tokens = getattr(usage, "completion_tokens", None)
    if completion_tokens is None:
        completion_tokens = count_tokens(answer)

    # Calculate cost: the same request priced with and without the cached-token discount.
    cost_with_caching = (cached_tokens * CACHED_TOKEN_PRICE) + (uncached_tokens * UNCACHED_TOKEN_PRICE) + (completion_tokens * COMPLETION_TOKEN_PRICE)
    cost_without_caching = (prompt_tokens * UNCACHED_TOKEN_PRICE) + (completion_tokens * COMPLETION_TOKEN_PRICE)

    total_cost_with_caching += cost_with_caching
    total_cost_without_caching += cost_without_caching

    print(f"Prompt tokens: {prompt_tokens}, Uncached tokens: {uncached_tokens}, Cached tokens: {cached_tokens}, Completion tokens: {completion_tokens}")
    print(f"Cost with caching: ${cost_with_caching:.6f}, Cost without caching: ${cost_without_caching:.6f}")
    return answer

def create_contextual_embeddings(chunks, document):
    """Query the model once per chunk and collect the replies in chunk order.

    Args:
        chunks: The chunks to process.
        document: The full document text, passed to openai_query with every chunk.

    Returns:
        A list holding the openai_query reply for each chunk. Despite the
        function name, the items are the returned strings, not vectors.
    """
    print("Creating contextual embeddings...")
    replies = []
    for ordinal, piece in enumerate(chunks, start=1):
        print(f"Processing chunk {ordinal}/{len(chunks)}")
        replies.append(openai_query(document, piece))
    print(f"Created {len(replies)} contextual embeddings")
    return replies


# Generate one context per chunk; every call sends the full document text.
contextual_chunks = create_contextual_embeddings(chunks, text)
# Relative saving from cached-token pricing. Guard the division: if no cost was
# accrued (for example `chunks` is empty) the denominator is 0.
percentage_savings = (
    ((total_cost_without_caching - total_cost_with_caching) / total_cost_without_caching) * 100
    if total_cost_without_caching
    else 0.0
)



Creating contextual embeddings...
Processing chunk 1/7
Prompt tokens: 3006, Uncached tokens: 190, Cached tokens: 2816, Completion tokens: 42
Cost with caching: $0.000279, Cost without caching: $0.000476
Processing chunk 2/7
Prompt tokens: 2997, Uncached tokens: 181, Cached tokens: 2816, Completion tokens: 59
Cost with caching: $0.000288, Cost without caching: $0.000485
Processing chunk 3/7
Prompt tokens: 3010, Uncached tokens: 194, Cached tokens: 2816, Completion tokens: 48
Cost with caching: $0.000283, Cost without caching: $0.000480
Processing chunk 4/7
Prompt tokens: 3007, Uncached tokens: 191, Cached tokens: 2816, Completion tokens: 48
Cost with caching: $0.000283, Cost without caching: $0.000480
Processing chunk 5/7
Prompt tokens: 3015, Uncached tokens: 199, Cached tokens: 2816, Completion tokens: 57
Cost with caching: $0.000289, Cost without caching: $0.000486
Processing chunk 6/7
Prompt tokens: 3007, Uncached tokens: 191, Cached tokens: 2816, Completion tokens: 49
Cost with cach

In [None]:
# Summarize the totals accumulated by openai_query; `percentage_savings` was
# computed in the cell that generated the contexts.
print(f"Total cost with caching: ${total_cost_with_caching:.6f}")
print(f"Total cost without caching: ${total_cost_without_caching:.6f}")
print(f"Savings from caching: ${total_cost_without_caching - total_cost_with_caching:.6f}")
print(f"Percentage savings from caching: {percentage_savings:.2f}%")

Total cost with caching: $0.001985
Total cost without caching: $0.003355
Savings from caching: $0.001371
Percentage savings from caching: 40.85%
