In [42]:
# to access the environment variable 
import os

from dotenv import load_dotenv

# take environment variables from .env.
load_dotenv()  

# if there is no value for "GOOGLE_API_KEY", then it takes None as input 
api_key = os.getenv("GOOGLE_API_KEY")


429 or 400 errors when testing explicit caching on Gemini API free tier is fundamentally due to quota and policy restrictions imposed by Google on free-tier usage:

429 RESOURCE_EXHAUSTED occurs because your free-tier account has zero tokens quota allocated for cached content storage. Even if we cache content is valid size-wise, we have no quota to store cached tokens.

400 INVALID_ARGUMENT for too-small cached content occurs because Gemini enforces a minimum token count (typically 4096 tokens) for caching to be useful and cost-effective.

## Explicit caching

In [15]:
import pypdf
from google import genai
from google.genai import types

client = genai.Client(api_key="AIzaSyAkcq0ezMy-VUuPrkof64Z--iBfKp0YZYY")

# Extract text from PDF

pdf_text = ""
for page in reader.pages:
    pdf_text += page.extract_text() + "\n"

# Cache it
cache_response = client.caches.create(
    #model="gemini-2.5-flash",
    model='models/gemini-2.0-flash-001',
    config=types.CreateCachedContentConfig(
        system_instruction="You are analyzing this document.",
        contents=[ types.Content(role="user", parts=[ types.Part(text=pdf_text) ]) ],
        ttl="500s"
    )
)

cache_id = cache_response.id
print("Cache ID:", cache_id)


ClientError: 429 RESOURCE_EXHAUSTED. {'error': {'code': 429, 'message': 'TotalCachedContentStorageTokensPerModelFreeTier limit exceeded for model gemini-2.0-flash: limit=0, requested=8441', 'status': 'RESOURCE_EXHAUSTED'}}

In [17]:
import pypdf
from google import genai
from google.genai import types

client = genai.Client(api_key="AIzaSyAkcq0ezMy-VUuPrkof64Z--iBfKp0YZYY")

# Extract text from PDF
reader = pypdf.PdfReader(r"C:\Users\Raghul mani M\Downloads\Step-by-Step Exploratory Data Analysis (EDA) using Python -.pdf")
pdf_text = ""
for page in reader.pages:
    pdf_text += page.extract_text() + "\n"

# Cache it
cache_response = client.caches.create(
    model='models/gemini-2.0-flash-001',
    config=types.CreateCachedContentConfig(
        system_instruction="You are analyzing this document.",
        contents=[ types.Content(role="user", parts=[ types.Part(text=pdf_text) ]) ],
        ttl="3600s"
    )
)

cache_id = cache_response.id
print("Cache ID:", cache_id)


ClientError: 429 RESOURCE_EXHAUSTED. {'error': {'code': 429, 'message': 'TotalCachedContentStorageTokensPerModelFreeTier limit exceeded for model gemini-2.0-flash: limit=0, requested=8441', 'status': 'RESOURCE_EXHAUSTED'}}

In [5]:
# Use the cache in a query
response = client.models.generate_content(
    model="gemini-2.5-flash",
    contents="Summarize the document in 5 bullet points.",
    config=types.GenerateContentConfig(
        cached_content=cache.name
    )
)

print(response.text)

NameError: name 'cache' is not defined

## Implicit caching

In [39]:
from google import genai

# For implicit caching, use a compatible model
client = genai.Client()

# Generate a long text to be used as a common prefix
# This will ensure the token count is above the minimum threshold
long_common_prefix = client.models.generate_content(
    model="gemini-2.5-flash",
    contents="Generate a detailed, multi-paragraph text explaining the history of the Roman Empire, including key figures like Augustus and Constantine. Make it very long."
).text


model_name = "gemini-2.5-flash"

print(f"Using a common prefix with approximately {len(long_common_prefix.split())} words.\n")

# First request
response_1 = client.models.generate_content(
    model=model_name,
    contents=f"{long_common_prefix} in the context of cost savings for AI workloads."
)

if response_1.usage_metadata:
    print("--- First Request ---")
    print(f"Prompt token count: {response_1.usage_metadata.prompt_token_count}")
    print(f"Cached token count: {response_1.usage_metadata.cached_content_token_count}\n")
else:
    print("No usage metadata found for Request 1.\n")

# Second request (sent immediately after)
# The API should now recognize the common prefix for implicit caching
response_2 = client.models.generate_content(
    model=model_name,
    contents=f"{long_common_prefix} What models support implicit caching?"
)

if response_2.usage_metadata:
    print("--- Second Request ---")
    print(f"Prompt token count: {response_2.usage_metadata.prompt_token_count}")
    print(f"Cached token count: {response_2.usage_metadata.cached_content_token_count}\n")
else:
    print("No usage metadata found for Request 2.\n")


Using a common prefix with approximately 2175 words.

--- First Request ---
Prompt token count: 2787
Cached token count: None

--- Second Request ---
Prompt token count: 2783
Cached token count: 2031



In [43]:
from google import genai
from google.genai import types
import io
import httpx

client = genai.Client()

# Direct URL to a PDF file (public and raw PDF data)
pdf_url = "https://arxiv.org/pdf/2312.11805.pdf"

# Download PDF content
pdf_bytes = io.BytesIO(httpx.get(pdf_url).content)

# Upload PDF to Gemini Files API
document = client.files.upload(
    file=pdf_bytes,
    config=dict(mime_type='application/pdf')
)

model_name = "gemini-2.5-flash"
system_instruction = "You are an expert analyzing research papers."

# Create cached content with the uploaded document
cache = client.caches.create(
    model=model_name,
    config=types.CreateCachedContentConfig(
        system_instruction=system_instruction,
        contents=[document]
    )
)

print(f"Cache created: {cache}")

# Use the cached content in a generation request
response = client.models.generate_content(
    model=model_name,
    contents="Summarize the key findings of this paper.",
    config=types.GenerateContentConfig(cached_content=cache.name)
)

print(f"Usage metadata: {response.usage_metadata}")
print("\nSummary:\n", response.text)


ClientError: 400 FAILED_PRECONDITION. {'error': {'code': 400, 'message': 'The File thnyohxk0nib is not in an ACTIVE state and usage is not allowed.', 'status': 'FAILED_PRECONDITION'}}