In [4]:
import langchain
from langchain_openai import ChatOpenAI
from langchain_anthropic import ChatAnthropic
from langchain_google_vertexai import VertexAI
from langchain_google_vertexai.model_garden import ChatAnthropicVertex
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
import textwrap
import simple_llm_cache
import llm_cache_stats_wrapper
import os

# To make it easy to switch between work projects and personal AI experiments,
# each standard credential variable is overridden by its *_PERSONAL variant
# whenever that variant is set in the environment.
for _credential_var in (
    "OPENAI_API_KEY",
    "ANTHROPIC_API_KEY",
    "GOOGLE_APPLICATION_CREDENTIALS",
):
    _personal_var = f"{_credential_var}_PERSONAL"
    _personal_value = os.environ.get(_personal_var)
    if _personal_value is not None:
        print(f"Using key from {_personal_var} environment variable")
        os.environ[_credential_var] = _personal_value

# --- Tunables shared by the cells below ---
# Forwarded as `verbose=` to every LLMChain built in the model loop.
verbose = False
# Sampling temperature passed to every model constructor in the model loop.
temperature = 0.5
# Model names starting with this prefix are routed to ChatAnthropicVertex; the
# prefix is stripped before the name is handed to the constructor.
vertex_model_name_prefix = "vertexai-"

# Install the global LLM cache: a SimpleLlmCache built from "llm-cache.json",
# wrapped in LlmCacheStatsWrapper. The wrapper is what this notebook calls for
# get_cache_stats_summary() / clear_cache_stats(), and the wrapped cache is
# reached through its `inner_cache` attribute (used for set_trial()).
# NOTE(review): presumably "llm-cache.json" is the on-disk file the responses
# are persisted to -- confirm in simple_llm_cache.
langchain.llm_cache = llm_cache_stats_wrapper.LlmCacheStatsWrapper(simple_llm_cache.SimpleLlmCache("llm-cache.json"))

def dump_cache_stats_since_last_call():
    """Print the LLM cache's statistics summary, then clear those statistics.

    Because the statistics are cleared after printing, each call reports only
    the cache activity recorded since the previous call.
    """
    stats_cache = langchain.llm_cache
    summary = stats_cache.get_cache_stats_summary()
    print(summary)
    stats_cache.clear_cache_stats()

# Prompt text with two placeholders: the persona to answer as ({character})
# and the question itself ({question}). Built from adjacent literals; the
# resulting string is a two-line template ending in a newline.
template = (
    "Answer the following question as if you are a {character} character:\n"
    "{question}\n"
)

# Reusable prompt object; both placeholders must be supplied at call time.
prompt = PromptTemplate(
    template=template,
    input_variables=["character", "question"],
)

def _build_llm(model_name):
    """Construct the LangChain model object for ``model_name``.

    The provider is selected by the name's prefix:

    * ``gpt-``      -> ChatOpenAI
    * ``claude-``   -> ChatAnthropic
    * ``gemini-``   -> VertexAI
    * ``vertex_model_name_prefix`` (``vertexai-``) -> ChatAnthropicVertex, with
      the prefix stripped from the name passed to the constructor.

    Every model is created with the notebook-level ``temperature``.

    Raises:
        ValueError: if ``model_name`` matches none of the known prefixes.
            Without this check an unknown name would either fail later with an
            unrelated NameError or silently reuse the previous model.
    """
    if model_name.startswith("gpt-"):
        return ChatOpenAI(temperature=temperature, model_name=model_name)
    if model_name.startswith("claude-"):
        return ChatAnthropic(temperature=temperature, model_name=model_name)
    if model_name.startswith("gemini-"):
        return VertexAI(temperature=temperature, model_name=model_name)
    if model_name.startswith(vertex_model_name_prefix):
        vertex_model_name = model_name[len(vertex_model_name_prefix):]
        return ChatAnthropicVertex(temperature=temperature, model_name=vertex_model_name)
    raise ValueError(f"Unsupported model name: {model_name!r}")


# Ask each enabled model the same question twice. Comment or uncomment entries
# to choose which models are exercised.
for model_name in [
    # "gpt-3.5-turbo",
    # "gpt-4",
    # "gpt-4o-mini",
    "gpt-4o",
    # "claude-3-haiku-20240307",
    "gemini-1.5-flash-preview-0514",
    "vertexai-claude-3-haiku@20240307",
]:
    llm = _build_llm(model_name)

    # `chain` stays bound after the loop (to the last model's chain); the async
    # cell that follows relies on it.
    chain = LLMChain(
        llm=llm,
        prompt=prompt,
        verbose=verbose)

    for trial in range(2):
        print(f"\n*** {model_name} trial {trial} ***")
        # Tell the underlying cache which trial is running.
        # NOTE(review): presumably this keys cache entries per trial so the two
        # trials can hold different responses -- confirm in simple_llm_cache.
        langchain.llm_cache.inner_cache.set_trial(trial)
        output = chain.predict(
            character="pirate",
            question="What is a neural network?")
        # Wrap the response so long answers stay readable in the cell output.
        print(textwrap.fill(output, width=80))

print()
dump_cache_stats_since_last_call()

Using key from OPENAI_API_KEY_PERSONAL environment variable
Using key from GOOGLE_APPLICATION_CREDENTIALS_PERSONAL environment variable

*** gpt-4o trial 0 ***
Arrr, ye be askin' about a neural network, eh? Well, let me spin ye a yarn
that'll make it clear as the seven seas. A neural network be a mighty
contraption, much like a crew o' sailors workin' together on a grand ship.   Ye
see, it be made up o' nodes, which be like the crew members. These nodes be
organized in layers, like the decks o' a ship. The first layer be the input
layer, where ye be feedin' in yer data, like provisions loaded onto the ship.
Then, there be hidden layers, where the real work be done, like the crew
scrubbin' the decks and hoistin' the sails. Finally, ye got yer output layer,
where the results be spit out, like treasure found after a long voyage.  Each
node in a layer be connected to nodes in the next layer by weighted lines, like
ropes 'n pulleys. These weights be adjusted durin' trainin', like a crew
lea

In [5]:
# Use the async method to demonstrate that the cache works for sync and async calls
output = await chain.apredict(character="poet", question="What is a neural network?")
print(output)
print()
dump_cache_stats_since_last_call()

*clears throat and adopts a poetic tone*

A neural network, a wondrous thing,
A web of neurons, a symphony to sing.
Connections forged, like synapses alight,
Processing data, a dance of day and night.

Neurons firing, a chorus of the mind,
Patterns emerging, secrets to unwind.
Learning and adapting, a constant flow,
Unlocking insights, a world we come to know.

A neural network, a marvel to behold,
Mimicking the brain, its mysteries untold.
Algorithms and algorithms, a tapestry woven,
Revealing the hidden, the unseen, and the chosen.

Ah, the neural network, a canvas of the mind,
A tool to explore, to ponder, and to find.
A symphony of knowledge, a dance of the unknown,
A pathway to understanding, a world all its own.

LLM Cache: 1 hits, 0 misses
           0 new input tokens, 0 new output tokens, 65 total input tokens, 175 total output tokens
           new (this run) API cost: $0.00, total (including previously-cached runs) API cost: $0.00

