In [1]:
# Install (or upgrade) the Modal client and ipywidgets into the kernel's environment.
# `%pip` (rather than `!pip`) targets the environment the running kernel uses.
%pip install --upgrade modal
%pip install ipywidgets


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.2[0m[39;49m -> [0m[32;49m24.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.2[0m[39;49m -> [0m[32;49m24.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [2]:
import re

import modal

# Compare the version numerically. As plain strings, "0.100.0" > "0.49.0" is
# False (lexicographic ordering), so a string comparison would reject newer releases.
_version_match = re.match(r"(\d+)\.(\d+)\.(\d+)", modal.__version__)
assert _version_match is not None, f"unrecognized modal version: {modal.__version__}"
assert tuple(int(part) for part in _version_match.groups()) > (0, 49, 0), (
    f"modal > 0.49.0 is required, found {modal.__version__}"
)
modal.__version__

'0.72.9'

In [3]:
# Create the Modal App that the functions defined below register with.
# (No `from modal import app` here: that name would be rebound by this
# assignment straight away, so the import had no effect.)
app = modal.App(name="example-basic-notebook-app")


### Handling standard Python functions

Standard Python functions can of course be defined in a notebook and used on their own or called from within Modal functions.
Below, the `double` function is defined in pure Python and called once locally.

In [4]:
def double(x: int) -> int:
    """Return twice the value of ``x``."""
    return 2 * x


double(5)

10

### Handling Modal Functions

If we want to run this trivial doubling function *in the cloud*, we can write another function, `double_with_modal`, and decorate it with `@app.function` to register
the function with the Modal app.

To demonstrate that Modal functions you define in the notebook can be called by _other_ Modal functions, there's another function, `quadruple`, which uses `double` and `double_with_modal`.
For numbers greater than 1 million, this function spins up containers that run in Modal, which is a _very_ inefficient way to multiply a number by four, but you can do it if you please!

In [5]:
@app.function()
def double_with_modal(x: int) -> int:
    """Return twice the value of ``x``; registered with the Modal app as a Function."""
    return 2 * x


@app.function()
def quadruple(x: int) -> int:
    """Return four times ``x`` by doubling it twice and summing.

    Values up to and including 1,000,000 use the plain ``double`` helper;
    larger values go through two ``double_with_modal.remote`` calls.
    """
    # Small inputs: stay in-process with the pure-Python helper.
    if x <= 1_000_000:
        return double(x) + double(x)
    # Large inputs: delegate each doubling to the Modal function.
    return double_with_modal.remote(x) + double_with_modal.remote(x)


# Run the app for the duration of the `with` block and exercise `quadruple`
# both locally and remotely. `result` is assigned here and displayed in the next cell.
with app.run():
    print(quadruple.local(100))   # running locally
    print(quadruple.remote(100))  # run remotely
    print("Doing a very inefficient remote multiplication just for fun!")
    # 10_000_000 exceeds the 1_000_000 threshold, so `quadruple` takes its
    # `double_with_modal.remote` branch.
    result = quadruple.remote(10_000_000)

400
400
Doing a very inefficient remote multiplication just for fun!


In [6]:
# Evaluate the result created in above cell
result

40000000

### GPU-powered notebook cells!

Thanks to Modal's remote execution capabilities, your notebook can be running on your laptop or a cheap CPU-only instance and take advantage of serverless GPU container execution. Here are the basics.

In [7]:
# Define a Modal function with a GPU attached.
@app.function(gpu="any")
def hello_gpu():
    """Run ``nvidia-smi`` through the shell and return a fixed greeting string."""
    import subprocess

    # check=True makes a non-zero exit status from nvidia-smi raise CalledProcessError.
    subprocess.run("nvidia-smi", check=True, shell=True)
    greeting = "hello from a remote GPU!"
    return greeting


# Start and run an ephemeral modal.App and execute the GPU-powered Modal Function!
with app.run():
    result = hello_gpu.remote()
    assert result == "hello from a remote GPU!"

# After the app is finished you can continue executing other functions defined in your notebook
# and use the results of your GPU functions!
"This is the remote GPU's return value: " + result

"This is the remote GPU's return value: hello from a remote GPU!"

In [12]:
# Reference the existing Modal Volume named "tpot-llm".
# `Volume.from_name` is used instead of the deprecated `Volume.lookup`; the
# returned handle can be passed directly to `volumes={...}` on a function.
volume = modal.Volume.from_name("tpot-llm")


In [15]:
import os
from pathlib import Path

import pandas as pd

# Location of the parquet dataset. Set the TPOT_DATA_PATH environment variable
# to load it from somewhere else; the default is the original author's local copy.
DATA_PATH = Path(
    os.environ.get(
        "TPOT_DATA_PATH",
        "/Users/frsc/Documents/Projects/tpot-llm/data/borg-ca-tpot.parquet",
    )
)
df = pd.read_parquet(DATA_PATH)
df

Unnamed: 0_level_0,account_id,full_text,favorite_count,source,processed_text,processed_token_length
tweet_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1452671200388333580,2784,"Tech founders have optimistic visions, but the...",91,borg,"Tech founders have optimistic visions, but the...",48
520677120477913089,2784,Utopian architects have inspired city planners...,80,borg,Utopian architects have inspired city planners...,20
938725590269677568,2784,I had a nightmare that bitcoin miners built a ...,78,borg,I had a nightmare that bitcoin miners built a ...,36
1456636622091522051,2784,Fast converging on a world where everyone is i...,50,borg,Fast converging on a world where everyone is i...,30
1218168859829723136,2784,"For many reasons, utilitarian consequentialism...",48,borg,"For many reasons, utilitarian consequentialism...",53
...,...,...,...,...,...,...
1772723059113296091,1744709995193614336,OH FUCK\n\nOh fuck\n\nThe quote “It is easy en...,21,ca,OH FUCK\n\nOh fuck\n\nThe quote “It is easy en...,62
1800177882318434576,1744709995193614336,"sometimes, you gotta look for your folks\n\ni ...",20,ca,"sometimes, you gotta look for your folks\n\ni ...",53
1828411771909964039,1744709995193614336,no amount of buddhism alleviates the suffering...,20,ca,no amount of buddhism alleviates the suffering...,21
1827825408240398748,1744709995193614336,the attachment/aversion says to experience:\n\...,20,ca,the attachment/aversion says to experience:\n\...,103


In [None]:
import pandas as pd
@app.function(volumes={"/tpot-llm": volume})
def f():
    """Placeholder Modal function with `volume` mounted at ``/tpot-llm``.

    Currently returns ``None``; reading from the mount is not implemented yet.
    """
    # return contents
    return None

# Call `f` remotely while the app is running and print whatever it returns.
# `data` is pre-initialised so the name exists even if the remote call fails.
data = None
with app.run():
    data = f.remote()
    print(data)

    



## Embedding

In [17]:
# GPU type requested for both the image build step and the inference class.
# Given as a string spec, the same form as the `gpu="any"` used earlier in this notebook.
GPU_CONFIG = "T4"
# Embedding model served by text-embeddings-inference.
# Alternative tried previously: "BAAI/bge-base-en-v1.5".
MODEL_ID = "Alibaba-NLP/gte-Qwen2-7B-instruct"
BATCH_SIZE = 32
DOCKER_IMAGE = "ghcr.io/huggingface/text-embeddings-inference:turing-1.5"  # Turing for T4s

import subprocess
import asyncio
import socket

def download_model():
    """Start the embeddings server once, wait for it to come up, then stop it.

    Used as an image build step via ``run_function``.
    """
    server = spawn_server()
    server.terminate()


# Create TEI image with all necessary dependencies
# Container image for the embedding service: starts from the TEI registry
# image, adds Python 3.10, clears the image's ENTRYPOINT, installs the Python
# dependencies, and finally runs `download_model` on a GPU as a build step.
tei_image = (
    modal.Image.from_registry(DOCKER_IMAGE, add_python="3.10")
    # Reset the base image's entrypoint.
    .dockerfile_commands("ENTRYPOINT []")
    .pip_install(
        "httpx",
        "numpy~=1.26.4",
        "pandas~=2.2.2",
        "supabase",
        "tqdm",
        "seaborn",
        "openai",
        "toolz",
        "pyarrow",
    )
    # Launches and then stops the server during the image build (see `download_model`).
    .run_function(download_model, gpu=GPU_CONFIG)
)

with tei_image.imports():
    import httpx

# Add TEI server setup functions
def spawn_server() -> subprocess.Popen:
    """Launch ``text-embeddings-router`` and block until it accepts connections.

    The server is started for ``MODEL_ID`` on port 8000; the function then
    polls ``127.0.0.1:8000`` until a TCP connection succeeds.

    Returns:
        The ``subprocess.Popen`` handle of the running server.

    Raises:
        RuntimeError: If the launcher process exits before the port opens.
    """
    import time  # local import: only the readiness loop below needs it

    process = subprocess.Popen(
        [
            "text-embeddings-router",
            "--model-id",
            MODEL_ID,
            "--port",
            "8000",
            "--max-client-batch-size",
            "128",
            "--dtype",
            "float16",
            "--auto-truncate",
        ]
    )

    while True:
        try:
            socket.create_connection(("127.0.0.1", 8000), timeout=1).close()
            print("Webserver ready!")
            return process
        except (socket.timeout, ConnectionRefusedError):
            retcode = process.poll()
            if retcode is not None:
                raise RuntimeError(f"launcher exited unexpectedly with code {retcode}")
            # A refused connection fails immediately, so pause briefly to
            # avoid spinning a CPU core while the server starts up.
            time.sleep(0.1)



# Now we can use app.cls since app is defined
@app.cls(
    gpu=GPU_CONFIG,
    image=tei_image,
    concurrency_limit=10,
    allow_concurrent_inputs=10,
)
class TextEmbeddingsInference:
    """Modal class that runs a local embeddings server and proxies requests to it."""

    @modal.enter()
    def setup_server(self):
        """Start the server process and open an async HTTP client pointed at it."""
        self.process = spawn_server()
        self.client = httpx.AsyncClient(base_url="http://127.0.0.1:8000")

    @modal.exit()
    def teardown_server(self):
        """Terminate the server process started in ``setup_server``."""
        self.process.terminate()

    @modal.method()
    async def embed(self, inputs: list[str]):
        """POST ``inputs`` to the server's ``/embed`` route and return the decoded JSON.

        A read timeout is retried after a one-second pause, for up to three
        attempts in total; the timeout from the final attempt is re-raised.
        Any other error propagates immediately.
        """
        max_attempts = 3
        for attempt_no in range(1, max_attempts + 1):
            try:
                response = await self.client.post("/embed", json={"inputs": inputs})
                response.raise_for_status()
                return response.json()
            except httpx.ReadTimeout as timeout_err:
                # Out of attempts: surface the last timeout to the caller.
                if attempt_no == max_attempts:
                    raise timeout_err
                print(f"Timeout occurred, retrying... (Attempt {attempt_no})")
                await asyncio.sleep(1)  # brief pause before the next attempt


In [22]:
# Preview the first 32 entries of the `processed_text` column as a plain list.
df.processed_text.tolist()[:32]


['Tech founders have optimistic visions, but their plans never work out. We end up with clickbait, outrage, depression, etc. In my article, I outline how to make tech products that tie society together instead of breaking it apart.',
 'Utopian architects have inspired city planners for centuries. Where are our utopian interface designers?',
 'I had a nightmare that bitcoin miners built a dyson sphere and Earth went dark\n\nAs we lay freezing to death, I though: *that* explains the fermi paradox',
 'Fast converging on a world where everyone is in 100+ “communities” but no one to bring them soup when sick or watch the kids.',
 'For many reasons, utilitarian consequentialism is lousy for self-understanding. Here’s one: asking “Am I helpful or harmful?” is soul-crushing compared to “Am I an agent of beauty/truth/love/etc?”',
 'Design theory *should* be the same discipline as political theory. Same core question ❓what ideas about human nature (preferences, goals, feelings) and relations (co

In [None]:
import numpy as np
x = df.processed_text.tolist()[:16]

with app.run():
    embedder = TextEmbeddingsInference()

    # `.remote()` forwards all of its arguments to `embed`, which only accepts
    # `inputs`. `order_outputs` is an option of `.map()`, so passing it here
    # raised a TypeError inside the remote call.
    embeddings = embedder.embed.remote(x)
    embeddings = np.array(embeddings)
    # Persist the embeddings next to the notebook for later reuse.
    np.save("embeddings.npy", embeddings)




In [8]:
from typing import List, Dict, Any
import requests


def get_available_models() -> List[Dict[str, Any]]:
    """Fetch available models from the OpenRouter API.

    Returns:
        A list of dicts with the keys ``id``, ``name``, ``description``,
        ``context_length`` and ``pricing``, sorted by ``name`` (entries whose
        name is missing sort first). On any failure the error is printed and
        an empty list is returned.
    """
    try:
        import requests

        # A timeout keeps the cell from hanging forever on a stalled connection.
        response = requests.get("https://openrouter.ai/api/v1/models", timeout=30)
        response.raise_for_status()

        # Keep only the fields of interest from each entry of the "data" array.
        models = [
            {
                "id": model_info.get("id"),
                "name": model_info.get("name"),
                "description": model_info.get("description", ""),
                "context_length": model_info.get("context_length", 0),
                "pricing": model_info.get("pricing", {}),
            }
            for model_info in response.json().get("data", [])
        ]

        # `or ""` guards against a missing name: comparing None with str would
        # raise TypeError and discard the whole result via the handler below.
        return sorted(models, key=lambda model: model["name"] or "")

    except Exception as e:
        print(f"Error fetching models: {e}")
        return []

# response = requests.get("https://openrouter.ai/api/v1/models")
# response.json()
get_available_models()

[{'id': '01-ai/yi-large',
  'name': '01.AI: Yi Large',
  'description': 'The Yi Large model was designed by 01.AI with the following usecases in mind: knowledge search, data classification, human-like chat bots, and customer service.\n\nIt stands out for its multilingual proficiency, particularly in Spanish, Chinese, Japanese, German, and French.\n\nCheck out the [launch announcement](https://01-ai.github.io/blog/01.ai-yi-large-llm-launch) to learn more.',
  'context_length': 32768,
  'pricing': {'prompt': '0.000003',
   'completion': '0.000003',
   'image': '0',
   'request': '0'}},
 {'id': 'ai21/jamba-1-5-large',
  'name': 'AI21: Jamba 1.5 Large',
  'description': "Jamba 1.5 Large is part of AI21's new family of open models, offering superior speed, efficiency, and quality.\n\nIt features a 256K effective context window, the longest among open models, enabling improved performance on tasks like document summarization and analysis.\n\nBuilt on a novel SSM-Transformer architecture, it 

In [7]:
response = requests.get("https://openrouter.ai/api/v1/models")