In [1]:
from dotenv import load_dotenv
import helper

# Load variables from a local .env file into the process environment
# (presumably this is where the Anthropic API key lives — confirm against your .env).
load_dotenv()

True

In [2]:
from anthropic import Anthropic


# No api_key argument is passed, so the client is expected to read the key
# from the environment (ANTHROPIC_API_KEY per the SDK docs — confirm it is set).

client = Anthropic()
# Model identifier passed as `model=` by the request cells below.
model_name = "claude-sonnet-4-6"

## Motivation for Streaming

https://anthropic.skilljar.com/claude-with-the-anthropic-api/287734

When you make a standard API call, your code waits until the model has finished generating the **entire** response before returning anything. For short replies this is fine, but for longer responses it can mean several seconds of silence before anything appears.

**Streaming** solves this by sending tokens back to the client as they are generated — the same way ChatGPT displays text word by word while the response is still being produced.

### Why use streaming?

- **Better user experience** — users see output immediately instead of waiting for the full response
- **Lower perceived latency** — the time to the first token is much shorter than the time to the full response
- **Early stopping** — you can interrupt generation if you already have what you need
- **Progress feedback** — useful for long outputs like code generation or document summarization

### When to use it

Use streaming when building interactive applications (chatbots, copilots, CLIs) where responsiveness matters. For batch processing or background jobs where you just need the final result, standard API calls are simpler.


In [None]:
# Approach 1: ask messages.create() for a raw event stream with stream=True.
prompt = "Write a 1 sentence description of a sci fi world"

messages = []
helper.add_user_message(messages=messages, content=prompt)

stream = client.messages.create(
    model=model_name,
    max_tokens=1000,
    messages=messages,
    stream=True,
)

# Every item is a raw event object (message_start, content_block_start,
# content_block_delta, ...); print each one to inspect the stream's structure.
for event in stream:
    print(event)

RawMessageStartEvent(message=Message(id='msg_01NqZ2ZNShNBNUwvfMMjMw8U', container=None, content=[], model='claude-sonnet-4-6', role='assistant', stop_reason=None, stop_sequence=None, type='message', usage=Usage(cache_creation=CacheCreation(ephemeral_1h_input_tokens=0, ephemeral_5m_input_tokens=0), cache_creation_input_tokens=0, cache_read_input_tokens=0, inference_geo='global', input_tokens=19, output_tokens=1, server_tool_use=None, service_tier='standard')), type='message_start')
RawContentBlockStartEvent(content_block=TextBlock(citations=None, text='', type='text'), index=0, type='content_block_start')
RawContentBlockDeltaEvent(delta=TextDelta(text='In', type='text_delta'), index=0, type='content_block_delta')
RawContentBlockDeltaEvent(delta=TextDelta(text=' a', type='text_delta'), index=0, type='content_block_delta')
RawContentBlockDeltaEvent(delta=TextDelta(text=' distant', type='text_delta'), index=0, type='content_block_delta')
RawContentBlockDeltaEvent(delta=TextDelta(text=' fut

In [None]:
# Approach 2: the SDK's streaming helper, used as a context manager so the
# underlying connection is released when the block exits.

messages = []

helper.add_user_message(messages, content="Write a 1 sentence description of a sci fi world")

with client.messages.stream(
    model=model_name,
    max_tokens=1000,
    messages=messages,
) as stream:
    # text_stream yields only the text pieces, without the surrounding event objects.
    for text in stream.text_stream:
        # end="" joins the chunks into one line; flush=True shows each chunk
        # as soon as it arrives instead of waiting on output buffering.
        print(text, end="", flush=True)

    # Collect the accumulated message while the stream is still open.
    final_message = stream.get_final_message()

# Display the complete message object as the cell's result.
final_message

In a distant future where humanity has colonized the outer planets, a fragile peace between Earth's corporate-controlled government and the rebellious asteroid belt miners teeters on the edge of collapse when an ancient alien signal is detected emanating from the frozen core of Europa.

ParsedMessage(id='msg_019gPTh9rug15ruzPnknLwVJ', container=None, content=[ParsedTextBlock(citations=None, text="In a distant future where humanity has colonized the outer planets, a fragile peace between Earth's corporate-controlled government and the rebellious asteroid belt miners teeters on the edge of collapse when an ancient alien signal is detected emanating from the frozen core of Europa.", type='text', parsed_output=None)], model='claude-sonnet-4-6', role='assistant', stop_reason='end_turn', stop_sequence=None, type='message', usage=Usage(cache_creation=CacheCreation(ephemeral_1h_input_tokens=0, ephemeral_5m_input_tokens=0), cache_creation_input_tokens=0, cache_read_input_tokens=0, inference_geo='global', input_tokens=19, output_tokens=58, server_tool_use=None, service_tier='standard'))