Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
81ed712
feat: wip - add thinking content to messages
rhys117 May 14, 2025
a47870a
Merge branch 'main' into 154-thinking
rhys117 Jun 6, 2025
b6e1bb0
chore: add thinking to capabilities
rhys117 Jun 6, 2025
ecb69c9
chore: pass thinking through from chat initialisation
rhys117 Jun 6, 2025
a014b77
chore: add very basic config for thinking budget through global confi…
rhys117 Jun 6, 2025
ddb0ae1
bug: fix config missing comma
rhys117 Jun 6, 2025
6d66491
chore: add streaming content
rhys117 Jun 6, 2025
7da672e
chore: rename to use existing reasoning capability
rhys117 Jun 6, 2025
c948b0e
Merge branch 'main' into 154-thinking
rhys117 Jun 22, 2025
6b4fb83
chore: rename to thinking
rhys117 Jun 22, 2025
7ec6733
Get thinking working with bedrock
hiemanshu Jun 27, 2025
8709018
Merge branch 'main' into 154-thinking
crmne Jul 16, 2025
b8fb932
Merge pull request #1 from recitalsoftware/154-thinking
rhys117 Jul 17, 2025
5577bae
chore: update anthropic capabilities with thinking
rhys117 Jul 18, 2025
5c02af2
chore: move temperature setting to param
rhys117 Jul 18, 2025
153440c
chore: use 'thinking' capability instead of reasoning in Model::Info
rhys117 Jul 18, 2025
627ffe0
chore: allow thinking capabilities on assumed models
rhys117 Jul 18, 2025
8a6453d
bug: fix call to check if thinking supported in 'with_thinking'
rhys117 Jul 18, 2025
cc1ce5f
test: add basic spec for anthropic models
rhys117 Jul 18, 2025
87fa6a5
Merge branch 'main' into 154-thinking
rhys117 Jul 18, 2025
06daa1c
bug: ensure render_payload args compatibility across all providers
rhys117 Jul 18, 2025
cfd37e4
Merge remote-tracking branch 'with_thinking/154-thinking' into jkogar…
jkogara Oct 29, 2025
bd657b6
Working version with streaming and tool use
jkogara Oct 29, 2025
8aea5af
Fix the non streaming version
jkogara Oct 29, 2025
3363d83
Updates for haiku-4-5
jkogara Oct 29, 2025
7a46873
Provide reasoning output in openai and openrouter
jkogara Oct 30, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 23 additions & 1 deletion lib/ruby_llm/chat.rb
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,9 @@ def initialize(model: nil, provider: nil, assume_model_exists: false, context: n
@config = context&.config || RubyLLM.config
model_id = model || @config.default_model
with_model(model_id, provider: provider, assume_exists: assume_model_exists)
@temperature = nil
@thinking = @config.default_thinking
@thinking_budget = @config.default_thinking_budget
@temperature = @config.default_temperature
@messages = []
@tools = {}
@params = {}
Expand Down Expand Up @@ -58,7 +60,9 @@ def with_tools(*tools, replace: false)

# Switches this chat to a different model (and optionally provider), then
# rebuilds the provider connection. Returns self for chaining.
def with_model(model_id, provider: nil, assume_exists: false)
@model, @provider = Models.resolve(model_id, provider:, assume_exists:, config: @config)
# TODO: Currently the unsupported errors will not retrigger after model reassignment.
@connection = @provider.connection

self
end

Expand All @@ -67,6 +71,18 @@ def with_temperature(temperature)
self
end

# Enables (or disables) extended thinking for this chat.
#
# @param thinking [Boolean] whether to enable thinking (default: true)
# @param budget [Integer, nil] thinking token budget; keeps the current budget when nil
# @param temperature [Numeric] temperature to use while thinking (default: 1)
# @raise [UnsupportedThinkingError] if enabling thinking on a model without the capability
# @return [self]
def with_thinking(thinking: true, budget: nil, temperature: 1)
  raise UnsupportedThinkingError, "Model #{@model.id} doesn't support thinking" if thinking && !@model.thinking?

  @thinking = thinking

  # Only apply thinking-related tuning when enabling; disabling must not
  # clobber a previously configured temperature or budget.
  if thinking
    # Most thinking models require temperature 1, so default to it here while
    # still allowing callers to override via the temperature: param.
    @temperature = temperature
    @thinking_budget = budget if budget
  end

  self
end

def with_context(context)
@context = context
@config = context.config
Expand Down Expand Up @@ -127,6 +143,8 @@ def complete(&) # rubocop:disable Metrics/PerceivedComplexity
tools: @tools,
temperature: @temperature,
model: @model,
thinking: @thinking,
thinking_budget: @thinking_budget,
params: @params,
headers: @headers,
schema: @schema,
Expand Down Expand Up @@ -163,6 +181,10 @@ def reset_messages!
@messages.clear
end

# Whether extended thinking is currently enabled for this chat.
def thinking? = @thinking

# Hides the connection and config ivars from listings (e.g. inspect output).
def instance_variables
  hidden = %i[@connection @config]
  super.reject { |ivar| hidden.include?(ivar) }
end
Expand Down
10 changes: 10 additions & 0 deletions lib/ruby_llm/configuration.rb
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,10 @@ class Configuration
:model_registry_class,
# Rails integration
:use_new_acts_as,
# Default model settings
:default_temperature,
:default_thinking,
:default_thinking_budget,
# Connection configuration
:request_timeout,
:max_retries,
Expand Down Expand Up @@ -66,6 +70,12 @@ def initialize
@model_registry_class = 'Model'
@use_new_acts_as = false

# Default model settings
@default_thinking = false
@default_thinking_budget = 2048
@default_temperature = 0.7

# Logging configuration
@log_file = $stdout
@log_level = ENV['RUBYLLM_DEBUG'] ? Logger::DEBUG : Logger::INFO
@log_stream_debug = ENV['RUBYLLM_STREAM_DEBUG'] == 'true'
Expand Down
1 change: 1 addition & 0 deletions lib/ruby_llm/error.rb
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ class ConfigurationError < StandardError; end
class InvalidRoleError < StandardError; end
class ModelNotFoundError < StandardError; end
class UnsupportedAttachmentError < StandardError; end
class UnsupportedThinkingError < StandardError; end

# Error classes for different HTTP status codes
class BadRequestError < Error; end
Expand Down
4 changes: 3 additions & 1 deletion lib/ruby_llm/message.rb
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,14 @@ class Message
ROLES = %i[system user assistant tool].freeze

attr_reader :role, :model_id, :tool_calls, :tool_call_id, :input_tokens, :output_tokens,
:cached_tokens, :cache_creation_tokens, :raw
:cached_tokens, :cache_creation_tokens, :raw, :thinking, :signature
attr_writer :content

def initialize(options = {})
@role = options.fetch(:role).to_sym
@content = normalize_content(options.fetch(:content))
@thinking = options[:thinking]
@signature = options[:signature]
@model_id = options[:model_id]
@tool_calls = options[:tool_calls]
@tool_call_id = options[:tool_call_id]
Expand Down
2 changes: 1 addition & 1 deletion lib/ruby_llm/model/info.rb
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ def supports?(capability)
capabilities.include?(capability.to_s)
end

%w[function_calling structured_output batch reasoning citations streaming].each do |cap|
%w[function_calling structured_output batch reasoning citations streaming thinking].each do |cap|
define_method "#{cap}?" do
supports?(cap)
end
Expand Down
24 changes: 18 additions & 6 deletions lib/ruby_llm/models.json
Original file line number Diff line number Diff line change
Expand Up @@ -262,7 +262,8 @@
]
},
"capabilities": [
"function_calling"
"function_calling",
"thinking"
],
"pricing": {
"text_tokens": {
Expand Down Expand Up @@ -490,7 +491,8 @@
]
},
"capabilities": [
"function_calling"
"function_calling",
"thinking"
],
"pricing": {
"text_tokens": {
Expand Down Expand Up @@ -521,7 +523,8 @@
]
},
"capabilities": [
"function_calling"
"function_calling",
"thinking"
],
"pricing": {
"text_tokens": {
Expand Down Expand Up @@ -1896,7 +1899,12 @@
},
"capabilities": [
"streaming",
"function_calling"
"function_calling",
"structured_output",
"reasoning",
"thinking",
"batch",
"citations"
],
"pricing": {
"text_tokens": {
Expand Down Expand Up @@ -1946,7 +1954,9 @@
},
"capabilities": [
"streaming",
"function_calling"
"function_calling",
"structured_output",
"thinking"
],
"pricing": {
"text_tokens": {
Expand Down Expand Up @@ -1996,7 +2006,9 @@
},
"capabilities": [
"streaming",
"function_calling"
"function_calling",
"structured_output",
"thinking"
],
"pricing": {
"text_tokens": {
Expand Down
5 changes: 4 additions & 1 deletion lib/ruby_llm/provider.rb
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,8 @@ def configuration_requirements
self.class.configuration_requirements
end

def complete(messages, tools:, temperature:, model:, params: {}, headers: {}, schema: nil, &) # rubocop:disable Metrics/ParameterLists
def complete(messages, tools:, temperature:, model:, thinking:, thinking_budget:, # rubocop:disable Metrics/ParameterLists
params: {}, headers: {}, schema: nil, &)
normalized_temperature = maybe_normalize_temperature(temperature, model)

payload = Utils.deep_merge(
Expand All @@ -46,6 +47,8 @@ def complete(messages, tools:, temperature:, model:, params: {}, headers: {}, sc
tools: tools,
temperature: normalized_temperature,
model: model,
thinking: thinking,
thinking_budget: thinking_budget,
stream: block_given?,
schema: schema
),
Expand Down
22 changes: 15 additions & 7 deletions lib/ruby_llm/providers/anthropic/capabilities.rb
Original file line number Diff line number Diff line change
Expand Up @@ -39,17 +39,20 @@ def supports_json_mode?(model_id)
end

def supports_extended_thinking?(model_id)
model_id.match?(/claude-3-7-sonnet/)
model_id.match?(/claude-3-7-sonnet|claude-sonnet-4|claude-opus-4|claude-haiku-4/)
end

# Maps a raw model id to its family name, used for capability/pricing lookup.
# First matching pattern wins, so more specific patterns come first.
def model_family(model_id)
case model_id
when /claude-sonnet-4/ then 'claude-sonnet-4'
when /claude-opus-4/ then 'claude-opus-4'
when /claude-3-7-sonnet/ then 'claude-3-7-sonnet'
when /claude-3-5-sonnet/ then 'claude-3-5-sonnet'
when /claude-3-5-haiku/ then 'claude-3-5-haiku'
when /claude-3-opus/ then 'claude-3-opus'
when /claude-3-sonnet/ then 'claude-3-sonnet'
when /claude-3-haiku/ then 'claude-3-haiku'
# NOTE(review): /claude-4-5-haiku/ looks unreachable for ids shaped like
# "claude-haiku-4-5" (compare supports_extended_thinking?, which matches
# /claude-haiku-4/) — confirm the intended id format and pattern.
when /claude-4-5-haiku/ then 'claude-4-haiku'
else 'claude-2'
end
end
Expand Down Expand Up @@ -92,13 +95,17 @@ def modalities_for(model_id)
def capabilities_for(model_id)
capabilities = ['streaming']

if model_id.match?(/claude-3/)
if model_id.match?(/claude-3|claude-sonnet-4|claude-opus-4|claude-haiku-4/)
capabilities << 'function_calling'
capabilities << 'batch'
end

capabilities << 'reasoning' if model_id.match?(/claude-3-7|-4/)
capabilities << 'citations' if model_id.match?(/claude-3\.5|claude-3-7/)
# Extended thinking for Claude 3.7 and Claude 4
capabilities << 'thinking' if supports_extended_thinking?(model_id)

# Citations
capabilities << 'citations' if model_id.match?(/claude-3\.5|claude-3-7|claude-sonnet-4|claude-opus-4/)

capabilities
end

Expand All @@ -116,9 +123,10 @@ def pricing_for(model_id)
output_per_million: prices[:output] * 0.5
}

if model_id.match?(/claude-3-7/)
standard_pricing[:reasoning_output_per_million] = prices[:output] * 2.5
batch_pricing[:reasoning_output_per_million] = prices[:output] * 1.25
# Add thinking output pricing for 3.7 and 4 models
if model_id.match?(/claude-3-7|claude-sonnet-4|claude-opus-4/)
standard_pricing[:thinking_output_per_million] = prices[:output] * 2.5
batch_pricing[:thinking_output_per_million] = prices[:output] * 1.25
end

{
Expand Down
31 changes: 25 additions & 6 deletions lib/ruby_llm/providers/anthropic/chat.rb
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,13 @@ def completion_url
'/v1/messages'
end

def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil) # rubocop:disable Metrics/ParameterLists
def render_payload(messages, tools:, temperature:, model:, thinking:, # rubocop:disable Metrics/ParameterLists
thinking_budget:, stream: false, schema: nil)
system_messages, chat_messages = separate_messages(messages)
system_content = build_system_content(system_messages, schema)

build_base_payload(chat_messages, model, stream).tap do |payload|
add_optional_fields(payload, system_content:, tools:, temperature:)
add_optional_fields(payload, system_content:, tools:, temperature:, thinking:, thinking_budget:)
end
end

Expand Down Expand Up @@ -51,34 +52,50 @@ def build_system_content(system_messages, schema)
def build_base_payload(chat_messages, model, stream)
{
model: model.id,
messages: chat_messages.map { |msg| format_message(msg) },
messages: chat_messages.map { |msg| format_message(msg) }.flatten,
stream: stream,
max_tokens: model.max_tokens || 4096
}
end

def add_optional_fields(payload, system_content:, tools:, temperature:)
# Merges the optional request fields (tools, system prompt, temperature and
# thinking config) into the payload, skipping any that are absent.
def add_optional_fields(payload, system_content:, tools:, thinking:, thinking_budget:, temperature:) # rubocop:disable Metrics/ParameterLists
  payload[:tools] = tools.values.map { |tool| Tools.function_for(tool) } unless tools.empty?
  payload[:system] = system_content unless system_content.empty?
  payload[:temperature] = temperature unless temperature.nil?
  payload[:thinking] = { type: 'enabled', budget_tokens: thinking_budget } if thinking
end

def parse_completion_response(response)
data = response.body
RubyLLM.logger.debug("Anthropic response: #{data}")

content_blocks = data['content'] || []

thinking_content, signature = extract_thinking_content(content_blocks)
text_content = extract_text_content(content_blocks)
tool_use_blocks = Tools.find_tool_uses(content_blocks)

build_message(data, text_content, tool_use_blocks, response)
build_message(data, text_content, tool_use_blocks, thinking_content, signature, response)
end

# Pulls the thinking text and its signature out of the response content blocks.
# Multiple thinking blocks are joined in order; only the first signature is
# kept (it is what gets replayed back to the API with the thinking block).
# Returns [thinking_text, signature_or_nil].
def extract_thinking_content(blocks)
  thinking_blocks = blocks.select { |block| block['type'] == 'thinking' }
  thinking = thinking_blocks.map { |block| block['thinking'] }.join
  # filter_map already drops nils, so no extra .compact is needed.
  signature = thinking_blocks.filter_map { |block| block['signature'] }.first
  [thinking, signature]
end

# Concatenates every text block's content, in order, into a single string.
def extract_text_content(blocks)
  blocks.filter_map { |block| block['text'] if block['type'] == 'text' }.join
end

def build_message(data, content, tool_use_blocks, response)
def build_message(data, content, tool_use_blocks, thinking_content, signature, response) # rubocop:disable Metrics/ParameterLists
usage = data['usage'] || {}
cached_tokens = usage['cache_read_input_tokens']
cache_creation_tokens = usage['cache_creation_input_tokens']
Expand All @@ -89,6 +106,8 @@ def build_message(data, content, tool_use_blocks, response)
Message.new(
role: :assistant,
content: content,
thinking: thinking_content,
signature: signature,
tool_calls: Tools.parse_tool_calls(tool_use_blocks),
input_tokens: usage['input_tokens'],
output_tokens: usage['output_tokens'],
Expand Down
2 changes: 2 additions & 0 deletions lib/ruby_llm/providers/anthropic/streaming.rb
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ def build_chunk(data)
role: :assistant,
model_id: extract_model_id(data),
content: data.dig('delta', 'text'),
thinking: data.dig('delta', 'thinking'),
signature: data.dig('delta', 'signature'),
input_tokens: extract_input_tokens(data),
output_tokens: extract_output_tokens(data),
cached_tokens: extract_cached_tokens(data),
Expand Down
24 changes: 20 additions & 4 deletions lib/ruby_llm/providers/anthropic/tools.rb
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,26 @@ def format_tool_call(msg)
content << format_tool_use_block(tool_call)
end

{
role: 'assistant',
content:
}
if msg.thinking
[
{
role: 'assistant',
content: [
{ type: 'thinking', thinking: msg.thinking, signature: msg.signature }
]

},
{
role: 'assistant',
content:
}
]
else
{
role: 'assistant',
content:
}
end
end

def format_tool_result(msg)
Expand Down
Loading