Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
81ed712
feat: wip - add thinking content to messages
rhys117 May 14, 2025
a47870a
Merge branch 'main' into 154-thinking
rhys117 Jun 6, 2025
b6e1bb0
chore: add thinking to capabilities
rhys117 Jun 6, 2025
ecb69c9
chore: pass thinking through from chat initialisation
rhys117 Jun 6, 2025
a014b77
chore: add very basic config for thinking budget through global confi…
rhys117 Jun 6, 2025
ddb0ae1
bug: fix config missing comma
rhys117 Jun 6, 2025
6d66491
chore: add streaming content
rhys117 Jun 6, 2025
7da672e
chore: rename to use existing reasoning capability
rhys117 Jun 6, 2025
c948b0e
Merge branch 'main' into 154-thinking
rhys117 Jun 22, 2025
6b4fb83
chore: rename to thinking
rhys117 Jun 22, 2025
7ec6733
Get thinking working with bedrock
hiemanshu Jun 27, 2025
8709018
Merge branch 'main' into 154-thinking
crmne Jul 16, 2025
b8fb932
Merge pull request #1 from recitalsoftware/154-thinking
rhys117 Jul 17, 2025
5577bae
chore: update anthropic capabilities with thinking
rhys117 Jul 18, 2025
5c02af2
chore: move temperature setting to param
rhys117 Jul 18, 2025
153440c
chore: use 'thinking' capability instead of reasoning in Model::Info
rhys117 Jul 18, 2025
627ffe0
chore: allow thinking capabilities on assumed models
rhys117 Jul 18, 2025
8a6453d
bug: fix call to check if thinking supported in 'with_thinking'
rhys117 Jul 18, 2025
cc1ce5f
test: add basic spec for anthropic models
rhys117 Jul 18, 2025
87fa6a5
Merge branch 'main' into 154-thinking
rhys117 Jul 18, 2025
06daa1c
bug: ensure render_payload args compatibility across all providers
rhys117 Jul 18, 2025
cfd37e4
Merge remote-tracking branch 'with_thinking/154-thinking' into jkogar…
jkogara Oct 29, 2025
bd657b6
Working version with streaming and tool use
jkogara Oct 29, 2025
8aea5af
Fix the non streaming version
jkogara Oct 29, 2025
3363d83
Updates for haiku-4-5
jkogara Oct 29, 2025
7a46873
Provide reasoning output in openai and openrouter
jkogara Oct 30, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 23 additions & 1 deletion lib/ruby_llm/chat.rb
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,9 @@ def initialize(model: nil, provider: nil, assume_model_exists: false, context: n
@config = context&.config || RubyLLM.config
model_id = model || @config.default_model
with_model(model_id, provider: provider, assume_exists: assume_model_exists)
@temperature = nil
@thinking = @config.default_thinking
@thinking_budget = @config.default_thinking_budget
@temperature = @config.default_temperature
@messages = []
@tools = {}
@params = {}
Expand Down Expand Up @@ -58,7 +60,9 @@ def with_tools(*tools, replace: false)

# Switches this chat to a different model (and optionally provider), then
# rebuilds the provider connection. Returns self for chaining.
def with_model(model_id, provider: nil, assume_exists: false)
@model, @provider = Models.resolve(model_id, provider:, assume_exists:, config: @config)
# TODO: Currently the unsupported errors will not retrigger after model reassignment.
@connection = @provider.connection

self
end

Expand All @@ -67,6 +71,18 @@ def with_temperature(temperature)
self
end

# Enables (or disables) extended thinking for this chat.
#
# @param thinking [Boolean] whether to enable thinking (default: true)
# @param budget [Integer, nil] thinking token budget; keeps the current budget when nil
# @param temperature [Numeric] temperature to use while thinking (default: 1)
# @raise [UnsupportedThinkingError] if enabling thinking on a model without the capability
# @return [self]
def with_thinking(thinking: true, budget: nil, temperature: 1)
  raise UnsupportedThinkingError, "Model #{@model.id} doesn't support thinking" if thinking && !@model.thinking?

  @thinking = thinking

  # Only apply thinking-related tuning when enabling; disabling must not
  # clobber a previously configured temperature or budget.
  if thinking
    # Most thinking models require temperature 1, so default to it here while
    # still allowing callers to override via the temperature: param.
    @temperature = temperature
    @thinking_budget = budget if budget
  end

  self
end

def with_context(context)
@context = context
@config = context.config
Expand Down Expand Up @@ -127,6 +143,8 @@ def complete(&) # rubocop:disable Metrics/PerceivedComplexity
tools: @tools,
temperature: @temperature,
model: @model,
thinking: @thinking,
thinking_budget: @thinking_budget,
params: @params,
headers: @headers,
schema: @schema,
Expand Down Expand Up @@ -163,6 +181,10 @@ def reset_messages!
@messages.clear
end

# Whether extended thinking is currently enabled for this chat.
def thinking? = @thinking

# Hides the connection and config ivars from listings (e.g. inspect output).
def instance_variables
  hidden = %i[@connection @config]
  super.reject { |ivar| hidden.include?(ivar) }
end
Expand Down
10 changes: 10 additions & 0 deletions lib/ruby_llm/configuration.rb
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,10 @@ class Configuration
:model_registry_class,
# Rails integration
:use_new_acts_as,
# Default model settings
:default_temperature,
:default_thinking,
:default_thinking_budget,
# Connection configuration
:request_timeout,
:max_retries,
Expand Down Expand Up @@ -66,6 +70,12 @@ def initialize
@model_registry_class = 'Model'
@use_new_acts_as = false

# Default model settings
@default_thinking = false
@default_thinking_budget = 2048
@default_temperature = 0.7

# Logging configuration
@log_file = $stdout
@log_level = ENV['RUBYLLM_DEBUG'] ? Logger::DEBUG : Logger::INFO
@log_stream_debug = ENV['RUBYLLM_STREAM_DEBUG'] == 'true'
Expand Down
1 change: 1 addition & 0 deletions lib/ruby_llm/error.rb
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ class ConfigurationError < StandardError; end
class InvalidRoleError < StandardError; end
class ModelNotFoundError < StandardError; end
class UnsupportedAttachmentError < StandardError; end
class UnsupportedThinkingError < StandardError; end

# Error classes for different HTTP status codes
class BadRequestError < Error; end
Expand Down
4 changes: 3 additions & 1 deletion lib/ruby_llm/message.rb
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,14 @@ class Message
ROLES = %i[system user assistant tool].freeze

attr_reader :role, :model_id, :tool_calls, :tool_call_id, :input_tokens, :output_tokens,
:cached_tokens, :cache_creation_tokens, :raw
:cached_tokens, :cache_creation_tokens, :raw, :thinking, :signature
attr_writer :content

def initialize(options = {})
@role = options.fetch(:role).to_sym
@content = normalize_content(options.fetch(:content))
@thinking = options[:thinking]
@signature = options[:signature]
@model_id = options[:model_id]
@tool_calls = options[:tool_calls]
@tool_call_id = options[:tool_call_id]
Expand Down
2 changes: 1 addition & 1 deletion lib/ruby_llm/model/info.rb
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ def supports?(capability)
capabilities.include?(capability.to_s)
end

%w[function_calling structured_output batch reasoning citations streaming].each do |cap|
%w[function_calling structured_output batch reasoning citations streaming thinking].each do |cap|
define_method "#{cap}?" do
supports?(cap)
end
Expand Down
24 changes: 18 additions & 6 deletions lib/ruby_llm/models.json
Original file line number Diff line number Diff line change
Expand Up @@ -262,7 +262,8 @@
]
},
"capabilities": [
"function_calling"
"function_calling",
"thinking"
],
"pricing": {
"text_tokens": {
Expand Down Expand Up @@ -490,7 +491,8 @@
]
},
"capabilities": [
"function_calling"
"function_calling",
"thinking"
],
"pricing": {
"text_tokens": {
Expand Down Expand Up @@ -521,7 +523,8 @@
]
},
"capabilities": [
"function_calling"
"function_calling",
"thinking"
],
"pricing": {
"text_tokens": {
Expand Down Expand Up @@ -1896,7 +1899,12 @@
},
"capabilities": [
"streaming",
"function_calling"
"function_calling",
"structured_output",
"reasoning",
"thinking",
"batch",
"citations"
],
"pricing": {
"text_tokens": {
Expand Down Expand Up @@ -1946,7 +1954,9 @@
},
"capabilities": [
"streaming",
"function_calling"
"function_calling",
"structured_output",
"thinking"
],
"pricing": {
"text_tokens": {
Expand Down Expand Up @@ -1996,7 +2006,9 @@
},
"capabilities": [
"streaming",
"function_calling"
"function_calling",
"structured_output",
"thinking"
],
"pricing": {
"text_tokens": {
Expand Down
5 changes: 4 additions & 1 deletion lib/ruby_llm/provider.rb
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,8 @@ def configuration_requirements
self.class.configuration_requirements
end

def complete(messages, tools:, temperature:, model:, params: {}, headers: {}, schema: nil, &) # rubocop:disable Metrics/ParameterLists
def complete(messages, tools:, temperature:, model:, thinking:, thinking_budget:, # rubocop:disable Metrics/ParameterLists
params: {}, headers: {}, schema: nil, &)
normalized_temperature = maybe_normalize_temperature(temperature, model)

payload = Utils.deep_merge(
Expand All @@ -46,6 +47,8 @@ def complete(messages, tools:, temperature:, model:, params: {}, headers: {}, sc
tools: tools,
temperature: normalized_temperature,
model: model,
thinking: thinking,
thinking_budget: thinking_budget,
stream: block_given?,
schema: schema
),
Expand Down
22 changes: 15 additions & 7 deletions lib/ruby_llm/providers/anthropic/capabilities.rb
Original file line number Diff line number Diff line change
Expand Up @@ -39,17 +39,20 @@ def supports_json_mode?(model_id)
end

def supports_extended_thinking?(model_id)
model_id.match?(/claude-3-7-sonnet/)
model_id.match?(/claude-3-7-sonnet|claude-sonnet-4|claude-opus-4|claude-haiku-4/)
end

# Maps a raw model id to its family name, used for capability/pricing lookup.
# First matching pattern wins, so more specific patterns come first.
def model_family(model_id)
case model_id
when /claude-sonnet-4/ then 'claude-sonnet-4'
when /claude-opus-4/ then 'claude-opus-4'
when /claude-3-7-sonnet/ then 'claude-3-7-sonnet'
when /claude-3-5-sonnet/ then 'claude-3-5-sonnet'
when /claude-3-5-haiku/ then 'claude-3-5-haiku'
when /claude-3-opus/ then 'claude-3-opus'
when /claude-3-sonnet/ then 'claude-3-sonnet'
when /claude-3-haiku/ then 'claude-3-haiku'
# NOTE(review): /claude-4-5-haiku/ looks unreachable for ids shaped like
# "claude-haiku-4-5" (compare supports_extended_thinking?, which matches
# /claude-haiku-4/) — confirm the intended id format and pattern.
when /claude-4-5-haiku/ then 'claude-4-haiku'
else 'claude-2'
end
end
Expand Down Expand Up @@ -92,13 +95,17 @@ def modalities_for(model_id)
def capabilities_for(model_id)
capabilities = ['streaming']

if model_id.match?(/claude-3/)
if model_id.match?(/claude-3|claude-sonnet-4|claude-opus-4|claude-haiku-4/)
capabilities << 'function_calling'
capabilities << 'batch'
end

capabilities << 'reasoning' if model_id.match?(/claude-3-7|-4/)
capabilities << 'citations' if model_id.match?(/claude-3\.5|claude-3-7/)
# Extended thinking for Claude 3.7 and Claude 4
capabilities << 'thinking' if supports_extended_thinking?(model_id)

# Citations
capabilities << 'citations' if model_id.match?(/claude-3\.5|claude-3-7|claude-sonnet-4|claude-opus-4/)

capabilities
end

Expand All @@ -116,9 +123,10 @@ def pricing_for(model_id)
output_per_million: prices[:output] * 0.5
}

if model_id.match?(/claude-3-7/)
standard_pricing[:reasoning_output_per_million] = prices[:output] * 2.5
batch_pricing[:reasoning_output_per_million] = prices[:output] * 1.25
# Add thinking output pricing for 3.7 and 4 models
if model_id.match?(/claude-3-7|claude-sonnet-4|claude-opus-4/)
standard_pricing[:thinking_output_per_million] = prices[:output] * 2.5
batch_pricing[:thinking_output_per_million] = prices[:output] * 1.25
end

{
Expand Down
31 changes: 25 additions & 6 deletions lib/ruby_llm/providers/anthropic/chat.rb
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,13 @@ def completion_url
'/v1/messages'
end

def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil) # rubocop:disable Metrics/ParameterLists
def render_payload(messages, tools:, temperature:, model:, thinking:, # rubocop:disable Metrics/ParameterLists
thinking_budget:, stream: false, schema: nil)
system_messages, chat_messages = separate_messages(messages)
system_content = build_system_content(system_messages, schema)

build_base_payload(chat_messages, model, stream).tap do |payload|
add_optional_fields(payload, system_content:, tools:, temperature:)
add_optional_fields(payload, system_content:, tools:, temperature:, thinking:, thinking_budget:)
end
end

Expand Down Expand Up @@ -51,34 +52,50 @@ def build_system_content(system_messages, schema)
def build_base_payload(chat_messages, model, stream)
{
model: model.id,
messages: chat_messages.map { |msg| format_message(msg) },
messages: chat_messages.map { |msg| format_message(msg) }.flatten,
stream: stream,
max_tokens: model.max_tokens || 4096
}
end

def add_optional_fields(payload, system_content:, tools:, temperature:)
# Merges the optional request fields (tools, system prompt, temperature and
# thinking config) into the payload, skipping any that are absent.
def add_optional_fields(payload, system_content:, tools:, thinking:, thinking_budget:, temperature:) # rubocop:disable Metrics/ParameterLists
  payload[:tools] = tools.values.map { |tool| Tools.function_for(tool) } unless tools.empty?
  payload[:system] = system_content unless system_content.empty?
  payload[:temperature] = temperature unless temperature.nil?
  payload[:thinking] = { type: 'enabled', budget_tokens: thinking_budget } if thinking
end

def parse_completion_response(response)
data = response.body
RubyLLM.logger.debug("Anthropic response: #{data}")

content_blocks = data['content'] || []

thinking_content, signature = extract_thinking_content(content_blocks)
text_content = extract_text_content(content_blocks)
tool_use_blocks = Tools.find_tool_uses(content_blocks)

build_message(data, text_content, tool_use_blocks, response)
build_message(data, text_content, tool_use_blocks, thinking_content, signature, response)
end

# Pulls the thinking text and its signature out of the response content blocks.
# Multiple thinking blocks are joined in order; only the first signature is
# kept (it is what gets replayed back to the API with the thinking block).
# Returns [thinking_text, signature_or_nil].
def extract_thinking_content(blocks)
  thinking_blocks = blocks.select { |block| block['type'] == 'thinking' }
  thinking = thinking_blocks.map { |block| block['thinking'] }.join
  # filter_map already drops nils, so no extra .compact is needed.
  signature = thinking_blocks.filter_map { |block| block['signature'] }.first
  [thinking, signature]
end

# Concatenates every text block's content, in order, into a single string.
def extract_text_content(blocks)
  blocks.filter_map { |block| block['text'] if block['type'] == 'text' }.join
end

def build_message(data, content, tool_use_blocks, response)
def build_message(data, content, tool_use_blocks, thinking_content, signature, response) # rubocop:disable Metrics/ParameterLists
usage = data['usage'] || {}
cached_tokens = usage['cache_read_input_tokens']
cache_creation_tokens = usage['cache_creation_input_tokens']
Expand All @@ -89,6 +106,8 @@ def build_message(data, content, tool_use_blocks, response)
Message.new(
role: :assistant,
content: content,
thinking: thinking_content,
signature: signature,
tool_calls: Tools.parse_tool_calls(tool_use_blocks),
input_tokens: usage['input_tokens'],
output_tokens: usage['output_tokens'],
Expand Down
2 changes: 2 additions & 0 deletions lib/ruby_llm/providers/anthropic/streaming.rb
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ def build_chunk(data)
role: :assistant,
model_id: extract_model_id(data),
content: data.dig('delta', 'text'),
thinking: data.dig('delta', 'thinking'),
signature: data.dig('delta', 'signature'),
input_tokens: extract_input_tokens(data),
output_tokens: extract_output_tokens(data),
cached_tokens: extract_cached_tokens(data),
Expand Down
24 changes: 20 additions & 4 deletions lib/ruby_llm/providers/anthropic/tools.rb
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,26 @@ def format_tool_call(msg)
content << format_tool_use_block(tool_call)
end

{
role: 'assistant',
content:
}
if msg.thinking
[
{
role: 'assistant',
content: [
{ type: 'thinking', thinking: msg.thinking, signature: msg.signature }
]

},
{
role: 'assistant',
content:
}
]
else
{
role: 'assistant',
content:
}
end
end

def format_tool_result(msg)
Expand Down
Loading