From 33c5230cac9a817d547dbf707527f3f0cbd6cb8c Mon Sep 17 00:00:00 2001 From: Ufuk Kayserilioglu Date: Thu, 19 Mar 2026 21:58:08 +0200 Subject: [PATCH] Enforce frozen backing strings for StringView, Pool, and reset! MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace rb_str_freeze (implicit freeze) with sv_check_frozen_string (explicit check) — backing strings must now be frozen before being passed to StringView.new, Pool.new, or reset!. This prevents silent mutation of strings that views point into. - Merge sv_check_string + sv_check_frozen into sv_check_frozen_string - Update all tests to pass frozen strings - Update README to document the frozen requirement --- README.md | 9 ++++---- ext/string_view/string_view.c | 6 ++--- ext/string_view/string_view.h | 9 ++++++-- ext/string_view/string_view_pool.c | 3 +-- test/parsing/sexp_parser.rb | 3 ++- test/test_pool.rb | 13 +++++------ test/test_string_view.rb | 36 ++++++++++++++---------------- 7 files changed, 40 insertions(+), 39 deletions(-) diff --git a/README.md b/README.md index 180bba2..3981444 100644 --- a/README.md +++ b/README.md @@ -39,6 +39,7 @@ StringView methods are organized into three tiers based on their allocation beha ### Design decisions +- **Backing strings must be frozen.** `StringView.new`, `reset!`, and `Pool.new` all raise `FrozenError` if the string is not already frozen. This guarantees the backing bytes cannot be mutated or reallocated while views point into them. Use `String#view` (via `require "string_view/core_ext"`) if you want automatic freezing. - **`to_str` is private.** Ruby's internal coercion protocol (`rb_check_string_type`) ignores method visibility, so StringView works seamlessly with `Regexp#=~`, string interpolation, and `String#+`. But `respond_to?(:to_str)` returns `false`, so code that explicitly checks for string-like objects won't treat StringView as a drop-in String replacement. When coercion happens, it returns a frozen shared string (zero-copy for heap-allocated backings). - **All bang methods raise `FrozenError`.** StringView is immutable — `upcase!`, `gsub!`, `slice!`, etc. all raise immediately. - **`method_missing` is a safety net.** Any String method not yet implemented natively raises `NotImplementedError` with a message telling you to call `.to_s.method_name(...)` explicitly. No silent fallback. @@ -151,7 +152,7 @@ The gem includes a C extension that compiles during installation. It requires Ru ```ruby require "string_view" -# Create from a String (freezes the backing automatically) +# Create from a frozen String sv = StringView.new("Hello, world!") # Slicing returns StringView — zero copy @@ -179,7 +180,7 @@ sv.split(", ") # => ["Hello", "world!"] (Array of Strings) ```ruby # Simulate a large HTTP response or log chunk -buffer = File.read("large_file.txt") +buffer = File.read("large_file.txt").freeze sv = StringView.new(buffer) # Extract fields without copying the entire buffer @@ -196,7 +197,7 @@ body.match?(/\d{4}-\d{2}-\d{2}/) # Regex on the view ```ruby sv = StringView.new("initial content") -# Re-point at a different backing string +# Re-point at a different (frozen) backing string new_data = "different content" sv.reset!(new_data, 0, new_data.bytesize) sv.to_s # => "different content" @@ -250,7 +251,7 @@ For UTF-8 strings with actual multibyte content, StringView uses two techniques: ### Compilation -The extension is compiled with `-O3` and uses `__attribute__((always_inline))` on hot paths, `RTYPEDDATA_GET_DATA` for fast struct access (skipping type checks), and `FL_SET_RAW` for freeze (bypassing method dispatch). +The extension is compiled with `-O3` and uses `__attribute__((always_inline))` on hot paths and `RTYPEDDATA_GET_DATA` for fast struct access (skipping type checks). ## Development diff --git a/ext/string_view/string_view.c b/ext/string_view/string_view.c index 2e86fc4..dc27d81 100644 --- a/ext/string_view/string_view.c +++ b/ext/string_view/string_view.c @@ -155,8 +155,7 @@ static VALUE sv_initialize(int argc, VALUE *argv, VALUE self) { rb_scan_args(argc, argv, "12", &str, &voffset, &vlength); - sv_check_string(str); - rb_str_freeze(str); + sv_check_frozen_string(str); long backing_len = RSTRING_LEN(str); @@ -218,8 +217,7 @@ static VALUE sv_reset(VALUE self, VALUE new_backing, VALUE voffset, VALUE vlengt rb_check_frozen(self); string_view_t *sv = sv_get_struct(self); - sv_check_string(new_backing); - rb_str_freeze(new_backing); + sv_check_frozen_string(new_backing); long off = NUM2LONG(voffset); long len = NUM2LONG(vlength); diff --git a/ext/string_view/string_view.h b/ext/string_view/string_view.h index c007057..56b15b1 100644 --- a/ext/string_view/string_view.h +++ b/ext/string_view/string_view.h @@ -58,13 +58,18 @@ extern const rb_data_type_t string_view_type; /* Forward-declared helpers (defined in string_view.c) */ int sv_compute_single_byte(VALUE backing, rb_encoding *enc); -/* Validate that str is a T_STRING, raise TypeError otherwise. */ -SV_INLINE void sv_check_string(VALUE str) { +/* Validate that str is a frozen T_STRING. Raises TypeError if not a + * String, FrozenError if not frozen. */ +SV_INLINE void sv_check_frozen_string(VALUE str) { if (SV_UNLIKELY(!RB_TYPE_P(str, T_STRING))) { rb_raise(rb_eTypeError, "no implicit conversion of %s into String", rb_obj_classname(str)); } + if (SV_UNLIKELY(!OBJ_FROZEN(str))) { + rb_raise(rb_eFrozenError, + "string must be frozen; call .freeze before creating a view"); + } } /* Validate byte offset + length against a backing string's bytesize. diff --git a/ext/string_view/string_view_pool.c b/ext/string_view/string_view_pool.c index 9b78dd4..873bad5 100644 --- a/ext/string_view/string_view_pool.c +++ b/ext/string_view/string_view_pool.c @@ -84,8 +84,7 @@ static void pool_grow(sv_pool_t *pool, VALUE pool_obj) { */ static VALUE pool_initialize(VALUE self, VALUE str) { sv_pool_t *pool = (sv_pool_t *)RTYPEDDATA_GET_DATA(self); - sv_check_string(str); - rb_str_freeze(str); + sv_check_frozen_string(str); RB_OBJ_WRITE(self, &pool->backing, str); pool->base = RSTRING_PTR(str); diff --git a/test/parsing/sexp_parser.rb b/test/parsing/sexp_parser.rb index 1e07f15..a1987cb 100644 --- a/test/parsing/sexp_parser.rb +++ b/test/parsing/sexp_parser.rb @@ -71,10 +71,11 @@ def initialize(source, mode: :strict) when String then source else source.to_s end + raw = raw.freeze case mode when :string - @source = raw.freeze + @source = raw @slicer = ->(off, len) { @source.byteslice(off, len) } when :string_view @source = StringView.new(raw) diff --git a/test/test_pool.rb b/test/test_pool.rb index 9a2cdb8..507a1e3 100644 --- a/test/test_pool.rb +++ b/test/test_pool.rb @@ -77,10 +77,9 @@ def test_new assert_instance_of(StringView::Pool, pool) end - def test_new_freezes_backing + def test_new_requires_frozen_backing str = +"mutable" - StringView::Pool.new(str) - assert_predicate(str, :frozen?) + assert_raises(FrozenError) { StringView::Pool.new(str) } end def test_new_requires_string @@ -391,7 +390,7 @@ def test_view_slicing # --------------------------------------------------------------------------- def test_pool_survives_gc - pool = StringView::Pool.new(+"hello world") + pool = StringView::Pool.new("hello world") v = pool.view(0, 5) GC.start GC.start @@ -502,7 +501,7 @@ def test_new_with_frozen_string end def test_new_with_binary_string - str = "\x00\x01\xFF\xFE".b + str = "\x00\x01\xFF\xFE".b.freeze pool = StringView::Pool.new(str) v = pool.view(0, 4) assert_equal(Encoding::ASCII_8BIT, v.encoding) @@ -987,7 +986,7 @@ def test_alternating_small_and_large_iterations # --------------------------------------------------------------------------- def test_gc_during_pool_use - pool = StringView::Pool.new(+"test buffer for gc") + pool = StringView::Pool.new("test buffer for gc") views = [] 10.times do views << pool.view(0, 4) @@ -997,7 +996,7 @@ def test_gc_during_pool_use end def test_pool_and_views_survive_compaction - pool = StringView::Pool.new(+"compact me") + pool = StringView::Pool.new("compact me") v = pool.view(0, 7) GC.auto_compact = true GC.start diff --git a/test/test_string_view.rb b/test/test_string_view.rb index 306624a..af37dd3 100644 --- a/test/test_string_view.rb +++ b/test/test_string_view.rb @@ -16,10 +16,9 @@ def test_new_from_string assert_instance_of(StringView, sv) end - def test_new_freezes_backing_string + def test_new_requires_frozen_backing str = +"mutable" - StringView.new(str) - assert_predicate(str, :frozen?) + assert_raises(FrozenError) { StringView.new(str) } end def test_new_with_frozen_string @@ -132,12 +131,12 @@ def test_empty_on_zero_length_slice end def test_encoding - sv = StringView.new("hello".encode("UTF-8")) + sv = StringView.new("hello".encode("UTF-8").freeze) assert_equal(Encoding::UTF_8, sv.encoding) end def test_encoding_preserves_source - str = "hello".encode("ASCII") + str = "hello".encode("ASCII").freeze sv = StringView.new(str) assert_equal(Encoding::US_ASCII, sv.encoding) end @@ -875,7 +874,7 @@ def test_valid_encoding_true end def test_valid_encoding_invalid - bad = "\xFF\xFE".b.force_encoding("UTF-8") + bad = "\xFF\xFE".b.force_encoding("UTF-8").freeze sv = StringView.new(bad) refute_predicate(sv, :valid_encoding?) end @@ -1130,11 +1129,10 @@ def test_reset_basic assert_equal("goodbye", sv.to_s) end - def test_reset_freezes_new_backing + def test_reset_requires_frozen_backing sv = StringView.new("hello") new_backing = +"mutable string" - sv.reset!(new_backing, 0, 14) - assert_predicate(new_backing, :frozen?) + assert_raises(FrozenError) { sv.reset!(new_backing, 0, 14) } end def test_reset_with_offset_and_length @@ -1202,7 +1200,7 @@ def test_materialize_is_alias_for_to_s def test_scrub # Invalid UTF-8 byte sequence - bad = "hello \xFF world".b.force_encoding("UTF-8") + bad = "hello \xFF world".b.force_encoding("UTF-8").freeze sv = StringView.new(bad) result = sv.scrub("?") assert_instance_of(String, result) @@ -1332,26 +1330,26 @@ def test_aref_endless_range # --------------------------------------------------------------------------- def test_binary_encoding - str = "\x00\x01\x02\xFF\xFE".b + str = "\x00\x01\x02\xFF\xFE".b.freeze sv = StringView.new(str) assert_equal(Encoding::ASCII_8BIT, sv.encoding) end def test_binary_bytesize - str = "\x00\x01\x02\xFF\xFE".b + str = "\x00\x01\x02\xFF\xFE".b.freeze sv = StringView.new(str) assert_equal(5, sv.bytesize) end def test_binary_length_equals_bytesize - str = "\x00\x01\x02\xFF\xFE".b + str = "\x00\x01\x02\xFF\xFE".b.freeze sv = StringView.new(str) # For binary encoding, char == byte assert_equal(sv.bytesize, sv.length) end def test_binary_getbyte - str = "\x00\x01\xFF".b + str = "\x00\x01\xFF".b.freeze sv = StringView.new(str) assert_equal(0, sv.getbyte(0)) assert_equal(1, sv.getbyte(1)) @@ -1359,7 +1357,7 @@ def test_binary_getbyte end def test_binary_slice - str = "\x00\x01\x02\x03\x04\x05".b + str = "\x00\x01\x02\x03\x04\x05".b.freeze sv = StringView.new(str) result = sv[2, 3] assert_instance_of(StringView, result) @@ -1367,14 +1365,14 @@ def test_binary_slice end def test_binary_include - str = "\x00\x01\x02\xFF\xFE".b + str = "\x00\x01\x02\xFF\xFE".b.freeze sv = StringView.new(str) assert_includes(sv, "\xFF".b) refute_includes(sv, "\xAA".b) end def test_binary_each_byte - str = "\x00\xFF".b + str = "\x00\xFF".b.freeze sv = StringView.new(str) bytes = [] sv.each_byte { |b| bytes << b } @@ -1586,7 +1584,7 @@ def test_cmp_with_integer # --------------------------------------------------------------------------- def test_view_survives_gc_without_external_reference - sv = StringView.new(+"hello world") + sv = StringView.new("hello world") GC.start GC.start # View keeps backing alive — still works @@ -1596,7 +1594,7 @@ def test_view_survives_gc_without_external_reference end def test_multiple_views_into_same_backing - str = +"shared backing string" + str = "shared backing string" sv1 = StringView.new(str) sv2 = StringView.new(str, 7, 7) # "backing"