Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 5 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ StringView methods are organized into three tiers based on their allocation beha

### Design decisions

- **Backing strings must be frozen.** `StringView.new`, `reset!`, and `Pool.new` all raise `FrozenError` if the string is not already frozen. This guarantees the backing bytes cannot be mutated or reallocated while views point into them. Use `String#view` (via `require "string_view/core_ext"`) if you want automatic freezing.
- **`to_str` is private.** Ruby's internal coercion protocol (`rb_check_string_type`) ignores method visibility, so StringView works seamlessly with `Regexp#=~`, string interpolation, and `String#+`. But `respond_to?(:to_str)` returns `false`, so code that explicitly checks for string-like objects won't treat StringView as a drop-in String replacement. When coercion happens, it returns a frozen shared string (zero-copy for heap-allocated backings).
- **All bang methods raise `FrozenError`.** StringView is immutable — `upcase!`, `gsub!`, `slice!`, etc. all raise immediately.
- **`method_missing` is a safety net.** Any String method not yet implemented natively raises `NotImplementedError` with a message telling you to call `.to_s.method_name(...)` explicitly. No silent fallback.
Expand Down Expand Up @@ -151,7 +152,7 @@ The gem includes a C extension that compiles during installation. It requires Ru
```ruby
require "string_view"

# Create from a String (freezes the backing automatically)
# Create from a frozen String
sv = StringView.new("Hello, world!")

# Slicing returns StringView — zero copy
Expand Down Expand Up @@ -179,7 +180,7 @@ sv.split(", ") # => ["Hello", "world!"] (Array of Strings)

```ruby
# Simulate a large HTTP response or log chunk
buffer = File.read("large_file.txt")
buffer = File.read("large_file.txt").freeze
sv = StringView.new(buffer)

# Extract fields without copying the entire buffer
Expand All @@ -196,7 +197,7 @@ body.match?(/\d{4}-\d{2}-\d{2}/) # Regex on the view
```ruby
sv = StringView.new("initial content")

# Re-point at a different backing string
# Re-point at a different (frozen) backing string
new_data = "different content"
sv.reset!(new_data, 0, new_data.bytesize)
sv.to_s # => "different content"
Expand Down Expand Up @@ -250,7 +251,7 @@ For UTF-8 strings with actual multibyte content, StringView uses two techniques:

### Compilation

The extension is compiled with `-O3` and uses `__attribute__((always_inline))` on hot paths, `RTYPEDDATA_GET_DATA` for fast struct access (skipping type checks), and `FL_SET_RAW` for freeze (bypassing method dispatch).
The extension is compiled with `-O3` and uses `__attribute__((always_inline))` on hot paths and `RTYPEDDATA_GET_DATA` for fast struct access (skipping type checks).

## Development

Expand Down
6 changes: 2 additions & 4 deletions ext/string_view/string_view.c
Original file line number Diff line number Diff line change
Expand Up @@ -155,8 +155,7 @@ static VALUE sv_initialize(int argc, VALUE *argv, VALUE self) {

rb_scan_args(argc, argv, "12", &str, &voffset, &vlength);

sv_check_string(str);
rb_str_freeze(str);
sv_check_frozen_string(str);

long backing_len = RSTRING_LEN(str);

Expand Down Expand Up @@ -218,8 +217,7 @@ static VALUE sv_reset(VALUE self, VALUE new_backing, VALUE voffset, VALUE vlengt
rb_check_frozen(self);
string_view_t *sv = sv_get_struct(self);

sv_check_string(new_backing);
rb_str_freeze(new_backing);
sv_check_frozen_string(new_backing);

long off = NUM2LONG(voffset);
long len = NUM2LONG(vlength);
Expand Down
9 changes: 7 additions & 2 deletions ext/string_view/string_view.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,13 +58,18 @@ extern const rb_data_type_t string_view_type;
/* Forward-declared helpers (defined in string_view.c) */
int sv_compute_single_byte(VALUE backing, rb_encoding *enc);

/* Validate that str is a T_STRING, raise TypeError otherwise. */
SV_INLINE void sv_check_string(VALUE str) {
/* Validate that str is a frozen T_STRING. Raises TypeError if not a
* String, FrozenError if not frozen. */
SV_INLINE void sv_check_frozen_string(VALUE str) {
if (SV_UNLIKELY(!RB_TYPE_P(str, T_STRING))) {
rb_raise(rb_eTypeError,
"no implicit conversion of %s into String",
rb_obj_classname(str));
}
if (SV_UNLIKELY(!OBJ_FROZEN(str))) {
rb_raise(rb_eFrozenError,
"string must be frozen; call .freeze before creating a view");
}
}

/* Validate byte offset + length against a backing string's bytesize.
Expand Down
3 changes: 1 addition & 2 deletions ext/string_view/string_view_pool.c
Original file line number Diff line number Diff line change
Expand Up @@ -84,8 +84,7 @@ static void pool_grow(sv_pool_t *pool, VALUE pool_obj) {
*/
static VALUE pool_initialize(VALUE self, VALUE str) {
sv_pool_t *pool = (sv_pool_t *)RTYPEDDATA_GET_DATA(self);
sv_check_string(str);
rb_str_freeze(str);
sv_check_frozen_string(str);

RB_OBJ_WRITE(self, &pool->backing, str);
pool->base = RSTRING_PTR(str);
Expand Down
3 changes: 2 additions & 1 deletion test/parsing/sexp_parser.rb
Original file line number Diff line number Diff line change
Expand Up @@ -71,10 +71,11 @@ def initialize(source, mode: :strict)
when String then source
else source.to_s
end
raw = raw.freeze

case mode
when :string
@source = raw.freeze
@source = raw
@slicer = ->(off, len) { @source.byteslice(off, len) }
when :string_view
@source = StringView.new(raw)
Expand Down
13 changes: 6 additions & 7 deletions test/test_pool.rb
Original file line number Diff line number Diff line change
Expand Up @@ -77,10 +77,9 @@ def test_new
assert_instance_of(StringView::Pool, pool)
end

def test_new_freezes_backing
def test_new_requires_frozen_backing
str = +"mutable"
StringView::Pool.new(str)
assert_predicate(str, :frozen?)
assert_raises(FrozenError) { StringView::Pool.new(str) }
end

def test_new_requires_string
Expand Down Expand Up @@ -391,7 +390,7 @@ def test_view_slicing
# ---------------------------------------------------------------------------

def test_pool_survives_gc
pool = StringView::Pool.new(+"hello world")
pool = StringView::Pool.new("hello world")
v = pool.view(0, 5)
GC.start
GC.start
Expand Down Expand Up @@ -502,7 +501,7 @@ def test_new_with_frozen_string
end

def test_new_with_binary_string
str = "\x00\x01\xFF\xFE".b
str = "\x00\x01\xFF\xFE".b.freeze
pool = StringView::Pool.new(str)
v = pool.view(0, 4)
assert_equal(Encoding::ASCII_8BIT, v.encoding)
Expand Down Expand Up @@ -987,7 +986,7 @@ def test_alternating_small_and_large_iterations
# ---------------------------------------------------------------------------

def test_gc_during_pool_use
pool = StringView::Pool.new(+"test buffer for gc")
pool = StringView::Pool.new("test buffer for gc")
views = []
10.times do
views << pool.view(0, 4)
Expand All @@ -997,7 +996,7 @@ def test_gc_during_pool_use
end

def test_pool_and_views_survive_compaction
pool = StringView::Pool.new(+"compact me")
pool = StringView::Pool.new("compact me")
v = pool.view(0, 7)
GC.auto_compact = true
GC.start
Expand Down
36 changes: 17 additions & 19 deletions test/test_string_view.rb
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,9 @@ def test_new_from_string
assert_instance_of(StringView, sv)
end

def test_new_freezes_backing_string
def test_new_requires_frozen_backing
str = +"mutable"
StringView.new(str)
assert_predicate(str, :frozen?)
assert_raises(FrozenError) { StringView.new(str) }
end

def test_new_with_frozen_string
Expand Down Expand Up @@ -132,12 +131,12 @@ def test_empty_on_zero_length_slice
end

def test_encoding
sv = StringView.new("hello".encode("UTF-8"))
sv = StringView.new("hello".encode("UTF-8").freeze)
assert_equal(Encoding::UTF_8, sv.encoding)
end

def test_encoding_preserves_source
str = "hello".encode("ASCII")
str = "hello".encode("ASCII").freeze
sv = StringView.new(str)
assert_equal(Encoding::US_ASCII, sv.encoding)
end
Expand Down Expand Up @@ -875,7 +874,7 @@ def test_valid_encoding_true
end

def test_valid_encoding_invalid
bad = "\xFF\xFE".b.force_encoding("UTF-8")
bad = "\xFF\xFE".b.force_encoding("UTF-8").freeze
sv = StringView.new(bad)
refute_predicate(sv, :valid_encoding?)
end
Expand Down Expand Up @@ -1130,11 +1129,10 @@ def test_reset_basic
assert_equal("goodbye", sv.to_s)
end

def test_reset_freezes_new_backing
def test_reset_requires_frozen_backing
sv = StringView.new("hello")
new_backing = +"mutable string"
sv.reset!(new_backing, 0, 14)
assert_predicate(new_backing, :frozen?)
assert_raises(FrozenError) { sv.reset!(new_backing, 0, 14) }
end

def test_reset_with_offset_and_length
Expand Down Expand Up @@ -1202,7 +1200,7 @@ def test_materialize_is_alias_for_to_s

def test_scrub
# Invalid UTF-8 byte sequence
bad = "hello \xFF world".b.force_encoding("UTF-8")
bad = "hello \xFF world".b.force_encoding("UTF-8").freeze
sv = StringView.new(bad)
result = sv.scrub("?")
assert_instance_of(String, result)
Expand Down Expand Up @@ -1332,49 +1330,49 @@ def test_aref_endless_range
# ---------------------------------------------------------------------------

def test_binary_encoding
str = "\x00\x01\x02\xFF\xFE".b
str = "\x00\x01\x02\xFF\xFE".b.freeze
sv = StringView.new(str)
assert_equal(Encoding::ASCII_8BIT, sv.encoding)
end

def test_binary_bytesize
str = "\x00\x01\x02\xFF\xFE".b
str = "\x00\x01\x02\xFF\xFE".b.freeze
sv = StringView.new(str)
assert_equal(5, sv.bytesize)
end

def test_binary_length_equals_bytesize
str = "\x00\x01\x02\xFF\xFE".b
str = "\x00\x01\x02\xFF\xFE".b.freeze
sv = StringView.new(str)
# For binary encoding, char == byte
assert_equal(sv.bytesize, sv.length)
end

def test_binary_getbyte
str = "\x00\x01\xFF".b
str = "\x00\x01\xFF".b.freeze
sv = StringView.new(str)
assert_equal(0, sv.getbyte(0))
assert_equal(1, sv.getbyte(1))
assert_equal(255, sv.getbyte(2))
end

def test_binary_slice
str = "\x00\x01\x02\x03\x04\x05".b
str = "\x00\x01\x02\x03\x04\x05".b.freeze
sv = StringView.new(str)
result = sv[2, 3]
assert_instance_of(StringView, result)
assert_equal("\x02\x03\x04".b, result.to_s)
end

def test_binary_include
str = "\x00\x01\x02\xFF\xFE".b
str = "\x00\x01\x02\xFF\xFE".b.freeze
sv = StringView.new(str)
assert_includes(sv, "\xFF".b)
refute_includes(sv, "\xAA".b)
end

def test_binary_each_byte
str = "\x00\xFF".b
str = "\x00\xFF".b.freeze
sv = StringView.new(str)
bytes = []
sv.each_byte { |b| bytes << b }
Expand Down Expand Up @@ -1586,7 +1584,7 @@ def test_cmp_with_integer
# ---------------------------------------------------------------------------

def test_view_survives_gc_without_external_reference
sv = StringView.new(+"hello world")
sv = StringView.new("hello world")
GC.start
GC.start
# View keeps backing alive — still works
Expand All @@ -1596,7 +1594,7 @@ def test_view_survives_gc_without_external_reference
end

def test_multiple_views_into_same_backing
str = +"shared backing string"
str = "shared backing string"
sv1 = StringView.new(str)
sv2 = StringView.new(str, 7, 7) # "backing"

Expand Down
Loading