Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
259 changes: 237 additions & 22 deletions ext/string_view/string_view.c
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,220 @@ SV_INLINE int sv_single_byte_optimizable(string_view_t *sv);
SV_INLINE int sv_is_utf8(string_view_t *sv);
static long sv_utf8_char_count(const char *p, long len);

/*
* Initialize (or reinitialize) a string_view_t's fields from a frozen backing
* string. Caller is responsible for freeing any prior stride_idx.
*/
static void sv_init_fields(VALUE obj, string_view_t *sv, VALUE backing,
const char *base, rb_encoding *enc,
long offset, long length) {
RB_OBJ_WRITE(obj, &sv->backing, backing);
sv->base = base;
sv->enc = enc;
sv->offset = offset;
sv->length = length;
sv->single_byte = sv_compute_single_byte(backing, enc);
sv->charlen = -1;
sv->stride_idx = NULL;
}

/* ========================================================================= */
/* StringView::Pool */
/* ========================================================================= */

static VALUE cStringViewPool;

#define POOL_INITIAL_CAP 32
#define POOL_MAX_GROW 4096

typedef struct {
VALUE backing; /* frozen String that owns the bytes */
const char *base; /* cached RSTRING_PTR(backing) */
rb_encoding *enc; /* cached encoding */
int single_byte; /* cached single-byte flag */
long backing_len; /* cached RSTRING_LEN(backing) */
VALUE views; /* Ruby Array of pre-allocated StringView objects */
long next_idx; /* index of next available view in the array */
long capacity; /* current size of the views array */
} sv_pool_t;

static void pool_mark(void *ptr) {
sv_pool_t *pool = (sv_pool_t *)ptr;
if (pool->backing != Qnil)
rb_gc_mark_movable(pool->backing);
if (pool->views != Qnil)
rb_gc_mark_movable(pool->views);
}

static void pool_compact(void *ptr) {
sv_pool_t *pool = (sv_pool_t *)ptr;
if (pool->backing != Qnil) {
pool->backing = rb_gc_location(pool->backing);
pool->base = RSTRING_PTR(pool->backing);
}
if (pool->views != Qnil) {
pool->views = rb_gc_location(pool->views);
}
}

static size_t pool_memsize(const void *ptr) {
const sv_pool_t *pool = (const sv_pool_t *)ptr;
size_t size = sizeof(sv_pool_t);
/* Each pre-allocated view is a separate GC object with a string_view_t
* struct. Report their cost here so ObjectSpace.memsize_of gives a
* realistic picture of the pool's total footprint. */
size += (size_t)pool->capacity * sizeof(string_view_t);
return size;
}

static const rb_data_type_t pool_type = {
.wrap_struct_name = "StringView::Pool",
.function = { .dmark = pool_mark, .dfree = RUBY_DEFAULT_FREE,
.dsize = pool_memsize, .dcompact = pool_compact },
.flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_EMBEDDABLE,
};

/*
* Allocate a batch of StringView objects pre-initialized with the pool's
* backing string. They start with offset=0, length=0 (empty views).
* The `.view()` method sets the real offset+length before returning.
*/
static void pool_grow(sv_pool_t *pool, VALUE pool_obj) {
long grow = pool->capacity == 0 ? POOL_INITIAL_CAP : pool->capacity;
if (grow > POOL_MAX_GROW) grow = POOL_MAX_GROW;
long new_cap = pool->capacity + grow;
long old_cap = pool->capacity;

/* Grow the Ruby Array to hold the new views */
for (long i = old_cap; i < new_cap; i++) {
string_view_t *sv;
VALUE obj = TypedData_Make_Struct(cStringView, string_view_t,
&string_view_type, sv);
sv_init_fields(obj, sv, pool->backing, pool->base, pool->enc, 0, 0);
rb_ary_push(pool->views, obj);
}

pool->capacity = new_cap;
}

/*
* Pool.new(string) → Pool
*/
static VALUE pool_initialize(VALUE self, VALUE str) {
sv_pool_t *pool = (sv_pool_t *)RTYPEDDATA_GET_DATA(self);

if (!RB_TYPE_P(str, T_STRING)) {
rb_raise(rb_eTypeError,
"no implicit conversion of %s into String",
rb_obj_classname(str));
}

rb_str_freeze(str);

RB_OBJ_WRITE(self, &pool->backing, str);
pool->base = RSTRING_PTR(str);
pool->enc = rb_enc_get(str);
pool->single_byte = sv_compute_single_byte(str, pool->enc);
pool->backing_len = RSTRING_LEN(str);

/* Create the views array and pre-allocate the initial batch */
VALUE ary = rb_ary_new_capa(POOL_INITIAL_CAP);
RB_OBJ_WRITE(self, &pool->views, ary);
pool->next_idx = 0;
pool->capacity = 0;

pool_grow(pool, self);

return self;
}

static VALUE pool_alloc(VALUE klass) {
sv_pool_t *pool;
VALUE obj = TypedData_Make_Struct(klass, sv_pool_t, &pool_type, pool);
pool->backing = Qnil;
pool->base = NULL;
pool->enc = NULL;
pool->single_byte = -1;
pool->backing_len = 0;
pool->views = Qnil;
pool->next_idx = 0;
pool->capacity = 0;
return obj;
}

/*
* pool.view(byte_offset, byte_length) → StringView
*
* Returns a pre-allocated StringView pointed at the given byte range.
* If the pool is exhausted, grows exponentially before returning.
*/
static VALUE pool_view(VALUE self, VALUE voffset, VALUE vlength) {
sv_pool_t *pool = (sv_pool_t *)RTYPEDDATA_GET_DATA(self);
long off = NUM2LONG(voffset);
long len = NUM2LONG(vlength);

if (SV_UNLIKELY(off < 0 || len < 0 || off > pool->backing_len ||
len > pool->backing_len - off)) {
rb_raise(rb_eArgError,
"offset %ld, length %ld out of range for string of bytesize %ld",
off, len, pool->backing_len);
}

/* Grow if exhausted */
if (SV_UNLIKELY(pool->next_idx >= pool->capacity)) {
pool_grow(pool, self);
}

/* Grab the next pre-allocated view and set its range */
VALUE view = RARRAY_AREF(pool->views, pool->next_idx);
pool->next_idx++;

string_view_t *sv = (string_view_t *)RTYPEDDATA_GET_DATA(view);
sv->offset = off;
sv->length = len;
sv->charlen = -1; /* invalidate cached char count */
sv->stride_idx = NULL; /* invalidate stride index */

return view;
}

/*
* pool.size → Integer
* Number of views handed out so far.
*/
static VALUE pool_size(VALUE self) {
sv_pool_t *pool = (sv_pool_t *)RTYPEDDATA_GET_DATA(self);
return LONG2NUM(pool->next_idx);
}

/*
* pool.capacity → Integer
* Current number of pre-allocated view slots.
*/
static VALUE pool_capacity(VALUE self) {
sv_pool_t *pool = (sv_pool_t *)RTYPEDDATA_GET_DATA(self);
return LONG2NUM(pool->capacity);
}

/*
* pool.reset! → self
* Reset the cursor to 0, allowing all pre-allocated views to be reused.
* Previously returned views become invalid (their offsets may be overwritten).
*/
static VALUE pool_reset(VALUE self) {
sv_pool_t *pool = (sv_pool_t *)RTYPEDDATA_GET_DATA(self);
pool->next_idx = 0;
return self;
}

/*
* pool.backing → String (frozen)
*/
static VALUE pool_backing(VALUE self) {
sv_pool_t *pool = (sv_pool_t *)RTYPEDDATA_GET_DATA(self);
return pool->backing;
}

/* ========================================================================= */
/* Internal helpers */
/* ========================================================================= */
Expand Down Expand Up @@ -208,23 +422,17 @@ static VALUE sv_initialize(int argc, VALUE *argv, VALUE self) {
offset = NUM2LONG(voffset);
length = NUM2LONG(vlength);

if (offset < 0 || length < 0 || offset + length > backing_len) {
if (offset < 0 || length < 0 || offset > backing_len ||
length > backing_len - offset) {
rb_raise(rb_eArgError,
"offset %ld, length %ld out of range for string of bytesize %ld",
offset, length, backing_len);
}
}

string_view_t *sv = sv_get_struct(self);
rb_encoding *enc = rb_enc_get(str);
RB_OBJ_WRITE(self, &sv->backing, str);
sv->base = RSTRING_PTR(str);
sv->enc = enc;
sv->offset = offset;
sv->length = length;
sv->single_byte = sv_compute_single_byte(str, enc);
sv->charlen = -1;
sv->stride_idx = NULL;
sv_init_fields(self, sv, str, RSTRING_PTR(str), rb_enc_get(str),
offset, length);

/*
* We intentionally do NOT freeze self. StringView blocks content
Expand Down Expand Up @@ -283,28 +491,22 @@ static VALUE sv_reset(VALUE self, VALUE new_backing, VALUE voffset, VALUE vlengt
long len = NUM2LONG(vlength);
long backing_len = RSTRING_LEN(new_backing);

if (off < 0 || len < 0 || off + len > backing_len) {
if (off < 0 || len < 0 || off > backing_len ||
len > backing_len - off) {
rb_raise(rb_eArgError,
"offset %ld, length %ld out of range for string of bytesize %ld",
off, len, backing_len);
}

rb_encoding *enc = rb_enc_get(new_backing);
RB_OBJ_WRITE(self, &sv->backing, new_backing);
sv->base = RSTRING_PTR(new_backing);
sv->enc = enc;
sv->offset = off;
sv->length = len;
sv->single_byte = sv_compute_single_byte(new_backing, enc);
sv->charlen = -1;

/* Free old stride index to avoid memory leak on reuse */
/* Free old stride index before reinitializing */
if (sv->stride_idx) {
xfree(sv->stride_idx->offsets);
xfree(sv->stride_idx);
sv->stride_idx = NULL;
}

sv_init_fields(self, sv, new_backing, RSTRING_PTR(new_backing),
rb_enc_get(new_backing), off, len);

return self;
}

Expand Down Expand Up @@ -1934,4 +2136,17 @@ void Init_string_view(void) {
* materialize is the EXPLICIT escape hatch (inherited from StringView,
* defined as a separate method pointing at sv_to_s). */
rb_define_method(cStringViewStrict, "to_s", sv_would_allocate, -1);

/* =================================================================== */
/* StringView::Pool */
/* =================================================================== */

cStringViewPool = rb_define_class_under(cStringView, "Pool", rb_cObject);
rb_define_alloc_func(cStringViewPool, pool_alloc);
rb_define_method(cStringViewPool, "initialize", pool_initialize, 1);
rb_define_method(cStringViewPool, "view", pool_view, 2);
rb_define_method(cStringViewPool, "size", pool_size, 0);
rb_define_method(cStringViewPool, "capacity", pool_capacity, 0);
rb_define_method(cStringViewPool, "reset!", pool_reset, 0);
rb_define_method(cStringViewPool, "backing", pool_backing, 0);
}
Loading
Loading