Skip to content

Commit

Permalink
updated Readstat sources to dev commit 6bb297c
Browse files Browse the repository at this point in the history
  • Loading branch information
ofajardo committed Jan 30, 2023
1 parent fe40f98 commit fca7dfd
Show file tree
Hide file tree
Showing 9 changed files with 139 additions and 105 deletions.
2 changes: 1 addition & 1 deletion src/readstat_convert.c
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
readstat_error_t readstat_convert(char *dst, size_t dst_len, const char *src, size_t src_len, iconv_t converter) {
/* strip off trailing spaces and NUL bytes from the input because the programs
* use ASCII space padding even with non-ASCII encoding. */
while (src_len && src[src_len-1] == ' ') {
while (src_len && (src[src_len-1] == ' ' || src[src_len-1] == '\0')) {
src_len--;
}
if (dst_len == 0) {
Expand Down
10 changes: 6 additions & 4 deletions src/readstat_malloc.c
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
#include <stdlib.h>

#define MAX_MALLOC_SIZE 0xFFF000
/* ~16 MB. Needs to be at least 0x3FF00, i.e. the default ~4MB block size used
* in compressed SPSS (ZSAV) files. The purpose here is to prevent massive
* allocations in the event of a malformed file or a bug in the library. */
#define MAX_MALLOC_SIZE 0x1000000
/* =16 MiB. Needs to be at least 0x3FF000, i.e. the default ~4MB block size used
* in compressed SPSS (ZSAV) files. Some SAS installations use 16MiB page sizes
* by default, see https://github.com/tidyverse/haven/issues/697.
* The purpose here is to prevent massive allocations in the event of a
* malformed file or a bug in the library. */

void *readstat_malloc(size_t len) {
if (len > MAX_MALLOC_SIZE || len == 0) {
Expand Down
33 changes: 19 additions & 14 deletions src/sas/readstat_sas7bcat_read.c
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ static readstat_error_t sas7bcat_parse_value_labels(const char *value_start, siz

/* Pass 1 -- find out the offset of the labels */
for (i=0; i<label_count_capacity; i++) {
if (&lbp1[3] - value_start > value_labels_len || lbp1[2] < 0) {
if (&lbp1[3] - value_start > value_labels_len || sas_read2(&lbp1[2], ctx->bswap) < 0) {
retval = READSTAT_ERROR_PARSE;
goto cleanup;
}
Expand All @@ -76,7 +76,7 @@ static readstat_error_t sas7bcat_parse_value_labels(const char *value_start, siz
}
value_offset[label_pos] = lbp1 - value_start;
}
lbp1 += 6 + lbp1[2];
lbp1 += 6 + sas_read2(&lbp1[2], ctx->bswap);
}

const char *lbp2 = lbp1;
Expand All @@ -93,7 +93,7 @@ static readstat_error_t sas7bcat_parse_value_labels(const char *value_start, siz
readstat_value_t value = { .type = is_string ? READSTAT_TYPE_STRING : READSTAT_TYPE_DOUBLE };
char string_val[4*16+1];
if (is_string) {
size_t value_entry_len = 6 + lbp1[2];
size_t value_entry_len = 6 + sas_read2(&lbp1[2], ctx->bswap);
retval = readstat_convert(string_val, sizeof(string_val),
&lbp1[value_entry_len-16], 16, ctx->converter);
if (retval != READSTAT_OK)
Expand Down Expand Up @@ -143,18 +143,20 @@ static readstat_error_t sas7bcat_parse_block(const char *data, size_t data_size,
readstat_error_t retval = READSTAT_OK;

size_t pad = 0;
int label_count_capacity = 0;
int label_count_used = 0;
uint64_t label_count_capacity = 0;
uint64_t label_count_used = 0;
int payload_offset = 106;
uint16_t flags = 0;
char name[4*32+1];

if (data_size < payload_offset)
goto cleanup;

pad = (data[2] & 0x08) ? 4 : 0; // might be 0x10, not sure
flags = sas_read2(&data[2], ctx->bswap);
pad = (flags & 0x08) ? 4 : 0; // might be 0x10, not sure
if (ctx->u64) {
label_count_capacity = sas_read4(&data[42+pad], ctx->bswap);
label_count_used = sas_read4(&data[50+pad], ctx->bswap);
label_count_capacity = sas_read8(&data[42+pad], ctx->bswap);
label_count_used = sas_read8(&data[50+pad], ctx->bswap);

payload_offset += 32;
} else {
Expand All @@ -169,7 +171,7 @@ static readstat_error_t sas7bcat_parse_block(const char *data, size_t data_size,
pad += 16;
}

if ((data[2] & 0x80) && !ctx->u64) { // has long name
if (((flags & 0x80) && !ctx->u64) || ((flags & 0x20) && ctx->u64)) { // has long name
if (data_size < payload_offset + pad + 32)
goto cleanup;

Expand All @@ -182,6 +184,9 @@ static readstat_error_t sas7bcat_parse_block(const char *data, size_t data_size,
if (data_size < payload_offset + pad)
goto cleanup;

if (label_count_used == 0)
goto cleanup;

if ((retval = sas7bcat_parse_value_labels(&data[payload_offset+pad], data_size - payload_offset - pad,
label_count_used, label_count_capacity, name, ctx)) != READSTAT_OK)
goto cleanup;
Expand All @@ -200,15 +205,15 @@ static readstat_error_t sas7bcat_augment_index(const char *index, size_t len, sa
break;

if (xlsr[ctx->xlsr_O_offset] == 'O') {
uint32_t page = 0, pos = 0;
uint64_t page = 0, pos = 0;
if (ctx->u64) {
page = sas_read4(&xlsr[8], ctx->bswap);
pos = sas_read4(&xlsr[16], ctx->bswap);
page = sas_read8(&xlsr[8], ctx->bswap);
pos = sas_read2(&xlsr[16], ctx->bswap);
} else {
page = sas_read2(&xlsr[4], ctx->bswap);
page = sas_read4(&xlsr[4], ctx->bswap);
pos = sas_read2(&xlsr[8], ctx->bswap);
}
ctx->block_pointers[ctx->block_pointers_used++] = ((uint64_t)page << 32) + pos;
ctx->block_pointers[ctx->block_pointers_used++] = (page << 32) + pos;
}

if (ctx->block_pointers_used == ctx->block_pointers_capacity) {
Expand Down
7 changes: 7 additions & 0 deletions src/sas/readstat_sas_rle.c
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ typedef SSIZE_T ssize_t;
#include "readstat_sas_rle.h"

#define SAS_RLE_COMMAND_COPY64 0
#define SAS_RLE_COMMAND_COPY64_PLUS_4096 1
#define SAS_RLE_COMMAND_COPY96 2
#define SAS_RLE_COMMAND_INSERT_BYTE18 4
#define SAS_RLE_COMMAND_INSERT_AT17 5
#define SAS_RLE_COMMAND_INSERT_BLANK17 6
Expand All @@ -29,6 +31,7 @@ typedef SSIZE_T ssize_t;

static size_t command_lengths[16] = {
[SAS_RLE_COMMAND_COPY64] = 1,
[SAS_RLE_COMMAND_COPY64_PLUS_4096] = 1,
[SAS_RLE_COMMAND_INSERT_BYTE18] = 2,
[SAS_RLE_COMMAND_INSERT_AT17] = 1,
[SAS_RLE_COMMAND_INSERT_BLANK17] = 1,
Expand Down Expand Up @@ -62,6 +65,10 @@ ssize_t sas_rle_decompress(void *output_buf, size_t output_len,
case SAS_RLE_COMMAND_COPY64:
copy_len = (*input++) + 64 + length * 256;
break;
case SAS_RLE_COMMAND_COPY64_PLUS_4096:
copy_len = (*input++) + 64 + length * 256 + 4096;
break;
case SAS_RLE_COMMAND_COPY96: copy_len = length + 96; break;
case SAS_RLE_COMMAND_INSERT_BYTE18:
insert_len = (*input++) + 18 + length * 256;
insert_byte = *input++;
Expand Down
2 changes: 1 addition & 1 deletion src/spss/readstat_por.c
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ ssize_t por_utf8_encode(const unsigned char *input, size_t input_len,
}
/* TODO - For some reason that replacement character isn't recognized
* by some systems, so be prepared to insert an ASCII space instead */
int printed = sprintf(output + offset, "%lc", codepoint);
int printed = snprintf(output + offset, output_len - offset, "%lc", codepoint);
if (printed > 0) {
offset += printed;
} else {
Expand Down
Loading

0 comments on commit fca7dfd

Please sign in to comment.