Skip to content
This repository has been archived by the owner on Feb 2, 2024. It is now read-only.

Commit

Permalink
Merge 6d1154f into 95a7bc4
Browse files: browse the repository at this point in the history
  • Loading branch information
shssf authored Jul 31, 2019
2 parents 95a7bc4 + 6d1154f commit e9136ca
Show file tree
Hide file tree
Showing 6 changed files with 11 additions and 27 deletions.
2 changes: 1 addition & 1 deletion hpat/_meminfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ struct MemInfo {
typedef struct MemInfo NRT_MemInfo;


void nrt_debug_print(char *fmt, ...) {
void nrt_debug_print(const char *fmt, ...) {
va_list args;

va_start(args, fmt);
Expand Down
9 changes: 0 additions & 9 deletions hpat/_str_decode.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -305,11 +305,8 @@ ascii_decode(const char *start, const char *end, Py_UCS1 *dest)
void decode_utf8(const char *s, Py_ssize_t size, int* kind, int *is_ascii, int* length, NRT_MemInfo** meminfo)
{
_C_UnicodeWriter writer;
const char *starts = s;
const char *end = s + size;

Py_ssize_t startinpos;
Py_ssize_t endinpos;
const char *errmsg = "";
*is_ascii = 0;

Expand Down Expand Up @@ -362,13 +359,9 @@ void decode_utf8(const char *s, Py_ssize_t size, int* kind, int *is_ascii, int*
if (s == end)
goto End;
errmsg = "unexpected end of data";
startinpos = s - starts;
endinpos = end - starts;
break;
case 1:
errmsg = "invalid start byte";
startinpos = s - starts;
endinpos = startinpos + 1;
break;
case 2:
case 3:
Expand All @@ -377,8 +370,6 @@ void decode_utf8(const char *s, Py_ssize_t size, int* kind, int *is_ascii, int*
goto End;
}
errmsg = "invalid continuation byte";
startinpos = s - starts;
endinpos = startinpos + ch - 1;
break;
default:
if (_C_UnicodeWriter_WriteCharInline(&writer, ch) < 0)
Expand Down
10 changes: 2 additions & 8 deletions hpat/_str_ext.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,9 @@

#include "_str_decode.cpp"

#ifdef USE_BOOST_REGEX
#include <boost/regex.hpp>
using boost::regex;
using boost::regex_search;
#else
#include <regex>
using std::regex;
using std::regex_search;
#endif

#include <boost/lexical_cast.hpp>

Expand Down Expand Up @@ -296,8 +290,8 @@ void str_arr_split_view_impl(str_arr_split_view_payload* out_view, int64_t n_str
index_offsets[0] = 0;
// uint32_t curr_data_off = 0;

int data_ind = offsets[0];
int str_ind = 0;
uint32_t data_ind = offsets[0];
int64_t str_ind = 0;
// while there are chars to consume, equal since the first if will consume it
while (data_ind <= total_chars)
{
Expand Down
10 changes: 5 additions & 5 deletions hpat/io/_csv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -309,14 +309,14 @@ static PyObject* csv_chunk_reader(std::istream * f, size_t fsz, bool is_parallel
std::vector<size_t> line_offset = count_lines(f, hpat_dist_get_node_portion(fsz, nranks, rank));
size_t no_lines = line_offset.size();
// get total number of lines using allreduce
size_t tot_no_lines(0);
int64_t tot_no_lines = 0;

hpat_dist_reduce(reinterpret_cast<char *>(&no_lines), reinterpret_cast<char *>(&tot_no_lines), HPAT_ReduceOps::SUM, HPAT_CTypes::UINT64);

// Now we need to communicate the distribution as we really want it
// First determine which is our first line (which is the sum of previous lines)
size_t byte_first_line = hpat_dist_exscan_i8(no_lines);
size_t byte_last_line = byte_first_line + no_lines;
int64_t byte_first_line = hpat_dist_exscan_i8(no_lines);
int64_t byte_last_line = byte_first_line + no_lines;

// We now determine the chunks of lines that begin and end in our byte-chunk

Expand Down Expand Up @@ -351,8 +351,8 @@ static PyObject* csv_chunk_reader(std::istream * f, size_t fsz, bool is_parallel

// We iterate through chunk boundaries (defined by line-numbers)
// we start with boundary 1 as 0 is the beginning of file
for(int i=1; i<nranks; ++i) {
size_t i_bndry = skiprows + hpat_dist_get_start(n_lines_to_read, (int)nranks, i);
for(size_t i=1; i<nranks; ++i) {
int64_t i_bndry = skiprows + hpat_dist_get_start(n_lines_to_read, (int)nranks, i);
// Note our line_offsets mark the end of each line!
// we check if boundary is on our byte-chunk
if(i_bndry > byte_first_line && i_bndry <= byte_last_line) {
Expand Down
5 changes: 2 additions & 3 deletions hpat/stringlib/codecs.h
Original file line number Diff line number Diff line change
Expand Up @@ -283,7 +283,7 @@ STRINGLIB(utf8_encoder)(char* out_data,

p = (char*)_C_BytesWriter_Alloc(&writer, size * max_char_size);
if (p == NULL)
return NULL;
return 0;

for (i = 0; i < size;) {
Py_UCS4 ch = data[i++];
Expand All @@ -304,8 +304,7 @@ STRINGLIB(utf8_encoder)(char* out_data,
}
#if STRINGLIB_SIZEOF_CHAR > 1
else if (Py_UNICODE_IS_SURROGATE(ch)) {
Py_ssize_t startpos, endpos, newpos;
Py_ssize_t k;
Py_ssize_t startpos, endpos;

startpos = i-1;
endpos = startpos+1;
Expand Down
2 changes: 1 addition & 1 deletion parquet_reader/hpat_parquet_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -449,7 +449,7 @@ int pq_read_string_parallel_single_file(std::shared_ptr<FileReader> arrow_reader
}
std::shared_ptr< ::arrow::Array > arr = chunked_arr->chunk(0);
// std::cout << arr->ToString() << std::endl;
int64_t num_values = arr->length();

auto buffers = arr->data()->buffers;
// std::cout<<"num buffs: "<< buffers.size()<<std::endl;
if (buffers.size()!=3) {
Expand Down

0 comments on commit e9136ca

Please sign in to comment.