
Commit e37e531

Merge 2943a6b into 99c5290
2 parents: 99c5290 + 2943a6b
shssf committed Aug 7, 2019

Showing 6 changed files with 9 additions and 25 deletions.
hpat/_meminfo.h (2 changes: 1 addition & 1 deletion)

@@ -41,7 +41,7 @@ struct MemInfo
 
 typedef struct MemInfo NRT_MemInfo;
 
-void nrt_debug_print(char* fmt, ...)
+void nrt_debug_print(const char* fmt, ...)
 {
     va_list args;
 
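The fix is const-correctness: a C++ string literal cannot bind to a non-const char*, so a call like nrt_debug_print("freeing meminfo\n") would be rejected, or draw a deprecation warning, with the old signature. A minimal sketch of the pattern; the body is an assumption, since the diff only exposes the va_list declaration:

    #include <cstdarg>
    #include <cstdio>

    void nrt_debug_print(const char* fmt, ...)
    {
        // Forward the printf-style arguments; fmt must be a format string.
        va_list args;
        va_start(args, fmt);
        vfprintf(stderr, fmt, args);
        va_end(args);
    }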
hpat/_str_decode.cpp (9 changes: 0 additions & 9 deletions)

@@ -329,11 +329,8 @@ static Py_ssize_t ascii_decode(const char* start, const char* end, Py_UCS1* dest
 void decode_utf8(const char* s, Py_ssize_t size, int* kind, int* is_ascii, int* length, NRT_MemInfo** meminfo)
 {
     _C_UnicodeWriter writer;
-    const char* starts = s;
     const char* end = s + size;
 
-    Py_ssize_t startinpos;
-    Py_ssize_t endinpos;
     const char* errmsg = "";
     *is_ascii = 0;
 
@@ -395,13 +392,9 @@ void decode_utf8(const char* s, Py_ssize_t size, int* kind, int* is_ascii, int*
             if (s == end)
                 goto End;
             errmsg = "unexpected end of data";
-            startinpos = s - starts;
-            endinpos = end - starts;
             break;
         case 1:
             errmsg = "invalid start byte";
-            startinpos = s - starts;
-            endinpos = startinpos + 1;
             break;
         case 2:
         case 3:
@@ -411,8 +404,6 @@ void decode_utf8(const char* s, Py_ssize_t size, int* kind, int* is_ascii, int*
                 goto End;
             }
             errmsg = "invalid continuation byte";
-            startinpos = s - starts;
-            endinpos = startinpos + ch - 1;
             break;
         default:
             if (_C_UnicodeWriter_WriteCharInline(&writer, ch) < 0)
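The deleted lines were dead stores: startinpos and endinpos mirror CPython's decoder, where they feed the error-handler machinery, but this port reports failures only through errmsg, so the positions were computed and never read. A compact sketch of the error classification that survives, with assumed semantics for the decoder's return value:

    // Returns nullptr on success, otherwise a static message describing
    // why the byte sequence at s is not valid UTF-8.
    const char* classify_utf8_error(unsigned int ch, const char* s, const char* end)
    {
        switch (ch)
        {
        case 0:
            // Input ended in the middle of a multi-byte sequence,
            // unless we are exactly at the end of the buffer.
            return (s == end) ? nullptr : "unexpected end of data";
        case 1:
            return "invalid start byte";
        case 2:
        case 3:
            return "invalid continuation byte";
        default:
            // ch is a fully decoded code point; no error.
            return nullptr;
        }
    }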
hpat/_str_ext.cpp (6 changes: 0 additions & 6 deletions)

@@ -8,15 +8,9 @@
 
 #include "_str_decode.cpp"
 
-#ifdef USE_BOOST_REGEX
-#include <boost/regex.hpp>
-using boost::regex;
-using boost::regex_search;
-#else
 #include <regex>
 using std::regex;
 using std::regex_search;
-#endif
 
 #include <boost/lexical_cast.hpp>
 
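Removing the USE_BOOST_REGEX branch commits the extension to std::regex. Because boost::regex and std::regex expose the same basic search interface, call sites written against the using-declarations compile unchanged. A minimal usage sketch; the pattern and input are illustrative, not from the diff:

    #include <regex>
    #include <string>

    using std::regex;
    using std::regex_search;

    bool has_match(const std::string& text, const std::string& pattern)
    {
        // Under USE_BOOST_REGEX these names resolved to boost::regex and
        // boost::regex_search; only the namespace differed.
        regex re(pattern);
        return regex_search(text, re);
    }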
hpat/io/_csv.cpp (10 changes: 5 additions & 5 deletions)

@@ -309,14 +309,14 @@ static PyObject* csv_chunk_reader(std::istream * f, size_t fsz, bool is_parallel
     std::vector<size_t> line_offset = count_lines(f, hpat_dist_get_node_portion(fsz, nranks, rank));
     size_t no_lines = line_offset.size();
     // get total number of lines using allreduce
-    size_t tot_no_lines(0);
+    int64_t tot_no_lines = 0;
 
     hpat_dist_reduce(reinterpret_cast<char *>(&no_lines), reinterpret_cast<char *>(&tot_no_lines), HPAT_ReduceOps::SUM, HPAT_CTypes::UINT64);
 
     // Now we need to communicate the distribution as we really want it
     // First determine which is our first line (which is the sum of previous lines)
-    size_t byte_first_line = hpat_dist_exscan_i8(no_lines);
-    size_t byte_last_line = byte_first_line + no_lines;
+    int64_t byte_first_line = hpat_dist_exscan_i8(no_lines);
+    int64_t byte_last_line = byte_first_line + no_lines;
 
     // We now determine the chunks of lines that begin and end in our byte-chunk
 
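The comments describe the distribution scheme: an allreduce produces the global line count, and an exclusive prefix sum (exscan) gives each rank the number of lines held by lower ranks, which is the index of its first line. Moving from size_t to int64_t keeps that arithmetic in one signed 64-bit type, matching the int64_t the hpat_dist_exscan_i8 assignment now expects. A sketch of the same scheme in raw MPI rather than the HPAT wrappers; this is hypothetical standalone code:

    #include <mpi.h>
    #include <cstdint>

    // Compute the global line count and this rank's first line index
    // from the local count, using a sum-allreduce and an exclusive scan.
    void line_distribution(int64_t no_lines, int64_t* tot_no_lines, int64_t* first_line)
    {
        MPI_Allreduce(&no_lines, tot_no_lines, 1, MPI_INT64_T, MPI_SUM, MPI_COMM_WORLD);

        int64_t before = 0;
        MPI_Exscan(&no_lines, &before, 1, MPI_INT64_T, MPI_SUM, MPI_COMM_WORLD);

        // MPI_Exscan leaves the result undefined on rank 0, where the
        // exclusive prefix sum is 0 by definition.
        int rank;
        MPI_Comm_rank(MPI_COMM_WORLD, &rank);
        *first_line = (rank == 0) ? 0 : before;
    }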
@@ -351,8 +351,8 @@ static PyObject* csv_chunk_reader(std::istream * f, size_t fsz, bool is_parallel
 
     // We iterate through chunk boundaries (defined by line-numbers)
     // we start with boundary 1 as 0 is the beginning of file
-    for(int i=1; i<nranks; ++i) {
-        size_t i_bndry = skiprows + hpat_dist_get_start(n_lines_to_read, (int)nranks, i);
+    for(size_t i=1; i<nranks; ++i) {
+        int64_t i_bndry = skiprows + hpat_dist_get_start(n_lines_to_read, (int)nranks, i);
         // Note our line_offsets mark the end of each line!
         // we check if boundary is on our byte-chunk
         if(i_bndry > byte_first_line && i_bndry <= byte_last_line) {
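Both changes in this hunk align signedness: an int loop counter compared against nranks (apparently unsigned, judging by the (int)nranks casts) is a mixed-sign comparison, and an unsigned i_bndry compared against the now-signed byte_first_line would be one too. Mixed comparisons are more than a warning; a small illustration of the wraparound they can hide:

    #include <cstdint>
    #include <cstdio>

    int main()
    {
        size_t  lines_here = 3;
        int64_t boundary = -1;   // e.g. a sentinel from a signed API

        // A signed/unsigned comparison converts the signed side to
        // unsigned, so -1 becomes UINT64_MAX and the test is true:
        if (static_cast<uint64_t>(boundary) > lines_here)
            printf("-1 compares as greater than 3 when treated as unsigned\n");

        // Unsigned subtraction wraps instead of going negative:
        printf("%zu\n", lines_here - static_cast<size_t>(5)); // huge value, not -2
        return 0;
    }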
hpat/stringlib/codecs.h (5 changes: 2 additions & 3 deletions)

@@ -293,7 +293,7 @@ int64_t STRINGLIB(utf8_encoder)(char* out_data, STRINGLIB_CHAR* data, Py_ssize_t
 
     p = (char*)_C_BytesWriter_Alloc(&writer, size * max_char_size);
     if (p == NULL)
-        return NULL;
+        return 0;
 
     for (i = 0; i < size;)
     {
@@ -316,8 +316,7 @@ int64_t STRINGLIB(utf8_encoder)(char* out_data, STRINGLIB_CHAR* data, Py_ssize_t
 #if STRINGLIB_SIZEOF_CHAR > 1
         else if (Py_UNICODE_IS_SURROGATE(ch))
        {
-            Py_ssize_t startpos, endpos, newpos;
-            Py_ssize_t k;
+            Py_ssize_t startpos, endpos;
 
             startpos = i - 1;
             endpos = startpos + 1;
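The old return NULL only compiled because NULL expands to an integer constant on most toolchains, yet utf8_encoder returns int64_t, not a pointer; return 0 states the error sentinel honestly and keeps compiling even where NULL is defined as nullptr. A tiny illustration with a hypothetical encoder:

    #include <cstdint>
    #include <cstddef>

    int64_t encode_len(const char* buf)
    {
        if (buf == NULL)
            return 0;    // correct: 0 is the int64_t failure sentinel
        // return NULL;  // wrong type: NULL is a null-pointer constant,
                         // and is a hard error where NULL is nullptr
        return 42;
    }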
parquet_reader/hpat_parquet_reader.cpp (2 changes: 1 addition & 1 deletion)

@@ -518,7 +518,7 @@ int pq_read_string_parallel_single_file(std::shared_ptr<FileReader> arrow_reader
     }
     std::shared_ptr<::arrow::Array> arr = chunked_arr->chunk(0);
     // std::cout << arr->ToString() << std::endl;
-    int64_t num_values = arr->length();
+
     auto buffers = arr->data()->buffers;
     // std::cout<<"num buffs: "<< buffers.size()<<std::endl;
     if (buffers.size() != 3)
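The deleted num_values was apparently unused; the function works with the raw buffers directly. The buffers.size() != 3 check encodes Arrow's layout for variable-length string arrays: a validity bitmap, an int32 offsets buffer, and the character data. A hedged sketch of walking the same data through Arrow's typed API instead of the raw buffer vector, assuming a utf8-typed array:

    #include <arrow/api.h>
    #include <memory>
    #include <string>

    void walk_strings(const std::shared_ptr<arrow::Array>& arr)
    {
        auto strings = std::static_pointer_cast<arrow::StringArray>(arr);
        for (int64_t i = 0; i < strings->length(); ++i)
        {
            if (strings->IsNull(i))      // buffer 0: validity bitmap
                continue;
            // buffers 1 and 2: offsets and character data, combined here
            std::string value = strings->GetString(i);
        }
    }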
