Skip to content
This repository has been archived by the owner on Feb 2, 2024. It is now read-only.

Fix boost runtime issue on Ubuntu16.04 with gcc 5.4 #92

Merged
merged 8 commits into from
Aug 7, 2019
2 changes: 1 addition & 1 deletion hpat/_meminfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ struct MemInfo {
typedef struct MemInfo NRT_MemInfo;


void nrt_debug_print(char *fmt, ...) {
void nrt_debug_print(const char *fmt, ...) {
va_list args;

va_start(args, fmt);
Expand Down
9 changes: 0 additions & 9 deletions hpat/_str_decode.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -305,11 +305,8 @@ ascii_decode(const char *start, const char *end, Py_UCS1 *dest)
void decode_utf8(const char *s, Py_ssize_t size, int* kind, int *is_ascii, int* length, NRT_MemInfo** meminfo)
{
_C_UnicodeWriter writer;
const char *starts = s;
const char *end = s + size;

Py_ssize_t startinpos;
Py_ssize_t endinpos;
const char *errmsg = "";
*is_ascii = 0;

Expand Down Expand Up @@ -362,13 +359,9 @@ void decode_utf8(const char *s, Py_ssize_t size, int* kind, int *is_ascii, int*
if (s == end)
goto End;
errmsg = "unexpected end of data";
startinpos = s - starts;
endinpos = end - starts;
break;
case 1:
errmsg = "invalid start byte";
startinpos = s - starts;
endinpos = startinpos + 1;
break;
case 2:
case 3:
Expand All @@ -377,8 +370,6 @@ void decode_utf8(const char *s, Py_ssize_t size, int* kind, int *is_ascii, int*
goto End;
}
errmsg = "invalid continuation byte";
startinpos = s - starts;
endinpos = startinpos + ch - 1;
break;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How close is this code to the original CPython version?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure about CPython, but these variables are not used anywhere.

default:
if (_C_UnicodeWriter_WriteCharInline(&writer, ch) < 0)
Expand Down
10 changes: 2 additions & 8 deletions hpat/_str_ext.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,9 @@

#include "_str_decode.cpp"

#ifdef USE_BOOST_REGEX
#include <boost/regex.hpp>
using boost::regex;
using boost::regex_search;
#else
#include <regex>
using std::regex;
using std::regex_search;
#endif

#include <boost/lexical_cast.hpp>

Expand Down Expand Up @@ -296,8 +290,8 @@ void str_arr_split_view_impl(str_arr_split_view_payload* out_view, int64_t n_str
index_offsets[0] = 0;
// uint32_t curr_data_off = 0;

int data_ind = offsets[0];
int str_ind = 0;
uint32_t data_ind = offsets[0];
int64_t str_ind = 0;
// while there are chars to consume, equal since the first if will consume it
while (data_ind <= total_chars)
{
Expand Down
10 changes: 5 additions & 5 deletions hpat/io/_csv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -309,14 +309,14 @@ static PyObject* csv_chunk_reader(std::istream * f, size_t fsz, bool is_parallel
std::vector<size_t> line_offset = count_lines(f, hpat_dist_get_node_portion(fsz, nranks, rank));
size_t no_lines = line_offset.size();
// get total number of lines using allreduce
size_t tot_no_lines(0);
int64_t tot_no_lines = 0;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Code readability and consistency. I think this is a more common way to initialize variables. From the compiler's point of view, in this particular place, the two forms are equivalent.


hpat_dist_reduce(reinterpret_cast<char *>(&no_lines), reinterpret_cast<char *>(&tot_no_lines), HPAT_ReduceOps::SUM, HPAT_CTypes::UINT64);

// Now we need to communicate the distribution as we really want it
// First determine which is our first line (which is the sum of previous lines)
size_t byte_first_line = hpat_dist_exscan_i8(no_lines);
size_t byte_last_line = byte_first_line + no_lines;
int64_t byte_first_line = hpat_dist_exscan_i8(no_lines);
int64_t byte_last_line = byte_first_line + no_lines;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why change to signed?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because later it participates in a comparison with a signed input argument. In this case the compiler shows a warning.


// We now determine the chunks of lines that begin and end in our byte-chunk

Expand Down Expand Up @@ -351,8 +351,8 @@ static PyObject* csv_chunk_reader(std::istream * f, size_t fsz, bool is_parallel

// We iterate through chunk boundaries (defined by line-numbers)
// we start with boundary 1 as 0 is the beginning of file
for(int i=1; i<nranks; ++i) {
size_t i_bndry = skiprows + hpat_dist_get_start(n_lines_to_read, (int)nranks, i);
for(size_t i=1; i<nranks; ++i) {
int64_t i_bndry = skiprows + hpat_dist_get_start(n_lines_to_read, (int)nranks, i);
// Note our line_offsets mark the end of each line!
// we check if boundary is on our byte-chunk
if(i_bndry > byte_first_line && i_bndry <= byte_last_line) {
Expand Down
5 changes: 2 additions & 3 deletions hpat/stringlib/codecs.h
Original file line number Diff line number Diff line change
Expand Up @@ -283,7 +283,7 @@ STRINGLIB(utf8_encoder)(char* out_data,

p = (char*)_C_BytesWriter_Alloc(&writer, size * max_char_size);
if (p == NULL)
return NULL;
return 0;

for (i = 0; i < size;) {
Py_UCS4 ch = data[i++];
Expand All @@ -304,8 +304,7 @@ STRINGLIB(utf8_encoder)(char* out_data,
}
#if STRINGLIB_SIZEOF_CHAR > 1
else if (Py_UNICODE_IS_SURROGATE(ch)) {
Py_ssize_t startpos, endpos, newpos;
Py_ssize_t k;
Py_ssize_t startpos, endpos;

startpos = i-1;
endpos = startpos+1;
Expand Down
2 changes: 1 addition & 1 deletion parquet_reader/hpat_parquet_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -449,7 +449,7 @@ int pq_read_string_parallel_single_file(std::shared_ptr<FileReader> arrow_reader
}
std::shared_ptr< ::arrow::Array > arr = chunked_arr->chunk(0);
// std::cout << arr->ToString() << std::endl;
int64_t num_values = arr->length();

auto buffers = arr->data()->buffers;
// std::cout<<"num buffs: "<< buffers.size()<<std::endl;
if (buffers.size()!=3) {
Expand Down