Merge e4ea748 into 3e879ed

IntelPython · Jul 15, 2019 · 6c3cc46 · 6c3cc46
2 parents 3e879ed + e4ea748
commit 6c3cc46
Show file tree

Hide file tree

Showing 6 changed files with 11 additions and 27 deletions.
diff --git a/hpat/_meminfo.h b/hpat/_meminfo.h
@@ -43,7 +43,7 @@ struct MemInfo {
 typedef struct MemInfo NRT_MemInfo;
 
 
-void nrt_debug_print(char *fmt, ...) {
+void nrt_debug_print(const char *fmt, ...) {
    va_list args;
 
    va_start(args, fmt);

diff --git a/hpat/_str_decode.cpp b/hpat/_str_decode.cpp
@@ -305,11 +305,8 @@ ascii_decode(const char *start, const char *end, Py_UCS1 *dest)
 void decode_utf8(const char *s, Py_ssize_t size, int* kind, int *is_ascii, int* length, NRT_MemInfo** meminfo)
 {
     _C_UnicodeWriter writer;
-    const char *starts = s;
     const char *end = s + size;
 
-    Py_ssize_t startinpos;
-    Py_ssize_t endinpos;
     const char *errmsg = "";
     *is_ascii = 0;
 
@@ -362,13 +359,9 @@ void decode_utf8(const char *s, Py_ssize_t size, int* kind, int *is_ascii, int*
             if (s == end)
                 goto End;
             errmsg = "unexpected end of data";
-            startinpos = s - starts;
-            endinpos = end - starts;
             break;
         case 1:
             errmsg = "invalid start byte";
-            startinpos = s - starts;
-            endinpos = startinpos + 1;
             break;
         case 2:
         case 3:
@@ -377,8 +370,6 @@ void decode_utf8(const char *s, Py_ssize_t size, int* kind, int *is_ascii, int*
                 goto End;
             }
             errmsg = "invalid continuation byte";
-            startinpos = s - starts;
-            endinpos = startinpos + ch - 1;
             break;
         default:
             if (_C_UnicodeWriter_WriteCharInline(&writer, ch) < 0)

diff --git a/hpat/_str_ext.cpp b/hpat/_str_ext.cpp
@@ -8,15 +8,9 @@
 
 #include "_str_decode.cpp"
 
-#ifdef USE_BOOST_REGEX
-#include <boost/regex.hpp>
-using boost::regex;
-using boost::regex_search;
-#else
 #include <regex>
 using std::regex;
 using std::regex_search;
-#endif
 
 #include <boost/lexical_cast.hpp>
 
@@ -296,8 +290,8 @@ void str_arr_split_view_impl(str_arr_split_view_payload* out_view, int64_t n_str
     index_offsets[0] = 0;
     // uint32_t curr_data_off = 0;
 
-    int data_ind = offsets[0];
-    int str_ind = 0;
+    uint32_t data_ind = offsets[0];
+    int64_t str_ind = 0;
     // while there are chars to consume, equal since the first if will consume it
     while (data_ind <= total_chars)
     {

diff --git a/hpat/io/_csv.cpp b/hpat/io/_csv.cpp
@@ -309,14 +309,14 @@ static PyObject* csv_chunk_reader(std::istream * f, size_t fsz, bool is_parallel
         std::vector<size_t> line_offset = count_lines(f, hpat_dist_get_node_portion(fsz, nranks, rank));
         size_t no_lines = line_offset.size();
         // get total number of lines using allreduce
-        size_t tot_no_lines(0);
+        int64_t tot_no_lines = 0;
 
         hpat_dist_reduce(reinterpret_cast<char *>(&no_lines), reinterpret_cast<char *>(&tot_no_lines), HPAT_ReduceOps::SUM, HPAT_CTypes::UINT64);
 
         // Now we need to communicate the distribution as we really want it
         // First determine which is our first line (which is the sum of previous lines)
-        size_t byte_first_line = hpat_dist_exscan_i8(no_lines);
-        size_t byte_last_line = byte_first_line + no_lines;
+        int64_t byte_first_line = hpat_dist_exscan_i8(no_lines);
+        int64_t byte_last_line = byte_first_line + no_lines;
 
         // We now determine the chunks of lines that begin and end in our byte-chunk
 
@@ -351,8 +351,8 @@ static PyObject* csv_chunk_reader(std::istream * f, size_t fsz, bool is_parallel
 
         // We iterate through chunk boundaries (defined by line-numbers)
         // we start with boundary 1 as 0 is the beginning of file
-        for(int i=1; i<nranks; ++i) {
-            size_t i_bndry = skiprows + hpat_dist_get_start(n_lines_to_read, (int)nranks, i);
+        for(size_t i=1; i<nranks; ++i) {
+            int64_t i_bndry = skiprows + hpat_dist_get_start(n_lines_to_read, (int)nranks, i);
             // Note our line_offsets mark the end of each line!
             // we check if boundary is on our byte-chunk
             if(i_bndry > byte_first_line && i_bndry <= byte_last_line) {

diff --git a/hpat/stringlib/codecs.h b/hpat/stringlib/codecs.h
@@ -283,7 +283,7 @@ STRINGLIB(utf8_encoder)(char* out_data,
 
     p = (char*)_C_BytesWriter_Alloc(&writer, size * max_char_size);
     if (p == NULL)
-        return NULL;
+        return 0;
 
     for (i = 0; i < size;) {
         Py_UCS4 ch = data[i++];
@@ -304,8 +304,7 @@ STRINGLIB(utf8_encoder)(char* out_data,
         }
 #if STRINGLIB_SIZEOF_CHAR > 1
         else if (Py_UNICODE_IS_SURROGATE(ch)) {
-            Py_ssize_t startpos, endpos, newpos;
-            Py_ssize_t k;
+            Py_ssize_t startpos, endpos;
 
             startpos = i-1;
             endpos = startpos+1;

diff --git a/parquet_reader/hpat_parquet_reader.cpp b/parquet_reader/hpat_parquet_reader.cpp
@@ -449,7 +449,7 @@ int pq_read_string_parallel_single_file(std::shared_ptr<FileReader> arrow_reader
         }
         std::shared_ptr< ::arrow::Array > arr = chunked_arr->chunk(0);
         // std::cout << arr->ToString() << std::endl;
-        int64_t num_values = arr->length();
+
         auto buffers = arr->data()->buffers;
         // std::cout<<"num buffs: "<< buffers.size()<<std::endl;
         if (buffers.size()!=3) {