Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/main'
Browse files Browse the repository at this point in the history
  • Loading branch information
Tmonster committed Dec 7, 2023
2 parents 6bf31e1 + 25906f3 commit c29eb0c
Show file tree
Hide file tree
Showing 67 changed files with 820 additions and 418 deletions.
2 changes: 2 additions & 0 deletions .github/config/out_of_tree_extensions.cmake
Expand Up @@ -11,6 +11,7 @@ if (NOT WIN32)
LOAD_TESTS DONT_LINK
GIT_URL https://github.com/duckdb/arrow
GIT_TAG 1b5b9649d28cd7f79496fb3f2e4dd7b03bf90ac5
APPLY_PATCHES
)
endif()

Expand Down Expand Up @@ -83,5 +84,6 @@ if (NOT WIN32)
LOAD_TESTS DONT_LINK
GIT_URL https://github.com/duckdb/substrait
GIT_TAG 5d621b1d7d16fe86f8b1930870c8e6bf05bcb92a
APPLY_PATCHES
)
endif()
20 changes: 20 additions & 0 deletions .github/patches/extensions/arrow/ubsan_fix.patch
@@ -0,0 +1,20 @@
diff --git a/duckdb b/duckdb
index 3196df7..9723c96 160000
--- a/duckdb
+++ b/duckdb
@@ -1 +1 @@
-Subproject commit 3196df79cecac38c867af084f19924c78dfc9d29
+Subproject commit 9723c96c284735e759138cedc5b00b282c2f2bec
diff --git a/src/arrow_scan_ipc.cpp b/src/arrow_scan_ipc.cpp
index e3bf858..5e45013 100644
--- a/src/arrow_scan_ipc.cpp
+++ b/src/arrow_scan_ipc.cpp
@@ -54,7 +54,7 @@ unique_ptr <FunctionData> ArrowIPCTableFunction::ArrowScanBind(ClientContext &co

// TODO Everything below this is identical to the bind in duckdb/src/function/table/arrow.cpp
auto &data = *res;
- stream_factory_get_schema(stream_factory_ptr, data.schema_root);
+ stream_factory_get_schema((ArrowArrayStream *) stream_factory_ptr, data.schema_root.arrow_schema);
for (idx_t col_idx = 0; col_idx < (idx_t) data.schema_root.arrow_schema.n_children; col_idx++) {
auto &schema = *data.schema_root.arrow_schema.children[col_idx];
if (!schema.release) {
15 changes: 15 additions & 0 deletions .github/patches/extensions/postgres_scanner/default_value.patch
@@ -0,0 +1,15 @@
diff --git a/src/storage/postgres_table_set.cpp b/src/storage/postgres_table_set.cpp
index 88786cf..bfd37ab 100644
--- a/src/storage/postgres_table_set.cpp
+++ b/src/storage/postgres_table_set.cpp
@@ -205,8 +205,8 @@ string PostgresColumnsToSQL(const ColumnList &columns, const vector<unique_ptr<C
}
if (column.Generated()) {
ss << " GENERATED ALWAYS AS(" << column.GeneratedExpression().ToString() << ")";
- } else if (column.DefaultValue()) {
- ss << " DEFAULT(" << column.DefaultValue()->ToString() << ")";
+ } else if (column.HasDefaultValue()) {
+ ss << " DEFAULT(" << column.DefaultValue().ToString() << ")";
}
}
// print any extra constraints that still need to be printed
18 changes: 18 additions & 0 deletions .github/patches/extensions/substrait/get_bind_info.patch
@@ -0,0 +1,18 @@
diff --git a/src/to_substrait.cpp b/src/to_substrait.cpp
index 7ca777e..bac1f35 100644
--- a/src/to_substrait.cpp
+++ b/src/to_substrait.cpp
@@ -1058,11 +1058,11 @@ substrait::Rel *DuckDBToSubstrait::TransformGet(LogicalOperator &dop) {
substrait::Rel *rel = get_rel;
auto &dget = (LogicalGet &)dop;

- if (!dget.function.get_batch_info) {
+ if (!dget.function.get_bind_info) {
throw NotImplementedException("This Scanner Type can't be used in substrait because a get batch info "
"is not yet implemented");
}
- auto bind_info = dget.function.get_batch_info(dget.bind_data.get());
+ auto bind_info = dget.function.get_bind_info(dget.bind_data.get());
auto sget = get_rel->mutable_read();

if (!dget.table_filters.filters.empty()) {
11 changes: 8 additions & 3 deletions .github/workflows/Main.yml
Expand Up @@ -66,11 +66,16 @@ jobs:
shell: bash
run: make debug

- name: Set DUCKDB_INSTALL_LIB for ADBC tests
shell: bash
run: echo "DUCKDB_INSTALL_LIB=$(find `pwd` -name "libduck*.so" | head -n 1)" >> $GITHUB_ENV

- name: Test DUCKDB_INSTALL_LIB variable
run: echo $DUCKDB_INSTALL_LIB

- name: Test
shell: bash
run: |
echo "DUCKDB_INSTALL_LIB=$(find `pwd` -name "libduck*.so" | head -n 1)" >> $GITHUB_ENV
make unittestci
run: make unittestci


force-storage:
Expand Down
11 changes: 8 additions & 3 deletions .github/workflows/OSX.yml
Expand Up @@ -56,12 +56,17 @@ jobs:
shell: bash
run: GEN=ninja make debug

- name: Set DUCKDB_INSTALL_LIB for ADBC tests
shell: bash
run: echo "DUCKDB_INSTALL_LIB=$(find `pwd` -name "libduck*.dylib" | head -n 1)" >> $GITHUB_ENV

- name: Test DUCKDB_INSTALL_LIB variable
run: echo $DUCKDB_INSTALL_LIB

- name: Test
if: ${{ !startsWith(github.ref, 'refs/tags/v') }}
shell: bash
run: |
echo "DUCKDB_INSTALL_LIB=$(find `pwd` -name "libduck*.dylib" | head -n 1)" >> $GITHUB_ENV
make unittestci
run: make unittestci

- name: Amalgamation
if: ${{ !startsWith(github.ref, 'refs/tags/v') }}
Expand Down
9 changes: 8 additions & 1 deletion .github/workflows/Windows.yml
Expand Up @@ -66,11 +66,18 @@ jobs:
cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_GENERATOR_PLATFORM=x64 -DENABLE_EXTENSION_AUTOLOADING=1 -DENABLE_EXTENSION_AUTOINSTALL=1 -DDUCKDB_EXTENSION_CONFIGS="${GITHUB_WORKSPACE}/.github/config/bundled_extensions.cmake" -DBUILD_ODBC_DRIVER=1 -DDISABLE_UNITY=1
cmake --build . --config Release
- name: Set DUCKDB_INSTALL_LIB for ADBC tests
shell: pwsh
run: echo "DUCKDB_INSTALL_LIB=$((Get-ChildItem -Recurse -Filter "duckdb.dll" | Select-Object -First 1).FullName)" >> $GITHUB_ENV

- name: Test DUCKDB_INSTALL_LIB variable
shell: bash
run: echo $DUCKDB_INSTALL_LIB

- name: Test
shell: bash
if: ${{ !startsWith(github.ref, 'refs/tags/v') }}
run: |
echo "DUCKDB_INSTALL_LIB=D:\a\duckdb\duckdb\src\Release\duckdb.dll" >> $env:GITHUB_ENV
test/Release/unittest.exe
- name: Tools Test
Expand Down
4 changes: 2 additions & 2 deletions extension/parquet/parquet_extension.cpp
Expand Up @@ -149,7 +149,7 @@ struct ParquetWriteLocalState : public LocalFunctionData {
ColumnDataAppendState append_state;
};

BindInfo ParquetGetBatchInfo(const FunctionData *bind_data) {
BindInfo ParquetGetBindInfo(const optional_ptr<FunctionData> bind_data) {
auto bind_info = BindInfo(ScanType::PARQUET);
auto &parquet_bind = bind_data->Cast<ParquetReadBindData>();
vector<Value> file_path;
Expand Down Expand Up @@ -317,7 +317,7 @@ class ParquetScanFunction {
table_function.get_batch_index = ParquetScanGetBatchIndex;
table_function.serialize = ParquetScanSerialize;
table_function.deserialize = ParquetScanDeserialize;
table_function.get_batch_info = ParquetGetBatchInfo;
table_function.get_bind_info = ParquetGetBindInfo;
table_function.projection_pushdown = true;
table_function.filter_pushdown = true;
table_function.filter_prune = true;
Expand Down
2 changes: 2 additions & 0 deletions scripts/exported_symbols_check.py
Expand Up @@ -38,6 +38,8 @@
'__udivti3',
'__popcount',
'Adbc',
'ErrorArrayStream',
'ErrorFromArrayStream',
]

for symbol in res.stdout.decode('utf-8').split('\n'):
Expand Down
8 changes: 8 additions & 0 deletions src/CMakeLists.txt
Expand Up @@ -10,6 +10,14 @@ if(NOT MSVC)
set(CMAKE_CXX_FLAGS_DEBUG
"${CMAKE_CXX_FLAGS_DEBUG} -Wextra -Wno-unused-parameter -Wno-redundant-move -Wimplicit-fallthrough"
)
if(CMAKE_COMPILER_IS_GNUCC)
if(CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 6)
set(CMAKE_CXX_FLAGS_DEBUG
"${CMAKE_CXX_FLAGS_DEBUG} -Wimplicit-fallthrough")
endif()
else()
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -Wimplicit-fallthrough")
endif()
endif()
set(EXIT_TIME_DESTRUCTORS_WARNING FALSE)
if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
Expand Down
4 changes: 2 additions & 2 deletions src/catalog/catalog_entry/table_catalog_entry.cpp
Expand Up @@ -136,8 +136,8 @@ string TableCatalogEntry::ColumnsToSQL(const ColumnList &columns, const vector<u
}
if (column.Generated()) {
ss << " GENERATED ALWAYS AS(" << column.GeneratedExpression().ToString() << ")";
} else if (column.DefaultValue()) {
ss << " DEFAULT(" << column.DefaultValue()->ToString() << ")";
} else if (column.HasDefaultValue()) {
ss << " DEFAULT(" << column.DefaultValue().ToString() << ")";
}
}
// print any extra constraints that still need to be printed
Expand Down
99 changes: 52 additions & 47 deletions src/common/adbc/adbc.cpp
Expand Up @@ -14,45 +14,46 @@
#include "duckdb/main/connection.hpp"
#endif

#include "duckdb/common/adbc/options.h"
#include "duckdb/common/adbc/single_batch_array_stream.hpp"
#include "duckdb/function/table/arrow.hpp"

#include "duckdb/common/adbc/options.h"
#include <string.h>
#include <stdlib.h>
#include <string.h>

// We must leak the symbols of the init function
duckdb_adbc::AdbcStatusCode duckdb_adbc_init(size_t count, struct duckdb_adbc::AdbcDriver *driver,
struct duckdb_adbc::AdbcError *error) {
AdbcStatusCode duckdb_adbc_init(int version, void *driver, struct AdbcError *error) {
if (!driver) {
return ADBC_STATUS_INVALID_ARGUMENT;
}

driver->DatabaseNew = duckdb_adbc::DatabaseNew;
driver->DatabaseSetOption = duckdb_adbc::DatabaseSetOption;
driver->DatabaseInit = duckdb_adbc::DatabaseInit;
driver->DatabaseRelease = duckdb_adbc::DatabaseRelease;
driver->ConnectionNew = duckdb_adbc::ConnectionNew;
driver->ConnectionSetOption = duckdb_adbc::ConnectionSetOption;
driver->ConnectionInit = duckdb_adbc::ConnectionInit;
driver->ConnectionRelease = duckdb_adbc::ConnectionRelease;
driver->ConnectionGetTableTypes = duckdb_adbc::ConnectionGetTableTypes;
driver->StatementNew = duckdb_adbc::StatementNew;
driver->StatementRelease = duckdb_adbc::StatementRelease;
driver->StatementBind = duckdb_adbc::StatementBind;
driver->StatementBindStream = duckdb_adbc::StatementBindStream;
driver->StatementExecuteQuery = duckdb_adbc::StatementExecuteQuery;
driver->StatementPrepare = duckdb_adbc::StatementPrepare;
driver->StatementSetOption = duckdb_adbc::StatementSetOption;
driver->StatementSetSqlQuery = duckdb_adbc::StatementSetSqlQuery;
driver->ConnectionGetObjects = duckdb_adbc::ConnectionGetObjects;
driver->ConnectionCommit = duckdb_adbc::ConnectionCommit;
driver->ConnectionRollback = duckdb_adbc::ConnectionRollback;
driver->ConnectionReadPartition = duckdb_adbc::ConnectionReadPartition;
driver->StatementExecutePartitions = duckdb_adbc::StatementExecutePartitions;
driver->ConnectionGetInfo = duckdb_adbc::ConnectionGetInfo;
driver->StatementGetParameterSchema = duckdb_adbc::StatementGetParameterSchema;
driver->ConnectionGetTableSchema = duckdb_adbc::ConnectionGetTableSchema;
driver->StatementSetSubstraitPlan = duckdb_adbc::StatementSetSubstraitPlan;
auto adbc_driver = reinterpret_cast<AdbcDriver *>(driver);

adbc_driver->DatabaseNew = duckdb_adbc::DatabaseNew;
adbc_driver->DatabaseSetOption = duckdb_adbc::DatabaseSetOption;
adbc_driver->DatabaseInit = duckdb_adbc::DatabaseInit;
adbc_driver->DatabaseRelease = duckdb_adbc::DatabaseRelease;
adbc_driver->ConnectionNew = duckdb_adbc::ConnectionNew;
adbc_driver->ConnectionSetOption = duckdb_adbc::ConnectionSetOption;
adbc_driver->ConnectionInit = duckdb_adbc::ConnectionInit;
adbc_driver->ConnectionRelease = duckdb_adbc::ConnectionRelease;
adbc_driver->ConnectionGetTableTypes = duckdb_adbc::ConnectionGetTableTypes;
adbc_driver->StatementNew = duckdb_adbc::StatementNew;
adbc_driver->StatementRelease = duckdb_adbc::StatementRelease;
adbc_driver->StatementBind = duckdb_adbc::StatementBind;
adbc_driver->StatementBindStream = duckdb_adbc::StatementBindStream;
adbc_driver->StatementExecuteQuery = duckdb_adbc::StatementExecuteQuery;
adbc_driver->StatementPrepare = duckdb_adbc::StatementPrepare;
adbc_driver->StatementSetOption = duckdb_adbc::StatementSetOption;
adbc_driver->StatementSetSqlQuery = duckdb_adbc::StatementSetSqlQuery;
adbc_driver->ConnectionGetObjects = duckdb_adbc::ConnectionGetObjects;
adbc_driver->ConnectionCommit = duckdb_adbc::ConnectionCommit;
adbc_driver->ConnectionRollback = duckdb_adbc::ConnectionRollback;
adbc_driver->ConnectionReadPartition = duckdb_adbc::ConnectionReadPartition;
adbc_driver->StatementExecutePartitions = duckdb_adbc::StatementExecutePartitions;
adbc_driver->ConnectionGetInfo = duckdb_adbc::ConnectionGetInfo;
adbc_driver->StatementGetParameterSchema = duckdb_adbc::StatementGetParameterSchema;
adbc_driver->ConnectionGetTableSchema = duckdb_adbc::ConnectionGetTableSchema;
adbc_driver->StatementSetSubstraitPlan = duckdb_adbc::StatementSetSubstraitPlan;
return ADBC_STATUS_OK;
}

Expand All @@ -74,28 +75,31 @@ static AdbcStatusCode QueryInternal(struct AdbcConnection *connection, struct Ar

auto status = StatementNew(connection, &statement, error);
if (status != ADBC_STATUS_OK) {
StatementRelease(&statement, error);
SetError(error, "unable to initialize statement");
return status;
}
status = StatementSetSqlQuery(&statement, query, error);
if (status != ADBC_STATUS_OK) {
StatementRelease(&statement, error);
SetError(error, "unable to initialize statement");
return status;
}
status = StatementExecuteQuery(&statement, out, nullptr, error);
if (status != ADBC_STATUS_OK) {
StatementRelease(&statement, error);
SetError(error, "unable to initialize statement");
return status;
}

StatementRelease(&statement, error);
return ADBC_STATUS_OK;
}

struct DuckDBAdbcDatabaseWrapper {
//! The DuckDB Database Configuration
::duckdb_config config;
::duckdb_config config = nullptr;
//! The DuckDB Database
::duckdb_database database;
::duckdb_database database = nullptr;
//! Path of Disk-Based Database or :memory: database
std::string path;
};
Expand Down Expand Up @@ -124,7 +128,7 @@ AdbcStatusCode CheckResult(duckdb_state &res, AdbcError *error, const char *erro
return ADBC_STATUS_INVALID_ARGUMENT;
}
if (res != DuckDBSuccess) {
duckdb_adbc::SetError(error, error_msg);
SetError(error, error_msg);
return ADBC_STATUS_INTERNAL;
}
return ADBC_STATUS_OK;
Expand Down Expand Up @@ -197,14 +201,18 @@ AdbcStatusCode DatabaseInit(struct AdbcDatabase *database, struct AdbcError *err
return ADBC_STATUS_INVALID_ARGUMENT;
}
if (!database) {
duckdb_adbc::SetError(error, "ADBC Database has an invalid pointer");
SetError(error, "ADBC Database has an invalid pointer");
return ADBC_STATUS_INVALID_ARGUMENT;
}
char *errormsg;
char *errormsg = nullptr;
// TODO can we set the database path via option, too? Does not look like it...
auto wrapper = (DuckDBAdbcDatabaseWrapper *)database->private_data;
auto res = duckdb_open_ext(wrapper->path.c_str(), &wrapper->database, wrapper->config, &errormsg);
return CheckResult(res, error, errormsg);
auto adbc_result = CheckResult(res, error, errormsg);
if (errormsg) {
free(errormsg);
}
return adbc_result;
}

AdbcStatusCode DatabaseRelease(struct AdbcDatabase *database, struct AdbcError *error) {
Expand Down Expand Up @@ -548,20 +556,17 @@ const char *get_last_error(struct ArrowArrayStream *stream) {
// this is an evil hack, normally we would need a stream factory here, but it's probably much easier if the adbc clients
// just hand over a stream

duckdb::unique_ptr<duckdb::ArrowArrayStreamWrapper>
stream_produce(uintptr_t factory_ptr,
std::pair<std::unordered_map<idx_t, std::string>, std::vector<std::string>> &project_columns,
duckdb::TableFilterSet *filters) {
duckdb::unique_ptr<duckdb::ArrowArrayStreamWrapper> stream_produce(uintptr_t factory_ptr,
duckdb::ArrowStreamParameters &parameters) {

// TODO this will ignore any projections or filters but since we don't expose the scan it should be sort of fine
auto res = duckdb::make_uniq<duckdb::ArrowArrayStreamWrapper>();
res->arrow_array_stream = *(ArrowArrayStream *)factory_ptr;
return res;
}

void stream_schema(uintptr_t factory_ptr, duckdb::ArrowSchemaWrapper &schema) {
auto stream = (ArrowArrayStream *)factory_ptr;
get_schema(stream, &schema.arrow_schema);
void stream_schema(ArrowArrayStream *stream, ArrowSchema &schema) {
stream->get_schema(stream, &schema);
}

AdbcStatusCode Ingest(duckdb_connection connection, const char *table_name, struct ArrowArrayStream *input,
Expand All @@ -584,7 +589,7 @@ AdbcStatusCode Ingest(duckdb_connection connection, const char *table_name, stru

auto arrow_scan = cconn->TableFunction("arrow_scan", {duckdb::Value::POINTER((uintptr_t)input),
duckdb::Value::POINTER((uintptr_t)stream_produce),
duckdb::Value::POINTER((uintptr_t)input->get_schema)});
duckdb::Value::POINTER((uintptr_t)stream_schema)});
try {
if (ingestion_mode == IngestionMode::CREATE) {
// We create the table based on an Arrow Scanner
Expand Down Expand Up @@ -699,7 +704,7 @@ AdbcStatusCode GetPreparedParameters(duckdb_connection connection, duckdb::uniqu
try {
auto arrow_scan = cconn->TableFunction("arrow_scan", {duckdb::Value::POINTER((uintptr_t)input),
duckdb::Value::POINTER((uintptr_t)stream_produce),
duckdb::Value::POINTER((uintptr_t)input->get_schema)});
duckdb::Value::POINTER((uintptr_t)stream_schema)});
result = arrow_scan->Execute();
// After creating a table, the arrow array stream is released. Hence we must set it as released to avoid
// double-releasing it
Expand Down

0 comments on commit c29eb0c

Please sign in to comment.