diff --git a/LICENSE b/LICENSE new file mode 100644 index 000000000..ea28923fc --- /dev/null +++ b/LICENSE @@ -0,0 +1,13 @@ +Copyright 2023 Molecula Corp. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. diff --git a/Makefile b/Makefile index 8451ce00a..2f76cb660 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,6 @@ .PHONY: build clean build-lattice cover cover-viz default docker docker-build docker-tag-push generate generate-protoc generate-pql generate-statik generate-stringer install install-protoc-gen-gofast install-protoc install-statik install-peg test docker-login +SHELL := /bin/bash VERSION := $(shell git describe --tags 2> /dev/null || echo unknown) VARIANT = Molecula GO=go @@ -76,6 +77,16 @@ testvsub: echo; echo "999 done testing subpkg $$pkg"; \ done +# make a 2GB RAMDisk. Speed up tests by running them with RAMDISK=/mnt/ramdisk +ramdisk-linux: + mount -o size=2G -t tmpfs none /mnt/ramdisk + +# make a 2GB RAMDisk. Speed up tests by running them with RAMDISK=/Volumes/RAMDisk +ramdisk-osx: + diskutil erasevolume HFS+ 'RAMDisk' `hdiutil attach -nobrowse -nomount ram://4194304` + +detach-ramdisk-osx: + hdiutil detach /Volumes/RAMDisk testvsub-race: @set -e; for pkg in $(GOPACKAGES); do \ @@ -228,15 +239,16 @@ build-for-quick: docker-image-featurebase-quick: build-for-quick docker build \ --build-arg GO_VERSION=$(GO_VERSION) \ - --file Dockerfile-dax-quick ./.quick/ + --file Dockerfile-dax-quick \ + --tag dax/featurebase ./.quick/ docker-image-datagen: vendor docker build --tag dax/datagen --file Dockerfile-datagen . 
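# Usage sketch for the RAM-disk targets above (an assumption, not part of the
# original Makefile: it presumes the test harness picks up the RAMDISK variable
# mentioned in the comments, that /mnt/ramdisk already exists, and that mounting
# tmpfs requires root):
#
#   Linux:  sudo make ramdisk-linux && RAMDISK=/mnt/ramdisk make test
#   macOS:  make ramdisk-osx && RAMDISK=/Volumes/RAMDisk make test
#           make detach-ramdisk-osx   # tear down the RAM disk when finished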
ecr-push-featurebase: docker-login - docker tag dax/featurebase:latest $(AWS_ACCOUNTID).dkr.ecr.us-east-2.amazonaws.com/dax:latest - docker push $(AWS_ACCOUNTID).dkr.ecr.us-east-2.amazonaws.com/dax:latest + docker tag dax/featurebase:latest $(AWS_ACCOUNTID).dkr.ecr.us-east-2.amazonaws.com/dax/featurebase:latest + docker push $(AWS_ACCOUNTID).dkr.ecr.us-east-2.amazonaws.com/dax/featurebase:latest ecr-push-datagen: docker-login docker tag dax/datagen:latest $(AWS_ACCOUNTID).dkr.ecr.us-east-2.amazonaws.com/dax/datagen:latest diff --git a/api.go b/api.go index 27254dc47..be059e4f0 100644 --- a/api.go +++ b/api.go @@ -25,6 +25,8 @@ import ( fbcontext "github.com/featurebasedb/featurebase/v3/context" "github.com/featurebasedb/featurebase/v3/dax" "github.com/featurebasedb/featurebase/v3/dax/computer" + "github.com/featurebasedb/featurebase/v3/dax/storage" + "github.com/featurebasedb/featurebase/v3/logger" "github.com/featurebasedb/featurebase/v3/disco" "github.com/featurebasedb/featurebase/v3/rbf" @@ -55,9 +57,7 @@ type API struct { Serializer Serializer - writeLogReader computer.WriteLogReader - writeLogWriter computer.WriteLogWriter - snapshotReadWriter computer.SnapshotReadWriter + serverlessStorage *storage.ResourceManager directiveWorkerPoolSize int @@ -70,6 +70,10 @@ func (api *API) Holder() *Holder { return api.holder } +func (api *API) logger() logger.Logger { + return api.server.logger +} + // apiOption is a functional option type for pilosa.API type apiOption func(*API) error @@ -83,30 +87,16 @@ func OptAPIServer(s *Server) apiOption { } } -func OptAPIImportWorkerPoolSize(size int) apiOption { +func OptAPIServerlessStorage(mm *storage.ResourceManager) apiOption { return func(a *API) error { - a.importWorkerPoolSize = size + a.serverlessStorage = mm return nil } } -func OptAPIWriteLogReader(wlr computer.WriteLogReader) apiOption { - return func(a *API) error { - a.writeLogReader = wlr - return nil - } -} - -func OptAPIWriteLogWriter(wlw computer.WriteLogWriter) apiOption { - return func(a *API) error { - a.writeLogWriter = wlw - return nil - } -} - -func OptAPISnapshotter(snap computer.SnapshotReadWriter) apiOption { +func OptAPIImportWorkerPoolSize(size int) apiOption { return func(a *API) error { - a.snapshotReadWriter = snap + a.importWorkerPoolSize = size return nil } } @@ -129,9 +119,6 @@ func OptAPIIsComputeNode(is bool) apiOption { func NewAPI(opts ...apiOption) (*API, error) { api := &API{ importWorkerPoolSize: 2, - writeLogReader: computer.NewNopWriteLogReader(), - writeLogWriter: computer.NewNopWriteLogWriter(), - snapshotReadWriter: computer.NewNopSnapshotReadWriter(), directiveWorkerPoolSize: 2, } @@ -250,7 +237,7 @@ func (api *API) query(ctx context.Context, req *QueryRequest) (QueryResponse, er EmbeddedData: req.EmbeddedData, // precomputed values that needed to be passed with the request MaxMemory: req.MaxMemory, } - resp, err := api.server.executor.Execute(ctx, req.Index, q, req.Shards, execOpts) + resp, err := api.server.executor.Execute(ctx, dax.StringTableKeyer(req.Index), q, req.Shards, execOpts) if err != nil { return QueryResponse{}, errors.Wrap(err, "executing") } @@ -709,20 +696,20 @@ func (api *API) ImportRoaring(ctx context.Context, indexName, fieldName string, Views: req.Views, } - // Get the current version for shard. 
- version, err := api.getOrCreateShardVersion(ctx, indexName, shard) - if err != nil { - return errors.Wrap(err, "get or creating shard version") - } - tkey := dax.TableKey(indexName) qtid := tkey.QualifiedTableID() partitionNum := dax.PartitionNum(partition) shardNum := dax.ShardNum(shard) - api.server.logger.Debugf("importroaring writing to writelogger: %+v, %[1]T len(msg.Views): %d, table: %s", api.writeLogWriter, len(msg.Views), msg.Table) - if err := api.writeLogWriter.WriteShard(ctx, qtid, partitionNum, shardNum, version, msg); err != nil { - return err + b, err := computer.MarshalLogMessage(msg, computer.EncodeTypeJSON) + if err != nil { + return errors.Wrap(err, "marshalling log message") + } + + resource := api.serverlessStorage.GetShardResource(qtid, partitionNum, shardNum) + err = resource.Append(b) + if err != nil { + return errors.Wrap(err, "appending shard data") // TODO do we need to set err0 or something? } } @@ -731,29 +718,6 @@ func (api *API) ImportRoaring(ctx context.Context, indexName, fieldName string, } } -func (api *API) getOrCreateShardVersion(ctx context.Context, indexName string, shard uint64) (int, error) { - tableName := dax.TableName(indexName) - shardNum := dax.ShardNum(shard) - - // Here we assume that indexName is the string encoding of QualifiedTableID. - qtid, err := dax.QualifiedTableIDFromKey(indexName) - if err != nil { - return -1, errors.Wrap(err, "decoding qtid from key (indexName)") - } - - version, found, err := api.holder.versionStore.ShardVersion(ctx, qtid, shardNum) - if err != nil { - return -1, errors.Wrap(err, "getting shard version") - } else if !found { - version = 0 - api.server.logger.Printf("could not find version for shard: %s, %d, so creating 0", tableName, shardNum) - if err := api.holder.versionStore.AddShards(ctx, qtid, dax.NewVersionedShard(shardNum, version)); err != nil { - return -1, errors.Wrap(err, "adding shard 0") - } - } - return version, nil -} - // DeleteField removes the named field from the named index. If the index is not // found, an error is returned. If the field is not found, it is ignored and no // action is taken. @@ -982,7 +946,7 @@ func (r RedirectError) Error() string { } // TranslateData returns all translation data in the specified partition. -func (api *API) TranslateData(ctx context.Context, indexName string, partition int) (io.WriterTo, error) { +func (api *API) TranslateData(ctx context.Context, indexName string, partition int) (TranslateStore, error) { span, _ := tracing.StartSpanFromContext(ctx, "API.TranslateData") defer span.Finish() @@ -1037,7 +1001,7 @@ func (api *API) TranslateData(ctx context.Context, indexName string, partition i } // FieldTranslateData returns all translation data in the specified field. -func (api *API) FieldTranslateData(ctx context.Context, indexName, fieldName string) (io.WriterTo, error) { +func (api *API) FieldTranslateData(ctx context.Context, indexName, fieldName string) (TranslateStore, error) { span, _ := tracing.StartSpanFromContext(ctx, "API.FieldTranslateData") defer span.Finish() if err := api.validate(apiFieldTranslateData); err != nil { @@ -1286,8 +1250,13 @@ func (api *API) DeleteView(ctx context.Context, indexName string, fieldName stri return errors.Wrap(err, "sending DeleteView message") } -// IndexShardSnapshot returns a reader that contains the contents of an RBF snapshot for an index/shard. 
-func (api *API) IndexShardSnapshot(ctx context.Context, indexName string, shard uint64) (io.ReadCloser, error) { +// IndexShardSnapshot returns a reader that contains the contents of +// an RBF snapshot for an index/shard. When snapshotting for +// serverless, we need to be able to transactionally move the write +// log to the new version, so we expose writeTx to allow the caller to +// request a write transaction for the snapshot even though we'll just +// be reading inside RBF. +func (api *API) IndexShardSnapshot(ctx context.Context, indexName string, shard uint64, writeTx bool) (io.ReadCloser, error) { span, _ := tracing.StartSpanFromContext(ctx, "API.IndexShardSnapshot") defer span.Finish() @@ -1298,7 +1267,7 @@ func (api *API) IndexShardSnapshot(ctx context.Context, indexName string, shard } // Start transaction. - tx := index.holder.txf.NewTx(Txo{Index: index, Shard: shard}) + tx := index.holder.txf.NewTx(Txo{Index: index, Shard: shard, Write: writeTx}) // Ensure transaction is an RBF transaction. rtx, ok := tx.(*RBFTx) @@ -1517,20 +1486,20 @@ func (api *API) Import(ctx context.Context, qcx *Qcx, req *ImportRequest, opts . } if api.isComputeNode && !options.suppressLog { - // Get the current version for shard. - version, err := api.getOrCreateShardVersion(ctx, req.Index, req.Shard) - if err != nil { - return errors.Wrap(err, "get or creating shard version") - } - tkey := dax.TableKey(req.Index) qtid := tkey.QualifiedTableID() partitionNum := dax.PartitionNum(partition) shardNum := dax.ShardNum(req.Shard) - // Write the request to the write logger. - if err := api.writeLogWriter.WriteShard(ctx, qtid, partitionNum, shardNum, version, msg); err != nil { - return err + b, err := computer.MarshalLogMessage(msg, computer.EncodeTypeJSON) + if err != nil { + return errors.Wrap(err, "marshalling log message") + } + + resource := api.serverlessStorage.GetShardResource(qtid, partitionNum, shardNum) + err = resource.Append(b) + if err != nil { + return errors.Wrap(err, "appending shard data") // TODO do we need to set err0 or something? } } @@ -1765,21 +1734,21 @@ func (api *API) ImportRoaringShard(ctx context.Context, indexName string, shard ClearRecords: view.ClearRecords, } } - // Get the current version for shard. - version, err := api.getOrCreateShardVersion(ctx, indexName, shard) - if err != nil { - err1 = errors.Wrap(err, "get or creating shard version") - return err1 - } + tkey := dax.TableKey(indexName) qtid := tkey.QualifiedTableID() partitionNum := dax.PartitionNum(partition) shardNum := dax.ShardNum(shard) - api.server.logger.Debugf("importroaringshard writing shard to writelogger: %+v, len(msg.Views): %d, table: %s", api.writeLogWriter, len(msg.Views), msg.Table) + b, err := computer.MarshalLogMessage(msg, computer.EncodeTypeJSON) + if err != nil { + err1 = errors.Wrap(err, "marshalling log message") + return err1 + } - if err := api.writeLogWriter.WriteShard(ctx, qtid, partitionNum, shardNum, version, msg); err != nil { - err1 = errors.Wrap(err, "writing import-roaring-shard to writelogger") + resource := api.serverlessStorage.GetShardResource(qtid, partitionNum, shardNum) + err1 = errors.Wrap(resource.Append(b), "appending shard data") + if err1 != nil { return err1 } } @@ -1860,20 +1829,21 @@ func (api *API) ImportValue(ctx context.Context, qcx *Qcx, req *ImportValueReque if api.isComputeNode && !options.suppressLog { // Get the current version for shard. 
- version, err := api.getOrCreateShardVersion(ctx, req.Index, req.Shard) - if err != nil { - return errors.Wrap(err, "get or creating shard version") - } - tkey := dax.TableKey(req.Index) qtid := tkey.QualifiedTableID() partitionNum := dax.PartitionNum(partition) shardNum := dax.ShardNum(req.Shard) + b, err := computer.MarshalLogMessage(msg, computer.EncodeTypeJSON) + if err != nil { + return errors.Wrap(err, "marshalling log message") + } - // Write the request to the write logger. - if err := api.writeLogWriter.WriteShard(ctx, qtid, partitionNum, shardNum, version, msg); err != nil { - return errors.Wrap(err, "writing shard to write logger") + resource := api.serverlessStorage.GetShardResource(qtid, partitionNum, shardNum) + err = resource.Append(b) + if err != nil { + return errors.Wrap(err, "appending shard data") // TODO do we need to set err0 or something? } + } return nil @@ -3107,52 +3077,43 @@ func (api *API) DirectiveApplied(ctx context.Context) (bool, error) { // SnapshotShardData triggers the node to perform a shard snapshot based on the // provided SnapshotShardDataRequest. func (api *API) SnapshotShardData(ctx context.Context, req *dax.SnapshotShardDataRequest) error { - qtid := req.TableKey.QualifiedTableID() - - // Confirm that this node is currently responsible for table/shard/fromVersion. - var version int - if v, ok, err := api.holder.versionStore.ShardVersion(ctx, qtid, req.ShardNum); err != nil { - return err - } else if !ok { - return errors.Errorf("shard not managed by this node: %s, %d", req.TableKey, req.ShardNum) - } else if v != req.FromVersion { - return errors.Errorf("shard managed by this node is at version: %d, not: %d", v, req.FromVersion) - } else { - version = v + if !api.holder.DirectiveApplied() { + return errors.New("don't have directive yet, can't snapshot shard") } + // TODO(jaffee) confirm this node is actually responsible for the given + // shard? Not sure we need to given that this request comes from + // MDS, but might be a belt&suspenders situation. + + qtid := req.TableKey.QualifiedTableID() partition := disco.ShardToShardPartition(string(req.TableKey), uint64(req.ShardNum), disco.DefaultPartitionN) partitionNum := dax.PartitionNum(partition) - // Create the snapshot for the current version. - rc, err := api.IndexShardSnapshot(ctx, string(req.TableKey), uint64(req.ShardNum)) + // Open a write Tx snapshotting current version. + rc, err := api.IndexShardSnapshot(ctx, string(req.TableKey), uint64(req.ShardNum), true) if err != nil { return errors.Wrap(err, "getting index/shard readcloser") } + defer rc.Close() - // The following closes rc, the ReadCloser. - if err := api.snapshotReadWriter.WriteShardData(ctx, qtid, partitionNum, req.ShardNum, version, rc); err != nil { - return errors.Wrap(err, "snapshotting shard data") - } - - // Increment the version of the shard managed by this node. - if err := api.holder.versionStore.AddShards(ctx, qtid, - dax.NewVersionedShard(req.ShardNum, req.ToVersion), - ); err != nil { - return errors.Wrap(err, "incrementing shard version locally") + resource := api.serverlessStorage.GetShardResource(qtid, partitionNum, req.ShardNum) + // Bump writelog version while write Tx is held. + if ok, err := resource.IncrementWLVersion(); err != nil { + return errors.Wrap(err, "incrementing write log version") + } else if !ok { + return nil } - - // Update the cached directive on the holder. 
- api.holder.SetDirective(&req.Directive) - api.holder.SetDirectiveApplied(true) - - // Finally, delete the log file for the previous version. - return api.writeLogWriter.DeleteShard(ctx, qtid, partitionNum, req.ShardNum, req.FromVersion) + // TODO(jaffee) look into downgrading Tx on RBF to read lock here now that WL version is incremented. + err = resource.Snapshot(rc) + return errors.Wrap(err, "snapshotting shard data") } // SnapshotTableKeys triggers the node to perform a table keys snapshot based on // the provided SnapshotTableKeysRequest. func (api *API) SnapshotTableKeys(ctx context.Context, req *dax.SnapshotTableKeysRequest) error { + if !api.holder.DirectiveApplied() { + return errors.New("don't have directive yet, can't snapshot table keys") + } // If the index is not keyed, no-op on snapshotting its keys. if idx, err := api.Index(ctx, string(req.TableKey)); err != nil { return newNotFoundError(ErrIndexNotFound, string(req.TableKey)) @@ -3162,83 +3123,60 @@ func (api *API) SnapshotTableKeys(ctx context.Context, req *dax.SnapshotTableKey qtid := req.TableKey.QualifiedTableID() - // Confirm that this node is currently responsible for table/partition/fromVersion. - var version int - if v, ok, err := api.holder.versionStore.PartitionVersion(ctx, qtid, req.PartitionNum); err != nil { - return err - } else if !ok { - return errors.Errorf("partition not managed by this node: %s, %d", req.TableKey, req.PartitionNum) - } else if v != req.FromVersion { - return errors.Errorf("partition managed by this node is at version: %d, not: %d", v, req.FromVersion) - } else { - version = v - } - // Create the snapshot for the current version. - wrTo, err := api.TranslateData(ctx, string(req.TableKey), int(req.PartitionNum)) + trans, err := api.TranslateData(ctx, string(req.TableKey), int(req.PartitionNum)) if err != nil { - return errors.Wrapf(err, "getting index/partition writeto: %s/%d", req.TableKey, req.PartitionNum) + return errors.Wrapf(err, "getting index/partition translate store: %s/%d", req.TableKey, req.PartitionNum) } - - if err := api.snapshotReadWriter.WriteTableKeys(ctx, qtid, req.PartitionNum, version, wrTo); err != nil { - return errors.Wrap(err, "snapshotting table keys") + // get a write tx to ensure no other writes while incrementing WL version. + wrTo, err := trans.Begin(true) + if err != nil { + return errors.Wrap(err, "beginning table translate write tx") } + defer wrTo.Rollback() - // Increment the version of the partition managed by this node. - if err := api.holder.versionStore.AddPartitions(ctx, qtid, - dax.NewVersionedPartition(req.PartitionNum, req.ToVersion), - ); err != nil { - return errors.Wrap(err, "incrementing partition version locally") + resource := api.serverlessStorage.GetTableKeyResource(qtid, req.PartitionNum) + if ok, err := resource.IncrementWLVersion(); err != nil { + return errors.Wrap(err, "incrementing write log version") + } else if !ok { + // no need to snapshot, no writes + return nil } - - // Update the cached directive on the holder. - api.holder.SetDirective(&req.Directive) - api.holder.SetDirectiveApplied(true) - - // Finally, delete the log file for the previous version. - return api.writeLogWriter.DeleteTableKeys(ctx, qtid, req.PartitionNum, req.FromVersion) + // TODO(jaffee) downgrade write tx to read-only + err = resource.SnapshotTo(wrTo) + return errors.Wrap(err, "snapshotting table keys") } // SnapshotFieldKeys triggers the node to perform a field keys snapshot based on // the provided SnapshotFieldKeysRequest. 
func (api *API) SnapshotFieldKeys(ctx context.Context, req *dax.SnapshotFieldKeysRequest) error { - qtid := req.TableKey.QualifiedTableID() - - // Confirm that this node is currently responsible for table/field/fromVersion. - var version int - if v, ok, err := api.holder.versionStore.FieldVersion(ctx, qtid, req.Field); err != nil { - return err - } else if !ok { - return errors.Errorf("field not managed by this node: %s, %s", req.TableKey, req.Field) - } else if v != req.FromVersion { - return errors.Errorf("field managed by this node is at version: %d, not: %d", v, req.FromVersion) - } else { - version = v + if !api.holder.DirectiveApplied() { + return errors.New("don't have directive yet, can't snapshot field keys") } + qtid := req.TableKey.QualifiedTableID() // Create the snapshot for the current version. - wrTo, err := api.FieldTranslateData(ctx, string(req.TableKey), string(req.Field)) + trans, err := api.FieldTranslateData(ctx, string(req.TableKey), string(req.Field)) if err != nil { - return errors.Wrap(err, "getting index/field writeto") + return errors.Wrap(err, "getting index/field translator") } - - if err := api.snapshotReadWriter.WriteFieldKeys(ctx, qtid, req.Field, version, wrTo); err != nil { - return errors.Wrap(err, "snapshotting field keys") + // get a write tx to ensure no other writes while incrementing WL version. + wrTo, err := trans.Begin(true) + if err != nil { + return errors.Wrap(err, "beginning field translate write tx") } + defer wrTo.Rollback() - // Increment the version of the field managed by this node. - if err := api.holder.versionStore.AddFields(ctx, qtid, - dax.NewVersionedField(req.Field, req.ToVersion), - ); err != nil { - return errors.Wrap(err, "incrementing field version locally") + resource := api.serverlessStorage.GetFieldKeyResource(qtid, req.Field) + if ok, err := resource.IncrementWLVersion(); err != nil { + return errors.Wrap(err, "incrementing writelog version") + } else if !ok { + // no need to snapshot, no writes + return nil } - - // Update the cached directive on the holder. - api.holder.SetDirective(&req.Directive) - api.holder.SetDirectiveApplied(true) - - // Finally, delete the log file for the previous version. 
- return api.writeLogWriter.DeleteFieldKeys(ctx, qtid, req.Field, req.FromVersion) + // TODO(jaffee) downgrade to read tx + err = resource.SnapshotTo(wrTo) + return errors.Wrap(err, "snapshotTo in FieldKeys") } type serverInfo struct { @@ -3365,9 +3303,9 @@ var methodsNormal = map[apiMethod]struct{}{ apiDeleteDataframe: {}, } -func shardInShards(i dax.ShardNum, s dax.VersionedShards) bool { +func shardInShards(i dax.ShardNum, s dax.ShardNums) bool { for _, o := range s { - if i == o.Num { + if i == o { return true } } @@ -3386,11 +3324,6 @@ type SchemaAPI interface { DeleteField(ctx context.Context, tname dax.TableName, fname dax.FieldName) error } -type SchemaInfoAPI interface { - IndexInfo(ctx context.Context, indexName string) (*IndexInfo, error) - FieldInfo(ctx context.Context, indexName, fieldName string) (*FieldInfo, error) -} - type ClusterNode struct { ID string State string @@ -3408,6 +3341,7 @@ type SystemAPI interface { ClusterReplicaCount() int ShardWidth() int ClusterState() string + DataDir() string ClusterNodes() []ClusterNode } @@ -3476,6 +3410,10 @@ func (fsapi *FeatureBaseSystemAPI) ClusterState() string { return string(state) } +func (fsapi *FeatureBaseSystemAPI) DataDir() string { + return fsapi.server.dataDir +} + func (fsapi *FeatureBaseSystemAPI) ClusterNodes() []ClusterNode { result := make([]ClusterNode, 0) diff --git a/api_directive.go b/api_directive.go index d7650c0b0..1e321e781 100644 --- a/api_directive.go +++ b/api_directive.go @@ -9,6 +9,7 @@ import ( "github.com/featurebasedb/featurebase/v3/dax" "github.com/featurebasedb/featurebase/v3/dax/computer" + "github.com/featurebasedb/featurebase/v3/dax/storage" "github.com/featurebasedb/featurebase/v3/disco" "github.com/pkg/errors" ) @@ -41,6 +42,9 @@ func (api *API) ApplyDirective(ctx context.Context, d *dax.Directive) error { if err := api.deleteAllIndexes(ctx); err != nil { return errors.Wrap(err, "deleting all indexes") } + if err := api.serverlessStorage.RemoveAll(); err != nil { + return errors.Wrap(err, "removing all managers") + } // Set previousDirective to empty so the diff handles everything as new. previousDirective = dax.Directive{} @@ -102,19 +106,19 @@ type directiveJobTableKeys struct { directiveJobType idx *Index tkey dax.TableKey - partition dax.VersionedPartition + partition dax.PartitionNum } type directiveJobFieldKeys struct { directiveJobType tkey dax.TableKey - field dax.VersionedField + field dax.FieldName } type directiveJobShards struct { directiveJobType tkey dax.TableKey - shard dax.VersionedShard + shard dax.ShardNum } // directiveWorker is a worker in a worker pool which handles portions of a @@ -348,42 +352,38 @@ func (api *API) pushJobsTableKeys(ctx context.Context, jobs chan<- directiveJobT } } -func (api *API) loadTableKeys(ctx context.Context, idx *Index, tkey dax.TableKey, partition dax.VersionedPartition) error { +func (api *API) loadTableKeys(ctx context.Context, idx *Index, tkey dax.TableKey, partition dax.PartitionNum) error { qtid := tkey.QualifiedTableID() - // Load the previous snapshot. Version 0 doesn't have a snapshot - // file; it only has log entries. 
- if partition.Version > 0 { - // Load partition snapshot: version - 1 - previousVersion := partition.Version - 1 - rc, err := api.snapshotReadWriter.ReadTableKeys(ctx, qtid, partition.Num, previousVersion) - if err != nil { - return errors.Wrap(err, "reading table keys snapshot") - } - defer rc.Close() + resource := api.serverlessStorage.GetTableKeyResource(qtid, partition) + if resource.IsLocked() { + api.logger().Warnf("skipping loadTableKeys (already held) %s %d", tkey, partition) + return nil + } - if err := api.TranslateIndexDB(ctx, string(tkey), int(partition.Num), rc); err != nil { + // load latest snapshot + if rc, err := resource.LoadLatestSnapshot(); err != nil { + return errors.Wrap(err, "loading table key snapshot") + } else if rc != nil { + defer rc.Close() + if err := api.TranslateIndexDB(ctx, string(tkey), int(partition), rc); err != nil { return errors.Wrap(err, "restoring table keys") - } } - if err := func() error { - store := idx.TranslateStore(int(partition.Num)) - - reader := api.writeLogReader.TableKeyReader(ctx, qtid, partition.Num, partition.Version) - if err := reader.Open(); err != nil { - // TODO: this log can be confusing because on a create - // table, there is no log file yet, so an error is expected. - // Instead of swallowing this error, we need to check the - // error code and handle it differently. This means the - // writelogger will need to return an error indicating that - // the log file does not exist, but that that is expected. - // log.Printf("could not open log file for table: %s, partition: %d: version: %d, err: %s", table, partition.Num, partition.Version, err) + // define write log loading in a function since we have to do it + // before and after locking + loadWriteLog := func() error { + writelog, err := resource.LoadWriteLog() + if err != nil { + return errors.Wrap(err, "getting write log reader for table keys") + } + if writelog == nil { return nil } + reader := storage.NewTableKeyReader(qtid, partition, writelog) defer reader.Close() - + store := idx.TranslateStore(int(partition)) for msg, err := reader.Read(); err != io.EOF; msg, err = reader.Read() { if err != nil { return errors.Wrap(err, "reading from log reader") @@ -394,18 +394,21 @@ func (api *API) loadTableKeys(ctx context.Context, idx *Index, tkey dax.TableKey } } } - return nil - }(); err != nil { + } + // 1st write log load + if err := loadWriteLog(); err != nil { return err } - // Set the table/partition/version in the holder. - if err := api.holder.versionStore.AddPartitions(ctx, qtid, partition); err != nil { - return errors.Wrap(err, "adding partition to sharder") + // acquire lock on this partition's keys + if err := resource.Lock(); err != nil { + return errors.Wrap(err, "locking table key partition") } - return nil + // reload writelog in case of changes between last load and + // lock. The resource object takes care of only loading new data. + return loadWriteLog() } func (api *API) pushJobsFieldKeys(ctx context.Context, jobs chan<- directiveJobType, fromD, toD *dax.Directive) { @@ -423,47 +426,45 @@ func (api *API) pushJobsFieldKeys(ctx context.Context, jobs chan<- directiveJobT } } -func (api *API) loadFieldKeys(ctx context.Context, tkey dax.TableKey, field dax.VersionedField) error { +func (api *API) loadFieldKeys(ctx context.Context, tkey dax.TableKey, field dax.FieldName) error { qtid := tkey.QualifiedTableID() - // Load the previous snapshot. Version 0 doesn't have a snapshot - // file; it only has log entries. 
- if field.Version > 0 { - // Load field snapshot: version - 1 - previousVersion := field.Version - 1 - rc, err := api.snapshotReadWriter.ReadFieldKeys(ctx, qtid, field.Name, previousVersion) - if err != nil { - return errors.Wrap(err, "reading field keys snapshot") - } - defer rc.Close() + resource := api.serverlessStorage.GetFieldKeyResource(qtid, field) + if resource.IsLocked() { + api.logger().Warnf("skipping loadFieldKeys (already held) %s %s", tkey, field) + return nil + } - if err := api.TranslateFieldDB(ctx, string(tkey), string(field.Name), rc); err != nil { + // load latest snapshot + if rc, err := resource.LoadLatestSnapshot(); err != nil { + return errors.Wrap(err, "loading field key snapshot") + } else if rc != nil { + defer rc.Close() + if err := api.TranslateFieldDB(ctx, string(tkey), string(field), rc); err != nil { return errors.Wrap(err, "restoring field keys") } } - if err := func() error { + // define write log loading in a function since we have to do it + // before and after locking + loadWriteLog := func() error { + writelog, err := resource.LoadWriteLog() + if err != nil { + return errors.Wrap(err, "getting write log reader for field keys") + } + if writelog == nil { + return nil + } + reader := storage.NewFieldKeyReader(qtid, field, writelog) + defer reader.Close() // Get field in order to find the translate store. - fld := api.holder.Field(string(tkey), string(field.Name)) + fld := api.holder.Field(string(tkey), string(field)) if fld == nil { - log.Printf("field not found in holder: %s", field.Name) + log.Printf("field not found in holder: %s", field) return nil } store := fld.TranslateStore() - reader := api.writeLogReader.FieldKeyReader(ctx, qtid, field.Name, field.Version) - if err := reader.Open(); err != nil { - // TODO: this log can be confusing because on a create - // table, there is no log file yet, so an error is expected. - // Instead of swallowing this error, we need to check the - // error code and handle it differently. This means the - // writelogger will need to return an error indicating that - // the log file does not exist, but that that is expected. - // log.Printf("could not open log file for table: %s, field: %s: version: %d, err: %s", table, field.Name, field.Version, err) - return nil - } - defer reader.Close() - for msg, err := reader.Read(); err != io.EOF; msg, err = reader.Read() { if err != nil { return errors.Wrap(err, "reading from log reader") @@ -474,18 +475,21 @@ func (api *API) loadFieldKeys(ctx context.Context, tkey dax.TableKey, field dax. } } } - return nil - }(); err != nil { + } + // 1st write log load + if err := loadWriteLog(); err != nil { return err } - // Set the table/field/version in the holder. - if err := api.holder.versionStore.AddFields(ctx, qtid, field); err != nil { - return errors.Wrap(err, "adding field to sharder") + // acquire lock on this partition's keys + if err := resource.Lock(); err != nil { + return errors.Wrap(err, "locking field key partition") } - return nil + // reload writelog in case of changes between last load and + // lock. The resource object takes care of only loading new data. 
+ return loadWriteLog() } func (api *API) pushJobsShards(ctx context.Context, jobs chan<- directiveJobType, fromD, toD *dax.Directive) { @@ -506,46 +510,43 @@ } } -func (api *API) loadShard(ctx context.Context, tkey dax.TableKey, shard dax.VersionedShard) error { +func (api *API) loadShard(ctx context.Context, tkey dax.TableKey, shard dax.ShardNum) error { qtid := tkey.QualifiedTableID() - partition := disco.ShardToShardPartition(string(tkey), uint64(shard.Num), disco.DefaultPartitionN) - partitionNum := dax.PartitionNum(partition) + partition := dax.PartitionNum(disco.ShardToShardPartition(string(tkey), uint64(shard), disco.DefaultPartitionN)) - // Load the previous snapshot. Version 0 doesn't have a snapshot - // file; it only has log entries. - if shard.Version > 0 { - // Load shard snapshot: version - 1 - previousVersion := shard.Version - 1 - rc, err := api.snapshotReadWriter.ReadShardData(ctx, qtid, partitionNum, shard.Num, previousVersion) - if err != nil { - return errors.Wrap(err, "reading shard data snapshot") - } + resource := api.serverlessStorage.GetShardResource(qtid, partition, shard) + if resource.IsLocked() { + api.logger().Warnf("skipping loadShard (already held) %s %d", tkey, shard) + return nil + } - if err := api.RestoreShard(ctx, string(tkey), uint64(shard.Num), rc); err != nil { + if rc, err := resource.LoadLatestSnapshot(); err != nil { + return errors.Wrap(err, "reading latest snapshot for shard") + } else if rc != nil { + defer rc.Close() + if err := api.RestoreShard(ctx, string(tkey), uint64(shard), rc); err != nil { return errors.Wrap(err, "restoring shard data") } } - // WriteLog reader. - if err := func() error { - reader := api.writeLogReader.ShardReader(ctx, qtid, partitionNum, shard.Num, shard.Version) - if err := reader.Open(); err != nil { - // TODO: this log can be confusing because on a create - // table, there is no log file yet, so an error is expected. - // Instead of swallowing this error, we need to check the - // error code and handle it differently. This means the - // writelogger will need to return an error indicating that - // the log file does not exist, but that that is expected. - // log.Printf("could not open log file for table: %s, partition: %d: version: %d, shard: %d, err: %s", table, partition, shard.Version, shard.Num, err) + // define write log loading in a func because we do it twice. + loadWriteLog := func() error { + writelog, err := resource.LoadWriteLog() + if err != nil { + return errors.Wrap(err, "getting write log reader for shard") + } + if writelog == nil { return nil } - defer reader.Close() + reader := storage.NewShardReader(qtid, partition, shard, writelog) + defer reader.Close() for logMsg, err := reader.Read(); err != io.EOF; logMsg, err = reader.Read() { if err != nil { return errors.Wrap(err, "reading from log reader") } + switch msg := logMsg.(type) { case *computer.ImportRoaringMessage: req := &ImportRoaringRequest{ @@ -632,18 +633,21 @@ } } } - return nil - }(); err != nil { + } + // 1st write log load + if err := loadWriteLog(); err != nil { return err } - // Set the table/shard/version in the holder.
- if err := api.holder.versionStore.AddShards(ctx, qtid, shard); err != nil { - return errors.Wrap(err, "adding shard to sharder") + // acquire lock on this shard + if err := resource.Lock(); err != nil { + return errors.Wrap(err, "locking shard") } - return nil + // reload writelog in case of changes between last load and + // lock. The resource object takes care of only loading new data. + return loadWriteLog() } ////////////////////////////////////////////////////////////// @@ -707,11 +711,11 @@ func thingsAdded[K comparable](from []K, to []K) []K { // partitionsComparer is used to compare the differences between two maps of // table:[]partition. type partitionsComparer struct { - from map[dax.TableKey]dax.VersionedPartitions - to map[dax.TableKey]dax.VersionedPartitions + from map[dax.TableKey]dax.PartitionNums + to map[dax.TableKey]dax.PartitionNums } -func newPartitionsComparer(from map[dax.TableKey]dax.VersionedPartitions, to map[dax.TableKey]dax.VersionedPartitions) *partitionsComparer { +func newPartitionsComparer(from map[dax.TableKey]dax.PartitionNums, to map[dax.TableKey]dax.PartitionNums) *partitionsComparer { return &partitionsComparer{ from: from, to: to, @@ -720,23 +724,23 @@ // added returns the partitions which are present in `to` but not in `from`. The // results remain in the format of a map of table:[]partition. -func (p *partitionsComparer) added() map[dax.TableKey]dax.VersionedPartitions { +func (p *partitionsComparer) added() map[dax.TableKey]dax.PartitionNums { return partitionsAdded(p.from, p.to) } // removed returns the partitions which are present in `from` but not in `to`. // The results remain in the format of a map of table:[]partition. -func (p *partitionsComparer) removed() map[dax.TableKey]dax.VersionedPartitions { +func (p *partitionsComparer) removed() map[dax.TableKey]dax.PartitionNums { return partitionsAdded(p.to, p.from) } // partitionsAdded returns the partitions which are present in `to` but not in `from`. -func partitionsAdded(from map[dax.TableKey]dax.VersionedPartitions, to map[dax.TableKey]dax.VersionedPartitions) map[dax.TableKey]dax.VersionedPartitions { +func partitionsAdded(from map[dax.TableKey]dax.PartitionNums, to map[dax.TableKey]dax.PartitionNums) map[dax.TableKey]dax.PartitionNums { if from == nil { return to } - added := make(map[dax.TableKey]dax.VersionedPartitions) + added := make(map[dax.TableKey]dax.PartitionNums) for tt, tps := range to { fps, found := from[tt] if !found { @@ -744,7 +748,7 @@ func partitionsAdded(from map[dax.T continue } - addedPartitions := dax.VersionedPartitions{} + addedPartitions := dax.PartitionNums{} for i := range tps { var found bool for j := range fps { @@ -768,11 +772,11 @@ // fieldsComparer is used to compare the differences between two maps of // table:[]field.
type fieldsComparer struct { - from map[dax.TableKey]dax.VersionedFields - to map[dax.TableKey]dax.VersionedFields + from map[dax.TableKey][]dax.FieldName + to map[dax.TableKey][]dax.FieldName } -func newFieldsComparer(from map[dax.TableKey]dax.VersionedFields, to map[dax.TableKey]dax.VersionedFields) *fieldsComparer { +func newFieldsComparer(from map[dax.TableKey][]dax.FieldName, to map[dax.TableKey][]dax.FieldName) *fieldsComparer { return &fieldsComparer{ from: from, to: to, @@ -781,23 +785,23 @@ func newFieldsComparer(from map[dax.TableKey]dax.VersionedFields, to map[dax.Tab // added returns the fields which are present in `to` but not in `from`. The // results remain in the format of a map of table:[]field. -func (f *fieldsComparer) added() map[dax.TableKey]dax.VersionedFields { +func (f *fieldsComparer) added() map[dax.TableKey][]dax.FieldName { return fieldsAdded(f.from, f.to) } // removed returns the fields which are present in `from` but not in `to`. // The results remain in the format of a map of table:[]field. -func (f *fieldsComparer) removed() map[dax.TableKey]dax.VersionedFields { +func (f *fieldsComparer) removed() map[dax.TableKey][]dax.FieldName { return fieldsAdded(f.to, f.from) } // fieldsAdded returns the fields which are present in `to` but not in `from`. -func fieldsAdded(from map[dax.TableKey]dax.VersionedFields, to map[dax.TableKey]dax.VersionedFields) map[dax.TableKey]dax.VersionedFields { +func fieldsAdded(from map[dax.TableKey][]dax.FieldName, to map[dax.TableKey][]dax.FieldName) map[dax.TableKey][]dax.FieldName { if from == nil { return to } - added := make(map[dax.TableKey]dax.VersionedFields) + added := make(map[dax.TableKey][]dax.FieldName) for tt, tps := range to { fps, found := from[tt] if !found { @@ -805,7 +809,7 @@ func fieldsAdded(from map[dax.TableKey]dax.VersionedFields, to map[dax.TableKey] continue } - addedFieldVersions := dax.VersionedFields{} + addedFieldVersions := []dax.FieldName{} for i := range tps { var found bool for j := range fps { @@ -829,11 +833,11 @@ func fieldsAdded(from map[dax.TableKey]dax.VersionedFields, to map[dax.TableKey] // shardsComparer is used to compare the differences between two maps of // table:[]shardV. type shardsComparer struct { - from map[dax.TableKey]dax.VersionedShards - to map[dax.TableKey]dax.VersionedShards + from map[dax.TableKey]dax.ShardNums + to map[dax.TableKey]dax.ShardNums } -func newShardsComparer(from map[dax.TableKey]dax.VersionedShards, to map[dax.TableKey]dax.VersionedShards) *shardsComparer { +func newShardsComparer(from map[dax.TableKey]dax.ShardNums, to map[dax.TableKey]dax.ShardNums) *shardsComparer { return &shardsComparer{ from: from, to: to, @@ -842,23 +846,23 @@ func newShardsComparer(from map[dax.TableKey]dax.VersionedShards, to map[dax.Tab // added returns the shards which are present in `to` but not in `from`. The // results remain in the format of a map of table:[]shard. -func (s *shardsComparer) added() map[dax.TableKey]dax.VersionedShards { +func (s *shardsComparer) added() map[dax.TableKey]dax.ShardNums { return shardsAdded(s.from, s.to) } // removed returns the shards which are present in `from` but not in `to`. The // results remain in the format of a map of table:[]shard. -func (s *shardsComparer) removed() map[dax.TableKey]dax.VersionedShards { +func (s *shardsComparer) removed() map[dax.TableKey]dax.ShardNums { return shardsAdded(s.to, s.from) } // shardsAdded returns the shards which are present in `to` but not in `from`. 
-func shardsAdded(from map[dax.TableKey]dax.VersionedShards, to map[dax.TableKey]dax.VersionedShards) map[dax.TableKey]dax.VersionedShards { +func shardsAdded(from map[dax.TableKey]dax.ShardNums, to map[dax.TableKey]dax.ShardNums) map[dax.TableKey]dax.ShardNums { if from == nil { return to } - added := make(map[dax.TableKey]dax.VersionedShards) + added := make(map[dax.TableKey]dax.ShardNums) for tt, tss := range to { fss, found := from[tt] if !found { @@ -866,7 +870,7 @@ func shardsAdded(from map[dax.TableKey]dax.VersionedShards, to map[dax.TableKey] continue } - addedShards := dax.VersionedShards{} + addedShards := dax.ShardNums{} for i := range tss { var found bool for j := range fss { @@ -889,7 +893,7 @@ func shardsAdded(from map[dax.TableKey]dax.VersionedShards, to map[dax.TableKey] // createTableAndFields creates the FeatureBase Tables and Fields provided in // the dax.Directive format. -func (api *API) createTableAndFields(tbl *dax.QualifiedTable, partitions dax.VersionedPartitions) error { +func (api *API) createTableAndFields(tbl *dax.QualifiedTable, partitions dax.PartitionNums) error { cim := &CreateIndexMessage{ Index: string(tbl.Key()), CreatedAt: 0, diff --git a/apply.go b/apply.go index ecbee7735..12f89aa25 100644 --- a/apply.go +++ b/apply.go @@ -13,8 +13,6 @@ import ( "github.com/apache/arrow/go/v10/arrow" "github.com/apache/arrow/go/v10/arrow/array" "github.com/apache/arrow/go/v10/arrow/memory" - "github.com/apache/arrow/go/v10/parquet/file" - "github.com/apache/arrow/go/v10/parquet/pqarrow" "github.com/gomem/gomem/pkg/dataframe" "github.com/featurebasedb/featurebase/v3/pql" "github.com/featurebasedb/featurebase/v3/tracing" @@ -220,12 +218,14 @@ func (e *executor) executeApplyShard(ctx context.Context, qcx *Qcx, index string if idx == nil { return nil, newNotFoundError(ErrIndexNotFound, index) } + fname := idx.GetDataFramePath(shard) - if _, err := os.Stat(fname + ".parquet"); os.IsNotExist(err) { + + if !e.dataFrameExists(fname) { return value.NewVector([]value.Value{}), nil } - table, err := readTableParquet(fname) + table, err := e.getDataTable(ctx, fname, pool) if err != nil { return nil, err } @@ -254,57 +254,20 @@ func (e *executor) executeApplyShard(ctx context.Context, qcx *Qcx, index string return context.Global("_"), nil } -func readTableParquet(filename string) (arrow.Table, error) { - r, err := os.Open(filename + ".parquet") - if err != nil { - return nil, err - } - - pf, err := file.NewParquetReader(r) - if err != nil { - return nil, err - } - - reader, err := pqarrow.NewFileReader(pf, pqarrow.ArrowReadProperties{}, memory.DefaultAllocator) - if err != nil { - return nil, err - } - return reader.ReadTable(context.Background()) -} - -func readTableParquetCtx(ctx context.Context, filename string, mem memory.Allocator) (arrow.Table, error) { - r, err := os.Open(filename + ".parquet") - if err != nil { - return nil, err - } - - pf, err := file.NewParquetReader(r) - if err != nil { - return nil, err - } - - reader, err := pqarrow.NewFileReader(pf, pqarrow.ArrowReadProperties{}, mem) - if err != nil { - return nil, err - } - return reader.ReadTable(ctx) -} - // /////////////////////////////////////////////////////// // all the ingest supporting functions // /////////////////////////////////////////////////////// -func NewShardFile(name string) (*ShardFile, error) { - if _, err := os.Stat(name + ".parquet"); os.IsNotExist(err) { - return &ShardFile{dest: name}, nil +func NewShardFile(ctx context.Context, name string, mem memory.Allocator, e *executor) (*ShardFile, 
error) { + if !e.dataFrameExists(name) { + return &ShardFile{dest: name, executor: e}, nil } - // else read in existing - table, err := readTableParquet(name) + table, err := e.getDataTable(ctx, name, mem) if err != nil { return nil, err } - return &ShardFile{table: table, schema: table.Schema(), dest: name}, nil + return &ShardFile{table: table, schema: table.Schema(), dest: name, executor: e}, nil } type NameType struct { @@ -349,6 +312,7 @@ type ShardFile struct { added int64 columns []interface{} dest string + executor *executor } func compareSchema(s1, s2 *arrow.Schema) bool { @@ -427,7 +391,7 @@ func (sf *ShardFile) Process(cs *ChangesetRequest) error { if err != nil { return err } - return os.Rename(rtemp+".parquet", sf.dest+".parquet") + return os.Rename(rtemp+sf.executor.TableExtension(), sf.dest+sf.executor.TableExtension()) } func (sf *ShardFile) process(cs *ChangesetRequest) error { @@ -521,22 +485,9 @@ func (sf *ShardFile) Save(name string) error { } } rec := array.NewRecord(sf.schema, parts, sf.beforeRows+sf.added) - df, err := dataframe.NewDataFrameFromRecord(mem, rec) - if err != nil { - return err - } - // confirm change - w, err := os.Create(name + ".parquet") - if err != nil { - return err - } + table := array.NewTableFromRecords(sf.schema, []arrow.Record{rec}) - err = df.ToParquet(w, 1024) - if err != nil { - return err - } - w.Close() - return nil + return sf.executor.SaveTable(name, table, mem) } // TODO(twg) 2022/10/03 Not a huge fan of the global variable; will look at adding it to the executor structure @@ -573,7 +524,8 @@ func (api *API) ApplyDataframeChangeset(ctx context.Context, index string, cs *C mu := getDataframeWritelock(shard) mu.Lock() defer mu.Unlock() - shardFile, err := NewShardFile(fname) + mem := memory.NewGoAllocator() + shardFile, err := NewShardFile(ctx, fname, mem, api.server.executor) if err != nil { return err } @@ -600,14 +552,16 @@ func (api *API) GetDataframeSchema(ctx context.Context, indexName string) (inter dir, _ := os.Open(base) files, _ := dir.Readdir(0) parts := make([]column, 0) + mem := memory.NewGoAllocator() for i := range files { file := files[i] name := file.Name() - if strings.HasSuffix(name, ".parquet") { + if api.server.executor.IsDataframeFile(name) { // strip off the file extension name = strings.TrimSuffix(name, filepath.Ext(name)) // read the dataframe file and extract the schema - table, err := readTableParquet(filepath.Join(base, name)) + fname := filepath.Join(base, name) + table, err := api.server.executor.getDataTable(ctx, fname, mem) if err != nil { return nil, err } diff --git a/arrow.go b/arrow.go index d5b75e820..1d009b285 100644 --- a/arrow.go +++ b/arrow.go @@ -5,12 +5,18 @@ import ( "context" "encoding/json" "fmt" + "io" "os" + "strings" "sync" "github.com/apache/arrow/go/v10/arrow" "github.com/apache/arrow/go/v10/arrow/array" + "github.com/apache/arrow/go/v10/arrow/ipc" "github.com/apache/arrow/go/v10/arrow/memory" + "github.com/apache/arrow/go/v10/parquet" + "github.com/apache/arrow/go/v10/parquet/file" + "github.com/apache/arrow/go/v10/parquet/pqarrow" "github.com/gomem/gomem/pkg/dataframe" "github.com/featurebasedb/featurebase/v3/pql" "github.com/featurebasedb/featurebase/v3/tracing" ) @@ -371,12 +377,14 @@ func (e *executor) executeArrowShard(ctx context.Context, qcx *Qcx, index string if idx == nil { return nil, newNotFoundError(ErrIndexNotFound, index) } + fname := idx.GetDataFramePath(shard) - if _, err := os.Stat(fname + ".parquet"); os.IsNotExist(err) { + + if !e.dataFrameExists(fname) { return
&basicTable{name: name}, nil } - table, err := readTableParquetCtx(context.TODO(), fname, pool) + table, err := e.getDataTable(ctx, fname, pool) if err != nil { return nil, errors.Wrap(err, "reading dataframe table") } @@ -403,3 +411,142 @@ func (e *executor) executeArrowShard(ctx context.Context, qcx *Qcx, index string table.Retain() return &basicTable{resolver: resolver, table: table, filtered: filter != nil, name: name}, nil } + +func (e *executor) dataFrameExists(fname string) bool { + if e.typeIsParquet() { + if _, err := os.Stat(fname + ".parquet"); os.IsNotExist(err) { + return false + } + return true + } + if _, err := os.Stat(fname + ".arrow"); os.IsNotExist(err) { + return false + } + return true +} + +func (e *executor) getDataTable(ctx context.Context, fname string, mem memory.Allocator) (arrow.Table, error) { + if e.typeIsParquet() { + table, err := readTableParquetCtx(ctx, fname, mem) + return table, err + } + return readTableArrow(fname, mem) +} + +func (e *executor) typeIsParquet() bool { + return e.datafameUseParquet +} + +func (e *executor) IsDataframeFile(name string) bool { + if e.typeIsParquet() { + return strings.HasSuffix(name, ".parquet") + } + return strings.HasSuffix(name, ".arrow") +} + +func (e *executor) SaveTable(name string, table arrow.Table, mem memory.Allocator) error { + if e.typeIsParquet() { + return writeTableParquet(table, name) + } + return writeTableArrow(table, name, mem) +} + +func (e *executor) TableExtension() string { + if e.typeIsParquet() { + return ".parquet" + } + return ".arrow" +} + +func readTableArrow(filename string, mem memory.Allocator) (arrow.Table, error) { + r, err := os.Open(filename + ".arrow") + if err != nil { + return nil, err + } + defer r.Close() + rr, err := ipc.NewFileReader(r, ipc.WithAllocator(mem)) + if err != nil { + return nil, err + } + defer rr.Close() + records := make([]arrow.Record, rr.NumRecords(), rr.NumRecords()) + i := 0 + for { + rec, err := rr.Read() + if err == io.EOF { + break + } else if err != nil { + return nil, err + } + records[i] = rec + i++ + } + records = records[:i] + table := array.NewTableFromRecords(rr.Schema(), records) + return table, nil +} + +func readTableParquetCtx(ctx context.Context, filename string, mem memory.Allocator) (arrow.Table, error) { + r, err := os.Open(filename + ".parquet") + if err != nil { + return nil, err + } + defer r.Close() + + pf, err := file.NewParquetReader(r) + if err != nil { + return nil, err + } + + reader, err := pqarrow.NewFileReader(pf, pqarrow.ArrowReadProperties{}, mem) + if err != nil { + return nil, err + } + return reader.ReadTable(ctx) +} + +func writeTableParquet(table arrow.Table, filename string) error { + f, err := os.Create(filename + ".parquet") + if err != nil { + return err + } + defer f.Close() + props := parquet.NewWriterProperties(parquet.WithDictionaryDefault(false)) + arrProps := pqarrow.DefaultWriterProps() + chunkSize := 10 * 1024 * 1024 + err = pqarrow.WriteTable(table, f, int64(chunkSize), props, arrProps) + if err != nil { + return err + } + return f.Sync() +} + +func writeTableArrow(table arrow.Table, filename string, mem memory.Allocator) error { + f, err := os.Create(filename + ".arrow") + if err != nil { + return err + } + defer f.Close() + writer, err := ipc.NewFileWriter(f, ipc.WithAllocator(mem), ipc.WithSchema(table.Schema())) + if err != nil { + return err + } + chunkSize := int64(0) + tr := array.NewTableReader(table, chunkSize) + defer tr.Release() + n := 0 + for tr.Next() { + arec := tr.Record() + err = writer.Write(arec) + if err != nil { + return err + } + n++ + } + err = writer.Close() + if err != nil { + return err + } + return f.Sync() +} diff --git a/arrow_test.go b/arrow_test.go new file mode 100644 index 000000000..8142997db --- /dev/null +++ b/arrow_test.go @@ -0,0 +1,53 @@ +// Copyright 2021 Molecula Corp. All rights reserved. +package pilosa + +import ( + "context" + "encoding/hex" + "math/rand" + "os" + "path/filepath" + "testing" + + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/array" + "github.com/apache/arrow/go/v10/arrow/memory" +) + +func TempFileName(prefix string) string { + randBytes := make([]byte, 16) + rand.Read(randBytes) + return filepath.Join(os.TempDir(), prefix+hex.EncodeToString(randBytes)) +} + +func Test_TableParquet(t *testing.T) { + // create an arrow table + schema := arrow.NewSchema( + []arrow.Field{ + {Name: "num", Type: arrow.PrimitiveTypes.Float64}, + }, + nil, // no metadata + ) + mem := memory.NewGoAllocator() + b := array.NewRecordBuilder(mem, schema) + defer b.Release() + b.Field(0).(*array.Float64Builder).AppendValues([]float64{1.0, 1.5, 2.0}, nil) + table := array.NewTableFromRecords(schema, []arrow.Record{b.NewRecord()}) + defer table.Release() + fileName := TempFileName("pq-") + // save it as a parquet file + err := writeTableParquet(table, fileName) + if err != nil { + t.Fatal(err) + } + defer os.Remove(fileName + ".parquet") + + // read it back in and compare the result + got, err := readTableParquetCtx(context.Background(), fileName, mem) + if err != nil { + t.Fatalf("readTableParquetCtx() error = %v", err) + } + if got.NumCols() != table.NumCols() { + t.Errorf("got:%v expected:%v", got.NumCols(), table.NumCols()) + } +} diff --git a/audit_test.go b/audit_test.go index 5aabb1b14..79293e411 100644 --- a/audit_test.go +++ b/audit_test.go @@ -7,7 +7,7 @@ import ( "os" "reflect" - pilosa "github.com/featurebasedb/featurebase/v3" + "github.com/featurebasedb/featurebase/v3" "github.com/featurebasedb/featurebase/v3/testhook" ) diff --git a/batch/batch.go b/batch/batch.go index 42455c872..ca1d3ef97 100644 --- a/batch/batch.go +++ b/batch/batch.go @@ -552,7 +552,6 @@ func (b *Batch) Add(rec Row) error { // empty string is not a valid value at this point (Pilosa refuses to translate it) if val == "" { // b.rowIDs[i] = append(rowIDs, nilSentinel) - } else if rowID, ok := b.getRowTranslation(field.Name, val); ok { b.rowIDs[i] = append(rowIDs, rowID) } else { @@ -742,23 +741,27 @@ var ErrBatchNowStale = errors.New("batch is stale and needs to be imported (howe func (b *Batch) Import() error { ctx := context.Background() start := time.Now() - trns, err := b.importer.StartTransaction(ctx, "", b.prevDuration*10, false, time.Hour) - if err != nil { - return errors.Wrap(err, "starting transaction") + if !b.useShardTransactionalEndpoint { + trns, err := b.importer.StartTransaction(ctx, "", b.prevDuration*10, false, time.Hour) + if err != nil { + return errors.Wrap(err, "starting transaction") + } + defer func() { + if trns != nil { + if trnsl, err := b.importer.FinishTransaction(ctx, trns.ID); err != nil { + b.log.Errorf("error finishing transaction: %v. trns: %+v", err, trnsl) + } + } + }() } defer func() { - if trns != nil { - if trnsl, err := b.importer.FinishTransaction(ctx, trns.ID); err != nil { - b.log.Errorf("error finishing transaction: %v.
trns: %+v", err, trnsl) - } - } b.importer.StatsTiming(MetricBatchImportDurationSeconds, time.Since(start), 1.0) }() size := len(b.ids) transStart := time.Now() // first we need to translate the toTranslate, then fill out the missing row IDs - err = b.doTranslation() + err := b.doTranslation() if err != nil { return errors.Wrap(err, "doing Translation") } @@ -1313,20 +1316,15 @@ func (b *Batch) makeFragments(frags, clearFrags fragments) (fragments, fragments shardWidth := b.shardWidth() emptyClearRows := make(map[int]uint64) - // create _exists fragments if needed - // TODO(tlt): maybe make this a separate flag for backward compatibility? - // (because dax.Table doesn't have this). - //if b.index.Options.TrackExistence { - if true { - var curBM *roaring.Bitmap - curShard := ^uint64(0) // impossible sentinel value for shard. - for _, col := range b.ids { - if col/shardWidth != curShard { - curShard = col / shardWidth - curBM = frags.GetOrCreate(curShard, "_exists", "") - } - curBM.DirectAdd(col % shardWidth) + // create _exists fragments + var curBM *roaring.Bitmap + curShard := ^uint64(0) // impossible sentinel value for shard. + for _, col := range b.ids { + if col/shardWidth != curShard { + curShard = col / shardWidth + curBM = frags.GetOrCreate(curShard, "_exists", "") } + curBM.DirectAdd(col % shardWidth) } for i, rowIDs := range b.rowIDs { diff --git a/bufferpool/bufferpool.go b/bufferpool/bufferpool.go new file mode 100644 index 000000000..5cd3f0f72 --- /dev/null +++ b/bufferpool/bufferpool.go @@ -0,0 +1,262 @@ +package bufferpool + +import ( + "errors" + "fmt" + "sync" +) + +// FrameID is the type for frame id +type FrameID int + +// PageID is the type for page id +type PageID int + +var pageSyncPool = sync.Pool{ + New: func() any { + pg := new(Page) + pg.id = PageID(INVALID_PAGE) + pg.isDirty = false + pg.pinCount = 0 + return pg + }, +} + +// BufferPool represents a buffer pool of pages +type BufferPool struct { + // the underlying storage + diskManager DiskManager + // the actual pages in the buffer pool + pages []*Page + // the replacer that will elect replacements when buffer pool is full + replacer *ClockReplacer + // the list of free frames + freeList []FrameID + // the map of page ids to frame ids + // frame ids are the offset into pages + // if you ask the pool for page 673, this will know at + // what offset in pages page 673 will exist + pageTable map[PageID]FrameID +} + +// TODO(pok) implement a lazy writer +// * if free list is 'low' then +// * increase size of cache if there is physical memory available +// * write out old pages and boot them from the cache to increase free list + +// TODO(pok) implement a checkpoint that scans the pool and writes out dirty pages every +// minute or so + +// NewBufferPool returns a buffer pool +func NewBufferPool(maxSize int, diskManager DiskManager) *BufferPool { + freeList := make([]FrameID, 0) + pages := make([]*Page, maxSize) + for i := 0; i < maxSize; i++ { + frameNumber := FrameID(i) + freeList = append(freeList, frameNumber) + } + clockReplacer := NewClockReplacer(maxSize) + return &BufferPool{ + diskManager: diskManager, + pages: pages, + replacer: clockReplacer, + freeList: freeList, + pageTable: make(map[PageID]FrameID), + } +} + +// Dump prints all the pages in the buffer pool +func (b *BufferPool) Dump() { + fmt.Println() + fmt.Printf("------------------------------------------------------------------------------------------\n") + fmt.Printf("BUFFER POOL\n") + for _, p := range b.pages { + if p != nil { + p.Dump("") + }
+	}
+	fmt.Printf("------------------------------------------------------------------------------------------\n")
+	fmt.Println()
+}
+
+// FetchPage fetches the requested page from the buffer pool.
+func (b *BufferPool) FetchPage(pageID PageID) (*Page, error) {
+	// if it is in the buffer pool already then just return it
+	if frameID, ok := b.pageTable[pageID]; ok {
+		page := b.pages[frameID]
+		page.pinCount++
+		b.replacer.Pin(frameID)
+		return page, nil
+	}
+
+	// not in the buffer pool so try the free list or
+	// the replacer will vote a page off the island
+	frameID, isFromFreeList, err := b.getFrameID()
+	if err != nil {
+		return nil, err
+	}
+
+	if !isFromFreeList {
+		// if it didn't come from the freelist then
+		// remove page from current frame, writing it out if dirty
+		currentPage := b.pages[frameID]
+		if currentPage != nil {
+			if currentPage.isDirty {
+				b.diskManager.WritePage(currentPage)
+			}
+
+			delete(b.pageTable, currentPage.id)
+		}
+	}
+
+	// if we got to here, sorry, have to do an I/O
+	page, err := b.diskManager.ReadPage(pageID)
+	if err != nil {
+		return nil, err
+	}
+	page.pinCount = 1
+	b.pageTable[pageID] = frameID
+	pageSyncPool.Put(b.pages[frameID])
+	b.pages[frameID] = page
+	b.replacer.Pin(frameID)
+
+	return page, nil
+}
+
+// UnpinPage unpins the target page from the buffer pool
+func (b *BufferPool) UnpinPage(pageID PageID) error {
+	if frameID, ok := b.pageTable[pageID]; ok {
+		page := b.pages[frameID]
+		page.DecPinCount()
+
+		if page.pinCount <= 0 {
+			b.replacer.Unpin(frameID)
+		}
+		return nil
+	}
+
+	return errors.New("could not find page")
+}
+
+// FlushPage flushes the target page to disk
+func (b *BufferPool) FlushPage(pageID PageID) bool {
+	if frameID, ok := b.pageTable[pageID]; ok {
+		page := b.pages[frameID]
+		page.DecPinCount()
+
+		b.diskManager.WritePage(page)
+		page.isDirty = false
+
+		return true
+	}
+	return false
+}
+
+// NewPage allocates a new page in the buffer pool with the disk manager's help
+func (b *BufferPool) NewPage() (*Page, error) {
+	// get a free frame
+	frameID, isFromFreeList, err := b.getFrameID()
+	if err != nil {
+		return nil, err
+	}
+
+	if !isFromFreeList {
+		// remove page from current frame
+		currentPage := b.pages[frameID]
+		if currentPage != nil {
+			if currentPage.isDirty {
+				b.diskManager.WritePage(currentPage)
+			}
+
+			delete(b.pageTable, currentPage.id)
+		}
+	}
+
+	// allocates new page
+	pageID, err := b.diskManager.AllocatePage()
+	if err != nil {
+		return nil, err
+	}
+	page := &Page{pageID, 1, false, [PAGE_SIZE]byte{}}
+	page.WritePageNumber(int32(pageID))
+	page.WriteFreeSpaceOffset(int16(PAGE_SIZE))
+	page.WriteNextPointer(int32(INVALID_PAGE))
+	page.WritePrevPointer(int32(INVALID_PAGE))
+
+	// update the frame table
+	b.pageTable[pageID] = frameID
+	pageSyncPool.Put(b.pages[frameID])
+	b.pages[frameID] = page
+
+	return page, nil
+}
+
+// ScratchPage returns a page outside the buffer pool - do not use if you intend the page
+// to be in the buffer pool (use NewPage() for that)
+// ScratchPage is intended to be used in cases where you need the Page primitives
+// and will copy the scratch page back over a real page later
+func (b *BufferPool) ScratchPage() *Page {
+	page := &Page{
+		id:       PageID(INVALID_PAGE),
+		pinCount: 0,
+		isDirty:  false,
+		data:     [PAGE_SIZE]byte{},
+	}
+	page.WritePageNumber(int32(INVALID_PAGE))
+	page.WriteFreeSpaceOffset(int16(PAGE_SIZE))
+	page.WriteNextPointer(int32(INVALID_PAGE))
+	page.WritePrevPointer(int32(INVALID_PAGE))
+	return page
+}
+
+// DeletePage deletes a page from the buffer pool
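+// A page must be fully unpinned before it can be deleted; a minimal sketch
+// of the intended lifecycle (error handling elided):
+//
+//	page, _ := pool.FetchPage(PageID(7)) // pins the page
+//	// ... use the page ...
+//	_ = pool.UnpinPage(page.ID()) // drop the pin when done
+//	_ = pool.DeletePage(page.ID()) // fails while pinCount > 0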
+func (b *BufferPool) DeletePage(pageID PageID) error { + var frameID FrameID + var ok bool + if frameID, ok = b.pageTable[pageID]; !ok { + return nil + } + + page := b.pages[frameID] + + if page.pinCount > 0 { + return errors.New("pin count greater than 0") + } + delete(b.pageTable, page.id) + b.replacer.Pin(frameID) + b.diskManager.DeallocatePage(pageID) + + b.freeList = append(b.freeList, frameID) + + return nil +} + +// FlushAllpages flushes all the pages in the buffer pool to disk +// Yeah, never call this unless you know what you are doing +func (b *BufferPool) FlushAllpages() { + for pageID := range b.pageTable { + b.FlushPage(pageID) + } +} + +func (b *BufferPool) getFrameID() (FrameID, bool, error) { + if len(b.freeList) > 0 { + frameID, newFreeList := b.freeList[0], b.freeList[1:] + b.freeList = newFreeList + return frameID, true, nil + } + + victim, err := b.replacer.Victim() + return victim, false, err +} + +// OnDiskSize exposes the on disk size of the backing store +// behind this buffer pool +func (b *BufferPool) OnDiskSize() int64 { + return b.diskManager.FileSize() +} + +// Close closes the buffer pool +func (b *BufferPool) Close() { + b.diskManager.Close() +} diff --git a/bufferpool/circularlist.go b/bufferpool/circularlist.go new file mode 100644 index 000000000..696e7f438 --- /dev/null +++ b/bufferpool/circularlist.go @@ -0,0 +1,93 @@ +package bufferpool + +import ( + "errors" +) + +type circularListNode struct { + key interface{} + value interface{} + next *circularListNode + prev *circularListNode +} + +type circularList struct { + head *circularListNode + tail *circularListNode + size int + capacity int +} + +func newCircularList(maxSize int) *circularList { + return &circularList{nil, nil, 0, maxSize} +} + +func (c *circularList) find(key interface{}) *circularListNode { + ptr := c.head + for i := 0; i < c.size; i++ { + if ptr.key == key { + return ptr + } + ptr = ptr.next + } + return nil +} + +func (c *circularList) hasKey(key interface{}) bool { + return c.find(key) != nil +} + +func (c *circularList) insert(key interface{}, value interface{}) error { + if c.size == c.capacity { + return errors.New("list is full") + } + newNode := &circularListNode{key, value, nil, nil} + if c.size == 0 { + newNode.next = newNode + newNode.prev = newNode + c.head = newNode + c.tail = newNode + c.size++ + return nil + } + + node := c.find(key) + if node != nil { + node.value = value + return nil + } + + newNode.next = c.head + newNode.prev = c.tail + c.tail.next = newNode + if c.head == c.tail { + c.head.next = newNode + } + c.tail = newNode + c.head.prev = c.tail + + c.size++ + return nil +} + +func (c *circularList) remove(key interface{}) { + node := c.find(key) + if node == nil { + return + } + if c.size == 1 { + c.head = nil + c.tail = nil + c.size-- + return + } + if node == c.head { + c.head = c.head.next + } + if node == c.tail { + c.tail = c.tail.prev + } + node.next.prev = node.prev + node.prev.next = node.next + c.size-- +} diff --git a/bufferpool/clockreplacer.go b/bufferpool/clockreplacer.go new file mode 100644 index 000000000..4210c5f73 --- /dev/null +++ b/bufferpool/clockreplacer.go @@ -0,0 +1,64 @@ +package bufferpool + +import "errors" + +// ClockReplacer implements a clock replacer algorithm +type ClockReplacer struct { + cList *circularList + clockHand **circularListNode +} + +// NewClockReplacer instantiates a new clock replacer +func NewClockReplacer(poolSize int) *ClockReplacer { + cList := newCircularList(poolSize) + return &ClockReplacer{cList, 
&cList.head}
+}
+
+// Victim removes the victim frame as defined by the replacement policy
+func (c *ClockReplacer) Victim() (FrameID, error) {
+	if c.cList.size == 0 {
+		return FrameID(INVALID_PAGE), errors.New("no victims available")
+	}
+	var victimFrameID FrameID
+	currentNode := (*c.clockHand)
+	for {
+		if currentNode.value.(bool) {
+			currentNode.value = false
+			c.clockHand = &currentNode.next
+			currentNode = currentNode.next
+		} else {
+			frameID := currentNode.key.(FrameID)
+			victimFrameID = frameID
+			c.clockHand = &currentNode.next
+			c.cList.remove(currentNode.key)
+			return victimFrameID, nil
+		}
+	}
+}
+
+// Unpin unpins a frame, indicating that it can now be victimized
+func (c *ClockReplacer) Unpin(id FrameID) {
+	if !c.cList.hasKey(id) {
+		c.cList.insert(id, true)
+		if c.cList.size == 1 {
+			c.clockHand = &c.cList.head
+		}
+	}
+}
+
+// Pin pins a frame, indicating that it should not be victimized until it is unpinned
+func (c *ClockReplacer) Pin(id FrameID) {
+	node := c.cList.find(id)
+	if node == nil {
+		return
+	}
+	if (*c.clockHand) == node {
+		c.clockHand = &(*c.clockHand).next
+	}
+	c.cList.remove(id)
+}
+
+// Size returns the size of the clock
+func (c *ClockReplacer) Size() int {
+	return c.cList.size
+}
diff --git a/bufferpool/diskmanager.go b/bufferpool/diskmanager.go
new file mode 100644
index 000000000..6eebcdc10
--- /dev/null
+++ b/bufferpool/diskmanager.go
@@ -0,0 +1,21 @@
+package bufferpool
+
+// DiskManager is responsible for interacting with disk
+type DiskManager interface {
+	// reads a page from the disk
+	ReadPage(PageID) (*Page, error)
+	// writes a page to the disk
+	WritePage(*Page) error
+
+	// allocates a page
+	AllocatePage() (PageID, error)
+
+	// deallocates a page
+	DeallocatePage(PageID) error
+
+	// returns on disk file size
+	FileSize() int64
+
+	// closes and does any clean up
+	Close()
+}
diff --git a/bufferpool/inmemdiskmanager.go b/bufferpool/inmemdiskmanager.go
new file mode 100644
index 000000000..83b26d8a6
--- /dev/null
+++ b/bufferpool/inmemdiskmanager.go
@@ -0,0 +1,162 @@
+package bufferpool
+
+import (
+	"errors"
+	"fmt"
+	"os"
+
+	uuid "github.com/satori/go.uuid"
+)
+
+// InMemDiskSpillingDiskManager is an in-memory implementation of the DiskManager
+// interface that can spill to disk when a threshold is reached
+type InMemDiskSpillingDiskManager struct {
+	// tracks the number of pages
+	numPages int
+
+	onDiskPages int
+
+	// tracks the number of pages we can consume before spilling
+	thresholdPages int
+	hasSpilled     *struct{}
+	fd             *os.File
+
+	// the data buffer
+	data []byte
+}
+
+// NewInMemDiskSpillingDiskManager returns an in-memory version of disk manager
+func NewInMemDiskSpillingDiskManager(thresholdPages int) *InMemDiskSpillingDiskManager {
+	dm := &InMemDiskSpillingDiskManager{
+		numPages:       0,
+		thresholdPages: thresholdPages,
+		data:           make([]byte, 0),
+	}
+	return dm
+}
+
+// ReadPage reads a page from pages
+func (d *InMemDiskSpillingDiskManager) ReadPage(pageID PageID) (*Page, error) {
+	// check we're not asking for a page out of range
+	if pageID < 0 || int(pageID) >= d.numPages {
+		return nil, errors.New("page not found")
+	}
+	// check that the offset is within range
+	offset := int(pageID) * PAGE_SIZE
+
+	var page = pageSyncPool.Get().(*Page)
+	// we have to do this check because if -cpuprofile is set for go test,
+	// the previous line returns a weird nil-ish thing...
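+	// (what comes back is presumably a typed-nil *Page boxed in the any
+	// interface; comparing against (*Page)(nil) below catches that case so
+	// we can fall back to allocating a fresh page via New)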
+	if page == (*Page)(nil) {
+		page = pageSyncPool.New().(*Page)
+	}
+	page.id = pageID
+
+	// do the read
+	if d.hasSpilled == nil {
+		if offset+PAGE_SIZE > len(d.data) {
+			return nil, errors.New("offset out of range")
+		}
+		copy(page.data[:], d.data[offset:offset+PAGE_SIZE])
+	} else {
+		var err error
+		if offset+PAGE_SIZE > d.numPages*PAGE_SIZE {
+			return nil, errors.New("offset out of range")
+		}
+		_, err = d.fd.ReadAt(page.data[:], int64(offset))
+		if err != nil {
+			return nil, err
+		}
+	}
+	return page, nil
+}
+
+// WritePage writes a page in memory to pages
+func (d *InMemDiskSpillingDiskManager) WritePage(page *Page) error {
+	// make sure the offset is sensible
+	offset := int(page.ID()) * PAGE_SIZE
+	// do the write
+	if d.hasSpilled == nil {
+		if offset+PAGE_SIZE > len(d.data) {
+			return errors.New("offset out of range")
+		}
+		copy(d.data[offset:], page.data[:])
+	} else {
+		var err error
+		if offset+PAGE_SIZE > d.numPages*PAGE_SIZE {
+			return errors.New("offset out of range")
+		}
+		_, err = d.fd.WriteAt(page.data[:], int64(offset))
+		if err != nil {
+			return err
+		}
+		// err = d.fd.Sync()
+		// if err != nil {
+		//	return err
+		// }
+	}
+	return nil
+}
+
+// AllocatePage allocates a page and returns the page number
+func (d *InMemDiskSpillingDiskManager) AllocatePage() (PageID, error) {
+	d.numPages = d.numPages + 1
+	pageID := PageID(d.numPages - 1)
+
+	if d.hasSpilled == nil {
+		// we have not spilled (yet), so make storage bigger
+		newData := make([]byte, PAGE_SIZE)
+		d.data = append(d.data, newData...)
+
+		// check to see if we need to spill
+		if d.numPages > d.thresholdPages {
+			fileUUID, err := uuid.NewV4()
+			if err != nil {
+				return PageID(INVALID_PAGE), err
+			}
+			// TODO(pok) we should try to tell the OS not to cache this file
+			d.fd, err = os.CreateTemp("", fmt.Sprintf("fb-ehash-%s", fileUUID.String()))
+			if err != nil {
+				return PageID(INVALID_PAGE), err
+			}
+			_, err = d.fd.WriteAt(d.data, 0)
+			if err != nil {
+				return PageID(INVALID_PAGE), err
+			}
+			d.data = []byte{}
+			d.hasSpilled = &struct{}{}
+		}
+	} else {
+		if d.numPages >= d.onDiskPages {
+			// grow the file by a chunk - 512 pages
+			d.onDiskPages += 512
+			var err error
+			size := int64(d.onDiskPages * PAGE_SIZE)
+			_, err = d.fd.WriteAt([]byte{0}, size-1)
+			if err != nil {
+				return PageID(INVALID_PAGE), err
+			}
+		}
+	}
+
+	return pageID, nil
+}
+
+// DeallocatePage removes page from disk
+func (d *InMemDiskSpillingDiskManager) DeallocatePage(pageID PageID) error {
+	// nothing to do right now
+	return nil
+}
+
+func (d *InMemDiskSpillingDiskManager) FileSize() int64 {
+	return int64(len(d.data))
+}
+
+func (d *InMemDiskSpillingDiskManager) Close() {
+	// close and delete the file if we spilled
+	if d.fd != nil {
+		_ = d.fd.Close()
+		os.Remove(d.fd.Name())
+	}
+}
diff --git a/bufferpool/page.go b/bufferpool/page.go
new file mode 100644
index 000000000..411699402
--- /dev/null
+++ b/bufferpool/page.go
@@ -0,0 +1,371 @@
+package bufferpool
+
+import (
+	"encoding/binary"
+	"errors"
+	"fmt"
+)
+
+const PAGE_SIZE int = 8192
+
+const INVALID_PAGE int = -1
+
+const PAGE_TYPE_BTREE_INTERNAL = 10
+const PAGE_TYPE_BTREE_LEAF = 11
+const PAGE_TYPE_HASH_TABLE = 12
+
+// PAGE
+// page size 8192 bytes
+// byte aligned, big endian
+
+// |====================================================|
+// | offset | length |                                  |
+// |----------------------------------------------------|
+// | header                                             |
+// |====================================================|
+// | 0      | 4      | pageNumber (int32)               |
+// | 4      | 2
| pageType (int16) | +// | 6 | 2 | slotCount (int16) | +// | 8 | 2 | localDepth (int16) | +// | 10 | 2 | freeSpaceOffset (int16) | +// | 12 | 4 | prevPointer (int32) | +// | 16 | 4 | nextPointer (int32) | +// |====================================================| +// | | +// |----------------------------------------------------| +// | 20 | slotcount | slot entry is 2 int16 | +// | | * slotwidth | values (payloadOffset, | +// | | * #slots | payloadLength) | +// |----------------------------------------------------| +// | | +// |----------------------------------------------------| +// | | +// | payload entries are keylength (int16), key bytes, | +// | payload length (int32), payload bytes | +// |====================================================| + +const PAGE_NUMBER_OFFSET = 0 // offset 0, length 4, end 4 +const PAGE_TYPE_OFFSET = 4 // offset 4, length 2, end 6 +const PAGE_SLOT_COUNT_OFFSET = 6 // offset 6, length 2, end 8 +const PAGE_LOCAL_DEPTH_OFFSET = 8 // offset 8, length 2, end 10 +const PAGE_FREE_SPACE_OFFSET = 10 // offset 10, length 2, end 12 +const PAGE_PREV_POINTER_OFFSET = 12 // offset 12, length 4, end 16 +const PAGE_NEXT_POINTER_OFFSET = 16 // offset 16, length 4, end 20 +const PAGE_SLOTS_START_OFFSET = 20 // offset 20 + +// page slots +// +// key offset int16 //offset 0, length 2, end 2 +// value offset int16 //offset 2, length 2, end 4 +const PAGE_SLOT_LENGTH = 4 + +// Page represents a page on disk +type Page struct { + id PageID + pinCount int + isDirty bool + data [PAGE_SIZE]byte +} + +type PageSlot struct { + KeyOffset int16 + ValueOffset int16 +} + +func (s *PageSlot) KeyBytes(page *Page) []byte { + offset := s.KeyOffset + keyLen := int16(binary.BigEndian.Uint16(page.data[offset:])) + offset += 2 + result := make([]byte, keyLen) + copy(result, page.data[offset:offset+keyLen]) + return result +} + +func (s *PageSlot) KeyAsInt(page *Page) int32 { + return int32(binary.BigEndian.Uint32(page.data[s.KeyOffset+2:])) +} + +func (s *PageSlot) ValueBytes(page *Page) []byte { + offset := s.ValueOffset + valueLen := int32(binary.BigEndian.Uint32(page.data[offset:])) + offset += 4 + result := make([]byte, valueLen) + copy(result, page.data[offset:int32(offset)+valueLen]) + return result +} + +func (s *PageSlot) ValueAsPagePointer(page *Page) int32 { + return int32(binary.BigEndian.Uint32(page.data[s.ValueOffset+4:])) +} + +type PageChunk struct { + KeyLength int16 + KeyBytes []byte + // TODO(pok) ValueBytes can be up to int32 long + // this requires an overflow page mechanism, that is not implemented + // yet, so be aware of this when storing stuff... 
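+	//
+	// as a sketch of the on-page chunk encoding (big endian), key "foo"
+	// with a two byte value is laid out as:
+	//
+	//	int16 keyLength=3 | 'f' 'o' 'o' | int32 valueLength=2 | v0 v1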
+ ValueLength int32 + ValueBytes []byte +} + +func (pc *PageChunk) Length() int { + return 2 + len(pc.KeyBytes) + 4 + len(pc.ValueBytes) +} + +func (pc *PageChunk) ComputeKeyOffset(pageOffset int) int { + return pageOffset +} + +func (pc *PageChunk) ComputeValueOffset(pageOffset int) int { + return pageOffset + 2 + len(pc.KeyBytes) +} + +func (p *Page) WritePageNumber(pageNumber int32) { + p.id = PageID(pageNumber) + binary.BigEndian.PutUint32(p.data[PAGE_NUMBER_OFFSET:], uint32(pageNumber)) + p.isDirty = true +} + +func (p *Page) ReadPageNumber() int { + return int(binary.BigEndian.Uint32(p.data[PAGE_NUMBER_OFFSET:])) +} + +func (p *Page) WritePageType(pageType int16) { + binary.BigEndian.PutUint16(p.data[PAGE_TYPE_OFFSET:], uint16(pageType)) + p.isDirty = true +} + +func (p *Page) ReadPageType() int16 { + return int16(binary.BigEndian.Uint16(p.data[PAGE_TYPE_OFFSET:])) +} + +func (p *Page) WriteSlotCount(slotCount int16) { + binary.BigEndian.PutUint16(p.data[PAGE_SLOT_COUNT_OFFSET:], uint16(slotCount)) + p.isDirty = true +} + +func (p *Page) ReadSlotCount() int16 { + return int16(binary.BigEndian.Uint16(p.data[PAGE_SLOT_COUNT_OFFSET:])) +} + +func (p *Page) WriteLocalDepth(localDepth int16) { + binary.BigEndian.PutUint16(p.data[PAGE_LOCAL_DEPTH_OFFSET:], uint16(localDepth)) + p.isDirty = true +} + +func (p *Page) ReadLocalDepth() int16 { + return int16(binary.BigEndian.Uint16(p.data[PAGE_LOCAL_DEPTH_OFFSET:])) +} + +func (p *Page) ReadSlot(slot int16) PageSlot { + offset := PAGE_SLOTS_START_OFFSET + PAGE_SLOT_LENGTH*slot + keyOffset := int16(binary.BigEndian.Uint16(p.data[offset:])) + offset += 2 + valueOffset := int16(binary.BigEndian.Uint16(p.data[offset:])) + return PageSlot{ + KeyOffset: keyOffset, + ValueOffset: valueOffset, + } +} + +func (p *Page) WriteSlot(slot int16, value PageSlot) { + offset := PAGE_SLOTS_START_OFFSET + PAGE_SLOT_LENGTH*slot + binary.BigEndian.PutUint16(p.data[offset:], uint16(value.KeyOffset)) + offset += 2 + binary.BigEndian.PutUint16(p.data[offset:], uint16(value.ValueOffset)) +} + +func (p *Page) WriteFreeSpaceOffset(offset int16) { + binary.BigEndian.PutUint16(p.data[PAGE_FREE_SPACE_OFFSET:], uint16(offset)) + p.isDirty = true +} + +func (p *Page) ReadFreeSpaceOffset() int16 { + return int16(binary.BigEndian.Uint16(p.data[PAGE_FREE_SPACE_OFFSET:])) +} + +func (p *Page) WritePrevPointer(prevPointer int32) { + binary.BigEndian.PutUint32(p.data[PAGE_PREV_POINTER_OFFSET:], uint32(prevPointer)) + p.isDirty = true +} + +func (p *Page) ReadPrevPointer() int { + return int(binary.BigEndian.Uint32(p.data[PAGE_PREV_POINTER_OFFSET:])) +} + +func (p *Page) WriteNextPointer(nextPointer int32) { + binary.BigEndian.PutUint32(p.data[PAGE_NEXT_POINTER_OFFSET:], uint32(nextPointer)) + p.isDirty = true +} + +func (p *Page) ReadNextPointer() int { + return int(binary.BigEndian.Uint32(p.data[PAGE_NEXT_POINTER_OFFSET:])) +} + +func (p *Page) WriteChunk(offset int16, chunk PageChunk) { + binary.BigEndian.PutUint16(p.data[offset:], uint16(chunk.KeyLength)) + offset += 2 + copy(p.data[offset:], chunk.KeyBytes) + offset += int16(len(chunk.KeyBytes)) + binary.BigEndian.PutUint32(p.data[offset:], uint32(chunk.ValueLength)) + offset += 4 + copy(p.data[offset:], chunk.ValueBytes) + p.isDirty = true +} + +func (p *Page) ReadChunk(offset int16) PageChunk { + keyLen := int16(binary.BigEndian.Uint16(p.data[offset:])) + offset += 2 + keyBytes := make([]byte, keyLen) + copy(keyBytes, p.data[offset:offset+keyLen]) + offset += keyLen + valueLen := 
int32(binary.BigEndian.Uint32(p.data[offset:])) + offset += 4 + valueBytes := make([]byte, valueLen) + copy(valueBytes, p.data[offset:int32(offset)+valueLen]) + return PageChunk{ + KeyLength: keyLen, + KeyBytes: keyBytes, + ValueLength: valueLen, + ValueBytes: valueBytes, + } +} + +func (p *Page) FreeSpace() int16 { + freeSpaceOffset := p.ReadFreeSpaceOffset() + freespace := freeSpaceOffset - (p.ReadSlotCount()*PAGE_SLOT_LENGTH + PAGE_SLOT_LENGTH + PAGE_SLOTS_START_OFFSET) + return freespace +} + +func (p *Page) WriteKeyValueInSlot(slotNumber int16, key []byte, value []byte) error { + freeSpaceOffset := p.ReadFreeSpaceOffset() + + // build a chunk + chunk := PageChunk{ + KeyLength: int16(len(key)), + KeyBytes: key, + ValueLength: int32(len(value)), + ValueBytes: value, + } + + // compute the new free space offset + freeSpaceOffset -= int16(chunk.Length()) + + // check we won't blow free space on page + slotCount := p.ReadSlotCount() + slotEndOffset := slotCount*PAGE_SLOT_LENGTH + PAGE_SLOT_LENGTH + PAGE_SLOTS_START_OFFSET + + // DEBUG!! + //fmt.Printf("freeSpaceOffset: %d, slotCount: %d, slotCount*4 + 4 + 20: %d, freeSpace: %d\n", freeSpaceOffset, slotCount, slotEndOffset, freeSpaceOffset-slotEndOffset) + + if freeSpaceOffset-slotEndOffset <= 0 { + return errors.New("page is full") + } + + keyOffset := chunk.ComputeKeyOffset(int(freeSpaceOffset)) + valueOffset := chunk.ComputeValueOffset(int(freeSpaceOffset)) + + p.WriteChunk(freeSpaceOffset, chunk) + + // update the free space offset + p.WriteFreeSpaceOffset(int16(freeSpaceOffset)) + + // make a slot + slot := PageSlot{ + KeyOffset: int16(keyOffset), + ValueOffset: int16(valueOffset), + } + // write the slot + p.WriteSlot(slotNumber, slot) + + return nil +} + +func (p *Page) WritePage(page *Page) { + // copy everything but pageNumber & pageType + offset := PAGE_SLOT_COUNT_OFFSET + copy(page.data[offset:], p.data[offset:offset+PAGE_SIZE-offset]) +} + +func (p *Page) PinCount() int { + return p.pinCount +} + +func (p *Page) ID() PageID { + return p.id +} + +func (p *Page) DecPinCount() { + if p.pinCount > 0 { + p.pinCount-- + } +} + +type PageSlotIterator struct { + page *Page + slotCount int16 + cursor int16 +} + +func NewPageSlotIterator(page *Page, fromSlot int16) *PageSlotIterator { + i := &PageSlotIterator{ + page: page, + slotCount: page.ReadSlotCount(), + cursor: fromSlot, + } + return i +} + +func (i *PageSlotIterator) Next() *PageSlot { + if i.cursor < i.slotCount { + s := i.page.ReadSlot(i.cursor) + i.cursor++ + return &s + } + return nil +} + +func (i *PageSlotIterator) Cursor() int16 { + return i.cursor +} + +func (pg *Page) Dump(label string) { + indent := 0 + if len(label) > 0 { + fmt.Printf("%s%s:\n", fmt.Sprintf("%*s", indent, ""), label) + indent += 4 + } + pageType := pg.ReadPageType() + fmt.Printf("%sPAGE(%d) pageType: %d slotCount: %d, prevPtr: %d, nextPtr: %d\n", fmt.Sprintf("%*s", indent, ""), pg.ID(), pageType, pg.ReadSlotCount(), pg.ReadPrevPointer(), pg.ReadNextPointer()) + fmt.Printf("%sKEYS: -->\n", fmt.Sprintf("%*s", indent, "")) + indent += 4 + + // get the keys off the page + keys := make([]int, 0) + pointers := make([]int, 0) + iter := NewPageSlotIterator(pg, 0) + for { + ps := iter.Next() + if ps == nil { + break + } + keys = append(keys, int(ps.KeyAsInt(pg))) + if pageType == /*nodeTypeInternal*/ 10 { + pointers = append(pointers, int(ps.ValueAsPagePointer(pg))) + } + } + + if pageType == /*nodeTypeLeaf*/ 11 { + for _, key := range keys { + fmt.Printf("%s(%d)\n", fmt.Sprintf("%*s", indent, ""), key) + } + } 
else { + for idx, key := range keys { + ptr := pointers[idx] + fmt.Printf("%s(%d, %d)\n", fmt.Sprintf("%*s", indent, ""), key, ptr) + } + ptr := pg.ReadNextPointer() + fmt.Printf("%s(-->, %d)\n", fmt.Sprintf("%*s", indent, ""), ptr) + } + +} diff --git a/cache_test.go b/cache_test.go index fb338fec8..8f19a44d3 100644 --- a/cache_test.go +++ b/cache_test.go @@ -6,7 +6,7 @@ import ( "reflect" "testing" - pilosa "github.com/featurebasedb/featurebase/v3" + "github.com/featurebasedb/featurebase/v3" ) // Ensure cache stays constrained to its configured size. diff --git a/client/client.go b/client/client.go index b41f32616..0b313b971 100644 --- a/client/client.go +++ b/client/client.go @@ -1842,15 +1842,5 @@ func (c *Client) ApplyDataframeChangeset(indexName string, cr *pilosa.ChangesetR }) } err = eg.Wait() - - // status, body, err := c.HTTPRequest(http.MethodPost, path, buffer.Bytes(), headers) - /* - var result map[string]interface{} - err = json.Unmarshal(body, &result) - if err != nil { - return nil, errors.Wrap(err, "unmarshalling response") - } - */ - return nil, err } diff --git a/cluster.go b/cluster.go index 86bd1c9cd..6cc081e4c 100644 --- a/cluster.go +++ b/cluster.go @@ -4,6 +4,7 @@ package pilosa import ( "context" + "encoding/json" "fmt" "log" "sync" @@ -11,6 +12,7 @@ import ( "github.com/featurebasedb/featurebase/v3/dax" "github.com/featurebasedb/featurebase/v3/dax/computer" + "github.com/featurebasedb/featurebase/v3/dax/storage" "github.com/featurebasedb/featurebase/v3/disco" "github.com/featurebasedb/featurebase/v3/logger" "github.com/featurebasedb/featurebase/v3/roaring" @@ -72,8 +74,7 @@ type cluster struct { // nolint: maligned partitionAssigner string - writeLogWriter computer.WriteLogWriter - versionStore dax.VersionStore + serverlessStorage *storage.ResourceManager // isComputeNode is set to true if this node is running as a DAX compute // node. @@ -100,8 +101,6 @@ func newCluster() *cluster { disCo: disco.NopDisCo, noder: disco.NewEmptyLocalNoder(), - - writeLogWriter: computer.NewNopWriteLogWriter(), } } @@ -318,6 +317,44 @@ func (c *cluster) findFieldKeys(ctx context.Context, field *Field, keys ...strin return translations, nil } +func (c *cluster) appendFieldKeysWriteLog(ctx context.Context, qtid dax.QualifiedTableID, fieldName dax.FieldName, translations map[string]uint64) error { + // TODO move marshaling somewhere more centralized and less... 
explicitly json-y + msg := computer.FieldKeyMap{ + TableKey: qtid.Key(), + Field: fieldName, + StringToID: translations, + } + + b, err := json.Marshal(msg) + if err != nil { + return errors.Wrap(err, "marshalling field key map to json") + } + resource := c.serverlessStorage.GetFieldKeyResource(qtid, fieldName) + err = resource.Append(b) + if err != nil { + return errors.Wrap(err, "appending field keys") + } + return nil + +} + +func (c *cluster) appendTableKeysWriteLog(ctx context.Context, qtid dax.QualifiedTableID, partition dax.PartitionNum, translations map[string]uint64) error { + msg := computer.PartitionKeyMap{ + TableKey: qtid.Key(), + Partition: partition, + StringToID: translations, + } + + b, err := json.Marshal(msg) + if err != nil { + return errors.Wrap(err, "marshalling partition key map to json") + } + + resource := c.serverlessStorage.GetTableKeyResource(qtid, partition) + return errors.Wrap(resource.Append(b), "appending table keys") + +} + func (c *cluster) createFieldKeys(ctx context.Context, field *Field, keys ...string) (map[string]uint64, error) { if idx := field.ForeignIndex(); idx != "" { // The field uses foreign index keys. @@ -350,17 +387,9 @@ func (c *cluster) createFieldKeys(ctx context.Context, field *Field, keys ...str tkey := dax.TableKey(field.Index()) qtid := tkey.QualifiedTableID() fieldName := dax.FieldName(field.Name()) - - // Get the current version for field. - version, found, err := c.versionStore.FieldVersion(ctx, qtid, fieldName) + err = c.appendFieldKeysWriteLog(ctx, qtid, fieldName, translations) if err != nil { - return nil, errors.Wrap(err, "getting field version") - } else if !found { - return nil, errors.Errorf("no version found for table(%s) field(%s)", qtid, fieldName) - } - - if err := c.writeLogWriter.CreateFieldKeys(ctx, qtid, fieldName, version, translations); err != nil { - return nil, errors.Errorf("logging field(%s/%s) keys(%v)", field.Index(), field.Name(), keys) + return nil, errors.Wrap(err, "appending to write log") } return translations, nil @@ -754,16 +783,7 @@ func (c *cluster) createIndexKeys(ctx context.Context, indexName string, keys .. tkey := dax.TableKey(idx.Name()) qtid := tkey.QualifiedTableID() partitionNum := dax.PartitionNum(partitionID) - - // Get the current version for partition. 
-		version, found, err := c.versionStore.PartitionVersion(ctx, qtid, partitionNum)
-		if err != nil {
-			return errors.Wrap(err, "getting partition version")
-		} else if !found {
-			return errors.Errorf("no version found for table(%s) partition(%d)", qtid, partitionNum)
-		}
-
-		return c.writeLogWriter.CreateTableKeys(ctx, qtid, partitionNum, version, translations)
+		return c.appendTableKeysWriteLog(ctx, qtid, partitionNum, translations)
 	})
 }
@@ -993,9 +1013,9 @@ type TransactionMessage struct {
 	Action string
 }

-func intInPartitions(i int, s dax.VersionedPartitions) bool {
+func intInPartitions(i int, s dax.PartitionNums) bool {
 	for _, a := range s {
-		if int(a.Num) == i {
+		if int(a) == i {
 			return true
 		}
 	}
diff --git a/cmd/backup_tar.go b/cmd/backup_tar.go
index b4a517a44..6c9612d77 100644
--- a/cmd/backup_tar.go
+++ b/cmd/backup_tar.go
@@ -2,12 +2,13 @@
 package cmd

 import (
+	"io"
+
 	"github.com/featurebasedb/featurebase/v3/ctl"
-	"github.com/featurebasedb/featurebase/v3/logger"
 	"github.com/spf13/cobra"
 )

-func newBackupTarCommand(logdest logger.Logger) *cobra.Command {
+func newBackupTarCommand(logdest io.Writer) *cobra.Command {
 	cmd := ctl.NewBackupTarCommand(logdest)
 	ccmd := &cobra.Command{
 		Use: "backuptar",
diff --git a/cmd/dataframe-csv-loader.go b/cmd/dataframe-csv-loader.go
new file mode 100644
index 000000000..a0f677ca5
--- /dev/null
+++ b/cmd/dataframe-csv-loader.go
@@ -0,0 +1,37 @@
+// Copyright 2021 Molecula Corp. All rights reserved.
+package cmd
+
+import (
+	"github.com/featurebasedb/featurebase/v3/ctl"
+	"github.com/featurebasedb/featurebase/v3/logger"
+	"github.com/spf13/cobra"
+)
+
+// newDataframeCsvLoaderCommand runs the FeatureBase dataframe-csv-loader subcommand for ingesting bulk dataframe data.
+func newDataframeCsvLoaderCommand(logdest logger.Logger) *cobra.Command {
+	cmd := ctl.NewDataframeCsvLoaderCommand(logdest)
+	loaderCmd := &cobra.Command{
+		Use:   "dataframe-csv-loader",
+		Short: "load dataframe integer and floating point values into featurebase",
+		Long: `
+`,
+		RunE: usageErrorWrapper(cmd),
+	}
+	flags := loaderCmd.Flags()
+	flags.StringVar(&cmd.Path, "csv", "", "path to csv input file")
+	flags.StringVar(&cmd.Host, "host", "localhost:10101", "host:port of FeatureBase.")
+	flags.StringVar(&cmd.Pprof, "pprof", cmd.Pprof, "host:port to listen for profiling requests at /debug/pprof and /debug/fgprof.")
+	flags.StringVar(&cmd.AuthToken, "auth-token", "", "Authentication token")
+	flags.StringVar(&cmd.Index, "index", "", "Destination index.")
+	flags.IntVar(&cmd.MaxCapacity, "buffer", 0, "Maximum size of the line buffer; defaults to the go bufio default.")
+	ctl.SetTLSConfig(
+		flags, "",
+		&cmd.TLS.CertificatePath,
+		&cmd.TLS.CertificateKeyPath,
+		&cmd.TLS.CACertPath,
+		&cmd.TLS.SkipVerify,
+		&cmd.TLS.EnableClientVerification,
+	)
+
+	return loaderCmd
+}
diff --git a/cmd/root.go b/cmd/root.go
index cbda03892..58528ea39 100644
--- a/cmd/root.go
+++ b/cmd/root.go
@@ -93,7 +93,7 @@ at https://docs.featurebase.com/.
 	rc.AddCommand(newChkSumCommand(logdest))
 	rc.AddCommand(newBackupCommand(logdest))
 	rc.AddCommand(newRestoreCommand(logdest))
-	rc.AddCommand(newBackupTarCommand(logdest))
+	rc.AddCommand(newBackupTarCommand(stderr))
 	rc.AddCommand(newRestoreTarCommand(logdest))
 	rc.AddCommand(newConfigCommand(stderr))
 	rc.AddCommand(newExportCommand(logdest))
@@ -106,6 +106,7 @@ at https://docs.featurebase.com/.
rc.AddCommand(newKeygenCommand(logdest)) rc.AddCommand(newCLICommand(logdest)) rc.AddCommand(newDAXCommand(stderr)) + rc.AddCommand(newDataframeCsvLoaderCommand(logdest)) rc.SetOutput(stderr) return rc diff --git a/cmd/server.go b/cmd/server.go index e1a9deed7..3b0d6503c 100644 --- a/cmd/server.go +++ b/cmd/server.go @@ -5,6 +5,10 @@ package cmd import ( "io" + "gopkg.in/DataDog/dd-trace-go.v1/ddtrace/opentracer" + "gopkg.in/DataDog/dd-trace-go.v1/ddtrace/tracer" + + "github.com/featurebasedb/featurebase/v3/ctl" "github.com/featurebasedb/featurebase/v3/server" "github.com/featurebasedb/featurebase/v3/tracing" @@ -12,6 +16,7 @@ import ( "github.com/pkg/errors" "github.com/spf13/cobra" jaegercfg "github.com/uber/jaeger-client-go/config" + "gopkg.in/DataDog/dd-trace-go.v1/profiler" ) @@ -113,8 +118,12 @@ on the configured port.`, } defer closer.Close() tracing.GlobalTracer = opentracing.NewTracer(tracer, Server.Logger()) - } + } else if Server.Config.DataDog.EnableTracing { // Give preference to legacy support of jaeger + t := opentracer.New(tracer.WithServiceName(Server.Config.DataDog.Service)) + defer tracer.Stop() + tracing.GlobalTracer = opentracing.NewTracer(t, Server.Logger()) + } return errors.Wrap(Server.Wait(), "waiting on Server") }, } diff --git a/ctl/backup_tar.go b/ctl/backup_tar.go index 165a3ced7..043f0421f 100644 --- a/ctl/backup_tar.go +++ b/ctl/backup_tar.go @@ -21,7 +21,6 @@ import ( "github.com/featurebasedb/featurebase/v3/encoding/proto" "github.com/featurebasedb/featurebase/v3/logger" "github.com/featurebasedb/featurebase/v3/server" - "github.com/featurebasedb/featurebase/v3/vprint" "github.com/pkg/errors" ) @@ -52,7 +51,8 @@ type BackupTarCommand struct { // nolint: maligned client *pilosa.InternalClient // Standard input/output - logDest logger.Logger + logwriter io.Writer + logDest logger.Logger TLS server.TLSConfig @@ -65,9 +65,10 @@ func (cmd *BackupTarCommand) Logger() logger.Logger { } // NewBackupTarCommand returns a new instance of BackupCommand. -func NewBackupTarCommand(logdest logger.Logger) *BackupTarCommand { +func NewBackupTarCommand(logwriter io.Writer) *BackupTarCommand { return &BackupTarCommand{ - logDest: logdest, + logwriter: logwriter, + logDest: logger.NewStandardLogger(logwriter), RetryPeriod: time.Minute, HeaderTimeout: time.Second * 3, Pprof: "localhost:0", @@ -76,18 +77,24 @@ func NewBackupTarCommand(logdest logger.Logger) *BackupTarCommand { // Run executes the main program execution. func (cmd *BackupTarCommand) Run(ctx context.Context) (err error) { - logger := cmd.Logger() - close, err := startProfilingServer(cmd.Pprof, logger) - if err != nil { - return errors.Wrap(err, "starting profiling server") - } - defer close() - + logdest := cmd.Logger() // Validate arguments. if cmd.OutputPath == "" { return fmt.Errorf("%w: -o flag required", UsageError) } useStdout := cmd.OutputPath == "-" + if useStdout && cmd.logwriter == os.Stdout { + logdest = logger.NewStandardLogger(os.Stderr) + } + + // This was the very first thing in the function, but since logging to stdout causes file corruption + // if the tarfile is also going to stdout, we need to check that before we can safely send anything + // to the logger. 
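+	// (e.g. with `featurebase backuptar -o - > backup.tar`, any log line
+	// written to stdout would be interleaved with the tar stream and
+	// corrupt the archive)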
+ close, err := startProfilingServer(cmd.Pprof, logdest) + if err != nil { + return errors.Wrap(err, "starting profiling server") + } + defer close() if cmd.HeaderTimeoutStr != "" { if dur, err := time.ParseDuration(cmd.HeaderTimeoutStr); err != nil { @@ -137,6 +144,13 @@ func (cmd *BackupTarCommand) Run(ctx context.Context) (err error) { var w io.Writer if useStdout { w = os.Stdout + // if writing tarfile to stdout, the logs can't also go there or the file ends up corrupt + // redirect to stderr and log a message there to avoid this + // commented out for testing + //if dest := logger.Logger(); dest.Writer() == os.Stdout { + // dest.SetOutput(os.Stderr) + // logger.Printf("redirected logs to stderr to avoid file corruption") + //} } else { f, err := os.Create(cmd.OutputPath + ".tmp") if err != nil { @@ -171,7 +185,7 @@ func (cmd *BackupTarCommand) Run(ctx context.Context) (err error) { // Move data file to final location. if !useStdout { - logger.Printf("writing backup: %s", cmd.OutputPath) + logdest.Printf("writing backup: %s", cmd.OutputPath) if err := os.Rename(cmd.OutputPath+".tmp", cmd.OutputPath); err != nil { return err } @@ -359,7 +373,7 @@ func (cmd *BackupTarCommand) backupTarShardDataframe(ctx context.Context, tw *ta } filename := filepath.Join("indexes", indexName, "dataframe", fmt.Sprintf("%04d", shard)) - vprint.VV("wrting %v", filename) + logger.Printf("writing %v", filename) var buf bytes.Buffer if _, err := buf.ReadFrom(resp.Body); err != nil { return fmt.Errorf("copying shard data to memory: %w", err) diff --git a/ctl/backup_tar_test.go b/ctl/backup_tar_test.go index 355bb36b8..db8e88032 100644 --- a/ctl/backup_tar_test.go +++ b/ctl/backup_tar_test.go @@ -2,13 +2,11 @@ package ctl import ( "context" - "io" - "net/http" + "os" "path/filepath" - "strings" "testing" - "github.com/featurebasedb/featurebase/v3/logger" + pilosa "github.com/featurebasedb/featurebase/v3" "github.com/featurebasedb/featurebase/v3/test" ) @@ -16,27 +14,54 @@ func TestBackupTarCommand_Run(t *testing.T) { cluster := test.MustRunCluster(t, 1) defer cluster.Close() cmd := cluster.GetNode(0) + indexName := "backuptar" - cmLog := logger.NewStandardLogger(io.Discard) - cm := NewBackupTarCommand(cmLog) + // this might produce some annoying spam in tests but we need to make sure log messages to + // stdout are being redirected properly when the tarfile is also going to stdout + cm := NewBackupTarCommand(os.Stdout) hostport := cmd.API.Node().URI.HostPort() cm.Host = hostport dir := t.TempDir() cm.OutputPath = filepath.Join(dir, "backuptest.tar") - resp, err := http.DefaultClient.Do(test.MustNewHTTPRequest("POST", "http://"+hostport+"/index/i", strings.NewReader(""))) + _, err := cmd.API.CreateIndex(context.Background(), indexName, pilosa.IndexOptions{Keys: true, TrackExistence: true}) if err != nil { - t.Fatalf("making http request: %v", err) + t.Fatalf("creating test index: %v", err) } - resp.Body.Close() - resp, err = http.DefaultClient.Do(test.MustNewHTTPRequest("POST", "http://"+hostport+"/index/i/field/f", strings.NewReader(""))) + _, err = cmd.API.CreateField(context.Background(), indexName, "f", pilosa.OptFieldKeys()) if err != nil { - t.Fatalf("making http request: %v", err) + t.Fatalf("creating test field: %v", err) } - resp.Body.Close() - cm.Index = "i" + cm.Index = indexName if err := cm.Run(context.Background()); err != nil { t.Fatalf("BackupTarCommand Run error: %s", err) } + + oldpath := cm.OutputPath + cm.OutputPath = "-" + cfpath := filepath.Join(dir, "stdouttest.tar") //capture file + cf, 
err := os.Create(cfpath)
+	if err != nil {
+		t.Fatalf("opening file to compare file and stdout outputs: %v", err)
+	}
+	defer cf.Close()
+	// I don't like this at all, but it's the only reliable way I've found to capture os.Stdout
+	old := os.Stdout
+	defer func() { os.Stdout = old }()
+	os.Stdout = cf
+	if err := cm.Run(context.Background()); err != nil {
+		t.Fatalf("BackupTarCommand Run error: %s", err)
+	}
+	fdata, err := os.ReadFile(oldpath)
+	if err != nil {
+		t.Fatalf("unable to read from direct-to-file backup: %v", err)
+	}
+	cdata, err := os.ReadFile(cfpath)
+	if err != nil {
+		t.Fatalf("unable to read from captured stdout backup: %v", err)
+	}
+	if len(fdata) != len(cdata) {
+		t.Fatalf("backing up to file and to stdout produced different length results")
+	}
 }
diff --git a/ctl/cli.go b/ctl/cli.go
index 248983370..77bafbd11 100644
--- a/ctl/cli.go
+++ b/ctl/cli.go
@@ -10,6 +10,7 @@ import (
 	"os"
 	"path/filepath"
 	"strings"
+	"time"

 	"github.com/chzyer/readline"
 	"github.com/jedib0t/go-pretty/table"
@@ -23,6 +24,7 @@ import (
 )

 const (
+	defaultHost     string = "localhost"
 	promptBegin     string = "fbsql> "
 	promptMid       string = "    -> "
 	terminationChar string = ";"
@@ -71,7 +73,7 @@ func NewCLICommand(logdest logger.Logger) *CLICommand {
 		}
 	}
 	return &CLICommand{
-		Host: "localhost",
+		Host: defaultHost,

 		HistoryPath: historyPath,
 		OrganizationID: "",
@@ -118,7 +120,7 @@ func (cmd *CLICommand) setupClient() error {
 	case featurebaseTypeCloud:
 		fmt.Println("Detected cloud deployment")
 		cmd.queryer = &fbcloud.Queryer{
-			Host: cmd.Host,
+			Host: hostPort(cmd.Host, cmd.Port),

 			ClientID: cmd.ClientID,
 			Region:   cmd.Region,
@@ -174,6 +176,16 @@ func (cmd *CLICommand) detectFBType() (featurebaseType, error) {
 				typ:    featurebaseTypeStandard,
 			},
 		)
+	} else if strings.HasPrefix(cmd.Host, "https") {
+		// an https prefix suggests we might be connecting to a cloud host
+		trials = append(trials,
+			// cloud
+			trial{
+				port:   "",
+				health: "health",
+				typ:    featurebaseTypeCloud,
+			},
+		)
 	} else {
 		// Try default ports just in case.
 		trials = append(trials,
@@ -192,9 +204,12 @@ func (cmd *CLICommand) detectFBType() (featurebaseType, error) {
 		)
 	}

+	client := http.Client{
+		Timeout: 100 * time.Millisecond,
+	}
 	for _, trial := range trials {
 		url := hostPort(cmd.Host, trial.port) + trial.health
-		if resp, err := http.Get(url); err != nil {
+		if resp, err := client.Get(url); err != nil {
 			continue
 		} else if resp.StatusCode/100 == 2 {
 			cmd.Port = trial.port
diff --git a/ctl/dataframe-csv-loader.go b/ctl/dataframe-csv-loader.go
new file mode 100644
index 000000000..10ccc9785
--- /dev/null
+++ b/ctl/dataframe-csv-loader.go
@@ -0,0 +1,391 @@
+// Copyright 2021 Molecula Corp. All rights reserved.
+package ctl
+
+import (
+	"bufio"
+	"context"
+	"crypto/tls"
+	"encoding/gob"
+	"fmt"
+	"io"
+	"math"
+	"net/http"
+	"os"
+	"runtime"
+	"strconv"
+	"strings"
+
+	"github.com/apache/arrow/go/v10/arrow"
+	pilosa "github.com/featurebasedb/featurebase/v3"
+	"github.com/featurebasedb/featurebase/v3/client"
+	"github.com/featurebasedb/featurebase/v3/idk"
+	"github.com/featurebasedb/featurebase/v3/logger"
+	"github.com/pkg/errors"
+)
+
+var (
+	mask     = uint64(pilosa.ShardWidth - 1)
+	Sentinal = uint64(math.MaxUint64)
+)
+
+func init() {
+	gob.Register(arrow.PrimitiveTypes.Int64)
+	gob.Register(arrow.PrimitiveTypes.Float64)
+}
+
+// TODO(rdp): add refresh token to this as well
+
+// DataframeCsvLoaderCommand loads dataframe integer and floating point
+// values from a CSV file into FeatureBase.
+type DataframeCsvLoaderCommand struct {
+	tlsConfig *tls.Config
+
+	Host string
+
+	Index string
+
+	// Filepath to the csv file
+	Path string
+
+	// max line length of csv file
+	MaxCapacity int
+
+	// Host:port on which to listen for pprof.
+	Pprof string `json:"pprof"`
+
+	TLS           idk.TLSConfig
+	AuthToken     string            `flag:"auth-token" help:"Authentication Token for FeatureBase"`
+	SchemaManager idk.SchemaManager `flag:"-"`
+
+	// Reusable client.
+	client          *client.Client
+	index           *client.Index
+	needTranslation bool
+
+	// Standard input/output
+	log logger.Logger
+}
+
+// Logger returns the command's associated Logger to maintain CommandWithTLSSupport interface compatibility
+func (cmd *DataframeCsvLoaderCommand) Logger() logger.Logger {
+	return cmd.log
+}
+
+// NewDataframeCsvLoaderCommand returns a new instance of DataframeCsvLoaderCommand.
+func NewDataframeCsvLoaderCommand(logdest logger.Logger) *DataframeCsvLoaderCommand {
+	return &DataframeCsvLoaderCommand{
+		log:   logdest,
+		Pprof: "localhost:0",
+	}
+}
+
+func (cmd *DataframeCsvLoaderCommand) setupClient() (*tls.Config, error) {
+	var tlsConfig *tls.Config
+	var err error
+	opts := []client.ClientOption{}
+	if cmd.TLS.CertificatePath != "" {
+		tlsConfig, err = idk.GetTLSConfig(&cmd.TLS, cmd.log)
+		if err != nil {
+			return nil, errors.Wrap(err, "getting TLS config")
+		}
+		opts = append(opts, client.OptClientTLSConfig(tlsConfig))
+	} else {
+		opts = append(opts,
+			client.OptClientRetries(2),
+			client.OptClientTotalPoolSize(1000),
+			client.OptClientPoolSizePerRoute(400),
+		)
+	}
+	cmd.client, err = client.NewClient([]string{cmd.Host}, opts...)
+	if err != nil {
+		return nil, errors.Wrap(err, "getting featurebase client")
+	}
+	cmd.client.AuthToken = cmd.AuthToken
+	return tlsConfig, nil
+}
+
+func (cmd *DataframeCsvLoaderCommand) Setup() (err error) {
+	// setup logging
+	cmd.log = logger.NewStandardLogger(os.Stderr)
+	if cmd.Pprof != "" {
+		go func() {
+			runtime.SetBlockProfileRate(10000000) // 1 sample per 10 ms
+			runtime.SetMutexProfileFraction(100)  // 1% sampling
+			cmd.log.Printf("Listening for /debug/pprof/ and /debug/fgprof on '%s'", cmd.Pprof)
+			cmd.log.Printf("%v", http.ListenAndServe(cmd.Pprof, nil))
+		}()
+	}
+	// set up Pilosa client
+	_, err = cmd.setupClient()
+	if err != nil {
+		return errors.Wrap(err, "setting up client")
+	}
+
+	if cmd.AuthToken != "" {
+		cmd.AuthToken = "Bearer " + cmd.AuthToken // Gets added to context
+	}
+
+	return nil
+}
+
+// Run executes the dataload.
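+// The CSV header row drives the arrow schema: the first column is the record
+// ID and every following column name must end in "__I" (loaded as int64) or
+// "__F" (loaded as float64). A minimal input, as a sketch:
+//
+//	id,age__I,price__F
+//	1,34,9.99
+//	2,27,100.5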
+func (cmd *DataframeCsvLoaderCommand) Run(ctx context.Context) (err error) {
+	err = cmd.Setup()
+	if err != nil {
+		return err
+	}
+	logger := cmd.Logger()
+	close, err := startProfilingServer(cmd.Pprof, logger)
+	if err != nil {
+		return errors.Wrap(err, "starting profiling server")
+	}
+	defer close()
+
+	// Validate arguments.
+	if cmd.Path == "" {
+		return fmt.Errorf("%w: --csv flag required", UsageError)
+	}
+
+	readFile, err := os.Open(cmd.Path)
+	if err != nil {
+		return err
+	}
+	fields := make([]arrow.Field, 0)
+	fields = append(fields, arrow.Field{Name: "_ID", Type: arrow.PrimitiveTypes.Int64})
+	fileScanner := bufio.NewScanner(readFile)
+	fileScanner.Split(bufio.ScanLines)
+	// needed for really long csv lines
+	var buf []byte
+	if cmd.MaxCapacity > 0 {
+		buf = make([]byte, cmd.MaxCapacity)
+		fileScanner.Buffer(buf, cmd.MaxCapacity)
+	}
+	if fileScanner.Scan() {
+		t := fileScanner.Text()
+		p := strings.Split(t, ",")
+		for _, col := range p[1:] {
+			col = strings.TrimSpace(col)
+			cmd.Logger().Infof("checking %v", col)
+			sep := strings.LastIndex(col, "__")
+			if sep < 0 {
+				return errors.New("invalid format for type")
+			}
+			name := col[:sep]
+			cmd.Logger().Infof("name:%v", name)
+			if strings.HasSuffix(col, "__I") {
+				fields = append(fields, arrow.Field{Name: name, Type: arrow.PrimitiveTypes.Int64})
+			} else if strings.HasSuffix(col, "__F") {
+				fields = append(fields, arrow.Field{Name: name, Type: arrow.PrimitiveTypes.Float64})
+			} else {
+				return errors.New("invalid format for type")
+			}
+		}
+	} else {
+		return errors.Wrap(fileScanner.Err(), "no header")
+	}
+	schema, err := cmd.client.Schema()
+	if err != nil {
+		return err
+	}
+	idx := schema.Index(cmd.Index)
+	if idx.Opts().Keys() {
+		cmd.needTranslation = true
+		cmd.index = idx
+	}
+
+	arrowSchema := arrow.NewSchema(fields, nil)
+	keys := make([]string, 0)
+	lookup := make(map[string]uint64)
+	if cmd.needTranslation {
+		for fileScanner.Scan() {
+			t := fileScanner.Text()
+			r := t[:strings.Index(t, ",")]
+			_, ok := lookup[r]
+			if !ok {
+				keys = append(keys, r)
+				lookup[r] = Sentinal
+			}
+		}
+		cmd.Logger().Infof("Translate Keys %d", len(keys))
+		ids, err := cmd.client.CreateIndexKeys(cmd.index, keys...)
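+		// (ids maps each submitted key string to its assigned record ID;
+		// it replaces the Sentinal placeholders seeded into lookup above)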
+ if err != nil { + return err + } + lookup = ids + } + sharder := &Sharder{ + shards: make(map[uint64]*ShardDiff), + schema: arrowSchema, + index: cmd.Index, + log: cmd.log, + } + readFile.Seek(0, io.SeekStart) + fileScanner = bufio.NewScanner(readFile) + if cmd.MaxCapacity > 0 { + fileScanner.Buffer(buf, cmd.MaxCapacity) + } + fileScanner.Split(bufio.ScanLines) + fileScanner.Scan() // skip the header + cmd.Logger().Infof("Build the dataframe input package in memory") + id := uint64(0) + for fileScanner.Scan() { + records := strings.Split(fileScanner.Text(), ",") + if cmd.needTranslation { + id = lookup[records[0]] + } else { + id, err = strconv.ParseUint(records[0], 10, 64) + if err != nil { + return err + } + } + shard := id / pilosa.ShardWidth + shardFile, err := sharder.GetShard(shard) + if err != nil { + return err + } + shardRow := int64(id & mask) + shardFile.SetRow(shardRow) + for i, rec := range records { + if i == 0 { + shardFile.SetIntValue(i, shardRow, int64(id)) + } else { + rec = strings.TrimSpace(rec) + switch arrowSchema.Field(i).Type { + case arrow.PrimitiveTypes.Int64: + val, err := strconv.ParseInt(rec, 10, 64) + if err != nil { + shardFile.SetIntValue(i, shardRow, 0) + continue + } + shardFile.SetIntValue(i, shardRow, val) + case arrow.PrimitiveTypes.Float64: + val, err := strconv.ParseFloat(rec, 64) + if err != nil { + shardFile.SetFloatValue(i, shardRow, 0) + continue + } + shardFile.SetFloatValue(i, shardRow, val) + default: + return errors.New("unhandled arrow type type") + } + } + } + } + cmd.Logger().Infof("sending package to featurebase") + err = sharder.Store(arrowSchema, cmd.client) + if err != nil { + return err + } + return err +} + +type pair struct { + col int + row uint64 +} + +type ShardDiff struct { + columns []interface{} + rows []int64 + null map[pair]struct{} + shard uint64 + // Standard input/output + log logger.Logger +} + +func NewShardDiff(shard uint64, log logger.Logger) (*ShardDiff, error) { + return &ShardDiff{shard: shard, log: log}, nil +} + +type Number interface { + int64 | float64 +} + +func (s *ShardDiff) SetIntValue(col int, row int64, val int64) { + slice := s.columns[col].([]int64) + s.columns[col] = append(slice, val) +} + +func (s *ShardDiff) SetFloatValue(col int, row int64, val float64) { + slice := s.columns[col].([]float64) + s.columns[col] = append(slice, val) +} + +func (s *ShardDiff) SetNulll(col int, row uint64) { + s.null[pair{col: col, row: row}] = struct{}{} +} + +func (s *ShardDiff) SetRow(row int64) { + s.rows = append(s.rows, row) +} + +func (s *ShardDiff) Setup(schema *arrow.Schema) { + for _, f := range schema.Fields() { + switch f.Type { + case arrow.PrimitiveTypes.Int64: + s.columns = append(s.columns, make([]int64, 0)) + case arrow.PrimitiveTypes.Float64: + s.columns = append(s.columns, make([]float64, 0)) + } + } +} + +func makeSimpleSchema(a *arrow.Schema) []pilosa.NameType { + nt := make([]pilosa.NameType, len(a.Fields())) + + for i := 0; i < len(a.Fields()); i++ { + f := a.Field(i) + nt[i] = pilosa.NameType{Name: f.Name, DataType: f.Type} + } + return nt +} + +func (s *ShardDiff) Store(index string, schema *arrow.Schema, fb *client.Client) error { + s.log.Infof("dataframe for shard %v:%v:", index, s.shard) + request := &pilosa.ChangesetRequest{} + request.Columns = s.columns + request.ShardIds = s.rows + request.SimpleSchema = makeSimpleSchema(schema) + _, err := fb.ApplyDataframeChangeset(index, request, s.shard) + return err +} + +func (s *ShardDiff) IsValid(col int, row int) bool { + _, ok := 
s.null[pair{col: col, row: uint64(row)}] + return !ok +} + +type Sharder struct { + shards map[uint64]*ShardDiff + index string + schema *arrow.Schema + // Standard input/output + log logger.Logger +} + +func (s *Sharder) GetShard(shard uint64) (*ShardDiff, error) { + f, ok := s.shards[shard] + if ok { + return f, nil + } + f, err := NewShardDiff(shard, s.log) + f.Setup(s.schema) + if err != nil { + return nil, err + } + s.shards[shard] = f + return f, nil +} + +func (s *Sharder) Store(schema *arrow.Schema, client *client.Client) error { + for _, f := range s.shards { + err := f.Store(s.index, schema, client) + if err != nil { + return err + } + } + return nil +} diff --git a/ctl/dataframe-csv-loader_test.go b/ctl/dataframe-csv-loader_test.go new file mode 100644 index 000000000..8280fc026 --- /dev/null +++ b/ctl/dataframe-csv-loader_test.go @@ -0,0 +1,74 @@ +// Copyright 2021 Molecula Corp. All rights reserved. +package ctl + +import ( + "context" + "io" + "testing" + + pilosa "github.com/featurebasedb/featurebase/v3" + "github.com/featurebasedb/featurebase/v3/logger" + "github.com/featurebasedb/featurebase/v3/server" + "github.com/featurebasedb/featurebase/v3/test" + "github.com/featurebasedb/featurebase/v3/testhook" +) + +func TestDataframeCsvLoaderCommand(t *testing.T) { + cluster := test.MustRunCluster(t, 1, []server.CommandOption{server.OptCommandServerOptions(pilosa.OptServerIsDataframeEnabled(true))}) + defer cluster.Close() + cmd := cluster.GetNode(0) + t.Run("basic", func(t *testing.T) { + cmLog := logger.NewStandardLogger(io.Discard) + cm := NewDataframeCsvLoaderCommand(cmLog) + file, err := testhook.TempFile(t, "import.csv") + if err != nil { + t.Fatalf("creating tempfile: %v", err) + } + _, err = file.Write([]byte("id,val__I\n1,2\n3,4\n5,6")) + if err != nil { + t.Fatalf("writing to tempfile: %v", err) + } + ctx := context.Background() + if err != nil { + t.Fatal(err) + } + index := "non-keyed" + cmd.API.CreateIndex(ctx, index, pilosa.IndexOptions{Keys: false}) + + cm.Host = cmd.API.Node().URI.HostPort() + cm.Path = file.Name() + cm.Index = index + + err = cm.Run(ctx) + if err != nil { + t.Fatalf("DataframeCsvLoader Run doesn't work: %s", err) + } + }) + t.Run("keyed", func(t *testing.T) { + cmLog := logger.NewStandardLogger(io.Discard) + cm := NewDataframeCsvLoaderCommand(cmLog) + file, err := testhook.TempFile(t, "import_key.csv") + if err != nil { + t.Fatalf("creating tempfile: %v", err) + } + _, err = file.Write([]byte("id,val__I\nA,2\nB,4\nC,6")) + if err != nil { + t.Fatalf("writing to tempfile: %v", err) + } + ctx := context.Background() + if err != nil { + t.Fatal(err) + } + index := "keyed" + cmd.API.CreateIndex(ctx, index, pilosa.IndexOptions{Keys: true}) + + cm.Host = cmd.API.Node().URI.HostPort() + cm.Path = file.Name() + cm.Index = index + + err = cm.Run(ctx) + if err != nil { + t.Fatalf("DataframeCsvLoader Run doesn't work: %s", err) + } + }) +} diff --git a/ctl/dax.go b/ctl/dax.go index 45836d3e4..61759916a 100644 --- a/ctl/dax.go +++ b/ctl/dax.go @@ -18,6 +18,7 @@ func BuildDAXFlags(cmd *cobra.Command, srv *server.Command) { flags.BoolVar(&srv.Config.MDS.Run, "mds.run", srv.Config.MDS.Run, "Run the MDS service in process.") flags.DurationVar(&srv.Config.MDS.Config.RegistrationBatchTimeout, "mds.config.registration-batch-timeout", srv.Config.MDS.Config.RegistrationBatchTimeout, "Timeout for node registration batches.") flags.StringVar(&srv.Config.MDS.Config.DataDir, "mds.config.data-dir", srv.Config.MDS.Config.DataDir, "MDS directory to use in process.") + 
flags.DurationVar(&srv.Config.MDS.Config.SnappingTurtleTimeout, "mds.config.snapping-turtle-timeout", srv.Config.MDS.Config.SnappingTurtleTimeout, "Period for running automatic snapshotting routine.")

 	// WriteLogger
 	flags.BoolVar(&srv.Config.WriteLogger.Run, "writelogger.run", srv.Config.WriteLogger.Run, "Run the WriteLogger service in process.")
diff --git a/ctl/server.go b/ctl/server.go
index f4f3be907..f1a17c8e3 100644
--- a/ctl/server.go
+++ b/ctl/server.go
@@ -133,10 +133,13 @@ func serverFlagSet(srv *server.Config, prefix string) *pflag.FlagSet {
 	flags.StringSliceVar(&srv.Auth.ConfiguredIPs, pre("auth.configured-ips"), srv.Auth.ConfiguredIPs, "List of configured IPs allowed for ingest")

 	flags.BoolVar(&srv.DataDog.Enable, pre("datadog.enable"), false, "enable continuous profiling with DataDog cloud service, Note you must have DataDog agent installed")
+	flags.BoolVar(&srv.DataDog.EnableTracing, pre("datadog.enable-tracing"), false, "Enable continuous tracing with DataDog cloud service; this flag is mutually exclusive with the tracing.* parameters")
+	flags.StringVar(&srv.DataDog.Service, pre("datadog.service"), "default-service", "The Datadog service name, for example my-web-app")
 	flags.StringVar(&srv.DataDog.Env, pre("datadog.env"), "default-env", "The Datadog environment name, for example, production")
 	flags.StringVar(&srv.DataDog.Version, pre("datadog.version"), "default-version", "The version of your application")
 	flags.StringVar(&srv.DataDog.Tags, pre("datadog.tags"), "molecula", "The tags to apply to an uploaded profile. Must be a list in the format key1:value1,key2:value2")
+	flags.BoolVar(&srv.DataDog.CPUProfile, pre("datadog.cpu-profile"), true, "golang pprof cpu profile")
 	flags.BoolVar(&srv.DataDog.HeapProfile, pre("datadog.heap-profile"), true, "golang pprof heap profile")
 	flags.BoolVar(&srv.DataDog.MutexProfile, pre("datadog.mutex-profile"), false, "golang pprof mutex profile")
@@ -144,6 +147,7 @@ func serverFlagSet(srv *server.Config, prefix string) *pflag.FlagSet {
 	flags.BoolVar(&srv.DataDog.BlockProfile, pre("datadog.block-profile"), false, "golang pprof block profile")

 	flags.BoolVar(&srv.Dataframe.Enable, pre("dataframe.enable"), false, "EXPERIMENTAL enable support for Apply and Arrow")
+	flags.BoolVar(&srv.Dataframe.UseParquet, pre("dataframe.use-parquet"), false, "EXPERIMENTAL use parquet for file format")
 	return flags
 }
diff --git a/dax/Makefile b/dax/Makefile
index ca0cfb899..b89fe499f 100644
--- a/dax/Makefile
+++ b/dax/Makefile
@@ -78,11 +78,14 @@ dc-logs-%:
 dc-prereqs:
	mkdir -p ../.quick

+dc-cli:
+	featurebase cli --host localhost --port 8080 --org-id=testorg --db-id=testdb
+
 # This is just an example.
For it to work, you'll first need to: # featurebase cli --host localhost --port 8080 --org-id=testorg --db-id=testdb # create table keysidstbl2 (_id string, slice idset); dc-datagen: - docker-compose run datagen --end-at=500 --pilosa.batch-size=500 --featurebase.table-name=keysidstbl2 + docker-compose run datagen --end-at=500 --pilosa.batch-size=500 --featurebase.table-name=keysidstbl2 dc-exec-%: docker-compose exec $* /bin/sh diff --git a/dax/boltdb/versionstore.go b/dax/boltdb/versionstore.go deleted file mode 100644 index 91075823e..000000000 --- a/dax/boltdb/versionstore.go +++ /dev/null @@ -1,768 +0,0 @@ -package boltdb - -import ( - "bytes" - "context" - "encoding/binary" - "fmt" - "strconv" - "strings" - - "github.com/featurebasedb/featurebase/v3/dax" - "github.com/featurebasedb/featurebase/v3/dax/inmem" - "github.com/featurebasedb/featurebase/v3/errors" - "github.com/featurebasedb/featurebase/v3/logger" -) - -var ( - bucketTables = Bucket("versionStoreTables") - bucketShards = Bucket("versionStoreShards") - bucketTableKeys = Bucket("versionStoreTableKeys") - bucketFieldKeys = Bucket("versionStoreFieldKeys") -) - -// VersionStoreBuckets defines the buckets used by this package. It can be -// called during setup to create the buckets ahead of time. -var VersionStoreBuckets []Bucket = []Bucket{ - bucketTables, - bucketShards, - bucketTableKeys, - bucketFieldKeys, -} - -// Ensure type implements interface. -var _ dax.VersionStore = (*VersionStore)(nil) - -// VersionStore manages all version info for shard, table keys, and field keys. -type VersionStore struct { - db *DB - - logger logger.Logger -} - -// NewVersionStore returns a new instance of VersionStore with default values. -func NewVersionStore(db *DB, logger logger.Logger) *VersionStore { - return &VersionStore{ - db: db, - logger: logger, - } -} - -func (s *VersionStore) AddTable(ctx context.Context, qtid dax.QualifiedTableID) error { - tx, err := s.db.BeginTx(ctx, true) - if err != nil { - return errors.Wrap(err, "getting transaction") - } - defer tx.Rollback() - - bkt := tx.Bucket(bucketTables) - if bkt == nil { - return errors.Errorf(ErrFmtBucketNotFound, bucketTables) - } - - if val := bkt.Get(tableKey(qtid)); val != nil { - return dax.NewErrTableIDExists(qtid) - } - - // The assumption is that we may store information about the table (other - // than just the fact that it exists). So for now, the value is an empty - // JSON object. - val := []byte("{}") - - if err := bkt.Put(tableKey(qtid), val); err != nil { - return errors.Wrap(err, "putting table") - } - - // Add the table to the "table index" of the other buckets. - // - // Shards - if bkt := tx.Bucket(bucketShards); bkt == nil { - return errors.Errorf(ErrFmtBucketNotFound, bucketShards) - } else if err := bkt.Put(tableKey(qtid), val); err != nil { - return errors.Wrap(err, "putting table into shards") - } - - // TableKeys. - if bkt := tx.Bucket(bucketTableKeys); bkt == nil { - return errors.Errorf(ErrFmtBucketNotFound, bucketTableKeys) - } else if err := bkt.Put(tableKey(qtid), val); err != nil { - return errors.Wrap(err, "putting table into table keys") - } - - // FieldKeys. 
- if bkt := tx.Bucket(bucketFieldKeys); bkt == nil { - return errors.Errorf(ErrFmtBucketNotFound, bucketFieldKeys) - } else if err := bkt.Put(tableKey(qtid), val); err != nil { - return errors.Wrap(err, "putting table into field keys") - } - - return tx.Commit() -} - -func (s *VersionStore) RemoveTable(ctx context.Context, qtid dax.QualifiedTableID) (dax.VersionedShards, dax.VersionedPartitions, error) { - tx, err := s.db.BeginTx(ctx, true) - if err != nil { - return nil, nil, err - } - defer tx.Rollback() - - // Get the shards and partitions before deleting by table. - shards, err := s.getShards(ctx, tx, qtid) - if err != nil { - return nil, nil, err - } - - partitions, err := s.getPartitions(ctx, tx, qtid) - if err != nil { - return nil, nil, err - } - - if err := removeTable(ctx, tx, qtid); err != nil { - return nil, nil, err - } - - if err := tx.Commit(); err != nil { - return nil, nil, err - } - - return shards, partitions, nil -} - -func removeTable(ctx context.Context, tx *Tx, qtid dax.QualifiedTableID) error { - // Tables. - if bkt := tx.Bucket(bucketTables); bkt == nil { - return errors.Errorf(ErrFmtBucketNotFound, bucketTables) - } else if err := bkt.Delete(tableKey(qtid)); err != nil { - return errors.Wrap(err, "deleting table") - } - - // Shards. - if bkt := tx.Bucket(bucketShards); bkt == nil { - return errors.Errorf(ErrFmtBucketNotFound, bucketShards) - } else if err := bkt.Delete(tableKey(qtid)); err != nil { - return errors.Wrap(err, "deleting table in shards") - } else if err := deleteByPrefix(tx, bucketShards, []byte(fmt.Sprintf(prefixFmtShards, qtid.OrganizationID, qtid.DatabaseID, qtid.ID))); err != nil { - return errors.Wrap(err, "deleting shards for table") - } - - // TableKeys. - if bkt := tx.Bucket(bucketTableKeys); bkt == nil { - return errors.Errorf(ErrFmtBucketNotFound, bucketTableKeys) - } else if err := bkt.Delete(tableKey(qtid)); err != nil { - return errors.Wrap(err, "deleting table in table keys") - } else if err := deleteByPrefix(tx, bucketTableKeys, []byte(fmt.Sprintf(prefixFmtTableKeys, qtid.OrganizationID, qtid.DatabaseID, qtid.ID))); err != nil { - return errors.Wrap(err, "deleting table keys for table") - } - - // FieldKeys. - if bkt := tx.Bucket(bucketFieldKeys); bkt == nil { - return errors.Errorf(ErrFmtBucketNotFound, bucketFieldKeys) - } else if err := bkt.Delete(tableKey(qtid)); err != nil { - return errors.Wrap(err, "deleting table in field keys") - } else if err := deleteByPrefix(tx, bucketFieldKeys, []byte(fmt.Sprintf(prefixFmtFieldKeys, qtid.OrganizationID, qtid.DatabaseID, qtid.ID))); err != nil { - return errors.Wrap(err, "deleting field keys for table") - } - - return nil -} - -func deleteByPrefix(tx *Tx, bucket Bucket, prefix []byte) error { - bkt := tx.Bucket(bucket) - cursor := bkt.Cursor() - - // Deleting keys within the for loop seems to cause Next() to skip the next - // matching key because the Delete() call pops the item and effectively - // moves the cursor forward. Then calling Next() skips the item that was - // being pointed to after the delete. So, we're going to make a list of keys - // to delete, and then delete them outside of the cursor logic. 
- var keysToDelete [][]byte - for k, _ := cursor.Seek(prefix); k != nil && bytes.HasPrefix(k, prefix); k, _ = cursor.Next() { - keysToDelete = append(keysToDelete, k) - } - - for _, k := range keysToDelete { - if err := bkt.Delete(k); err != nil { - return errors.Wrapf(err, "deleting key: %s", k) - } - } - - return nil -} - -func (s *VersionStore) AddShards(ctx context.Context, qtid dax.QualifiedTableID, shards ...dax.VersionedShard) error { - tx, err := s.db.BeginTx(ctx, true) - if err != nil { - return errors.Wrap(err, "getting transaction") - } - defer tx.Rollback() - - for _, shard := range shards { - if err := createShard(ctx, tx, qtid, shard); err != nil { - return errors.Wrap(err, "creating shard") - } - } - - return tx.Commit() -} - -func createShard(ctx context.Context, tx *Tx, qtid dax.QualifiedTableID, shard dax.VersionedShard) error { - // TODO: validate data more formally - if shard.Version < 0 { - return errors.New(errors.ErrUncoded, fmt.Sprintf("invalid shard version: %d", shard.Version)) - } - - bkt := tx.Bucket(bucketShards) - if bkt == nil { - return errors.Errorf(ErrFmtBucketNotFound, bucketShards) - } - - // Ensure the table exists. - if val := bkt.Get(tableKey(qtid)); val == nil { - return dax.NewErrTableIDDoesNotExist(qtid) - } - - vsn := make([]byte, 8) - binary.LittleEndian.PutUint64(vsn, uint64(shard.Version)) - - return bkt.Put(shardKey(qtid, shard.Num), vsn) -} - -func (s *VersionStore) Shards(ctx context.Context, qtid dax.QualifiedTableID) (dax.VersionedShards, bool, error) { - tx, err := s.db.BeginTx(ctx, false) - if err != nil { - return nil, false, errors.Wrap(err, "getting tx") - } - defer tx.Rollback() - - shards, err := s.getShards(ctx, tx, qtid) - if err != nil { - return nil, false, errors.Wrap(err, "getting shards") - } - - return shards, true, nil -} - -func (s *VersionStore) getShards(ctx context.Context, tx *Tx, qtid dax.QualifiedTableID) (dax.VersionedShards, error) { - c := tx.Bucket(bucketShards).Cursor() - - // Deserialize rows into Shard objects. - shards := make(dax.VersionedShards, 0) - - prefix := []byte(fmt.Sprintf(prefixFmtShards, qtid.OrganizationID, qtid.DatabaseID, qtid.ID)) - for k, v := c.Seek(prefix); k != nil && bytes.HasPrefix(k, prefix); k, v = c.Next() { - if v == nil { - s.logger.Printf("nil value for key: %s", k) - continue - } - - var shard dax.VersionedShard - - shardNum, err := keyShardNum(k) - if err != nil { - return nil, errors.Wrapf(err, "getting shardNum from key: %v", k) - } - - shard.Num = shardNum - shard.Version = int(binary.LittleEndian.Uint64(v)) - - shards = append(shards, shard) - } - - return shards, nil -} - -// ShardVersion return the current version for the given table/shardNum. -// If a version is not being tracked, it returns a bool value of false. 
-func (s *VersionStore) ShardVersion(ctx context.Context, qtid dax.QualifiedTableID, shardNum dax.ShardNum) (int, bool, error) { - tx, err := s.db.BeginTx(ctx, false) - if err != nil { - return -1, false, err - } - defer tx.Rollback() - - return getShardVersion(ctx, tx, qtid, shardNum) -} - -func getShardVersion(ctx context.Context, tx *Tx, qtid dax.QualifiedTableID, shardNum dax.ShardNum) (int, bool, error) { - version := -1 - - bkt := tx.Bucket(bucketShards) - if bkt == nil { - return version, false, errors.Errorf(ErrFmtBucketNotFound, bucketShards) - } - - b := bkt.Get(shardKey(qtid, shardNum)) - if b == nil { - return version, false, nil - } - version = int(binary.LittleEndian.Uint64(b)) - - return version, true, nil -} - -func (s *VersionStore) ShardTables(ctx context.Context, qual dax.TableQualifier) (dax.TableIDs, error) { - tx, err := s.db.BeginTx(ctx, false) - if err != nil { - return nil, errors.Wrap(err, "beginning tx") - } - defer tx.Rollback() - - return s.getTableIDs(ctx, tx, qual, bucketShards) -} - -func (s *VersionStore) getTableIDs(ctx context.Context, tx *Tx, qual dax.TableQualifier, bucket Bucket) (dax.TableIDs, error) { - c := tx.Bucket(bucket).Cursor() - - // Deserialize rows into Tables objects. - tableIDs := make(dax.TableIDs, 0) - - prefix := []byte(fmt.Sprintf(prefixFmtTables, qual.OrganizationID, qual.DatabaseID)) - for k, v := c.Seek(prefix); k != nil && bytes.HasPrefix(k, prefix); k, v = c.Next() { - if v == nil { - s.logger.Printf("nil value for key: %s", k) - continue - } - - var tableID dax.TableID - - tableID, err := keyTableID(k) - if err != nil { - return nil, errors.Wrapf(err, "getting table name from key: %v", k) - } - - tableIDs = append(tableIDs, tableID) - } - - return tableIDs, nil -} - -func (s *VersionStore) bucketTables(ctx context.Context, bucket Bucket) ([]dax.QualifiedTableID, error) { - tx, err := s.db.BeginTx(ctx, false) - if err != nil { - return nil, errors.Wrap(err, "beginning tx") - } - defer tx.Rollback() - - c := tx.Bucket(bucket).Cursor() - - // Deserialize rows into Tables objects. - qtids := make([]dax.QualifiedTableID, 0) - - prefix := []byte(prefixTables) - for k, v := c.Seek(prefix); k != nil && bytes.HasPrefix(k, prefix); k, v = c.Next() { - if v == nil { - s.logger.Printf("nil value for key: %s", k) - continue - } - - qtid, err := keyQualifiedTableID(k) - if err != nil { - return nil, errors.Wrapf(err, "getting qualified table id from key: %v", k) - } - - qtids = append(qtids, qtid) - } - - return qtids, nil -} - -// AddPartitions adds new partitions to be managed by VersionStore. It returns -// the number of partitions added or an error. -func (s *VersionStore) AddPartitions(ctx context.Context, qtid dax.QualifiedTableID, partitions ...dax.VersionedPartition) error { - tx, err := s.db.BeginTx(ctx, true) - if err != nil { - return errors.Wrap(err, "getting transaction") - } - defer tx.Rollback() - - for _, partition := range partitions { - if err := createPartition(ctx, tx, qtid, partition); err != nil { - return errors.Wrap(err, "creating partition") - } - } - - return tx.Commit() -} - -func createPartition(ctx context.Context, tx *Tx, qtid dax.QualifiedTableID, partition dax.VersionedPartition) error { - // TODO: validate data more formally - if partition.Version < 0 { - return errors.New(errors.ErrUncoded, fmt.Sprintf("invalid partition version: %d", partition.Version)) - } - - bkt := tx.Bucket(bucketTableKeys) - if bkt == nil { - return errors.Errorf(ErrFmtBucketNotFound, bucketTableKeys) - } - - // Ensure the table exists. 
- if val := bkt.Get(tableKey(qtid)); val == nil { - return dax.NewErrTableIDDoesNotExist(qtid) - } - - vsn := make([]byte, 8) - binary.LittleEndian.PutUint64(vsn, uint64(partition.Version)) - - return bkt.Put(partitionKey(qtid, partition.Num), vsn) -} - -func (s *VersionStore) Partitions(ctx context.Context, qtid dax.QualifiedTableID) (dax.VersionedPartitions, bool, error) { - tx, err := s.db.BeginTx(ctx, false) - if err != nil { - return nil, false, errors.Wrap(err, "getting tx") - } - defer tx.Rollback() - - partitions, err := s.getPartitions(ctx, tx, qtid) - if err != nil { - return nil, false, errors.Wrap(err, "getting partitions") - } - - return partitions, true, nil -} - -func (s *VersionStore) getPartitions(ctx context.Context, tx *Tx, qtid dax.QualifiedTableID) (dax.VersionedPartitions, error) { - c := tx.Bucket(bucketTableKeys).Cursor() - - // Deserialize rows into Partition objects. - partitions := make(dax.VersionedPartitions, 0) - - prefix := []byte(fmt.Sprintf(prefixFmtTableKeys, qtid.OrganizationID, qtid.DatabaseID, qtid.ID)) - for k, v := c.Seek(prefix); k != nil && bytes.HasPrefix(k, prefix); k, v = c.Next() { - if v == nil { - s.logger.Printf("nil value for key: %s", k) - continue - } - - var partition dax.VersionedPartition - - partitionNum, err := keyPartitionNum(k) - if err != nil { - return nil, errors.Wrapf(err, "getting partitionNum from key: %v", k) - } - - partition.Num = partitionNum - partition.Version = int(binary.LittleEndian.Uint64(v)) - - partitions = append(partitions, partition) - } - - return partitions, nil -} - -func (s *VersionStore) PartitionVersion(ctx context.Context, qtid dax.QualifiedTableID, partitionNum dax.PartitionNum) (int, bool, error) { - tx, err := s.db.BeginTx(ctx, false) - if err != nil { - return -1, false, err - } - defer tx.Rollback() - - return getPartitionVersion(ctx, tx, qtid, partitionNum) -} - -func getPartitionVersion(ctx context.Context, tx *Tx, qtid dax.QualifiedTableID, partitionNum dax.PartitionNum) (int, bool, error) { - version := -1 - - bkt := tx.Bucket(bucketTableKeys) - if bkt == nil { - return version, false, errors.Errorf(ErrFmtBucketNotFound, bucketTableKeys) - } - - b := bkt.Get(partitionKey(qtid, partitionNum)) - if b == nil { - return version, false, nil - } - version = int(binary.LittleEndian.Uint64(b)) - - return version, true, nil -} - -func (s *VersionStore) PartitionTables(ctx context.Context, qual dax.TableQualifier) (dax.TableIDs, error) { - tx, err := s.db.BeginTx(ctx, false) - if err != nil { - return nil, errors.Wrap(err, "beginning tx") - } - defer tx.Rollback() - - return s.getTableIDs(ctx, tx, qual, bucketTableKeys) -} - -// AddFields adds new fields to be managed by VersionStore. It returns the -// number of fields added or an error. 
-func (s *VersionStore) AddFields(ctx context.Context, qtid dax.QualifiedTableID, fields ...dax.VersionedField) error { - tx, err := s.db.BeginTx(ctx, true) - if err != nil { - return err - } - defer tx.Rollback() - - for _, field := range fields { - if err := createFieldVersion(ctx, tx, qtid, field); err != nil { - return errors.Wrap(err, "creating field version") - } - } - - return tx.Commit() -} - -func createFieldVersion(ctx context.Context, tx *Tx, qtid dax.QualifiedTableID, field dax.VersionedField) error { - // TODO: validate data more formally - if field.Version < 0 { - return errors.New(errors.ErrUncoded, fmt.Sprintf("invalid field version: %d", field.Version)) - } - - bkt := tx.Bucket(bucketFieldKeys) - if bkt == nil { - return errors.Errorf(ErrFmtBucketNotFound, bucketFieldKeys) - } - - // Ensure the table exists. - if val := bkt.Get(tableKey(qtid)); val == nil { - return dax.NewErrTableIDDoesNotExist(qtid) - } - - vsn := make([]byte, 8) - binary.LittleEndian.PutUint64(vsn, uint64(field.Version)) - - return bkt.Put(fieldKey(qtid, field.Name), vsn) -} - -func (s *VersionStore) Fields(ctx context.Context, qtid dax.QualifiedTableID) (dax.VersionedFields, bool, error) { - tx, err := s.db.BeginTx(ctx, false) - if err != nil { - return nil, false, errors.Wrap(err, "getting tx") - } - defer tx.Rollback() - - fields, err := s.getFields(ctx, tx, qtid) - if err != nil { - return nil, false, errors.Wrap(err, "getting fields") - } - - return fields, true, nil -} - -func (s *VersionStore) getFields(ctx context.Context, tx *Tx, qtid dax.QualifiedTableID) (dax.VersionedFields, error) { - c := tx.Bucket(bucketFieldKeys).Cursor() - - // Deserialize rows into FieldVersion objects. - fieldVersions := make(dax.VersionedFields, 0) - - prefix := []byte(fmt.Sprintf(prefixFmtFieldKeys, qtid.OrganizationID, qtid.DatabaseID, qtid.ID)) - for k, v := c.Seek(prefix); k != nil && bytes.HasPrefix(k, prefix); k, v = c.Next() { - if v == nil { - s.logger.Printf("nil value for key: %s", k) - continue - } - - var fieldVersion dax.VersionedField - - fieldName, err := keyFieldName(k) - if err != nil { - return nil, errors.Wrapf(err, "getting partitionNum from key: %v", k) - } - - fieldVersion.Name = fieldName - fieldVersion.Version = int(binary.LittleEndian.Uint64(v)) - - fieldVersions = append(fieldVersions, fieldVersion) - } - - return fieldVersions, nil -} - -func (s *VersionStore) FieldVersion(ctx context.Context, qtid dax.QualifiedTableID, field dax.FieldName) (int, bool, error) { - tx, err := s.db.BeginTx(ctx, false) - if err != nil { - return -1, false, err - } - defer tx.Rollback() - - return getFieldVersion(ctx, tx, qtid, field) -} - -func getFieldVersion(ctx context.Context, tx *Tx, qtid dax.QualifiedTableID, field dax.FieldName) (int, bool, error) { - version := -1 - - bkt := tx.Bucket(bucketFieldKeys) - if bkt == nil { - return version, false, errors.Errorf(ErrFmtBucketNotFound, bucketFieldKeys) - } - - b := bkt.Get(fieldKey(qtid, field)) - if b == nil { - return version, false, nil - } - version = int(binary.LittleEndian.Uint64(b)) - - return version, true, nil -} - -func (s *VersionStore) FieldTables(ctx context.Context, qual dax.TableQualifier) (dax.TableIDs, error) { - tx, err := s.db.BeginTx(ctx, false) - if err != nil { - return nil, errors.Wrap(err, "beginning tx") - } - defer tx.Rollback() - - return s.getTableIDs(ctx, tx, qual, bucketFieldKeys) -} - -// Copy returns an in-memory copy of VersionStore. 
-func (s *VersionStore) Copy(ctx context.Context) (dax.VersionStore, error) { - new := inmem.NewVersionStore() - - // shards. - qtids, err := s.bucketTables(ctx, bucketShards) - if err != nil { - return nil, errors.Wrap(err, "getting shard tables") - } - for _, qtid := range qtids { - shards, found, err := s.Shards(ctx, qtid) - if err != nil { - return nil, errors.Wrap(err, "getting shards") - } else if !found { - continue - } - _ = new.AddTable(ctx, qtid) - new.AddShards(ctx, qtid, shards...) - } - - // tableKeys. - qtids, err = s.bucketTables(ctx, bucketTableKeys) - if err != nil { - return nil, errors.Wrap(err, "getting table key tables") - } - for _, qtid := range qtids { - partitions, found, err := s.Partitions(ctx, qtid) - if err != nil { - return nil, errors.Wrap(err, "getting partitions") - } else if !found { - continue - } - _ = new.AddTable(ctx, qtid) - new.AddPartitions(ctx, qtid, partitions...) - } - - // fieldKeys. - qtids, err = s.bucketTables(ctx, bucketFieldKeys) - if err != nil { - return nil, errors.Wrap(err, "getting field key tables") - } - for _, qtid := range qtids { - fields, found, err := s.Fields(ctx, qtid) - if err != nil { - return nil, errors.Wrap(err, "getting fields") - } else if !found { - continue - } - _ = new.AddTable(ctx, qtid) - new.AddFields(ctx, qtid, fields...) - } - - return new, nil -} - -///////////////////////////////////////////////////////// - -const ( - prefixShards = "shards/" - prefixFmtShards = prefixShards + "%s/%s/%s/" - - prefixTableKeys = "tablekeys/" - prefixFmtTableKeys = prefixTableKeys + "%s/%s/%s/" - - prefixFieldKeys = "fieldkeys/" - prefixFmtFieldKeys = prefixFieldKeys + "%s/%s/%s/" - - prefixTables = "tables/" - prefixFmtTables = prefixTables + "%s/%s/" -) - -// tableKey returns a key based on table name. -func tableKey(qtid dax.QualifiedTableID) []byte { - qual := qtid.TableQualifier - key := fmt.Sprintf(prefixFmtTables+"%s", qual.OrganizationID, qual.DatabaseID, qtid.ID) - return []byte(key) -} - -// keyTableID gets the table ID out of the key. -func keyTableID(key []byte) (dax.TableID, error) { - parts := strings.Split(string(key), "/") - if len(parts) != 4 { - return "", errors.New(errors.ErrUncoded, "table key format expected: `tables/orgID/dbID/tableID`") - } - - return dax.TableID(parts[3]), nil -} - -// keyQualifiedTableID gets the qualified table ID out of the key. -func keyQualifiedTableID(key []byte) (dax.QualifiedTableID, error) { - parts := strings.Split(string(key), "/") - if len(parts) != 4 { - return dax.QualifiedTableID{}, errors.New(errors.ErrUncoded, "table key format expected: `tables/orgID/dbID/tableID`") - } - - return dax.NewQualifiedTableID( - dax.NewTableQualifier(dax.OrganizationID(parts[1]), dax.DatabaseID(parts[2])), - dax.TableID(parts[3]), - ), nil -} - -// shardKey returns a key based on table and shard. -func shardKey(qtid dax.QualifiedTableID, shard dax.ShardNum) []byte { - key := fmt.Sprintf(prefixFmtShards+"%d", qtid.OrganizationID, qtid.DatabaseID, qtid.ID, shard) - return []byte(key) -} - -// keyShardNum gets the shardNum out of the key. 
-func keyShardNum(key []byte) (dax.ShardNum, error) { - parts := strings.Split(string(key), "/") - if len(parts) != 5 { - return 0, errors.New(errors.ErrUncoded, "shard key format expected: `shards/orgID/dbID/table/shard`") - } - - intVar, err := strconv.Atoi(parts[4]) - if err != nil { - return 0, errors.Wrapf(err, "converting string to shardNum: %s", parts[4]) - } - - return dax.ShardNum(intVar), nil -} - -// partitionKey returns a key based on table and partition. -func partitionKey(qtid dax.QualifiedTableID, partition dax.PartitionNum) []byte { - key := fmt.Sprintf(prefixFmtTableKeys+"%d", qtid.OrganizationID, qtid.DatabaseID, qtid.ID, partition) - return []byte(key) -} - -// keyPartitionNum gets the partitionNum out of the key. -func keyPartitionNum(key []byte) (dax.PartitionNum, error) { - parts := strings.Split(string(key), "/") - if len(parts) != 5 { - return 0, errors.New(errors.ErrUncoded, "partition key format expected: `tablekeys/orgID/dbID/table/partition`") - } - - intVar, err := strconv.Atoi(parts[4]) - if err != nil { - return 0, errors.Wrapf(err, "converting string to partitionNum: %s", parts[4]) - } - - return dax.PartitionNum(intVar), nil -} - -// fieldKey returns a key based on table and field. -func fieldKey(qtid dax.QualifiedTableID, field dax.FieldName) []byte { - key := fmt.Sprintf(prefixFmtFieldKeys+"%s", qtid.OrganizationID, qtid.DatabaseID, qtid.ID, field) - return []byte(key) -} - -// keyFieldName gets the fieldName out of the key. -func keyFieldName(key []byte) (dax.FieldName, error) { - parts := strings.Split(string(key), "/") - if len(parts) != 5 { - return "", errors.New(errors.ErrUncoded, "field key format expected: `fieldkeys/orgID/dbID/table/field`") - } - - return dax.FieldName(parts[4]), nil -} diff --git a/dax/boltdb/versionstore_test.go b/dax/boltdb/versionstore_test.go deleted file mode 100644 index 6ad687a69..000000000 --- a/dax/boltdb/versionstore_test.go +++ /dev/null @@ -1,388 +0,0 @@ -package boltdb_test - -import ( - "context" - "fmt" - "sort" - "testing" - - "github.com/featurebasedb/featurebase/v3/dax" - "github.com/featurebasedb/featurebase/v3/dax/boltdb" - testbolt "github.com/featurebasedb/featurebase/v3/dax/test/boltdb" - "github.com/featurebasedb/featurebase/v3/logger" - "github.com/stretchr/testify/assert" -) - -func TestVersionStore(t *testing.T) { - db := testbolt.MustOpenDB(t) - defer testbolt.MustCloseDB(t, db) - - ctx := context.Background() - - t.Cleanup(func() { - testbolt.CleanupDB(t, db.Path()) - }) - - orgID := dax.OrganizationID("acme") - dbID := dax.DatabaseID("db1") - - qual := dax.NewTableQualifier(orgID, dbID) - - // Initialize the buckets. - assert.NoError(t, db.InitializeBuckets(boltdb.VersionStoreBuckets...)) - - t.Run("Tables", func(t *testing.T) { - vs := boltdb.NewVersionStore(db, logger.NopLogger) - - qtids := newQualifiedTableIDs(t, qual, 3) - qtid1 := qtids[0] - qtid2 := qtids[1] - qtid3 := qtids[2] - defer vs.RemoveTable(ctx, qtid1) - defer vs.RemoveTable(ctx, qtid2) - defer vs.RemoveTable(ctx, qtid3) - - // Add table 1. - assert.NoError(t, vs.AddTable(ctx, qtid1)) - - // Add table 2. - assert.NoError(t, vs.AddTable(ctx, qtid2)) - - // Add table 3. - assert.NoError(t, vs.AddTable(ctx, qtid3)) - }) - - t.Run("Shards", func(t *testing.T) { - vs := boltdb.NewVersionStore(db, logger.NopLogger) - - qtids := newQualifiedTableIDs(t, qual, 3) - qtid1 := qtids[0] - qtid2 := qtids[1] - qtid3 := qtids[2] - - // Add tables. 
- assert.NoError(t, vs.AddTable(ctx, qtid1)) - assert.NoError(t, vs.AddTable(ctx, qtid2)) - assert.NoError(t, vs.AddTable(ctx, qtid3)) - defer vs.RemoveTable(ctx, qtid1) - defer vs.RemoveTable(ctx, qtid2) - defer vs.RemoveTable(ctx, qtid3) - - // Create some shards to insert into the table. - shards := make(dax.VersionedShards, 3) - for i := range shards { - shards[i] = dax.VersionedShard{ - Num: dax.ShardNum(i), - Version: i * 2, - } - } - - // Add shards to table 1. - { - err := vs.AddShards(ctx, qtid1, shards...) - assert.NoError(t, err) - } - - // Add shards to table 2. - { - err := vs.AddShards(ctx, qtid2, shards...) - assert.NoError(t, err) - } - - // Fetch a shard and compare. - { - version, found, err := vs.ShardVersion(ctx, qtid1, 2) - assert.NoError(t, err) - assert.True(t, found) - assert.Equal(t, 4, version) - } - - // Fetch all shards and compare. - { - shrds, found, err := vs.Shards(ctx, qtid1) - assert.NoError(t, err) - assert.True(t, found) - assert.Equal(t, shards, shrds) - } - - // Fetch tables. - { - tblIDs, err := vs.ShardTables(ctx, qual) - assert.NoError(t, err) - exp := dax.TableIDs{qtid1.ID, qtid2.ID, qtid3.ID} - assert.Equal(t, exp, tblIDs) - } - - // Remove table 1. - { - shards, partitions, err := vs.RemoveTable(ctx, qtid1) - assert.NoError(t, err) - assert.Equal(t, shards, shards) - assert.Equal(t, dax.VersionedPartitions{}, partitions) - } - - // Fetch all shards and compare. - { - shrds, found, err := vs.Shards(ctx, qtid1) - assert.NoError(t, err) - assert.True(t, found) - assert.Equal(t, dax.VersionedShards{}, shrds) - } - }) - - t.Run("Partitions", func(t *testing.T) { - vs := boltdb.NewVersionStore(db, logger.NopLogger) - - // Create some partitions to insert into the table. - partitions := make(dax.VersionedPartitions, 3) - for i := range partitions { - partitions[i] = dax.VersionedPartition{ - Num: dax.PartitionNum(i), - Version: i * 2, - } - } - - qtids := newQualifiedTableIDs(t, qual, 2) - qtid1 := qtids[0] - qtid2 := qtids[1] - - // Add tables. - assert.NoError(t, vs.AddTable(ctx, qtid1)) - assert.NoError(t, vs.AddTable(ctx, qtid2)) - defer vs.RemoveTable(ctx, qtid1) - defer vs.RemoveTable(ctx, qtid2) - - // Add partitions to table 1. - { - err := vs.AddPartitions(ctx, qtid1, partitions...) - assert.NoError(t, err) - } - - // Add partitions to table 2. - { - err := vs.AddPartitions(ctx, qtid2, partitions...) - assert.NoError(t, err) - } - - // Fetch a partition and compare. - { - version, found, err := vs.PartitionVersion(ctx, qtid1, 2) - assert.NoError(t, err) - assert.True(t, found) - assert.Equal(t, 4, version) - } - - // Fetch all partitions and compare. - { - parts, found, err := vs.Partitions(ctx, qtid1) - assert.NoError(t, err) - assert.True(t, found) - assert.Equal(t, partitions, parts) - } - - // Fetch tables. - { - tblIDs, err := vs.PartitionTables(ctx, qual) - assert.NoError(t, err) - exp := dax.TableIDs{qtid1.ID, qtid2.ID} - assert.Equal(t, exp, tblIDs) - } - - // Remove table 1. - { - shards, partitions, err := vs.RemoveTable(ctx, qtid1) - assert.NoError(t, err) - assert.Equal(t, dax.VersionedShards{}, shards) - assert.Equal(t, partitions, partitions) - } - - // Fetch all partitions and compare. 
- { - parts, found, err := vs.Partitions(ctx, qtid1) - assert.NoError(t, err) - assert.True(t, found) - assert.Equal(t, dax.VersionedPartitions{}, parts) - } - }) - - t.Run("FieldVersions", func(t *testing.T) { - vs := boltdb.NewVersionStore(db, logger.NopLogger) - - qtids := newQualifiedTableIDs(t, qual, 2) - qtid1 := qtids[0] - qtid2 := qtids[1] - - // Add tables. - assert.NoError(t, vs.AddTable(ctx, qtid1)) - assert.NoError(t, vs.AddTable(ctx, qtid2)) - defer vs.RemoveTable(ctx, qtid1) - defer vs.RemoveTable(ctx, qtid2) - - // Create some fieldVersions to insert into the table. - fieldVersions := make(dax.VersionedFields, 3) - for i := range fieldVersions { - fieldVersions[i] = dax.VersionedField{ - Name: dax.FieldName(fmt.Sprintf("fld-%d", i)), - Version: i * 2, - } - } - - // Add fieldVersions to table 1. - { - err := vs.AddFields(ctx, qtid1, fieldVersions...) - assert.NoError(t, err) - } - - // Add fieldVersions to table 2. - { - err := vs.AddFields(ctx, qtid2, fieldVersions...) - assert.NoError(t, err) - } - - // Fetch a fieldVersion and compare. - { - version, found, err := vs.FieldVersion(ctx, qtid1, dax.FieldName("fld-2")) - assert.NoError(t, err) - assert.True(t, found) - assert.Equal(t, 4, version) - } - - // Fetch all fieldVersions and compare. - { - flds, found, err := vs.Fields(ctx, qtid1) - assert.NoError(t, err) - assert.True(t, found) - assert.Equal(t, fieldVersions, flds) - } - - // Fetch tables. - { - tblIDs, err := vs.FieldTables(ctx, qual) - assert.NoError(t, err) - exp := dax.TableIDs{qtid1.ID, qtid2.ID} - assert.Equal(t, exp, tblIDs) - } - - // Remove table 1. - { - shards, partitions, err := vs.RemoveTable(ctx, qtid1) - assert.NoError(t, err) - assert.Equal(t, dax.VersionedShards{}, shards) - assert.Equal(t, dax.VersionedPartitions{}, partitions) - } - - // Fetch all fieldVersions and compare. - { - flds, found, err := vs.Fields(ctx, qtid1) - assert.NoError(t, err) - assert.True(t, found) - assert.Equal(t, dax.VersionedFields{}, flds) - } - }) - - t.Run("Copy", func(t *testing.T) { - vs := boltdb.NewVersionStore(db, logger.NopLogger) - - qtids := newQualifiedTableIDs(t, qual, 1) - qtid1 := qtids[0] - - // Add tables. - assert.NoError(t, vs.AddTable(ctx, qtid1)) - defer vs.RemoveTable(ctx, qtid1) - - // Create some shards to insert into the table. - shards := make(dax.VersionedShards, 3) - for i := range shards { - shards[i] = dax.VersionedShard{ - Num: dax.ShardNum(i), - Version: i * 2, - } - } - - // Create some partitions to insert into the table. - partitions := make(dax.VersionedPartitions, 3) - for i := range partitions { - partitions[i] = dax.VersionedPartition{ - Num: dax.PartitionNum(i), - Version: i * 2, - } - } - - // Create some fieldVersions to insert into the table. - fieldVersions := make(dax.VersionedFields, 3) - for i := range fieldVersions { - fieldVersions[i] = dax.VersionedField{ - Name: dax.FieldName(fmt.Sprintf("fld-%d", i)), - Version: i * 2, - } - } - - // Add shards to table 1. - { - err := vs.AddShards(ctx, qtid1, shards...) - assert.NoError(t, err) - } - - // Add partitions to table 1. - { - err := vs.AddPartitions(ctx, qtid1, partitions...) - assert.NoError(t, err) - } - - // Add fieldVersions to table 1. - { - err := vs.AddFields(ctx, qtid1, fieldVersions...) - assert.NoError(t, err) - } - - copy, err := vs.Copy(ctx) - assert.NoError(t, err) - - // Fetch a shard and compare. 
- { - version, found, err := copy.ShardVersion(ctx, qtid1, 2) - assert.NoError(t, err) - assert.True(t, found) - assert.Equal(t, 4, version) - } - - // Fetch all partitions and compare. - { - parts, found, err := copy.Partitions(ctx, qtid1) - assert.NoError(t, err) - assert.True(t, found) - assert.Equal(t, partitions, parts) - } - - // Fetch all fieldVersions and compare. - { - flds, found, err := copy.Fields(ctx, qtid1) - assert.NoError(t, err) - assert.True(t, found) - assert.Equal(t, fieldVersions, flds) - } - }) -} - -// newQualifiedTableIDs is a test helper function which generates a slice of n -// qtid. The entries in the slice will be ordered by TableID. -func newQualifiedTableIDs(t *testing.T, qual dax.TableQualifier, n int) []dax.QualifiedTableID { - t.Helper() - - qtids := make([]dax.QualifiedTableID, n) - for i := range qtids { - tbl := dax.NewTable("testvstore") - tbl.CreateID() - qtids[i] = dax.NewQualifiedTableID( - qual, - tbl.ID, - ) - } - - // sort the qtids by ID - sort.Slice(qtids, func(i, j int) bool { - return qtids[i].ID < qtids[j].ID - }) - - return qtids -} diff --git a/dax/computer/interfaces.go b/dax/computer/interfaces.go index f04e926cc..ee2a6dfa1 100644 --- a/dax/computer/interfaces.go +++ b/dax/computer/interfaces.go @@ -18,8 +18,12 @@ type Registrar interface { // These are typically implemented by the WriteLogger client. type WriteLogService interface { AppendMessage(bucket string, key string, version int, msg []byte) error - LogReader(bucket string, key string, version int) (io.Reader, io.Closer, error) + LogReader(bucket string, key string, version int) (io.ReadCloser, error) + LogReaderFrom(bucket string, key string, version int, offset int) (io.ReadCloser, error) DeleteLog(bucket string, key string, version int) error + List(bucket, key string) ([]WriteLogInfo, error) + Lock(bucket, key string) error + Unlock(bucket, key string) error } // SnapshotService represents the SnapshotService methods which Computer uses. @@ -28,71 +32,17 @@ type SnapshotService interface { Read(bucket string, key string, version int) (io.ReadCloser, error) Write(bucket string, key string, version int, rc io.ReadCloser) error WriteTo(bucket string, key string, version int, wrTo io.WriterTo) error + List(bucket, key string) ([]SnapInfo, error) } -// SnapshotReadWriter provides the interface for all snapshot read and writes in -// FeatureBase. -type SnapshotReadWriter interface { - WriteShardData(ctx context.Context, qtid dax.QualifiedTableID, partition dax.PartitionNum, shard dax.ShardNum, version int, rc io.ReadCloser) error - ReadShardData(ctx context.Context, qtid dax.QualifiedTableID, partition dax.PartitionNum, shard dax.ShardNum, version int) (io.ReadCloser, error) - - WriteTableKeys(ctx context.Context, qtid dax.QualifiedTableID, partition dax.PartitionNum, version int, wrTo io.WriterTo) error - ReadTableKeys(ctx context.Context, qtid dax.QualifiedTableID, partition dax.PartitionNum, version int) (io.ReadCloser, error) - - WriteFieldKeys(ctx context.Context, qtid dax.QualifiedTableID, field dax.FieldName, version int, wrTo io.WriterTo) error - ReadFieldKeys(ctx context.Context, qtid dax.QualifiedTableID, field dax.FieldName, version int) (io.ReadCloser, error) -} - -// WriteLogWriter provides the interface for all data writes to FeatureBase. After -// data has been written to the local FeatureBase node, the respective interface -// method(s) will be called. 
-type WriteLogWriter interface { - // CreateTableKeys sends a map of string key to uint64 ID for the table and - // partition provided. - CreateTableKeys(ctx context.Context, qtid dax.QualifiedTableID, partition dax.PartitionNum, version int, _ map[string]uint64) error - - // DeleteTableKeys deletes all table keys for the table and partition - // provided. - DeleteTableKeys(ctx context.Context, qtid dax.QualifiedTableID, partition dax.PartitionNum, version int) error - - // CreateFieldKeys sends a map of string key to uint64 ID for the table and - // field provided. - CreateFieldKeys(ctx context.Context, qtid dax.QualifiedTableID, field dax.FieldName, version int, _ map[string]uint64) error - - // DeleteTableKeys deletes all field keys for the table and field provided. - DeleteFieldKeys(ctx context.Context, qtid dax.QualifiedTableID, field dax.FieldName, version int) error - - // WriteShard sends shard data for the table and shard provided. - WriteShard(ctx context.Context, qtid dax.QualifiedTableID, partition dax.PartitionNum, shard dax.ShardNum, version int, msg LogMessage) error - - // DeleteShard deletes all data for the table and shard provided. - DeleteShard(ctx context.Context, qtid dax.QualifiedTableID, partition dax.PartitionNum, shard dax.ShardNum, version int) error -} - -// WriteLogReader provides the interface for all reads from the write log. -type WriteLogReader interface { - ShardReader(ctx context.Context, qtid dax.QualifiedTableID, partition dax.PartitionNum, shard dax.ShardNum, version int) ShardReader - TableKeyReader(ctx context.Context, qtid dax.QualifiedTableID, partition dax.PartitionNum, version int) TableKeyReader - FieldKeyReader(ctx context.Context, qtid dax.QualifiedTableID, field dax.FieldName, version int) FieldKeyReader -} - -type TableKeyReader interface { - Open() error - Read() (PartitionKeyMap, error) - Close() error -} - -type FieldKeyReader interface { - Open() error - Read() (FieldKeyMap, error) - Close() error +// SnapInfo holds metadata about a snapshot. +type SnapInfo struct { + Version int + // Date time.Time } -type ShardReader interface { - Open() error - Read() (LogMessage, error) - Close() error -} +// WriteLogInfo holds metadata about a write log. +type WriteLogInfo SnapInfo // LogMessage is implemented by a variety of types which can be serialized as // messages to the WriteLogger. diff --git a/dax/computer/noop.go b/dax/computer/noop.go deleted file mode 100644 index 296ab1661..000000000 --- a/dax/computer/noop.go +++ /dev/null @@ -1,162 +0,0 @@ -package computer - -import ( - "context" - "io" - - "github.com/featurebasedb/featurebase/v3/dax" -) - -// Ensure type implements interface. -var _ WriteLogWriter = (*NopWriteLogWriter)(nil) - -// NopWriteLogWriter is a no-op implementation of the WriteLogWriter interface. 
-type NopWriteLogWriter struct{} - -func NewNopWriteLogWriter() *NopWriteLogWriter { - return &NopWriteLogWriter{} -} - -func (w *NopWriteLogWriter) CreateTableKeys(ctx context.Context, qtid dax.QualifiedTableID, partition dax.PartitionNum, version int, m map[string]uint64) error { - return nil -} - -func (w *NopWriteLogWriter) DeleteTableKeys(ctx context.Context, qtid dax.QualifiedTableID, partition dax.PartitionNum, version int) error { - return nil -} - -func (w *NopWriteLogWriter) CreateFieldKeys(ctx context.Context, qtid dax.QualifiedTableID, field dax.FieldName, version int, m map[string]uint64) error { - return nil -} - -func (w *NopWriteLogWriter) DeleteFieldKeys(ctx context.Context, qtid dax.QualifiedTableID, field dax.FieldName, version int) error { - return nil -} - -func (w *NopWriteLogWriter) WriteShard(ctx context.Context, qtid dax.QualifiedTableID, partition dax.PartitionNum, shard dax.ShardNum, version int, msg LogMessage) error { - return nil -} - -func (w *NopWriteLogWriter) DeleteShard(ctx context.Context, qtid dax.QualifiedTableID, partition dax.PartitionNum, shard dax.ShardNum, version int) error { - return nil -} - -// Ensure type implements interface. -var _ WriteLogReader = (*NopWriteLogReader)(nil) - -// NopWriteLogReader is a no-op implementation of the WriteLogReader interface. -type NopWriteLogReader struct{} - -func NewNopWriteLogReader() *NopWriteLogReader { - return &NopWriteLogReader{} -} - -func (w *NopWriteLogReader) TableKeyReader(ctx context.Context, qtid dax.QualifiedTableID, partition dax.PartitionNum, version int) TableKeyReader { - return NewNopTableKeyReader() -} - -func (w *NopWriteLogReader) FieldKeyReader(ctx context.Context, qtid dax.QualifiedTableID, field dax.FieldName, version int) FieldKeyReader { - return NewNopFieldKeyReader() -} - -func (w *NopWriteLogReader) ShardReader(ctx context.Context, qtid dax.QualifiedTableID, partition dax.PartitionNum, shard dax.ShardNum, version int) ShardReader { - return NewNopShardReader() -} - -//////////////////////////////////////////////// - -// Ensure type implements interface. -var _ TableKeyReader = &NopTableKeyReader{} - -// NopTableKeyReader is a no-op implementation of the TableKeyReader -// interface. -type NopTableKeyReader struct{} - -func NewNopTableKeyReader() *NopTableKeyReader { - return &NopTableKeyReader{} -} - -func (r *NopTableKeyReader) Open() error { return nil } -func (r *NopTableKeyReader) Read() (PartitionKeyMap, error) { - return PartitionKeyMap{}, io.EOF -} -func (r *NopTableKeyReader) Close() error { return nil } - -//////////////////////////////////////////////// - -// Ensure type implements interface. -var _ FieldKeyReader = &NopFieldKeyReader{} - -// NopFieldKeyReader is a no-op implementation of the FieldKeyReader -// interface. -type NopFieldKeyReader struct{} - -func NewNopFieldKeyReader() *NopFieldKeyReader { - return &NopFieldKeyReader{} -} - -func (r *NopFieldKeyReader) Open() error { return nil } -func (r *NopFieldKeyReader) Read() (FieldKeyMap, error) { - return FieldKeyMap{}, io.EOF -} -func (r *NopFieldKeyReader) Close() error { return nil } - -//////////////////////////////////////////////// - -// Ensure type implements interface. -var _ ShardReader = &NopShardReader{} - -// NopShardReader is a no-op implementation of the ShardReader interface. 
-type NopShardReader struct{} - -func NewNopShardReader() *NopShardReader { - return &NopShardReader{} -} - -func (r *NopShardReader) Open() error { return nil } -func (r *NopShardReader) Read() (LogMessage, error) { - return nil, io.EOF -} -func (r *NopShardReader) Close() error { return nil } - -////////////// SNAPSHOT //////////////////////// - -// Ensure type implements interface. -var _ SnapshotReadWriter = &NopSnapshotReadWriter{} - -// NopSnapshotReadWriter is a no-op implementation of the SnapshotReadWriter -// interface. -type NopSnapshotReadWriter struct{} - -func NewNopSnapshotReadWriter() *NopSnapshotReadWriter { - return &NopSnapshotReadWriter{} -} - -func (w *NopSnapshotReadWriter) WriteShardData(ctx context.Context, qtid dax.QualifiedTableID, partition dax.PartitionNum, shard dax.ShardNum, version int, rc io.ReadCloser) error { - return nil -} - -func (w *NopSnapshotReadWriter) ReadShardData(ctx context.Context, qtid dax.QualifiedTableID, partition dax.PartitionNum, shard dax.ShardNum, version int) (io.ReadCloser, error) { - return &nopReadCloser{}, nil -} - -func (w *NopSnapshotReadWriter) WriteTableKeys(ctx context.Context, qtid dax.QualifiedTableID, partition dax.PartitionNum, version int, wrTo io.WriterTo) error { - return nil -} - -func (w *NopSnapshotReadWriter) ReadTableKeys(ctx context.Context, qtid dax.QualifiedTableID, partition dax.PartitionNum, version int) (io.ReadCloser, error) { - return &nopReadCloser{}, nil -} - -func (w *NopSnapshotReadWriter) WriteFieldKeys(ctx context.Context, qtid dax.QualifiedTableID, field dax.FieldName, version int, wrTo io.WriterTo) error { - return nil -} - -func (w *NopSnapshotReadWriter) ReadFieldKeys(ctx context.Context, qtid dax.QualifiedTableID, field dax.FieldName, version int) (io.ReadCloser, error) { - return &nopReadCloser{}, nil -} - -type nopReadCloser struct{} - -func (n *nopReadCloser) Read([]byte) (int, error) { return 0, nil } -func (n *nopReadCloser) Close() error { return nil } diff --git a/dax/computer/service/computer.go b/dax/computer/service/computer.go index b41b05123..63eb52c2a 100644 --- a/dax/computer/service/computer.go +++ b/dax/computer/service/computer.go @@ -7,16 +7,14 @@ import ( "net/http" featurebase "github.com/featurebasedb/featurebase/v3" - "github.com/featurebasedb/featurebase/v3/dax" - "github.com/featurebasedb/featurebase/v3/dax/computer" - mdsclient "github.com/featurebasedb/featurebase/v3/dax/mds/client" - "github.com/featurebasedb/featurebase/v3/dax/snapshotter" - snapshotterclient "github.com/featurebasedb/featurebase/v3/dax/snapshotter/client" - "github.com/featurebasedb/featurebase/v3/dax/writelogger" - writeloggerclient "github.com/featurebasedb/featurebase/v3/dax/writelogger/client" - "github.com/featurebasedb/featurebase/v3/errors" - "github.com/featurebasedb/featurebase/v3/logger" - fbserver "github.com/featurebasedb/featurebase/v3/server" + "github.com/featurebasedb/featurebase/v3/dax" + "github.com/featurebasedb/featurebase/v3/dax/computer" + mdsclient "github.com/featurebasedb/featurebase/v3/dax/mds/client" + "github.com/featurebasedb/featurebase/v3/dax/snapshotter" + "github.com/featurebasedb/featurebase/v3/dax/writelogger" + "github.com/featurebasedb/featurebase/v3/errors" + "github.com/featurebasedb/featurebase/v3/logger" + fbserver "github.com/featurebasedb/featurebase/v3/server" ) // Ensure type implements interface. 
@@ -157,7 +155,8 @@ func newCommand(addr dax.Address, cfg CommandConfig) *fbserver.Command { var writeLoggerImpl computer.WriteLogService if cfg.ComputerConfig.WriteLogger != "" { - writeLoggerImpl = writeloggerclient.New(dax.Address(cfg.ComputerConfig.WriteLogger)) + panic("running separate writelogger is currently unsupported") + // writeLoggerImpl = writeloggerclient.New(dax.Address(cfg.ComputerConfig.WriteLogger)) } else if wlSvc != nil { writeLoggerImpl = wlSvc } else { @@ -166,7 +165,8 @@ func newCommand(addr dax.Address, cfg CommandConfig) *fbserver.Command { var snapshotterImpl computer.SnapshotService if cfg.ComputerConfig.Snapshotter != "" { - snapshotterImpl = snapshotterclient.New(dax.Address(cfg.ComputerConfig.Snapshotter)) + panic("running separate snapshotter is currently unsupported") + // snapshotterImpl = snapshotterclient.New(dax.Address(cfg.ComputerConfig.Snapshotter)) } else if ssSvc != nil { snapshotterImpl = ssSvc } else { diff --git a/dax/computer/snapshot.go b/dax/computer/snapshot.go deleted file mode 100644 index fd5103098..000000000 --- a/dax/computer/snapshot.go +++ /dev/null @@ -1,94 +0,0 @@ -package computer - -import ( - "context" - "io" - - "github.com/featurebasedb/featurebase/v3/dax" - "github.com/featurebasedb/featurebase/v3/errors" -) - -// Ensure type implements interface. -var _ SnapshotReadWriter = &snapshotReadWriter{} - -// snapshotReadWriter uses a SnapshotService implementation (which could be, for -// example, an http client or a locally running sub-service) to store its -// snapshots. -type snapshotReadWriter struct { - ss SnapshotService -} - -func NewSnapshotReadWriter(ss SnapshotService) *snapshotReadWriter { - return &snapshotReadWriter{ - ss: ss, - } -} - -func (s *snapshotReadWriter) WriteShardData(ctx context.Context, qtid dax.QualifiedTableID, partition dax.PartitionNum, shard dax.ShardNum, version int, rc io.ReadCloser) error { - bucket := partitionBucket(qtid.Key(), partition) - key := shardKey(shard) - - if err := s.ss.Write(bucket, key, version, rc); err != nil { - return errors.Wrapf(err, "writing shard data: %s, %d", key, version) - } - - return nil -} - -func (s *snapshotReadWriter) ReadShardData(ctx context.Context, qtid dax.QualifiedTableID, partition dax.PartitionNum, shard dax.ShardNum, version int) (io.ReadCloser, error) { - bucket := partitionBucket(qtid.Key(), partition) - key := shardKey(shard) - - rc, err := s.ss.Read(bucket, key, version) - if err != nil { - return nil, errors.Wrapf(err, "reading shard data: %s, %s, %d", bucket, key, version) - } - - return rc, nil -} - -func (s *snapshotReadWriter) WriteTableKeys(ctx context.Context, qtid dax.QualifiedTableID, partition dax.PartitionNum, version int, wrTo io.WriterTo) error { - bucket := partitionBucket(qtid.Key(), partition) - key := keysFileName - - if err := s.ss.WriteTo(bucket, key, version, wrTo); err != nil { - return errors.Wrapf(err, "writing table keys: %s, %d", key, version) - } - - return nil -} - -func (s *snapshotReadWriter) ReadTableKeys(ctx context.Context, qtid dax.QualifiedTableID, partition dax.PartitionNum, version int) (io.ReadCloser, error) { - bucket := partitionBucket(qtid.Key(), partition) - key := keysFileName - - rc, err := s.ss.Read(bucket, key, version) - if err != nil { - return nil, errors.Wrapf(err, "reading table keys: %s, %s, %d", bucket, key, version) - } - - return rc, nil -} - -func (s *snapshotReadWriter) WriteFieldKeys(ctx context.Context, qtid dax.QualifiedTableID, field dax.FieldName, version int, wrTo io.WriterTo) error { - 
bucket := fieldBucket(qtid.Key(), field) - key := keysFileName - - if err := s.ss.WriteTo(bucket, key, version, wrTo); err != nil { - return errors.Wrapf(err, "writing field keys: %s, %d", key, version) - } - - return nil -} - -func (s *snapshotReadWriter) ReadFieldKeys(ctx context.Context, qtid dax.QualifiedTableID, field dax.FieldName, version int) (io.ReadCloser, error) { - bucket := fieldBucket(qtid.Key(), field) - key := keysFileName - - rc, err := s.ss.Read(bucket, key, version) - if err != nil { - return nil, errors.Wrapf(err, "reading field keys: %s, %s, %d", bucket, key, version) - } - - return rc, nil -} diff --git a/dax/computer/writelog.go b/dax/computer/writelog.go deleted file mode 100644 index df6999869..000000000 --- a/dax/computer/writelog.go +++ /dev/null @@ -1,307 +0,0 @@ -package computer - -import ( - "bufio" - "context" - "encoding/json" - "io" - - "github.com/featurebasedb/featurebase/v3/dax" - "github.com/featurebasedb/featurebase/v3/errors" -) - -// Ensure type implements interface. -var _ WriteLogReader = &writeLogReadWriter{} -var _ WriteLogWriter = &writeLogReadWriter{} - -// writeLogReadWriter is an implementation of the WriteLogReader and WriteLogWriter -// interfaces. It uses a WriteLogService implementation (which could be, for -// example, an http client or a locally running sub-service) to store its log -// messages. -type writeLogReadWriter struct { - wls WriteLogService -} - -func NewWriteLogReadWriter(wls WriteLogService) *writeLogReadWriter { - return &writeLogReadWriter{ - wls: wls, - } -} - -func (w *writeLogReadWriter) CreateTableKeys(ctx context.Context, qtid dax.QualifiedTableID, partition dax.PartitionNum, version int, m map[string]uint64) error { - msg := PartitionKeyMap{ - TableKey: qtid.Key(), - Partition: partition, - StringToID: m, - } - - b, err := json.Marshal(msg) - if err != nil { - return errors.Wrap(err, "marshalling partition key map to json") - } - - bucket := partitionBucket(qtid.Key(), partition) - - if err := w.wls.AppendMessage(bucket, keysFileName, version, b); err != nil { - return errors.Wrapf(err, "appending partition key message: %s, %d", keysFileName, version) - } - - return nil -} - -func (w *writeLogReadWriter) DeleteTableKeys(ctx context.Context, qtid dax.QualifiedTableID, partition dax.PartitionNum, version int) error { - bucket := partitionBucket(qtid.Key(), partition) - return w.wls.DeleteLog(bucket, keysFileName, version) -} - -func (w *writeLogReadWriter) CreateFieldKeys(ctx context.Context, qtid dax.QualifiedTableID, field dax.FieldName, version int, m map[string]uint64) error { - msg := FieldKeyMap{ - TableKey: qtid.Key(), - Field: field, - StringToID: m, - } - - b, err := json.Marshal(msg) - if err != nil { - return errors.Wrap(err, "marshalling field key map to json") - } - - bucket := fieldBucket(qtid.Key(), field) - - if err := w.wls.AppendMessage(bucket, keysFileName, version, b); err != nil { - return errors.Wrapf(err, "appending field key message: %s, %d", keysFileName, version) - } - - return nil -} - -func (w *writeLogReadWriter) DeleteFieldKeys(ctx context.Context, qtid dax.QualifiedTableID, field dax.FieldName, version int) error { - bucket := fieldBucket(qtid.Key(), field) - return w.wls.DeleteLog(bucket, keysFileName, version) -} - -func (w *writeLogReadWriter) WriteShard(ctx context.Context, qtid dax.QualifiedTableID, partition dax.PartitionNum, shard dax.ShardNum, version int, msg LogMessage) error { - b, err := MarshalLogMessage(msg, EncodeTypeJSON) - if err != nil { - return errors.Wrap(err, 
"marshalling log message") - } - - bucket := partitionBucket(qtid.Key(), partition) - shardKey := shardKey(shard) - - if err := w.wls.AppendMessage(bucket, shardKey, version, b); err != nil { - return errors.Wrapf(err, "appending shard key message: %s, %d", shardKey, version) - } - - return nil -} - -func (w *writeLogReadWriter) DeleteShard(ctx context.Context, qtid dax.QualifiedTableID, partition dax.PartitionNum, shard dax.ShardNum, version int) error { - bucket := partitionBucket(qtid.Key(), partition) - shardKey := shardKey(shard) - - return w.wls.DeleteLog(bucket, shardKey, version) -} - -//////////////////////////////////////////////// - -func (w *writeLogReadWriter) TableKeyReader(ctx context.Context, qtid dax.QualifiedTableID, partition dax.PartitionNum, version int) TableKeyReader { - return newTableKeyReader(w.wls, qtid, partition, version) -} - -type tableKeyReader struct { - wl WriteLogService - table dax.TableKey - partition dax.PartitionNum - version int - scanner *bufio.Scanner - closer io.Closer -} - -func newTableKeyReader(wl WriteLogService, qtid dax.QualifiedTableID, partition dax.PartitionNum, version int) *tableKeyReader { - r := &tableKeyReader{ - wl: wl, - table: qtid.Key(), - partition: partition, - version: version, - } - - return r -} - -func (r *tableKeyReader) Open() error { - bucket := partitionBucket(r.table, r.partition) - - reader, closer, err := r.wl.LogReader(bucket, keysFileName, r.version) - if err != nil { - return errors.Wrapf(err, "getting log reader: %s, %s, %d", bucket, keysFileName, r.version) - } - - r.closer = closer - r.scanner = bufio.NewScanner(reader) - - return nil -} - -func (r *tableKeyReader) Read() (PartitionKeyMap, error) { - if r.scanner == nil { - return PartitionKeyMap{}, io.EOF - } - - var b []byte - var out PartitionKeyMap - - if r.scanner.Scan() { - b = r.scanner.Bytes() - if err := json.Unmarshal(b, &out); err != nil { - return out, err - } - return out, nil - } - if err := r.scanner.Err(); err != nil { - return out, err - } - - return out, io.EOF -} - -func (r *tableKeyReader) Close() error { - if r.closer != nil { - return r.closer.Close() - } - return nil -} - -//////////////////////////////////////////////// - -func (w *writeLogReadWriter) FieldKeyReader(ctx context.Context, qtid dax.QualifiedTableID, field dax.FieldName, version int) FieldKeyReader { - return newFieldKeyReader(w.wls, qtid, field, version) -} - -type fieldKeyReader struct { - wl WriteLogService - table dax.TableKey - field dax.FieldName - version int - scanner *bufio.Scanner - closer io.Closer -} - -func newFieldKeyReader(wl WriteLogService, qtid dax.QualifiedTableID, field dax.FieldName, version int) *fieldKeyReader { - r := &fieldKeyReader{ - wl: wl, - table: qtid.Key(), - field: field, - version: version, - } - - return r -} - -func (r *fieldKeyReader) Open() error { - bucket := fieldBucket(r.table, r.field) - - reader, closer, err := r.wl.LogReader(bucket, keysFileName, r.version) - if err != nil { - return errors.Wrapf(err, "getting log reader: %s, %s, %d", bucket, keysFileName, r.version) - } - - r.closer = closer - r.scanner = bufio.NewScanner(reader) - - return nil -} - -func (r *fieldKeyReader) Read() (FieldKeyMap, error) { - if r.scanner == nil { - return FieldKeyMap{}, io.EOF - } - - var b []byte - var out FieldKeyMap - - if r.scanner.Scan() { - b = r.scanner.Bytes() - if err := json.Unmarshal(b, &out); err != nil { - return out, err - } - return out, nil - } - if err := r.scanner.Err(); err != nil { - return out, err - } - - return out, io.EOF -} 
- -func (r *fieldKeyReader) Close() error { - if r.closer != nil { - return r.closer.Close() - } - return nil -} - -//////////////////////////////////////////////// - -func (w *writeLogReadWriter) ShardReader(ctx context.Context, qtid dax.QualifiedTableID, partition dax.PartitionNum, shard dax.ShardNum, version int) ShardReader { - return newShardReader(w.wls, qtid, partition, shard, version) -} - -type shardReader struct { - wl WriteLogService - table dax.TableKey - partition dax.PartitionNum - shard dax.ShardNum - version int - scanner *bufio.Scanner - closer io.Closer -} - -func newShardReader(wl WriteLogService, qtid dax.QualifiedTableID, partition dax.PartitionNum, shard dax.ShardNum, version int) *shardReader { - r := &shardReader{ - wl: wl, - table: qtid.Key(), - partition: partition, - shard: shard, - version: version, - } - - return r -} - -func (r *shardReader) Open() error { - bucket := partitionBucket(r.table, r.partition) - shardKey := shardKey(r.shard) - - reader, closer, err := r.wl.LogReader(bucket, shardKey, r.version) - if err != nil { - return errors.Wrapf(err, "getting log reader: %s, %s, %d", bucket, shardKey, r.version) - } - - r.closer = closer - r.scanner = bufio.NewScanner(reader) - - return nil -} - -func (r *shardReader) Read() (LogMessage, error) { - if r.scanner == nil { - return nil, io.EOF - } - - if r.scanner.Scan() { - return UnmarshalLogMessage(r.scanner.Bytes()) - } - if err := r.scanner.Err(); err != nil { - return nil, err - } - - return nil, io.EOF -} - -func (r *shardReader) Close() error { - if r.closer != nil { - return r.closer.Close() - } - return nil -} diff --git a/dax/directive.go b/dax/directive.go index f5504864d..a0eccad26 100644 --- a/dax/directive.go +++ b/dax/directive.go @@ -1,5 +1,7 @@ package dax +import "context" + // Directive contains the instructions, sent from MDS, which a compute node is // to follow. A Directive is typically JSON-encoded and POSTed to a compute // node's `/directive` endpoint. @@ -19,6 +21,10 @@ type Directive struct { Version uint64 `json:"version"` } +type DirectiveVersion interface { + Increment(ctx context.Context, delta uint64) (uint64, error) +} + // DirectiveMethod is used to tell the compute node how it should handle the // Directive. type DirectiveMethod string @@ -57,9 +63,9 @@ func (d *Directive) Table(qtid QualifiedTableID) (*QualifiedTable, error) { // compute node is responsible. It assumes that the Directive does not contain // more than one ComputeRole for the same table; in that case, we would need to // return the union of Shards. -func (d *Directive) ComputeShards(tbl TableKey) VersionedShards { +func (d *Directive) ComputeShards(tbl TableKey) ShardNums { if d == nil || d.ComputeRoles == nil { - return VersionedShards{} + return nil } for _, cr := range d.ComputeRoles { @@ -68,14 +74,14 @@ func (d *Directive) ComputeShards(tbl TableKey) VersionedShards { } } - return VersionedShards{} + return nil } // ComputeShardsMap returns a map of table to shards. It assumes that the // Directive does not contain more than one ComputeRole for the same table; in // that case, we would need to return the union of Shards. 
-func (d *Directive) ComputeShardsMap() map[TableKey]VersionedShards { - m := make(map[TableKey]VersionedShards) +func (d *Directive) ComputeShardsMap() map[TableKey]ShardNums { + m := make(map[TableKey]ShardNums) if d == nil || d.ComputeRoles == nil { return m } @@ -91,9 +97,9 @@ func (d *Directive) ComputeShardsMap() map[TableKey]VersionedShards { // which this translate node is responsible. It assumes that the Directive does // not contain more than one TranslateRole for the same table; in that case, we // would need to return the union of Shards. -func (d *Directive) TranslatePartitions(tbl TableKey) VersionedPartitions { +func (d *Directive) TranslatePartitions(tbl TableKey) PartitionNums { if d == nil || d.TranslateRoles == nil { - return VersionedPartitions{} + return PartitionNums{} } for _, tr := range d.TranslateRoles { @@ -101,14 +107,14 @@ func (d *Directive) TranslatePartitions(tbl TableKey) VersionedPartitions { return tr.Partitions } } - return VersionedPartitions{} + return PartitionNums{} } // TranslatePartitionsMap returns a map of table to partitions. It assumes that // the Directive does not contain more than one TranslateRole for the same // table; in that case, we would need to return the union of Partitions. -func (d *Directive) TranslatePartitionsMap() map[TableKey]VersionedPartitions { - m := make(map[TableKey]VersionedPartitions) +func (d *Directive) TranslatePartitionsMap() map[TableKey]PartitionNums { + m := make(map[TableKey]PartitionNums) if d == nil || d.TranslateRoles == nil { return m } @@ -129,8 +135,8 @@ func (d *Directive) TranslatePartitionsMap() map[TableKey]VersionedPartitions { // TranslateFieldsMap returns a map of table to fields. It assumes that // the Directive does not contain more than one TranslateRole for the same // table; in that case, we would need to return the union of FieldValues. 
-func (d *Directive) TranslateFieldsMap() map[TableKey]VersionedFields { - m := make(map[TableKey]VersionedFields) +func (d *Directive) TranslateFieldsMap() map[TableKey][]FieldName { + m := make(map[TableKey][]FieldName) if d == nil || d.TranslateRoles == nil { return m } diff --git a/dax/docker-compose.yml b/dax/docker-compose.yml index 69e995297..c6ca7f8f7 100644 --- a/dax/docker-compose.yml +++ b/dax/docker-compose.yml @@ -8,9 +8,8 @@ services: environment: FEATUREBASE_BIND: 0.0.0.0:8080 FEATUREBASE_VERBOSE: "true" - FEATUREBASE_STORAGE_METHOD: boltdb - FEATUREBASE_STORAGE_DSN: file:/dax-data/mds.boldtb FEATUREBASE_MDS_RUN: "true" + FEATUREBASE_CONFIG_DATA_DIR: file:/dax-data/mds ports: - "8081:8080" @@ -22,7 +21,7 @@ services: FEATUREBASE_BIND: 0.0.0.0:8080 FEATUREBASE_VERBOSE: "true" FEATUREBASE_QUERYER_RUN: "true" - FEATUREBASE_QUERYER_CONFIG_MDS_ADDRESS: "mds:8080" + FEATUREBASE_QUERYER_CONFIG_MDS_ADDRESS: "mds:8080/mds" depends_on: - mds ports: @@ -34,8 +33,9 @@ services: dockerfile: ../Dockerfile-dax-quick environment: FEATUREBASE_COMPUTER_RUN: "true" - FEATUREBASE_COMPUTER_CONFIG_MDS_ADDRESS: "mds:8080" + FEATUREBASE_COMPUTER_CONFIG_MDS_ADDRESS: "mds:8080/mds" FEATUREBASE_COMPUTER_CONFIG_DATA_DIR: /dax-data/computer + FEATUREBASE_COMPUTER_CONFIG_VERBOSE: true FEATUREBASE_BIND: 0.0.0.0:8080 FEATUREBASE_VERBOSE: "true" FEATUREBASE_STORAGE_METHOD: boltdb @@ -63,4 +63,4 @@ services: GEN_USE_SHARD_TRANSACTIONAL_ENDPOINT: "true" GEN_SOURCE: "custom" GEN_TARGET: "mds" - GEN_MDS_ADDRESS: "mds:8080" + GEN_MDS_ADDRESS: "mds:8080/mds" diff --git a/dax/inmem/inmem.go b/dax/inmem/inmem.go deleted file mode 100644 index 9a2663214..000000000 --- a/dax/inmem/inmem.go +++ /dev/null @@ -1,2 +0,0 @@ -// Package inmem contains the in-memory implementation of the dax interfaces. -package inmem diff --git a/dax/inmem/versionstore.go b/dax/inmem/versionstore.go deleted file mode 100644 index a6071344a..000000000 --- a/dax/inmem/versionstore.go +++ /dev/null @@ -1,430 +0,0 @@ -package inmem - -import ( - "context" - "sort" - "sync" - - "github.com/featurebasedb/featurebase/v3/dax" - "github.com/featurebasedb/featurebase/v3/errors" -) - -// Ensure type implements interface. -var _ dax.VersionStore = (*VersionStore)(nil) - -// VersionStore manages all version info for shard, table keys, and field keys. -type VersionStore struct { - mu sync.RWMutex - - // shards is a map of all shards, by table, by shard number, known to - // contain data. - shards map[dax.TableQualifierKey]map[dax.TableID]map[dax.ShardNum]dax.VersionedShard - - // tableKeys is a map of all partitions, by table, by partition number, - // known to contain key data. - tableKeys map[dax.TableQualifierKey]map[dax.TableID]map[dax.PartitionNum]int - - // fieldKeys is a map of all fields, by table, known to contain key data. - fieldKeys map[dax.TableQualifierKey]map[dax.TableID]map[dax.FieldName]int -} - -// NewVersionStore returns a new instance of VersionStore with default values. -func NewVersionStore() *VersionStore { - return &VersionStore{ - shards: make(map[dax.TableQualifierKey]map[dax.TableID]map[dax.ShardNum]dax.VersionedShard), - tableKeys: make(map[dax.TableQualifierKey]map[dax.TableID]map[dax.PartitionNum]int), - fieldKeys: make(map[dax.TableQualifierKey]map[dax.TableID]map[dax.FieldName]int), - } -} - -// AddTable adds a table to be managed by VersionStore. 
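// Illustrative sketch (not part of this patch): the in-memory VersionStore
// being deleted here repeats a check-then-initialize dance for each level of
// its nested maps. A small generic helper (hypothetical, not in this
// codebase) collapses that pattern:
package main

import "fmt"

// ensure returns m[k], initializing it via mk when absent.
func ensure[K comparable, V any](m map[K]V, k K, mk func() V) V {
	v, ok := m[k]
	if !ok {
		v = mk()
		m[k] = v
	}
	return v
}

func main() {
	// qualifier -> table -> set of shard numbers
	shards := map[string]map[string]map[uint64]struct{}{}
	byQual := ensure(shards, "acme;db1", func() map[string]map[uint64]struct{} {
		return make(map[string]map[uint64]struct{})
	})
	byTable := ensure(byQual, "tbl0", func() map[uint64]struct{} {
		return make(map[uint64]struct{})
	})
	byTable[3] = struct{}{}
	fmt.Println(len(shards["acme;db1"]["tbl0"])) // 1
}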
-func (s *VersionStore) AddTable(ctx context.Context, qtid dax.QualifiedTableID) error { - s.mu.Lock() - defer s.mu.Unlock() - - // This check is clunky; three maps contain the table, but we only check for - // existence in one of them. It also seems weird to check all three, because - // if we get in a state where one of the maps doesn't contain a table that - // the other maps do contain, the state of the data is in question. - if _, found := s.shards[qtid.TableQualifier.Key()][qtid.ID]; found { - return dax.NewErrTableIDExists(qtid) - } - - // Initialize the maps in case VersionStore wasn't created with NewVersionStore(). - if s.shards == nil { - s.shards = make(map[dax.TableQualifierKey]map[dax.TableID]map[dax.ShardNum]dax.VersionedShard) - } - if s.tableKeys == nil { - s.tableKeys = make(map[dax.TableQualifierKey]map[dax.TableID]map[dax.PartitionNum]int) - } - if s.fieldKeys == nil { - s.fieldKeys = make(map[dax.TableQualifierKey]map[dax.TableID]map[dax.FieldName]int) - } - - // shards. - if _, ok := s.shards[qtid.TableQualifier.Key()]; !ok { - s.shards[qtid.TableQualifier.Key()] = make(map[dax.TableID]map[dax.ShardNum]dax.VersionedShard, 0) - } - if _, ok := s.shards[qtid.TableQualifier.Key()][qtid.ID]; !ok { - s.shards[qtid.TableQualifier.Key()][qtid.ID] = make(map[dax.ShardNum]dax.VersionedShard, 0) - } - - // tableKeys. - if _, ok := s.tableKeys[qtid.TableQualifier.Key()]; !ok { - s.tableKeys[qtid.TableQualifier.Key()] = make(map[dax.TableID]map[dax.PartitionNum]int, 0) - } - if _, ok := s.tableKeys[qtid.TableQualifier.Key()][qtid.ID]; !ok { - s.tableKeys[qtid.TableQualifier.Key()][qtid.ID] = make(map[dax.PartitionNum]int, 0) - } - - // fieldKeys. - if _, ok := s.fieldKeys[qtid.TableQualifier.Key()]; !ok { - s.fieldKeys[qtid.TableQualifier.Key()] = make(map[dax.TableID]map[dax.FieldName]int, 0) - } - if _, ok := s.fieldKeys[qtid.TableQualifier.Key()][qtid.ID]; !ok { - s.fieldKeys[qtid.TableQualifier.Key()][qtid.ID] = make(map[dax.FieldName]int, 0) - } - - return nil -} - -// RemoveTable removes the given table. An error will be returned if the table -// does not exist. -func (s *VersionStore) RemoveTable(ctx context.Context, qtid dax.QualifiedTableID) (dax.VersionedShards, dax.VersionedPartitions, error) { - s.mu.Lock() - defer s.mu.Unlock() - - var foundTable bool - var shards dax.VersionedShards - var partitions dax.VersionedPartitions - var err error - - // Remove shards for table. - if s.shards != nil { - if _, ok := s.shards[qtid.TableQualifier.Key()][qtid.ID]; ok { - foundTable = true - - // Get the shards to return before deleting from map. - shards, _, err = s.shardSlice(qtid) - if err != nil { - return nil, nil, errors.Wrapf(err, "getting shard slice: %s", qtid) - } - - // Remove the shards. - delete(s.shards[qtid.TableQualifier.Key()], qtid.ID) - } - } - - // Remove tableKeys for table. - if s.tableKeys != nil { - if _, ok := s.tableKeys[qtid.TableQualifier.Key()][qtid.ID]; ok { - foundTable = true - - // Get the partitions to return before deleting from map. - partitions, _, err = s.partitionSlice(qtid) - if err != nil { - return nil, nil, errors.Wrapf(err, "getting partition slice: %s", qtid) - } - - // Remove the tableKeys. - delete(s.tableKeys[qtid.TableQualifier.Key()], qtid.ID) - } - } - - // Remove fieldKeys for table. - if s.fieldKeys != nil { - if _, ok := s.fieldKeys[qtid.TableQualifier.Key()][qtid.ID]; ok { - foundTable = true - - // Remove the fieldKeys. 
- delete(s.fieldKeys[qtid.TableQualifier.Key()], qtid.ID) - } - } - - if !foundTable { - return nil, nil, dax.NewErrTableIDDoesNotExist(qtid) - } - - return shards, partitions, nil -} - -// AddShards adds new shards to be managed by VersionStore. It returns the -// number of shards added or an error. -func (s *VersionStore) AddShards(ctx context.Context, qtid dax.QualifiedTableID, shards ...dax.VersionedShard) error { - s.mu.Lock() - defer s.mu.Unlock() - - sh, ok := s.shards[qtid.TableQualifier.Key()][qtid.ID] - if !ok { - return dax.NewErrTableIDDoesNotExist(qtid) - } - - var n int - - for _, shard := range shards { - if _, ok := sh[shard.Num]; !ok { - n++ // TODO: this isn't considering a shard that exists, but the version changes. - } - sh[shard.Num] = shard - } - - return nil -} - -// Shards returns the list of shards available for the give table. It returns -// false if the table does not exist. -func (s *VersionStore) Shards(ctx context.Context, qtid dax.QualifiedTableID) (dax.VersionedShards, bool, error) { - s.mu.RLock() - defer s.mu.RUnlock() - - return s.shardSlice(qtid) -} - -// shardSlice is an unprotected version of Shards(). -func (s *VersionStore) shardSlice(qtid dax.QualifiedTableID) (dax.VersionedShards, bool, error) { - if s.shards == nil { - return nil, false, nil - } - - if shardNumMap, ok := s.shards[qtid.TableQualifier.Key()][qtid.ID]; ok { - rtn := make(dax.VersionedShards, 0, len(shardNumMap)) - for _, shard := range shardNumMap { - rtn = append(rtn, shard) - } - sort.Sort(rtn) - return rtn, true, nil - } - - return nil, false, nil -} - -// ShardVersion return the current version for the given table/shardNum. -// If a version is not being tracked, it returns a bool value of false. -func (s *VersionStore) ShardVersion(ctx context.Context, qtid dax.QualifiedTableID, shardNum dax.ShardNum) (int, bool, error) { - s.mu.RLock() - defer s.mu.RUnlock() - - t, ok := s.shards[qtid.TableQualifier.Key()][qtid.ID] - if !ok { - return -1, false, nil - } - - v, ok := t[shardNum] - if !ok { - return -1, false, nil - } - return v.Version, true, nil -} - -func (s *VersionStore) ShardTables(ctx context.Context, qual dax.TableQualifier) (dax.TableIDs, error) { - s.mu.RLock() - defer s.mu.RUnlock() - - qual.Key() - - tableIDs := make(dax.TableIDs, 0, len(s.shards[qual.Key()])) - - for tableID := range s.shards[qual.Key()] { - tableIDs = append(tableIDs, tableID) - } - - return tableIDs, nil -} - -// AddPartitions adds new partitions to be managed by VersionStore. It returns -// the number of partitions added or an error. -func (s *VersionStore) AddPartitions(ctx context.Context, qtid dax.QualifiedTableID, partitions ...dax.VersionedPartition) error { - s.mu.Lock() - defer s.mu.Unlock() - - tk, ok := s.tableKeys[qtid.TableQualifier.Key()][qtid.ID] - if !ok { - return dax.NewErrTableIDDoesNotExist(qtid) - } - - for _, partition := range partitions { - tk[partition.Num] = partition.Version - } - - return nil -} - -// Partitions returns the list of partitions available for the give table. It -// returns false if the table does not exist. -func (s *VersionStore) Partitions(ctx context.Context, qtid dax.QualifiedTableID) (dax.VersionedPartitions, bool, error) { - s.mu.RLock() - defer s.mu.RUnlock() - - return s.partitionSlice(qtid) -} - -// partitionSlice is an unprotected version of Partitions(). 
-func (s *VersionStore) partitionSlice(qtid dax.QualifiedTableID) (dax.VersionedPartitions, bool, error) { - if s.tableKeys == nil { - return nil, false, nil - } - - if partitionNumMap, ok := s.tableKeys[qtid.TableQualifier.Key()][qtid.ID]; ok { - rtn := make(dax.VersionedPartitions, 0, len(partitionNumMap)) - for partitionNum, version := range partitionNumMap { - rtn = append(rtn, dax.NewVersionedPartition(partitionNum, version)) - } - sort.Sort(rtn) - return rtn, true, nil - } - - return nil, false, nil -} - -// PartitionVersion return the current version for the given table/partitionNum. -// If a version is not being tracked, it returns a bool value of false. -func (s *VersionStore) PartitionVersion(ctx context.Context, qtid dax.QualifiedTableID, partitionNum dax.PartitionNum) (int, bool, error) { - s.mu.RLock() - defer s.mu.RUnlock() - - t, ok := s.tableKeys[qtid.TableQualifier.Key()][qtid.ID] - if !ok { - return -1, false, nil - } - - v, ok := t[partitionNum] - if !ok { - return -1, false, nil - } - return v, true, nil -} - -func (s *VersionStore) PartitionTables(ctx context.Context, qual dax.TableQualifier) (dax.TableIDs, error) { - s.mu.RLock() - defer s.mu.RUnlock() - - tableIDs := make(dax.TableIDs, 0, len(s.tableKeys[qual.Key()])) - - for tableName := range s.tableKeys[qual.Key()] { - tableIDs = append(tableIDs, tableName) - } - - return tableIDs, nil -} - -// AddFields adds new fields to be managed by VersionStore. It returns the -// number of fields added or an error. -func (s *VersionStore) AddFields(ctx context.Context, qtid dax.QualifiedTableID, fields ...dax.VersionedField) error { - s.mu.Lock() - defer s.mu.Unlock() - - fk, ok := s.fieldKeys[qtid.TableQualifier.Key()][qtid.ID] - if !ok { - return dax.NewErrTableIDDoesNotExist(qtid) - } - - for _, field := range fields { - fk[field.Name] = field.Version - } - - return nil -} - -// Fields returns the list of fields available for the give table. It returns -// false if the table does not exist. -func (s *VersionStore) Fields(ctx context.Context, qtid dax.QualifiedTableID) (dax.VersionedFields, bool, error) { - s.mu.RLock() - defer s.mu.RUnlock() - - return s.fieldSlice(qtid) -} - -// fieldSlice is an unprotected version of Fields(). -func (s *VersionStore) fieldSlice(qtid dax.QualifiedTableID) (dax.VersionedFields, bool, error) { - if s.fieldKeys == nil { - return nil, false, nil - } - - if fieldNameMap, ok := s.fieldKeys[qtid.TableQualifier.Key()][qtid.ID]; ok { - rtn := make(dax.VersionedFields, 0, len(fieldNameMap)) - for fieldName, version := range fieldNameMap { - rtn = append(rtn, dax.NewVersionedField(fieldName, version)) - } - sort.Sort(rtn) - return rtn, true, nil - } - - return nil, false, nil -} - -// FieldVersion return the current version for the given table/field. -// If a version is not being tracked, it returns a bool value of false. 
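// Illustrative sketch (not part of this patch): shardSlice, partitionSlice,
// and fieldSlice in the deleted code all copy map entries into a slice and
// sort it. A generic helper (hypothetical) captures the shared shape:
package main

import (
	"fmt"
	"sort"
)

func sortedKeys[K interface{ ~int | ~uint64 }, V any](m map[K]V) []K {
	keys := make([]K, 0, len(m))
	for k := range m {
		keys = append(keys, k)
	}
	sort.Slice(keys, func(i, j int) bool { return keys[i] < keys[j] })
	return keys
}

func main() {
	versions := map[int]int{3: 0, 1: 0, 2: 0}
	fmt.Println(sortedKeys(versions)) // [1 2 3]
}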
-func (s *VersionStore) FieldVersion(ctx context.Context, qtid dax.QualifiedTableID, field dax.FieldName) (int, bool, error) { - s.mu.RLock() - defer s.mu.RUnlock() - - t, ok := s.fieldKeys[qtid.TableQualifier.Key()][qtid.ID] - if !ok { - return -1, false, nil - } - - v, ok := t[field] - if !ok { - return -1, false, nil - } - return v, true, nil -} - -func (s *VersionStore) FieldTables(ctx context.Context, qual dax.TableQualifier) (dax.TableIDs, error) { - s.mu.RLock() - defer s.mu.RUnlock() - - tableIDs := make(dax.TableIDs, 0, len(s.fieldKeys[qual.Key()])) - - for tableID := range s.fieldKeys[qual.Key()] { - tableIDs = append(tableIDs, tableID) - } - - return tableIDs, nil -} - -// Copy returns a new copy of VersionStore. -func (s *VersionStore) Copy(ctx context.Context) (dax.VersionStore, error) { - s.mu.RLock() - defer s.mu.RUnlock() - - new := NewVersionStore() - - // shards. - for qkey, tableIDs := range s.shards { - for tableID, shards := range tableIDs { - qual := dax.NewTableQualifier(qkey.OrganizationID(), qkey.DatabaseID()) - qtid := dax.NewQualifiedTableID(qual, tableID) - _ = new.AddTable(ctx, qtid) - for shardNum, shard := range shards { - new.shards[qual.Key()][tableID][shardNum] = shard - } - } - } - - // tableKeys. - for qkey, tableIDs := range s.tableKeys { - for tableID, partitions := range tableIDs { - qual := dax.NewTableQualifier(qkey.OrganizationID(), qkey.DatabaseID()) - qtid := dax.NewQualifiedTableID(qual, tableID) - _ = new.AddTable(ctx, qtid) - for partitionNum, version := range partitions { - new.tableKeys[qual.Key()][tableID][partitionNum] = version - } - } - } - - // fieldKeys. - for qkey, tableIDs := range s.fieldKeys { - for tableID, fields := range tableIDs { - qual := dax.NewTableQualifier(qkey.OrganizationID(), qkey.DatabaseID()) - qtid := dax.NewQualifiedTableID(qual, tableID) - _ = new.AddTable(ctx, qtid) - for field, version := range fields { - new.fieldKeys[qual.Key()][tableID][field] = version - } - } - } - - return new, nil -} diff --git a/dax/inmem/versionstore_test.go b/dax/inmem/versionstore_test.go deleted file mode 100644 index 3ccf39967..000000000 --- a/dax/inmem/versionstore_test.go +++ /dev/null @@ -1,166 +0,0 @@ -package inmem_test - -import ( - "context" - "testing" - - "github.com/featurebasedb/featurebase/v3/dax" - "github.com/featurebasedb/featurebase/v3/dax/inmem" - "github.com/featurebasedb/featurebase/v3/errors" - "github.com/stretchr/testify/assert" -) - -func TestVersionStore(t *testing.T) { - orgID := dax.OrganizationID("acme") - dbID := dax.DatabaseID("db1") - - tableID := dax.TableID("0000000000000001") - - qual := dax.NewTableQualifier(orgID, dbID) - qtid := dax.NewQualifiedTableID(qual, tableID) - - invalidQtid := dax.NewQualifiedTableID(qual, dax.TableID("0000000000000000")) - - ctx := context.Background() - - // Ensure that when using a Schemar not initiated with NewSchemar, the error - // handling works as expected. - t.Run("EmptyVersionStore", func(t *testing.T) { - s := inmem.VersionStore{} - - t.Run("GetShardsInvalid", func(t *testing.T) { - sh, ok, err := s.Shards(ctx, invalidQtid) - assert.NoError(t, err) - assert.False(t, ok) - assert.Nil(t, sh) - }) - - // Add new table. - assert.NoError(t, s.AddTable(ctx, qtid)) - }) - - t.Run("NewVersionStore", func(t *testing.T) { - s := inmem.NewVersionStore() - - // Add new table. 
- assert.NoError(t, s.AddTable(ctx, qtid)) - - t.Run("AddTableAgain", func(t *testing.T) { - err := s.AddTable(ctx, qtid) - if assert.Error(t, err) { - assert.True(t, errors.Is(err, dax.ErrTableIDExists)) - } - }) - - t.Run("AddShards", func(t *testing.T) { - err := s.AddShards(ctx, invalidQtid, dax.NewVersionedShard(1, 0)) - if assert.Error(t, err) { - assert.True(t, errors.Is(err, dax.ErrTableIDDoesNotExist)) - } - - { - _, ok, err := s.Shards(ctx, invalidQtid) - assert.NoError(t, err) - assert.False(t, ok) - } - - // Shards is empty if no shards have been added. - { - sh, ok, err := s.Shards(ctx, qtid) - assert.NoError(t, err) - assert.True(t, ok) - assert.Equal(t, sh, dax.VersionedShards{}) - } - - // Add the first set of shards (with a duplicate (8)). - { - err := s.AddShards(ctx, qtid, - dax.NewVersionedShard(8, 0), - dax.NewVersionedShard(9, 0), - dax.NewVersionedShard(8, 0), - dax.NewVersionedShard(10, 0), - ) - assert.NoError(t, err) - } - - { - sh, ok, err := s.Shards(ctx, qtid) - assert.NoError(t, err) - assert.True(t, ok) - assert.Equal(t, dax.VersionedShards{ - dax.NewVersionedShard(8, 0), - dax.NewVersionedShard(9, 0), - dax.NewVersionedShard(10, 0), - }, sh) - } - - // Add another set of shards (with one duplicate (11) and one - // existing (10)). - { - err := s.AddShards(ctx, qtid, - dax.NewVersionedShard(10, 0), - dax.NewVersionedShard(11, 0), - dax.NewVersionedShard(12, 0), - dax.NewVersionedShard(11, 0), - ) - assert.NoError(t, err) - } - - { - sh, ok, err := s.Shards(ctx, qtid) - assert.NoError(t, err) - assert.True(t, ok) - assert.Equal(t, dax.VersionedShards{ - dax.NewVersionedShard(8, 0), - dax.NewVersionedShard(9, 0), - dax.NewVersionedShard(10, 0), - dax.NewVersionedShard(11, 0), - dax.NewVersionedShard(12, 0), - }, sh) - } - }) - - t.Run("RemoveTable", func(t *testing.T) { - shards, partitions, err := s.RemoveTable(ctx, qtid) - assert.NoError(t, err) - assert.Equal(t, dax.VersionedPartitions{}, partitions) - assert.Equal(t, dax.VersionedShards{ - dax.NewVersionedShard(8, 0), - dax.NewVersionedShard(9, 0), - dax.NewVersionedShard(10, 0), - dax.NewVersionedShard(11, 0), - dax.NewVersionedShard(12, 0), - }, shards) - - // Make sure the table was removed. 
- shards, ok, err := s.Shards(ctx, qtid) - assert.NoError(t, err) - assert.False(t, ok) - assert.Nil(t, shards) - }) - }) - - t.Run("ErrorConditions", func(t *testing.T) { - t.Run("JustSchemar", func(t *testing.T) { - s := inmem.VersionStore{} - - shards, partitions, err := s.RemoveTable(ctx, qtid) - assert.Nil(t, shards) - assert.Nil(t, partitions) - if assert.Error(t, err) { - assert.True(t, errors.Is(err, dax.ErrTableIDDoesNotExist)) - } - }) - - t.Run("NewSchemar", func(t *testing.T) { - s := inmem.NewVersionStore() - - shards, partitions, err := s.RemoveTable(ctx, qtid) - assert.Nil(t, shards) - assert.Nil(t, partitions) - if assert.Error(t, err) { - assert.True(t, errors.Is(err, dax.ErrTableIDDoesNotExist)) - } - }) - }) -} diff --git a/dax/mds/client/client.go b/dax/mds/client/client.go index b87b057b0..d105d1f1a 100644 --- a/dax/mds/client/client.go +++ b/dax/mds/client/client.go @@ -10,7 +10,6 @@ import ( "net/http" "github.com/featurebasedb/featurebase/v3/dax" - "github.com/featurebasedb/featurebase/v3/dax/mds/controller" mdshttp "github.com/featurebasedb/featurebase/v3/dax/mds/http" "github.com/featurebasedb/featurebase/v3/errors" "github.com/featurebasedb/featurebase/v3/logger" @@ -49,6 +48,20 @@ func (c *Client) Health() bool { return true } +// TODO(tlt): collapse Table into this +func (c *Client) TableByID(ctx context.Context, qtid dax.QualifiedTableID) (*dax.QualifiedTable, error) { + return c.Table(ctx, qtid) +} + +// TODO(tlt): collapse TableID into this +func (c *Client) TableByName(ctx context.Context, qual dax.TableQualifier, tname dax.TableName) (*dax.QualifiedTable, error) { + qtid, err := c.TableID(ctx, qual, tname) + if err != nil { + return nil, errors.Wrap(err, "getting table id") + } + return c.Table(ctx, qtid) +} + func (c *Client) Table(ctx context.Context, qtid dax.QualifiedTableID) (*dax.QualifiedTable, error) { url := fmt.Sprintf("%s/table", c.address.WithScheme(defaultScheme)) @@ -336,11 +349,11 @@ func (c *Client) IngestPartition(ctx context.Context, qtid dax.QualifiedTableID, return isr.Address, nil } -func (c *Client) ComputeNodes(ctx context.Context, qtid dax.QualifiedTableID, shards ...dax.ShardNum) ([]controller.ComputeNode, error) { +func (c *Client) ComputeNodes(ctx context.Context, qtid dax.QualifiedTableID, shards ...dax.ShardNum) ([]dax.ComputeNode, error) { url := fmt.Sprintf("%s/compute-nodes", c.address.WithScheme(defaultScheme)) c.logger.Debugf("ComputeNodes url: %s", url) - var nodes []controller.ComputeNode + var nodes []dax.ComputeNode req := &mdshttp.ComputeNodesRequest{ Table: qtid, @@ -374,11 +387,11 @@ func (c *Client) ComputeNodes(ctx context.Context, qtid dax.QualifiedTableID, sh return cnr.ComputeNodes, nil } -func (c *Client) TranslateNodes(ctx context.Context, qtid dax.QualifiedTableID, partitions ...dax.PartitionNum) ([]controller.TranslateNode, error) { +func (c *Client) TranslateNodes(ctx context.Context, qtid dax.QualifiedTableID, partitions ...dax.PartitionNum) ([]dax.TranslateNode, error) { url := fmt.Sprintf("%s/translate-nodes", c.address.WithScheme(defaultScheme)) c.logger.Debugf("TranslateNodes url: %s", url) - var nodes []controller.TranslateNode + var nodes []dax.TranslateNode req := &mdshttp.TranslateNodesRequest{ Table: qtid, @@ -414,7 +427,7 @@ func (c *Client) TranslateNodes(ctx context.Context, qtid dax.QualifiedTableID, func (c *Client) RegisterNode(ctx context.Context, node *dax.Node) error { url := fmt.Sprintf("%s/register-node", c.address.WithScheme(defaultScheme)) - c.logger.Debugf("RegisterNode url: %s", 
url) + c.logger.Debugf("RegisterNode: %s, url: %s", node.Address, url) req := &mdshttp.RegisterNodeRequest{ Address: node.Address, @@ -437,7 +450,7 @@ func (c *Client) RegisterNode(ctx context.Context, node *dax.Node) error { if resp.StatusCode != http.StatusOK { b, _ := io.ReadAll(resp.Body) - return errors.Errorf("status code: %d: %s", resp.StatusCode, b) + return errors.Errorf("registration request to %s status code: %d: %s", url, resp.StatusCode, b) } return nil diff --git a/dax/mds/controller/alpha/director.go b/dax/mds/controller/alpha/director.go deleted file mode 100644 index b7caa4903..000000000 --- a/dax/mds/controller/alpha/director.go +++ /dev/null @@ -1,125 +0,0 @@ -// Package alpha contains inter-service implemenations of interfaces. -package alpha - -import ( - "context" - "encoding/json" - - featurebase "github.com/featurebasedb/featurebase/v3" - "github.com/featurebasedb/featurebase/v3/dax" - "github.com/featurebasedb/featurebase/v3/dax/mds/controller" - "github.com/featurebasedb/featurebase/v3/errors" - featurebaseserver "github.com/featurebasedb/featurebase/v3/server" -) - -// Ensure type implements interface. -var _ controller.Director = (*Director)(nil) - -// Director is a direct, service-to-service implementation of the Director -// interface. -type Director struct { - computers map[dax.Address]*featurebaseserver.Command -} - -func NewDirector() *Director { - return &Director{ - computers: make(map[dax.Address]*featurebaseserver.Command), - } -} - -func (d *Director) AddCmd(addr dax.Address, cmd *featurebaseserver.Command) error { - if cmd == nil { - return errors.New(errors.ErrUncoded, "cannot add nil cmd to director") - } - d.computers[addr] = cmd - return nil -} - -func (d *Director) api(addr dax.Address) (*featurebase.API, error) { - cmd, found := d.computers[addr] - if !found { - // Address not registered with the Director. - return nil, errors.New(errors.ErrUncoded, "cmd not registered with director") - } - - api := cmd.API - if api == nil { - // Command does not have an API. 
-		return nil, errors.New(errors.ErrUncoded, "cmd does not have an api")
-	}
-
-	return api, nil
-}
-
-func (d *Director) SendDirective(ctx context.Context, dir *dax.Directive) error {
-	api, err := d.api(dir.Address)
-	if err != nil {
-		return errors.Wrap(err, "getting api from director")
-	}
-
-	ndir, err := marshalUnmarshal(dir)
-	if err != nil {
-		return errors.Wrap(err, "marshalUnmarshal")
-	}
-
-	return api.Directive(ctx, ndir)
-}
-
-func (d *Director) SendSnapshotShardDataRequest(ctx context.Context, req *dax.SnapshotShardDataRequest) error {
-	api, err := d.api(req.Address)
-	if err != nil {
-		return errors.Wrap(err, "getting api from director")
-	}
-
-	nreq, err := marshalUnmarshal(req)
-	if err != nil {
-		return errors.Wrap(err, "marshalUnmarshal")
-	}
-
-	return api.SnapshotShardData(ctx, nreq)
-}
-
-func (d *Director) SendSnapshotTableKeysRequest(ctx context.Context, req *dax.SnapshotTableKeysRequest) error {
-	api, err := d.api(req.Address)
-	if err != nil {
-		return errors.Wrap(err, "getting api from director")
-	}
-
-	nreq, err := marshalUnmarshal(req)
-	if err != nil {
-		return errors.Wrap(err, "marshalUnmarshal")
-	}
-
-	return api.SnapshotTableKeys(ctx, nreq)
-}
-
-func (d *Director) SendSnapshotFieldKeysRequest(ctx context.Context, req *dax.SnapshotFieldKeysRequest) error {
-	api, err := d.api(req.Address)
-	if err != nil {
-		return errors.Wrap(err, "getting api from director")
-	}
-
-	nreq, err := marshalUnmarshal(req)
-	if err != nil {
-		return errors.Wrap(err, "marshalUnmarshal")
-	}
-
-	return api.SnapshotFieldKeys(ctx, nreq)
-}
-
-// marshalUnmarshal simply marshals anything to json, and then
-// unmarshals it. This might seem a bit silly. The reason it exists is
-// to exercise the same encode/decode logic that we'd need to if we
-// were traversing the network, and guarantee that we aren't sharing
-// pointers across API boundaries.
-func marshalUnmarshal[K any](a K) (K, error) {
-	var newA K
-	abytes, err := json.Marshal(a)
-	if err != nil {
-		return newA, errors.Wrap(err, "marshaling directive")
-	}
-	if err := json.Unmarshal(abytes, &newA); err != nil {
-		return newA, errors.Wrap(err, "unmarshaling directive")
-	}
-	return newA, nil
-}
diff --git a/dax/mds/controller/balancer.go b/dax/mds/controller/balancer.go
index 0736eddb4..a6402c6fb 100644
--- a/dax/mds/controller/balancer.go
+++ b/dax/mds/controller/balancer.go
@@ -28,6 +28,11 @@ type Balancer interface {
 	// which are not assigned to any worker, that means the query
 	// would return incomplete data, so we want to error.
 	WorkersForJobPrefix(ctx context.Context, prefix string) ([]dax.WorkerInfo, error)
+
+	// RemoveJobs removes every job matching the given prefix, e.g. when
+	// dropping a table, without needing to look up in advance which
+	// shards or partitions are actually present.
+	RemoveJobs(ctx context.Context, prefix string) ([]dax.WorkerDiff, error)
 }
 
 // Ensure type implements interface.
@@ -67,3 +72,7 @@ func (b *NopBalancer) WorkersForJobPrefix(ctx context.Context, prefix string) ([]dax.WorkerInfo, error) {
 	return []dax.WorkerInfo{}, nil
 }
+
+func (b *NopBalancer) RemoveJobs(ctx context.Context, prefix string) ([]dax.WorkerDiff, error) {
+	return nil, nil
+}
diff --git a/dax/mds/controller/config.go b/dax/mds/controller/config.go
index b9247a936..1d77fc73c 100644
--- a/dax/mds/controller/config.go
+++ b/dax/mds/controller/config.go
@@ -25,5 +25,12 @@ type Config struct {
 	// have been registered.
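// Illustrative sketch (not part of this patch): marshalUnmarshal, deleted
// above along with the alpha Director, is a generic JSON round-trip. The
// pattern stays useful in tests that want to exercise encode/decode logic
// and guarantee no pointers are shared across API boundaries:
package main

import (
	"encoding/json"
	"fmt"
)

func roundTrip[T any](v T) (T, error) {
	var out T
	b, err := json.Marshal(v)
	if err != nil {
		return out, err
	}
	err = json.Unmarshal(b, &out)
	return out, err
}

func main() {
	type directive struct {
		Version uint64 `json:"version"`
	}
	d, err := roundTrip(directive{Version: 7})
	fmt.Println(d.Version, err) // 7 <nil>
}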
RegistrationBatchTimeout time.Duration + // SnappingTurtleTimeout is the period on which the automatic + // snapshotting routine will run. If performing all the snapshots + // takes longer than this amount of time, snapshotting will run + // continuously. If it finishes before the timeout, it will wait + // until the timeout expires to start another round of snapshots. + SnappingTurtleTimeout time.Duration + Logger logger.Logger } diff --git a/dax/mds/controller/controller.go b/dax/mds/controller/controller.go index 0ca89272a..df71b5bda 100644 --- a/dax/mds/controller/controller.go +++ b/dax/mds/controller/controller.go @@ -22,9 +22,6 @@ type Controller struct { // initialization. mu sync.RWMutex - // versionStore - versionStore dax.VersionStore - // Schemar used by the controller to get table information. The controller // should NOT call Schemar methods which modify data. Schema mutations are // made outside of the controller (at this point that happens in MDS). @@ -47,6 +44,8 @@ type Controller struct { registrationBatchTimeout time.Duration nodeChan chan *dax.Node + snappingTurtleTimeout time.Duration + snapControl chan struct{} stopping chan struct{} logger logger.Logger @@ -69,10 +68,13 @@ func New(cfg Config) *Controller { poller: dax.NewNopAddressManager(), - logger: logger.NopLogger, + registrationBatchTimeout: cfg.RegistrationBatchTimeout, + nodeChan: make(chan *dax.Node, 10), + snappingTurtleTimeout: cfg.SnappingTurtleTimeout, + snapControl: make(chan struct{}), - nodeChan: make(chan *dax.Node, 10), stopping: make(chan struct{}), + logger: logger.NopLogger, } if cfg.Logger != nil { @@ -81,11 +83,6 @@ func New(cfg Config) *Controller { switch cfg.StorageMethod { case "boltdb": - if err := cfg.BoltDB.InitializeBuckets(boltdb.VersionStoreBuckets...); err != nil { - c.logger.Panicf("initializing version store buckets: %v", err) - } - c.versionStore = boltdb.NewVersionStore(cfg.BoltDB, c.logger) - if err := cfg.BoltDB.InitializeBuckets(boltdb.NodeServiceBuckets...); err != nil { c.logger.Panicf("initializing node service buckets: %v", err) } @@ -106,14 +103,13 @@ func New(cfg Config) *Controller { c.Schemar = cfg.Schemar } - c.registrationBatchTimeout = cfg.RegistrationBatchTimeout - return c } -// Run starts the node registration goroutine. +// Run starts long running subroutines. func (c *Controller) Run() error { go c.nodeRegistrationRoutine(c.nodeChan, c.registrationBatchTimeout) + go c.snappingTurtleRoutine(c.snappingTurtleTimeout, c.snapControl) return nil } @@ -518,12 +514,6 @@ func (c *Controller) nodesTranslateReadOrWrite(ctx context.Context, role *dax.Tr for _, diff := range diffs { workerSet.Add(dax.Address(diff.WorkerID)) } - - // Initialize the partition version to 0. - qtid := j.table().QualifiedTableID() - if err := c.versionStore.AddPartitions(ctx, qtid, dax.NewVersionedPartition(j.partitionNum(), 0)); err != nil { - return nil, false, NewErrInternal(err.Error()) - } } // Convert the slice of addresses into a slice of addressMethod containing @@ -544,26 +534,14 @@ func (c *Controller) nodesTranslateReadOrWrite(ctx context.Context, role *dax.Tr for _, worker := range workers { // covert worker.Jobs []string to map[string][]Partition - translateMap := make(map[dax.TableKey]dax.VersionedPartitions) + translateMap := make(map[dax.TableKey]dax.PartitionNums) for _, job := range worker.Jobs { j, err := decodePartition(job) if err != nil { return nil, false, NewErrInternal(err.Error()) } - // Get the partition version from the local versionStore. 
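// Illustrative sketch (not part of this patch; the real snappingTurtleRoutine
// body is not shown in this diff): a loop driven by SnappingTurtleTimeout
// with a manual trigger channel, matching how Run() starts the routine and
// how snapControl is used elsewhere in this changeset:
package main

import (
	"fmt"
	"time"
)

func snappingTurtleRoutine(period time.Duration, control, stopping <-chan struct{}, snapAll func()) {
	ticker := time.NewTicker(period)
	defer ticker.Stop()
	for {
		select {
		case <-stopping:
			return
		case <-ticker.C:
			// If snapAll takes longer than period, the next tick is already
			// pending, so rounds effectively run back to back.
			snapAll()
		case <-control:
			snapAll() // manual trigger, e.g. SnapshotTable in tests
		}
	}
}

func main() {
	control := make(chan struct{})
	stopping := make(chan struct{})
	go snappingTurtleRoutine(time.Hour, control, stopping, func() { fmt.Println("snapshot round") })
	control <- struct{}{} // force one round
	close(stopping)
}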
- tkey := j.table() - qtid := tkey.QualifiedTableID() - partitionVersion, found, err := c.versionStore.PartitionVersion(ctx, qtid, j.partitionNum()) - if err != nil { - return nil, false, err - } else if !found { - return nil, false, NewErrInternal("partition version not found in cache") - } - - translateMap[tkey] = append(translateMap[tkey], - dax.NewVersionedPartition(j.partitionNum(), partitionVersion), - ) + translateMap[j.table()] = append(translateMap[j.table()], j.partitionNum()) } for table, partitions := range translateMap { @@ -705,12 +683,6 @@ func (c *Controller) nodesComputeReadOrWrite(ctx context.Context, role *dax.Comp for _, diff := range diffs { workerSet.Add(dax.Address(diff.WorkerID)) } - - // Initialize the shard version to 0. - qtid := j.table().QualifiedTableID() - if err := c.versionStore.AddShards(ctx, qtid, dax.NewVersionedShard(j.shardNum(), 0)); err != nil { - return nil, false, NewErrInternal(err.Error()) - } } // Convert the slice of addresses into a slice of addressMethod containing @@ -737,26 +709,14 @@ func (c *Controller) workersToAssignedNodes(ctx context.Context, workers []dax.W nodes := []dax.AssignedNode{} for _, worker := range workers { // convert worker.Jobs []string to map[TableName][]Shard - computeMap := make(map[dax.TableKey]dax.VersionedShards) + computeMap := make(map[dax.TableKey]dax.ShardNums) for _, job := range worker.Jobs { j, err := decodeShard(job) if err != nil { return nil, NewErrInternal(err.Error()) } - // Get the shard version from the local versionStore. - tkey := j.table() - qtid := tkey.QualifiedTableID() - shardVersion, found, err := c.versionStore.ShardVersion(ctx, qtid, j.shardNum()) - if err != nil { - return nil, err - } else if !found { - return nil, NewErrInternal("shard version not found in cache") - } - - computeMap[tkey] = append(computeMap[tkey], - dax.NewVersionedShard(j.shardNum(), shardVersion), - ) + computeMap[j.table()] = append(computeMap[j.table()], j.shardNum()) } for table, shards := range computeMap { @@ -781,31 +741,6 @@ func (c *Controller) CreateTable(ctx context.Context, qtbl *dax.QualifiedTable) c.mu.Lock() defer c.mu.Unlock() - qtid := qtbl.QualifiedID() - - // Add the table to the versionStore. - if err := c.versionStore.AddTable(ctx, qtid); err != nil { - return errors.Wrapf(err, "adding table: %s", qtid) - } - - // Add fields which have string keys to the local versionStore. - fieldVersions := make(dax.VersionedFields, 0) - for _, field := range qtbl.Fields { - if !field.StringKeys() { - continue - } - - fieldVersions = append(fieldVersions, dax.VersionedField{ - Name: field.Name, - Version: 0, - }) - } - if len(fieldVersions) > 0 { - if err := c.versionStore.AddFields(ctx, qtid, fieldVersions...); err != nil { - return errors.Wrapf(err, "adding fields: %s, %v", qtid, fieldVersions) - } - } - // If the table is keyed, add partitions to the balancer. if qtbl.StringKeys() { // workerSet maintains the set of workers which have a job assignment change @@ -813,15 +748,9 @@ func (c *Controller) CreateTable(ctx context.Context, qtbl *dax.QualifiedTable) workerSet := NewAddressSet() // Generate the list of partitionsToAdd to be added. - partitionsToAdd := make(dax.VersionedPartitions, qtbl.PartitionN) + partitionsToAdd := make(dax.PartitionNums, qtbl.PartitionN) for partitionNum := 0; partitionNum < qtbl.PartitionN; partitionNum++ { - partitionsToAdd[partitionNum] = dax.NewVersionedPartition(dax.PartitionNum(partitionNum), 0) - } - - // Add partitions to versionStore. 
Version is intentionally set to 0 - // here as this is the initial instance of the partition. - if err := c.versionStore.AddPartitions(ctx, qtid, partitionsToAdd...); err != nil { - return NewErrInternal(err.Error()) + partitionsToAdd[partitionNum] = dax.PartitionNum(partitionNum) } stringers := make([]fmt.Stringer, 0, len(partitionsToAdd)) @@ -856,13 +785,7 @@ func (c *Controller) CreateTable(ctx context.Context, qtbl *dax.QualifiedTable) // and therefore need to be sent an updated Directive. workerSet := NewAddressSet() - p := dax.NewVersionedPartition(0, 0) - - // Add partition 0 to versionStore. Version is intentionally set to 0 - // here as this is the initial instance of the partition. - if err := c.versionStore.AddPartitions(ctx, qtid, p); err != nil { - return NewErrInternal(err.Error()) - } + p := dax.PartitionNum(0) // We don't currently use the returned diff, other than to determine // which worker was affected, because we send the full Directive @@ -898,39 +821,24 @@ func (c *Controller) DropTable(ctx context.Context, qtid dax.QualifiedTableID) e return errors.Wrapf(err, "table not in schemar: %s", qtid) } - // Remove the table from the versionStore. - // Since the schemar should be the system of record for the existence of a - // table, if the versionStore is not aware of the table, we just log it and - // continue. - shards, partitions, err := c.versionStore.RemoveTable(ctx, qtid) - if err != nil { - return errors.Wrapf(err, "removing table: %s", qtid) - } - // workerSet maintains the set of workers which have a job assignment change // and therefore need to be sent an updated Directive. workerSet := NewAddressSet() - // Remove shards. - for _, s := range shards { - diffs, err := c.ComputeBalancer.RemoveJob(ctx, shard(qtid.Key(), s)) - if err != nil { - return errors.Wrap(err, "removing job") - } - for _, diff := range diffs { - workerSet.Add(dax.Address(diff.WorkerID)) - } + diffs, err := c.ComputeBalancer.RemoveJobs(ctx, string(qtid.Key())) + if err != nil { + return errors.Wrap(err, "removing jobs") + } + for _, diff := range diffs { + workerSet.Add(dax.Address(diff.WorkerID)) } - // Remove partitions. - for _, p := range partitions { - diffs, err := c.TranslateBalancer.RemoveJob(ctx, partition(qtid.Key(), p)) - if err != nil { - return errors.Wrap(err, "removing job") - } - for _, diff := range diffs { - workerSet.Add(dax.Address(diff.WorkerID)) - } + diffs, err = c.TranslateBalancer.RemoveJobs(ctx, string(qtid.Key())) + if err != nil { + return errors.Wrap(err, "removing job") + } + for _, diff := range diffs { + workerSet.Add(dax.Address(diff.WorkerID)) } // Convert the slice of addresses into a slice of addressMethod containing @@ -964,15 +872,10 @@ func (c *Controller) Tables(ctx context.Context, qual dax.TableQualifier, ids .. // AddShards registers the table/shard combinations with the controller and // sends the necessary directive. -func (c *Controller) AddShards(ctx context.Context, qtid dax.QualifiedTableID, shards ...dax.VersionedShard) error { +func (c *Controller) AddShards(ctx context.Context, qtid dax.QualifiedTableID, shards ...dax.ShardNum) error { c.mu.Lock() defer c.mu.Unlock() - // Add shards to versionStore. - if err := c.versionStore.AddShards(ctx, qtid, shards...); err != nil { - return errors.Wrapf(err, "adding shards: %s, %v", qtid, shards) - } - // workerSet maintains the set of workers which have a job assignment change // and therefore need to be sent an updated Directive. 
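// Illustrative sketch (not part of this patch; hypothetical types stand in
// for the dax ones): DropTable above now issues one prefix-based RemoveJobs
// call per balancer, rather than first enumerating the table's shards and
// partitions and removing each job individually:
package main

import (
	"context"
	"fmt"
	"strings"
)

type workerDiff struct{ WorkerID string }

// memBalancer maps job strings to worker IDs.
type memBalancer map[string]string

// RemoveJobs deletes every job whose name starts with prefix and reports the
// affected workers, mirroring the new Balancer method.
func (b memBalancer) RemoveJobs(_ context.Context, prefix string) ([]workerDiff, error) {
	var diffs []workerDiff
	for job, worker := range b {
		if strings.HasPrefix(job, prefix) {
			delete(b, job)
			diffs = append(diffs, workerDiff{WorkerID: worker})
		}
	}
	return diffs, nil
}

func main() {
	b := memBalancer{
		"tbl0|shard|1":  "w1",
		"tbl0|shard|2":  "w2",
		"other|shard|1": "w3",
	}
	diffs, _ := b.RemoveJobs(context.Background(), "tbl0|")
	fmt.Println(len(diffs), len(b)) // 2 1
}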
workerSet := NewAddressSet() @@ -988,11 +891,6 @@ func (c *Controller) AddShards(ctx context.Context, qtid dax.QualifiedTableID, s for _, diff := range diffs { workerSet.Add(dax.Address(diff.WorkerID)) } - - // Initialize the shard version to 0. - if err := c.versionStore.AddShards(ctx, qtid, dax.NewVersionedShard(s.Num, 0)); err != nil { - return NewErrInternal(err.Error()) - } } // Convert the slice of addresses into a slice of addressMethod containing @@ -1008,7 +906,7 @@ func (c *Controller) AddShards(ctx context.Context, qtid dax.QualifiedTableID, s // RemoveShards deregisters the table/shard combinations with the controller and // sends the necessary directives. -func (c *Controller) RemoveShards(ctx context.Context, qtid dax.QualifiedTableID, shards ...dax.VersionedShard) error { +func (c *Controller) RemoveShards(ctx context.Context, qtid dax.QualifiedTableID, shards ...dax.ShardNum) error { c.mu.Lock() defer c.mu.Unlock() @@ -1048,7 +946,7 @@ func (c *Controller) sendDirectives(ctx context.Context, addrs ...addressMethod) return nil } - directives, err := c.buildDirectives(ctx, addrs, c.versionStore) + directives, err := c.buildDirectives(ctx, addrs) if err != nil { return errors.Wrap(err, "building directives") } @@ -1118,7 +1016,7 @@ func applyAddressMethod(addrs []dax.Address, method dax.DirectiveMethod) []addre // buildDirectives builds a list of directives for the given addrs (i.e. nodes) // using information (i.e. current state) from the balancers. -func (c *Controller) buildDirectives(ctx context.Context, addrs []addressMethod, versionStore dax.VersionStore) ([]*dax.Directive, error) { +func (c *Controller) buildDirectives(ctx context.Context, addrs []addressMethod) ([]*dax.Directive, error) { directives := make([]*dax.Directive, len(addrs)) for i, addressMethod := range addrs { @@ -1139,12 +1037,12 @@ func (c *Controller) buildDirectives(ctx context.Context, addrs []addressMethod, // computeMap maps a table to a list of shards for that table. We need // to aggregate them here because the list of jobs from WorkerState() // can contain a mixture of table/shards. - computeMap := make(map[dax.TableKey][]dax.VersionedShard) + computeMap := make(map[dax.TableKey][]dax.ShardNum) // translateMap maps a table to a list of partitions for that table. We // need to aggregate them here because the list of jobs from // WorkerState() can contain a mixture of table/partitions. - translateMap := make(map[dax.TableKey]dax.VersionedPartitions) + translateMap := make(map[dax.TableKey][]dax.PartitionNum) // tableSet maintains the set of tables which have a job assignment // change and therefore need to be included in the Directive schema. @@ -1172,24 +1070,8 @@ func (c *Controller) buildDirectives(ctx context.Context, addrs []addressMethod, return nil, errors.Wrapf(err, "decoding shard job: %s", job) } - // The Shard object decoded from the balancer doesn't - // contain a valid version (because the balancer - // intentionally does not store version information). Here, - // we get the current shard version from the controller's - // cache (i.e. versionStore) and inject that into the Shard - // sent in the directive. 
tkey := j.table() - qtid := tkey.QualifiedTableID() - shardVersion, found, err := versionStore.ShardVersion(ctx, qtid, j.shardNum()) - if err != nil { - return nil, errors.Wrapf(err, "getting shard version: %s, %d", qtid, j.shardNum()) - } else if !found { - return nil, NewErrInternal("shard version not found in cache") - } - - computeMap[tkey] = append(computeMap[tkey], - dax.NewVersionedShard(j.shardNum(), shardVersion), - ) + computeMap[tkey] = append(computeMap[tkey], j.shardNum()) tableSet.Add(tkey) } case dax.RoleTypeTranslate: @@ -1207,24 +1089,8 @@ func (c *Controller) buildDirectives(ctx context.Context, addrs []addressMethod, ownsPartition0[j.table()] = struct{}{} } - // The Partition object decoded from the balancer doesn't - // contain a valid version (because the balancer - // intentionally does not store version information). Here, - // we get the current partition version from the - // controller's cache (i.e. versionStore) and inject that - // into the Partition sent in the directive. tkey := j.table() - qtid := tkey.QualifiedTableID() - partitionVersion, found, err := versionStore.PartitionVersion(ctx, qtid, j.partitionNum()) - if err != nil { - return nil, errors.Wrapf(err, "getting partition version: %s, %d", qtid, j.partitionNum()) - } else if !found { - return nil, NewErrInternal("partition version not found in cache") - } - - translateMap[tkey] = append(translateMap[tkey], - dax.NewVersionedPartition(j.partitionNum(), partitionVersion), - ) + translateMap[tkey] = append(translateMap[tkey], j.partitionNum()) tableSet.Add(tkey) } } @@ -1235,7 +1101,7 @@ func (c *Controller) buildDirectives(ctx context.Context, addrs []addressMethod, // Because these were encoded as strings in the balancer and may be // out of order numerically, sort them as integers. //sort.Slice(v, func(i, j int) bool { return v[i] < v[j] }) - sort.Sort(dax.VersionedShards(v)) + sort.Sort(dax.ShardNums(v)) d.ComputeRoles = append(d.ComputeRoles, dax.ComputeRole{ TableKey: k, @@ -1247,7 +1113,7 @@ func (c *Controller) buildDirectives(ctx context.Context, addrs []addressMethod, for k, v := range translateMap { // Because these were encoded as strings in the balancer and may be // out of order numerically, sort them as integers. 
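// Illustrative sketch (not part of this patch): job strings come back from
// the balancer in lexical order, so shard and partition numbers can be out
// of numeric order ("10" sorts before "2"). dax.ShardNums and
// dax.PartitionNums are sorted numerically via sort.Sort; a minimal stand-in:
package main

import (
	"fmt"
	"sort"
)

type shardNums []uint64

func (s shardNums) Len() int           { return len(s) }
func (s shardNums) Less(i, j int) bool { return s[i] < s[j] }
func (s shardNums) Swap(i, j int)      { s[i], s[j] = s[j], s[i] }

func main() {
	v := shardNums{10, 2, 1} // lexical order of "1", "10", "2"
	sort.Sort(v)
	fmt.Println(v) // [1 2 10]
}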
- sort.Sort(v) + sort.Sort(dax.PartitionNums(v)) d.TranslateRoles = append(d.TranslateRoles, dax.TranslateRole{ TableKey: k, @@ -1269,7 +1135,7 @@ func (c *Controller) buildDirectives(ctx context.Context, addrs []addressMethod, return nil, errors.Wrapf(err, "getting table: %s", tkey) } - fieldVersions := make(dax.VersionedFields, 0) + fieldNames := make([]dax.FieldName, 0) for _, field := range table.Fields { if !field.StringKeys() { continue @@ -1280,26 +1146,16 @@ func (c *Controller) buildDirectives(ctx context.Context, addrs []addressMethod, continue } - fieldVersion, found, err := versionStore.FieldVersion(ctx, qtid, field.Name) - if err != nil { - return nil, errors.Wrapf(err, "getting field version: %s, %s", qtid, field) - } else if !found { - return nil, NewErrInternal("field version not found in cache") - } - - fieldVersions = append(fieldVersions, dax.VersionedField{ - Name: field.Name, - Version: fieldVersion, - }) + fieldNames = append(fieldNames, field.Name) } - if len(fieldVersions) == 0 { + if len(fieldNames) == 0 { continue } d.TranslateRoles = append(d.TranslateRoles, dax.TranslateRole{ TableKey: tkey, - Fields: fieldVersions, + Fields: fieldNames, }) tableSet.Add(tkey) @@ -1352,48 +1208,10 @@ func (c *Controller) InitializePoller(ctx context.Context) error { return nil } -// SnapshotTable snapshots a table. +// SnapshotTable snapshots a table. It might also snapshot everything +// else... no guarantees here, only used in tests as of this writing. func (c *Controller) SnapshotTable(ctx context.Context, qtid dax.QualifiedTableID) error { - shards, ok, err := c.versionStore.Shards(ctx, qtid) - if err != nil { - return errors.Wrap(err, "getting shards from version store") - } else if !ok { - return errors.New(errors.ErrUncoded, "got false back from versionStore.Shards") - } - - for _, shard := range shards { - if err := c.SnapshotShardData(ctx, qtid, shard.Num); err != nil { - return errors.Wrapf(err, "snapshotting shard data: qtid: %s, shard: %d", qtid, shard.Num) - } - } - - partitions, ok, err := c.versionStore.Partitions(ctx, qtid) - if err != nil { - return errors.Wrap(err, "getting partitions from version store") - } else if !ok { - return errors.New(errors.ErrUncoded, "got false back from versionStore.Partitions") - } - - for _, part := range partitions { - if err := c.SnapshotTableKeys(ctx, qtid, part.Num); err != nil { - return errors.Wrapf(err, "snapshotting table keys: qtid: %s, partition: %d", qtid, part.Num) - } - } - - fields, ok, err := c.versionStore.Fields(ctx, qtid) - if err != nil { - return errors.Wrap(err, "getting fields from version store") - } else if !ok { - return errors.New(errors.ErrUncoded, "got false back from versionStore.Fields") - } - - for _, fld := range fields { - if fld.Name != "_id" { - if err := c.SnapshotFieldKeys(ctx, qtid, fld.Name); err != nil { - return errors.Wrapf(err, "snapshotting field keys: qtid: %s, field: %s", qtid, fld.Name) - } - } - } + c.snapControl <- struct{}{} return nil } @@ -1401,22 +1219,9 @@ func (c *Controller) SnapshotTable(ctx context.Context, qtid dax.QualifiedTableI // snapshot that shard, then increment its shard version for logs written to the // WriteLogger. func (c *Controller) SnapshotShardData(ctx context.Context, qtid dax.QualifiedTableID, shardNum dax.ShardNum) error { - // Confirm table/shard is being tracked; get the current shard. 
- fromShardVersion, ok, err := c.versionStore.ShardVersion(ctx, qtid, shardNum) - if err != nil { - return errors.Wrapf(err, "getting shard version: %s, %d", qtid, shardNum) - } else if !ok { - return NewErrInternal( - fmt.Sprintf("shard to snapshot not found: %s, %d", qtid, shardNum), - ) - } - toShardVersion := fromShardVersion + 1 - // Get the node responsible for the shard. bal := c.ComputeBalancer - - job := shard(qtid.Key(), dax.NewVersionedShard(shardNum, -1)) - + job := shard(qtid.Key(), shardNum) workers, err := bal.WorkersForJobs(ctx, []dax.Job{dax.Job(job.String())}) if err != nil { return errors.Wrapf(err, "getting workers for jobs: %s", job) @@ -1428,54 +1233,17 @@ func (c *Controller) SnapshotShardData(ctx context.Context, qtid dax.QualifiedTa addr := dax.Address(workers[0].ID) - // Make a copy of the controller's versionStore, and update the current - // shard so that the directive sent along with the SnapshotRequest reflects - // the state that we want after a successful snapshot. - versionStoreCopy, err := c.versionStore.Copy(ctx) - if err != nil { - return errors.Wrap(err, "copying version store") - } - if err := versionStoreCopy.AddShards(ctx, qtid, - dax.NewVersionedShard(shardNum, toShardVersion), - ); err != nil { - return NewErrInternal(err.Error()) - } - - // Convert the address into a slice of addressMethod containing the - // appropriate method. - addressMethods := applyAddressMethod([]dax.Address{addr}, dax.DirectiveMethodSnapshot) - - var toDirective dax.Directive - if directives, err := c.buildDirectives(ctx, addressMethods, versionStoreCopy); err != nil { - return NewErrInternal(err.Error()) - } else if ld := len(directives); ld != 1 { - msg := fmt.Sprintf("buildDirectives returned invalid number of directives: %d", ld) - return NewErrInternal(msg) - } else { - toDirective = *directives[0] - } - // Send the node a snapshot request. req := &dax.SnapshotShardDataRequest{ - Address: addr, - TableKey: qtid.Key(), - ShardNum: shardNum, - FromVersion: fromShardVersion, - ToVersion: toShardVersion, - Directive: toDirective, + Address: addr, + TableKey: qtid.Key(), + ShardNum: shardNum, } if err := c.Director.SendSnapshotShardDataRequest(ctx, req); err != nil { return NewErrInternal(err.Error()) } - // A successful request means the shard version can be incremented. - if err := c.versionStore.AddShards(ctx, qtid, - dax.NewVersionedShard(shardNum, toShardVersion), - ); err != nil { - return NewErrInternal(err.Error()) - } - return nil } @@ -1483,22 +1251,9 @@ func (c *Controller) SnapshotShardData(ctx context.Context, qtid dax.QualifiedTa // partition to snapshot the table keys for that partition, then increment its // version for logs written to the WriteLogger. func (c *Controller) SnapshotTableKeys(ctx context.Context, qtid dax.QualifiedTableID, partitionNum dax.PartitionNum) error { - // Confirm table/shard is being tracked; get the current shard. - fromPartitionVersion, found, err := c.versionStore.PartitionVersion(ctx, qtid, partitionNum) - if err != nil { - return errors.Wrapf(err, "getting partition version: %s, %d", qtid, partitionNum) - } else if !found { - return NewErrInternal( - fmt.Sprintf("partition to snapshot not found: %s, %d", qtid, partitionNum), - ) - } - toPartitionVersion := fromPartitionVersion + 1 - // Get the node responsible for the partition. 
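// Illustrative sketch (not part of this patch): after this change the
// snapshot requests carry no from/to version bookkeeping and no embedded
// Directive. Field names and types are taken from this changeset:
package main

import (
	"fmt"

	"github.com/featurebasedb/featurebase/v3/dax"
)

// buildSnapshotShardReq shows the trimmed-down request: only the target
// address, table key, and shard number remain.
func buildSnapshotShardReq(addr dax.Address, tkey dax.TableKey, shardNum dax.ShardNum) *dax.SnapshotShardDataRequest {
	return &dax.SnapshotShardDataRequest{
		Address:  addr,
		TableKey: tkey,
		ShardNum: shardNum,
	}
}

func main() {
	req := buildSnapshotShardReq("computer1:8080", "orgid__dbid__tblid", 3)
	fmt.Printf("%+v\n", req)
}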
bal := c.TranslateBalancer - - job := partition(qtid.Key(), dax.NewVersionedPartition(partitionNum, -1)) - + job := partition(qtid.Key(), partitionNum) workers, err := bal.WorkersForJobs(ctx, []dax.Job{dax.Job(job.String())}) if err != nil { return errors.Wrapf(err, "getting workers for jobs: %s", job) @@ -1510,54 +1265,17 @@ func (c *Controller) SnapshotTableKeys(ctx context.Context, qtid dax.QualifiedTa addr := dax.Address(workers[0].ID) - // Make a copy of the controller's versionStore, and update the current - // partition so that the directive sent along with the SnapshotRequest - // reflects the state that we want after a successful snapshot. - versionStoreCopy, err := c.versionStore.Copy(ctx) - if err != nil { - return errors.Wrap(err, "copying version store") - } - if err := versionStoreCopy.AddPartitions(ctx, qtid, - dax.NewVersionedPartition(partitionNum, toPartitionVersion), - ); err != nil { - return NewErrInternal(err.Error()) - } - - // Convert the address into a slice of addressMethod containing the - // appropriate method. - addressMethods := applyAddressMethod([]dax.Address{addr}, dax.DirectiveMethodSnapshot) - - var toDirective dax.Directive - if directives, err := c.buildDirectives(ctx, addressMethods, versionStoreCopy); err != nil { - return NewErrInternal(err.Error()) - } else if ld := len(directives); ld != 1 { - msg := fmt.Sprintf("buildDirectives returned invalid number of directives: %d", ld) - return NewErrInternal(msg) - } else { - toDirective = *directives[0] - } - // Send the node a snapshot request. req := &dax.SnapshotTableKeysRequest{ Address: addr, TableKey: qtid.Key(), PartitionNum: partitionNum, - FromVersion: fromPartitionVersion, - ToVersion: toPartitionVersion, - Directive: toDirective, } if err := c.Director.SendSnapshotTableKeysRequest(ctx, req); err != nil { return NewErrInternal(err.Error()) } - // A successful request means the partition version can be incremented. - if err := c.versionStore.AddPartitions(ctx, qtid, - dax.NewVersionedPartition(partitionNum, toPartitionVersion), - ); err != nil { - return NewErrInternal(err.Error()) - } - return nil } @@ -1565,23 +1283,11 @@ func (c *Controller) SnapshotTableKeys(ctx context.Context, qtid dax.QualifiedTa // to snapshot the keys for that field, then increment its version for logs // written to the WriteLogger. func (c *Controller) SnapshotFieldKeys(ctx context.Context, qtid dax.QualifiedTableID, field dax.FieldName) error { - // Confirm table/field is being tracked; get the current field. - fromFieldVersion, ok, err := c.versionStore.FieldVersion(ctx, qtid, field) - if err != nil { - return errors.Wrapf(err, "getting field version: %s, %s", qtid, field) - } else if !ok { - return NewErrInternal( - fmt.Sprintf("field to snapshot not found: %s, %s", qtid, field), - ) - } - toFieldVersion := fromFieldVersion + 1 - // Get the node responsible for the field. bal := c.TranslateBalancer - // Field translation is currently handled by partition 0. 
partitionNum := dax.PartitionNum(0) - job := partition(qtid.Key(), dax.NewVersionedPartition(partitionNum, -1)) + job := partition(qtid.Key(), partitionNum) workers, err := bal.WorkersForJobs(ctx, []dax.Job{dax.Job(job.String())}) if err != nil { @@ -1594,63 +1300,26 @@ func (c *Controller) SnapshotFieldKeys(ctx context.Context, qtid dax.QualifiedTa addr := dax.Address(workers[0].ID) - // Make a copy of the controller's versionStore, and update the current - // field so that the directive sent along with the SnapshotRequest reflects - // the state that we want after a successful snapshot. - versionStoreCopy, err := c.versionStore.Copy(ctx) - if err != nil { - return errors.Wrap(err, "copying version store") - } - if err := versionStoreCopy.AddFields(ctx, qtid, - dax.NewVersionedField(field, toFieldVersion), - ); err != nil { - return NewErrInternal(err.Error()) - } - - // Convert the address into a slice of addressMethod containing the - // appropriate method. - addressMethods := applyAddressMethod([]dax.Address{addr}, dax.DirectiveMethodSnapshot) - - var toDirective dax.Directive - if directives, err := c.buildDirectives(ctx, addressMethods, versionStoreCopy); err != nil { - return NewErrInternal(err.Error()) - } else if ld := len(directives); ld != 1 { - msg := fmt.Sprintf("buildDirectives returned invalid number of directives: %d", ld) - return NewErrInternal(msg) - } else { - toDirective = *directives[0] - } - // Send the node a snapshot request. req := &dax.SnapshotFieldKeysRequest{ - Address: addr, - TableKey: qtid.Key(), - Field: field, - FromVersion: fromFieldVersion, - ToVersion: toFieldVersion, - Directive: toDirective, + Address: addr, + TableKey: qtid.Key(), + Field: field, } if err := c.Director.SendSnapshotFieldKeysRequest(ctx, req); err != nil { return NewErrInternal(err.Error()) } - // A successful request means the field version can be incremented. 
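// Illustrative sketch (not part of this patch): field-key translation is
// pinned to partition 0, so field-key snapshots target whichever worker
// holds the partition-0 job for the table. The job-string encoding below is
// assumed for illustration only:
package main

import "fmt"

// workersForJob stands in for the translate balancer's job -> worker state.
var workersForJob = map[string]string{
	"tbl0|partition|0": "worker-a",
	"tbl0|partition|1": "worker-b",
}

// fieldKeyWorker finds the worker that handles field keys for a table by
// looking up its partition-0 job.
func fieldKeyWorker(tableKey string) (string, bool) {
	w, ok := workersForJob[fmt.Sprintf("%s|partition|%d", tableKey, 0)]
	return w, ok
}

func main() {
	w, ok := fieldKeyWorker("tbl0")
	fmt.Println(w, ok) // worker-a true
}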
- if err := c.versionStore.AddFields(ctx, qtid, - dax.NewVersionedField(field, toFieldVersion), - ); err != nil { - return NewErrInternal(err.Error()) - } - return nil } ///////////// -func (c *Controller) ComputeNodes(ctx context.Context, qtid dax.QualifiedTableID, shards dax.ShardNums, isWrite bool) ([]ComputeNode, error) { +func (c *Controller) ComputeNodes(ctx context.Context, qtid dax.QualifiedTableID, shards dax.ShardNums, isWrite bool) ([]dax.ComputeNode, error) { inRole := &dax.ComputeRole{ TableKey: qtid.Key(), - Shards: dax.NewVersionedShards(shards...), + Shards: shards, } nodes, err := c.Nodes(ctx, inRole, isWrite) @@ -1658,7 +1327,7 @@ func (c *Controller) ComputeNodes(ctx context.Context, qtid dax.QualifiedTableID return nil, errors.Wrap(err, "getting compute nodes") } - computeNodes := make([]ComputeNode, 0) + computeNodes := make([]dax.ComputeNode, 0) for _, node := range nodes { role, ok := node.Role.(*dax.ComputeRole) @@ -1668,20 +1337,20 @@ func (c *Controller) ComputeNodes(ctx context.Context, qtid dax.QualifiedTableID return nil, NewErrInternal("not a compute node") } - computeNodes = append(computeNodes, ComputeNode{ + computeNodes = append(computeNodes, dax.ComputeNode{ Address: node.Address, Table: role.TableKey, - Shards: role.Shards.Nums(), + Shards: role.Shards, }) } return computeNodes, nil } -func (c *Controller) TranslateNodes(ctx context.Context, qtid dax.QualifiedTableID, partitions dax.PartitionNums, isWrite bool) ([]TranslateNode, error) { +func (c *Controller) TranslateNodes(ctx context.Context, qtid dax.QualifiedTableID, partitions dax.PartitionNums, isWrite bool) ([]dax.TranslateNode, error) { inRole := &dax.TranslateRole{ TableKey: qtid.Key(), - Partitions: dax.NewVersionedPartitions(partitions...), + Partitions: partitions, } nodes, err := c.Nodes(ctx, inRole, isWrite) @@ -1689,7 +1358,7 @@ func (c *Controller) TranslateNodes(ctx context.Context, qtid dax.QualifiedTable return nil, errors.Wrap(err, "getting translate nodes") } - translateNodes := make([]TranslateNode, 0) + translateNodes := make([]dax.TranslateNode, 0) for _, node := range nodes { role, ok := node.Role.(*dax.TranslateRole) @@ -1699,10 +1368,10 @@ func (c *Controller) TranslateNodes(ctx context.Context, qtid dax.QualifiedTable return nil, NewErrInternal("not a translate node") } - translateNodes = append(translateNodes, TranslateNode{ + translateNodes = append(translateNodes, dax.TranslateNode{ Address: node.Address, Table: role.TableKey, - Partitions: role.Partitions.Nums(), + Partitions: role.Partitions, }) } @@ -1759,35 +1428,15 @@ func (c *Controller) CreateField(ctx context.Context, qtid dax.QualifiedTableID, // and therefore need to be sent an updated Directive. workerSet := NewAddressSet() - // If the field has string keys, add it to the local versionStore. - if fld.StringKeys() { - fieldVersion := dax.VersionedField{ - Name: fld.Name, - Version: 0, - } - if err := c.versionStore.AddFields(ctx, qtid, fieldVersion); err != nil { - return errors.Wrapf(err, "adding fields: %s, %s", qtid, fieldVersion) - } + // Get the worker(s) responsible for partition 0. + job := partition(qtid.Key(), 0).String() + workers, err := c.TranslateBalancer.WorkersForJobs(ctx, []dax.Job{dax.Job(job)}) + if err != nil { + return errors.Wrapf(err, "getting workers for job: %s", job) } - // Get the worker responsible for partition 0, which handles field key - // translation. Be sure to get the current version. 
- if v, found, err := c.versionStore.PartitionVersion(ctx, qtid, 0); err != nil { - return errors.Wrapf(err, "getting partition version: %s/0", qtid) - } else if found { - // Get the worker(s) responsible for partition 0. - job := partition(qtid.Key(), dax.VersionedPartition{ - Num: 0, - Version: v, - }).String() - workers, err := c.TranslateBalancer.WorkersForJobs(ctx, []dax.Job{dax.Job(job)}) - if err != nil { - return errors.Wrapf(err, "getting workers for job: %s", job) - } - - for _, w := range workers { - workerSet.Add(dax.Address(w.ID)) - } + for _, w := range workers { + workerSet.Add(dax.Address(w.ID)) } // Get the list of workers responsible for shard data for this table. @@ -1826,27 +1475,15 @@ func (c *Controller) DropField(ctx context.Context, qtid dax.QualifiedTableID, f // and therefore need to be sent an updated Directive. workerSet := NewAddressSet() - // If the field has string keys, remove it from the local versionStore. - // TODO: implement RemoveField() on VersionStore interface. - - // Get the worker responsible for partition 0, which handles field key - // translation. Be sure to get the current version. - if v, found, err := c.versionStore.PartitionVersion(ctx, qtid, 0); err != nil { - return errors.Wrapf(err, "getting partition version: %s/0", qtid) - } else if found { - // Get the worker(s) responsible for partition 0. - job := partition(qtid.Key(), dax.VersionedPartition{ - Num: 0, - Version: v, - }).String() - workers, err := c.TranslateBalancer.WorkersForJobs(ctx, []dax.Job{dax.Job(job)}) - if err != nil { - return errors.Wrapf(err, "getting workers for job: %s", job) - } + // Get the worker(s) responsible for partition 0. + job := partition(qtid.Key(), 0).String() + workers, err := c.TranslateBalancer.WorkersForJobs(ctx, []dax.Job{dax.Job(job)}) + if err != nil { + return errors.Wrapf(err, "getting workers for job: %s", job) + } - for _, w := range workers { - workerSet.Add(dax.Address(w.ID)) - } + for _, w := range workers { + workerSet.Add(dax.Address(w.ID)) } // Get the list of workers responsible for shard data for this table. diff --git a/dax/mds/controller/controller_test.go b/dax/mds/controller/controller_test.go index 07eebfe19..e57148b16 100644 --- a/dax/mds/controller/controller_test.go +++ b/dax/mds/controller/controller_test.go @@ -108,19 +108,8 @@ func TestController(t *testing.T) { exp = []*dax.Directive{} assert.Equal(t, exp, director.flush()) - // Add the same non-keyed table again. - err := con.CreateTable(ctx, tbl0) - if assert.Error(t, err) { - assert.True(t, errors.Is(err, dax.ErrTableIDExists)) - } - - exp = []*dax.Directive{} - assert.Equal(t, exp, director.flush()) - // Add a shard. - assert.NoError(t, con.AddShards(ctx, tbl0.QualifiedID(), - dax.NewVersionedShard(0, 0), - )) + assert.NoError(t, con.AddShards(ctx, tbl0.QualifiedID(), 0)) exp = []*dax.Directive{ { @@ -132,9 +121,7 @@ func TestController(t *testing.T) { ComputeRoles: []dax.ComputeRole{ { TableKey: tbl0.Key(), - Shards: dax.VersionedShards{ - dax.NewVersionedShard(0, 0), - }, + Shards: dax.NewShardNums(0), }, }, TranslateRoles: []dax.TranslateRole{}, @@ -185,13 +172,7 @@ func TestController(t *testing.T) { assert.Equal(t, exp, director.flush()) // Add more shards. 
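These assertions lean on the new unversioned shard types; dax.NewShardNums is called throughout this file but only at call sites, so here is a hedged sketch of the assumed declarations (uses the standard library's sort package), after which the AddShards call below spreads the result variadically:

type ShardNum uint64
type ShardNums []ShardNum

// NewShardNums collects shard numbers into a sorted ShardNums slice.
func NewShardNums(nums ...ShardNum) ShardNums {
	s := make(ShardNums, len(nums))
	copy(s, nums)
	sort.Slice(s, func(i, j int) bool { return s[i] < s[j] })
	return s
}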
- assert.NoError(t, con.AddShards(ctx, tbl0.QualifiedID(), - dax.NewVersionedShard(1, 0), - dax.NewVersionedShard(2, 0), - dax.NewVersionedShard(3, 0), - dax.NewVersionedShard(5, 0), - dax.NewVersionedShard(8, 0), - )) + assert.NoError(t, con.AddShards(ctx, tbl0.QualifiedID(), dax.NewShardNums(1, 2, 3, 5, 8)...)) exp = []*dax.Directive{ { @@ -203,10 +184,7 @@ func TestController(t *testing.T) { ComputeRoles: []dax.ComputeRole{ { TableKey: tbl0.Key(), - Shards: dax.VersionedShards{ - dax.NewVersionedShard(0, 0), - dax.NewVersionedShard(3, 0), - }, + Shards: dax.NewShardNums(0, 3), }, }, TranslateRoles: []dax.TranslateRole{}, @@ -221,10 +199,7 @@ func TestController(t *testing.T) { ComputeRoles: []dax.ComputeRole{ { TableKey: tbl0.Key(), - Shards: dax.VersionedShards{ - dax.NewVersionedShard(1, 0), - dax.NewVersionedShard(5, 0), - }, + Shards: dax.NewShardNums(1, 5), }, }, TranslateRoles: []dax.TranslateRole{}, @@ -239,10 +214,7 @@ func TestController(t *testing.T) { ComputeRoles: []dax.ComputeRole{ { TableKey: tbl0.Key(), - Shards: dax.VersionedShards{ - dax.NewVersionedShard(2, 0), - dax.NewVersionedShard(8, 0), - }, + Shards: dax.NewShardNums(2, 8), }, }, TranslateRoles: []dax.TranslateRole{}, @@ -257,12 +229,7 @@ func TestController(t *testing.T) { assert.NoError(t, con.CreateTable(ctx, tbl1)) // Add more shards. - assert.NoError(t, con.AddShards(ctx, tbl1.QualifiedID(), - dax.NewVersionedShard(3, 0), - dax.NewVersionedShard(5, 0), - dax.NewVersionedShard(8, 0), - dax.NewVersionedShard(13, 0), - )) + assert.NoError(t, con.AddShards(ctx, tbl1.QualifiedID(), dax.NewShardNums(3, 5, 8, 13)...)) exp = []*dax.Directive{ { @@ -275,17 +242,11 @@ func TestController(t *testing.T) { ComputeRoles: []dax.ComputeRole{ { TableKey: tbl1.Key(), - Shards: dax.VersionedShards{ - dax.NewVersionedShard(3, 0), - dax.NewVersionedShard(13, 0), - }, + Shards: dax.NewShardNums(3, 13), }, { TableKey: tbl0.Key(), - Shards: dax.VersionedShards{ - dax.NewVersionedShard(0, 0), - dax.NewVersionedShard(3, 0), - }, + Shards: dax.NewShardNums(0, 3), }, }, TranslateRoles: []dax.TranslateRole{}, @@ -301,16 +262,11 @@ func TestController(t *testing.T) { ComputeRoles: []dax.ComputeRole{ { TableKey: tbl1.Key(), - Shards: dax.VersionedShards{ - dax.NewVersionedShard(5, 0), - }, + Shards: dax.NewShardNums(5), }, { TableKey: tbl0.Key(), - Shards: dax.VersionedShards{ - dax.NewVersionedShard(1, 0), - dax.NewVersionedShard(5, 0), - }, + Shards: dax.NewShardNums(1, 5), }, }, TranslateRoles: []dax.TranslateRole{}, @@ -326,16 +282,11 @@ func TestController(t *testing.T) { ComputeRoles: []dax.ComputeRole{ { TableKey: tbl1.Key(), - Shards: dax.VersionedShards{ - dax.NewVersionedShard(8, 0), - }, + Shards: dax.NewShardNums(8), }, { TableKey: tbl0.Key(), - Shards: dax.VersionedShards{ - dax.NewVersionedShard(2, 0), - dax.NewVersionedShard(8, 0), - }, + Shards: dax.NewShardNums(2, 8), }, }, TranslateRoles: []dax.TranslateRole{}, @@ -358,18 +309,11 @@ func TestController(t *testing.T) { ComputeRoles: []dax.ComputeRole{ { TableKey: tbl1.Key(), - Shards: dax.VersionedShards{ - dax.NewVersionedShard(3, 0), - dax.NewVersionedShard(13, 0), - }, + Shards: dax.NewShardNums(3, 13), }, { TableKey: tbl0.Key(), - Shards: dax.VersionedShards{ - dax.NewVersionedShard(0, 0), - dax.NewVersionedShard(1, 0), - dax.NewVersionedShard(3, 0), - }, + Shards: dax.NewShardNums(0, 1, 3), }, }, TranslateRoles: []dax.TranslateRole{}, @@ -385,18 +329,11 @@ func TestController(t *testing.T) { ComputeRoles: []dax.ComputeRole{ { TableKey: tbl1.Key(), - Shards: 
dax.VersionedShards{ - dax.NewVersionedShard(5, 0), - dax.NewVersionedShard(8, 0), - }, + Shards: dax.NewShardNums(5, 8), }, { TableKey: tbl0.Key(), - Shards: dax.VersionedShards{ - dax.NewVersionedShard(2, 0), - dax.NewVersionedShard(5, 0), - dax.NewVersionedShard(8, 0), - }, + Shards: dax.NewShardNums(2, 5, 8), }, }, TranslateRoles: []dax.TranslateRole{}, @@ -419,23 +356,11 @@ func TestController(t *testing.T) { ComputeRoles: []dax.ComputeRole{ { TableKey: tbl1.Key(), - Shards: dax.VersionedShards{ - dax.NewVersionedShard(3, 0), - dax.NewVersionedShard(5, 0), - dax.NewVersionedShard(8, 0), - dax.NewVersionedShard(13, 0), - }, + Shards: dax.NewShardNums(3, 5, 8, 13), }, { TableKey: tbl0.Key(), - Shards: dax.VersionedShards{ - dax.NewVersionedShard(0, 0), - dax.NewVersionedShard(1, 0), - dax.NewVersionedShard(2, 0), - dax.NewVersionedShard(3, 0), - dax.NewVersionedShard(5, 0), - dax.NewVersionedShard(8, 0), - }, + Shards: dax.NewShardNums(0, 1, 2, 3, 5, 8), }, }, TranslateRoles: []dax.TranslateRole{}, @@ -470,23 +395,11 @@ func TestController(t *testing.T) { ComputeRoles: []dax.ComputeRole{ { TableKey: tbl1.Key(), - Shards: dax.VersionedShards{ - dax.NewVersionedShard(3, 0), - dax.NewVersionedShard(5, 0), - dax.NewVersionedShard(8, 0), - dax.NewVersionedShard(13, 0), - }, + Shards: dax.NewShardNums(3, 5, 8, 13), }, { TableKey: tbl0.Key(), - Shards: dax.VersionedShards{ - dax.NewVersionedShard(0, 0), - dax.NewVersionedShard(1, 0), - dax.NewVersionedShard(2, 0), - dax.NewVersionedShard(3, 0), - dax.NewVersionedShard(5, 0), - dax.NewVersionedShard(8, 0), - }, + Shards: dax.NewShardNums(0, 1, 2, 3, 5, 8), }, }, TranslateRoles: []dax.TranslateRole{}, @@ -496,10 +409,7 @@ func TestController(t *testing.T) { assert.Equal(t, exp, director.flush()) // Remove shards. - assert.NoError(t, con.RemoveShards(ctx, tbl0.QualifiedID(), - dax.NewVersionedShard(2, 0), - dax.NewVersionedShard(5, 0), - )) + assert.NoError(t, con.RemoveShards(ctx, tbl0.QualifiedID(), dax.NewShardNums(2, 5)...)) exp = []*dax.Directive{ { @@ -512,21 +422,11 @@ func TestController(t *testing.T) { ComputeRoles: []dax.ComputeRole{ { TableKey: tbl1.Key(), - Shards: dax.VersionedShards{ - dax.NewVersionedShard(3, 0), - dax.NewVersionedShard(5, 0), - dax.NewVersionedShard(8, 0), - dax.NewVersionedShard(13, 0), - }, + Shards: dax.NewShardNums(3, 5, 8, 13), }, { TableKey: tbl0.Key(), - Shards: dax.VersionedShards{ - dax.NewVersionedShard(0, 0), - dax.NewVersionedShard(1, 0), - dax.NewVersionedShard(3, 0), - dax.NewVersionedShard(8, 0), - }, + Shards: dax.NewShardNums(0, 1, 3, 8), }, }, TranslateRoles: []dax.TranslateRole{}, @@ -538,10 +438,7 @@ func TestController(t *testing.T) { // Remove shards, one which does not exist. // Currently that doesn't result in an error, it simply no-ops on trying // to remove 99. 
- assert.NoError(t, con.RemoveShards(ctx, tbl0.QualifiedID(), - dax.NewVersionedShard(3, 0), - dax.NewVersionedShard(99, 0), - )) + assert.NoError(t, con.RemoveShards(ctx, tbl0.QualifiedID(), dax.NewShardNums(3, 99)...)) exp = []*dax.Directive{ { @@ -554,20 +451,11 @@ func TestController(t *testing.T) { ComputeRoles: []dax.ComputeRole{ { TableKey: tbl1.Key(), - Shards: dax.VersionedShards{ - dax.NewVersionedShard(3, 0), - dax.NewVersionedShard(5, 0), - dax.NewVersionedShard(8, 0), - dax.NewVersionedShard(13, 0), - }, + Shards: dax.NewShardNums(3, 5, 8, 13), }, { TableKey: tbl0.Key(), - Shards: dax.VersionedShards{ - dax.NewVersionedShard(0, 0), - dax.NewVersionedShard(1, 0), - dax.NewVersionedShard(8, 0), - }, + Shards: dax.NewShardNums(0, 1, 8), }, }, TranslateRoles: []dax.TranslateRole{}, @@ -589,12 +477,7 @@ func TestController(t *testing.T) { ComputeRoles: []dax.ComputeRole{ { TableKey: tbl1.Key(), - Shards: dax.VersionedShards{ - dax.NewVersionedShard(3, 0), - dax.NewVersionedShard(5, 0), - dax.NewVersionedShard(8, 0), - dax.NewVersionedShard(13, 0), - }, + Shards: dax.NewShardNums(3, 5, 8, 13), }, }, TranslateRoles: []dax.TranslateRole{}, @@ -604,7 +487,7 @@ func TestController(t *testing.T) { assert.Equal(t, exp, director.flush()) // Remove a node which doesn't exist. - err = con.DeregisterNodes(ctx, "invalidNode") + err := con.DeregisterNodes(ctx, "invalidNode") if assert.Error(t, err) { assert.True(t, errors.Is(err, dax.ErrNodeDoesNotExist)) } @@ -683,17 +566,8 @@ func TestController(t *testing.T) { ComputeRoles: []dax.ComputeRole{}, TranslateRoles: []dax.TranslateRole{ { - TableKey: tbl0.Key(), - Partitions: dax.VersionedPartitions{ - dax.NewVersionedPartition(0, 0), - dax.NewVersionedPartition(1, 0), - dax.NewVersionedPartition(2, 0), - dax.NewVersionedPartition(3, 0), - dax.NewVersionedPartition(4, 0), - dax.NewVersionedPartition(5, 0), - dax.NewVersionedPartition(6, 0), - dax.NewVersionedPartition(7, 0), - }, + TableKey: tbl0.Key(), + Partitions: dax.NewPartitionNums(0, 1, 2, 3, 4, 5, 6, 7), }, }, Version: 2, @@ -720,13 +594,8 @@ func TestController(t *testing.T) { ComputeRoles: []dax.ComputeRole{}, TranslateRoles: []dax.TranslateRole{ { - TableKey: tbl0.Key(), - Partitions: dax.VersionedPartitions{ - dax.NewVersionedPartition(0, 0), - dax.NewVersionedPartition(1, 0), - dax.NewVersionedPartition(2, 0), - dax.NewVersionedPartition(3, 0), - }, + TableKey: tbl0.Key(), + Partitions: dax.NewPartitionNums(0, 1, 2, 3), }, }, Version: 3, @@ -740,13 +609,8 @@ func TestController(t *testing.T) { ComputeRoles: []dax.ComputeRole{}, TranslateRoles: []dax.TranslateRole{ { - TableKey: tbl0.Key(), - Partitions: dax.VersionedPartitions{ - dax.NewVersionedPartition(4, 0), - dax.NewVersionedPartition(5, 0), - dax.NewVersionedPartition(6, 0), - dax.NewVersionedPartition(7, 0), - }, + TableKey: tbl0.Key(), + Partitions: dax.NewPartitionNums(4, 5, 6, 7), }, }, Version: 4, @@ -772,12 +636,8 @@ func TestController(t *testing.T) { ComputeRoles: []dax.ComputeRole{}, TranslateRoles: []dax.TranslateRole{ { - TableKey: tbl0.Key(), - Partitions: dax.VersionedPartitions{ - dax.NewVersionedPartition(0, 0), - dax.NewVersionedPartition(1, 0), - dax.NewVersionedPartition(2, 0), - }, + TableKey: tbl0.Key(), + Partitions: dax.NewPartitionNums(0, 1, 2), }, }, Version: 5, @@ -791,12 +651,8 @@ func TestController(t *testing.T) { ComputeRoles: []dax.ComputeRole{}, TranslateRoles: []dax.TranslateRole{ { - TableKey: tbl0.Key(), - Partitions: dax.VersionedPartitions{ - dax.NewVersionedPartition(4, 0), - 
dax.NewVersionedPartition(5, 0), - dax.NewVersionedPartition(6, 0), - }, + TableKey: tbl0.Key(), + Partitions: dax.NewPartitionNums(4, 5, 6), }, }, Version: 6, @@ -810,11 +666,8 @@ func TestController(t *testing.T) { ComputeRoles: []dax.ComputeRole{}, TranslateRoles: []dax.TranslateRole{ { - TableKey: tbl0.Key(), - Partitions: dax.VersionedPartitions{ - dax.NewVersionedPartition(3, 0), - dax.NewVersionedPartition(7, 0), - }, + TableKey: tbl0.Key(), + Partitions: dax.NewPartitionNums(3, 7), }, }, Version: 7, @@ -842,25 +695,12 @@ func TestController(t *testing.T) { ComputeRoles: []dax.ComputeRole{}, TranslateRoles: []dax.TranslateRole{ { - TableKey: tbl1.Key(), - Partitions: dax.VersionedPartitions{ - dax.NewVersionedPartition(1, 0), - dax.NewVersionedPartition(4, 0), - dax.NewVersionedPartition(7, 0), - dax.NewVersionedPartition(10, 0), - dax.NewVersionedPartition(13, 0), - dax.NewVersionedPartition(16, 0), - dax.NewVersionedPartition(19, 0), - dax.NewVersionedPartition(22, 0), - }, + TableKey: tbl1.Key(), + Partitions: dax.NewPartitionNums(1, 4, 7, 10, 13, 16, 19, 22), }, { - TableKey: tbl0.Key(), - Partitions: dax.VersionedPartitions{ - dax.NewVersionedPartition(0, 0), - dax.NewVersionedPartition(1, 0), - dax.NewVersionedPartition(2, 0), - }, + TableKey: tbl0.Key(), + Partitions: dax.NewPartitionNums(0, 1, 2), }, }, Version: 8, @@ -875,25 +715,12 @@ func TestController(t *testing.T) { ComputeRoles: []dax.ComputeRole{}, TranslateRoles: []dax.TranslateRole{ { - TableKey: tbl1.Key(), - Partitions: dax.VersionedPartitions{ - dax.NewVersionedPartition(2, 0), - dax.NewVersionedPartition(5, 0), - dax.NewVersionedPartition(8, 0), - dax.NewVersionedPartition(11, 0), - dax.NewVersionedPartition(14, 0), - dax.NewVersionedPartition(17, 0), - dax.NewVersionedPartition(20, 0), - dax.NewVersionedPartition(23, 0), - }, + TableKey: tbl1.Key(), + Partitions: dax.NewPartitionNums(2, 5, 8, 11, 14, 17, 20, 23), }, { - TableKey: tbl0.Key(), - Partitions: dax.VersionedPartitions{ - dax.NewVersionedPartition(4, 0), - dax.NewVersionedPartition(5, 0), - dax.NewVersionedPartition(6, 0), - }, + TableKey: tbl0.Key(), + Partitions: dax.NewPartitionNums(4, 5, 6), }, }, Version: 9, @@ -908,24 +735,12 @@ func TestController(t *testing.T) { ComputeRoles: []dax.ComputeRole{}, TranslateRoles: []dax.TranslateRole{ { - TableKey: tbl1.Key(), - Partitions: dax.VersionedPartitions{ - dax.NewVersionedPartition(0, 0), - dax.NewVersionedPartition(3, 0), - dax.NewVersionedPartition(6, 0), - dax.NewVersionedPartition(9, 0), - dax.NewVersionedPartition(12, 0), - dax.NewVersionedPartition(15, 0), - dax.NewVersionedPartition(18, 0), - dax.NewVersionedPartition(21, 0), - }, + TableKey: tbl1.Key(), + Partitions: dax.NewPartitionNums(0, 3, 6, 9, 12, 15, 18, 21), }, { - TableKey: tbl0.Key(), - Partitions: dax.VersionedPartitions{ - dax.NewVersionedPartition(3, 0), - dax.NewVersionedPartition(7, 0), - }, + TableKey: tbl0.Key(), + Partitions: dax.NewPartitionNums(3, 7), }, }, Version: 10, @@ -947,17 +762,8 @@ func TestController(t *testing.T) { ComputeRoles: []dax.ComputeRole{}, TranslateRoles: []dax.TranslateRole{ { - TableKey: tbl1.Key(), - Partitions: dax.VersionedPartitions{ - dax.NewVersionedPartition(1, 0), - dax.NewVersionedPartition(4, 0), - dax.NewVersionedPartition(7, 0), - dax.NewVersionedPartition(10, 0), - dax.NewVersionedPartition(13, 0), - dax.NewVersionedPartition(16, 0), - dax.NewVersionedPartition(19, 0), - dax.NewVersionedPartition(22, 0), - }, + TableKey: tbl1.Key(), + Partitions: dax.NewPartitionNums(1, 4, 7, 10, 13, 16, 
19, 22), }, }, Version: 11, @@ -971,17 +777,8 @@ func TestController(t *testing.T) { ComputeRoles: []dax.ComputeRole{}, TranslateRoles: []dax.TranslateRole{ { - TableKey: tbl1.Key(), - Partitions: dax.VersionedPartitions{ - dax.NewVersionedPartition(2, 0), - dax.NewVersionedPartition(5, 0), - dax.NewVersionedPartition(8, 0), - dax.NewVersionedPartition(11, 0), - dax.NewVersionedPartition(14, 0), - dax.NewVersionedPartition(17, 0), - dax.NewVersionedPartition(20, 0), - dax.NewVersionedPartition(23, 0), - }, + TableKey: tbl1.Key(), + Partitions: dax.NewPartitionNums(2, 5, 8, 11, 14, 17, 20, 23), }, }, Version: 12, @@ -995,17 +792,8 @@ func TestController(t *testing.T) { ComputeRoles: []dax.ComputeRole{}, TranslateRoles: []dax.TranslateRole{ { - TableKey: tbl1.Key(), - Partitions: dax.VersionedPartitions{ - dax.NewVersionedPartition(0, 0), - dax.NewVersionedPartition(3, 0), - dax.NewVersionedPartition(6, 0), - dax.NewVersionedPartition(9, 0), - dax.NewVersionedPartition(12, 0), - dax.NewVersionedPartition(15, 0), - dax.NewVersionedPartition(18, 0), - dax.NewVersionedPartition(21, 0), - }, + TableKey: tbl1.Key(), + Partitions: dax.NewPartitionNums(0, 3, 6, 9, 12, 15, 18, 21), }, }, Version: 13, @@ -1019,15 +807,6 @@ func TestController(t *testing.T) { assert.True(t, errors.Is(err, dax.ErrTableIDDoesNotExist)) } - // Add shards to a table which doesn't exist. - err = con.AddShards(ctx, invalidQtid, - dax.NewVersionedShard(1, 0), - dax.NewVersionedShard(2, 0), - ) - if assert.Error(t, err) { - assert.True(t, errors.Is(err, dax.ErrTableIDDoesNotExist)) - } - // Register an invalid node. nodeX := &dax.Node{ Address: "", @@ -1082,14 +861,7 @@ func TestController(t *testing.T) { assert.NoError(t, con.CreateTable(ctx, tbl0)) // Add shards. - assert.NoError(t, con.AddShards(ctx, tbl0.QualifiedID(), - dax.NewVersionedShard(0, 0), - dax.NewVersionedShard(1, 0), - dax.NewVersionedShard(2, 0), - dax.NewVersionedShard(3, 0), - dax.NewVersionedShard(11, 0), - dax.NewVersionedShard(12, 0), - )) + assert.NoError(t, con.AddShards(ctx, tbl0.QualifiedID(), 0, 1, 2, 3, 11, 12)) t.Run("ComputeRole", func(t *testing.T) { tests := []struct { @@ -1100,27 +872,21 @@ func TestController(t *testing.T) { { role: &dax.ComputeRole{ TableKey: tbl0.Key(), - Shards: dax.NewVersionedShards(0, 1, 2, 3), + Shards: dax.NewShardNums(0, 1, 2, 3), }, exp: []dax.AssignedNode{ { Address: node0.Address, Role: &dax.ComputeRole{ TableKey: tbl0.Key(), - Shards: dax.VersionedShards{ - dax.NewVersionedShard(0, 0), - dax.NewVersionedShard(2, 0), - }, + Shards: dax.NewShardNums(0, 2), }, }, { Address: node1.Address, Role: &dax.ComputeRole{ TableKey: tbl0.Key(), - Shards: dax.VersionedShards{ - dax.NewVersionedShard(1, 0), - dax.NewVersionedShard(3, 0), - }, + Shards: dax.NewShardNums(1, 3), }, }, }, @@ -1128,16 +894,14 @@ func TestController(t *testing.T) { { role: &dax.ComputeRole{ TableKey: tbl0.Key(), - Shards: dax.NewVersionedShards(1), + Shards: dax.NewShardNums(1), }, exp: []dax.AssignedNode{ { Address: node1.Address, Role: &dax.ComputeRole{ TableKey: tbl0.Key(), - Shards: dax.VersionedShards{ - dax.NewVersionedShard(1, 0), - }, + Shards: dax.NewShardNums(1), }, }, }, @@ -1146,7 +910,7 @@ func TestController(t *testing.T) { // Add unassigned shards. 
role: &dax.ComputeRole{ TableKey: tbl0.Key(), - Shards: dax.NewVersionedShards(1, 888, 889), + Shards: dax.NewShardNums(1, 888, 889), }, isWrite: true, exp: []dax.AssignedNode{ @@ -1154,19 +918,14 @@ func TestController(t *testing.T) { Address: node0.Address, Role: &dax.ComputeRole{ TableKey: tbl0.Key(), - Shards: dax.VersionedShards{ - dax.NewVersionedShard(888, 0), - }, + Shards: dax.NewShardNums(888), }, }, { Address: node1.Address, Role: &dax.ComputeRole{ TableKey: tbl0.Key(), - Shards: dax.VersionedShards{ - dax.NewVersionedShard(1, 0), - dax.NewVersionedShard(889, 0), - }, + Shards: dax.NewShardNums(1, 889), }, }, }, @@ -1175,17 +934,14 @@ func TestController(t *testing.T) { // Ensure shards are not returned sorted as strings. role: &dax.ComputeRole{ TableKey: tbl0.Key(), - Shards: dax.NewVersionedShards(2, 11), + Shards: dax.NewShardNums(2, 11), }, exp: []dax.AssignedNode{ { Address: node0.Address, Role: &dax.ComputeRole{ TableKey: tbl0.Key(), - Shards: dax.VersionedShards{ - dax.NewVersionedShard(2, 0), - dax.NewVersionedShard(11, 0), - }, + Shards: dax.NewShardNums(2, 11), }, }, }, @@ -1209,75 +965,55 @@ func TestController(t *testing.T) { }{ { role: &dax.TranslateRole{ - TableKey: tbl0.Key(), - Partitions: dax.VersionedPartitions{ - dax.NewVersionedPartition(0, -1), - }, + TableKey: tbl0.Key(), + Partitions: dax.NewPartitionNums(0), }, isWrite: true, exp: []dax.AssignedNode{ { Address: node0.Address, Role: &dax.TranslateRole{ - TableKey: tbl0.Key(), - Partitions: dax.VersionedPartitions{ - dax.NewVersionedPartition(0, 0), - }, + TableKey: tbl0.Key(), + Partitions: dax.NewPartitionNums(0), }, }, }, }, { role: &dax.TranslateRole{ - TableKey: tbl0.Key(), - Partitions: dax.VersionedPartitions{ - dax.NewVersionedPartition(0, -1), - dax.NewVersionedPartition(1, -1), - dax.NewVersionedPartition(2, -1), - dax.NewVersionedPartition(3, -1), - dax.NewVersionedPartition(999, -1), - }, + TableKey: tbl0.Key(), + Partitions: dax.NewPartitionNums(0, 1, 2, 3, 999), }, isWrite: false, exp: []dax.AssignedNode{ { Address: node0.Address, Role: &dax.TranslateRole{ - TableKey: tbl0.Key(), - Partitions: dax.VersionedPartitions{ - dax.NewVersionedPartition(0, 0), - dax.NewVersionedPartition(2, 0), - }, + TableKey: tbl0.Key(), + Partitions: dax.NewPartitionNums(0, 2), }, }, { Address: node1.Address, Role: &dax.TranslateRole{ - TableKey: tbl0.Key(), - Partitions: dax.VersionedPartitions{ - dax.NewVersionedPartition(1, 0), - dax.NewVersionedPartition(3, 0), - }, + TableKey: tbl0.Key(), + Partitions: dax.NewPartitionNums(1, 3), }, }, }, }, { role: &dax.TranslateRole{ - TableKey: tbl0.Key(), - Partitions: dax.VersionedPartitions{ - dax.NewVersionedPartition(1, -1), - }, + TableKey: tbl0.Key(), + Partitions: dax.NewPartitionNums(1), }, isWrite: false, exp: []dax.AssignedNode{ { Address: node1.Address, Role: &dax.TranslateRole{ - TableKey: tbl0.Key(), - Partitions: dax.VersionedPartitions{ - dax.NewVersionedPartition(1, 0), - }, + TableKey: tbl0.Key(), + Partitions: dax.NewPartitionNums(1), }, }, }, @@ -1285,22 +1021,16 @@ func TestController(t *testing.T) { { // Ensure partitions are not returned sorted as strings. 
role: &dax.TranslateRole{ - TableKey: tbl0.Key(), - Partitions: dax.VersionedPartitions{ - dax.NewVersionedPartition(2, -1), - dax.NewVersionedPartition(10, -1), - }, + TableKey: tbl0.Key(), + Partitions: dax.NewPartitionNums(2, 10), }, isWrite: false, exp: []dax.AssignedNode{ { Address: node0.Address, Role: &dax.TranslateRole{ - TableKey: tbl0.Key(), - Partitions: dax.VersionedPartitions{ - dax.NewVersionedPartition(2, 0), - dax.NewVersionedPartition(10, 0), - }, + TableKey: tbl0.Key(), + Partitions: dax.NewPartitionNums(2, 10), }, }, }, diff --git a/dax/mds/controller/http/director.go b/dax/mds/controller/http/director.go index 81b8127ef..c7969cc2e 100644 --- a/dax/mds/controller/http/director.go +++ b/dax/mds/controller/http/director.go @@ -100,7 +100,6 @@ func (d *Director) SendDirective(ctx context.Context, dir *dax.Directive) error func (d *Director) SendSnapshotShardDataRequest(ctx context.Context, req *dax.SnapshotShardDataRequest) error { url := fmt.Sprintf("%s/%s/shard-data", req.Address.WithScheme("http"), d.snapshotRequestPath) - d.logger.Printf("SEND HTTP snapshot shard data request to: %s\n", url) // Encode the request. postBody, err := json.Marshal(req) diff --git a/dax/mds/controller/naive/balancer.go b/dax/mds/controller/naive/balancer.go index 4cc2ec18e..8a8d2a996 100644 --- a/dax/mds/controller/naive/balancer.go +++ b/dax/mds/controller/naive/balancer.go @@ -55,15 +55,18 @@ type WorkerJobService interface { CreateJobs(ctx context.Context, balancerName string, worker dax.Worker, job ...dax.Job) error DeleteJob(ctx context.Context, balancerName string, worker dax.Worker, job dax.Job) error + DeleteJobs(ctx context.Context, balancerName, prefix string) (InternalDiffs, error) JobCounts(ctx context.Context, balancerName string, worker ...dax.Worker) (map[dax.Worker]int, error) ListJobs(ctx context.Context, balancerName string, worker dax.Worker) (dax.Jobs, error) } +// TODO: I don't think all these method names need "Free" in them. type FreeJobService interface { CreateFreeJobs(ctx context.Context, balancerName string, job ...dax.Job) error DeleteFreeJob(ctx context.Context, balancerName string, job dax.Job) error ListFreeJobs(ctx context.Context, balancerName string) (dax.Jobs, error) MergeFreeJobs(ctx context.Context, balancerName string, jobs dax.Jobs) error + DeleteFreeJobs(ctx context.Context, balancerName, prefix string) error } // New returns a new instance of Balancer. @@ -90,15 +93,15 @@ func (b *Balancer) AddWorker(ctx context.Context, worker fmt.Stringer) ([]dax.Wo return nil, errors.Wrap(err, "adding worker") } - return diff.output(), nil + return diff.Output(), nil } -func (b *Balancer) addWorker(ctx context.Context, worker dax.Worker) (internalDiffs, error) { +func (b *Balancer) addWorker(ctx context.Context, worker dax.Worker) (InternalDiffs, error) { // If this worker already exists, don't do anything. 
if exists, err := b.current.WorkerExists(ctx, b.name, worker); err != nil { return nil, errors.Wrap(err, "checking if worker exists") } else if exists { - return internalDiffs{}, nil + return InternalDiffs{}, nil } if err := b.current.CreateWorker(ctx, b.name, worker); err != nil { @@ -133,15 +136,15 @@ func (b *Balancer) RemoveWorker(ctx context.Context, worker fmt.Stringer) ([]dax return nil, errors.Wrap(err, "removing worker") } - return diff.output(), nil + return diff.Output(), nil } -func (b *Balancer) removeWorker(ctx context.Context, worker dax.Worker) (internalDiffs, error) { +func (b *Balancer) removeWorker(ctx context.Context, worker dax.Worker) (InternalDiffs, error) { // If this worker doesn't exist, don't do anything else. if exists, err := b.current.WorkerExists(ctx, b.name, worker); err != nil { return nil, errors.Wrap(err, "checking if worker exists") } else if !exists { - return internalDiffs{}, nil + return InternalDiffs{}, nil } jobs, err := b.current.ListJobs(ctx, b.name, worker) @@ -162,9 +165,9 @@ func (b *Balancer) removeWorker(ctx context.Context, worker dax.Worker) (interna // Even though this may not be useful to the caller (for example, in the // case where the worker has died and no longer exists), return the diffs // which represent the removal of jobs from the worker. - diff := newInternalDiffs() + diff := NewInternalDiffs() for _, job := range jobs { - diff.removed(worker, job) + diff.Removed(worker, job) } return diff, nil @@ -198,10 +201,10 @@ func (b *Balancer) AddJobs(ctx context.Context, jobs ...fmt.Stringer) ([]dax.Wor return nil, errors.Wrap(err, "adding job") } - return diff.output(), nil + return diff.Output(), nil } -func (b *Balancer) addJobs(ctx context.Context, jobs ...dax.Job) (internalDiffs, error) { +func (b *Balancer) addJobs(ctx context.Context, jobs ...dax.Job) (InternalDiffs, error) { if cnt, err := b.current.WorkerCount(ctx, b.name); err != nil { return nil, errors.Wrap(err, "getting worker count") } else if cnt == 0 { @@ -210,7 +213,7 @@ func (b *Balancer) addJobs(ctx context.Context, jobs ...dax.Job) (internalDiffs, } // TODO: we might want to inform the user that a job is in the free list // because there are no workers. 
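The zero-worker branch above parks incoming jobs instead of failing. The lines elided by the hunk boundary are not shown; presumably they hand the jobs to the FreeJobService declared earlier in this diff, roughly:

// Sketch only; the actual elided body may differ.
if err := b.freeJobs.CreateFreeJobs(ctx, b.name, jobs...); err != nil {
	return nil, errors.Wrap(err, "creating free jobs")
}
// ...before returning the empty diff shown on the next line.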
- return internalDiffs{}, nil + return InternalDiffs{}, nil } workerJobs, err := b.current.WorkersJobs(ctx, b.name) @@ -229,7 +232,7 @@ func (b *Balancer) addJobs(ctx context.Context, jobs ...dax.Job) (internalDiffs, jobCounts[v.ID] = len(v.Jobs) } - diffs := newInternalDiffs() + diffs := NewInternalDiffs() jobsToCreate := make(map[dax.Worker][]dax.Job) @@ -263,7 +266,7 @@ func (b *Balancer) addJobs(ctx context.Context, jobs ...dax.Job) (internalDiffs, return nil, errors.Wrap(err, "creating job") } for _, job := range jobs { - diffs.added(worker, job) + diffs.Added(worker, job) } } @@ -282,10 +285,24 @@ func (b *Balancer) RemoveJob(ctx context.Context, job fmt.Stringer) ([]dax.Worke return nil, errors.Wrapf(err, "removing job: %s", job) } - return diff.output(), nil + return diff.Output(), nil } -func (b *Balancer) removeJob(ctx context.Context, job dax.Job) (internalDiffs, error) { +func (b *Balancer) RemoveJobs(ctx context.Context, prefix string) ([]dax.WorkerDiff, error) { + b.mu.Lock() + defer b.mu.Unlock() + + idiffs, err := b.current.DeleteJobs(ctx, b.name, prefix) + if err != nil { + return nil, errors.Wrap(err, "deleting worker jobs") + } + if err := b.freeJobs.DeleteFreeJobs(ctx, b.name, prefix); err != nil { + return nil, errors.Wrap(err, "deleting free jobs") + } + return idiffs.Output(), nil +} + +func (b *Balancer) removeJob(ctx context.Context, job dax.Job) (InternalDiffs, error) { if worker, ok, err := b.workerForJob(ctx, job); err != nil { return nil, errors.Wrapf(err, "getting worker for job: %s", job) } else if ok { @@ -293,8 +310,8 @@ func (b *Balancer) removeJob(ctx context.Context, job dax.Job) (internalDiffs, e return nil, errors.Wrapf(err, "deleting job: %s", job) } - diffs := newInternalDiffs() - diffs.removed(worker, job) + diffs := NewInternalDiffs() + diffs.Removed(worker, job) return diffs, nil } @@ -307,7 +324,7 @@ func (b *Balancer) removeJob(ctx context.Context, job dax.Job) (internalDiffs, e return nil, errors.Wrapf(err, "deleting free job: %s", job) } - return internalDiffs{}, nil + return InternalDiffs{}, nil } // Balance ensures that all jobs are being handled by a worker by assigning jobs @@ -336,7 +353,7 @@ func (b *Balancer) Balance(ctx context.Context) ([]dax.WorkerDiff, error) { return nil, errors.Wrap(err, "balancing jobs") } - return diff.output(), nil + return diff.Output(), nil } // balance moves jobs among workers with the goal of having an equal number of @@ -346,7 +363,7 @@ func (b *Balancer) Balance(ctx context.Context) ([]dax.WorkerDiff, error) { // the internalDiffs.merge() method, but we would need to modify that method to // be smarter about the order in which it applies the add/remove operations. // Until that's in place, we'll pass in a value here. 
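A worked example of the "equal number of jobs" goal described in the comment above (illustrative arithmetic only, not code from this diff): with 7 jobs across 3 workers,

numJobs, numWorkers := 7, 3
perWorker := (numJobs + numWorkers - 1) / numWorkers // ceil(7/3) = 3
// any worker holding more than perWorker jobs donates its surplus via
// removeJob/addJobs until the split settles at 3/2/2.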
-func (b *Balancer) balance(ctx context.Context, diffs internalDiffs) (internalDiffs, error) { +func (b *Balancer) balance(ctx context.Context, diffs InternalDiffs) (InternalDiffs, error) { numWorkers, err := b.current.WorkerCount(ctx, b.name) if err != nil { return nil, errors.Wrapf(err, "getting worker count: %s", b.name) @@ -405,12 +422,12 @@ func (b *Balancer) balance(ctx context.Context, diffs internalDiffs) (internalDi if rj, err := b.removeJob(ctx, sortedJobs[i]); err != nil { return nil, errors.Wrapf(err, "removing job: %s", sortedJobs[i]) } else { - diffs.merge(rj) + diffs.Merge(rj) } if aj, err := b.addJobs(ctx, sortedJobs[i]); err != nil { return nil, errors.Wrapf(err, "adding job: %s", sortedJobs[i]) } else { - diffs.merge(aj) + diffs.Merge(aj) } } } @@ -547,8 +564,8 @@ func (b *Balancer) WorkersForJobPrefix(ctx context.Context, prefix string) ([]da } // processFreeJobs assigns all jobs in the free list to a worker. -func (b *Balancer) processFreeJobs(ctx context.Context) (internalDiffs, error) { - diffs := newInternalDiffs() +func (b *Balancer) processFreeJobs(ctx context.Context) (InternalDiffs, error) { + diffs := NewInternalDiffs() jobs, err := b.freeJobs.ListFreeJobs(ctx, b.name) if err != nil { return nil, errors.Wrapf(err, "listing free jobs: %s", b.name) @@ -557,7 +574,7 @@ func (b *Balancer) processFreeJobs(ctx context.Context) (internalDiffs, error) { if aj, err := b.addJobs(ctx, job); err != nil { return nil, errors.Wrapf(err, "adding job: %s", job) } else { - diffs.merge(aj) + diffs.Merge(aj) } if err := b.freeJobs.DeleteFreeJob(ctx, b.name, job); err != nil { return nil, errors.Wrapf(err, "deleting free job: %s", job) diff --git a/dax/mds/controller/naive/boltdb/balancer.go b/dax/mds/controller/naive/boltdb/balancer.go index 93c5ebff7..b8e202913 100644 --- a/dax/mds/controller/naive/boltdb/balancer.go +++ b/dax/mds/controller/naive/boltdb/balancer.go @@ -289,6 +289,53 @@ func (w *workerJobService) DeleteJob(ctx context.Context, balancerName string, w return tx.Commit() } +func (w *workerJobService) DeleteJobs(ctx context.Context, balancerName, prefix string) (naive.InternalDiffs, error) { + tx, err := w.db.BeginTx(ctx, true) + if err != nil { + return nil, errors.Wrap(err, "beginning tx") + } + defer tx.Rollback() + + bkt := tx.Bucket(bucketNaiveBalancer) + if bkt == nil { + return nil, errors.Errorf(boltdb.ErrFmtBucketNotFound, bucketNaiveBalancer) + } + + workers, err := w.getWorkers(ctx, tx, balancerName) + if err != nil { + return nil, errors.Wrap(err, "getting workers") + } + + idiffs := naive.NewInternalDiffs() + for _, worker := range workers { + // get worker + wrkr := bkt.Get(workerKey(balancerName, worker)) + if wrkr == nil { + panic("didn't find worker that should... 
definitely exist") + } + jobset, err := decodeJobSet(wrkr) + if err != nil { + return nil, errors.Wrap(err, "decoding job set") + } + + jobs := jobset.RemovePrefix(prefix) + for _, job := range jobs { + idiffs.Removed(worker, job) + } + val, err := encodeJobSet(jobset) + if err != nil { + return nil, errors.Wrap(err, "encoding job set") + } + + if err := bkt.Put(workerKey(balancerName, worker), val); err != nil { + return nil, errors.Wrap(err, "putting worker") + } + + } + + return idiffs, tx.Commit() +} + func (w *workerJobService) ListJobs(ctx context.Context, balancerName string, worker dax.Worker) (dax.Jobs, error) { tx, err := w.db.BeginTx(ctx, false) if err != nil { @@ -429,6 +476,42 @@ func (f *freeJobService) DeleteFreeJob(ctx context.Context, balancerName string, return tx.Commit() } +func (f *freeJobService) DeleteFreeJobs(ctx context.Context, balancerName, prefix string) error { + tx, err := f.db.BeginTx(ctx, true) + if err != nil { + return errors.Wrap(err, "beginning tx") + } + defer tx.Rollback() + + bkt := tx.Bucket(bucketNaiveBalancer) + if bkt == nil { + return errors.Errorf(boltdb.ErrFmtBucketNotFound, bucketNaiveBalancer) + } + + // get free jobs + fjs := bkt.Get(freeJobKey(balancerName)) + if fjs == nil { + return nil + } + + jobset, err := decodeJobSet(fjs) + if err != nil { + return errors.Wrap(err, "decoding job set") + } + + jobset.RemovePrefix(prefix) + val, err := encodeJobSet(jobset) + if err != nil { + return errors.Wrap(err, "encoding job set") + } + + if err := bkt.Put(freeJobKey(balancerName), val); err != nil { + return errors.Wrap(err, "putting free job") + } + + return tx.Commit() +} + func (f *freeJobService) ListFreeJobs(ctx context.Context, balancerName string) (dax.Jobs, error) { tx, err := f.db.BeginTx(ctx, false) if err != nil { diff --git a/dax/mds/controller/naive/types.go b/dax/mds/controller/naive/types.go index 6995e24d0..6659b7f9d 100644 --- a/dax/mds/controller/naive/types.go +++ b/dax/mds/controller/naive/types.go @@ -20,13 +20,13 @@ func newJobSetDiffs() jobSetDiffs { } } -type internalDiffs map[dax.Worker]jobSetDiffs +type InternalDiffs map[dax.Worker]jobSetDiffs -func newInternalDiffs() internalDiffs { - return make(internalDiffs) +func NewInternalDiffs() InternalDiffs { + return make(InternalDiffs) } -func (d internalDiffs) added(worker dax.Worker, job dax.Job) { +func (d InternalDiffs) Added(worker dax.Worker, job dax.Job) { if _, ok := d[worker]; !ok { d[worker] = newJobSetDiffs() } @@ -39,7 +39,7 @@ func (d internalDiffs) added(worker dax.Worker, job dax.Job) { d[worker].added.Add(job) } -func (d internalDiffs) removed(worker dax.Worker, job dax.Job) { +func (d InternalDiffs) Removed(worker dax.Worker, job dax.Job) { if _, ok := d[worker]; !ok { d[worker] = newJobSetDiffs() } @@ -52,7 +52,7 @@ func (d internalDiffs) removed(worker dax.Worker, job dax.Job) { d[worker].removed.Add(job) } -func (d internalDiffs) merge(d2 internalDiffs) { +func (d InternalDiffs) Merge(d2 InternalDiffs) { for k, v := range d2 { if _, ok := d[k]; !ok { d[k] = newJobSetDiffs() @@ -62,9 +62,9 @@ func (d internalDiffs) merge(d2 internalDiffs) { } } -// output converts internalDiff to []controller.WorkerDiff for external +// Output converts internalDiff to []controller.WorkerDiff for external // consumption. 
-func (d internalDiffs) output() []dax.WorkerDiff { +func (d InternalDiffs) Output() []dax.WorkerDiff { out := make([]dax.WorkerDiff, len(d)) i := 0 diff --git a/dax/mds/controller/snapping_turtle.go b/dax/mds/controller/snapping_turtle.go new file mode 100644 index 000000000..0af9e2dd7 --- /dev/null +++ b/dax/mds/controller/snapping_turtle.go @@ -0,0 +1,114 @@ +package controller + +import ( + "context" + "time" + + "github.com/featurebasedb/featurebase/v3/dax" +) + +func (c *Controller) snappingTurtleRoutine(period time.Duration, control chan struct{}) { + if period == 0 { + return // disable automatic snapshotting + } + ticker := time.NewTicker(period) + for { + select { + case <-c.stopping: + ticker.Stop() + c.logger.Debugf("TURTLE: Stopping Snapping Turtle") + return + case <-ticker.C: + c.snapAll() + case <-control: + c.snapAll() + } + } + +} + +func (c *Controller) snapAll() { + c.logger.Debugf("TURTLE: snapAll") + ctx := context.Background() + computeNodes, err := c.ComputeBalancer.CurrentState(ctx) + if err != nil { + c.logger.Printf("Error getting compute balancer state for snapping turtle: %v", err) + } + + // Weird nested loop for snapshotting shard data. The reason for + // this is to avoid hotspotting each node in turn and spread the + // snapshotting load across all nodes rather than snapshotting all + // jobs on one node and then moving on to the next one. + i := 0 + stillWorking := true + for stillWorking { + stillWorking = false + for _, workerInfo := range computeNodes { + if len(workerInfo.Jobs) <= i { + continue + } + stillWorking = true + j, err := decodeShard(workerInfo.Jobs[i]) + if err != nil { + c.logger.Printf("couldn't decode a shard out of the job: '%s', err: %v", workerInfo.Jobs[i], err) + continue + } + c.SnapshotShardData(ctx, j.t.QualifiedTableID(), j.shardNum()) + } + i++ + } + + // Get all tables across all orgs/dbs so we can snapshot all keyed + // fields and look up whether a table is keyed so we can snapshot its + // partitions. + tables, err := c.Schemar.Tables(ctx, dax.TableQualifier{}) + if err != nil { + c.logger.Printf("Couldn't get schema for snapshotting keys: %v", err) + return + } + // snapshot keyed fields + tableMap := make(map[dax.TableKey]*dax.QualifiedTable) + for _, table := range tables { + tableMap[table.Key()] = table + for _, f := range table.Fields { + if f.StringKeys() && !f.IsPrimaryKey() { + err := c.SnapshotFieldKeys(ctx, table.QualifiedID(), f.Name) + if err != nil { + c.logger.Printf("Couldn't snapshot table: %s, field: %s, error: %v", table, f.Name, err) + } + } + } + } + + // Get all partition jobs from balancer and snapshot table keys + // for any partition that goes with a keyed table. Doing the same + // weird nested loop thing to avoid doing all jobs on one node + // back to back.
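The "weird nested loop" pattern described above, distilled (hypothetical names; the real loop follows in snapAll): take job i from every worker before any worker's job i+1, so consecutive snapshots land on different nodes:

for i := 0; ; i++ {
	progressed := false
	for _, w := range workers { // workers: hypothetical worker-info slice
		if i >= len(w.Jobs) {
			continue
		}
		progressed = true
		snapshotJob(w.Jobs[i]) // stand-in for the per-job snapshot call
	}
	if !progressed {
		break // every worker's job list is exhausted
	}
}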
+ translateNodes, err := c.TranslateBalancer.CurrentState(ctx) + if err != nil { + c.logger.Printf("Error getting translate balancer state for snapping turtle: %v", err) + } + + i = 0 + stillWorking = true + for stillWorking { + stillWorking = false + for _, workerInfo := range translateNodes { + if len(workerInfo.Jobs) <= i { + continue + } + stillWorking = true + j, err := decodePartition(workerInfo.Jobs[i]) + if err != nil { + c.logger.Printf("couldn't decode a partition out of the job: '%s', err: %v", workerInfo.Jobs[i], err) + continue + } + table := tableMap[j.table()] + if table.StringKeys() { + c.SnapshotTableKeys(ctx, table.QualifiedID(), j.partitionNum()) + } + } + i++ + } + c.logger.Debugf("TURTLE: snapAll complete") +} diff --git a/dax/mds/controller/stringers.go b/dax/mds/controller/stringers.go index fae28e92c..d47150470 100644 --- a/dax/mds/controller/stringers.go +++ b/dax/mds/controller/stringers.go @@ -13,11 +13,11 @@ import ( // used as a job in the Balancer. type pUnit struct { t dax.TableKey - p dax.VersionedPartition + p dax.PartitionNum } func (p pUnit) String() string { - return fmt.Sprintf("%s|part_%d", p.t, p.p.Num) + return fmt.Sprintf("%s|part_%d", p.t, p.p) } func (p pUnit) table() dax.TableKey { @@ -25,14 +25,14 @@ func (p pUnit) partitionNum() dax.PartitionNum { - return p.p.Num + return p.p } -func partition(t dax.TableKey, p dax.VersionedPartition) pUnit { +func partition(t dax.TableKey, p dax.PartitionNum) pUnit { return pUnit{t, p} } -func partitions(t dax.TableKey, p ...dax.VersionedPartition) []pUnit { +func partitions(t dax.TableKey, p ...dax.PartitionNum) []pUnit { ret := make([]pUnit, 0, len(p)) for _, vp := range p { ret = append(ret, pUnit{t, vp}) @@ -57,10 +57,7 @@ func decodePartition(j dax.Job) (pUnit, error) { return pUnit{ t: dax.TableKey(parts[0]), - p: dax.VersionedPartition{ - Num: dax.PartitionNum(intVar), - Version: -1, - }, + p: dax.PartitionNum(intVar), }, nil } @@ -68,11 +65,11 @@ // a job in the Balancer. type sUnit struct { t dax.TableKey - s dax.VersionedShard + s dax.ShardNum } func (s sUnit) String() string { - return fmt.Sprintf("%s|shard_%s", s.t, s.s.Num) + return fmt.Sprintf("%s|shard_%s", s.t, s.s) } func (s sUnit) table() dax.TableKey { @@ -80,10 +77,10 @@ func (s sUnit) shardNum() dax.ShardNum { - return s.s.Num + return s.s } -func shard(t dax.TableKey, s dax.VersionedShard) sUnit { +func shard(t dax.TableKey, s dax.ShardNum) sUnit { return sUnit{t, s} } @@ -104,9 +101,6 @@ func decodeShard(j dax.Job) (sUnit, error) { return sUnit{ t: dax.TableKey(parts[0]), - s: dax.VersionedShard{ - Num: dax.ShardNum(uint64Var), - Version: -1, - }, + s: dax.ShardNum(uint64Var), }, nil } diff --git a/dax/mds/controller/types.go b/dax/mds/controller/types.go deleted file mode 100644 index 1c1ce4dbf..000000000 --- a/dax/mds/controller/types.go +++ /dev/null @@ -1,19 +0,0 @@ -package controller - -import "github.com/featurebasedb/featurebase/v3/dax" - -// ComputeNode represents a compute node and the table/shards for which it is -// responsible. -type ComputeNode struct { - Address dax.Address `json:"address"` - Table dax.TableKey `json:"table"` - Shards dax.ShardNums `json:"shards"` -} - -// TranslateNode represents a translate node and the table/partitions for which -// it is responsible.
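A quick round-trip of the unversioned job encoding from the stringers.go hunk above (table and partition values illustrative):

job := partition(dax.TableKey("tbl0"), dax.PartitionNum(3)).String() // "tbl0|part_3"
p, err := decodePartition(dax.Job(job))
// err == nil and p.partitionNum() == 3; there is no Version field left to carry.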
-type TranslateNode struct { - Address dax.Address `json:"address"` - Table dax.TableKey `json:"table"` - Partitions dax.PartitionNums `json:"partitions"` -} diff --git a/dax/mds/http/handler.go b/dax/mds/http/handler.go index 97686f85a..32b46caf9 100644 --- a/dax/mds/http/handler.go +++ b/dax/mds/http/handler.go @@ -7,7 +7,6 @@ import ( "github.com/gorilla/mux" "github.com/featurebasedb/featurebase/v3/dax" "github.com/featurebasedb/featurebase/v3/dax/mds" - "github.com/featurebasedb/featurebase/v3/dax/mds/controller" ) func Handler(mds *mds.MDS) http.Handler { @@ -613,7 +612,7 @@ type ComputeNodesRequest struct { // provided are not included in this response. That might happen if there are // currently no active compute nodes. type ComputeNodesResponse struct { - ComputeNodes []controller.ComputeNode `json:"compute-nodes"` + ComputeNodes []dax.ComputeNode `json:"compute-nodes"` } // POST /translate-nodes @@ -660,5 +659,5 @@ type TranslateNodesRequest struct { // that partitions provided are not included in this response. That might happen // if there are currently no active translate nodes. type TranslateNodesResponse struct { - TranslateNodes []controller.TranslateNode `json:"translate-nodes"` + TranslateNodes []dax.TranslateNode `json:"translate-nodes"` } diff --git a/dax/mds/mds.go b/dax/mds/mds.go index 28d300fd8..666fda0ce 100644 --- a/dax/mds/mds.go +++ b/dax/mds/mds.go @@ -29,6 +29,8 @@ type Config struct { // have been registered. RegistrationBatchTimeout time.Duration `toml:"registration-batch-timeout"` + SnappingTurtleTimeout time.Duration + // Poller PollInterval time.Duration `toml:"poll-interval"` @@ -107,6 +109,7 @@ func New(cfg Config) *MDS { TranslateBalancer: naiveboltdb.NewBalancer("translate", controllerDB, logr), RegistrationBatchTimeout: cfg.RegistrationBatchTimeout, + SnappingTurtleTimeout: cfg.SnappingTurtleTimeout, StorageMethod: cfg.StorageMethod, // just reusing this bolt for internal controller svcs @@ -462,7 +465,7 @@ func (m *MDS) DeregisterNodes(ctx context.Context, addrs ...dax.Address) error { // ComputeNodes gets the compute nodes responsible for the table/shards // specified in the ComputeNodeRequest. -func (m *MDS) ComputeNodes(ctx context.Context, qtid dax.QualifiedTableID, shardNums ...dax.ShardNum) ([]controller.ComputeNode, error) { +func (m *MDS) ComputeNodes(ctx context.Context, qtid dax.QualifiedTableID, shardNums ...dax.ShardNum) ([]dax.ComputeNode, error) { if err := m.sanitizeQTID(ctx, &qtid); err != nil { return nil, errors.Wrap(err, "sanitizing") } @@ -476,7 +479,7 @@ func (m *MDS) DebugNodes(ctx context.Context) ([]*dax.Node, error) { // TranslateNodes gets the translate nodes responsible for the table/partitions // specified in the TranslateNodeRequest. 
-func (m *MDS) TranslateNodes(ctx context.Context, qtid dax.QualifiedTableID, partitionNums ...dax.PartitionNum) ([]controller.TranslateNode, error) { +func (m *MDS) TranslateNodes(ctx context.Context, qtid dax.QualifiedTableID, partitionNums ...dax.PartitionNum) ([]dax.TranslateNode, error) { if err := m.sanitizeQTID(ctx, &qtid); err != nil { return nil, errors.Wrap(err, "sanitizing") } diff --git a/dax/mds/poller/poller.go b/dax/mds/poller/poller.go index 96ac34cbe..1676ff7b8 100644 --- a/dax/mds/poller/poller.go +++ b/dax/mds/poller/poller.go @@ -133,24 +133,21 @@ func (p *Poller) pollAll() { toRemove := []dax.Address{} for _, addr := range addrs { - p.logger.Debugf("polling: %s", addr) - start := time.Now() up := p.nodePoller.Poll(addr) if !up { p.logger.Printf("poller removing %s", addr) toRemove = append(toRemove, addr) } - p.logger.Debugf("done poll: %s, %s", addr, time.Since(start)) } if len(toRemove) > 0 { - p.logger.Debugf("removing addresses: %v", toRemove) + p.logger.Debugf("POLLER: removing addresses: %v", toRemove) start := time.Now() err := p.addressManager.RemoveAddresses(ctx, toRemove...) if err != nil { - p.logger.Printf("removing %s: %v", toRemove, err) + p.logger.Printf("POLLER: error removing %s: %v", toRemove, err) } - p.logger.Debugf("remove complete: %s", time.Since(start)) + p.logger.Debugf("POLLER removing %v complete: %s", toRemove, time.Since(start)) } } diff --git a/dax/mds/schemar/boltdb/schemar.go b/dax/mds/schemar/boltdb/schemar.go index a5862a785..14f3c433e 100644 --- a/dax/mds/schemar/boltdb/schemar.go +++ b/dax/mds/schemar/boltdb/schemar.go @@ -253,7 +253,7 @@ func (s *Schemar) tableIDByName(tx *boltdb.Tx, qual dax.TableQualifier, name dax } // Tables returns a list of Table for all existing tables. If one or more table -// names is provided, then only those will be included in the output. +// IDs is provided, then only those will be included in the output. func (s *Schemar) Tables(ctx context.Context, qual dax.TableQualifier, ids ...dax.TableID) ([]*dax.QualifiedTable, error) { tx, err := s.db.BeginTx(ctx, false) if err != nil { @@ -276,6 +276,12 @@ func (s *Schemar) getTables(ctx context.Context, tx *boltdb.Tx, qual dax.TableQu } prefix := []byte(fmt.Sprintf(prefixFmtTables, qual.OrganizationID, qual.DatabaseID)) + if qual.OrganizationID == "" && qual.DatabaseID == "" { + prefix = []byte(prefixTables) + } else if qual.DatabaseID == "" { + prefix = []byte(fmt.Sprintf(prefixFmtTablesOrg, qual.OrganizationID)) + } + for k, v := c.Seek(prefix); k != nil && bytes.HasPrefix(k, prefix); k, v = c.Next() { if v == nil { s.logger.Printf("nil value for key: %s", k) @@ -346,7 +352,9 @@ func (s *Schemar) DropTable(ctx context.Context, qtid dax.QualifiedTableID) erro } const ( - prefixFmtTables = "tables/%s/%s/" + prefixTables = "tables/" + prefixFmtTablesOrg = prefixTables + "%s/" + prefixFmtTables = prefixFmtTablesOrg + "%s/" prefixFmtTableNames = "tablenames/%s/%s/" ) diff --git a/dax/mds/schemar/boltdb/schemar_test.go b/dax/mds/schemar/boltdb/schemar_test.go index aba271ebd..6af10d7bd 100644 --- a/dax/mds/schemar/boltdb/schemar_test.go +++ b/dax/mds/schemar/boltdb/schemar_test.go @@ -143,4 +143,38 @@ func TestSchemar(t *testing.T) { assert.NoError(t, err) assert.Equal(t, exp, tables) }) + + t.Run("GetTablesAll", func(t *testing.T) { + // get a fresh DB + db := testbolt.MustOpenDB(t) + defer testbolt.MustCloseDB(t, db) + + t.Cleanup(func() { + testbolt.CleanupDB(t, db.Path()) + }) + // Initialize the buckets. 
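Worked example of the widened prefix selection in the schemar.go hunk above (format strings from the diff; org/db values illustrative):

fmt.Println(fmt.Sprintf(prefixFmtTables, "acme", "db1")) // tables/acme/db1/
fmt.Println(fmt.Sprintf(prefixFmtTablesOrg, "acme"))     // tables/acme/
fmt.Println(prefixTables)                                // tables/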
+ assert.NoError(t, db.InitializeBuckets(boltdb.SchemarBuckets...)) + + s := boltdb.NewSchemar(db, logger.NopLogger) + + qtbl0 := daxtest.TestQualifiedTableWithID(t, qual, tableID0, tableName0, partitionN, false) + orgID2 := dax.OrganizationID("acme2") + qual2 := dax.NewTableQualifier(orgID2, dbID) + tableID2 := "3" + qtbl2 := daxtest.TestQualifiedTableWithID(t, qual2, tableID2, dax.TableName("two"), partitionN, false) + + assert.NoError(t, s.CreateTable(ctx, qtbl0)) + assert.NoError(t, s.CreateTable(ctx, qtbl2)) + + exp := []*dax.QualifiedTable{qtbl0, qtbl2} + + tables, err := s.Tables(ctx, dax.TableQualifier{}) + assert.NoError(t, err) + assert.Equal(t, exp, tables) + + tables, err = s.Tables(ctx, dax.TableQualifier{OrganizationID: orgID2}) + assert.NoError(t, err) + assert.Equal(t, []*dax.QualifiedTable{qtbl2}, tables) + + }) } diff --git a/dax/mds/schemar/schemar.go b/dax/mds/schemar/schemar.go index e49052863..7f365268c 100644 --- a/dax/mds/schemar/schemar.go +++ b/dax/mds/schemar/schemar.go @@ -13,6 +13,13 @@ type Schemar interface { CreateField(context.Context, dax.QualifiedTableID, *dax.Field) error DropField(context.Context, dax.QualifiedTableID, dax.FieldName) error Table(context.Context, dax.QualifiedTableID) (*dax.QualifiedTable, error) + + // Tables returns a list of tables. If the qualifier's DatabaseID + // is empty, all tables in the org will be returned. If the + // OrganizationID is empty, all tables will be returned. If both + // are populated, only tables in that database will be returned. If + // one or more table IDs are passed in the third argument, + // only tables matching those IDs will be returned. Tables(context.Context, dax.TableQualifier, ...dax.TableID) ([]*dax.QualifiedTable, error) // TableID is a reverse-lookup method to get the TableID for a given diff --git a/dax/node.go b/dax/node.go index 8bc16790e..bd130ee95 100644 --- a/dax/node.go +++ b/dax/node.go @@ -29,6 +29,62 @@ type NodeService interface { Nodes(context.Context) ([]*Node, error) } +// ComputeNode represents a compute node and the table/shards for which it is +// responsible. +type ComputeNode struct { + Address Address `json:"address"` + Table TableKey `json:"table"` + Shards ShardNums `json:"shards"` +} + +// TranslateNode represents a translate node and the table/partitions for which +// it is responsible. +type TranslateNode struct { + Address Address `json:"address"` + Table TableKey `json:"table"` + Partitions PartitionNums `json:"partitions"` +} + +type Noder interface { + ComputeNodes(ctx context.Context, qtid QualifiedTableID, shards ...ShardNum) ([]ComputeNode, error) + TranslateNodes(ctx context.Context, qtid QualifiedTableID, partitions ...PartitionNum) ([]TranslateNode, error) + + // IngestPartition is effectively the "write" version of TranslateNodes. Its + // implementations will return the same Address that TranslateNodes would, + // but it includes the logic to create/assign the partition if it is not + // already being handled by a computer. + IngestPartition(ctx context.Context, qtid QualifiedTableID, partition PartitionNum) (Address, error) + + // IngestShard is effectively the "write" version of ComputeNodes. Its + // implementations will return the same Address that ComputeNodes would, but + // it includes the logic to create/assign the shard if it is not already + // being handled by a computer. + IngestShard(ctx context.Context, qtid QualifiedTableID, shard ShardNum) (Address, error) +} + +// Ensure type implements interface.
+var _ Noder = &nopNoder{} + +// nopNoder is a no-op implementation of the Noder interface. +type nopNoder struct{} + +func NewNopNoder() *nopNoder { + return &nopNoder{} +} + +func (n *nopNoder) ComputeNodes(ctx context.Context, qtid QualifiedTableID, shards ...ShardNum) ([]ComputeNode, error) { + return nil, nil +} +func (n *nopNoder) IngestPartition(ctx context.Context, qtid QualifiedTableID, partition PartitionNum) (Address, error) { + return "", nil +} +func (n *nopNoder) IngestShard(ctx context.Context, qtid QualifiedTableID, shard ShardNum) (Address, error) { + return "", nil +} +func (n *nopNoder) TranslateNodes(ctx context.Context, qtid QualifiedTableID, partitions ...PartitionNum) ([]TranslateNode, error) { + return nil, nil +} + //////////////////////////////////////////////////// // Errors //////////////////////////////////////////////////// diff --git a/dax/queryer/featurebase_importer.go b/dax/queryer/featurebase_importer.go deleted file mode 100644 index e829a3d17..000000000 --- a/dax/queryer/featurebase_importer.go +++ /dev/null @@ -1,41 +0,0 @@ -package queryer - -import ( - "context" - - featurebase "github.com/featurebasedb/featurebase/v3" -) - -// Ensure type implements interface. -var _ Importer = &FeatureBaseImporter{} - -// FeatureBaseImporter is an implementation of the Importer interface which uses -// a pointer to a featurebase.API to make the underlying calls. This assumes -// those calls need to be Qcx aware, so this takes that into account. -type FeatureBaseImporter struct { - api *featurebase.API -} - -func NewFeatureBaseImporter(api *featurebase.API) *FeatureBaseImporter { - return &FeatureBaseImporter{ - api: api, - } -} - -func (fi *FeatureBaseImporter) CreateIndexKeys(ctx context.Context, index string, keys ...string) (map[string]uint64, error) { - return fi.api.CreateIndexKeys(ctx, index, keys...) -} - -func (fi *FeatureBaseImporter) CreateFieldKeys(ctx context.Context, index, field string, keys ...string) (map[string]uint64, error) { - return fi.api.CreateFieldKeys(ctx, index, field, keys...) -} - -func (fi *FeatureBaseImporter) Import(ctx context.Context, req *featurebase.ImportRequest, opts ...featurebase.ImportOption) error { - qcx := fi.api.Txf().NewQcx() - return fi.api.Import(ctx, qcx, req, opts...) -} - -func (fi *FeatureBaseImporter) ImportValue(ctx context.Context, req *featurebase.ImportValueRequest, opts ...featurebase.ImportOption) error { - qcx := fi.api.Txf().NewQcx() - return fi.api.ImportValue(ctx, qcx, req, opts...) -} diff --git a/dax/queryer/interfaces.go b/dax/queryer/interfaces.go deleted file mode 100644 index 3ecfc48cc..000000000 --- a/dax/queryer/interfaces.go +++ /dev/null @@ -1,78 +0,0 @@ -package queryer - -import ( - "context" - - featurebase "github.com/featurebasedb/featurebase/v3" - "github.com/featurebasedb/featurebase/v3/dax" - "github.com/featurebasedb/featurebase/v3/dax/mds/controller" - "github.com/featurebasedb/featurebase/v3/dax/mds/schemar" -) - -type MDS interface { - // Controller-related methods. - ComputeNodes(ctx context.Context, qtid dax.QualifiedTableID, shards ...dax.ShardNum) ([]controller.ComputeNode, error) - IngestPartition(ctx context.Context, qtid dax.QualifiedTableID, partition dax.PartitionNum) (dax.Address, error) - IngestShard(ctx context.Context, qtid dax.QualifiedTableID, shard dax.ShardNum) (dax.Address, error) - TranslateNodes(ctx context.Context, qtid dax.QualifiedTableID, partitions ...dax.PartitionNum) ([]controller.TranslateNode, error) - - // Schemar-related methods.
- schemar.Schemar -} - -// Ensure type implements interface. -var _ MDS = &NopMDS{} - -// NopMDS is a no-op implementation of the MDS interface. -type NopMDS struct { - schemar.NopSchemar -} - -func NewNopMDS() *NopMDS { - return &NopMDS{} -} - -func (m *NopMDS) ComputeNodes(ctx context.Context, qtid dax.QualifiedTableID, shards ...dax.ShardNum) ([]controller.ComputeNode, error) { - return nil, nil -} -func (m *NopMDS) IngestPartition(ctx context.Context, qtid dax.QualifiedTableID, partition dax.PartitionNum) (dax.Address, error) { - return "", nil -} -func (m *NopMDS) IngestShard(ctx context.Context, qtid dax.QualifiedTableID, shard dax.ShardNum) (dax.Address, error) { - return "", nil -} -func (m *NopMDS) TranslateNodes(ctx context.Context, qtid dax.QualifiedTableID, partitions ...dax.PartitionNum) ([]controller.TranslateNode, error) { - return nil, nil -} - -type Importer interface { - CreateIndexKeys(ctx context.Context, index string, keys ...string) (map[string]uint64, error) - CreateFieldKeys(ctx context.Context, index, field string, keys ...string) (map[string]uint64, error) - Import(ctx context.Context, req *featurebase.ImportRequest, opts ...featurebase.ImportOption) error - ImportValue(ctx context.Context, req *featurebase.ImportValueRequest, opts ...featurebase.ImportOption) error -} - -// Ensure type implements interface. -var _ Importer = &NopImporter{} - -// NopImporter is a no-op implementation of the Importer interface. -type NopImporter struct{} - -func NewNopImporter() *NopImporter { - return &NopImporter{} -} - -func (n *NopImporter) CreateIndexKeys(ctx context.Context, index string, keys ...string) (map[string]uint64, error) { - return nil, nil -} - -func (n *NopImporter) CreateFieldKeys(ctx context.Context, index, field string, keys ...string) (map[string]uint64, error) { - return nil, nil -} - -func (n *NopImporter) Import(ctx context.Context, req *featurebase.ImportRequest, opts ...featurebase.ImportOption) error { - return nil -} -func (n *NopImporter) ImportValue(ctx context.Context, req *featurebase.ImportValueRequest, opts ...featurebase.ImportOption) error { - return nil -} diff --git a/dax/queryer/orchestrator.go b/dax/queryer/orchestrator.go index 0dedbbe9e..84a1979e3 100644 --- a/dax/queryer/orchestrator.go +++ b/dax/queryer/orchestrator.go @@ -11,8 +11,6 @@ import ( featurebase "github.com/featurebasedb/featurebase/v3" "github.com/featurebasedb/featurebase/v3/dax" - "github.com/featurebasedb/featurebase/v3/dax/mds/controller" - "github.com/featurebasedb/featurebase/v3/dax/mds/schemar" "github.com/featurebasedb/featurebase/v3/errors" "github.com/featurebasedb/featurebase/v3/logger" "github.com/featurebasedb/featurebase/v3/pql" @@ -43,22 +41,25 @@ const ( ) type Topologer interface { - ComputeNodes(ctx context.Context, index string, shards []uint64) ([]controller.ComputeNode, error) + ComputeNodes(ctx context.Context, index string, shards []uint64) ([]dax.ComputeNode, error) } type MDSTopology struct { - mds MDS + noder dax.Noder } -func (m *MDSTopology) ComputeNodes(ctx context.Context, index string, shards []uint64) ([]controller.ComputeNode, error) { +func (m *MDSTopology) ComputeNodes(ctx context.Context, index string, shards []uint64) ([]dax.ComputeNode, error) { var daxShards = make(dax.ShardNums, len(shards)) for i, s := range shards { daxShards[i] = dax.ShardNum(s) } + // TODO(tlt): this needs review; MDSTopology is converting from + // string/uint64 to qtid/shardNum?? 
Perhaps we can get rid of the Topologer + // interface altogether and replace it with dax.Noder. qtid := dax.TableKey(index).QualifiedTableID() - return m.mds.ComputeNodes(ctx, qtid, daxShards...) + return m.noder.ComputeNodes(ctx, qtid, daxShards...) } // TODO(jaffee) we need version info in here ASAP. whenever schema or topo @@ -79,7 +80,7 @@ type Translator interface { // executor recursively executes calls in a PQL query across all shards. type orchestrator struct { - schema featurebase.SchemaInfoAPI + schema featurebase.SchemaAPI topology Topologer trans Translator @@ -105,34 +106,29 @@ func emptyResult(c *pql.Call) interface{} { } // Execute executes a PQL query. -func (o *orchestrator) Execute(ctx context.Context, index string, q *pql.Query, shards []uint64, opt *featurebase.ExecOptions) (featurebase.QueryResponse, error) { +func (o *orchestrator) Execute(ctx context.Context, tableKeyer dax.TableKeyer, q *pql.Query, shards []uint64, opt *featurebase.ExecOptions) (featurebase.QueryResponse, error) { span, ctx := tracing.StartSpanFromContext(ctx, "orchestrator.Execute") span.LogKV("pql", q.String()) defer span.Finish() resp := featurebase.QueryResponse{} + qtbl, ok := tableKeyer.(*dax.QualifiedTable) + if !ok { + return resp, errors.New(errors.ErrUncoded, "orchestrator.Execute expects a dax.QualifiedTable") + } + // Check for query cancellation. if err := validateQueryContext(ctx); err != nil { return resp, err } - // Verify that an index is set. - if index == "" { - return resp, featurebase.ErrIndexRequired - } - - idx, err := o.schema.IndexInfo(ctx, index) - if err != nil { - return resp, errors.Wrap(err, "getting index") - } - // Default options. if opt == nil { opt = &featurebase.ExecOptions{} } - results, err := o.execute(ctx, index, q, shards, opt) + results, err := o.execute(ctx, tableKeyer, q, shards, opt) if err != nil { return resp, err } else if err := validateQueryContext(ctx); err != nil { @@ -140,7 +136,7 @@ func (o *orchestrator) Execute(ctx context.Context, index string, q *pql.Query, } resp.Results = results - if err := o.translateResults(ctx, index, idx, q.Calls, results, opt.MaxMemory); err != nil { + if err := o.translateResults(ctx, qtbl, q.Calls, results, opt.MaxMemory); err != nil { if errors.Cause(err) == featurebase.ErrTranslatingKeyNotFound { // No error - return empty result resp.Results = make([]interface{}, len(q.Calls)) @@ -157,10 +153,12 @@ func (o *orchestrator) Execute(ctx context.Context, index string, q *pql.Query, return resp, nil } -func (o *orchestrator) execute(ctx context.Context, index string, q *pql.Query, shards []uint64, opt *featurebase.ExecOptions) ([]interface{}, error) { +func (o *orchestrator) execute(ctx context.Context, tableKeyer dax.TableKeyer, q *pql.Query, shards []uint64, opt *featurebase.ExecOptions) ([]interface{}, error) { span, ctx := tracing.StartSpanFromContext(ctx, "Executor.execute") defer span.Finish() + index := string(tableKeyer.Key()) + // Apply translations if necessary. var colTranslations map[string]map[string]uint64 // colID := colTranslations[index][key] var rowTranslations map[string]map[string]map[string]uint64 // rowID := rowTranslations[index][field][key] @@ -182,7 +180,7 @@ func (o *orchestrator) execute(ctx context.Context, index string, q *pql.Query, // Apply call translation. 
if !opt.Remote && !opt.PreTranslated { - translated, err := o.translateCall(ctx, call, index, colTranslations, rowTranslations) + translated, err := o.translateCall(ctx, call, tableKeyer, colTranslations, rowTranslations) if err != nil { return nil, errors.Wrap(err, "translating call") } @@ -203,13 +201,13 @@ func (o *orchestrator) execute(ctx context.Context, index string, q *pql.Query, if call.Name == "Count" { // Handle count specially, skipping the level directly underneath it. for _, child := range call.Children { - err := o.handlePreCallChildren(ctx, index, child, shards, opt) + err := o.handlePreCallChildren(ctx, tableKeyer, child, shards, opt) if err != nil { return nil, err } } } else { - err := o.handlePreCallChildren(ctx, index, call, shards, opt) + err := o.handlePreCallChildren(ctx, tableKeyer, call, shards, opt) if err != nil { return nil, err } @@ -222,10 +220,11 @@ func (o *orchestrator) execute(ctx context.Context, index string, q *pql.Query, // already precomputed by handlePreCallChildren, though, // we don't need this logic in executeCall. newIndex := call.CallIndex() + newTableKeyer := dax.StringTableKeyer(newIndex) if newIndex != "" && newIndex != index { - v, err = o.executeCall(ctx, newIndex, call, nil, opt) + v, err = o.executeCall(ctx, newTableKeyer, call, nil, opt) } else { - v, err = o.executeCall(ctx, index, call, shards, opt) + v, err = o.executeCall(ctx, tableKeyer, call, shards, opt) } if err != nil { return nil, err @@ -248,7 +247,9 @@ func (o *orchestrator) execute(ctx context.Context, index string, q *pql.Query, // handlePreCalls traverses the call tree looking for calls that need // precomputed values (e.g. Distinct, UnionRows, ConstRow...). -func (o *orchestrator) handlePreCalls(ctx context.Context, index string, c *pql.Call, shards []uint64, opt *featurebase.ExecOptions) error { +func (o *orchestrator) handlePreCalls(ctx context.Context, tableKeyer dax.TableKeyer, c *pql.Call, shards []uint64, opt *featurebase.ExecOptions) error { + index := string(tableKeyer.Key()) + if c.Name == "Precomputed" { idx := c.Args["valueidx"].(int64) if idx >= 0 && idx < int64(len(opt.EmbeddedData)) { @@ -285,10 +286,11 @@ func (o *orchestrator) handlePreCalls(ctx context.Context, index string, c *pql. if newIndex != "" && newIndex != index { c.Type = pql.PrecallGlobal index = newIndex + tableKeyer = dax.StringTableKeyer(index) // we need to recompute shards, then shards = nil } - if err := o.handlePreCallChildren(ctx, index, c, shards, opt); err != nil { + if err := o.handlePreCallChildren(ctx, tableKeyer, c, shards, opt); err != nil { return err } // child calls already handled, no precall for this, so we're done @@ -304,7 +306,7 @@ func (o *orchestrator) handlePreCalls(ctx context.Context, index string, c *pql. // We set c to look like a normal call, and actually execute it: c.Type = pql.PrecallNone // possibly override call index. - v, err := o.executeCall(ctx, index, c, shards, opt) + v, err := o.executeCall(ctx, tableKeyer, c, shards, opt) if err != nil { return err } @@ -347,12 +349,12 @@ func (o *orchestrator) dumpPrecomputedCalls(ctx context.Context, c *pql.Call) { } // handlePreCallChildren handles any pre-calls in the children of a given call. 
-func (o *orchestrator) handlePreCallChildren(ctx context.Context, index string, c *pql.Call, shards []uint64, opt *featurebase.ExecOptions) error { +func (o *orchestrator) handlePreCallChildren(ctx context.Context, tableKeyer dax.TableKeyer, c *pql.Call, shards []uint64, opt *featurebase.ExecOptions) error { for i := range c.Children { if err := ctx.Err(); err != nil { return err } - if err := o.handlePreCalls(ctx, index, c.Children[i], shards, opt); err != nil { + if err := o.handlePreCalls(ctx, tableKeyer, c.Children[i], shards, opt); err != nil { return err } } @@ -366,7 +368,7 @@ func (o *orchestrator) handlePreCallChildren(ctx context.Context, index string, if err := ctx.Err(); err != nil { return err } - if err := o.handlePreCalls(ctx, index, call, shards, opt); err != nil { + if err := o.handlePreCalls(ctx, tableKeyer, call, shards, opt); err != nil { return err } } @@ -375,7 +377,7 @@ func (o *orchestrator) handlePreCallChildren(ctx context.Context, index string, } // preprocessQuery expands any calls that need preprocessing. -func (o *orchestrator) preprocessQuery(ctx context.Context, index string, c *pql.Call, shards []uint64, opt *featurebase.ExecOptions) (*pql.Call, error) { +func (o *orchestrator) preprocessQuery(ctx context.Context, tableKeyer dax.TableKeyer, c *pql.Call, shards []uint64, opt *featurebase.ExecOptions) (*pql.Call, error) { switch c.Name { case "All": _, hasLimit, err := c.UintArg("limit") @@ -404,7 +406,7 @@ func (o *orchestrator) preprocessQuery(ctx context.Context, index string, c *pql out := make([]*pql.Call, len(c.Children)) var changed bool for i, child := range c.Children { - res, err := o.preprocessQuery(ctx, index, child, shards, opt) + res, err := o.preprocessQuery(ctx, tableKeyer, child, shards, opt) if err != nil { return nil, err } @@ -422,7 +424,7 @@ func (o *orchestrator) preprocessQuery(ctx context.Context, index string, c *pql } // executeCall executes a call. -func (o *orchestrator) executeCall(ctx context.Context, index string, c *pql.Call, shards []uint64, opt *featurebase.ExecOptions) (interface{}, error) { +func (o *orchestrator) executeCall(ctx context.Context, tableKeyer dax.TableKeyer, c *pql.Call, shards []uint64, opt *featurebase.ExecOptions) (interface{}, error) { span, ctx := tracing.StartSpanFromContext(ctx, "Executor.executeCall") defer span.Finish() @@ -431,7 +433,7 @@ func (o *orchestrator) executeCall(ctx context.Context, index string, c *pql.Cal } else if err := o.validateCallArgs(c); err != nil { return nil, errors.Wrap(err, "validating args") } - indexTag := "index:" + index + indexTag := "index:" + string(tableKeyer.Key()) metricName := "query_" + strings.ToLower(c.Name) + "_total" statFn := func() { if !opt.Remote { @@ -440,7 +442,7 @@ func (o *orchestrator) executeCall(ctx context.Context, index string, c *pql.Cal } // Preprocess the query. 
- c, err := o.preprocessQuery(ctx, index, c, shards, opt) + c, err := o.preprocessQuery(ctx, tableKeyer, c, shards, opt) if err != nil { return nil, err } @@ -448,23 +450,23 @@ func (o *orchestrator) executeCall(ctx context.Context, index string, c *pql.Cal switch c.Name { case "Sum": statFn() - res, err := o.executeSum(ctx, index, c, shards, opt) + res, err := o.executeSum(ctx, tableKeyer, c, shards, opt) return res, errors.Wrap(err, "executeSum") case "Min": statFn() - res, err := o.executeMin(ctx, index, c, shards, opt) + res, err := o.executeMin(ctx, tableKeyer, c, shards, opt) return res, errors.Wrap(err, "executeMin") case "Max": statFn() - res, err := o.executeMax(ctx, index, c, shards, opt) + res, err := o.executeMax(ctx, tableKeyer, c, shards, opt) return res, errors.Wrap(err, "executeMax") case "MinRow": statFn() - res, err := o.executeMinRow(ctx, index, c, shards, opt) + res, err := o.executeMinRow(ctx, tableKeyer, c, shards, opt) return res, errors.Wrap(err, "executeMinRow") case "MaxRow": statFn() - res, err := o.executeMaxRow(ctx, index, c, shards, opt) + res, err := o.executeMaxRow(ctx, tableKeyer, c, shards, opt) return res, errors.Wrap(err, "executeMaxRow") // case "Clear": // statFn() @@ -476,7 +478,7 @@ func (o *orchestrator) executeCall(ctx context.Context, index string, c *pql.Cal // return res, errors.Wrap(err, "executeClearRow") case "Distinct": statFn() - res, err := o.executeDistinct(ctx, index, c, shards, opt) + res, err := o.executeDistinct(ctx, tableKeyer, c, shards, opt) return res, errors.Wrap(err, "executeDistinct") // case "Store": // statFn() @@ -484,7 +486,7 @@ func (o *orchestrator) executeCall(ctx context.Context, index string, c *pql.Cal // return res, errors.Wrap(err, "executeSetRow") case "Count": statFn() - res, err := o.executeCount(ctx, index, c, shards, opt) + res, err := o.executeCount(ctx, tableKeyer, c, shards, opt) return res, errors.Wrap(err, "executeCount") // case "Set": // statFn() @@ -492,49 +494,49 @@ func (o *orchestrator) executeCall(ctx context.Context, index string, c *pql.Cal // return res, errors.Wrap(err, "executeSet") case "TopK": statFn() - res, err := o.executeTopK(ctx, index, c, shards, opt) + res, err := o.executeTopK(ctx, tableKeyer, c, shards, opt) return res, errors.Wrap(err, "executeTopK") case "TopN": statFn() - res, err := o.executeTopN(ctx, index, c, shards, opt) + res, err := o.executeTopN(ctx, tableKeyer, c, shards, opt) return res, errors.Wrap(err, "executeTopN") case "Rows": statFn() - res, err := o.executeRows(ctx, index, c, shards, opt) + res, err := o.executeRows(ctx, tableKeyer, c, shards, opt) return res, errors.Wrap(err, "executeRows") case "Extract": statFn() - res, err := o.executeExtract(ctx, index, c, shards, opt) + res, err := o.executeExtract(ctx, tableKeyer, c, shards, opt) return res, errors.Wrap(err, "executeExtract") case "GroupBy": statFn() - res, err := o.executeGroupBy(ctx, index, c, shards, opt) + res, err := o.executeGroupBy(ctx, tableKeyer, c, shards, opt) return res, errors.Wrap(err, "executeGroupBy") case "Options": statFn() - res, err := o.executeOptionsCall(ctx, index, c, shards, opt) + res, err := o.executeOptionsCall(ctx, tableKeyer, c, shards, opt) return res, errors.Wrap(err, "executeOptionsCall") case "IncludesColumn": - res, err := o.executeIncludesColumnCall(ctx, index, c, shards, opt) + res, err := o.executeIncludesColumnCall(ctx, tableKeyer, c, shards, opt) return res, errors.Wrap(err, "executeIncludesColumnCall") case "FieldValue": statFn() - res, err := 
o.executeFieldValueCall(ctx, index, c, shards, opt) + res, err := o.executeFieldValueCall(ctx, tableKeyer, c, shards, opt) return res, errors.Wrap(err, "executeFieldValueCall") case "Precomputed": - res, err := o.executePrecomputedCall(ctx, index, c, shards, opt) + res, err := o.executePrecomputedCall(ctx, tableKeyer, c, shards, opt) return res, errors.Wrap(err, "executePrecomputedCall") case "UnionRows": - res, err := o.executeUnionRows(ctx, index, c, shards, opt) + res, err := o.executeUnionRows(ctx, tableKeyer, c, shards, opt) return res, errors.Wrap(err, "executeUnionRows") case "ConstRow": - res, err := o.executeConstRow(ctx, index, c) + res, err := o.executeConstRow(ctx, tableKeyer, c) return res, errors.Wrap(err, "executeConstRow") case "Limit": - res, err := o.executeLimitCall(ctx, index, c, shards, opt) + res, err := o.executeLimitCall(ctx, tableKeyer, c, shards, opt) return res, errors.Wrap(err, "executeLimitCall") case "Percentile": - res, err := o.executePercentile(ctx, index, c, shards, opt) + res, err := o.executePercentile(ctx, tableKeyer, c, shards, opt) return res, errors.Wrap(err, "executePercentile") // case "Delete": // statFn() //TODO(twg) need this? @@ -542,7 +544,7 @@ func (o *orchestrator) executeCall(ctx context.Context, index string, c *pql.Cal // return res, errors.Wrap(err, "executeDelete") default: // o.g. "Row", "Union", "Intersect" or anything that returns a bitmap. statFn() - res, err := o.executeBitmapCall(ctx, index, c, shards, opt) + res, err := o.executeBitmapCall(ctx, tableKeyer, c, shards, opt) return res, errors.Wrap(err, "executeBitmapCall") } } @@ -566,7 +568,7 @@ func (o *orchestrator) validateCallArgs(c *pql.Call) error { return nil } -func (o *orchestrator) executeOptionsCall(ctx context.Context, index string, c *pql.Call, shards []uint64, opt *featurebase.ExecOptions) (interface{}, error) { +func (o *orchestrator) executeOptionsCall(ctx context.Context, tableKeyer dax.TableKeyer, c *pql.Call, shards []uint64, opt *featurebase.ExecOptions) (interface{}, error) { span, ctx := tracing.StartSpanFromContext(ctx, "Executor.executeOptionsCall") defer span.Finish() @@ -587,11 +589,11 @@ func (o *orchestrator) executeOptionsCall(ctx context.Context, index string, c * return nil, errors.New(errors.ErrUncoded, "Query(): shards must be a list of unsigned integers") } } - return o.executeCall(ctx, index, c.Children[0], shards, optCopy) + return o.executeCall(ctx, tableKeyer, c.Children[0], shards, optCopy) } // executeIncludesColumnCall executes an IncludesColumn() call. -func (o *orchestrator) executeIncludesColumnCall(ctx context.Context, index string, c *pql.Call, shards []uint64, opt *featurebase.ExecOptions) (bool, error) { +func (o *orchestrator) executeIncludesColumnCall(ctx context.Context, tableKeyer dax.TableKeyer, c *pql.Call, shards []uint64, opt *featurebase.ExecOptions) (bool, error) { // Get the shard containing the column, since that's the only // shard that needs to execute this query. var shard uint64 @@ -609,7 +611,7 @@ func (o *orchestrator) executeIncludesColumnCall(ctx context.Context, index stri return other || v.(bool) } - result, err := o.mapReduce(ctx, index, []uint64{shard}, c, opt, reduceFn) + result, err := o.mapReduce(ctx, tableKeyer, []uint64{shard}, c, opt, reduceFn) if err != nil { return false, err } @@ -617,7 +619,7 @@ func (o *orchestrator) executeIncludesColumnCall(ctx context.Context, index stri } // executeFieldValueCall executes a FieldValue() call. 
-func (o *orchestrator) executeFieldValueCall(ctx context.Context, index string, c *pql.Call, shards []uint64, opt *featurebase.ExecOptions) (_ featurebase.ValCount, err error) { +func (o *orchestrator) executeFieldValueCall(ctx context.Context, tableKeyer dax.TableKeyer, c *pql.Call, shards []uint64, opt *featurebase.ExecOptions) (_ featurebase.ValCount, err error) { fieldName, ok := c.Args["field"].(string) if !ok || fieldName == "" { return featurebase.ValCount{}, featurebase.ErrFieldRequired @@ -644,7 +646,7 @@ func (o *orchestrator) executeFieldValueCall(ctx context.Context, index string, return v } - result, err := o.mapReduce(ctx, index, []uint64{shard}, c, opt, reduceFn) + result, err := o.mapReduce(ctx, tableKeyer, []uint64{shard}, c, opt, reduceFn) if err != nil { return featurebase.ValCount{}, errors.Wrap(err, "map reduce") } @@ -654,7 +656,7 @@ func (o *orchestrator) executeFieldValueCall(ctx context.Context, index string, } // executeLimitCall executes a Limit() call. -func (o *orchestrator) executeLimitCall(ctx context.Context, index string, c *pql.Call, shards []uint64, opt *featurebase.ExecOptions) (*featurebase.Row, error) { +func (o *orchestrator) executeLimitCall(ctx context.Context, tableKeyer dax.TableKeyer, c *pql.Call, shards []uint64, opt *featurebase.ExecOptions) (*featurebase.Row, error) { bitmapCall := c.Children[0] limit, hasLimit, err := c.UintArg("limit") @@ -671,7 +673,7 @@ func (o *orchestrator) executeLimitCall(ctx context.Context, index string, c *pq } // Execute bitmap call, storing the full result on this node. - res, err := o.executeCall(ctx, index, bitmapCall, shards, opt) + res, err := o.executeCall(ctx, tableKeyer, bitmapCall, shards, opt) if err != nil { return nil, errors.Wrap(err, "limit map reduce") } @@ -730,7 +732,7 @@ func (o *orchestrator) executeLimitCall(ctx context.Context, index string, c *pq } // executeSum executes a Sum() call. -func (o *orchestrator) executeSum(ctx context.Context, index string, c *pql.Call, shards []uint64, opt *featurebase.ExecOptions) (_ featurebase.ValCount, err error) { +func (o *orchestrator) executeSum(ctx context.Context, tableKeyer dax.TableKeyer, c *pql.Call, shards []uint64, opt *featurebase.ExecOptions) (_ featurebase.ValCount, err error) { span, ctx := tracing.StartSpanFromContext(ctx, "Executor.executeSum") defer span.Finish() @@ -749,7 +751,7 @@ func (o *orchestrator) executeSum(ctx context.Context, index string, c *pql.Call return other.Add(v.(featurebase.ValCount)) } - result, err := o.mapReduce(ctx, index, shards, c, opt, reduceFn) + result, err := o.mapReduce(ctx, tableKeyer, shards, c, opt, reduceFn) if err != nil { return featurebase.ValCount{}, err } @@ -762,7 +764,7 @@ func (o *orchestrator) executeSum(ctx context.Context, index string, c *pql.Call // scale summed response if it's a decimal field and this is // not a remote query (we're about to return to original client). if !opt.Remote { - field, err := o.schema.FieldInfo(ctx, index, fieldName) + field, err := o.schemaFieldInfo(ctx, tableKeyer, fieldName) if field == nil { return featurebase.ValCount{}, errors.Wrapf(err, "%q", fieldName) } @@ -779,7 +781,7 @@ func (o *orchestrator) executeSum(ctx context.Context, index string, c *pql.Call // executeDistinct executes a Distinct call on a field. It returns a // SignedRow for int fields and a *Row for set/mutex/time fields. 
-func (o *orchestrator) executeDistinct(ctx context.Context, index string, c *pql.Call, shards []uint64, opt *featurebase.ExecOptions) (interface{}, error) { +func (o *orchestrator) executeDistinct(ctx context.Context, tableKeyer dax.TableKeyer, c *pql.Call, shards []uint64, opt *featurebase.ExecOptions) (interface{}, error) { span, ctx := tracing.StartSpanFromContext(ctx, "Executor.executeDistinct") defer span.Finish() @@ -814,7 +816,7 @@ func (o *orchestrator) executeDistinct(ctx context.Context, index string, c *pql } } - result, err := o.mapReduce(ctx, index, shards, c, opt, reduceFn) + result, err := o.mapReduce(ctx, tableKeyer, shards, c, opt, reduceFn) if err != nil { return nil, errors.Wrap(err, "mapReduce") } @@ -826,7 +828,7 @@ func (o *orchestrator) executeDistinct(ctx context.Context, index string, c *pql } // executeMin executes a Min() call. -func (o *orchestrator) executeMin(ctx context.Context, index string, c *pql.Call, shards []uint64, opt *featurebase.ExecOptions) (_ featurebase.ValCount, err error) { +func (o *orchestrator) executeMin(ctx context.Context, tableKeyer dax.TableKeyer, c *pql.Call, shards []uint64, opt *featurebase.ExecOptions) (_ featurebase.ValCount, err error) { span, ctx := tracing.StartSpanFromContext(ctx, "Executor.executeMin") defer span.Finish() @@ -844,7 +846,7 @@ func (o *orchestrator) executeMin(ctx context.Context, index string, c *pql.Call return other.Smaller(v.(featurebase.ValCount)) } - result, err := o.mapReduce(ctx, index, shards, c, opt, reduceFn) + result, err := o.mapReduce(ctx, tableKeyer, shards, c, opt, reduceFn) if err != nil { return featurebase.ValCount{}, err } @@ -857,7 +859,7 @@ func (o *orchestrator) executeMin(ctx context.Context, index string, c *pql.Call } // executeMax executes a Max() call. -func (o *orchestrator) executeMax(ctx context.Context, index string, c *pql.Call, shards []uint64, opt *featurebase.ExecOptions) (_ featurebase.ValCount, err error) { +func (o *orchestrator) executeMax(ctx context.Context, tableKeyer dax.TableKeyer, c *pql.Call, shards []uint64, opt *featurebase.ExecOptions) (_ featurebase.ValCount, err error) { span, ctx := tracing.StartSpanFromContext(ctx, "Executor.executeMax") defer span.Finish() @@ -875,7 +877,7 @@ func (o *orchestrator) executeMax(ctx context.Context, index string, c *pql.Call return other.Larger(v.(featurebase.ValCount)) } - result, err := o.mapReduce(ctx, index, shards, c, opt, reduceFn) + result, err := o.mapReduce(ctx, tableKeyer, shards, c, opt, reduceFn) if err != nil { return featurebase.ValCount{}, err } @@ -889,7 +891,7 @@ func (o *orchestrator) executeMax(ctx context.Context, index string, c *pql.Call // TODO(jaffee) fix this... valcountize assumes access to field details like base // executePercentile executes a Percentile() call. 
-func (o *orchestrator) executePercentile(ctx context.Context, index string, c *pql.Call, shards []uint64, opt *featurebase.ExecOptions) (_ featurebase.ValCount, err error) { +func (o *orchestrator) executePercentile(ctx context.Context, tableKeyer dax.TableKeyer, c *pql.Call, shards []uint64, opt *featurebase.ExecOptions) (_ featurebase.ValCount, err error) { span, ctx := tracing.StartSpanFromContext(ctx, "Executor.executePercentile") defer span.Finish() @@ -916,7 +918,7 @@ func (o *orchestrator) executePercentile(ctx context.Context, index string, c *p if err != nil { return featurebase.ValCount{}, errors.New(errors.ErrUncoded, "Percentile(): field required") } - field, err := o.schema.FieldInfo(ctx, index, fieldName) + field, err := o.schemaFieldInfo(ctx, tableKeyer, fieldName) if err != nil { return featurebase.ValCount{}, ErrFieldNotFound } @@ -935,7 +937,7 @@ func (o *orchestrator) executePercentile(ctx context.Context, index string, c *p if filterCall != nil { minCall.Children = append(minCall.Children, filterCall) } - minVal, err := o.executeMin(ctx, index, minCall, shards, opt) + minVal, err := o.executeMin(ctx, tableKeyer, minCall, shards, opt) if err != nil { return featurebase.ValCount{}, errors.Wrap(err, "executing Min call for Percentile") } @@ -949,7 +951,7 @@ func (o *orchestrator) executePercentile(ctx context.Context, index string, c *p if filterCall != nil { maxCall.Children = append(maxCall.Children, filterCall) } - maxVal, err := o.executeMax(ctx, index, maxCall, shards, opt) + maxVal, err := o.executeMax(ctx, tableKeyer, maxCall, shards, opt) if err != nil { return featurebase.ValCount{}, errors.Wrap(err, "executing Max call for Percentile") } @@ -981,7 +983,7 @@ func (o *orchestrator) executePercentile(ctx context.Context, index string, c *p Op: pql.Token(pql.LT), Value: possibleNthVal, } - leftCountUint64, err := o.executeCount(ctx, index, countCall, shards, opt) + leftCountUint64, err := o.executeCount(ctx, tableKeyer, countCall, shards, opt) if err != nil { return featurebase.ValCount{}, errors.Wrap(err, "executing Count call L for Percentile") } @@ -992,7 +994,7 @@ func (o *orchestrator) executePercentile(ctx context.Context, index string, c *p Op: pql.Token(pql.GT), Value: possibleNthVal, } - rightCountUint64, err := o.executeCount(ctx, index, countCall, shards, opt) + rightCountUint64, err := o.executeCount(ctx, tableKeyer, countCall, shards, opt) if err != nil { return featurebase.ValCount{}, errors.Wrap(err, "executing Count call R for Percentile") } @@ -1029,7 +1031,7 @@ func cookValCount(val int64, cnt uint64, field *featurebase.FieldInfo) featureba } // executeMinRow executes a MinRow() call. -func (o *orchestrator) executeMinRow(ctx context.Context, index string, c *pql.Call, shards []uint64, opt *featurebase.ExecOptions) (_ interface{}, err error) { +func (o *orchestrator) executeMinRow(ctx context.Context, tableKeyer dax.TableKeyer, c *pql.Call, shards []uint64, opt *featurebase.ExecOptions) (_ interface{}, err error) { span, ctx := tracing.StartSpanFromContext(ctx, "Executor.executeMinRow") defer span.Finish() @@ -1059,11 +1061,11 @@ func (o *orchestrator) executeMinRow(ctx context.Context, index string, c *pql.C return vp } - return o.mapReduce(ctx, index, shards, c, opt, reduceFn) + return o.mapReduce(ctx, tableKeyer, shards, c, opt, reduceFn) } // executeMaxRow executes a MaxRow() call. 
-func (o *orchestrator) executeMaxRow(ctx context.Context, index string, c *pql.Call, shards []uint64, opt *featurebase.ExecOptions) (_ interface{}, err error) { +func (o *orchestrator) executeMaxRow(ctx context.Context, tableKeyer dax.TableKeyer, c *pql.Call, shards []uint64, opt *featurebase.ExecOptions) (_ interface{}, err error) { span, ctx := tracing.StartSpanFromContext(ctx, "Executor.executeMaxRow") defer span.Finish() @@ -1093,11 +1095,11 @@ func (o *orchestrator) executeMaxRow(ctx context.Context, index string, c *pql.C return vp } - return o.mapReduce(ctx, index, shards, c, opt, reduceFn) + return o.mapReduce(ctx, tableKeyer, shards, c, opt, reduceFn) } // executePrecomputedCall pretends to execute a call that we have a precomputed value for. -func (o *orchestrator) executePrecomputedCall(ctx context.Context, index string, c *pql.Call, shards []uint64, opt *featurebase.ExecOptions) (_ *featurebase.Row, err error) { +func (o *orchestrator) executePrecomputedCall(ctx context.Context, tableKeyer dax.TableKeyer, c *pql.Call, shards []uint64, opt *featurebase.ExecOptions) (_ *featurebase.Row, err error) { span, _ := tracing.StartSpanFromContext(ctx, "Executor.executePrecomputedCall") defer span.Finish() result := featurebase.NewRow() @@ -1109,13 +1111,12 @@ func (o *orchestrator) executePrecomputedCall(ctx context.Context, index string, } // executeBitmapCall executes a call that returns a bitmap. -func (o *orchestrator) executeBitmapCall(ctx context.Context, index string, c *pql.Call, shards []uint64, opt *featurebase.ExecOptions) (_ *featurebase.Row, err error) { - +func (o *orchestrator) executeBitmapCall(ctx context.Context, tableKeyer dax.TableKeyer, c *pql.Call, shards []uint64, opt *featurebase.ExecOptions) (_ *featurebase.Row, err error) { span, ctx := tracing.StartSpanFromContext(ctx, "Executor.executeBitmapCall") span.LogKV("pqlCallName", c.Name) defer span.Finish() - indexTag := "index:" + index + indexTag := "index:" + string(tableKeyer.Key()) metricName := "query_" + strings.ToLower(c.Name) + "_total" if c.Name == "Row" && c.HasConditionArg() { metricName = "query_row_bsi_total" @@ -1138,7 +1139,7 @@ func (o *orchestrator) executeBitmapCall(ctx context.Context, index string, c *p return other } - other, err := o.mapReduce(ctx, index, shards, c, opt, reduceFn) + other, err := o.mapReduce(ctx, tableKeyer, shards, c, opt, reduceFn) if err != nil { return nil, errors.Wrap(err, "map reduce") } @@ -1155,7 +1156,7 @@ func (e Error) Error() string { return string(e) } const ViewNotFound = Error("view not found") const FragmentNotFound = Error("fragment not found") -func (o *orchestrator) executeTopK(ctx context.Context, index string, c *pql.Call, shards []uint64, opt *featurebase.ExecOptions) (interface{}, error) { +func (o *orchestrator) executeTopK(ctx context.Context, tableKeyer dax.TableKeyer, c *pql.Call, shards []uint64, opt *featurebase.ExecOptions) (interface{}, error) { span, ctx := tracing.StartSpanFromContext(ctx, "Executor.executeTopK") defer span.Finish() @@ -1165,7 +1166,7 @@ func (o *orchestrator) executeTopK(ctx context.Context, index string, c *pql.Cal return ([]*featurebase.Row)(featurebase.AddBSI(x, y)) } - other, err := o.mapReduce(ctx, index, shards, c, opt, reduceFn) + other, err := o.mapReduce(ctx, tableKeyer, shards, c, opt, reduceFn) if err != nil { return nil, err } @@ -1218,7 +1219,7 @@ func (p uint64Slice) Less(i, j int) bool { return p[i] < p[j] } // executeTopN executes a TopN() call. 
// This first performs the TopN() to determine the top results and then // requeries to retrieve the full counts for each of the top results. -func (o *orchestrator) executeTopN(ctx context.Context, index string, c *pql.Call, shards []uint64, opt *featurebase.ExecOptions) (*featurebase.PairsField, error) { +func (o *orchestrator) executeTopN(ctx context.Context, tableKeyer dax.TableKeyer, c *pql.Call, shards []uint64, opt *featurebase.ExecOptions) (*featurebase.PairsField, error) { span, ctx := tracing.StartSpanFromContext(ctx, "Executor.executeTopN") defer span.Finish() @@ -1234,7 +1235,7 @@ func (o *orchestrator) executeTopN(ctx context.Context, index string, c *pql.Cal } // Execute original query. - pairs, err := o.executeTopNShards(ctx, index, c, shards, opt) + pairs, err := o.executeTopNShards(ctx, tableKeyer, c, shards, opt) if err != nil { return nil, errors.Wrap(err, "finding top results") } @@ -1255,7 +1256,7 @@ func (o *orchestrator) executeTopN(ctx context.Context, index string, c *pql.Cal sort.Sort(uint64Slice(ids)) other.Args["ids"] = ids - trimmedList, err := o.executeTopNShards(ctx, index, other, shards, opt) + trimmedList, err := o.executeTopNShards(ctx, tableKeyer, other, shards, opt) if err != nil { return nil, errors.Wrap(err, "retrieving full counts") } @@ -1270,7 +1271,7 @@ func (o *orchestrator) executeTopN(ctx context.Context, index string, c *pql.Cal }, nil } -func (o *orchestrator) executeTopNShards(ctx context.Context, index string, c *pql.Call, shards []uint64, opt *featurebase.ExecOptions) (*featurebase.PairsField, error) { +func (o *orchestrator) executeTopNShards(ctx context.Context, tableKeyer dax.TableKeyer, c *pql.Call, shards []uint64, opt *featurebase.ExecOptions) (*featurebase.PairsField, error) { span, ctx := tracing.StartSpanFromContext(ctx, "Executor.executeTopNShards") defer span.Finish() @@ -1290,7 +1291,7 @@ func (o *orchestrator) executeTopNShards(ctx context.Context, index string, c *p return other } - other, err := o.mapReduce(ctx, index, shards, c, opt, reduceFn) + other, err := o.mapReduce(ctx, tableKeyer, shards, c, opt, reduceFn) if err != nil { return nil, err } @@ -1401,9 +1402,10 @@ func findGroupCounts(v interface{}) []featurebase.GroupCount { return nil } -func (o *orchestrator) executeGroupBy(ctx context.Context, index string, c *pql.Call, shards []uint64, opt *featurebase.ExecOptions) (*featurebase.GroupCounts, error) { +func (o *orchestrator) executeGroupBy(ctx context.Context, tableKeyer dax.TableKeyer, c *pql.Call, shards []uint64, opt *featurebase.ExecOptions) (*featurebase.GroupCounts, error) { span, ctx := tracing.StartSpanFromContext(ctx, "Executor.executeGroupBy") defer span.Finish() + // validate call if len(c.Children) == 0 { return nil, errors.New(errors.ErrUncoded, "need at least one child call") @@ -1477,7 +1479,7 @@ func (o *orchestrator) executeGroupBy(ctx context.Context, index string, c *pql. continue } - r, er := o.executeRows(ctx, index, child, shards, opt) + r, er := o.executeRows(ctx, tableKeyer, child, shards, opt) if er != nil { return nil, errors.Wrap(er, "getting rows for ") } @@ -1506,7 +1508,7 @@ func (o *orchestrator) executeGroupBy(ctx context.Context, index string, c *pql. return mergeGroupCounts(other, findGroupCounts(v), limit) } // Get full result set. 
- other, err := o.mapReduce(ctx, index, shards, c, opt, reduceFn) + other, err := o.mapReduce(ctx, tableKeyer, shards, c, opt, reduceFn) if err != nil { return nil, errors.Wrap(err, "mapReduce") } @@ -1566,7 +1568,7 @@ func (o *orchestrator) executeGroupBy(ctx context.Context, index string, c *pql. } opt.PreTranslated = true - aggregateCount, err := o.execute(ctx, index, &pql.Query{Calls: []*pql.Call{countDistinctIntersect}}, []uint64{}, opt) + aggregateCount, err := o.execute(ctx, tableKeyer, &pql.Query{Calls: []*pql.Call{countDistinctIntersect}}, []uint64{}, opt) if err != nil { return nil, err } @@ -1685,7 +1687,7 @@ func mergeGroupCounts(a, b []featurebase.GroupCount, limit int) []featurebase.Gr return ret } -func (o *orchestrator) executeRows(ctx context.Context, index string, c *pql.Call, shards []uint64, opt *featurebase.ExecOptions) (featurebase.RowIDs, error) { +func (o *orchestrator) executeRows(ctx context.Context, tableKeyer dax.TableKeyer, c *pql.Call, shards []uint64, opt *featurebase.ExecOptions) (featurebase.RowIDs, error) { // Fetch field name from argument. // Check "field" first for backwards compatibility. // TODO: remove at Pilosa 2.0 @@ -1739,7 +1741,7 @@ func (o *orchestrator) executeRows(ctx context.Context, index string, c *pql.Cal return other.Merge(v.(featurebase.RowIDs), limit) } // Get full result set. - other, err := o.mapReduce(ctx, index, shards, c, opt, reduceFn) + other, err := o.mapReduce(ctx, tableKeyer, shards, c, opt, reduceFn) if err != nil { return nil, err } @@ -1777,7 +1779,7 @@ func (o *orchestrator) executeRows(ctx context.Context, index string, c *pql.Cal return results, nil } -func (o *orchestrator) executeExtract(ctx context.Context, index string, c *pql.Call, shards []uint64, opt *featurebase.ExecOptions) (featurebase.ExtractedIDMatrix, error) { +func (o *orchestrator) executeExtract(ctx context.Context, tableKeyer dax.TableKeyer, c *pql.Call, shards []uint64, opt *featurebase.ExecOptions) (featurebase.ExtractedIDMatrix, error) { // Extract the column filter call. if len(c.Children) < 1 { return featurebase.ExtractedIDMatrix{}, errors.New(errors.ErrUncoded, "missing column filter in Extract") @@ -1817,7 +1819,7 @@ func (o *orchestrator) executeExtract(ctx context.Context, index string, c *pql. } // Get full result set. - other, err := o.mapReduce(ctx, index, shards, c, opt, reduceFn) + other, err := o.mapReduce(ctx, tableKeyer, shards, c, opt, reduceFn) if err != nil { return featurebase.ExtractedIDMatrix{}, err } @@ -1828,7 +1830,7 @@ func (o *orchestrator) executeExtract(ctx context.Context, index string, c *pql. return results, nil } -func (o *orchestrator) executeConstRow(ctx context.Context, index string, c *pql.Call) (res *featurebase.Row, err error) { +func (o *orchestrator) executeConstRow(ctx context.Context, tableKeyer dax.TableKeyer, c *pql.Call) (res *featurebase.Row, err error) { // Fetch user-provided columns list. ids, ok := c.Args["columns"].([]uint64) if !ok { @@ -1838,7 +1840,7 @@ func (o *orchestrator) executeConstRow(ctx context.Context, index string, c *pql return featurebase.NewRow(ids...), nil } -func (o *orchestrator) executeUnionRows(ctx context.Context, index string, c *pql.Call, shards []uint64, opt *featurebase.ExecOptions) (*featurebase.Row, error) { +func (o *orchestrator) executeUnionRows(ctx context.Context, tableKeyer dax.TableKeyer, c *pql.Call, shards []uint64, opt *featurebase.ExecOptions) (*featurebase.Row, error) { // Turn UnionRows(Rows(...)) into Union(Row(...), ...). 
var rows []*pql.Call for _, child := range c.Children { @@ -1851,7 +1853,7 @@ func (o *orchestrator) executeUnionRows(ctx context.Context, index string, c *pq } // Execute the call. - rowsResult, err := o.executeCall(ctx, index, child, shards, opt) + rowsResult, err := o.executeCall(ctx, tableKeyer, child, shards, opt) if err != nil { return nil, err } @@ -1918,11 +1920,11 @@ func (o *orchestrator) executeUnionRows(ctx context.Context, index string, c *pq } // Execute the generated Union() call. - return o.executeBitmapCall(ctx, index, c, shards, opt) + return o.executeBitmapCall(ctx, tableKeyer, c, shards, opt) } // executeCount executes a count() call. -func (o *orchestrator) executeCount(ctx context.Context, index string, c *pql.Call, shards []uint64, opt *featurebase.ExecOptions) (uint64, error) { +func (o *orchestrator) executeCount(ctx context.Context, tableKeyer dax.TableKeyer, c *pql.Call, shards []uint64, opt *featurebase.ExecOptions) (uint64, error) { span, ctx := tracing.StartSpanFromContext(ctx, "Executor.executeCount") defer span.Finish() @@ -1936,7 +1938,7 @@ func (o *orchestrator) executeCount(ctx context.Context, index string, c *pql.Ca // If the child is distinct/similar, execute it directly here and count the result. if child.Type == pql.PrecallGlobal { - result, err := o.executeCall(ctx, index, child, shards, opt) + result, err := o.executeCall(ctx, tableKeyer, child, shards, opt) if err != nil { return 0, err } @@ -1959,7 +1961,7 @@ func (o *orchestrator) executeCount(ctx context.Context, index string, c *pql.Ca return other + v.(uint64) } - result, err := o.mapReduce(ctx, index, shards, c, opt, reduceFn) + result, err := o.mapReduce(ctx, tableKeyer, shards, c, opt, reduceFn) if err != nil { return 0, err } @@ -1997,10 +1999,12 @@ func (o *orchestrator) remoteExec(ctx context.Context, node dax.Address, index s // mapReduce has to ensure that it never returns before any work it spawned has // terminated. It's not enough to cancel the jobs; we have to wait for them to be // done, or we can unmap resources they're still using. -func (o *orchestrator) mapReduce(ctx context.Context, index string, shards []uint64, c *pql.Call, opt *featurebase.ExecOptions, reduceFn reduceFunc) (result interface{}, err error) { +func (o *orchestrator) mapReduce(ctx context.Context, tableKeyer dax.TableKeyer, shards []uint64, c *pql.Call, opt *featurebase.ExecOptions, reduceFn reduceFunc) (result interface{}, err error) { span, ctx := tracing.StartSpanFromContext(ctx, "Executor.mapReduce") defer span.Finish() + index := string(tableKeyer.Key()) + ch := make(chan mapResponse) // Wrap context with a cancel to kill goroutines on exit. 
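The hunks above mechanically replace the orchestrator's bare index string with a dax.TableKeyer, which call sites unwrap via tableKeyer.Key(). A minimal, self-contained sketch of that pattern follows; the type and method names mirror the dax package but are local stand-ins for illustration, not the real definitions.

package main

import "fmt"

// TableKey is the resolved string form of a table reference. The real
// dax.TableKey can carry an encoded qualifier; this alias is illustrative only.
type TableKey string

// TableKeyer is anything that can resolve itself to a TableKey. The
// orchestrator now threads this through in place of a raw index string.
type TableKeyer interface {
	Key() TableKey
}

// StringTableKeyer wraps a plain index name, mirroring dax.StringTableKeyer.
type StringTableKeyer string

// Key returns the wrapped name unchanged.
func (s StringTableKeyer) Key() TableKey { return TableKey(s) }

func main() {
	var tk TableKeyer = StringTableKeyer("myindex")
	// Call sites such as mapReduce recover the index string the same way:
	index := string(tk.Key())
	fmt.Println(index) // myindex
}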
@@ -2107,7 +2111,7 @@ func makeEmbeddedDataForShards(allRows []*featurebase.Row, shards []uint64) []*f return newRows } -func (o *orchestrator) mapper(ctx context.Context, eg *errgroup.Group, ch chan mapResponse, index string, nodes []controller.ComputeNode, c *pql.Call, opt *featurebase.ExecOptions, reduceFn reduceFunc) (reterr error) { +func (o *orchestrator) mapper(ctx context.Context, eg *errgroup.Group, ch chan mapResponse, index string, nodes []dax.ComputeNode, c *pql.Call, opt *featurebase.ExecOptions, reduceFn reduceFunc) (reterr error) { span, ctx := tracing.StartSpanFromContext(ctx, "Executor.mapper") defer span.Finish() @@ -2517,13 +2521,31 @@ func fieldValidateValue(f *featurebase.FieldInfo, val interface{}) error { return nil } -func (o *orchestrator) translateCall(ctx context.Context, c *pql.Call, index string, columnKeys map[string]map[string]uint64, rowKeys map[string]map[string]map[string]uint64) (*pql.Call, error) { +func (o *orchestrator) translateCall(ctx context.Context, c *pql.Call, tableKeyer dax.TableKeyer, columnKeys map[string]map[string]uint64, rowKeys map[string]map[string]map[string]uint64) (*pql.Call, error) { + index := string(tableKeyer.Key()) + // Check for an overriding 'index' argument. // This also applies to all child calls. if callIndex := c.CallIndex(); callIndex != "" { index = callIndex + // TODO(tlt): checking for prefix like this is bad form. Ideally, the + // argument stored in the Call.Args map would be of type TableKey + // (currently they are restricted to type: string). In that case we + // could just pass it through without doing this conversion. (This would + // require changing the logic in Queryer.convertIndex() to set "index" + // to a TableKeyer). + if strings.HasPrefix(index, dax.PrefixTable+dax.TableKeyDelimiter) { + qtid, err := dax.QualifiedTableIDFromKey(index) + if err != nil { + return nil, errors.Wrapf(err, "getting qtid from key: %s", index) + } + tableKeyer = qtid + } else { + tableKeyer = dax.StringTableKeyer(index) + } } - idx, err := o.schema.IndexInfo(ctx, index) + + idx, err := o.schemaIndexInfo(ctx, tableKeyer) if err != nil { return nil, errors.Wrapf(err, "translating query on index %q", index) } @@ -2535,7 +2557,7 @@ func (o *orchestrator) translateCall(ctx context.Context, c *pql.Call, index str switch c.Name { case "Set", "Store": if field, err := c.FieldArg(); err == nil { - f, err := o.schema.FieldInfo(ctx, index, field) + f, err := o.schemaFieldInfo(ctx, tableKeyer, field) if err != nil { return nil, errors.Wrapf(err, "validating value for field %q", field) } @@ -2561,7 +2583,7 @@ func (o *orchestrator) translateCall(ctx context.Context, c *pql.Call, index str case "Clear", "Row", "Range", "ClearRow": if field, err := c.FieldArg(); err == nil { - f, err := o.schema.FieldInfo(ctx, index, field) + f, err := o.schemaFieldInfo(ctx, tableKeyer, field) if err != nil { return nil, errors.Wrapf(err, "validating value for field %q", field) } @@ -2648,7 +2670,7 @@ func (o *orchestrator) translateCall(ctx context.Context, c *pql.Call, index str return nil, errors.New(errors.ErrUncoded, "missing field") } - f, err := o.schema.FieldInfo(ctx, index, field) + f, err := o.schemaFieldInfo(ctx, tableKeyer, field) if err != nil { return nil, errors.Wrapf(err, "validating value for field %q", field) } @@ -2718,7 +2740,7 @@ func (o *orchestrator) translateCall(ctx context.Context, c *pql.Call, index str // Translate the previous row key. if prev, ok := c.Args["previous"]; ok { // Validate the type. 
- f, err := o.schema.FieldInfo(ctx, index, field) + f, err := o.schemaFieldInfo(ctx, tableKeyer, field) if err != nil { return nil, errors.Wrapf(err, "validating value for field %q", field) } @@ -2749,7 +2771,7 @@ func (o *orchestrator) translateCall(ctx context.Context, c *pql.Call, index str if err != nil || fieldName == "" { return nil, fmt.Errorf("cannot read field name for Rows call") } - if f, err := o.schema.FieldInfo(ctx, index, fieldName); err != nil { + if f, err := o.schemaFieldInfo(ctx, tableKeyer, fieldName); err != nil { return nil, errors.Wrapf(err, "getting field %q", fieldName) } else if !f.Options.Keys { return nil, fmt.Errorf("'%s' is not a set/mutex/time field with a string key", fieldName) @@ -2778,7 +2800,7 @@ func (o *orchestrator) translateCall(ctx context.Context, c *pql.Call, index str // Translate child calls. for i, child := range c.Children { - translated, err := o.translateCall(ctx, child, index, columnKeys, rowKeys) + translated, err := o.translateCall(ctx, child, tableKeyer, columnKeys, rowKeys) if err != nil { return nil, err } @@ -2792,7 +2814,7 @@ func (o *orchestrator) translateCall(ctx context.Context, c *pql.Call, index str continue } - translated, err := o.translateCall(ctx, argCall, index, columnKeys, rowKeys) + translated, err := o.translateCall(ctx, argCall, tableKeyer, columnKeys, rowKeys) if err != nil { return nil, err } @@ -2823,10 +2845,12 @@ func (o *orchestrator) callZero(c *pql.Call) *pql.Call { } } -func (o *orchestrator) translateResults(ctx context.Context, index string, idx *featurebase.IndexInfo, calls []*pql.Call, results []interface{}, memoryAvailable int64) (err error) { +func (o *orchestrator) translateResults(ctx context.Context, qtbl *dax.QualifiedTable, calls []*pql.Call, results []interface{}, memoryAvailable int64) (err error) { span, _ := tracing.StartSpanFromContext(ctx, "Executor.translateResults") defer span.Finish() + idx := featurebase.TableToIndexInfo(&qtbl.Table) + idMap := make(map[uint64]string) if idx.Options.Keys { // Collect all index ids. @@ -2836,13 +2860,13 @@ func (o *orchestrator) translateResults(ctx context.Context, index string, idx * return err } } - if idMap, err = o.trans.TranslateIndexIDSet(ctx, index, idSet); err != nil { + if idMap, err = o.trans.TranslateIndexIDSet(ctx, string(qtbl.Key()), idSet); err != nil { return err } } for i := range results { - results[i], err = o.translateResult(ctx, idx, calls[i], results[i], idMap) + results[i], err = o.translateResult(ctx, qtbl, calls[i], results[i], idMap) if err != nil { return err } @@ -2884,13 +2908,13 @@ func (o *orchestrator) howToTranslate(ctx context.Context, idx *featurebase.Inde // First get the index and field the row specifies (if any). rowIdx = idx if row.Index != "" && row.Index != idx.Name { - rowIdx, err = o.schema.IndexInfo(ctx, row.Index) + rowIdx, err = o.schemaIndexInfo(ctx, dax.TableKey(row.Index)) if err != nil { return nil, nil, 0, errors.Wrapf(err, "got a row with unknown index: %s", row.Index) } } if row.Field != "" { - rowField, err = o.schema.FieldInfo(ctx, row.Index, row.Field) + rowField, err = o.schemaFieldInfo(ctx, dax.TableKey(row.Index), row.Field) if err != nil { return nil, nil, 0, errors.Wrapf(err, "got a row with unknown index/field %s/%s", idx.Name, row.Field) } @@ -2900,7 +2924,7 @@ func (o *orchestrator) howToTranslate(ctx context.Context, idx *featurebase.Inde if rowField != nil { // Handle the case where field has a foreign index. 
if rowField.Options.ForeignIndex != "" { - fidx, err := o.schema.IndexInfo(ctx, rowField.Options.ForeignIndex) + fidx, err := o.schemaIndexInfo(ctx, dax.StringTableKeyer(rowField.Options.ForeignIndex)) if err != nil { return nil, nil, 0, errors.Errorf("foreign index %s not found for field %s in index %s", rowField.Options.ForeignIndex, rowField.Name, rowIdx.Name) } @@ -2953,7 +2977,7 @@ func (o *orchestrator) collectResultIDs(ctx context.Context, idx *featurebase.In } // preTranslateMatrixSet translates the IDs of a set field in an extracted matrix. -func (o *orchestrator) preTranslateMatrixSet(ctx context.Context, mat featurebase.ExtractedIDMatrix, fieldIdx uint, index, field string) (map[uint64]string, error) { +func (o *orchestrator) preTranslateMatrixSet(ctx context.Context, mat featurebase.ExtractedIDMatrix, fieldIdx uint, tableKeyer dax.TableKeyer, field string) (map[uint64]string, error) { ids := make(map[uint64]struct{}, len(mat.Columns)) for _, col := range mat.Columns { for _, v := range col.Rows[fieldIdx] { @@ -2961,10 +2985,14 @@ func (o *orchestrator) preTranslateMatrixSet(ctx context.Context, mat featurebas } } + index := string(tableKeyer.Key()) + return o.trans.TranslateFieldIDs(ctx, index, field, ids) } -func (o *orchestrator) translateResult(ctx context.Context, idx *featurebase.IndexInfo, call *pql.Call, result interface{}, idSet map[uint64]string) (_ interface{}, err error) { +func (o *orchestrator) translateResult(ctx context.Context, qtbl *dax.QualifiedTable, call *pql.Call, result interface{}, idSet map[uint64]string) (_ interface{}, err error) { + idx := featurebase.TableToIndexInfo(&qtbl.Table) + switch result := result.(type) { case *featurebase.Row: rowIdx, rowField, strategy, err := o.howToTranslate(ctx, idx, result) @@ -2981,14 +3009,13 @@ func (o *orchestrator) translateResult(ctx context.Context, idx *featurebase.Ind } return other, nil case byRowField: - keys, err := o.trans.TranslateFieldListIDs(ctx, rowIdx.Name, rowField.Name, result.Columns()) + keys, err := o.trans.TranslateFieldListIDs(ctx, result.Index, rowField.Name, result.Columns()) if err != nil { return nil, errors.Wrap(err, "translating Row to field keys") } result.Keys = keys case byRowFieldForeignIndex: - idx, err = o.schema.IndexInfo(ctx, rowField.Options.ForeignIndex) - if err != nil { + if _, err := o.schemaIndexInfo(ctx, dax.StringTableKeyer(rowField.Options.ForeignIndex)); err != nil { return nil, errors.Wrapf(err, "foreign index %s not found for field %s in index %s", rowField.Options.ForeignIndex, rowField.Name, rowIdx.Name) } for _, segment := range result.Segments { @@ -3021,7 +3048,7 @@ func (o *orchestrator) translateResult(ctx context.Context, idx *featurebase.Ind return nil, nil } - field, err := o.schema.FieldInfo(ctx, idx.Name, fieldName) + field, err := o.schemaFieldInfo(ctx, qtbl, fieldName) if err != nil { return nil, nil } @@ -3052,7 +3079,7 @@ func (o *orchestrator) translateResult(ctx context.Context, idx *featurebase.Ind case featurebase.PairField: if fieldName := callArgString(call, "field"); fieldName != "" { - field, err := o.schema.FieldInfo(ctx, idx.Name, fieldName) + field, err := o.schemaFieldInfo(ctx, qtbl, fieldName) if err != nil { return nil, fmt.Errorf("field %q not found", fieldName) } @@ -3076,7 +3103,7 @@ func (o *orchestrator) translateResult(ctx context.Context, idx *featurebase.Ind case *featurebase.PairsField: if fieldName := callArgString(call, "_field"); fieldName != "" { - field, err := o.schema.FieldInfo(ctx, idx.Name, fieldName) + field, err := 
o.schemaFieldInfo(ctx, qtbl, fieldName) if err != nil { return nil, errors.Wrapf(err, "field '%q'", fieldName) } @@ -3106,7 +3133,7 @@ func (o *orchestrator) translateResult(ctx context.Context, idx *featurebase.Ind groups := result.Groups() for _, gl := range groups { for _, g := range gl.Group { - field, err := o.schema.FieldInfo(ctx, idx.Name, g.Field) + field, err := o.schemaFieldInfo(ctx, qtbl, g.Field) if err != nil { return nil, errors.Wrapf(err, "getting field '%q", g.Field) } @@ -3187,7 +3214,7 @@ func (o *orchestrator) translateResult(ctx context.Context, idx *featurebase.Ind Field: fieldName, } - if field, err := o.schema.FieldInfo(ctx, idx.Name, fieldName); err != nil { + if field, err := o.schemaFieldInfo(ctx, qtbl, fieldName); err != nil { return nil, errors.Wrapf(err, "'%q'", fieldName) } else if field.Options.Keys { keys, err := o.trans.TranslateFieldListIDs(ctx, idx.Name, field.Name, result) @@ -3207,7 +3234,7 @@ func (o *orchestrator) translateResult(ctx context.Context, idx *featurebase.Ind fields := make([]featurebase.ExtractedTableField, len(result.Fields)) mappers := make([]fieldMapper, len(result.Fields)) for i, v := range result.Fields { - field, err := o.schema.FieldInfo(ctx, idx.Name, v) + field, err := o.schemaFieldInfo(ctx, qtbl, v) if err != nil { return nil, errors.Wrapf(err, "'%q'", v) } @@ -3237,9 +3264,9 @@ func (o *orchestrator) translateResult(ctx context.Context, idx *featurebase.Ind case FieldTypeSet, FieldTypeTime: if field.Options.Keys { datatype = "[]string" - translations, err := o.preTranslateMatrixSet(ctx, result, uint(i), idx.Name, field.Name) + translations, err := o.preTranslateMatrixSet(ctx, result, uint(i), qtbl, field.Name) if err != nil { - return nil, errors.Wrapf(err, "translating IDs of field %q", v) + return nil, errors.Wrapf(err, "orch: translating IDs of field %q", v) } mapper = func(ids []uint64) (interface{}, error) { keys := make([]string, len(ids)) @@ -3260,9 +3287,9 @@ func (o *orchestrator) translateResult(ctx context.Context, idx *featurebase.Ind case FieldTypeMutex: if field.Options.Keys { datatype = "string" - translations, err := o.preTranslateMatrixSet(ctx, result, uint(i), idx.Name, field.Name) + translations, err := o.preTranslateMatrixSet(ctx, result, uint(i), qtbl, field.Name) if err != nil { - return nil, errors.Wrapf(err, "translating IDs of field %q", v) + return nil, errors.Wrapf(err, "orch: translating IDs of field %q", v) } mapper = func(ids []uint64) (interface{}, error) { switch len(ids) { @@ -3453,33 +3480,105 @@ func callArgString(call *pql.Call, key string) string { type qualifiedOrchestrator struct { *orchestrator - qual dax.TableQualifier - schemar schemar.Schemar + qual dax.TableQualifier } -func newQualifiedOrchestrator(orch *orchestrator, qual dax.TableQualifier, schemar schemar.Schemar) *qualifiedOrchestrator { +func newQualifiedOrchestrator(orch *orchestrator, qual dax.TableQualifier) *qualifiedOrchestrator { return &qualifiedOrchestrator{ orchestrator: orch, qual: qual, - schemar: schemar, } } -func (o *qualifiedOrchestrator) Execute(ctx context.Context, index string, q *pql.Query, shards []uint64, opt *featurebase.ExecOptions) (featurebase.QueryResponse, error) { +func (o *qualifiedOrchestrator) Execute(ctx context.Context, tableKeyer dax.TableKeyer, q *pql.Query, shards []uint64, opt *featurebase.ExecOptions) (featurebase.QueryResponse, error) { resp := featurebase.QueryResponse{} - tkey, err := o.indexToQualifiedTableKey(ctx, index) - if err != nil { - return resp, errors.Wrap(err, "converting 
index to qualified table key") + var qtbl *dax.QualifiedTable + + switch keyer := tableKeyer.(type) { + case *dax.Table: + qtbl = dax.NewQualifiedTable(o.qual, keyer) + case *dax.QualifiedTable: + qtbl = keyer + default: + return resp, errors.Errorf("qualifiedOrchestrator.Execute expects a *dax.Table or *dax.QualifiedTable, but got: %T", tableKeyer) } - return o.orchestrator.Execute(ctx, string(tkey), q, shards, opt) + return o.orchestrator.Execute(ctx, qtbl, q, shards, opt) } -func (o *qualifiedOrchestrator) indexToQualifiedTableKey(ctx context.Context, index string) (dax.TableKey, error) { - qtid, err := o.schemar.TableID(ctx, o.qual, dax.TableName(index)) - if err != nil { - return "", errors.Wrap(err, "converting index to qualified table id") +// schemaFieldInfo is a function introduced when we replaced +// `schema.FieldInfo()` calls, where schema was a `featurebase.SchemaInfoAPI` to +// `schema.Table().Field()` calls, where schema is a `pilosa.SchemaAPI`. In the +// future, when we're no longer dealing with IndexInfo and FieldInfo, and +// instead use dax.Table and dax.Field, this helper function can be factored +// out. +func (o *orchestrator) schemaFieldInfo(ctx context.Context, tableKeyer dax.TableKeyer, fieldName string) (*featurebase.FieldInfo, error) { + var tbl *dax.Table + var err error + + switch v := tableKeyer.(type) { + case *dax.QualifiedTable: + tbl = &v.Table + case *dax.Table: + tbl = v + case dax.QualifiedTableID: + tbl, err = o.schema.TableByID(ctx, v.ID) + if err != nil { + return nil, errors.Wrapf(err, "getting table by id: %s", v.ID) + } + case dax.StringTableKeyer: + tbl, err = o.schema.TableByName(ctx, dax.TableName(v)) + if err != nil { + return nil, errors.Wrapf(err, "getting table by name: %s", v) + } + case dax.TableKey: + qtid := v.QualifiedTableID() + tbl, err = o.schema.TableByID(ctx, qtid.ID) + if err != nil { + return nil, errors.Wrapf(err, "getting table by ID from TableKey: %s", v) + } + default: + return nil, errors.Errorf("unsupport table keyer type in schemaFieldInfo: %T", tableKeyer) + } + + fld, ok := tbl.Field(dax.FieldName(fieldName)) + if !ok { + return nil, errors.Errorf("field not found: %s", fieldName) } - return qtid.Key(), nil + + return featurebase.FieldToFieldInfo(fld), nil +} + +// schemaIndexInfo - see comment on schemaFieldInfo. 
+func (o *orchestrator) schemaIndexInfo(ctx context.Context, tableKeyer dax.TableKeyer) (*featurebase.IndexInfo, error) { + var tbl *dax.Table + var err error + + switch v := tableKeyer.(type) { + case *dax.QualifiedTable: + tbl = &v.Table + case *dax.Table: + tbl = v + case dax.QualifiedTableID: + tbl, err = o.schema.TableByID(ctx, v.ID) + if err != nil { + return nil, errors.Wrapf(err, "getting table by id: %s", v.ID) + } + case dax.TableKey: + qtid := v.QualifiedTableID() + tbl, err = o.schema.TableByID(ctx, qtid.ID) + if err != nil { + return nil, errors.Wrapf(err, "getting table by ID from TableKey: %s", v) + } + case dax.StringTableKeyer: + tbl, err = o.schema.TableByName(ctx, dax.TableName(v)) + if err != nil { + return nil, errors.Wrapf(err, "getting table by name: %s", v) + } + default: + return nil, errors.Errorf("unsupported table keyer type in schemaIndexInfo: %T", tableKeyer) + } + + return featurebase.TableToIndexInfo(tbl), nil } diff --git a/dax/queryer/queryer.go b/dax/queryer/queryer.go index c5eedda61..09b976c5e 100644 --- a/dax/queryer/queryer.go +++ b/dax/queryer/queryer.go @@ -6,6 +6,7 @@ import ( "fmt" "net/http" "strings" + "sync" "time" featurebase "github.com/featurebasedb/featurebase/v3" @@ -30,9 +31,13 @@ import ( // that the externally-facing Molecula API would proxy query requests to a pool // of "Queryer" nodes, which handle incoming query requests. type Queryer struct { - orchestrator *orchestrator + mu sync.RWMutex + orchestrators map[dax.TableQualifier]*qualifiedOrchestrator - mds MDS + fbClient *featurebase.InternalClient + + noder dax.Noder + schemar dax.Schemar logger logger.Logger } @@ -40,9 +45,10 @@ // New returns a new instance of Queryer. func New(cfg Config) *Queryer { q := &Queryer{ - mds: NewNopMDS(), - orchestrator: nil, - logger: logger.NopLogger, + noder: dax.NewNopNoder(), + schemar: dax.NewNopSchemar(), + orchestrators: make(map[dax.TableQualifier]*qualifiedOrchestrator), + logger: logger.NopLogger, } if cfg.Logger != nil { @@ -52,8 +58,63 @@ return q } -func (q *Queryer) SetMDS(mds MDS) error { - q.mds = mds +// Orchestrator gets (or creates) an instance of qualifiedOrchestrator based on +// the provided dax.TableQualifier. +func (q *Queryer) Orchestrator(qual dax.TableQualifier) *qualifiedOrchestrator { + // Try to get orchestrator under a read lock first. + if orch := func() *qualifiedOrchestrator { + q.mu.RLock() + defer q.mu.RUnlock() + if orch, ok := q.orchestrators[qual]; ok { + return orch + } + return nil + }(); orch != nil { + return orch + } + + // Since we didn't find an orchestrator under the read lock, obtain the + // write lock, re-check, and create one if it's still missing. + q.mu.Lock() + defer q.mu.Unlock() + if orch, ok := q.orchestrators[qual]; ok { + return orch + } + + sapi := newQualifiedSchemaAPI(qual, q.schemar) + + orch := &orchestrator{ + schema: sapi, + trans: NewMDSTranslator(q.noder, q.schemar), + topology: &MDSTopology{noder: q.noder}, + // TODO(jaffee) using default http.Client probably bad... need to set some timeouts.
+ client: q.fbClient, + stats: stats.NopStatsClient, + logger: q.logger, + } + + qorch := newQualifiedOrchestrator(orch, qual) + q.orchestrators[qual] = qorch + + return qorch +} + +func (q *Queryer) SetNoder(noder dax.Noder) error { + q.noder = noder + return nil +} + +func (q *Queryer) SetSchemar(schemar dax.Schemar) error { + q.schemar = schemar + return nil +} + +func (q *Queryer) Start() error { + if q.noder == nil { + return errors.New(errors.ErrUncoded, "queryer requires noder to be configured") + } else if q.schemar == nil { + return errors.New(errors.ErrUncoded, "queryer requires schemar to be configured") + } // fbClient is an instance of internal client. It's used in one place in the // orchestrator (o.client.QueryNode()), but in that case, the host is @@ -67,26 +128,8 @@ func (q *Queryer) SetMDS(mds MDS) error { if err != nil { return errors.Wrap(err, "setting up internal client") } + q.fbClient = fbClient - q.orchestrator = &orchestrator{ - schema: NewSchemaInfoAPI(q.mds), - trans: NewMDSTranslator(q.mds), - topology: &MDSTopology{mds: q.mds}, - // TODO(jaffee) using default http.Client probably bad... need to set some timeouts. - client: fbClient, - stats: stats.NopStatsClient, - logger: q.logger, - } - - return nil -} - -func (q *Queryer) Start() error { - if q.mds == nil { - return errors.New(errors.ErrUncoded, "queryer requires mds to be configured") - } else if q.orchestrator == nil { - return errors.New(errors.ErrUncoded, "queryer requires orchestrator to be configured") - } return nil } @@ -123,13 +166,10 @@ func (q *Queryer) QuerySQL(ctx context.Context, qual dax.TableQualifier, sql str } // SchemaAPI - sapi := NewQualifiedSchemaAPI(qual, q.mds) - - // Orchestrator - orch := newQualifiedOrchestrator(q.orchestrator, qual, q.mds) + sapi := newQualifiedSchemaAPI(qual, q.schemar) // Importer - imp := idkmds.NewImporter(q.mds, qual, nil) + imp := idkmds.NewImporter(q.noder, q.schemar, qual, nil) // TODO(tlt): this obviously doesn't work; we don't have an API here. We // need a dax-compatible implementation of the SystemAPI (or at least a @@ -138,7 +178,7 @@ func (q *Queryer) QuerySQL(ctx context.Context, qual dax.TableQualifier, sql str systemLayer := systemlayer.NewSystemLayer() - pl := planner.NewExecutionPlanner(orch, sapi, sysapi, systemLayer, imp, q.orchestrator.logger, sql) + pl := planner.NewExecutionPlanner(q.Orchestrator(qual), sapi, sysapi, systemLayer, imp, q.logger, sql) planOp, err := pl.CompilePlan(ctx, st) if err != nil { @@ -204,6 +244,28 @@ func (q *Queryer) parseAndQueryPQL(ctx context.Context, qual dax.TableQualifier, return q.QueryPQL(ctx, qual, dax.TableName(table), query) } +// convertIndex tries to convert any "index" specified in the call.Args map to a +// TableKeyer. Note, since the Call.CallIndex() method currently only looks for +// strings, we can't just set the value to a TableKeyer; we have to set it to +// the equivalent string and then parse it back out later. A TODO would be to +// modify Call.CallIndex() to be TableKeyer aware. I didn't do that along with +// these changes because I'm not sure if we want to introduce dax types into the +// pql package. +func (q *Queryer) convertIndex(ctx context.Context, qual dax.TableQualifier, call *featurebase_pql.Call) { + if index := call.CallIndex(); index != "" { + qtbl, err := q.schemar.TableByName(ctx, qual, dax.TableName(index)) + if err != nil { + return + } + call.Args["index"] = string(qtbl.Key()) + } + + // Apply to children.
+ for _, child := range call.Children { + q.convertIndex(ctx, qual, child) + } +} + func (q *Queryer) QueryPQL(ctx context.Context, qual dax.TableQualifier, table dax.TableName, pql string) (*featurebase.WireQueryResponse, error) { // Parse the pql into a pql.Query containing []pql.Call. qry, err := featurebase_pql.NewParser(strings.NewReader(pql)).Parse() @@ -214,12 +276,15 @@ func (q *Queryer) QueryPQL(ctx context.Context, qual dax.TableQualifier, table d return nil, errors.Errorf("must have exactly 1 query, but got: %+v", qry.Calls) } - tkey, err := q.indexToQualifiedTableKey(ctx, qual, string(table)) + // Replace any "index" arguments within the PQL with a TableKey. + q.convertIndex(ctx, qual, qry.Calls[0]) + + qtbl, err := q.schemar.TableByName(ctx, qual, dax.TableName(table)) if err != nil { - return nil, errors.Wrapf(err, "converting index to qualified table key: %s", table) + return nil, errors.Wrap(err, "converting index to qualified table") } - results, err := q.orchestrator.Execute(ctx, string(tkey), qry, nil, &featurebase.ExecOptions{}) + results, err := q.Orchestrator(qual).Execute(ctx, qtbl, qry, nil, &featurebase.ExecOptions{}) if err != nil { return nil, errors.Wrap(err, "orchestrator.Execute") } @@ -227,10 +292,10 @@ func (q *Queryer) QueryPQL(ctx context.Context, qual dax.TableQualifier, table d return nil, errors.Errorf("expected single result but got %+v", results.Results) } - return PQLResultToQueryResult(results.Results[0]) + return pqlResultToQueryResult(results.Results[0]) } -func PQLResultToQueryResult(pqlResult interface{}) (*featurebase.WireQueryResponse, error) { +func pqlResultToQueryResult(pqlResult interface{}) (*featurebase.WireQueryResponse, error) { toTabler, err := server.ToTablerWrapper(pqlResult) if err != nil { return nil, errors.Wrap(err, "wrapping as type ToTabler") @@ -321,17 +386,3 @@ func rowToSliceInterface(header []*fbproto.ColumnInfo, row *fbproto.Row) []inter } return ret } - -// TODO(tlt): this method was copied from queryer/batchImporter. Can we centralize -// this logic? -func (q *Queryer) indexToQualifiedTableKey(ctx context.Context, qual dax.TableQualifier, index string) (dax.TableKey, error) { - if strings.HasPrefix(index, dax.PrefixTable+dax.TableKeyDelimiter) { - return dax.TableKey(index), nil - } - - qtid, err := q.mds.TableID(ctx, qual, dax.TableName(index)) - if err != nil { - return "", errors.Wrap(err, "converting index to qualified table id") - } - return qtid.Key(), nil -} diff --git a/dax/queryer/schema_api.go b/dax/queryer/schema_api.go index b3f39acd6..19405f832 100644 --- a/dax/queryer/schema_api.go +++ b/dax/queryer/schema_api.go @@ -5,7 +5,6 @@ import ( pilosa "github.com/featurebasedb/featurebase/v3" "github.com/featurebasedb/featurebase/v3/dax" - "github.com/featurebasedb/featurebase/v3/dax/mds/schemar" "github.com/featurebasedb/featurebase/v3/errors" ) @@ -13,39 +12,33 @@ import ( var _ pilosa.SchemaAPI = (*qualifiedSchemaAPI)(nil) // qualifiedSchemaAPI is a wrapper around schemaAPI. It is initialized with a -// TableQualifer, and it uses this qualifer to convert between, for example, +// TableQualifier, and it uses this qualifier to convert between, for example, // FeatureBase index name (a string) and TableKey. It requires a Schemar to do // that lookup/conversion.
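Note: stepping back to Queryer.Orchestrator above, the get-or-create uses the classic double-checked pattern: an optimistic lookup under RLock, then a full Lock with a re-check before constructing, so two goroutines racing on the same qualifier can't both build an orchestrator. Distilled to its essentials (a sketch of the idiom, assuming a Go version with generics, not code from this change):

func getOrCreate[K comparable, V any](mu *sync.RWMutex, m map[K]V, k K, build func() V) V {
	mu.RLock()
	v, ok := m[k]
	mu.RUnlock()
	if ok {
		return v
	}
	mu.Lock()
	defer mu.Unlock()
	if v, ok := m[k]; ok { // re-check: another goroutine may have won the race
		return v
	}
	v = build()
	m[k] = v
	return v
}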
type qualifiedSchemaAPI struct { qual dax.TableQualifier - schemar schemar.Schemar + schemar dax.Schemar } -func NewQualifiedSchemaAPI(qual dax.TableQualifier, schemar schemar.Schemar) *qualifiedSchemaAPI { +func newQualifiedSchemaAPI(qual dax.TableQualifier, schema dax.Schemar) *qualifiedSchemaAPI { return &qualifiedSchemaAPI{ qual: qual, - schemar: schemar, + schemar: schema, } } func (s *qualifiedSchemaAPI) TableByName(ctx context.Context, tname dax.TableName) (*dax.Table, error) { - qtid, err := s.schemar.TableID(ctx, s.qual, tname) + qtbl, err := s.schemar.TableByName(ctx, s.qual, tname) if err != nil { return nil, errors.Wrapf(err, "getting table id: (%s) %s", s.qual, tname) } - - qtbl, err := s.schemar.Table(ctx, qtid) - if err != nil { - return nil, errors.Wrapf(err, "getting table: %s", qtid) - } - return &qtbl.Table, nil } func (s *qualifiedSchemaAPI) TableByID(ctx context.Context, tid dax.TableID) (*dax.Table, error) { qtid := dax.NewQualifiedTableID(s.qual, tid) - qtbl, err := s.schemar.Table(ctx, qtid) + qtbl, err := s.schemar.TableByID(ctx, qtid) if err != nil { return nil, errors.Wrapf(err, "getting table: %s", qtid) } @@ -73,28 +66,28 @@ func (s *qualifiedSchemaAPI) CreateTable(ctx context.Context, tbl *dax.Table) er } func (s *qualifiedSchemaAPI) CreateField(ctx context.Context, tname dax.TableName, fld *dax.Field) error { - qtid, err := s.schemar.TableID(ctx, s.qual, tname) + qtbl, err := s.schemar.TableByName(ctx, s.qual, tname) if err != nil { - return errors.Wrapf(err, "getting table id: (%s) %s", s.qual, tname) + return errors.Wrapf(err, "getting table by name: (%s) %s", s.qual, tname) } - return s.schemar.CreateField(ctx, qtid, fld) + return s.schemar.CreateField(ctx, qtbl.QualifiedID(), fld) } func (s *qualifiedSchemaAPI) DeleteTable(ctx context.Context, tname dax.TableName) error { - qtid, err := s.schemar.TableID(ctx, s.qual, tname) + qtbl, err := s.schemar.TableByName(ctx, s.qual, tname) if err != nil { - return errors.Wrapf(err, "getting table id: (%s) %s", s.qual, tname) + return errors.Wrapf(err, "getting table by name: (%s) %s", s.qual, tname) } - return s.schemar.DropTable(ctx, qtid) + return s.schemar.DropTable(ctx, qtbl.QualifiedID()) } func (s *qualifiedSchemaAPI) DeleteField(ctx context.Context, tname dax.TableName, fname dax.FieldName) error { - qtid, err := s.schemar.TableID(ctx, s.qual, tname) + qtid, err := s.schemar.TableByName(ctx, s.qual, tname) if err != nil { - return errors.Wrapf(err, "getting table id: (%s) %s", s.qual, tname) + return errors.Wrapf(err, "getting table by name: (%s) %s", s.qual, tname) } - return s.schemar.DropField(ctx, qtid, fname) + return s.schemar.DropField(ctx, qtid.Key().QualifiedTableID(), fname) } diff --git a/dax/queryer/schema_info_api.go b/dax/queryer/schema_info_api.go deleted file mode 100644 index d95ef94c4..000000000 --- a/dax/queryer/schema_info_api.go +++ /dev/null @@ -1,79 +0,0 @@ -package queryer - -import ( - "context" - - pilosa "github.com/featurebasedb/featurebase/v3" - "github.com/featurebasedb/featurebase/v3/dax" - "github.com/featurebasedb/featurebase/v3/dax/mds/schemar" - "github.com/featurebasedb/featurebase/v3/errors" -) - -// Ensure type implements interface. 
-var _ pilosa.SchemaInfoAPI = (*schemaInfoAPI)(nil) - -type schemaInfoAPI struct { - schemar schemar.Schemar -} - -func NewSchemaInfoAPI(schemar schemar.Schemar) *schemaInfoAPI { - return &schemaInfoAPI{ - schemar: schemar, - } -} - -func (a *schemaInfoAPI) IndexInfo(ctx context.Context, indexName string) (*pilosa.IndexInfo, error) { - qtid := dax.TableKey(indexName).QualifiedTableID() - tbl, err := a.schemar.Table(ctx, qtid) - if err != nil { - return nil, errors.Wrap(err, "getting table for indexinfo") - } - - return daxTableToFeaturebaseIndexInfo(tbl, false) -} - -func (a *schemaInfoAPI) FieldInfo(ctx context.Context, indexName, fieldName string) (*pilosa.FieldInfo, error) { - qtid := dax.TableKey(indexName).QualifiedTableID() - tbl, err := a.schemar.Table(ctx, qtid) - fldName := dax.FieldName(fieldName) - - if err != nil { - return nil, errors.Wrap(err, "getting table for fieldinfo") - } - - fld, ok := tbl.Field(dax.FieldName(fieldName)) - if !ok { - return nil, dax.NewErrFieldDoesNotExist(fldName) - } - - return pilosa.FieldToFieldInfo(fld), nil -} - -// TODO(tlt): try to get rid of this in favor of pilosa.TableToIndexInfo. -// daxTableToFeaturebaseIndexInfo converts a dax.Table to a -// featurebase.IndexInfo. If useName is true, the IndexInfo.Name value will -// be set to the qualified table name. Otherwise it will be set to the table key. -func daxTableToFeaturebaseIndexInfo(qtbl *dax.QualifiedTable, useName bool) (*pilosa.IndexInfo, error) { - name := string(qtbl.Key()) - if useName { - name = string(qtbl.Name) - } - ii := &pilosa.IndexInfo{ - Name: name, - CreatedAt: 0, - Options: pilosa.IndexOptions{ - Keys: qtbl.StringKeys(), - TrackExistence: true, - }, - ShardWidth: pilosa.ShardWidth, - } - - // fields - fields := make([]*pilosa.FieldInfo, len(qtbl.Fields)) - for i := range qtbl.Fields { - fields[i] = pilosa.FieldToFieldInfo(qtbl.Fields[i]) - } - ii.Fields = fields - - return ii, nil -} diff --git a/dax/queryer/service/queryer.go b/dax/queryer/service/queryer.go index 56822b116..32ad2227e 100644 --- a/dax/queryer/service/queryer.go +++ b/dax/queryer/service/queryer.go @@ -50,6 +50,8 @@ func (q *queryerService) HTTPHandler() http.Handler { } func (q *queryerService) SetMDS(addr dax.Address) error { - q.queryer.SetMDS(mdsclient.New(addr, q.logger)) + mdscli := mdsclient.New(addr, q.logger) + q.queryer.SetNoder(mdscli) + q.queryer.SetSchemar(mdscli) return nil } diff --git a/dax/queryer/translator.go b/dax/queryer/translator.go index d6680c7a9..90de80b07 100644 --- a/dax/queryer/translator.go +++ b/dax/queryer/translator.go @@ -14,15 +14,17 @@ import ( ) // Ensure type implements interface. 
-var _ Translator = (*MDSTranslator)(nil) +var _ Translator = (*mdsTranslator)(nil) -type MDSTranslator struct { - mds MDS +type mdsTranslator struct { + noder dax.Noder + schemar dax.Schemar } -func NewMDSTranslator(mds MDS) *MDSTranslator { - return &MDSTranslator{ - mds: mds, +func NewMDSTranslator(noder dax.Noder, schemar dax.Schemar) *mdsTranslator { + return &mdsTranslator{ + noder: noder, + schemar: schemar, } } @@ -37,11 +39,11 @@ func fbClient(address dax.Address) (*featurebase_client.Client, error) { ) } -func (m *MDSTranslator) CreateIndexKeys(ctx context.Context, table string, keys []string) (map[string]uint64, error) { +func (m *mdsTranslator) CreateIndexKeys(ctx context.Context, table string, keys []string) (map[string]uint64, error) { tkey := dax.TableKey(table) qtid := tkey.QualifiedTableID() - qtbl, err := m.mds.Table(ctx, qtid) + qtbl, err := m.schemar.TableByID(ctx, qtid) if err != nil { return nil, errors.Wrap(err, "getting table") } @@ -53,7 +55,7 @@ func (m *MDSTranslator) CreateIndexKeys(ctx context.Context, table string, keys out := make(map[string]uint64) for pNum := range pMap { - address, err := m.mds.IngestPartition(ctx, qtid, pNum) + address, err := m.noder.IngestPartition(ctx, qtid, pNum) if err != nil { return nil, errors.Wrapf(err, "calling ingest-partition on table: %s, partition: %d", table, pNum) } @@ -78,9 +80,9 @@ func (m *MDSTranslator) CreateIndexKeys(ctx context.Context, table string, keys return out, nil } -func (m *MDSTranslator) CreateFieldKeys(ctx context.Context, table string, field string, keys []string) (map[string]uint64, error) { +func (m *mdsTranslator) CreateFieldKeys(ctx context.Context, table string, field string, keys []string) (map[string]uint64, error) { qtid := dax.TableKey(table).QualifiedTableID() - address, err := m.mds.IngestPartition(ctx, qtid, dax.PartitionNum(0)) + address, err := m.noder.IngestPartition(ctx, qtid, dax.PartitionNum(0)) if err != nil { return nil, errors.Wrapf(err, "calling ingest-partition on table: %s, partition: %d", table, dax.PartitionNum(0)) } @@ -96,11 +98,11 @@ func (m *MDSTranslator) CreateFieldKeys(ctx context.Context, table string, field return fbClient.CreateFieldKeys(fld, keys...) } -func (m *MDSTranslator) FindIndexKeys(ctx context.Context, table string, keys []string) (map[string]uint64, error) { +func (m *mdsTranslator) FindIndexKeys(ctx context.Context, table string, keys []string) (map[string]uint64, error) { tkey := dax.TableKey(table) qtid := tkey.QualifiedTableID() - qtbl, err := m.mds.Table(ctx, qtid) + qtbl, err := m.schemar.TableByID(ctx, qtid) if err != nil { return nil, errors.Wrap(err, "getting table") } @@ -115,7 +117,7 @@ func (m *MDSTranslator) FindIndexKeys(ctx context.Context, table string, keys [] pNums = append(pNums, k) } - translateNodes, err := m.mds.TranslateNodes(ctx, qtid, pNums...) + translateNodes, err := m.noder.TranslateNodes(ctx, qtid, pNums...) 
if err != nil { return nil, errors.Wrapf(err, "getting translate nodes for partitions on table: %s", table) } @@ -149,9 +151,9 @@ func (m *MDSTranslator) FindIndexKeys(ctx context.Context, table string, keys [] return out, nil } -func (m *MDSTranslator) FindFieldKeys(ctx context.Context, table, field string, keys []string) (map[string]uint64, error) { +func (m *mdsTranslator) FindFieldKeys(ctx context.Context, table, field string, keys []string) (map[string]uint64, error) { qtid := dax.TableKey(table).QualifiedTableID() - address, err := m.mds.IngestPartition(ctx, qtid, dax.PartitionNum(0)) + address, err := m.noder.IngestPartition(ctx, qtid, dax.PartitionNum(0)) if err != nil { return nil, errors.Wrapf(err, "calling ingest-partition on table: %s, partition: %d", table, dax.PartitionNum(0)) } @@ -167,7 +169,7 @@ func (m *MDSTranslator) FindFieldKeys(ctx context.Context, table, field string, return fbClient.FindFieldKeys(fld, keys...) } -func (m *MDSTranslator) TranslateIndexIDs(ctx context.Context, index string, ids []uint64) ([]string, error) { +func (m *mdsTranslator) TranslateIndexIDs(ctx context.Context, index string, ids []uint64) ([]string, error) { idsByPartition := splitIDsByPartition(index, ids, 1<<20) // TODO(jaffee), don't hardcode shardwidth...need to get this from index info daxPartitions := make([]dax.PartitionNum, 0) for partition := range idsByPartition { @@ -176,7 +178,7 @@ func (m *MDSTranslator) TranslateIndexIDs(ctx context.Context, index string, ids qtid := dax.TableKey(index).QualifiedTableID() - nodes, err := m.mds.TranslateNodes(ctx, qtid, daxPartitions...) + nodes, err := m.noder.TranslateNodes(ctx, qtid, daxPartitions...) if err != nil { return nil, errors.Wrapf(err, "calling translate-nodes on table: %s, partitions: %v", index, daxPartitions) } @@ -210,7 +212,7 @@ func (m *MDSTranslator) TranslateIndexIDs(ctx context.Context, index string, ids return ret, nil } -func (m *MDSTranslator) TranslateIndexIDSet(ctx context.Context, table string, ids map[uint64]struct{}) (map[uint64]string, error) { +func (m *mdsTranslator) TranslateIndexIDSet(ctx context.Context, table string, ids map[uint64]struct{}) (map[uint64]string, error) { idList := make([]uint64, 0, len(ids)) for id := range ids { idList = append(idList, id) @@ -227,7 +229,7 @@ func (m *MDSTranslator) TranslateIndexIDSet(ctx context.Context, table string, i } return ret, nil } -func (m *MDSTranslator) TranslateFieldIDs(ctx context.Context, table, field string, ids map[uint64]struct{}) (map[uint64]string, error) { +func (m *mdsTranslator) TranslateFieldIDs(ctx context.Context, table, field string, ids map[uint64]struct{}) (map[uint64]string, error) { idList := make([]uint64, 0, len(ids)) for id := range ids { idList = append(idList, id) @@ -244,9 +246,9 @@ func (m *MDSTranslator) TranslateFieldIDs(ctx context.Context, table, field stri } return ret, nil } -func (m *MDSTranslator) TranslateFieldListIDs(ctx context.Context, index, field string, ids []uint64) ([]string, error) { +func (m *mdsTranslator) TranslateFieldListIDs(ctx context.Context, index, field string, ids []uint64) ([]string, error) { qtid := dax.TableKey(index).QualifiedTableID() - address, err := m.mds.IngestPartition(ctx, qtid, dax.PartitionNum(0)) + address, err := m.noder.IngestPartition(ctx, qtid, dax.PartitionNum(0)) if err != nil { return nil, errors.Wrapf(err, "calling ingest-partition on table: %s, partition: %d", index, dax.PartitionNum(0)) } diff --git a/dax/role.go b/dax/role.go index 48655a873..92712ca64 100644 --- a/dax/role.go +++ 
b/dax/role.go @@ -33,8 +33,8 @@ var _ Role = &TranslateRole{} // ComputeRole is a role specific to compute nodes. type ComputeRole struct { - TableKey TableKey `json:"table-key"` - Shards VersionedShards `json:"shards"` + TableKey TableKey `json:"table-key"` + Shards ShardNums `json:"shards"` } // Type returns the type for ComputeRole. This is mainly to implement the Role @@ -45,9 +45,9 @@ func (cr *ComputeRole) Type() RoleType { // TranslateRole is a role specific to translate nodes. type TranslateRole struct { - TableKey TableKey `json:"table-key"` - Partitions VersionedPartitions `json:"partitions"` - Fields VersionedFields `json:"fields"` + TableKey TableKey `json:"table-key"` + Partitions PartitionNums `json:"partitions"` + Fields []FieldName `json:"fields"` } // Type returns the type for TranslateRole. This is mainly to implement the Role diff --git a/dax/schema.go b/dax/schema.go new file mode 100644 index 000000000..f7b16abc4 --- /dev/null +++ b/dax/schema.go @@ -0,0 +1,51 @@ +package dax + +import "context" + +// Schemar is similar to the pilosa.SchemaAPI interface, but it takes +// TableQualifiers into account. +type Schemar interface { + TableByName(ctx context.Context, qual TableQualifier, tname TableName) (*QualifiedTable, error) + TableByID(ctx context.Context, qtid QualifiedTableID) (*QualifiedTable, error) + Tables(ctx context.Context, qual TableQualifier, tids ...TableID) ([]*QualifiedTable, error) + + CreateTable(ctx context.Context, qtbl *QualifiedTable) error + CreateField(ctx context.Context, qtid QualifiedTableID, fld *Field) error + + DropTable(ctx context.Context, qtid QualifiedTableID) error + DropField(ctx context.Context, qtid QualifiedTableID, fname FieldName) error +} + +////////////////////////////////////////////// + +// Ensure type implements interface. +var _ Schemar = &NopSchemar{} + +// NopSchemar is a no-op implementation of the Schemar interface.
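Note: ahead of the no-op implementation below, a consumer's-eye sketch of the new dax.Schemar interface. Name-based entry points resolve to a QualifiedTableID first, which is the same pattern qualifiedSchemaAPI follows; dropFieldByName is illustrative only, not part of this change:

func dropFieldByName(ctx context.Context, s dax.Schemar, qual dax.TableQualifier, tname dax.TableName, fname dax.FieldName) error {
	// Resolve the qualified table by name, then address it by its qualified ID.
	qtbl, err := s.TableByName(ctx, qual, tname)
	if err != nil {
		return err
	}
	return s.DropField(ctx, qtbl.QualifiedID(), fname)
}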
+type NopSchemar struct{} + +func NewNopSchemar() *NopSchemar { + return &NopSchemar{} +} + +func (s *NopSchemar) TableByName(context.Context, TableQualifier, TableName) (*QualifiedTable, error) { + return nil, nil +} +func (s *NopSchemar) TableByID(ctx context.Context, qtid QualifiedTableID) (*QualifiedTable, error) { + return nil, nil +} +func (s *NopSchemar) Tables(ctx context.Context, qual TableQualifier, tids ...TableID) ([]*QualifiedTable, error) { + return nil, nil +} +func (s *NopSchemar) CreateTable(ctx context.Context, qtbl *QualifiedTable) error { + return nil +} +func (s *NopSchemar) DropTable(ctx context.Context, qtid QualifiedTableID) error { + return nil +} +func (s *NopSchemar) CreateField(ctx context.Context, qtid QualifiedTableID, fld *Field) error { + return nil +} +func (s *NopSchemar) DropField(ctx context.Context, qtid QualifiedTableID, fld FieldName) error { + return nil +} diff --git a/dax/server/config.go b/dax/server/config.go index e852675e7..5d9fe80b5 100644 --- a/dax/server/config.go +++ b/dax/server/config.go @@ -85,6 +85,7 @@ func NewConfig() *Config { Config: mds.Config{ RegistrationBatchTimeout: time.Second * 3, StorageMethod: defaultStorageMethod, + SnappingTurtleTimeout: time.Second * 10, }, }, Bind: ":" + defaultBindPort, diff --git a/dax/server/server.go b/dax/server/server.go index f1ded7228..34fe15709 100644 --- a/dax/server/server.go +++ b/dax/server/server.go @@ -163,8 +163,7 @@ func (m *Command) Close() error { return nil default: eg := errgroup.Group{} - //eg.Go(m.Server.Close) - + eg.Go(m.svcmgr.StopAll) err := eg.Wait() //_ = testhook.Closed(pilosa.NewAuditor(), m, nil) close(m.done) @@ -284,6 +283,7 @@ func (m *Command) setupServices() error { RegistrationBatchTimeout: m.Config.MDS.Config.RegistrationBatchTimeout, StorageMethod: m.Config.MDS.Config.StorageMethod, DataDir: m.Config.MDS.Config.DataDir, + SnappingTurtleTimeout: m.Config.MDS.Config.SnappingTurtleTimeout, Logger: m.logger, Director: controllerhttp.NewDirector( controllerhttp.DirectorConfig{ @@ -309,7 +309,7 @@ func (m *Command) setupServices() error { var mdsAddr dax.Address if m.Config.Queryer.Config.MDSAddress != "" { - mdsAddr = dax.Address(m.Config.Queryer.Config.MDSAddress + "/" + dax.ServicePrefixMDS) + mdsAddr = dax.Address(m.Config.Queryer.Config.MDSAddress) } else if m.svcmgr.MDS != nil { mdsAddr = m.svcmgr.MDS.Address() } else { diff --git a/dax/service_manager.go b/dax/service_manager.go index b541cf081..ec24897c9 100644 --- a/dax/service_manager.go +++ b/dax/service_manager.go @@ -82,6 +82,18 @@ func (s *ServiceManager) StartAll() error { return nil } +func (s *ServiceManager) StopAll() error { + for key := range s.computers { + if err := s.ComputerStop(key); err != nil { + s.Logger.Printf("stopping computer %s: %v", key, err) + } + } + if err := s.QueryerStop(); err != nil { + s.Logger.Printf("stopping queryer: %v", err) + } + return s.MDSStop() +} + // MDSStart starts the MDS service. 
func (s *ServiceManager) MDSStart() error { if s.MDS == nil { diff --git a/dax/snapshot.go b/dax/snapshot.go index dd2d06a9c..8b58c84bf 100644 --- a/dax/snapshot.go +++ b/dax/snapshot.go @@ -3,12 +3,8 @@ package dax type SnapshotShardDataRequest struct { Address Address `json:"address"` - TableKey TableKey `json:"table-key"` - ShardNum ShardNum `json:"shard"` - FromVersion int `json:"from-version"` - ToVersion int `json:"to-version"` - - Directive Directive `json:"directive"` + TableKey TableKey `json:"table-key"` + ShardNum ShardNum `json:"shard"` } type SnapshotTableKeysRequest struct { @@ -16,19 +12,11 @@ type SnapshotTableKeysRequest struct { TableKey TableKey `json:"table-key"` PartitionNum PartitionNum `json:"partition"` - FromVersion int `json:"from-version"` - ToVersion int `json:"to-version"` - - Directive Directive `json:"directive"` } type SnapshotFieldKeysRequest struct { Address Address `json:"address"` - TableKey TableKey `json:"table-key"` - Field FieldName `json:"field"` - FromVersion int `json:"from-version"` - ToVersion int `json:"to-version"` - - Directive Directive `json:"directive"` + TableKey TableKey `json:"table-key"` + Field FieldName `json:"field"` } diff --git a/dax/snapshotter/api/openapi.yaml b/dax/snapshotter/api/openapi.yaml deleted file mode 100644 index bfb5072ed..000000000 --- a/dax/snapshotter/api/openapi.yaml +++ /dev/null @@ -1,101 +0,0 @@ -openapi: 3.0.3 - -info: - title: Snapshotter - description: The alpha implementation of the Snapshotter interface. - version: 0.0.0 - -paths: - /snapshotter/health: - get: - summary: Health check endpoint. - description: Provides an endpoint to check the overall health of the Snapshotter service. - operationId: GetHealth - responses: - 200: - description: Service is healthy. - - - /snapshotter/write-snapshot: - post: - summary: Write snapshot. - description: Write snapshot based on bucket/key. - operationId: PostWriteSnapshot - parameters: - - name: bucket - in: query - description: bucket containing snapshot key - required: true - schema: - type: string - - name: key - in: query - description: key identifying snapshot - required: true - schema: - type: string - - name: version - in: query - description: bucket/key version - required: true - schema: - type: integer - format: int64 - requestBody: - content: - text/plain: - schema: - type: string - format: byte - responses: - 200: - $ref: '#/components/responses/WriteSnapshotResponse' - - /snapshotter/read-snapshot: - get: - summary: Read snapshot. - description: Read snapshot based on bucket/key. - operationId: GetReadSnapshot - parameters: - - name: bucket - in: query - description: bucket containing snapshot key - required: true - schema: - type: string - - name: key - in: query - description: key identifying snapshot - required: true - schema: - type: string - - name: version - in: query - description: bucket/key version - required: true - schema: - type: integer - format: int64 - requestBody: - content: - text/plain: - schema: - type: string - format: byte - responses: - 200: - description: Bytes making up the contents of the snapshot. - content: - text/plain: - schema: - type: string - format: byte - -components: - responses: - WriteSnapshotResponse: - description: Placeholder response. 
- content: - application/json: - schema: - type: object \ No newline at end of file diff --git a/dax/snapshotter/client/client.go b/dax/snapshotter/client/client.go deleted file mode 100644 index ac8e5a72a..000000000 --- a/dax/snapshotter/client/client.go +++ /dev/null @@ -1,109 +0,0 @@ -// Package client contains an http implementation of the WriteLogger client. -package client - -import ( - "bytes" - "encoding/json" - "fmt" - "io" - "net/http" - "net/url" - - "github.com/featurebasedb/featurebase/v3/dax" - snapshotterhttp "github.com/featurebasedb/featurebase/v3/dax/snapshotter/http" - "github.com/featurebasedb/featurebase/v3/errors" -) - -const defaultScheme = "http" - -// Snapshotter is a client for the Snapshotter API methods. -type Snapshotter struct { - address dax.Address -} - -func New(address dax.Address) *Snapshotter { - return &Snapshotter{ - address: address, - } -} - -func (s *Snapshotter) Write(bucket string, key string, version int, rc io.ReadCloser) error { - url := fmt.Sprintf("%s/snapshotter/write-snapshot?bucket=%s&key=%s&version=%d", - s.address.WithScheme(defaultScheme), - url.QueryEscape(bucket), - url.QueryEscape(key), - version, - ) - - // Post the request. - resp, err := http.Post(url, "", rc) - if err != nil { - return errors.Wrap(err, "posting write-snapshot") - } - defer resp.Body.Close() - - if resp.StatusCode != http.StatusOK { - b, _ := io.ReadAll(resp.Body) - return errors.Errorf("status code: %d: %s", resp.StatusCode, b) - } - - var wsr snapshotterhttp.WriteSnapshotResponse - if err := json.NewDecoder(resp.Body).Decode(&wsr); err != nil { - return errors.Wrap(err, "reading response body") - } - - return nil -} - -// WriteTo is exactly the same as Write, except that it takes an io.WriteTo -// instead of an io.ReadCloser. This needs to be cleaned up so that we're only -// using one or the other. -func (s *Snapshotter) WriteTo(bucket string, key string, version int, wrTo io.WriterTo) error { - url := fmt.Sprintf("%s/snapshotter/write-snapshot?bucket=%s&key=%s&version=%d", - s.address.WithScheme(defaultScheme), - url.QueryEscape(bucket), - url.QueryEscape(key), - version, - ) - - buf := &bytes.Buffer{} - if _, err := wrTo.WriteTo(buf); err != nil { - return errors.Wrap(err, "writing to buffer") - } - - // Post the request. - resp, err := http.Post(url, "", buf) - if err != nil { - return errors.Wrap(err, "posting write-snapshot") - } - defer resp.Body.Close() - - if resp.StatusCode != http.StatusOK { - b, _ := io.ReadAll(resp.Body) - return errors.Errorf("status code: %d: %s", resp.StatusCode, b) - } - - var wsr snapshotterhttp.WriteSnapshotResponse - if err := json.NewDecoder(resp.Body).Decode(&wsr); err != nil { - return errors.Wrap(err, "reading response body") - } - - return nil -} - -func (s *Snapshotter) Read(bucket string, key string, version int) (io.ReadCloser, error) { - url := fmt.Sprintf("%s/snapshotter/read-snapshot?bucket=%s&key=%s&version=%d", - s.address.WithScheme(defaultScheme), - url.QueryEscape(bucket), - url.QueryEscape(key), - version, - ) - - // Get the request. 
- resp, err := http.Get(url) - if err != nil { - return nil, errors.Wrap(err, "getting read-snapshot") - } - - return resp.Body, nil -} diff --git a/dax/snapshotter/http/handler.go b/dax/snapshotter/http/handler.go deleted file mode 100644 index b559b2845..000000000 --- a/dax/snapshotter/http/handler.go +++ /dev/null @@ -1,120 +0,0 @@ -package http - -import ( - "encoding/json" - "io" - "net/http" - "strconv" - - "github.com/gorilla/mux" - "github.com/featurebasedb/featurebase/v3/dax/snapshotter" - "github.com/featurebasedb/featurebase/v3/rbf" -) - -func Handler(s *snapshotter.Snapshotter) http.Handler { - svr := &server{ - snapshotter: s, - } - - router := mux.NewRouter() - router.HandleFunc("/health", svr.getHealth).Methods("GET").Name("GetHealth") - router.HandleFunc("/write-snapshot", svr.postWriteSnapshot).Methods("POST").Name("PostWriteSnapshot") - router.HandleFunc("/read-snapshot", svr.getReadSnapshot).Methods("GET").Name("GetReadSnapshot") - return router -} - -type server struct { - snapshotter *snapshotter.Snapshotter -} - -// GET /health -func (s *server) getHealth(w http.ResponseWriter, r *http.Request) { - w.WriteHeader(http.StatusOK) -} - -// POST /write-snapshot -func (s *server) postWriteSnapshot(w http.ResponseWriter, r *http.Request) { - bucket := r.URL.Query().Get("bucket") - if bucket == "" { - http.Error(w, "bucket required", http.StatusBadRequest) - return - } - - key := r.URL.Query().Get("key") - if key == "" { - http.Error(w, "key required", http.StatusBadRequest) - return - } - - versionArg := r.URL.Query().Get("version") - versionInt64, err := strconv.ParseInt(versionArg, 10, 64) - if err != nil { - http.Error(w, "bad shard", http.StatusBadRequest) - return - } - version := int(versionInt64) - - body := r.Body - defer body.Close() - - if err := s.snapshotter.Write(bucket, key, version, body); err != nil { - http.Error(w, err.Error(), http.StatusBadRequest) - return - } - - resp := &WriteSnapshotResponse{} - - if err := json.NewEncoder(w).Encode(resp); err != nil { - http.Error(w, err.Error(), http.StatusBadRequest) - return - } -} - -type WriteSnapshotResponse struct{} - -// GET /read-snapshot -func (s *server) getReadSnapshot(w http.ResponseWriter, r *http.Request) { - bucket := r.URL.Query().Get("bucket") - if bucket == "" { - http.Error(w, "bucket required", http.StatusBadRequest) - return - } - - key := r.URL.Query().Get("key") - if key == "" { - http.Error(w, "key required", http.StatusBadRequest) - return - } - - versionArg := r.URL.Query().Get("version") - versionInt64, err := strconv.ParseInt(versionArg, 10, 64) - if err != nil { - http.Error(w, "bad shard", http.StatusBadRequest) - return - } - version := int(versionInt64) - - rc, err := s.snapshotter.Read(bucket, key, version) - if err != nil { - http.Error(w, err.Error(), http.StatusBadRequest) - return - } - defer rc.Close() - - // TODO: is rbf.PageSize a problem here for non-RBF snapshots (i.e. keys)? - // Copy data to response body. - if _, err := io.CopyBuffer(&passthroughWriter{w}, rc, make([]byte, rbf.PageSize)); err != nil { - http.Error(w, err.Error(), http.StatusInternalServerError) - return - } -} - -// passthroughWriter is used to remove non-Writer interfaces from an io.Writer. -// For example, a writer that implements io.ReaderFrom can change io.Copy() behavior. 
-type passthroughWriter struct { - w io.Writer -} - -func (w *passthroughWriter) Write(p []byte) (int, error) { - return w.w.Write(p) -} diff --git a/dax/snapshotter/snapshotter.go b/dax/snapshotter/snapshotter.go index 7f46fc012..b311b0e12 100644 --- a/dax/snapshotter/snapshotter.go +++ b/dax/snapshotter/snapshotter.go @@ -8,12 +8,18 @@ import ( "io/fs" "os" "path" + "strconv" "sync" + "syscall" + "github.com/featurebasedb/featurebase/v3/dax/computer" "github.com/featurebasedb/featurebase/v3/errors" "github.com/featurebasedb/featurebase/v3/logger" ) +// bucket = table + partition or table + field +// key = "shard/num" or "keys" + type Snapshotter struct { mu sync.RWMutex @@ -50,6 +56,29 @@ func (s *Snapshotter) Write(bucket string, key string, version int, rc io.ReadCl return snapshotFile.Sync() } +func (s *Snapshotter) List(bucket, key string) ([]computer.SnapInfo, error) { + dirpath := path.Join(s.dataDir, bucket, key) + + entries, err := os.ReadDir(dirpath) + if err != nil { + if pe, ok := err.(*os.PathError); ok && pe.Err == syscall.ENOENT { + return nil, nil + } + return nil, errors.Wrap(err, "reading directory") + } + snaps := make([]computer.SnapInfo, len(entries)) + for i, entry := range entries { + version, err := strconv.ParseInt(entry.Name(), 10, 64) + if err != nil { + return nil, errors.Wrapf(err, "filename '%s' could not be parsed to version number", entry.Name()) + } + snaps[i] = computer.SnapInfo{ + Version: int(version), + } + } + return snaps, nil +} + func (s *Snapshotter) Read(bucket string, key string, version int) (io.ReadCloser, error) { _, filePath := s.paths(fullKey(bucket, key, version)) f, err := os.Open(filePath) diff --git a/dax/storage/encoding.go b/dax/storage/encoding.go new file mode 100644 index 000000000..3042da236 --- /dev/null +++ b/dax/storage/encoding.go @@ -0,0 +1,149 @@ +package storage + +import ( + "bufio" + "encoding/json" + "io" + + "github.com/featurebasedb/featurebase/v3/dax" + "github.com/featurebasedb/featurebase/v3/dax/computer" +) + +// TODO this needs to be genericized and moved + +type TableKeyReader struct { + table dax.TableKey + partition dax.PartitionNum + scanner *bufio.Scanner + closer io.Closer +} + +func NewTableKeyReader(qtid dax.QualifiedTableID, partition dax.PartitionNum, writelog io.ReadCloser) *TableKeyReader { + r := &TableKeyReader{ + table: qtid.Key(), + partition: partition, + scanner: bufio.NewScanner(writelog), + closer: writelog, + } + + return r +} + +func (r *TableKeyReader) Read() (computer.PartitionKeyMap, error) { + if r.scanner == nil { + return computer.PartitionKeyMap{}, io.EOF + } + + var b []byte + var out computer.PartitionKeyMap + + if r.scanner.Scan() { + b = r.scanner.Bytes() + if err := json.Unmarshal(b, &out); err != nil { + return out, err + } + return out, nil + } + if err := r.scanner.Err(); err != nil { + return out, err + } + + return out, io.EOF +} + +func (r *TableKeyReader) Close() error { + if r.closer != nil { + return r.closer.Close() + } + return nil +} + +type FieldKeyReader struct { + table dax.TableKey + field dax.FieldName + scanner *bufio.Scanner + closer io.Closer +} + +func NewFieldKeyReader(qtid dax.QualifiedTableID, field dax.FieldName, writelog io.ReadCloser) *FieldKeyReader { + r := &FieldKeyReader{ + table: qtid.Key(), + field: field, + scanner: bufio.NewScanner(writelog), + closer: writelog, + } + + return r +} + +func (r *FieldKeyReader) Read() (computer.FieldKeyMap, error) { + if r.scanner == nil { + return computer.FieldKeyMap{}, io.EOF + } + + var b []byte + var out 
computer.FieldKeyMap + + if r.scanner.Scan() { + b = r.scanner.Bytes() + if err := json.Unmarshal(b, &out); err != nil { + return out, err + } + return out, nil + } + if err := r.scanner.Err(); err != nil { + return out, err + } + + return out, io.EOF +} + +func (r *FieldKeyReader) Close() error { + if r.closer != nil { + return r.closer.Close() + } + return nil +} + +type ShardReader struct { + table dax.TableKey + partition dax.PartitionNum + shard dax.ShardNum + version int + scanner *bufio.Scanner + closer io.Closer +} + +func NewShardReader(qtid dax.QualifiedTableID, partition dax.PartitionNum, shard dax.ShardNum, writelog io.ReadCloser) *ShardReader { + r := &ShardReader{ + table: qtid.Key(), + partition: partition, + shard: shard, + scanner: bufio.NewScanner(writelog), + closer: writelog, + } + + return r +} + +func (r *ShardReader) Read() (computer.LogMessage, error) { + if r.scanner == nil { + return nil, io.EOF + } + + if r.scanner.Scan() { + return computer.UnmarshalLogMessage(r.scanner.Bytes()) + } + if err := r.scanner.Err(); err != nil { + return nil, err + } + + return nil, io.EOF +} + +func (r *ShardReader) Close() error { + if r.closer != nil { + return r.closer.Close() + } + return nil +} diff --git a/dax/storage/storage.go b/dax/storage/storage.go new file mode 100644 index 000000000..96dc17876 --- /dev/null +++ b/dax/storage/storage.go @@ -0,0 +1,430 @@ +package storage + +import ( + "fmt" + "io" + "path" + "strings" + "sync" + + "github.com/featurebasedb/featurebase/v3/dax" + "github.com/featurebasedb/featurebase/v3/dax/computer" + "github.com/featurebasedb/featurebase/v3/errors" + "github.com/featurebasedb/featurebase/v3/logger" +) + +// ResourceManager holds all the various Resources each of which is +// specific to a particular shard, table key partition or field, but +// all of which use the same underlying snapshotter and writelogger. 
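Note: before the ResourceManager type itself, a sketch of the intended call sequence for a compute node claiming a shard. It mirrors the 1-4 protocol documented on Resource.Lock further down; restore and replay are hypothetical stand-ins for local state handling, and error handling on the loads is elided:

func startupSketch(mgr *storage.ResourceManager, qtid dax.QualifiedTableID) error {
	res := mgr.GetShardResource(qtid, dax.PartitionNum(0), dax.ShardNum(0))
	snap, _ := res.LoadLatestSnapshot() // nil if nothing has been snapshotted yet
	restore(snap)                       // hypothetical: rebuild local state
	wl, _ := res.LoadWriteLog()         // writes made since that snapshot
	replay(wl)                          // hypothetical: apply them
	if err := res.Lock(); err != nil {  // take exclusive ownership
		return err
	}
	wl, _ = res.LoadWriteLog() // re-read: catch writes that raced the lock
	replay(wl)
	return res.Append([]byte("...")) // safe to write from here on
}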
+type ResourceManager struct { + Snapshotter computer.SnapshotService + WriteLogger computer.WriteLogService + Logger logger.Logger + + mu sync.Mutex + shardResources map[shardK]*Resource + tableKeyResources map[tableKeyK]*Resource + fieldKeyResources map[fieldKeyK]*Resource +} + +func NewResourceManager(s computer.SnapshotService, w computer.WriteLogService, l logger.Logger) *ResourceManager { + return &ResourceManager{ + Snapshotter: s, + WriteLogger: w, + Logger: l, + + shardResources: make(map[shardK]*Resource), + tableKeyResources: make(map[tableKeyK]*Resource), + fieldKeyResources: make(map[fieldKeyK]*Resource), + } +} + +// compound map keys + +type shardK struct { + qtid dax.QualifiedTableID + partition dax.PartitionNum + shard dax.ShardNum +} + +type tableKeyK struct { + qtid dax.QualifiedTableID + partition dax.PartitionNum +} + +type fieldKeyK struct { + qtid dax.QualifiedTableID + field dax.FieldName +} + +func (mm *ResourceManager) GetShardResource(qtid dax.QualifiedTableID, partition dax.PartitionNum, shard dax.ShardNum) *Resource { + mm.mu.Lock() + defer mm.mu.Unlock() + key := shardK{qtid: qtid, partition: partition, shard: shard} + if m, ok := mm.shardResources[key]; ok { + return m + } + mm.shardResources[key] = (&Resource{ + snapshotter: mm.Snapshotter, + writeLogger: mm.WriteLogger, + bucket: partitionBucket(qtid.Key(), partition), + key: shardKey(shard), + log: mm.Logger, + }).initialize() + + return mm.shardResources[key] +} + +func (mm *ResourceManager) RemoveShardResource(qtid dax.QualifiedTableID, partition dax.PartitionNum, shard dax.ShardNum) { + mm.mu.Lock() + defer mm.mu.Unlock() + key := shardK{qtid: qtid, partition: partition, shard: shard} + if m, ok := mm.shardResources[key]; ok { + err := m.Unlock() + if err != nil { + mm.Logger.Printf("unlocking shard resource during removal: %v", err) + } + delete(mm.shardResources, key) + } +} + +func (mm *ResourceManager) GetTableKeyResource(qtid dax.QualifiedTableID, partition dax.PartitionNum) *Resource { + mm.mu.Lock() + defer mm.mu.Unlock() + key := tableKeyK{qtid: qtid, partition: partition} + if m, ok := mm.tableKeyResources[key]; ok { + return m + } + mm.tableKeyResources[key] = (&Resource{ + snapshotter: mm.Snapshotter, + writeLogger: mm.WriteLogger, + bucket: partitionBucket(qtid.Key(), partition), + key: keysFileName, + log: mm.Logger, + }).initialize() + return mm.tableKeyResources[key] +} + +func (mm *ResourceManager) RemoveTableKeyResource(qtid dax.QualifiedTableID, partition dax.PartitionNum) { + mm.mu.Lock() + defer mm.mu.Unlock() + key := tableKeyK{qtid: qtid, partition: partition} + if m, ok := mm.tableKeyResources[key]; ok { + err := m.Unlock() + if err != nil { + mm.Logger.Printf("unlocking table key resource during removal: %v", err) + } + delete(mm.tableKeyResources, key) + } +} + +func (mm *ResourceManager) GetFieldKeyResource(qtid dax.QualifiedTableID, field dax.FieldName) *Resource { + mm.mu.Lock() + defer mm.mu.Unlock() + key := fieldKeyK{qtid: qtid, field: field} + if m, ok := mm.fieldKeyResources[key]; ok { + return m + } + mm.fieldKeyResources[key] = (&Resource{ + snapshotter: mm.Snapshotter, + writeLogger: mm.WriteLogger, + bucket: fieldBucket(qtid.Key(), field), + key: keysFileName, + log: mm.Logger, + }).initialize() + return mm.fieldKeyResources[key] +} + +func (mm *ResourceManager) RemoveFieldKeyResource(qtid dax.QualifiedTableID, field dax.FieldName) { + mm.mu.Lock() + defer mm.mu.Unlock() + key := fieldKeyK{qtid: qtid, field: field} + if m, ok := mm.fieldKeyResources[key]; ok { + err 
:= m.Unlock() + if err != nil { + mm.Logger.Printf("unlocking field key resource during removal: %v", err) + } + delete(mm.fieldKeyResources, key) + } +} + +// RemoveAll unlocks and deletes all resources held within this +// ResourceManager. +func (mm *ResourceManager) RemoveAll() error { + mm.mu.Lock() + defer mm.mu.Unlock() + + errList := make([]error, 0) + for k, resource := range mm.shardResources { + err := resource.Unlock() + if err != nil && !strings.Contains(err.Error(), "resource was not locked") { + errList = append(errList, err) + } + delete(mm.shardResources, k) + } + for k, resource := range mm.tableKeyResources { + err := resource.Unlock() + if err != nil && !strings.Contains(err.Error(), "resource was not locked") { + errList = append(errList, err) + } + delete(mm.tableKeyResources, k) + } + for k, resource := range mm.fieldKeyResources { + err := resource.Unlock() + if err != nil && !strings.Contains(err.Error(), "resource was not locked") { + errList = append(errList, err) + } + delete(mm.fieldKeyResources, k) + } + if len(errList) > 0 { + return errors.Errorf("%v", errList) + } + return nil +} + +// Resource wraps the snapshotter and writelogger to maintain messy +// state between calls. Resource is *not* threadsafe; care should be +// taken that concurrent calls are not made to Resource methods. The +// exception is that Snapshot and Append are safe to call +// concurrently. +type Resource struct { + snapshotter computer.SnapshotService + writeLogger computer.WriteLogService + bucket string + key string + + log logger.Logger + + loadWLsPastVersion int + latestWLVersion int + lastWLPos int + + locked bool + + dirty bool +} + +func (m *Resource) initialize() *Resource { + m.loadWLsPastVersion = -2 + m.latestWLVersion = -1 + m.lastWLPos = -1 + return m +} + +// IsLocked checks to see if this particular instance of the resource +// believes it holds the lock. It does not look at the state of +// underlying storage to verify the lock. +func (m *Resource) IsLocked() bool { + return m.locked +} + +// LoadLatestSnapshot finds the most recent snapshot for this resource +// and returns a ReadCloser for that snapshot data. If there is no +// snapshot for this resource it returns nil, nil. +func (m *Resource) LoadLatestSnapshot() (data io.ReadCloser, err error) { + snaps, err := m.snapshotter.List(m.bucket, m.key) + if err != nil { + return nil, errors.Wrap(err, "listing snapshots") + } + m.log.Debugf("LoadLatestSnapshot %s/%s: list: %v", m.bucket, m.key, snaps) + m.lastWLPos = 0 + + if len(snaps) == 0 { + m.loadWLsPastVersion = -1 + return nil, nil + } + // assuming snapshots come back in sorted order + latest := snaps[len(snaps)-1] + m.loadWLsPastVersion = latest.Version + + // TODO(jaffee): whatever is using the snapshot may discover that + // it is corrupted/incomplete. We don't want to separately check + // the checksum in here because then we'd have to read the whole + // snapshot twice. Need a way to catch the checksum error and tell + // Resource to mark that version as bad and remove it, then try + // LoadLatestSnapshot again. + return m.snapshotter.Read(m.bucket, m.key, latest.Version) +} + +// // Potential future methods to support getting older versions. SnapInfo would have timestamp information as well. +// +// ListSnapshots() []SnapInfo +// LoadSnapshot(version int) (data io.ReadCloser, err error) + +// LoadWriteLog can be called after LoadLatestSnapshot. It loads any +// writelog data which has been written since the latest +// snapshot.
Subsequent calls to LoadWriteLog will only return new +// data that hasn't previously been returned from LoadWriteLog. If +// there is no writelog, it returns nil, nil. +func (m *Resource) LoadWriteLog() (data io.ReadCloser, err error) { + if m.loadWLsPastVersion == -2 { + return nil, errors.New(errors.ErrUncoded, "LoadWriteLog called in inconsistent state, can't tell what version to load from") + } + wLogs, err := m.writeLogger.List(m.bucket, m.key) + if err != nil { + return nil, errors.Wrap(err, "listing write logs") + } + + m.log.Debugf("LoadWriteLog %s/%s: list: %v", m.bucket, m.key, wLogs) + + versions := make([]int, 0, len(wLogs)) + for _, log := range wLogs { + if log.Version > m.loadWLsPastVersion { + versions = append(versions, log.Version) + } + } + + if len(versions) > 1 { + // TODO(jaffee) This can happen if there's a failure writing a + // snapshot. Need to implement a MultiReadCloser or similar + // that wraps all the latest write logs into one ReadCloser. + // It should only wrap the last one in a trackingReader. + return nil, errors.New(dax.ErrUnimplemented, "UNIMPLEMENTED: multiple write log versions ahead of latest snapshot.") + } + + if len(versions) == 0 { + m.log.Debugf("LoadWriteLog: no logs after snapshot: %d on %s", m.loadWLsPastVersion, path.Join(m.bucket, m.key)) + m.latestWLVersion = m.loadWLsPastVersion + 1 + return nil, nil + } + + if m.locked && m.latestWLVersion != versions[0] { + return nil, errors.New(errors.ErrUncoded, "write log version gone since locking") + } + m.latestWLVersion = versions[0] + m.dirty = true + + r, err := m.writeLogger.LogReaderFrom(m.bucket, m.key, versions[0], m.lastWLPos) + if err != nil { + return nil, errors.Wrap(err, "getting writelog") + } + return &trackingReader{ + r: r, + update: func(n int, err error) { + m.lastWLPos += n + }, + }, nil +} + +// Lock acquires an advisory lock for this resource which grants +// us exclusive access to write to it. The normal pattern is to +// call: +// +// 1. LoadLatestSnapshot +// 2. LoadWriteLog +// 3. Lock +// 4. LoadWriteLog +// +// The second call to LoadWriteLog is necessary in case any writes +// occurred between the last load and acquiring the lock. Once the +// lock is acquired it should not be possible for any more writes +// to occur. Lock will error if (a) we fail to acquire the lock or +// (b) the state of the snapshot store for this resource is not +// identical to what it was before the lock was acquired. Case (b) +// means that quite a lot has happened in between LoadWriteLog and +// Lock, and we should probably just die and start over. +func (m *Resource) Lock() error { + m.log.Debugf("Lock %s/%s", m.bucket, m.key) + // lock is sort of arbitrarily on the write log interface + if err := m.writeLogger.Lock(m.bucket, m.key); err != nil { + return errors.Wrap(err, "acquiring lock") + } + m.locked = true + return nil +} + +// Append appends the msg to the write log. It will fail if we +// haven't properly loaded and gotten a lock for the resource +// we're writing to. +func (m *Resource) Append(msg []byte) error { + m.log.Debugf("Append %s/%s", m.bucket, m.key) + if m.latestWLVersion < 0 { + return errors.New(errors.ErrUncoded, "can't call append before loading and locking write log") + } + m.dirty = true + return m.writeLogger.AppendMessage(m.bucket, m.key, m.latestWLVersion, msg) +} + +// IncrementWLVersion should be called during snapshotting with a +// write Tx held on the local resource.
This ensures that any writes +// which completed prior to the snapshot are in the prior WL and any +// that complete after the snapshot are in the incremented WL. If +// there have been no writes since the latest snapshot, this returns +// false and does nothing. In this case, Snapshot should *not* be +// called. +func (m *Resource) IncrementWLVersion() (bool, error) { + if !m.dirty { + return false, nil + } + m.log.Debugf("IncrementWLVersion %s/%s", m.bucket, m.key) + m.latestWLVersion++ + m.lastWLPos = -1 + m.loadWLsPastVersion = -1 + m.dirty = false + return true, nil +} + +// Snapshot takes a ReadCloser which has the contents of the resource +// being tracked at a particular point in time and writes them to the +// Snapshot Store. Upon a successful write it will truncate any write +// logs which are now incorporated into the snapshot. Do not call +// until after calling IncrementWLVersion, and only if that method +// returns "true". +func (m *Resource) Snapshot(rc io.ReadCloser) error { + m.log.Debugf("Snapshot %s/%s", m.bucket, m.key) + // latestWLVersion has already been incremented at this point, so + // we write that version minus 1. + err := m.snapshotter.Write(m.bucket, m.key, m.latestWLVersion-1, rc) + if err != nil { + return errors.Wrap(err, "writing snapshot") + } + err = m.writeLogger.DeleteLog(m.bucket, m.key, m.latestWLVersion-1) + return errors.Wrap(err, "deleting old write log") +} + +// SnapshotTo is Snapshot's ugly stepsister supporting the weirdness +// of reading from translate stores who we're hoping to off in the +// next season. +func (m *Resource) SnapshotTo(wt io.WriterTo) error { + m.log.Debugf("SnapshotTo %s/%s", m.bucket, m.key) + err := m.snapshotter.WriteTo(m.bucket, m.key, m.latestWLVersion-1, wt) + if err != nil { + return errors.Wrap(err, "writing snapshot SnapshotTo") + } + err = m.writeLogger.DeleteLog(m.bucket, m.key, m.latestWLVersion-1) + return errors.Wrap(err, "deleting old write log snapshotTo") +} + +// Unlock releases the lock. This should be called if control of +// the underlying resource is being transitioned to another +// node. Ideally it's also called if the process crashes (e.g. via +// a defer), but an implementation based on filesystem locks +// should have those removed by the operating system when the +// process exits anyway. 
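Note: putting IncrementWLVersion and Snapshot together, the snapshotting side of the lifecycle looks roughly like this (a sketch only; serializeState is hypothetical, and in practice the increment happens under a local write Tx as described above):

func snapshotSketch(res *storage.Resource) error {
	ok, err := res.IncrementWLVersion()
	if err != nil || !ok {
		return err // no writes since the last snapshot: skip Snapshot entirely
	}
	rc := io.NopCloser(bytes.NewReader(serializeState()))
	return res.Snapshot(rc) // writes version latestWL-1, then deletes that write log
}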
+func (m *Resource) Unlock() error { + m.log.Debugf("Unlock %s/%s", m.bucket, m.key) + if !m.locked { + return errors.New(errors.ErrUncoded, "resource was not locked") + } + if err := m.writeLogger.Unlock(m.bucket, m.key); err != nil { + return errors.Wrap(err, "unlocking") + } + m.locked = false + return nil +} + +const ( + keysFileName = "keys" +) + +func partitionBucket(table dax.TableKey, partition dax.PartitionNum) string { + return path.Join(string(table), "partition", fmt.Sprintf("%d", partition)) +} + +func shardKey(shard dax.ShardNum) string { + return path.Join("shard", fmt.Sprintf("%d", shard)) +} + +func fieldBucket(table dax.TableKey, field dax.FieldName) string { + return path.Join(string(table), "field", string(field)) +} diff --git a/dax/storage/storage_test.go b/dax/storage/storage_test.go new file mode 100644 index 000000000..0b368569a --- /dev/null +++ b/dax/storage/storage_test.go @@ -0,0 +1,162 @@ +package storage + +import ( + "bytes" + "io" + "os" + + "testing" + + "github.com/featurebasedb/featurebase/v3/dax" + "github.com/featurebasedb/featurebase/v3/dax/snapshotter" + "github.com/featurebasedb/featurebase/v3/dax/writelogger" + "github.com/featurebasedb/featurebase/v3/logger" + "github.com/stretchr/testify/assert" +) + +func TestResourceManager(t *testing.T) { + sdd, err := os.MkdirTemp("", "snaptest*") + assert.NoError(t, err) + wdd, err := os.MkdirTemp("", "wltest*") + assert.NoError(t, err) + defer func() { + os.RemoveAll(sdd) + os.RemoveAll(wdd) + }() + + sn := snapshotter.New(snapshotter.Config{ + DataDir: sdd, + }) + wl := writelogger.New(writelogger.Config{ + DataDir: wdd, + }) + + mm := NewResourceManager(sn, wl, logger.NewStandardLogger(os.Stderr)) + + qtid := dax.QualifiedTableID{ + TableQualifier: dax.TableQualifier{ + OrganizationID: dax.OrganizationID("org1"), + DatabaseID: dax.DatabaseID("db1"), + }, + ID: dax.TableID("blah"), + Name: "blah", + } + var n int + var d, wld io.ReadCloser + + // get a resource and perform normal startup routine on empty data + resource := mm.GetShardResource(qtid, dax.PartitionNum(1), dax.ShardNum(1)) + + d, err = resource.LoadLatestSnapshot() + assert.NoError(t, err) + assert.Nil(t, d) + + wld, err = resource.LoadWriteLog() + assert.NoError(t, err) + assert.Nil(t, wld) + + err = resource.Lock() + assert.NoError(t, err) + + wld, err = resource.LoadWriteLog() + assert.NoError(t, err) + assert.Nil(t, wld) + + // append some data + err = resource.Append([]byte("blahblah")) + assert.NoError(t, err) + + // a new ResourceManager is necessary so we get a new Resource with + // new internal state instead of a cached Resource. 
+ mm2 := NewResourceManager(sn, wl, logger.NewStandardLogger(os.Stderr)) + // get second resource for same stuff + resource2 := mm2.GetShardResource(qtid, dax.PartitionNum(1), dax.ShardNum(1)) + // load snapshot on 2nd resource (empty) + d, err = resource2.LoadLatestSnapshot() + assert.NoError(t, err) + assert.Nil(t, d) + + // load WL on 2nd resource (blahblah) + wld, err = resource2.LoadWriteLog() + assert.NoError(t, err) + buf := make([]byte, 16) + n, _ = wld.Read(buf) + assert.Equal(t, 9, n) + assert.Equal(t, "blahblah\n", string(buf[:9])) + n, err = wld.Read(buf) + assert.Equal(t, 0, n) + assert.Equal(t, io.EOF, err) + + // begin snapshot procedure on 1st resource + ok, err := resource.IncrementWLVersion() + assert.Equal(t, true, ok) + assert.NoError(t, err) + + // do append on 1st resource mid-snapshot + err = resource.Append([]byte("blahbla2")) + assert.NoError(t, err) + + // snapshot 1st resource + rc := io.NopCloser(bytes.NewBufferString("hahaha")) + err = resource.Snapshot(rc) + assert.NoError(t, err) + + // append again on 1st resource + err = resource.Append([]byte("blahbla3")) + assert.NoError(t, err) + + // locking 2nd resource should fail + err = resource2.Lock() + assert.NotNil(t, err) + + // exit 1st resource + err = resource.Unlock() + assert.NoError(t, err) + + // locking 2nd resource should succeed + err = resource2.Lock() + assert.NoError(t, err) + + // loading write log should fail since there's been a snapshot + // between the last load and locking. + _, err = resource2.LoadWriteLog() + assert.NotNil(t, err) + + // resource2 dies due to error loading write log + err = resource2.Unlock() + assert.NoError(t, err) + + // get third resource for same stuff + mm3 := NewResourceManager(sn, wl, logger.NewStandardLogger(os.Stderr)) + resource3 := mm3.GetShardResource(qtid, dax.PartitionNum(1), dax.ShardNum(1)) + // load snapshot on 3rd resource + d, err = resource3.LoadLatestSnapshot() + assert.NoError(t, err) + buf = make([]byte, 6) + n, err = d.Read(buf) + assert.Equal(t, 6, n) + assert.Equal(t, "hahaha", string(buf)) + assert.Equal(t, nil, err) + + // load write log on 3rd resource, get previous 2 writes + wld, err = resource3.LoadWriteLog() + assert.NoError(t, err) + buf = make([]byte, 20) + n, _ = wld.Read(buf) + assert.Equal(t, 18, n) + assert.Equal(t, "blahbla2\nblahbla3\n", string(buf[:18])) + n, err = wld.Read(buf) + assert.Equal(t, 0, n) + assert.Equal(t, io.EOF, err) + + // lock 3rd resource + err = resource3.Lock() + assert.NoError(t, err) + + // reload write log (should be empty) + wld, err = resource3.LoadWriteLog() + assert.NoError(t, err) + n, err = wld.Read(make([]byte, 8)) + assert.Equal(t, 0, n) + assert.Equal(t, io.EOF, err) +} diff --git a/dax/storage/util.go b/dax/storage/util.go new file mode 100644 index 000000000..85c5e7adf --- /dev/null +++ b/dax/storage/util.go @@ -0,0 +1,24 @@ +package storage + +import "io" + +// trackingReader wraps a Reader and calls a custom "update" function +// whenever Read is called. Used by the storage layer to keep track of +// how much of the writelog has been read.
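Note: trackingReader, defined next, is the piece that lets LoadWriteLog advance lastWLPos as the caller consumes the log. A toy usage showing the hook firing on every Read (imports io and strings):

var total int
tr := &trackingReader{
	r:      strings.NewReader("hello"),
	update: func(n int, err error) { total += n },
}
_, _ = io.ReadAll(tr)
// total is now 5; in storage.go the closure bumps m.lastWLPos instead.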
+type trackingReader struct {
+	r      io.Reader
+	update func(int, error)
+}
+
+func (tr *trackingReader) Read(p []byte) (n int, err error) {
+	n, err = tr.r.Read(p)
+	tr.update(n, err)
+	return n, err
+}
+
+func (tr *trackingReader) Close() error {
+	if closer, ok := tr.r.(io.Closer); ok {
+		return closer.Close()
+	}
+	return nil
+}
diff --git a/dax/table.go b/dax/table.go
index b75a62b29..eb39df22a 100644
--- a/dax/table.go
+++ b/dax/table.go
@@ -27,7 +27,7 @@ import (
 // Table - base Table struct; includes a TableID and a TableName
 // TableQualifier - combination of OrganizationID and DatabaseID
 // QualifiedTable - TableQualifier plus a Table
-// QualifiedTableID - TableQualifer plus a TableID
+// QualifiedTableID - TableQualifier plus a TableID
 // TableKey - a string representation of OrganizationID, DatabaseID, and
 //            TableID, which is safe to use as a FeatureBase index name.
 //
@@ -44,7 +44,7 @@ import (
 //
 ////////////////////////////////////////////////////////////////////////////////
 
-// TableKeyDelimiter is used to delimit the qualifer elements in the TableKey.
+// TableKeyDelimiter is used to delimit the qualifier elements in the TableKey.
 // While it might make more sense to use a pipe ("|") here, we instead use a
 // double underscore because underscore is one of the few characters allowed by
 // the FeatureBase index name restrictions, and we double it in a lame attempt
@@ -103,6 +103,23 @@ type OrganizationID string
 // value could be any string.
 type DatabaseID string
 
+// TableKeyer is an interface implemented by any type which can produce, and be
+// represented by, a TableKey. In the case of a QualifiedTable, its TableKey
+// might be something like `tbl__org__db__tableid`, while a general pilosa
+// implementation might represent a table as a basic table name `foo`.
+type TableKeyer interface {
+	Key() TableKey
+}
+
+// StringTableKeyer is a helper type which can wrap a string, making it a
+// TableKeyer. This is useful for certain calls to Execute() which take a string
+// index name.
+type StringTableKeyer string
+
+func (s StringTableKeyer) Key() TableKey {
+	return TableKey(s)
+}
+
 // TableKey is a globally unique identifier for a table; it is effectively the
 // compound key: (org, database, table). This is (hopefully) the value that will
 // be used when interfacing with services which are unaware of table qualifiers.
@@ -112,6 +129,8 @@ type DatabaseID string
 // TableKey as the value for index.Name.
 type TableKey string
 
+func (t TableKey) Key() TableKey { return t }
+
 // QualifiedTableID returns the QualifiedTableID based on the key. If TableKey
 // can't be parsed into a valid (i.e. complete) QualifiedTableID, then blank
 // values are used where necessary.
@@ -164,7 +183,14 @@ type Table struct {
 	PartitionN int `json:"partitionN"`
 
 	Description string `json:"description,omitempty"`
+	Owner       string `json:"owner,omitempty"`
 	CreatedAt   int64  `json:"createdAt,omitempty"`
+	UpdatedAt   int64  `json:"updatedAt,omitempty"`
+	UpdatedBy   string `json:"updatedBy,omitempty"`
+}
+
+func (t *Table) Key() TableKey {
+	return TableKey(t.ID)
 }
 
 // CreateID generates a unique identifier for Table. If Table has already been
@@ -198,7 +224,7 @@ func (t *Table) CreateID() (TableID, error) {
 }
 
 // NewTable returns a new instance of table with a pseudo-random ID which is
-// assumed to be unique within the scope of a TableQualifer.
+// assumed to be unique within the scope of a TableQualifier.
func NewTable(name TableName) *Table { return &Table{ Name: name, @@ -289,7 +315,7 @@ func (o Tables) Len() int { return len(o) } func (o Tables) Less(i, j int) bool { return o[i].Name < o[j].Name } func (o Tables) Swap(i, j int) { o[i], o[j] = o[j], o[i] } -// TableQualifierKey is the unique TableQualifer values encoded as a string. The +// TableQualifierKey is the unique TableQualifier values encoded as a string. The // current encoding is delimited as `prefix|OrganizationID|DatabaseID` (where // the pipe may be some other delimiter) by the TableQualifier.Key() method. type TableQualifierKey string @@ -339,7 +365,7 @@ type TableQualifier struct { DatabaseID DatabaseID `json:"db-id"` } -// NewTableQualifier is a helper function used to create a TableQualifer from +// NewTableQualifier is a helper function used to create a TableQualifier from // the provided arguments. func NewTableQualifier(orgID OrganizationID, dbID DatabaseID) TableQualifier { return TableQualifier{ @@ -433,7 +459,7 @@ func (qtid QualifiedTableID) Key() TableKey { } // Equals returns true if `other` is the same as qtid. Note: the `Name` value is -// ignored in this comparison; only `TableQaulifer` and `ID` are considered. +// ignored in this comparison; only `TableQualifier` and `ID` are considered. func (qtid QualifiedTableID) Equals(other QualifiedTableID) bool { if qtid.TableQualifier == other.TableQualifier && qtid.ID == other.ID { return true @@ -470,7 +496,7 @@ func (qt QualifiedTable) String() string { return fmt.Sprintf("%s (%s)", qt.QualifiedID(), qt.Name) } -// Qualifier returns the TableQualifer portion of the QualifiedTable. +// Qualifier returns the TableQualifier portion of the QualifiedTable. func (qt *QualifiedTable) Qualifier() TableQualifier { return qt.TableQualifier } diff --git a/dax/test/dax/dax_test.go b/dax/test/dax/dax_test.go index 9b5c4d611..95ff794c6 100644 --- a/dax/test/dax/dax_test.go +++ b/dax/test/dax/dax_test.go @@ -107,11 +107,12 @@ func TestDAXIntegration(t *testing.T) { // skips is a list of tests which are currently not passing in dax. We // need to get these passing before alpha. 
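 	// (Editor's note.) Entries in this list match either an entire test group
 	// or a single case: a bare name like "delete_tests" skips the whole group
 	// via the doSkip(test.Name(i)) checks added below, while a "group/case"
 	// entry like "testinsert/test-5" skips only that one case.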
skips := []string{ - "testinsert/test-5", // error messages differ - "percentile_test/test-6", // related to TODO in orchestrator.executePercentile - "innerjointest/innerjoin-aggregate-groupby", // join test which won't work until we support multiple tables - "alterTable/alterTableBadTable", // looks like table does not exist is a different error in DAX - "top-tests/test-1", // don't know why this is failing at all + "testinsert/test-5", // error messages differ + "percentile_test/test-6", // related to TODO in orchestrator.executePercentile + "alterTable/alterTableBadTable", // looks like table does not exist is a different error in DAX + "top-tests/test-1", // don't know why this is failing at all + "delete_tests", + "subquerytable", // subqueries seem to be a problem } doSkip := func(name string) bool { @@ -135,12 +136,18 @@ func TestDAXIntegration(t *testing.T) { PQLTests: make([]defs.PQLTest, 0), } for j, sqltest := range test.SQLTests { + if doSkip(test.Name(i)) { + continue + } if doSkip(test.Name(i) + "/" + sqltest.Name(j)) { continue } tt.SQLTests = append(tt.SQLTests, sqltest) } for j, pqltest := range test.PQLTests { + if doSkip(test.Name(i)) { + continue + } if doSkip(test.Name(i) + "/" + pqltest.Name(j)) { continue } diff --git a/dax/versioned_field.go b/dax/versioned_field.go deleted file mode 100644 index 3edfb2608..000000000 --- a/dax/versioned_field.go +++ /dev/null @@ -1,30 +0,0 @@ -package dax - -import "fmt" - -// VersionedField is used in a similar way to VersionedShard and -// VersionedPartition in that they all contain a snapshot version. -type VersionedField struct { - Name FieldName `json:"name"` - Version int `json:"version"` -} - -// String returns the VersionedField (i.e. its Name and Version) as a string. -func (f VersionedField) String() string { - return fmt.Sprintf("%s.%d", f.Name, f.Version) -} - -// NewVersionedField returns a VersionedField with the provided name and version. -func NewVersionedField(name FieldName, version int) VersionedField { - return VersionedField{ - Name: name, - Version: version, - } -} - -// VersionedFields is a sortable slice of VersionedField. -type VersionedFields []VersionedField - -func (f VersionedFields) Len() int { return len(f) } -func (f VersionedFields) Less(i, j int) bool { return f[i].Name < f[j].Name } -func (f VersionedFields) Swap(i, j int) { f[i], f[j] = f[j], f[i] } diff --git a/dax/versioned_partition.go b/dax/versioned_partition.go index 2bc645c8e..32b738893 100644 --- a/dax/versioned_partition.go +++ b/dax/versioned_partition.go @@ -17,55 +17,10 @@ func (p PartitionNum) String() string { return fmt.Sprintf("%d", p) } -// VersionedPartition is a partition number along with the snapshot version -// which it is currently writing at. -type VersionedPartition struct { - Num PartitionNum `json:"num"` - Version int `json:"version"` -} - -// NewVersionedPartition returns a VersionedPartition with the provided -// partition number and version. -func NewVersionedPartition(num PartitionNum, version int) VersionedPartition { - return VersionedPartition{ - Num: num, - Version: version, - } -} - -// String returns the VersionedPartition (i.e. its Num and Version) as a string. -func (p VersionedPartition) String() string { - return fmt.Sprintf("%d.%d", p.Num, p.Version) -} - -// VersionedPartitions is a sortable slice of VersionedPartition. 
-type VersionedPartitions []VersionedPartition - -func (p VersionedPartitions) Len() int { return len(p) } -func (p VersionedPartitions) Less(i, j int) bool { return p[i].Num < p[j].Num } -func (p VersionedPartitions) Swap(i, j int) { p[i], p[j] = p[j], p[i] } - -// NewVersionedPartitions returns the provided list of partition nums as a list -// of VersionedPartition with an invalid version (-1). This is to use for cases -// where the request should not be aware of a partition versioning. -func NewVersionedPartitions(partitionNums ...PartitionNum) VersionedPartitions { - pvs := make(VersionedPartitions, len(partitionNums)) - - for i := range partitionNums { - pvs[i] = VersionedPartition{ - Num: partitionNums[i], - Version: -1, - } - } - - return pvs -} - -// Nums returns a slice of all the partition numbers in VersionedPartitions. -func (p VersionedPartitions) Nums() []PartitionNum { - pp := make([]PartitionNum, len(p)) - for i := range p { - pp[i] = p[i].Num +func NewPartitionNums(nums ...uint64) PartitionNums { + partitions := make(PartitionNums, len(nums)) + for i, n := range nums { + partitions[i] = PartitionNum(n) } - return pp + return partitions } diff --git a/dax/versioned_shard.go b/dax/versioned_shard.go index 68d568179..865b55022 100644 --- a/dax/versioned_shard.go +++ b/dax/versioned_shard.go @@ -12,54 +12,14 @@ func (s ShardNum) String() string { return fmt.Sprintf("%d", s) } -// VersionedShard is a shard number along with the snapshot version which it is -// currently writing at. -type VersionedShard struct { - Num ShardNum `json:"num"` - Version int `json:"version"` -} - -// NewVersionedShard returns a VersionedShard with the provided shard number and version. -func NewVersionedShard(num ShardNum, version int) VersionedShard { - return VersionedShard{ - Num: num, - Version: version, - } -} - -// String returns the VersionedShard (i.e. its Num and Version) as a string. -func (s VersionedShard) String() string { - return fmt.Sprintf("%d.%d", s.Num, s.Version) -} - -// VersionedShards is a sortable slice of VersionedShard. -type VersionedShards []VersionedShard - -func (s VersionedShards) Len() int { return len(s) } -func (s VersionedShards) Less(i, j int) bool { return s[i].Num < s[j].Num } -func (s VersionedShards) Swap(i, j int) { s[i], s[j] = s[j], s[i] } - -// NewVersionedShards returns the provided list of shard nums as a list of -// VersionedShard with an invalid version (-1). This is to use for cases where -// the request should not be aware of shard versioning. -func NewVersionedShards(shardNums ...ShardNum) VersionedShards { - svs := make(VersionedShards, len(shardNums)) - - for i := range shardNums { - svs[i] = VersionedShard{ - Num: shardNums[i], - Version: -1, - } - } - - return svs -} - -// Nums returns a slice of all the shard numbers in VersionedShards. 
-func (s VersionedShards) Nums() []ShardNum { - ss := make([]ShardNum, len(s)) - for i := range s { - ss[i] = s[i].Num +func (s ShardNums) Len() int { return len(s) } +func (s ShardNums) Less(i, j int) bool { return s[i] < s[j] } +func (s ShardNums) Swap(i, j int) { s[i], s[j] = s[j], s[i] } + +func NewShardNums(nums ...uint64) ShardNums { + shards := make(ShardNums, len(nums)) + for i, n := range nums { + shards[i] = ShardNum(n) } - return ss + return shards } diff --git a/dax/versionstore.go b/dax/versionstore.go deleted file mode 100644 index 1d1dc051f..000000000 --- a/dax/versionstore.go +++ /dev/null @@ -1,110 +0,0 @@ -package dax - -import ( - "context" -) - -// VersionStore is an interface for tracking Shard, Partition, and Field[Key] -// versions. For example, when the contents of a shard are checkpointed, and a -// snapshot is generated, and the write log messages for that shard are -// truncated, the ShardVersion for that shard is incremented. The VersionStore -// is the interface through which various services read/write that version. -type VersionStore interface { - AddTable(ctx context.Context, qtid QualifiedTableID) error - RemoveTable(ctx context.Context, qtid QualifiedTableID) (VersionedShards, VersionedPartitions, error) - - // Shards (shardData) - AddShards(ctx context.Context, qtid QualifiedTableID, shards ...VersionedShard) error - Shards(ctx context.Context, qtid QualifiedTableID) (VersionedShards, bool, error) - ShardVersion(ctx context.Context, qtid QualifiedTableID, shardNum ShardNum) (int, bool, error) - ShardTables(ctx context.Context, qual TableQualifier) (TableIDs, error) - - // Partitions (tableKeys) - AddPartitions(ctx context.Context, qtid QualifiedTableID, partitions ...VersionedPartition) error - Partitions(ctx context.Context, qtid QualifiedTableID) (VersionedPartitions, bool, error) - PartitionVersion(ctx context.Context, qtid QualifiedTableID, partitionNum PartitionNum) (int, bool, error) - PartitionTables(ctx context.Context, qual TableQualifier) (TableIDs, error) - - // Fields (fieldKeys) - AddFields(ctx context.Context, qtid QualifiedTableID, fields ...VersionedField) error - Fields(ctx context.Context, qtid QualifiedTableID) (VersionedFields, bool, error) - FieldVersion(ctx context.Context, qtid QualifiedTableID, field FieldName) (int, bool, error) - FieldTables(ctx context.Context, qual TableQualifier) (TableIDs, error) - - Copy(ctx context.Context) (VersionStore, error) -} - -type DirectiveVersion interface { - Increment(ctx context.Context, delta uint64) (uint64, error) -} - -// Ensure type implements interface. -var _ VersionStore = (*nopVersionStore)(nil) - -// nopVersionStore is a no-op implementation of the VersionStore interface. -type nopVersionStore struct{} - -// NewNopVersionStore returns a new no-op instance of VersionStore. 
-func NewNopVersionStore() *nopVersionStore { - return &nopVersionStore{} -} - -func (s *nopVersionStore) AddTable(ctx context.Context, qtid QualifiedTableID) error { - return nil -} - -func (s *nopVersionStore) RemoveTable(ctx context.Context, qtid QualifiedTableID) (VersionedShards, VersionedPartitions, error) { - return nil, nil, nil -} - -func (s *nopVersionStore) AddShards(ctx context.Context, qtid QualifiedTableID, shards ...VersionedShard) error { - return nil -} - -func (s *nopVersionStore) Shards(ctx context.Context, qtid QualifiedTableID) (VersionedShards, bool, error) { - return nil, false, nil -} - -func (s *nopVersionStore) ShardVersion(ctx context.Context, qtid QualifiedTableID, shardNum ShardNum) (int, bool, error) { - return 0, true, nil -} - -func (s *nopVersionStore) ShardTables(ctx context.Context, qual TableQualifier) (TableIDs, error) { - return TableIDs{}, nil -} - -func (s *nopVersionStore) AddPartitions(ctx context.Context, qtid QualifiedTableID, partitions ...VersionedPartition) error { - return nil -} - -func (s *nopVersionStore) Partitions(ctx context.Context, qtid QualifiedTableID) (VersionedPartitions, bool, error) { - return nil, false, nil -} - -func (s *nopVersionStore) PartitionVersion(ctx context.Context, qtid QualifiedTableID, partitionNum PartitionNum) (int, bool, error) { - return 0, true, nil -} - -func (s *nopVersionStore) PartitionTables(ctx context.Context, qual TableQualifier) (TableIDs, error) { - return TableIDs{}, nil -} - -func (s *nopVersionStore) AddFields(ctx context.Context, qtid QualifiedTableID, fields ...VersionedField) error { - return nil -} - -func (s *nopVersionStore) Fields(ctx context.Context, qtid QualifiedTableID) (VersionedFields, bool, error) { - return nil, false, nil -} - -func (s *nopVersionStore) FieldVersion(ctx context.Context, qtid QualifiedTableID, field FieldName) (int, bool, error) { - return 0, true, nil -} - -func (s *nopVersionStore) FieldTables(ctx context.Context, qual TableQualifier) (TableIDs, error) { - return TableIDs{}, nil -} - -func (s *nopVersionStore) Copy(ctx context.Context) (VersionStore, error) { - return nil, nil -} diff --git a/dax/workerjob.go b/dax/workerjob.go index 490c9702e..a249bd82b 100644 --- a/dax/workerjob.go +++ b/dax/workerjob.go @@ -2,8 +2,7 @@ package dax import ( "sort" - - "golang.org/x/exp/constraints" + "strings" ) // Worker is a generic identifier used to represent a service responsible for @@ -27,7 +26,7 @@ type Job string // Jobs is a slice of Job. type Jobs []Job -// WorkerInfo reprents a Worker and the Jobs to which it has been assigned. +// WorkerInfo represents a Worker and the Jobs to which it has been assigned. type WorkerInfo struct { ID Worker Jobs []Job @@ -78,10 +77,10 @@ func (w WorkerDiffs) Len() int { return len(w) } func (w WorkerDiffs) Less(i, j int) bool { return w[i].WorkerID < w[j].WorkerID } func (w WorkerDiffs) Swap(i, j int) { w[i], w[j] = w[j], w[i] } -// Set is a set of orderable items. -type Set[K constraints.Ordered] map[K]struct{} +// Set is a set of stringy items. 
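+//
+// A short usage sketch (Editor's note: illustrative only; NewSet,
+// RemovePrefix, and Slice are defined below):
+//
+//	s := NewSet[Job]("shard1", "shard2", "key3")
+//	removed := s.RemovePrefix("shard") // "shard1" and "shard2", order undefined
+//	rest := s.Slice()                  // ["key3"]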
+type Set[K ~string] map[K]struct{} -func NewSet[K constraints.Ordered](stuff ...K) Set[K] { +func NewSet[K ~string](stuff ...K) Set[K] { s := make(map[K]struct{}) for _, thing := range stuff { s[thing] = struct{}{} @@ -110,6 +109,17 @@ func (s Set[K]) Remove(k K) { delete(s, k) } +func (s Set[K]) RemovePrefix(prefix string) []K { + ret := make([]K, 0) + for k := range s { + if strings.HasPrefix(string(k), prefix) { + ret = append(ret, k) + delete(s, k) + } + } + return ret +} + // Slice returns a slice containing each member of the set in an undefined order. func (s Set[K]) Slice() []K { ret := make([]K, 0, len(s)) diff --git a/dax/writelogger/api/openapi.yaml b/dax/writelogger/api/openapi.yaml deleted file mode 100644 index d82fe6820..000000000 --- a/dax/writelogger/api/openapi.yaml +++ /dev/null @@ -1,113 +0,0 @@ -openapi: 3.0.3 - -info: - title: WriteLogger - description: The alpha implementation of the WriteLogger interface. - version: 0.0.0 - -paths: - /writelogger/health: - get: - summary: Health check endpoint. - description: Provides an endpoint to check the overall health of the WriteLogger service. - operationId: GetHealth - responses: - 200: - description: Service is healthy. - - - /writelogger/append-message: - post: - summary: Append message to WriteLogger. - description: Appends a message to a versioned bucket/key. - operationId: PostAppendMessage - requestBody: - content: - application/json: - example: - bucket: example-bucket - key: unique-key - version: 4 - message: SGVsbG8gV29ybGQ= - schema: - type: object - properties: - bucket: - type: string - key: - type: string - version: - type: integer - format: int64 - message: - type: string - format: byte - responses: - 200: - $ref: '#/components/responses/AppendMessageResponse' - - /writelogger/log-reader: - post: - summary: Read log. - description: Reads an entire log (collection of messages) at bucket/key for the given version. - operationId: PostLogReader - requestBody: - content: - application/json: - example: - bucket: example-bucket - key: unique-key - version: 4 - schema: - type: object - properties: - bucket: - type: string - key: - type: string - version: - type: integer - format: int64 - responses: - 200: - description: Bytes making up the contents of the log. - content: - text/plain: - schema: - type: string - format: byte - - /writelogger/delete-log: - post: - summary: Delete log. - description: Deletes the log at bucket/key for the given version. - operationId: PostDeleteLog - requestBody: - content: - application/json: - example: - bucket: example-bucket - key: unique-key - version: 4 - schema: - type: object - properties: - bucket: - type: string - key: - type: string - version: - type: integer - format: int64 - responses: - 200: - description: Log was deleted. - -components: - responses: - AppendMessageResponse: - description: Placeholder response. - content: - application/json: - schema: - type: object \ No newline at end of file diff --git a/dax/writelogger/client/client.go b/dax/writelogger/client/client.go deleted file mode 100644 index 73ac31338..000000000 --- a/dax/writelogger/client/client.go +++ /dev/null @@ -1,145 +0,0 @@ -// Package client contains an http implementation of the WriteLogger client. -package client - -import ( - "bytes" - "encoding/json" - "fmt" - "io" - "net/http" - - "github.com/featurebasedb/featurebase/v3/dax" - "github.com/featurebasedb/featurebase/v3/errors" -) - -const defaultScheme = "http" - -// WriteLogger is a client for the WriteLogger API methods. 
-type WriteLogger struct { - address dax.Address -} - -func New(address dax.Address) *WriteLogger { - return &WriteLogger{ - address: address, - } -} - -func (w *WriteLogger) AppendMessage(bucket string, key string, version int, msg []byte) error { - url := fmt.Sprintf("%s/writelogger/append-message", w.address.WithScheme(defaultScheme)) - - req := &AppendMessageRequest{ - Bucket: bucket, - Key: key, - Version: version, - Message: msg, - } - - // Encode the request. - postBody, err := json.Marshal(req) - if err != nil { - return errors.Wrap(err, "marshalling post request") - } - requestBody := bytes.NewBuffer(postBody) - - // Post the request. - resp, err := http.Post(url, "application/json", requestBody) - if err != nil { - return errors.Wrap(err, "posting append-message request") - } - defer resp.Body.Close() - - if resp.StatusCode != http.StatusOK { - b, _ := io.ReadAll(resp.Body) - return errors.Errorf("status code: %d: %s", resp.StatusCode, b) - } - - var isr *AppendMessageResponse - if err := json.NewDecoder(resp.Body).Decode(&isr); err != nil { - return errors.Wrap(err, "reading response body") - } - - return nil -} - -type AppendMessageRequest struct { - Bucket string `json:"bucket"` - Key string `json:"key"` - Version int `json:"version"` - Message []byte `json:"message"` -} -type AppendMessageResponse struct{} - -func (w *WriteLogger) LogReader(bucket string, key string, version int) (io.Reader, io.Closer, error) { - url := fmt.Sprintf("%s/writelogger/log-reader", w.address.WithScheme(defaultScheme)) - - req := &LogReaderRequest{ - Bucket: bucket, - Version: version, - Key: key, - } - - // Encode the request. - postBody, err := json.Marshal(req) - if err != nil { - return nil, nil, errors.Wrap(err, "marshalling post request") - } - requestBody := bytes.NewBuffer(postBody) - - // Post the request. - resp, err := http.Post(url, "application/json", requestBody) - if err != nil { - return nil, nil, errors.Wrap(err, "posting log-reader request") - } - - if resp.StatusCode != http.StatusOK { - b, _ := io.ReadAll(resp.Body) - defer resp.Body.Close() - return nil, nil, errors.Errorf("status code: %d: %s", resp.StatusCode, b) - } - - return resp.Body, resp.Body, nil -} - -type LogReaderRequest struct { - Bucket string `json:"bucket"` - Version int `json:"version"` - Key string `json:"key"` -} - -func (w *WriteLogger) DeleteLog(bucket string, key string, version int) error { - url := fmt.Sprintf("%s/writelogger/delete-log", w.address.WithScheme(defaultScheme)) - - req := &DeleteLogRequest{ - Bucket: bucket, - Version: version, - Key: key, - } - - // Encode the request. - postBody, err := json.Marshal(req) - if err != nil { - return errors.Wrap(err, "marshalling post request") - } - requestBody := bytes.NewBuffer(postBody) - - // Post the request. 
- resp, err := http.Post(url, "application/json", requestBody) - if err != nil { - return errors.Wrap(err, "posting log-reader request") - } - - if resp.StatusCode != http.StatusOK { - b, _ := io.ReadAll(resp.Body) - defer resp.Body.Close() - return errors.Errorf("status code: %d: %s", resp.StatusCode, b) - } - - return nil -} - -type DeleteLogRequest struct { - Bucket string `json:"bucket"` - Version int `json:"version"` - Key string `json:"key"` -} diff --git a/dax/writelogger/http/handler.go b/dax/writelogger/http/handler.go deleted file mode 100644 index 6366a7f26..000000000 --- a/dax/writelogger/http/handler.go +++ /dev/null @@ -1,121 +0,0 @@ -package http - -import ( - "encoding/json" - "io" - "net/http" - - "github.com/gorilla/mux" - "github.com/featurebasedb/featurebase/v3/dax/writelogger" - "github.com/featurebasedb/featurebase/v3/logger" -) - -func Handler(w *writelogger.WriteLogger, logger logger.Logger) http.Handler { - svr := &server{ - writeLogger: w, - logger: logger, - } - - router := mux.NewRouter() - router.HandleFunc("/health", svr.getHealth).Methods("GET").Name("GetHealth") - router.HandleFunc("/append-message", svr.postAppendMessage).Methods("POST").Name("PostAppendMessage") - router.HandleFunc("/log-reader", svr.postLogReader).Methods("POST").Name("PostLogReader") - router.HandleFunc("/delete-log", svr.postDeleteLog).Methods("POST").Name("PostDeleteLog") - return router -} - -type server struct { - writeLogger *writelogger.WriteLogger - logger logger.Logger -} - -// GET /health -func (s *server) getHealth(w http.ResponseWriter, r *http.Request) { - w.WriteHeader(http.StatusOK) -} - -// POST /append-message -func (s *server) postAppendMessage(w http.ResponseWriter, r *http.Request) { - body := r.Body - defer body.Close() - - req := AppendMessageRequest{} - if err := json.NewDecoder(body).Decode(&req); err != nil { - http.Error(w, err.Error(), http.StatusBadRequest) - return - } - - err := s.writeLogger.AppendMessage(req.Bucket, req.Key, req.Version, req.Message) - if err != nil { - http.Error(w, err.Error(), http.StatusBadRequest) - return - } - - resp := AppendMessageResponse{} - if err := json.NewEncoder(w).Encode(resp); err != nil { - http.Error(w, err.Error(), http.StatusBadRequest) - return - } -} - -type AppendMessageRequest struct { - Bucket string `json:"bucket"` - Key string `json:"key"` - Version int `json:"version"` - Message []byte `json:"message"` -} - -type AppendMessageResponse struct{} - -// POST /log-reader -func (s *server) postLogReader(w http.ResponseWriter, r *http.Request) { - body := r.Body - defer body.Close() - - req := LogReaderRequest{} - if err := json.NewDecoder(body).Decode(&req); err != nil { - http.Error(w, err.Error(), http.StatusBadRequest) - return - } - - reader, closer, err := s.writeLogger.LogReader(req.Bucket, req.Key, req.Version) - if err != nil { - http.Error(w, err.Error(), http.StatusBadRequest) - return - } - - defer closer.Close() - - if _, err := io.Copy(w, reader); err != nil { - s.logger.Printf("error streaming log data: %s", err) - } -} - -type LogReaderRequest struct { - Bucket string `json:"bucket"` - Version int `json:"version"` - Key string `json:"key"` -} - -// POST /delete-log -func (s *server) postDeleteLog(w http.ResponseWriter, r *http.Request) { - body := r.Body - defer body.Close() - - req := DeleteLogRequest{} - if err := json.NewDecoder(body).Decode(&req); err != nil { - http.Error(w, err.Error(), http.StatusBadRequest) - return - } - - if err := s.writeLogger.DeleteLog(req.Bucket, req.Key, req.Version); 
err != nil { - http.Error(w, err.Error(), http.StatusBadRequest) - return - } -} - -type DeleteLogRequest struct { - Bucket string `json:"bucket"` - Version int `json:"version"` - Key string `json:"key"` -} diff --git a/dax/writelogger/writelogger.go b/dax/writelogger/writelogger.go index db54f859b..2df249c48 100644 --- a/dax/writelogger/writelogger.go +++ b/dax/writelogger/writelogger.go @@ -7,26 +7,31 @@ import ( "io/fs" "os" "path" + "strconv" "sync" + "syscall" + "github.com/featurebasedb/featurebase/v3/dax/computer" "github.com/featurebasedb/featurebase/v3/errors" "github.com/featurebasedb/featurebase/v3/logger" ) type WriteLogger struct { - mu sync.RWMutex + dataDir string - dataDir string - logFiles map[string]*os.File + mu sync.RWMutex + logFiles map[string]*os.File + lockFiles map[string]*os.File logger logger.Logger } func New(cfg Config) *WriteLogger { return &WriteLogger{ - dataDir: cfg.DataDir, - logFiles: make(map[string]*os.File), - logger: logger.NopLogger, + dataDir: cfg.DataDir, + logFiles: make(map[string]*os.File), + lockFiles: make(map[string]*os.File), + logger: logger.NopLogger, } } @@ -43,25 +48,58 @@ func (w *WriteLogger) AppendMessage(bucket string, key string, version int, mess return errors.Wrapf(err, "getting log file by key: %s", fKey) } - logFile.Write(append(message, "\n"...)) - logFile.Sync() + _, err = logFile.Write(append(message, "\n"...)) + if err != nil { + return errors.Wrapf(err, "writing to log file %s", logFile.Name()) + } + err = logFile.Sync() + return errors.Wrapf(err, "syncing log file %s", logFile.Name()) +} - return nil +func (w *WriteLogger) List(bucket, key string) ([]computer.WriteLogInfo, error) { + dirpath := path.Join(w.dataDir, bucket, key) + + entries, err := os.ReadDir(dirpath) + if err != nil { + if pe, ok := err.(*os.PathError); ok && pe.Err == syscall.ENOENT { + return nil, nil + } + return nil, errors.Wrap(err, "reading directory") + } + + wLogs := make([]computer.WriteLogInfo, len(entries)) + for i, entry := range entries { + version, err := strconv.ParseInt(entry.Name(), 10, 64) + if err != nil { + return nil, errors.Wrapf(err, "writelog filename '%s' could not be parsed to version number", entry.Name()) + } + wLogs[i] = computer.WriteLogInfo{ + Version: int(version), + } + } + return wLogs, nil +} + +func (w *WriteLogger) LogReader(bucket, key string, version int) (io.ReadCloser, error) { + return w.LogReaderFrom(bucket, key, version, 0) } -func (w *WriteLogger) LogReader(bucket string, key string, version int) (io.Reader, io.Closer, error) { +func (w *WriteLogger) LogReaderFrom(bucket string, key string, version int, offset int) (io.ReadCloser, error) { _, filePath := w.paths(fullKey(bucket, key, version)) f, err := os.Open(filePath) if err != nil { if e, ok := err.(*fs.PathError); ok { - return nil, nil, e + return nil, e } - return nil, nil, err + return nil, err + } + if offset > 0 { + f.Seek(int64(offset), io.SeekStart) } w.logger.Debugf("WriteLogger LogReader file: %s", f.Name()) - return f, f, nil + return f, nil } func (w *WriteLogger) DeleteLog(bucket string, key string, version int) error { @@ -84,6 +122,66 @@ func (w *WriteLogger) DeleteLog(bucket string, key string, version int) error { return os.Remove(f.Name()) } +func (w *WriteLogger) lockFile(bucket, key string) (string, string) { + lockFile := path.Join(w.dataDir, bucket, fmt.Sprintf("_lock_%s", key)) + return path.Dir(lockFile), lockFile +} + +func (w *WriteLogger) Lock(bucket, key string) error { + lockDir, lockFile := w.lockFile(bucket, key) + + if err := 
os.MkdirAll(lockDir, 0777); err != nil { + return errors.Wrapf(err, "lock dir %s", lockDir) + } + + f, err := os.OpenFile(lockFile, os.O_CREATE|os.O_EXCL|syscall.O_NONBLOCK, 0644) + if err != nil { + return errors.Wrapf(err, "opening lock file: %s", lockFile) + } + w.mu.Lock() + defer w.mu.Unlock() + w.lockFiles[lockFile] = f + + // fd, err = syscall.Open(lockFile, syscall.O_RDWR|syscall.O_CREAT, 0644) + // if err != nil { + // return 0, errors.Wrapf(err, "syscall opening %s", lockFile) + // } + // err = syscall.FcntlFlock(uintptr(fd), syscall.F_SETLK, &syscall.Flock_t{ + // Type: syscall.F_WRLCK, + // }) + return nil + +} + +func (w *WriteLogger) Unlock(bucket, key string) error { + w.mu.Lock() + defer w.mu.Unlock() + // TODO(jaffee) since the file isn't guaranteed to be removed if + // the process is killed, we should actually use flock instead of + // EXCL file creation. Problem with that is it makes testing + // tricky because file handles from the same process are able to + // acquire the flock simultaneously. Headache. + _, lockFile := w.lockFile(bucket, key) + f, ok := w.lockFiles[lockFile] + if !ok { + return errors.New(errors.ErrUncoded, "couldn't find file to unlock") + } + f.Close() + err := os.Remove(lockFile) + delete(w.lockFiles, lockFile) + + // defer func() { + // err := syscall.Close(fd) + // if err != nil { + // w.logger.Printf("error closing lockfile %s", lockFile) + // } + // }() + // err := syscall.FcntlFlock(uintptr(fd), syscall.F_SETLK, &syscall.Flock_t{ + // Type: syscall.F_UNLCK, + // }) + return errors.Wrap(err, "removing lock file") +} + // paths takes a key and returns the full file path (including the root data // directory) as well as the full directory path (i.e. the file path without the // file portion). @@ -116,6 +214,7 @@ func (w *WriteLogger) logFileByKey(key string) (*os.File, error) { if err != nil { return nil, errors.Wrapf(err, "opening file: %s", filePath) } + w.logFiles[key] = f return f, nil diff --git a/dax/writelogger/writelogger_test.go b/dax/writelogger/writelogger_test.go index ddcf17745..62d1d4667 100644 --- a/dax/writelogger/writelogger_test.go +++ b/dax/writelogger/writelogger_test.go @@ -50,11 +50,11 @@ func TestWriteLogger(t *testing.T) { assert.NoError(t, err) // Read the message. - reader, closer, err := wl.LogReader(bucket(table, partition), key, version) + readcloser, err := wl.LogReader(bucket(table, partition), key, version) assert.NoError(t, err) - defer closer.Close() + defer readcloser.Close() - buf, err := io.ReadAll(reader) + buf, err := io.ReadAll(readcloser) assert.NoError(t, err) var out payload diff --git a/executor.go b/executor.go index c6944f3ca..2172f39b3 100644 --- a/executor.go +++ b/executor.go @@ -47,7 +47,7 @@ const ( ) type Executor interface { - Execute(context.Context, string, *pql.Query, []uint64, *ExecOptions) (QueryResponse, error) + Execute(context.Context, dax.TableKeyer, *pql.Query, []uint64, *ExecOptions) (QueryResponse, error) } // executor recursively executes calls in a PQL query across all shards. @@ -77,7 +77,8 @@ type executor struct { maxMemory int64 // Temporary flag to be removed when stablized - dataframeEnabled bool + dataframeEnabled bool + datafameUseParquet bool } // executorOption is a functional option type for pilosa.executor @@ -178,7 +179,9 @@ func (e *executor) InitStats() { } // Execute executes a PQL query. 
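 // (Editor's note: an illustrative call under the new signature below; the
 // wrapper types come from dax/table.go above.) Callers holding only a plain
 // index name wrap it as a TableKeyer:
 //
 //	resp, err := e.Execute(ctx, dax.StringTableKeyer("myindex"), q, shards, opt)
 //
 // DAX code can pass a dax.TableKey directly, since TableKey.Key() returns
 // itself.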
-func (e *executor) Execute(ctx context.Context, index string, q *pql.Query, shards []uint64, opt *ExecOptions) (QueryResponse, error) { +func (e *executor) Execute(ctx context.Context, tableKeyer dax.TableKeyer, q *pql.Query, shards []uint64, opt *ExecOptions) (QueryResponse, error) { + index := string(tableKeyer.Key()) + span, ctx := tracing.StartSpanFromContext(ctx, "executor.Execute") span.LogKV("pql", q.String()) defer span.Finish() diff --git a/executor_test.go b/executor_test.go index 27ed8c058..b489c6738 100644 --- a/executor_test.go +++ b/executor_test.go @@ -33,17 +33,9 @@ import ( "github.com/featurebasedb/featurebase/v3/testhook" . "github.com/featurebasedb/featurebase/v3/vprint" // nolint:staticcheck "github.com/google/go-cmp/cmp" - pilosa "github.com/featurebasedb/featurebase/v3" - "github.com/featurebasedb/featurebase/v3/ctl" - "github.com/featurebasedb/featurebase/v3/disco" "github.com/featurebasedb/featurebase/v3/logger" - "github.com/featurebasedb/featurebase/v3/pql" - "github.com/featurebasedb/featurebase/v3/proto" - "github.com/featurebasedb/featurebase/v3/server" - "github.com/featurebasedb/featurebase/v3/test" - "github.com/featurebasedb/featurebase/v3/testhook" - . "github.com/featurebasedb/featurebase/v3/vprint" // nolint:staticcheck "github.com/pkg/errors" + "github.com/stretchr/testify/assert" ) var ( @@ -5131,47 +5123,50 @@ func TestExecutor_Execute_Extract_Keyed(t *testing.T) { `) resp := c.Query(t, c.Idx(), `Extract(All(), Rows(set))`) - expect := []interface{}{ - pilosa.ExtractedTable{ - Fields: []pilosa.ExtractedTableField{ - { - Name: "set", - Type: "[]uint64", - }, + expect := pilosa.ExtractedTable{ + Fields: []pilosa.ExtractedTableField{ + { + Name: "set", + Type: "[]uint64", }, - // The order of these probably shouldn't matter, but currently depends indirectly on the - // index. - Columns: []pilosa.ExtractedTableColumn{ - { - Column: pilosa.KeyOrID{Keyed: true, Key: "h"}, - Rows: []interface{}{ - []uint64{ - 1, - 2, - }, + }, + // The order of these probably shouldn't matter, but currently depends indirectly on the + // index. 
+ Columns: []pilosa.ExtractedTableColumn{ + { + Column: pilosa.KeyOrID{Keyed: true, Key: "h"}, + Rows: []interface{}{ + []uint64{ + 1, + 2, }, }, - { - Column: pilosa.KeyOrID{Keyed: true, Key: "xyzzy"}, - Rows: []interface{}{ - []uint64{ - 2, - }, + }, + { + Column: pilosa.KeyOrID{Keyed: true, Key: "xyzzy"}, + Rows: []interface{}{ + []uint64{ + 2, }, }, - { - Column: pilosa.KeyOrID{Keyed: true, Key: "plugh"}, - Rows: []interface{}{ - []uint64{}, - }, + }, + { + Column: pilosa.KeyOrID{Keyed: true, Key: "plugh"}, + Rows: []interface{}{ + []uint64{}, }, }, }, } - if !reflect.DeepEqual(expect, resp.Results) { - t.Errorf("expected %v but got %v", expect, resp.Results) + if len(resp.Results) != 1 { + t.Fail() } + res := resp.Results[0].(pilosa.ExtractedTable) + if !reflect.DeepEqual(expect.Fields, res.Fields) { + t.Errorf("expected:\n%v\nbut got:\n%v", expect, resp.Results) + } + assert.ElementsMatch(t, expect.Columns, res.Columns) } func TestExecutor_Execute_MaxMemory(t *testing.T) { @@ -5791,7 +5786,7 @@ func TestExecutor_Execute_Rows_Keys(t *testing.T) { t.Fatalf("got success, expected error similar to: %+v", test.expErr) } rows := res.Results[0].(pilosa.RowIdentifiers) - if !reflect.DeepEqual(rows.Keys, test.exp) { + if !assert.ElementsMatch(t, rows.Keys, test.exp) { t.Fatalf("\ngot: %+v\nexp: %+v", rows.Keys, test.exp) } else if rows.Rows != nil { if test.exp == nil { @@ -7428,6 +7423,7 @@ func backupCluster(t *testing.T, c *test.Cluster, index string) (backupDir strin buf := &bytes.Buffer{} backupLog := logger.NewStandardLogger(buf) + backupCommand := ctl.NewBackupCommand(backupLog) backupCommand.Host = c.Nodes[len(c.Nodes)-1].URL() // don't pick node 0 so we don't always get primary (better code coverage) backupCommand.Index = index diff --git a/extendiblehash/extendiblehash.go b/extendiblehash/extendiblehash.go new file mode 100644 index 000000000..ff18c97b1 --- /dev/null +++ b/extendiblehash/extendiblehash.go @@ -0,0 +1,266 @@ +package extendiblehash + +import ( + "bytes" + "fmt" + + "github.com/featurebasedb/featurebase/v3/bufferpool" +) + +// ExtendibleHashTable is an extendible hash table implementation backed by a buffer +// pool +type ExtendibleHashTable struct { + directory []bufferpool.PageID + globalDepth uint + keysPerPage int + bufferPool *bufferpool.BufferPool +} + +// NewExtendibleHashTable creates a new ExtendibleHashTable +func NewExtendibleHashTable(keyLength int, valueLength int, bufferPool *bufferpool.BufferPool) (*ExtendibleHashTable, error) { + bytesPerKV := keyLength + valueLength + bufferpool.PAGE_SLOT_LENGTH + keysPerPage := (bufferpool.PAGE_SIZE - bufferpool.PAGE_SLOTS_START_OFFSET) / bytesPerKV + + //create the root page + page, err := bufferPool.NewPage() + if err != nil { + return nil, err + } + page.WritePageType(bufferpool.PAGE_TYPE_HASH_TABLE) + bufferPool.FlushPage(page.ID()) + + return &ExtendibleHashTable{ + globalDepth: 0, + directory: make([]bufferpool.PageID, 1), + keysPerPage: keysPerPage, + bufferPool: bufferPool, + }, nil +} + +// Get gets a key from the hash table. It returns the value, a bool set to true if the key is +// found (false if the key is not found) or an error. 
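+//
+// A usage sketch (Editor's note: illustrative only; ht is a previously
+// constructed *ExtendibleHashTable):
+//
+//	v, found, err := ht.Get([]byte("user42"))
+//	if err == nil && found {
+//		fmt.Printf("value: %s\n", v) // value bytes stored for "user42"
+//	}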
+func (e *ExtendibleHashTable) Get(key []byte) ([]byte, bool, error) { + pageId, err := e.getPageID(key) + if err != nil { + return []byte{}, false, err + } + + page, err := e.bufferPool.FetchPage(pageId) + if err != nil { + return []byte{}, false, err + } + defer e.bufferPool.UnpinPage(page.ID()) + + index, found := e.findKey(page, key) + if found { + slot := page.ReadSlot(int16(index)) + return slot.ValueBytes(page), true, nil + } + return []byte{}, false, nil +} + +// Put puts a key/value pair into the hash table. It returns an error if one occurs. +func (e *ExtendibleHashTable) Put(key, value []byte) error { + pageID, err := e.getPageID(key) + if err != nil { + return err + } + page, err := e.bufferPool.FetchPage(pageID) + if err != nil { + return err + } + defer e.bufferPool.UnpinPage(page.ID()) + + full := int(page.ReadSlotCount()) >= e.keysPerPage + err = e.putKeyValue(page, key, value) + if err != nil { + return err + } + + if full { + err = e.splitOnKey(page, key) + if err != nil { + return err + } + } + return nil +} + +// Close cleans up the hash table after its use. +func (e *ExtendibleHashTable) Close() { + e.bufferPool.Close() +} + +func (e *ExtendibleHashTable) hashFunction(k Hashable) int { + hashResult := k.Hash() & ((1 << e.globalDepth) - 1) + return int(hashResult) +} + +func (e *ExtendibleHashTable) getPageID(key []byte) (bufferpool.PageID, error) { + hash := e.hashFunction(Key(key)) + if hash > len(e.directory)-1 { + return 0, fmt.Errorf("hash (%d) out of the directory array bounds (%d)", hash, len(e.directory)) + } + id := e.directory[hash] + return bufferpool.PageID(id), nil +} + +func (e *ExtendibleHashTable) findKey(page *bufferpool.Page, key []byte) (int, bool) { + minIndex := 0 + onePastMaxIndex := int(page.ReadSlotCount()) + + for onePastMaxIndex != minIndex { + index := (minIndex + onePastMaxIndex) / 2 + s := page.ReadSlot(int16(index)) + keyAtIndex := s.KeyBytes(page) + + if bytes.Equal(keyAtIndex, key) { + return index, true + } + if bytes.Compare(key, keyAtIndex) < 0 { + onePastMaxIndex = index + } else { + minIndex = index + 1 + } + } + return minIndex, false +} + +func (e *ExtendibleHashTable) splitOnKey(page *bufferpool.Page, key []byte) error { + if uint(page.ReadLocalDepth()) == e.globalDepth { + + e.directory = append(e.directory, e.directory...) 
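+		// (Editor's note.) Appending the directory to itself doubles it while
+		// keeping every hash value aliased to its old page: with global depth
+		// g, slots i and i+2^g initially hold the same PageID, e.g. [P0, P1]
+		// becomes [P0, P1, P0, P1]. The loop over the directory further down
+		// then repoints half of those aliases at the newly allocated page.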
+		e.globalDepth++
+	}
+
+	// scratch page for left
+	p0 := e.bufferPool.ScratchPage()
+	p0.WritePageNumber(int32(page.ID()))
+	p0.WritePageType(bufferpool.PAGE_TYPE_HASH_TABLE)
+
+	// allocate new page for split
+	p1, err := e.bufferPool.NewPage()
+	if err != nil {
+		return err
+	}
+	defer e.bufferPool.UnpinPage(p1.ID())
+	p1.WritePageType(bufferpool.PAGE_TYPE_HASH_TABLE)
+
+	// update local depths
+	newLocalDepth := page.ReadLocalDepth() + 1
+	p0.WriteLocalDepth(newLocalDepth)
+	p1.WriteLocalDepth(newLocalDepth)
+
+	ld := page.ReadLocalDepth()
+	hiBit := uint64(1 << ld)
+
+	it := bufferpool.NewPageSlotIterator(page, 0)
+	for {
+		slot := it.Next()
+		if slot == nil {
+			break
+		}
+		keyBytes := slot.KeyBytes(page)
+		k := string(keyBytes)
+		h := Key(k).Hash()
+
+		if h&hiBit > 0 {
+			sc := p1.ReadSlotCount()
+			p1.WriteKeyValueInSlot(sc, keyBytes, slot.ValueBytes(page))
+			// update the slot count
+			p1.WriteSlotCount(int16(sc + 1))
+
+		} else {
+			sc := p0.ReadSlotCount()
+			p0.WriteKeyValueInSlot(sc, keyBytes, slot.ValueBytes(page))
+			// update the slot count
+			p0.WriteSlotCount(int16(sc + 1))
+		}
+	}
+	for j := Key(key).Hash() & (hiBit - 1); j < uint64(len(e.directory)); j += hiBit {
+		if j&hiBit > 0 {
+			e.directory[j] = p1.ID()
+		} else {
+			e.directory[j] = p0.ID()
+		}
+	}
+
+	// copy p0 back into page
+	p0.WritePage(page)
+
+	return nil
+}
+
+func (e *ExtendibleHashTable) cleanPage(page *bufferpool.Page) error {
+	scratch := e.bufferPool.ScratchPage()
+	// copy page number
+	scratch.WritePageNumber(int32(page.ID()))
+	// set the page type
+	scratch.WritePageType(bufferpool.PAGE_TYPE_HASH_TABLE)
+	// copy local depth
+	scratch.WriteLocalDepth(page.ReadLocalDepth())
+
+	// copy slots from page to scratch
+	si := bufferpool.NewPageSlotIterator(page, 0)
+	for {
+		slot := si.Next()
+		if slot == nil {
+			break
+		}
+		scratch.WriteKeyValueInSlot(si.Cursor(), slot.KeyBytes(page), slot.ValueBytes(page))
+	}
+
+	// update the slot count
+	scratch.WriteSlotCount(page.ReadSlotCount())
+
+	// write scratch back to page
+	scratch.WritePage(page)
+	return nil
+}
+
+func (e *ExtendibleHashTable) keyValueWillFit(page *bufferpool.Page, key, value []byte) bool {
+	// will this k/v fit on the page?
+	slotLen := 4                          // we need 2 len words for the slot
+	chunkLen := 6 + len(key) + len(value) // int16 len + int32 len + len of respective []byte
+	fs := page.FreeSpace()
+	return fs > (int16(slotLen) + int16(chunkLen))
+}
+
+func (e *ExtendibleHashTable) putKeyValue(page *bufferpool.Page, key, value []byte) error {
+	if !e.keyValueWillFit(page, key, value) {
+		// try to garbage collect the page first
+		err := e.cleanPage(page)
+		if err != nil {
+			return err
+		}
+	}
+
+	// find the key
+	newIndex, found := e.findKey(page, []byte(key))
+	// get the slot count
+	slotCount := int(page.ReadSlotCount())
+	if found {
+		// we found the key, so we will update the value
+		err := page.WriteKeyValueInSlot(int16(newIndex), []byte(key), []byte(value))
+		if err != nil {
+			return err
+		}
+	} else {
+		// TODO(pok) should check in WriteSlot() to see if we are out of space too...
+
+		// TODO(pok) we should move all the slots in one fell swoop, because,...
performance + // move all the slots after where we are going to insert + for j := slotCount; j > newIndex; j-- { + sl := page.ReadSlot(int16(j - 1)) + page.WriteSlot(int16(j), sl) + } + err := page.WriteKeyValueInSlot(int16(newIndex), []byte(key), []byte(value)) + if err != nil { + return err + } + // update the slot count + page.WriteSlotCount(int16(slotCount + 1)) + } + return nil +} diff --git a/extendiblehash/extendiblehash_test.go b/extendiblehash/extendiblehash_test.go new file mode 100644 index 000000000..98d412e41 --- /dev/null +++ b/extendiblehash/extendiblehash_test.go @@ -0,0 +1,327 @@ +package extendiblehash + +import ( + "strconv" + "testing" + + "github.com/featurebasedb/featurebase/v3/bufferpool" + "github.com/stretchr/testify/assert" +) + +func makeDirectory() (*ExtendibleHashTable, error) { + diskManager := bufferpool.NewInMemDiskSpillingDiskManager(128) + bufferPool := bufferpool.NewBufferPool(128, diskManager) + + keySize := 12 + valueSize := 20 + + return NewExtendibleHashTable(keySize, valueSize, bufferPool) +} + +func TestHashTable_ExtendibleHash(t *testing.T) { + d, err := makeDirectory() + if err != nil { + t.Fatal(err) + } + d.globalDepth = 4 + + key := "321" // 0011 + key2 := "123" // 1011 + + result := d.hashFunction(Key(key)) + result2 := d.hashFunction(Key(key2)) + + assert.Equal(t, 7, result) + assert.Equal(t, 6, result2) +} + +func TestHashTable_GetPage(t *testing.T) { + d, err := makeDirectory() + if err != nil { + t.Fatal(err) + } + d.globalDepth = 4 + d.directory = make([]bufferpool.PageID, 16) + + key := "478" + d.directory[14] = 2 + + pageID, err := d.getPageID([]byte(key)) + if err != nil { + t.Fatal(err) + } + + assert.Equal(t, 2, int(pageID)) +} + +func TestHashTable_GetPage_ShouldReturnError_WhenOffsetIsNotLimitedToDataSize(t *testing.T) { + d, err := makeDirectory() + if err != nil { + t.Fatal(err) + } + d.globalDepth = 4 + key := "478" + + _, err = d.getPageID([]byte(key)) + assert.Error(t, err) +} + +func TestHashTable_GetPage_ShouldReturnError_WhenPageIDIsOutOfTheTable(t *testing.T) { + d, err := makeDirectory() + if err != nil { + t.Fatal(err) + } + d.directory = make([]bufferpool.PageID, 0) + key := "123" + + _, err = d.getPageID([]byte(key)) + assert.Error(t, err) +} + +func TestHashTable_Get(t *testing.T) { + d, err := makeDirectory() + if err != nil { + t.Fatal(err) + } + d.globalDepth = 4 + d.directory = make([]bufferpool.PageID, 16) + + d.directory[14] = 2 + + // force there to be two pages + page, err := d.bufferPool.NewPage() //1 + if err != nil { + t.Fatal(err) + } + page.WritePageType(bufferpool.PAGE_TYPE_HASH_TABLE) + d.bufferPool.FlushPage(page.ID()) + + page, err = d.bufferPool.NewPage() //2 + if err != nil { + t.Fatal(err) + } + page.WritePageType(bufferpool.PAGE_TYPE_HASH_TABLE) + d.bufferPool.FlushPage(page.ID()) + + // now do the test + page, err = d.bufferPool.FetchPage(2) + if err != nil { + t.Fatal(err) + } + defer d.bufferPool.UnpinPage(page.ID()) + + key := "478" + value := "Hi" + + page.WriteKeyValueInSlot(0, []byte(key), []byte(value)) + page.WriteSlotCount(int16(1)) + + result, _, err := d.Get([]byte(key)) + if err != nil { + t.Fatal(err) + } + + assert.Equal(t, "Hi", string(result)) +} + +func TestHashTable_Get_ShouldHandleError(t *testing.T) { + d, err := makeDirectory() + if err != nil { + t.Fatal(err) + } + key := "123" + + result, found, err := d.Get([]byte(key)) + + assert.Equal(t, err, nil) + assert.Equal(t, []byte{}, result) + assert.Equal(t, false, found) +} + +func TestHashTable_Put(t *testing.T) { + d, err := 
makeDirectory() + if err != nil { + t.Fatal(err) + } + + page, err := d.bufferPool.FetchPage(0) + if err != nil { + t.Fatal(err) + } + defer d.bufferPool.UnpinPage(page.ID()) + err = addToPage(page, 5) + if err != nil { + t.Fatal(err) + } + + d.Put([]byte("123"), []byte("Yolo !")) + + value, found, err := d.Get([]byte("123")) + if err != nil { + t.Fatal(err) + } + assert.Equal(t, "Yolo !", string(value)) + assert.Equal(t, true, found) +} + +func TestHashTable_Put_ShouldIncreaseSize_WhenTableIsFull(t *testing.T) { + d, err := makeDirectory() + if err != nil { + t.Fatal(err) + } + + page, err := d.bufferPool.FetchPage(0) + if err != nil { + t.Fatal(err) + } + defer d.bufferPool.UnpinPage(page.ID()) + err = addToPage(page, 227) // keys per page with key 12, value 20 + if err != nil { + t.Fatal(err) + } + + d.Put([]byte("123"), []byte("Yolo !")) + + value, _, err := d.Get([]byte("123")) + if err != nil { + t.Fatal(err) + } + assert.Equal(t, "Yolo !", string(value)) + assert.Equal(t, 2, len(d.directory)) + assert.Equal(t, uint(1), d.globalDepth) +} + +func TestHashTable_PutShouldIncrementLD_WhenPageIsFull(t *testing.T) { + d, err := makeDirectory() + if err != nil { + t.Fatal(err) + } + + page, err := d.bufferPool.FetchPage(0) + if err != nil { + t.Fatal(err) + } + defer d.bufferPool.UnpinPage(page.ID()) + err = addToPage(page, 227) // keys per page with key 12, value 20 + if err != nil { + t.Fatal(err) + } + + d.Put([]byte("12345678"), []byte("Yolo !")) + + assert.Equal(t, int64(8192*2), d.bufferPool.OnDiskSize()) + assert.Equal(t, 1, int(d.globalDepth)) + + p0, err := d.bufferPool.FetchPage(0) + if err != nil { + t.Fatal(err) + } + defer d.bufferPool.UnpinPage(p0.ID()) + assert.Equal(t, int16(1), p0.ReadLocalDepth()) + + p1, err := d.bufferPool.FetchPage(1) + if err != nil { + t.Fatal(err) + } + defer d.bufferPool.UnpinPage(p1.ID()) + assert.Equal(t, int16(1), p1.ReadLocalDepth()) +} + +func TestHashTable_Put_INT(t *testing.T) { + d, err := makeDirectory() + if err != nil { + t.Fatal(err) + } + + for i := 0; i < 4000; i++ { + err = d.Put([]byte("key"+strconv.Itoa(i)), []byte("Yolo !")) + if err != nil { + t.Fatal(err) + } + } + + assert.Equal(t, []bufferpool.PageID{0, 1, 2, 3, 4, 7, 6, 5, 13, 14, 12, 9, 8, 15, 10, 11, 28, 24, 21, 18, 4, 19, 29, 20, 27, 22, 25, 23, 17, 15, 16, 26}, d.directory) +} + +func TestHashTable_Put_SameKey_ALotOfTime(t *testing.T) { + d, err := makeDirectory() + if err != nil { + t.Fatal(err) + } + + for i := 0; i < 10000; i++ { + d.Put([]byte("key"), []byte("Yolo ! "+strconv.Itoa(i))) + } + + value, _, err := d.Get([]byte("key")) + if err != nil { + t.Fatal(err) + } + + assert.Equal(t, "Yolo ! 9999", string(value)) + assert.Equal(t, 1, len(d.directory)) + assert.Equal(t, int64(8192), d.bufferPool.OnDiskSize()) +} + +func TestHashTable_Put_Many_Keys(t *testing.T) { + d, err := makeDirectory() + if err != nil { + t.Fatal(err) + } + + for i := 0; i < 1000000; i++ { + err = d.Put([]byte("key"+strconv.Itoa(i)), []byte("Yolo ! "+strconv.Itoa(i))) + if err != nil { + t.Fatal(err) + } + } + + value, _, err := d.Get([]byte("key99756")) + if err != nil { + t.Fatal(err) + } + assert.Equal(t, "Yolo ! 99756", string(value)) + assert.Equal(t, 8192, len(d.directory)) + assert.Equal(t, uint(13), d.globalDepth) + d.Close() +} + +func BenchmarkHashTable_Put_Many_Keys(b *testing.B) { + for i := 0; i < b.N; i++ { + + d, err := makeDirectory() + if err != nil { + b.Fatal(err) + } + + for i := 0; i < 1000000; i++ { + err = d.Put([]byte("key"+strconv.Itoa(i)), []byte("Yolo ! 
"+strconv.Itoa(i))) + if err != nil { + b.Fatal(err) + } + } + + value, _, err := d.Get([]byte("key99756")) + if err != nil { + b.Fatal(err) + } + assert.Equal(b, "Yolo ! 99756", string(value)) + assert.Equal(b, 8192, len(d.directory)) + assert.Equal(b, uint(13), d.globalDepth) + d.Close() + + } +} + +func addToPage(page *bufferpool.Page, numberOfRecords int) error { + for i := 0; i < numberOfRecords; i++ { + //fmt.Printf("writing record %d\n", i+1) + itoa := strconv.Itoa(i) + err := page.WriteKeyValueInSlot(int16(i), []byte("key"+itoa), []byte("value foo bar")) + if err != nil { + return err + } + page.WriteSlotCount(int16(page.ReadSlotCount() + 1)) + } + return nil +} diff --git a/extendiblehash/key.go b/extendiblehash/key.go new file mode 100644 index 000000000..ec7f55da5 --- /dev/null +++ b/extendiblehash/key.go @@ -0,0 +1,24 @@ +package extendiblehash + +import ( + "github.com/zeebo/xxh3" +) + +type Hashable interface { + Hash() uint64 +} + +// use the same seed all the time - this is not for crypto +var protoSeed uint64 = 20041973 + +var hasher = xxh3.NewSeed(protoSeed) + +type Key []byte + +// BEWARE - not concurrent!! +func (k Key) Hash() uint64 { + hasher.Reset() + hasher.Write(k) + hash := hasher.Sum64() + return hash +} diff --git a/fbcloud/auth.go b/fbcloud/auth.go index c1c59f6ef..a3caf3c0a 100644 --- a/fbcloud/auth.go +++ b/fbcloud/auth.go @@ -23,11 +23,11 @@ type cognitoParameters struct { type cognitoAuthRequest struct { AuthParameters cognitoParameters `json:"AuthParameters"` AuthFlow string `json:"AuthFlow"` - AppClientId string `json:"ClientId"` + AppClientID string `json:"ClientId"` } type cognitoAuthResult struct { - IdToken string `json:"IdToken"` + IDToken string `json:"IdToken"` } type cognitoAuthResponse struct { @@ -41,7 +41,7 @@ func authenticate(clientID, region, email, password string) (string, error) { Password: password, }, AuthFlow: authFlow, - AppClientId: clientID, + AppClientID: clientID, } data, err := json.Marshal(authPayload) @@ -75,5 +75,5 @@ func authenticate(clientID, region, email, password string) (string, error) { return "", errors.Wrap(err, "decoding cognito auth response") } - return auth.Result.IdToken, nil + return auth.Result.IDToken, nil } diff --git a/fbcloud/client.go b/fbcloud/client.go index 74ef679c2..53c2d5035 100644 --- a/fbcloud/client.go +++ b/fbcloud/client.go @@ -37,7 +37,6 @@ func (cq *Queryer) tokenRefresh() error { } cq.token = token cq.lastRefresh = time.Now() - fmt.Println("refreshed auth token") return nil } @@ -53,7 +52,7 @@ func (cq *Queryer) Query(org, db, sql string) (*featurebase.WireQueryResponse, e return nil, errors.Wrap(err, "refreshing token") } } - url := fmt.Sprintf("%s/v2/databases/%s/query", cq.Host, db) + url := fmt.Sprintf("%s/v2/databases/%s/query/sql", cq.Host, db) sqlReq := &tokenizedSQL{ Language: "sql", @@ -71,6 +70,7 @@ func (cq *Queryer) Query(org, db, sql string) (*featurebase.WireQueryResponse, e if err != nil { return nil, errors.Wrap(err, "creating new post request") } + req.Header.Add("Content-Type", "application/json") req.Header.Add("Authorization", cq.token) var resp *http.Response @@ -85,11 +85,11 @@ func (cq *Queryer) Query(org, db, sql string) (*featurebase.WireQueryResponse, e if resp.StatusCode/100 != 2 { return nil, errors.Errorf("unexpected status: %s, full body: '%s'", resp.Status, fullbod) } - var cloudResp cloudResponse - if err := json.Unmarshal(fullbod, &cloudResp); err != nil { + + var sqlResponse featurebase.WireQueryResponse + if err := json.Unmarshal(fullbod, &sqlResponse); err 
!= nil { return nil, errors.Wrapf(err, "decoding cloud response, body:\n%s", fullbod) } - sqlResponse := cloudResp.Results return &sqlResponse, nil } diff --git a/field.go b/field.go index 34e22ae95..94cb7b8e3 100644 --- a/field.go +++ b/field.go @@ -1502,7 +1502,7 @@ func (f *Field) ClearValue(qcx *Qcx, columnID uint64) (changed bool, err error) } func (f *Field) MaxForShard(qcx *Qcx, shard uint64, filter *Row) (ValCount, error) { - tx, finisher, err := qcx.GetTx(Txo{Write: true, Index: f.idx, Shard: shard}) + tx, finisher, err := qcx.GetTx(Txo{Write: false, Index: f.idx, Shard: shard}) defer finisher(&err) bsig := f.bsiGroup(f.name) if bsig == nil { @@ -1532,7 +1532,7 @@ func (f *Field) MaxForShard(qcx *Qcx, shard uint64, filter *Row) (ValCount, erro // (this field must be an Int or Decimal field). It also returns the // number of times the minimum value appears. func (f *Field) MinForShard(qcx *Qcx, shard uint64, filter *Row) (ValCount, error) { - tx, finisher, err := qcx.GetTx(Txo{Write: true, Index: f.idx, Shard: shard}) + tx, finisher, err := qcx.GetTx(Txo{Write: false, Index: f.idx, Shard: shard}) defer finisher(&err) bsig := f.bsiGroup(f.name) if bsig == nil { diff --git a/field_internal_test.go b/field_internal_test.go index 59907ca30..d0b071336 100644 --- a/field_internal_test.go +++ b/field_internal_test.go @@ -692,9 +692,6 @@ func TestDecimalField_MinMaxBoundaries(t *testing.T) { func TestDecimalField_MinMaxForShard(t *testing.T) { _, _, f := newTestField(t, OptFieldTypeDecimal(3)) - qcx := f.idx.holder.txf.NewQcx() - defer qcx.Abort() - options := &ImportOptions{} for i, test := range []struct { name string @@ -738,12 +735,19 @@ func TestDecimalField_MinMaxForShard(t *testing.T) { }, } { t.Run(test.name+strconv.Itoa(i), func(t *testing.T) { - if err := f.importFloatValue(qcx, test.columnIDs, test.values, 0, options); err != nil { + qcx := f.idx.holder.txf.NewQcx() + + err := f.importFloatValue(qcx, test.columnIDs, test.values, 0, options) + if err != nil { + qcx.Abort() t.Fatalf("test %d, importing values: %s", i, err.Error()) } + qcx.Abort() shard := uint64(0) + qcx = f.idx.holder.txf.NewQcx() + defer qcx.Abort() maxvc, err := f.MaxForShard(qcx, shard, nil) if err != nil { t.Fatalf("getting max for shard: %v", err) diff --git a/fragment.go b/fragment.go index c00b9ec8a..801a9d2a7 100644 --- a/fragment.go +++ b/fragment.go @@ -186,7 +186,6 @@ func (f *fragment) Open() error { if err := func() error { // Fill cache with rows persisted to disk. 
- f.holder.Logger.Debugf("open cache for index/field/view/fragment: %s/%s/%s/%d", f.index(), f.field(), f.view(), f.shard) if err := f.openCache(); err != nil { return errors.Wrap(err, "opening cache") } @@ -200,7 +199,6 @@ } _ = testhook.Opened(f.holder.Auditor, f, nil) - f.holder.Logger.Debugf("successfully opened index/field/view/fragment: %s/%s/%s/%d", f.index(), f.field(), f.view(), f.shard) return nil } @@ -2091,7 +2089,7 @@ func (f *fragment) ImportRoaringClearAndSet(ctx context.Context, tx Tx, clear, s err = tx.ApplyRewriter(f.index(), f.field(), f.view(), f.shard, 0, rewriter) if err != nil { - errors.Wrap(err, "applying rewriter") + return fmt.Errorf("pilosa.ImportRoaringClearAndSet: %s", err) } if f.CacheType != CacheTypeNone { // TODO this may be quite a bit slower than the way @@ -2134,7 +2132,7 @@ } err = tx.ApplyRewriter(f.index(), f.field(), f.view(), f.shard, 0, rewriter) - return errors.Wrap(err, "applying rewriter") + return errors.Wrap(err, "pilosa.ImportRoaringBSI") // ImportRoaringSingleValued treats "clear" as a single row and clears @@ -2158,7 +2156,7 @@ } err = tx.ApplyRewriter(f.index(), f.field(), f.view(), f.shard, 0, rewriter) - return errors.Wrap(err, "applying rewriter") + return errors.Wrap(err, "pilosa.ImportRoaringSingleValued") func (f *fragment) doImportRoaring(ctx context.Context, tx Tx, data []byte, clear bool) (map[uint64]int, bool, error) { @@ -2724,7 +2722,8 @@ func (f *fragment) intRowIterator(tx Tx, wrap bool, filters ...roaring.BitmapFil func (f *fragment) foreachRow(tx Tx, filters []roaring.BitmapFilter, fn func(rid uint64) error) error { filter := roaring.NewBitmapRowFilter(fn, filters...)
- return tx.ApplyFilter(f.index(), f.field(), f.view(), f.shard, 0, filter) + err := tx.ApplyFilter(f.index(), f.field(), f.view(), f.shard, 0, filter) + return errors.Wrap(err, "pilosa.foreachRow") } func (it *intRowIterator) Seek(rowID uint64) { diff --git a/go.mod b/go.mod index 1dbef6739..a7caa917e 100644 --- a/go.mod +++ b/go.mod @@ -92,6 +92,16 @@ require ( robpike.io/ivy v0.2.9 ) +require ( + github.com/DataDog/datadog-agent/pkg/obfuscate v0.0.0-20211129110424-6491aa3bf583 // indirect + github.com/DataDog/sketches-go v1.0.0 // indirect + github.com/dgraph-io/ristretto v0.1.0 // indirect + github.com/josharian/intern v1.0.0 // indirect + github.com/mailru/easyjson v0.7.7 // indirect + github.com/philhofer/fwd v1.1.1 // indirect + github.com/tinylib/msgp v1.1.2 // indirect +) + require ( github.com/DataDog/datadog-go/v5 v5.1.0 // indirect github.com/DataDog/gostackparse v0.5.0 // indirect @@ -162,7 +172,7 @@ require ( github.com/tmc/grpc-websocket-proxy v0.0.0-20201229170055-e5319fda7802 // indirect github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2 // indirect github.com/yusufpapurcu/wmi v1.2.2 // indirect - github.com/zeebo/xxh3 v1.0.2 // indirect + github.com/zeebo/xxh3 v1.0.2 go.etcd.io/etcd/client/v2 v2.305.5 // indirect go.etcd.io/etcd/pkg/v3 v3.5.5 // indirect go.etcd.io/etcd/raft/v3 v3.5.5 // indirect diff --git a/go.sum b/go.sum index 2efa9f29e..8c291fb52 100644 --- a/go.sum +++ b/go.sum @@ -228,6 +228,7 @@ github.com/desertbit/timer v0.0.0-20180107155436-c41aec40b27f/go.mod h1:xH/i4TFM github.com/dgraph-io/ristretto v0.1.0 h1:Jv3CGQHp9OjuMBSne1485aDpUkTKEcUqF+jm/LuerPI= github.com/dgraph-io/ristretto v0.1.0/go.mod h1:fux0lOrBhrVCJd3lcTHsIJhq1T2rokOu6v9Vcb3Q9ug= github.com/dgrijalva/jwt-go v3.2.0+incompatible/go.mod h1:E3ru+11k8xSBh+hMPgOLZmtrrCbhqsmaPHjLKYnJCaQ= +github.com/dgryski/go-farm v0.0.0-20190423205320-6a90982ecee2 h1:tdlZCpZ/P9DhczCTSixgIKmwPv6+wP5DGjqLYw5SUiA= github.com/dgryski/go-farm v0.0.0-20190423205320-6a90982ecee2/go.mod h1:SqUrOPUnsFjfmXRMNPybcSiG0BgUW2AuFH8PAnS2iTw= github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f/go.mod h1:cuUVRXasLTGF7a8hSLbxyZXjz+1KgoB3wDUb6vlszIc= github.com/dgryski/go-sip13 v0.0.0-20181026042036-e10d5fee7954/go.mod h1:vAd38F8PWV+bWy6jNmig1y/TA+kYO4g3RSRF0IAv0no= @@ -463,6 +464,7 @@ github.com/google/go-cmp v0.5.8 h1:e6P7q2lk1O+qJJb4BtCQXlK8vWEO8V1ZeuEdJNOqZyg= github.com/google/go-cmp v0.5.8/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/gofuzz v0.0.0-20161122191042-44d81051d367/go.mod h1:HP5RmnzzSNb993RKQDq4+1A4ia9nllfqcQFTQJedwGI= github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= +github.com/google/gofuzz v1.2.0 h1:xRy4A+RhZaiKjJ1bPfwQ8sedCA+YS2YcCHW6ec7JMi0= github.com/google/gofuzz v1.2.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= github.com/google/martian v2.1.0+incompatible/go.mod h1:9I4somxYTbIHy5NJKHRl3wXiIaQGbYVAs8BPL6v8lEs= github.com/google/martian/v3 v3.0.0/go.mod h1:y5Zk1BBys9G+gd6Jrk0W3cC1+ELVxBWuIGO+w/tUAp0= diff --git a/holder.go b/holder.go index f6004f348..89533d2fc 100644 --- a/holder.go +++ b/holder.go @@ -147,8 +147,6 @@ type Holder struct { // snapshotter/writelogger; then MDS should only start directing queries to // that computer once it has completed applying the snapshot.
directiveApplied bool - - versionStore dax.VersionStore } // HolderOpts holds information about the holder which other things might want @@ -344,8 +342,6 @@ func NewHolder(path string, cfg *HolderConfig) *Holder { path: path, indexes: make(map[string]*Index), - - versionStore: dax.NewNopVersionStore(), } txf, err := NewTxFactory(cfg.StorageConfig.Backend, h.IndexesPath(), h) @@ -1018,14 +1014,6 @@ func (h *Holder) createIndex(cim *CreateIndexMessage, broadcast bool) (*Index, e // Update options. h.addIndex(index) - tkey := dax.TableKey(cim.Index) - qtid := tkey.QualifiedTableID() - - // Initialize the table in holder.versionStore. - if err := h.versionStore.AddTable(context.Background(), qtid); err != nil { - h.Logger.Printf("could not add table to version store: %s", cim.Index) - } - if broadcast { // Send the create index message to all nodes. if err := h.broadcaster.SendSync(cim); err != nil { @@ -1045,7 +1033,7 @@ func (h *Holder) createIndex(cim *CreateIndexMessage, broadcast bool) (*Index, e // createIndexWithPartitions is similar to createIndex, but it takes a list of // partitions for which this node is responsible. This ensures that the node // doesn't instantiate more partition TranslateStores than is necessary. -func (h *Holder) createIndexWithPartitions(cim *CreateIndexMessage, translatePartitions dax.VersionedPartitions) (*Index, error) { +func (h *Holder) createIndexWithPartitions(cim *CreateIndexMessage, translatePartitions dax.PartitionNums) (*Index, error) { if cim.Index == "" { return nil, errors.New("index name required") } @@ -1068,24 +1056,6 @@ func (h *Holder) createIndexWithPartitions(cim *CreateIndexMessage, translatePar // Update options. h.addIndex(index) - tkey := dax.TableKey(cim.Index) - qtid := tkey.QualifiedTableID() - - // Initialize the table in holder.versionStore. - if err := h.versionStore.AddTable(context.Background(), qtid); err != nil { - h.Logger.Printf("could not add table to version store: %s", cim.Index) - } - - // Initialize a list of partitions at version 0. - newPartitions := make(dax.VersionedPartitions, len(translatePartitions)) - for i := range translatePartitions { - newPartitions[i] = dax.NewVersionedPartition(translatePartitions[i].Num, 0) - } - - if err := h.versionStore.AddPartitions(context.Background(), qtid, newPartitions...); err != nil { - return nil, errors.Wrap(err, "adding partitions to version store") - } - // Since this is a new index, we need to kick off // its translation sync. if err := h.translationSyncer.Reset(); err != nil { @@ -1242,14 +1212,6 @@ func (h *Holder) deleteIndex(name string) error { // Remove reference. h.deleteIndexFromMap(name) - tkey := dax.TableKey(name) - qtid := tkey.QualifiedTableID() - - // Remove the index from holder.versionStore. - if _, _, err := h.versionStore.RemoveTable(context.Background(), qtid); err != nil { - h.Logger.Printf("could not find table to remove from version store: %s", name) - } - // I'm not sure if calling Reset() here is necessary // since closing the index stops its translation // sync processes. 
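An aside on the new extendiblehash/key.go earlier in this diff: Key.Hash funnels every call through a single package-level *xxh3.Hasher, hence its "BEWARE - not concurrent!!" warning. A minimal sketch of a goroutine-safe variant, using the stateless xxh3.HashSeed function from the same zeebo/xxh3 package (the HashConcurrent name is hypothetical and not part of this change):

package extendiblehash

import "github.com/zeebo/xxh3"

// HashConcurrent is a hypothetical goroutine-safe alternative to Key.Hash:
// xxh3.HashSeed keeps no state between calls, so there is no shared hasher
// to Reset and no data race when keys are hashed from multiple goroutines.
func (k Key) HashConcurrent() uint64 {
	return xxh3.HashSeed(k, protoSeed)
}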
diff --git a/http_handler.go b/http_handler.go index 95cefe351..bbb72a5f2 100644 --- a/http_handler.go +++ b/http_handler.go @@ -1420,13 +1420,6 @@ func (h *Handler) handlePostSQL(w http.ResponseWriter, r *http.Request) { // put the requestId in the context ctx := fbcontext.WithRequestID(r.Context(), requestID.String()) - sql := string(b) - rootOperator, err := h.api.CompilePlan(ctx, sql) - if err != nil { - h.writeBadRequest(w, r, err) - return - } - // Write response back to client. w.Header().Set("Content-Type", "application/json") @@ -1446,9 +1439,9 @@ func (h *Handler) handlePostSQL(w http.ResponseWriter, r *http.Request) { var value []byte value, err = json.Marshal(execTime) if err != nil { - w.Write([]byte(`,"exec_time": 0`)) + w.Write([]byte(`,"execution-time": 0`)) } else { - w.Write([]byte(`,"exec_time":`)) + w.Write([]byte(`,"execution-time":`)) w.Write(value) } w.Write([]byte("}")) @@ -1496,11 +1489,18 @@ func (h *Handler) handlePostSQL(w http.ResponseWriter, r *http.Request) { if err != nil { planBytes = []byte(`"PROBLEM ENCODING QUERY PLAN"`) } - w.Write([]byte(`,"queryPlan":`)) + w.Write([]byte(`,"query-plan":`)) w.Write(planBytes) } } + sql := string(b) + rootOperator, err := h.api.CompilePlan(ctx, sql) + if err != nil { + writeError(err, false) + return + } + // Get a query iterator. iter, err := rootOperator.Iterator(ctx, nil) if err != nil { @@ -2447,7 +2447,7 @@ func (h *Handler) handleGetIndexShardSnapshot(w http.ResponseWriter, r *http.Req return } - rc, err := h.api.IndexShardSnapshot(r.Context(), indexName, shard) + rc, err := h.api.IndexShardSnapshot(r.Context(), indexName, shard, false) if err != nil { switch errors.Cause(err) { case ErrIndexNotFound: @@ -2790,8 +2790,15 @@ func (h *Handler) handleGetTranslateData(w http.ResponseWriter, r *http.Request) http.Error(w, err.Error(), http.StatusNotFound) return } + tx, err := p.Begin(false) + if err != nil { + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } + defer tx.Rollback() + // Stream translate data to response body. - if _, err := p.WriteTo(w); err != nil { + if _, err := tx.WriteTo(w); err != nil { h.logger.Errorf("error streaming translation data: %s", err) } return @@ -2816,8 +2823,14 @@ func (h *Handler) handleGetTranslateData(w http.ResponseWriter, r *http.Request) http.Error(w, err.Error(), http.StatusNotFound) return } + tx, err := p.Begin(false) + if err != nil { + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } + defer tx.Rollback() // Stream translate partition to response body. 
- if _, err := p.WriteTo(w); err != nil { + if _, err := tx.WriteTo(w); err != nil { h.logger.Errorf("error streaming translation data: %s", err) } } @@ -3860,7 +3873,7 @@ func (h *Handler) handlePostDataframeRestore(w http.ResponseWriter, r *http.Requ http.Error(w, fmt.Sprintf("Index %s Not Found", indexName), http.StatusNotFound) return } - filename := idx.GetDataFramePath(shard) + ".parquet" + filename := idx.GetDataFramePath(shard) + h.api.server.executor.TableExtension() dest, err := os.Create(filename) if err != nil { http.Error(w, fmt.Sprintf("failed to create restore dataframe shard %v %v err:%v", indexName, shard, err), http.StatusBadRequest) @@ -4184,7 +4197,7 @@ func (h *Handler) handleGetDataframe(w http.ResponseWriter, r *http.Request) { http.Error(w, fmt.Sprintf("Index %s Not Found", indexName), http.StatusNotFound) return } - filename := idx.GetDataFramePath(shard) + ".parquet" + filename := idx.GetDataFramePath(shard) + h.api.server.executor.TableExtension() http.ServeFile(w, r, filename) } diff --git a/http_handler_test.go b/http_handler_test.go index dadc7fbeb..db9c1d3a5 100644 --- a/http_handler_test.go +++ b/http_handler_test.go @@ -545,6 +545,64 @@ func TestGetViewAndDelete(t *testing.T) { } } +// TestHandlerSQL tests that the json coming back from a POST /sql request has +// the expected json tags. +func TestHandlerSQL(t *testing.T) { + cfg := server.NewConfig() + cfg.SQL.EndpointEnabled = true + c := test.MustRunCluster(t, 1, []server.CommandOption{ + server.OptCommandConfig(cfg), + }) + defer c.Close() + + m := c.GetPrimary() + + tests := []struct { + name string + url string + sql string + expKeys []string + }{ + { + name: "sql", + url: "/sql", + sql: "show tables", + expKeys: []string{"schema", "data", "execution-time"}, + }, + { + name: "sql-with-plan", + url: "/sql?plan=1", + sql: "show tables", + expKeys: []string{"schema", "data", "query-plan", "execution-time"}, + }, + { + name: "invalid-sql", + url: "/sql", + sql: "invalid sql", + expKeys: []string{"error", "execution-time"}, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + sqlURL := fmt.Sprintf("%s%s", m.URL(), tt.url) + resp := test.Do(t, "POST", sqlURL, tt.sql) + if resp.StatusCode != http.StatusOK { + t.Errorf("post sql, status: %d, body=%s", resp.StatusCode, resp.Body) + } + + out := make(map[string]interface{}) + assert.NoError(t, json.Unmarshal([]byte(resp.Body), &out)) + + keys := make([]string, 0, len(out)) + for k := range out { + keys = append(keys, k) + } + + assert.ElementsMatch(t, tt.expKeys, keys) + }) + } +} + func TestTranslationHandlers(t *testing.T) { // reusable data for the tests nameBytes, err := json.Marshal([]string{"a", "b", "c"}) diff --git a/idk/ingest.go b/idk/ingest.go index b6ef0328f..7cf88f783 100644 --- a/idk/ingest.go +++ b/idk/ingest.go @@ -98,7 +98,7 @@ type Main struct { AllowTimestampOutOfRange bool `help:"Allow ingest to continue when it encounters out of range timestamps in TimestampFields. (default false)"` SkipBadRows int `help:"If you fail to process the first n rows without processing one successfully, fail."` - UseShardTransactionalEndpoint bool `flag:"use-shard-transactional-endpoint" help:"Use alternate import endpoint. Currently unstable/testing"` + UseShardTransactionalEndpoint bool `flag:"use-shard-transactional-endpoint" help:"Use alternate import endpoint that ingests data for all fields in a shard in a single atomic request. No negative performance impact and better consistency. 
Recommended."` MDSAddress string `short:"" help:"MDS address."` OrganizationID dax.OrganizationID `short:"" help:"auto-assigned organization ID"` @@ -1027,7 +1027,7 @@ func (m *Main) setupClient() (*tls.Config, error) { m.SchemaManager = mds.NewSchemaManager(dax.Address(m.MDSAddress), qual, m.log) m.NewImporterFn = func() pilosacore.Importer { - return mds.NewImporter(mdsClient, qtbl.Qualifier(), &qtbl.Table) + return mds.NewImporter(mdsClient, mdsClient, qtbl.Qualifier(), &qtbl.Table) } } else { m.SchemaManager = m.client diff --git a/idk/ingest_test.go b/idk/ingest_test.go index df371680b..7922b8c7a 100644 --- a/idk/ingest_test.go +++ b/idk/ingest_test.go @@ -20,14 +20,10 @@ import ( "github.com/featurebasedb/featurebase/v3/logger" "github.com/golang-jwt/jwt" pilosa "github.com/featurebasedb/featurebase/v3" - "github.com/featurebasedb/featurebase/v3/authn" batch "github.com/featurebasedb/featurebase/v3/batch" - pilosaclient "github.com/featurebasedb/featurebase/v3/client" "github.com/featurebasedb/featurebase/v3/dax" mdsclient "github.com/featurebasedb/featurebase/v3/dax/mds/client" - "github.com/featurebasedb/featurebase/v3/idk/idktest" "github.com/featurebasedb/featurebase/v3/idk/mds" - "github.com/featurebasedb/featurebase/v3/logger" "github.com/featurebasedb/featurebase/v3/pql" "github.com/pkg/errors" "github.com/stretchr/testify/assert" @@ -62,7 +58,7 @@ func configureTestFlagsMDS(main *Main, address dax.Address, qtbl *dax.QualifiedT mdsClient := mdsclient.New(dax.Address(address), logger.StderrLogger) main.NewImporterFn = func() pilosa.Importer { - return mds.NewImporter(mdsClient, qtbl.TableQualifier, &qtbl.Table) + return mds.NewImporter(mdsClient, mdsClient, qtbl.TableQualifier, &qtbl.Table) } } diff --git a/idk/mds/importer.go b/idk/mds/importer.go index 0c1cf6cf9..6548eca8e 100644 --- a/idk/mds/importer.go +++ b/idk/mds/importer.go @@ -18,18 +18,20 @@ var _ featurebase.Importer = &importer{} // importer type importer struct { - mds MDS + noder dax.Noder + schemar dax.Schemar mu sync.Mutex qual dax.TableQualifier tbl *dax.Table } -func NewImporter(mds MDS, qual dax.TableQualifier, tbl *dax.Table) *importer { +func NewImporter(noder dax.Noder, schemar dax.Schemar, qual dax.TableQualifier, tbl *dax.Table) *importer { return &importer{ - mds: mds, - qual: qual, - tbl: tbl, + noder: noder, + schemar: schemar, + qual: qual, + tbl: tbl, } } @@ -75,7 +77,7 @@ func (m *importer) CreateTableKeys(ctx context.Context, tid dax.TableID, keys .. // all the partitions at once, then getting the distinct list of addresses // and looping over that instead. for partition, ks := range partitions { - address, err := m.mds.IngestPartition(context.Background(), qtbl.QualifiedID(), partition) + address, err := m.noder.IngestPartition(context.Background(), qtbl.QualifiedID(), partition) if err != nil { return nil, errors.Wrapf(err, "calling ingest-partition on table: %s, partition: %d", qtbl, partition) } @@ -112,7 +114,7 @@ func (m *importer) CreateFieldKeys(ctx context.Context, tid dax.TableID, fname d // different partitionN for field translation. 
partition := dax.PartitionNum(0) - address, err := m.mds.IngestPartition(context.Background(), qtbl.QualifiedID(), partition) + address, err := m.noder.IngestPartition(context.Background(), qtbl.QualifiedID(), partition) if err != nil { return nil, errors.Wrapf(err, "calling ingest-partition on table: %s, partition: %d", qtbl, partition) } @@ -137,7 +139,7 @@ func (m *importer) ImportRoaringBitmap(ctx context.Context, tid dax.TableID, fld return errors.Wrapf(err, "getting qtbl") } - address, err := m.mds.IngestShard(context.Background(), qtbl.QualifiedID(), dax.ShardNum(shard)) + address, err := m.noder.IngestShard(context.Background(), qtbl.QualifiedID(), dax.ShardNum(shard)) if err != nil { return errors.Wrap(err, "calling ingest-shard") } @@ -162,7 +164,7 @@ func (m *importer) ImportRoaringShard(ctx context.Context, tid dax.TableID, shar return errors.Wrapf(err, "getting qtbl") } - address, err := m.mds.IngestShard(context.Background(), qtbl.QualifiedID(), dax.ShardNum(shard)) + address, err := m.noder.IngestShard(context.Background(), qtbl.QualifiedID(), dax.ShardNum(shard)) if err != nil { return errors.Wrap(err, "calling ingest-shard") } @@ -182,7 +184,7 @@ func (m *importer) EncodeImportValues(ctx context.Context, tid dax.TableID, fld return "", nil, errors.Wrapf(err, "getting qtbl") } - address, err := m.mds.IngestShard(context.Background(), qtbl.QualifiedID(), dax.ShardNum(shard)) + address, err := m.noder.IngestShard(context.Background(), qtbl.QualifiedID(), dax.ShardNum(shard)) if err != nil { return "", nil, errors.Wrap(err, "calling ingest-shard") } @@ -207,7 +209,7 @@ func (m *importer) EncodeImport(ctx context.Context, tid dax.TableID, fld *dax.F return "", nil, errors.Wrapf(err, "getting qtbl") } - address, err := m.mds.IngestShard(context.Background(), qtbl.QualifiedID(), dax.ShardNum(shard)) + address, err := m.noder.IngestShard(context.Background(), qtbl.QualifiedID(), dax.ShardNum(shard)) if err != nil { return "", nil, errors.Wrap(err, "calling ingest-shard") } @@ -232,7 +234,7 @@ func (m *importer) DoImport(ctx context.Context, tid dax.TableID, fld *dax.Field return errors.Wrapf(err, "getting qtbl") } - address, err := m.mds.IngestShard(context.Background(), qtbl.QualifiedID(), dax.ShardNum(shard)) + address, err := m.noder.IngestShard(context.Background(), qtbl.QualifiedID(), dax.ShardNum(shard)) if err != nil { return errors.Wrap(err, "calling ingest-shard") } @@ -266,7 +268,7 @@ func (m *importer) getQtbl(ctx context.Context, tid dax.TableID) (*dax.Qualified qtid := dax.NewQualifiedTableID(m.qual, tid) - qtbl, err := m.mds.Table(ctx, qtid) + qtbl, err := m.schemar.TableByID(ctx, qtid) if err != nil { return nil, errors.Wrap(err, "getting table") } diff --git a/idk/mds/mds.go b/idk/mds/mds.go deleted file mode 100644 index eb51fb4c4..000000000 --- a/idk/mds/mds.go +++ /dev/null @@ -1,21 +0,0 @@ -// Package mds contains the implementation of the SchemaManager interface. -package mds - -import ( - "context" - - "github.com/featurebasedb/featurebase/v3/dax" -) - -// MDS represents the MDS methods which importer uses. -type MDS interface { - IngestPartition(ctx context.Context, qtid dax.QualifiedTableID, partition dax.PartitionNum) (dax.Address, error) - IngestShard(ctx context.Context, qtid dax.QualifiedTableID, shard dax.ShardNum) (dax.Address, error) - - // Table was added so the `importer` instance (in this package) of the - // batch.Importer interface could lookup up a table based on the name - // provided in a method, as opposed to setting the table up front. 
This is - // because in queryer, we don't know the table yet, because we haven't - // parsed the sql yet. - Table(ctx context.Context, qtid dax.QualifiedTableID) (*dax.QualifiedTable, error) -} diff --git a/importer.go b/importer.go index 645e3deea..059b12d42 100644 --- a/importer.go +++ b/importer.go @@ -6,6 +6,8 @@ import ( "github.com/featurebasedb/featurebase/v3/dax" "github.com/featurebasedb/featurebase/v3/roaring" + "github.com/pkg/errors" + "golang.org/x/sync/errgroup" ) type Importer interface { @@ -32,12 +34,14 @@ var _ Importer = &onPremImporter{} // implemtation of the Importer interface does not use, and therefore they // intentionally no-op. type onPremImporter struct { - api *API + api *API + client *InternalClient } func NewOnPremImporter(api *API) *onPremImporter { return &onPremImporter{ - api: api, + api: api, + client: api.holder.executor.client, } } @@ -63,13 +67,27 @@ func (i *onPremImporter) ImportRoaringBitmap(ctx context.Context, tid dax.TableI } func (i *onPremImporter) ImportRoaringShard(ctx context.Context, tid dax.TableID, shard uint64, request *ImportRoaringShardRequest) error { - return i.api.ImportRoaringShard(ctx, string(tid), shard, request) + nodes, err := i.api.ShardNodes(ctx, string(tid), shard) + if err != nil { + return err + } + eg := errgroup.Group{} + for _, node := range nodes { + node := node + if node.ID == i.api.NodeID() { // local + eg.Go(func() error { + return i.api.ImportRoaringShard(ctx, string(tid), shard, request) + }) + } else { + eg.Go(func() error { // forward on + return i.client.ImportRoaringShard(ctx, &node.URI, string(tid), shard, true, request) + }) + } + } + err = eg.Wait() + return errors.Wrap(err, "importing") } -// EncodeImportValues is kind of weird. We're trying to mimic what the client -// does here (because the Importer interface was originally based off of the -// client methods). So we end up generating a protobuf-encode byte slice. And we -// don't really use path. func (i *onPremImporter) EncodeImportValues(ctx context.Context, tid dax.TableID, fld *dax.Field, shard uint64, vals []int64, ids []uint64, clear bool) (path string, data []byte, err error) { // This intentionally no-ops. See comment on struct. return "", nil, nil diff --git a/index.go b/index.go index b49bc1036..33078bca0 100644 --- a/index.go +++ b/index.go @@ -50,7 +50,7 @@ type Index struct { holder *Holder // Per-partition translation stores - translatePartitions dax.VersionedPartitions + translatePartitions dax.PartitionNums translateStores map[int]TranslateStore translationSyncer TranslationSyncer @@ -977,7 +977,7 @@ func (i *Index) DeleteField(name string) error { // case, we need to update this cached value. Really, this is kind of hacky and // we need to revisit the ApplyDirective logic so that it's more intuitive with // respect to index.translatePartitions. 
-func (i *Index) SetTranslatePartitions(tp dax.VersionedPartitions) { +func (i *Index) SetTranslatePartitions(tp dax.PartitionNums) { i.mu.Lock() defer i.mu.Unlock() diff --git a/internal/clustertests/pause_node_test.go b/internal/clustertests/pause_node_test.go index 1dce3cd0a..bb58dd3f7 100644 --- a/internal/clustertests/pause_node_test.go +++ b/internal/clustertests/pause_node_test.go @@ -16,7 +16,6 @@ import ( pilosa "github.com/featurebasedb/featurebase/v3" "github.com/featurebasedb/featurebase/v3/authn" - boltdb "github.com/featurebasedb/featurebase/v3/boltdb" "github.com/featurebasedb/featurebase/v3/disco" "github.com/featurebasedb/featurebase/v3/encoding/proto" "github.com/featurebasedb/featurebase/v3/net" @@ -161,7 +160,7 @@ func openTranslateStores(dirPath, index string) (map[int]pilosa.TranslateStore, return nil, err } // open bolt db - ts, err := boltdb.OpenTranslateStore(filePath, index, "", partition, disco.DefaultPartitionN, false) + ts, err := pilosa.OpenTranslateStore(filePath, index, "", partition, disco.DefaultPartitionN, false) ts.SetReadOnly(true) if err != nil { return nil, err diff --git a/internal_client.go b/internal_client.go index ab80682c5..5ae930660 100644 --- a/internal_client.go +++ b/internal_client.go @@ -888,7 +888,6 @@ func (c *InternalClient) Import(ctx context.Context, qcx *Qcx, req *ImportReques func (c *InternalClient) ImportValue(ctx context.Context, qcx *Qcx, req *ImportValueRequest, options *ImportOptions) error { span, ctx := tracing.StartSpanFromContext(ctx, "InternalClient.Import") defer span.Finish() - if req.ColumnKeys != nil { req.Shard = ^uint64(0) } @@ -926,35 +925,7 @@ func (c *InternalClient) ImportRoaring(ctx context.Context, uri *pnet.URI, index return errors.Wrap(err, "marshal import request") } - // Generate HTTP request. - httpReq, err := http.NewRequest("POST", url, bytes.NewBuffer(data)) - if err != nil { - return errors.Wrap(err, "creating request") - } - httpReq.Header.Set("Content-Type", "application/x-protobuf") - httpReq.Header.Set("Accept", "application/x-protobuf") - httpReq.Header.Set("X-Pilosa-Row", "roaring") - httpReq.Header.Set("User-Agent", "pilosa/"+Version) - AddAuthToken(ctx, &httpReq.Header) - - // Execute request against the host. - resp, err := c.executeRequest(httpReq.WithContext(ctx)) - if err != nil { - return err - } - defer resp.Body.Close() - - dec := json.NewDecoder(resp.Body) - rbody := &ImportResponse{} - err = dec.Decode(rbody) - // Decode can return EOF when no error occurred. helpful! - if err != nil && err != io.EOF { - return errors.Wrap(err, "decoding response body") - } - if rbody.Err != "" { - return errors.Wrap(errors.New(rbody.Err), "importing roaring") - } - return nil + return c.executeProtobufRequest(ctx, url, data) } // ExportCSV bulk exports data for a single shard from a host to CSV format. @@ -2459,3 +2430,58 @@ func (c *InternalClient) getDiskUsage(ctx context.Context, index string) (DiskUs return sum, nil } + +func (c *InternalClient) executeProtobufRequest(ctx context.Context, url string, data []byte) error { + httpReq, err := http.NewRequest("POST", url, bytes.NewBuffer(data)) + if err != nil { + return errors.Wrap(err, "creating request") + } + httpReq.Header.Set("Content-Type", "application/x-protobuf") + httpReq.Header.Set("Accept", "application/x-protobuf") + httpReq.Header.Set("X-Pilosa-Row", "roaring") + httpReq.Header.Set("User-Agent", "pilosa/"+Version) + AddAuthToken(ctx, &httpReq.Header) + + // Execute request against the host. 
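+ // Note: the reply is decoded below as a JSON ImportResponse even though
+ // the request body is protobuf-encoded; this mirrors the inline code in
+ // ImportRoaring that this helper was extracted from.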
+ resp, err := c.executeRequest(httpReq.WithContext(ctx)) + if err != nil { + return err + } + defer resp.Body.Close() + + dec := json.NewDecoder(resp.Body) + rbody := &ImportResponse{} + err = dec.Decode(rbody) + // Decode can return EOF when no error occurred. helpful! + if err != nil && err != io.EOF { + return errors.Wrap(err, "decoding response body") + } + if rbody.Err != "" { + return errors.Wrap(errors.New(rbody.Err), "importing roaring") + } + return nil +} + +// ImportRoaringShard sends an ImportRoaringShardRequest (data for all fields +// of a single shard in one atomic request) to the node at uri, falling back +// to the client's default URI when uri is nil. +func (c *InternalClient) ImportRoaringShard(ctx context.Context, uri *pnet.URI, index string, shard uint64, remote bool, req *ImportRoaringShardRequest) error { + span, ctx := tracing.StartSpanFromContext(ctx, "InternalClient.ImportRoaringShard") + defer span.Finish() + + if index == "" { + return ErrIndexRequired + } + if uri == nil { + uri = c.defaultURI + } + + vals := url.Values{} + vals.Set("remote", strconv.FormatBool(remote)) + url := fmt.Sprintf("%s%s/index/%s/shard/%d/import-roaring?%s", uri, c.prefix(), index, shard, vals.Encode()) + + // Marshal data to protobuf. + data, err := c.serializer.Marshal(req) + if err != nil { + return errors.Wrap(err, "marshal import roaring shard request") + } + return c.executeProtobufRequest(ctx, url, data) +} diff --git a/lattice/src/App/Query/QueryContainer.tsx b/lattice/src/App/Query/QueryContainer.tsx index c07df10df..50a8b6a86 100644 --- a/lattice/src/App/Query/QueryContainer.tsx +++ b/lattice/src/App/Query/QueryContainer.tsx @@ -50,11 +50,45 @@ export const QueryContainer: FC<{}> = () => { }); const handleHTTPQueryMessages = (response) => { - setIsSQL3(true) - streamingResults.headers = response.data.schema.fields; - streamingResults.rows = response.data.data; - setErrorResult(undefined); - setResults([streamingResults]); + // SQL is true + setIsSQL3(true); + + // check status of result + if (response.status >= 400 && response.status < 500) { + // response had status 400-499 + streamingResults.error = response.data.error; + setErrorResult(streamingResults); + } else if (response.data.error != undefined) { + // response contained an error, but returned outside 400 range + streamingResults.error = response.data.error; + setErrorResult(streamingResults); + } else { + // sets the history + let recentQueries = JSON.parse( + localStorage.getItem('recent-queries') || '[]' + ); + const lastQuery = localStorage.getItem('last-query'); + recentQueries.unshift(lastQuery); + recentQueries = uniqBy(recentQueries); + + if (recentQueries.length > 10) { + localStorage.setItem( + 'recent-queries', + JSON.stringify(recentQueries.slice(0, 9)) + ); + } else { + localStorage.setItem('recent-queries', JSON.stringify(recentQueries)); + } + + // stream results for display + streamingResults.roundtrip = moment.duration(moment().diff(startTime)).as('milliseconds'); + streamingResults.headers = response.data.schema.fields; + streamingResults.duration = response.data["execution-time"]; + streamingResults.rows = response.data.data; + + setErrorResult(undefined); + setResults([streamingResults, ...results]); + } setLoading(false); } @@ -75,6 +109,7 @@ export const QueryContainer: FC<{}> = () => { streamingResults.error = statusMessage; setErrorResult(streamingResults); } else { + // sets the history let recentQueries = JSON.parse( localStorage.getItem('recent-queries') || '[]' ); @@ -138,9 +173,10 @@ export const QueryContainer: FC<{}> = () => { if (e.response.status === 404) { setIsSQL3(false); querySQL(query, handleQueryMessages, 
handleQueryEnd); + } else if (e.response.status === 400) { + setIsSQL3(false) + handleHTTPQueryMessages(e.response); } - - }); } } @@ -164,4 +200,4 @@ export const QueryContainer: FC<{}> = () => { isSQL3={isSQL3} /> ); -}; +}; \ No newline at end of file diff --git a/pilosa_test.go b/pilosa_test.go index 0d4264b8f..1b21e2e29 100644 --- a/pilosa_test.go +++ b/pilosa_test.go @@ -6,7 +6,7 @@ import ( "strings" "testing" - pilosa "github.com/featurebasedb/featurebase/v3" + "github.com/featurebasedb/featurebase/v3" _ "github.com/featurebasedb/featurebase/v3/test" ) diff --git a/rbf.go b/rbf.go index b82f94363..78081f1bc 100644 --- a/rbf.go +++ b/rbf.go @@ -388,11 +388,13 @@ func (tx *RBFTx) ImportRoaringBits(index, field, view string, shard uint64, rit } func (tx *RBFTx) ApplyFilter(index, field, view string, shard uint64, ckey uint64, filter roaring.BitmapFilter) (err error) { - return tx.tx.ApplyFilter(rbfName(index, field, view, shard), ckey, filter) + err = tx.tx.ApplyFilter(rbfName(index, field, view, shard), ckey, filter) + return errors.Wrap(err, fmt.Sprintf("applying filter for index %s, field %s, view %s, shard %d", index, field, view, shard)) } func (tx *RBFTx) ApplyRewriter(index, field, view string, shard uint64, ckey uint64, filter roaring.BitmapRewriter) (err error) { - return tx.tx.ApplyRewriter(rbfName(index, field, view, shard), ckey, filter) + err = tx.tx.ApplyRewriter(rbfName(index, field, view, shard), ckey, filter) + return errors.Wrap(err, fmt.Sprintf("applying rewriter for index %s, field %s, view %s, shard %d", index, field, view, shard)) } func (tx *RBFTx) GetSortedFieldViewList(idx *Index, shard uint64) (fvs []txkey.FieldView, err error) { diff --git a/rbf/cursorx.go b/rbf/cursorx.go index 9a4b98003..cbd56d928 100644 --- a/rbf/cursorx.go +++ b/rbf/cursorx.go @@ -196,9 +196,9 @@ func intoContainer(l leafCell, tx *Tx, replacing *roaring.Container, target []by // intoWritableContainer always uses the provided target for a copy of // the container's contents, so the container can be modified safely. -func intoWritableContainer(l leafCell, tx *Tx, replacing *roaring.Container, target []byte) (c *roaring.Container) { +func intoWritableContainer(l leafCell, tx *Tx, replacing *roaring.Container, target []byte) (c *roaring.Container, err error) { if len(l.Data) == 0 { - return nil + return nil, nil } orig := l.Data target = target[:len(orig)] @@ -209,7 +209,10 @@ func intoWritableContainer(l leafCell, tx *Tx, replacing *roaring.Container, tar case ContainerTypeBitmapPtr: pgno := toPgno(target) target = target[:PageSize] // reslice back to full size - _, bm, _ := tx.leafCellBitmapInto(pgno, target) + _, bm, err := tx.leafCellBitmapInto(pgno, target) + if err != nil { + return nil, fmt.Errorf("intoWritableContainer: %s", err) + } c = roaring.RemakeContainerBitmapN(replacing, bm, int32(l.BitN)) case ContainerTypeBitmap: c = roaring.RemakeContainerBitmapN(replacing, toArray64(target), int32(l.BitN)) @@ -221,7 +224,7 @@ func intoWritableContainer(l leafCell, tx *Tx, replacing *roaring.Container, tar // expensive. 
c.CheckN() c.SetMapped(false) - return c + return c, nil } func toContainer(l leafCell, tx *Tx) (c *roaring.Container) { diff --git a/rbf/tx.go b/rbf/tx.go index 04d8f9488..0c0df1272 100644 --- a/rbf/tx.go +++ b/rbf/tx.go @@ -1668,7 +1668,10 @@ func (s *containerFilter) ApplyFilter() (err error) { } for err := s.cursor.Next(); err == nil; err = s.cursor.Next() { elem := &s.cursor.stack.elems[s.cursor.stack.top] - leafPage, _, _ := s.cursor.tx.readPage(elem.pgno) + leafPage, _, err := s.cursor.tx.readPage(elem.pgno) + if err != nil { + return fmt.Errorf("reading from pgno %d applying filter: %s", elem.pgno, err) + } readLeafCellInto(&cell, leafPage, elem.index) key := roaring.FilterKey(cell.Key) if key < minKey { @@ -1730,7 +1733,10 @@ func (s *containerFilter) ApplyRewriter() (err error) { } for err := s.cursor.Next(); err == nil; err = s.cursor.Next() { elem := &s.cursor.stack.elems[s.cursor.stack.top] - leafPage, _, _ := s.cursor.tx.readPage(elem.pgno) + leafPage, _, err := s.cursor.tx.readPage(elem.pgno) + if err != nil { + return fmt.Errorf("reading from pgno %d applying rewriter: %s", elem.pgno, err) + } readLeafCellInto(&cell, leafPage, elem.index) key = roaring.FilterKey(cell.Key) if key < minKey { @@ -1741,7 +1747,10 @@ func (s *containerFilter) ApplyRewriter() (err error) { return res.Err } if res.YesKey <= key && res.NoKey <= key { - data := intoWritableContainer(cell, s.cursor.tx, &s.header, s.body[:]) + data, err := intoWritableContainer(cell, s.cursor.tx, &s.header, s.body[:]) + if err != nil { + return fmt.Errorf("applying rewriter: %s", err) + } res = s.rewriter.RewriteData(key, data, writeback) if res.Err != nil { return res.Err diff --git a/row_test.go b/row_test.go index dfea01fcf..44e9712af 100644 --- a/row_test.go +++ b/row_test.go @@ -7,7 +7,7 @@ import ( "reflect" "testing" - pilosa "github.com/featurebasedb/featurebase/v3" + "github.com/featurebasedb/featurebase/v3" ) // Ensure a row can be merged diff --git a/schema.go b/schema.go index 5e7d0d677..9c9a4a426 100644 --- a/schema.go +++ b/schema.go @@ -138,7 +138,10 @@ func IndexInfoToTable(ii *IndexInfo) *dax.Table { PartitionN: dax.DefaultPartitionN, Description: ii.Options.Description, + Owner: ii.Owner, CreatedAt: ii.CreatedAt, + UpdatedAt: ii.UpdatedAt, + UpdatedBy: ii.LastUpdateUser, } // Sort ii.Fields by CreatedAt before adding them to sortedFields. @@ -271,8 +274,11 @@ func TablesToIndexInfos(tbls []*dax.Table) []*IndexInfo { // TableToIndexInfo converts a dax.Table to a featurease.IndexInfo. 
func TableToIndexInfo(tbl *dax.Table) *IndexInfo { ii := &IndexInfo{ - Name: string(tbl.Name), // TODO(tlt): this should be TableKey i think - CreatedAt: tbl.CreatedAt, + Name: string(tbl.Name), + Owner: tbl.Owner, + CreatedAt: tbl.CreatedAt, + UpdatedAt: tbl.UpdatedAt, + LastUpdateUser: tbl.UpdatedBy, Options: IndexOptions{ Keys: tbl.StringKeys(), TrackExistence: true, diff --git a/server.go b/server.go index 8fb55ca64..67cbad129 100644 --- a/server.go +++ b/server.go @@ -17,20 +17,19 @@ import ( uuid "github.com/satori/go.uuid" - "github.com/featurebasedb/featurebase/v3/dax/computer" - "github.com/featurebasedb/featurebase/v3/dax/inmem" - "github.com/featurebasedb/featurebase/v3/disco" - "github.com/featurebasedb/featurebase/v3/logger" - pnet "github.com/featurebasedb/featurebase/v3/net" - rbfcfg "github.com/featurebasedb/featurebase/v3/rbf/cfg" - "github.com/featurebasedb/featurebase/v3/roaring" - "github.com/featurebasedb/featurebase/v3/sql3" - "github.com/featurebasedb/featurebase/v3/sql3/parser" - planner_types "github.com/featurebasedb/featurebase/v3/sql3/planner/types" - "github.com/featurebasedb/featurebase/v3/stats" - "github.com/featurebasedb/featurebase/v3/storage" - "github.com/pkg/errors" - "golang.org/x/sync/errgroup" + daxstorage "github.com/featurebasedb/featurebase/v3/dax/storage" + "github.com/featurebasedb/featurebase/v3/disco" + "github.com/featurebasedb/featurebase/v3/logger" + pnet "github.com/featurebasedb/featurebase/v3/net" + rbfcfg "github.com/featurebasedb/featurebase/v3/rbf/cfg" + "github.com/featurebasedb/featurebase/v3/roaring" + "github.com/featurebasedb/featurebase/v3/sql3" + "github.com/featurebasedb/featurebase/v3/sql3/parser" + planner_types "github.com/featurebasedb/featurebase/v3/sql3/planner/types" + "github.com/featurebasedb/featurebase/v3/stats" + "github.com/featurebasedb/featurebase/v3/storage" + "github.com/pkg/errors" + "golang.org/x/sync/errgroup" _ "github.com/lib/pq" ) @@ -97,11 +96,10 @@ type Server struct { // nolint: maligned executionPlannerFn ExecutionPlannerFn - writeLogReader computer.WriteLogReader - writeLogWriter computer.WriteLogWriter - snapshotReadWriter computer.SnapshotReadWriter + serverlessStorage *daxstorage.ResourceManager - dataframeEnabled bool + dataframeEnabled bool + dataframeUseParquet bool } type ExecutionPlannerFn func(executor Executor, api *API, sql string) sql3.CompilePlanner @@ -429,15 +427,6 @@ func OptServerPartitionAssigner(p string) ServerOption { } } -// OptServerWriteLogReader provides an implemenation of the WriteLogReader -// interface. -func OptServerWriteLogReader(wlr computer.WriteLogReader) ServerOption { - return func(s *Server) error { - s.writeLogReader = wlr - return nil - } -} - func OptServerExecutionPlannerFn(fn ExecutionPlannerFn) ServerOption { return func(s *Server) error { s.executionPlannerFn = fn @@ -445,20 +434,9 @@ func OptServerExecutionPlannerFn(fn ExecutionPlannerFn) ServerOption { } } -// OptServerWriteLogWriter provides an implemenation of the WriteLogWriter -// interface. -func OptServerWriteLogWriter(wlw computer.WriteLogWriter) ServerOption { - return func(s *Server) error { - s.writeLogWriter = wlw - return nil - } -} - -// OptServerSnapshotReadWriter provides an implemenation of the -// SnapshotReadWriter interface. 
-func OptServerSnapshotReadWriter(snap computer.SnapshotReadWriter) ServerOption { +func OptServerServerlessStorage(mm *daxstorage.ResourceManager) ServerOption { return func(s *Server) error { - s.snapshotReadWriter = snap + s.serverlessStorage = mm return nil } } @@ -479,6 +457,13 @@ func OptServerIsDataframeEnabled(is bool) ServerOption { } } +func OptServerDataframeUseParquet(is bool) ServerOption { + return func(s *Server) error { + s.dataframeUseParquet = is + return nil + } +} + // NewServer returns a new instance of Server. func NewServer(opts ...ServerOption) (*Server, error) { cluster := newCluster() @@ -550,6 +535,7 @@ func NewServer(opts ...ServerOption) (*Server, error) { } s.executor = newExecutor(executorOpts...) s.executor.dataframeEnabled = s.dataframeEnabled + s.executor.datafameUseParquet = s.dataframeUseParquet path, err := expandDirName(s.dataDir) if err != nil { @@ -564,23 +550,13 @@ func NewServer(opts ...ServerOption) (*Server, error) { s.holder.Logger.Infof("cwd: %v", cwd) s.holder.Logger.Infof("cmd line: %v", strings.Join(os.Args, " ")) - // The compute nodes keep a local cache of the VersionStore which applies - // only to the data (shard, partitions, fields) managed by the compute node - // (as opposed to the VersionStore in MDS which keeps information about all - // data). It would be okay for this to be an in-memory implementation as - // long as the compute node isn't expected to survive a restart; in that - // case, it would be necessary to use an implementation which saves state - // somewhere, such as local disk. - versionStore := inmem.NewVersionStore() - s.cluster.Path = path s.cluster.logger = s.logger s.cluster.holder = s.holder s.cluster.disCo = s.disCo s.cluster.noder = s.noder s.cluster.sharder = s.sharder - s.cluster.writeLogWriter = s.writeLogWriter - s.cluster.versionStore = versionStore + s.cluster.serverlessStorage = s.serverlessStorage // Append the NodeID tag to stats. s.holder.Stats = s.holder.Stats.WithTags(fmt.Sprintf("node_id:%s", s.nodeID)) @@ -596,7 +572,6 @@ func NewServer(opts ...ServerOption) (*Server, error) { s.holder.broadcaster = s s.holder.sharder = s.sharder s.holder.serializer = s.serializer - s.holder.versionStore = versionStore // Initial stats must be invoked after the executor obtains reference to the holder. s.executor.InitStats() @@ -828,6 +803,7 @@ func (s *Server) Close() error { var errh, errd error var errhs error var errc error + var errSS error if s.cluster != nil { errc = s.cluster.close() @@ -839,6 +815,9 @@ func (s *Server) Close() error { if s.holder != nil { errh = s.holder.Close() } + if s.serverlessStorage != nil { + errSS = s.serverlessStorage.RemoveAll() + } // prefer to return holder error over cluster // error. This order is somewhat arbitrary. 
It would be better if we had @@ -856,7 +835,10 @@ func (s *Server) Close() error { if errd != nil { return errors.Wrap(errd, "closing disco") } - return errors.Wrap(errE, "closing executor") + if errE != nil { + return errors.Wrap(errE, "closing executor") + } + return errors.Wrap(errSS, "unlocking all serverless storage") } } diff --git a/server/config.go b/server/config.go index 20e53848f..282e3fe56 100644 --- a/server/config.go +++ b/server/config.go @@ -238,6 +238,7 @@ type Config struct { DataDog struct { Enable bool `toml:"enable"` + EnableTracing bool `toml:"enable-tracing"` Service string `toml:"service"` Env string `toml:"env"` Version string `toml:"version"` @@ -252,7 +253,8 @@ type Config struct { Auth Auth Dataframe struct { - Enable bool `toml:"enable"` + Enable bool `toml:"enable"` + UseParquet bool `toml:"use-parquet"` } `toml:"dataframe"` } diff --git a/server/handler_test.go b/server/handler_test.go index 0364a7c3c..765de0c81 100644 --- a/server/handler_test.go +++ b/server/handler_test.go @@ -19,7 +19,6 @@ import ( "time" pilosa "github.com/featurebasedb/featurebase/v3" - "github.com/featurebasedb/featurebase/v3/boltdb" "github.com/featurebasedb/featurebase/v3/encoding/proto" "github.com/featurebasedb/featurebase/v3/pql" pb "github.com/featurebasedb/featurebase/v3/proto" @@ -1395,7 +1394,7 @@ func TestHandler_Endpoints(t *testing.T) { func TestCluster_TranslateStore(t *testing.T) { cluster := test.MustRunUnsharedCluster(t, 1, []server.CommandOption{ server.OptCommandServerOptions( - pilosa.OptServerOpenTranslateStore(boltdb.OpenTranslateStore), + pilosa.OptServerOpenTranslateStore(pilosa.OpenTranslateStore), ), }) defer cluster.Close() // nolint: errcheck @@ -1407,7 +1406,7 @@ func TestClusterTranslator(t *testing.T) { cluster := test.MustRunUnsharedCluster(t, 3, []server.CommandOption{ server.OptCommandServerOptions( - pilosa.OptServerOpenTranslateStore(boltdb.OpenTranslateStore), + pilosa.OptServerOpenTranslateStore(pilosa.OpenTranslateStore), )}, ) defer cluster.Close() diff --git a/server/server.go b/server/server.go index dcc37a016..3e9c22b26 100644 --- a/server/server.go +++ b/server/server.go @@ -27,30 +27,30 @@ import ( "time" pilosa "github.com/featurebasedb/featurebase/v3" - "github.com/featurebasedb/featurebase/v3/authn" - "github.com/featurebasedb/featurebase/v3/authz" - "github.com/featurebasedb/featurebase/v3/boltdb" - "github.com/featurebasedb/featurebase/v3/dax" - "github.com/featurebasedb/featurebase/v3/dax/computer" - "github.com/featurebasedb/featurebase/v3/disco" - "github.com/featurebasedb/featurebase/v3/encoding/proto" - petcd "github.com/featurebasedb/featurebase/v3/etcd" - "github.com/featurebasedb/featurebase/v3/gcnotify" - "github.com/featurebasedb/featurebase/v3/gopsutil" - "github.com/featurebasedb/featurebase/v3/logger" - pnet "github.com/featurebasedb/featurebase/v3/net" - "github.com/featurebasedb/featurebase/v3/prometheus" - "github.com/featurebasedb/featurebase/v3/sql3" - "github.com/featurebasedb/featurebase/v3/sql3/planner" - "github.com/featurebasedb/featurebase/v3/statik" - "github.com/featurebasedb/featurebase/v3/stats" - "github.com/featurebasedb/featurebase/v3/statsd" - "github.com/featurebasedb/featurebase/v3/systemlayer" - "github.com/featurebasedb/featurebase/v3/syswrap" - "github.com/featurebasedb/featurebase/v3/testhook" - "github.com/pelletier/go-toml" - "github.com/pkg/errors" - "golang.org/x/sync/errgroup" + "github.com/featurebasedb/featurebase/v3/authn" + "github.com/featurebasedb/featurebase/v3/authz" + 
"github.com/featurebasedb/featurebase/v3/dax" + "github.com/featurebasedb/featurebase/v3/dax/computer" + "github.com/featurebasedb/featurebase/v3/dax/storage" + "github.com/featurebasedb/featurebase/v3/disco" + "github.com/featurebasedb/featurebase/v3/encoding/proto" + petcd "github.com/featurebasedb/featurebase/v3/etcd" + "github.com/featurebasedb/featurebase/v3/gcnotify" + "github.com/featurebasedb/featurebase/v3/gopsutil" + "github.com/featurebasedb/featurebase/v3/logger" + pnet "github.com/featurebasedb/featurebase/v3/net" + "github.com/featurebasedb/featurebase/v3/prometheus" + "github.com/featurebasedb/featurebase/v3/sql3" + "github.com/featurebasedb/featurebase/v3/sql3/planner" + "github.com/featurebasedb/featurebase/v3/statik" + "github.com/featurebasedb/featurebase/v3/stats" + "github.com/featurebasedb/featurebase/v3/statsd" + "github.com/featurebasedb/featurebase/v3/systemlayer" + "github.com/featurebasedb/featurebase/v3/syswrap" + "github.com/featurebasedb/featurebase/v3/testhook" + "github.com/pelletier/go-toml" + "github.com/pkg/errors" + "golang.org/x/sync/errgroup" ) type loggerLogger interface { @@ -75,9 +75,10 @@ type Command struct { logger loggerLogger queryLogger loggerLogger - Registrar computer.Registrar - writeLogService computer.WriteLogService - snapshotService computer.SnapshotService + Registrar computer.Registrar + serverlessStorage *storage.ResourceManager + writeLogService computer.WriteLogService + snapshotService computer.SnapshotService Handler pilosa.HandlerI httpHandler http.Handler @@ -127,6 +128,7 @@ func OptCommandConfig(config *Config) CommandOption { c.Config.TLS = config.TLS c.Config.MDSAddress = config.MDSAddress c.Config.WriteLogger = config.WriteLogger + c.Config.SQL.EndpointEnabled = config.SQL.EndpointEnabled return nil } c.Config = config @@ -194,8 +196,10 @@ const ( // we want to set resource limits *exactly once*, and then be able // to report on whether or not that succeeded. -var setupResourceLimitsOnce sync.Once -var setupResourceLimitsErr error +var ( + setupResourceLimitsOnce sync.Once + setupResourceLimitsErr error +) // doSetupResourceLimits is the function which actually does the // resource limit setup, possibly yielding an error. it's a Command @@ -550,19 +554,8 @@ func (m *Command) setupServer() error { m.Config.Etcd.Dir = filepath.Join(path, pilosa.DiscoDir) } - // WriteLogger setup. - var wlw computer.WriteLogWriter = computer.NewNopWriteLogWriter() - var wlr computer.WriteLogReader = computer.NewNopWriteLogReader() - if m.writeLogService != nil { - wlrw := computer.NewWriteLogReadWriter(m.writeLogService) - wlr = wlrw - wlw = wlrw - } - - // Snapshotter setup. 
- var snap computer.SnapshotReadWriter = computer.NewNopSnapshotReadWriter() - if m.snapshotService != nil { - snap = computer.NewSnapshotReadWriter(m.snapshotService) + if m.writeLogService != nil && m.snapshotService != nil { + m.serverlessStorage = storage.NewResourceManager(m.snapshotService, m.writeLogService, m.logger) } executionPlannerFn := func(e pilosa.Executor, api *pilosa.API, sql string) sql3.CompilePlanner { @@ -581,7 +574,7 @@ func (m *Command) setupServer() error { pilosa.OptServerMetricInterval(time.Duration(m.Config.Metric.PollInterval)), pilosa.OptServerDiagnosticsInterval(diagnosticsInterval), pilosa.OptServerExecutorPoolSize(m.Config.WorkerPoolSize), - pilosa.OptServerOpenTranslateStore(boltdb.OpenTranslateStore), + pilosa.OptServerOpenTranslateStore(pilosa.OpenTranslateStore), pilosa.OptServerOpenTranslateReader(pilosa.GetOpenTranslateReaderWithLockerFunc(c, &sync.Mutex{})), pilosa.OptServerOpenIDAllocator(pilosa.OpenIDAllocator), pilosa.OptServerLogger(m.logger), @@ -599,10 +592,9 @@ func (m *Command) setupServer() error { pilosa.OptServerQueryHistoryLength(m.Config.QueryHistoryLength), pilosa.OptServerPartitionAssigner(m.Config.Cluster.PartitionToNodeAssignment), pilosa.OptServerExecutionPlannerFn(executionPlannerFn), - pilosa.OptServerWriteLogReader(wlr), - pilosa.OptServerWriteLogWriter(wlw), - pilosa.OptServerSnapshotReadWriter(snap), + pilosa.OptServerServerlessStorage(m.serverlessStorage), pilosa.OptServerIsDataframeEnabled(m.Config.Dataframe.Enable), + pilosa.OptServerDataframeUseParquet(m.Config.Dataframe.UseParquet), } if m.isComputeNode { @@ -646,9 +638,7 @@ func (m *Command) setupServer() error { m.API, err = pilosa.NewAPI( pilosa.OptAPIServer(m.Server), pilosa.OptAPIImportWorkerPoolSize(m.Config.ImportWorkerPoolSize), - pilosa.OptAPIWriteLogReader(wlr), - pilosa.OptAPIWriteLogWriter(wlw), - pilosa.OptAPISnapshotter(snap), + pilosa.OptAPIServerlessStorage(m.serverlessStorage), pilosa.OptAPIDirectiveWorkerPoolSize(m.Config.DirectiveWorkerPoolSize), pilosa.OptAPIIsComputeNode(m.isComputeNode), ) @@ -747,7 +737,7 @@ func (m *Command) setupLogger() error { var f *logger.FileWriter var err error if m.Config.LogPath != "" { - f, err = logger.NewFileWriter(m.Config.LogPath) + f, err = logger.NewFileWriterMode(m.Config.LogPath, 0640) if err != nil { return errors.Wrap(err, "opening file") } diff --git a/sql3/errors.go b/sql3/errors.go index 8efd7ef47..10c5ceff3 100644 --- a/sql3/errors.go +++ b/sql3/errors.go @@ -9,8 +9,7 @@ import ( ) const ( - ErrInternal errors.Code = "ErrInternal" - + ErrInternal errors.Code = "ErrInternal" ErrCacheKeyNotFound errors.Code = "ErrCacheKeyNotFound" ErrDuplicateColumn errors.Code = "ErrDuplicateColumn" @@ -58,7 +57,8 @@ const ( ErrTypeAssignmentIncompatible errors.Code = "ErrTypeAssignmentIncompatible" - ErrInvalidUngroupedColumnReference errors.Code = "ErrInvalidUngroupedColumnReference" + ErrInvalidUngroupedColumnReference errors.Code = "ErrInvalidUngroupedColumnReference" + ErrInvalidUngroupedColumnReferenceInHaving errors.Code = "ErrInvalidUngroupedColumnReferenceInHaving" ErrInvalidTimeUnit errors.Code = "ErrInvalidTimeUnit" ErrInvalidTimeEpoch errors.Code = "ErrInvalidTimeEpoch" @@ -74,10 +74,10 @@ const ( ErrTableIDColumnConstraints errors.Code = "ErrTableIDColumnConstraints" ErrTableIDColumnAlter errors.Code = "ErrTableIDColumnAlter" ErrTableNotFound errors.Code = "ErrTableNotFound" + ErrTableExists errors.Code = "ErrTableExists" ErrColumnNotFound errors.Code = "ErrColumnNotFound" ErrTableColumnNotFound errors.Code = 
"ErrTableColumnNotFound" ErrInvalidKeyPartitionsValue errors.Code = "ErrInvalidKeyPartitionsValue" - ErrInvalidShardWidthValue errors.Code = "ErrInvalidShardWidthValue" ErrBadColumnConstraint errors.Code = "ErrBadColumnConstraint" ErrConflictingColumnConstraint errors.Code = "ErrConflictingColumnConstraint" @@ -113,6 +113,10 @@ const ( // optimizer errors ErrAggregateNotAllowedInGroupBy errors.Code = "ErrIdPercentileNotAllowedInGroupBy" + + // function evaluation + ErrValueOutOfRange errors.Code = "ErrValueOutOfRange" + ErrStringLengthMismatch errors.Code = "ErrStringLengthMismatch" ) func NewErrDuplicateColumn(line int, col int, column string) error { @@ -184,6 +188,13 @@ func NewErrInvalidUngroupedColumnReference(line, col int, column string) error { ) } +func NewErrInvalidUngroupedColumnReferenceInHaving(line, col int, column string) error { + return errors.New( + ErrInvalidUngroupedColumnReferenceInHaving, + fmt.Sprintf("[%d:%d] column '%s' invalid in the having clause because it is not contained in an aggregate or the GROUP BY clause", line, col, column), + ) +} + func NewErrInvalidCast(line, col int, from, to string) error { return errors.New( ErrInvalidCast, @@ -496,6 +507,13 @@ func NewErrTableNotFound(line, col int, tableName string) error { ) } +func NewErrTableExists(line, col int, tableName string) error { + return errors.New( + ErrTableExists, + fmt.Sprintf("[%d:%d] table '%s' already exists", line, col, tableName), + ) +} + func NewErrColumnNotFound(line, col int, columnName string) error { return errors.New( ErrColumnNotFound, @@ -517,13 +535,6 @@ func NewErrInvalidKeyPartitionsValue(line, col int, keypartitions int64) error { ) } -func NewErrInvalidShardWidthValue(line, col int, shardwidth int64) error { - return errors.New( - ErrInvalidShardWidthValue, - fmt.Sprintf("[%d:%d] invalid value '%d' for shardwidth (should be a number that is a power of 2 and greater or equal to 2^16)", line, col, shardwidth), - ) -} - func NewErrBadColumnConstraint(line, col int, constraint, columnType string) error { return errors.New( ErrBadColumnConstraint, @@ -687,3 +698,18 @@ func NewErrAggregateNotAllowedInGroupBy(line, col int, aggName string) error { fmt.Sprintf("[%d:%d] aggregate '%s' not allowed in GROUP BY", line, col, aggName), ) } + +// function evaluation +func NewErrValueOutOfRange(line, col int, val interface{}) error { + return errors.New( + ErrValueOutOfRange, + fmt.Sprintf("[%d:%d] value '%v' out of range", line, col, val), + ) +} + +func NewErrStringLengthMismatch(line, col, len int, val interface{}) error { + return errors.New( + ErrStringLengthMismatch, + fmt.Sprintf("[%d:%d] value '%v' should be of the length %d", line, col, val, len), + ) +} diff --git a/sql3/parser/ast.go b/sql3/parser/ast.go index 84bc019fa..0c90d7c78 100644 --- a/sql3/parser/ast.go +++ b/sql3/parser/ast.go @@ -78,7 +78,6 @@ func (*ResultColumn) node() {} func (*RollbackStatement) node() {} func (*SavepointStatement) node() {} func (*SelectStatement) node() {} -func (*ShardWidthOption) node() {} func (*StringLit) node() {} func (*TableValuedFunction) node() {} func (*TimeUnitConstraint) node() {} @@ -196,7 +195,7 @@ func StatementSource(stmt Statement) Source { case *UpdateStatement: return stmt.Table case *DeleteStatement: - return stmt.Table + return stmt.Source default: return nil } @@ -771,7 +770,6 @@ type TableOption interface { } func (*KeyPartitionsOption) option() {} -func (*ShardWidthOption) option() {} func (*CommentOption) option() {} type KeyPartitionsOption struct { @@ -787,19 +785,6 @@ 
func (o *KeyPartitionsOption) String() string { return buf.String() } -type ShardWidthOption struct { - ShardWidth Pos // position of SHARDWIDTH keyword - Expr Expr // expression -} - -func (o *ShardWidthOption) String() string { - var buf bytes.Buffer - buf.WriteString("SHARDWIDTH (") - buf.WriteString(o.Expr.String()) - buf.WriteString(")") - return buf.String() -} - type CommentOption struct { Comment Pos // position of COMMENT keyword Expr Expr // expression @@ -2818,14 +2803,15 @@ type BulkInsertStatement struct { TransformList []Expr // source to column map TransformRparen Pos // position of column list right paren - From Pos // position of FROM keyword - DataSource Expr // data source - With Pos // position of WITH keyword - BatchSize Expr - RowsLimit Expr - Format Expr - Input Expr - HeaderRow Expr // has header row (that needs to be skipped) + From Pos // position of FROM keyword + DataSource Expr // data source + With Pos // position of WITH keyword + BatchSize Expr + RowsLimit Expr + Format Expr + Input Expr + HeaderRow Expr // has header row (that needs to be skipped) + AllowMissingValues Expr // allows missing values } func (s *BulkInsertStatement) String() string { @@ -3165,23 +3151,14 @@ func (s *UpdateStatement) String() string { } type DeleteStatement struct { - WithClause *WithClause // clause containing CTEs - Delete Pos // position of UPDATE keyword - From Pos // position of FROM keyword - Table *QualifiedTableName // table name + // WithClause *WithClause // clause containing CTEs + Delete Pos // position of DELETE keyword + From Pos // position of FROM keyword + TableName *QualifiedTableName // the name of the table we are deleting from + Source Source // source for the delete Where Pos // position of WHERE keyword WhereExpr Expr // conditional expression - - Order Pos // position of ORDER keyword - OrderBy Pos // position of BY keyword after ORDER - OrderingTerms []*OrderingTerm // terms of ORDER BY clause - - Limit Pos // position of LIMIT keyword - LimitExpr Expr // limit expression - Offset Pos // position of OFFSET keyword - OffsetComma Pos // position of COMMA (instead of OFFSET) - OffsetExpr Expr // offset expression } // Clone returns a deep copy of s. @@ -3190,47 +3167,20 @@ func (s *DeleteStatement) Clone() *DeleteStatement { return nil } other := *s - other.WithClause = s.WithClause.Clone() - other.Table = s.Table.Clone() + //other.WithClause = s.WithClause.Clone() + other.Source = CloneSource(s.Source) other.WhereExpr = CloneExpr(s.WhereExpr) - other.OrderingTerms = cloneOrderingTerms(s.OrderingTerms) - other.LimitExpr = CloneExpr(s.LimitExpr) - other.OffsetExpr = CloneExpr(s.OffsetExpr) return &other } // String returns the string representation of the clause. func (s *DeleteStatement) String() string { var buf bytes.Buffer - if s.WithClause != nil { - buf.WriteString(s.WithClause.String()) - buf.WriteString(" ") - } - fmt.Fprintf(&buf, "DELETE FROM %s", s.Table.String()) + fmt.Fprintf(&buf, "DELETE FROM %s", s.TableName.String()) if s.WhereExpr != nil { fmt.Fprintf(&buf, " WHERE %s", s.WhereExpr.String()) } - - // Write ORDER BY. - if len(s.OrderingTerms) != 0 { - buf.WriteString(" ORDER BY ") - for i, term := range s.OrderingTerms { - if i != 0 { - buf.WriteString(", ") - } - buf.WriteString(term.String()) - } - } - - // Write LIMIT/OFFSET. 
- if s.LimitExpr != nil { - fmt.Fprintf(&buf, " LIMIT %s", s.LimitExpr.String()) - if s.OffsetExpr != nil { - fmt.Fprintf(&buf, " OFFSET %s", s.OffsetExpr.String()) - } - } - return buf.String() } diff --git a/sql3/parser/ast_test.go b/sql3/parser/ast_test.go index 78f1357c5..092b21e6c 100644 --- a/sql3/parser/ast_test.go +++ b/sql3/parser/ast_test.go @@ -393,7 +393,6 @@ func TestCreateFunctionStatement_String(t *testing.T) { } func TestCreateViewStatement_String(t *testing.T) { - t.Skip("CREATE VIEW is currently disabled in the parser") AssertStatementStringer(t, &parser.CreateViewStatement{ Name: &parser.Ident{Name: "vw"}, Columns: []*parser.Ident{ @@ -403,7 +402,7 @@ func TestCreateViewStatement_String(t *testing.T) { Select: &parser.SelectStatement{ Columns: []*parser.ResultColumn{{Star: pos(0)}}, }, - }, `CREATE VIEW "vw" ("x", "y") AS SELECT *`) + }, `CREATE VIEW vw (x, y) AS SELECT *`) AssertStatementStringer(t, &parser.CreateViewStatement{ IfNotExists: pos(0), @@ -411,12 +410,12 @@ func TestCreateViewStatement_String(t *testing.T) { Select: &parser.SelectStatement{ Columns: []*parser.ResultColumn{{Star: pos(0)}}, }, - }, `CREATE VIEW IF NOT EXISTS "vw" AS SELECT *`) + }, `CREATE VIEW IF NOT EXISTS vw AS SELECT *`) } func TestDeleteStatement_String(t *testing.T) { AssertStatementStringer(t, &parser.DeleteStatement{ - Table: &parser.QualifiedTableName{Name: &parser.Ident{Name: "tbl"}, Alias: &parser.Ident{Name: "tbl2"}}, + TableName: &parser.QualifiedTableName{Name: &parser.Ident{Name: "tbl"}, Alias: &parser.Ident{Name: "tbl2"}}, }, `DELETE FROM tbl AS tbl2`) // AssertStatementStringer(t, &sql.DeleteStatement{ diff --git a/sql3/parser/parser.go b/sql3/parser/parser.go index 763d7e6ab..3f19647c8 100644 --- a/sql3/parser/parser.go +++ b/sql3/parser/parser.go @@ -122,7 +122,7 @@ func (p *Parser) parseNonExplainStatement() (Statement, error) { case UPDATE: return p.parseUpdateStatement(nil) case DELETE: - return p.parseDeleteStatement(nil) + return p.parseDeleteStatement() // case WITH: // return p.parseWithStatement() case SHOW: @@ -326,14 +326,14 @@ func (p *Parser) parseCreateStatement() (Statement, error) { switch p.peek() { case TABLE: return p.parseCreateTableStatement(pos) - /* case VIEW: - return p.parseCreateViewStatement(pos) - case INDEX, UNIQUE: - return p.parseCreateIndexStatement(pos)*/ + case VIEW: + return p.parseCreateViewStatement(pos) + /*case INDEX, UNIQUE: + return p.parseCreateIndexStatement(pos)*/ case FUNCTION: return p.parseCreateFunctionStatement(pos) default: - return nil, p.errorExpected(pos, tok, "TABLE") + return nil, p.errorExpected(pos, tok, "TABLE, VIEW or FUNCTION") } } @@ -344,14 +344,14 @@ func (p *Parser) parseDropStatement() (Statement, error) { switch p.peek() { case TABLE: return p.parseDropTableStatement(pos) - /* case VIEW: - return p.parseDropViewStatement(pos) - case INDEX: - return p.parseDropIndexStatement(pos)*/ + case VIEW: + return p.parseDropViewStatement(pos) + /* case INDEX: + return p.parseDropIndexStatement(pos)*/ case FUNCTION: return p.parseDropFunctionStatement(pos) default: - return nil, p.errorExpected(pos, tok, "TABLE") + return nil, p.errorExpected(pos, tok, "TABLE, VIEW or FUNCTION") } } @@ -441,15 +441,13 @@ func (p *Parser) parseTableOption() (_ TableOption, err error) { var optionPos Pos - // Parse column constraints. + // Parse table options. 
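// Illustrative note (a sketch, not part of the patch): only KEYPARTITIONS and
// COMMENT survive as table options here, so a declaration such as
//
//	CREATE TABLE t (_id ID) SHARDWIDTH 65536
//
// (hypothetical syntax, reconstructed from the removed parseShardWidthOption
// below) is no longer accepted.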
switch p.peek() { case KEYPARTITIONS: return p.parseKeyPartitionsOption(optionPos) - case COMMENT: - return p.parseCommentOption(optionPos) default: - assert(p.peek() == SHARDWIDTH) - return p.parseShardWidthOption(optionPos) + assert(p.peek() == COMMENT) + return p.parseCommentOption(optionPos) } } @@ -484,21 +482,6 @@ func (p *Parser) parseKeyPartitionsOption(optionPos Pos) (_ *KeyPartitionsOption return &opt, nil } -func (p *Parser) parseShardWidthOption(optionPos Pos) (_ *ShardWidthOption, err error) { - assert(p.peek() == SHARDWIDTH) - - var opt ShardWidthOption - opt.ShardWidth, _, _ = p.scan() - - if isLiteralToken(p.peek()) { - opt.Expr = p.mustParseLiteral() - } else { - return &opt, p.errorExpected(p.pos, p.tok, "literal") - } - - return &opt, nil -} - func (p *Parser) parseColumnDefinitions() (_ []*ColumnDefinition, err error) { var columns []*ColumnDefinition for { @@ -1045,7 +1028,7 @@ func (p *Parser) parseDropTableStatement(dropPos Pos) (_ *DropTableStatement, er return &stmt, nil } -/*func (p *Parser) parseCreateViewStatement(createPos Pos) (_ *CreateViewStatement, err error) { +func (p *Parser) parseCreateViewStatement(createPos Pos) (_ *CreateViewStatement, err error) { assert(p.peek() == VIEW) var stmt CreateViewStatement @@ -1100,9 +1083,9 @@ func (p *Parser) parseDropTableStatement(dropPos Pos) (_ *DropTableStatement, er return &stmt, err } return &stmt, nil -}*/ +} -/*func (p *Parser) parseDropViewStatement(dropPos Pos) (_ *DropViewStatement, err error) { +func (p *Parser) parseDropViewStatement(dropPos Pos) (_ *DropViewStatement, err error) { assert(p.peek() == VIEW) var stmt DropViewStatement @@ -1123,7 +1106,7 @@ func (p *Parser) parseDropTableStatement(dropPos Pos) (_ *DropTableStatement, er } return &stmt, nil -}*/ +} /*func (p *Parser) parseCreateIndexStatement(createPos Pos) (_ *CreateIndexStatement, err error) { assert(p.peek() == INDEX || p.peek() == UNIQUE) @@ -1540,7 +1523,7 @@ func (p *Parser) parseBulkInsertStatement() (_ *BulkInsertStatement, err error) } stmt.With, _, _ = p.scan() if !isBulkInsertOptionStartToken(p.peek(), p) { - return nil, p.errorExpected(p.pos, p.tok, "BATCHSIZE, ROWSLIMIT, FORMAT, INPUT or HEADER_ROW") + return nil, p.errorExpected(p.pos, p.tok, "BATCHSIZE, ROWSLIMIT, FORMAT, INPUT, ALLOW_MISSING_VALUES or HEADER_ROW") } for { err := p.parseBulkInsertOption(&stmt) @@ -1594,6 +1577,10 @@ func (p *Parser) parseBulkInsertOption(stmt *BulkInsertStatement) error { } else { return p.errorExpected(p.pos, p.tok, "literal") } + case "ALLOW_MISSING_VALUES": + stmt.AllowMissingValues = ident + return nil + case "HEADER_ROW": stmt.HeaderRow = ident return nil @@ -1896,11 +1883,11 @@ func (p *Parser) parseUpdateStatement(withClause *WithClause) (_ *UpdateStatemen return &stmt, nil } -func (p *Parser) parseDeleteStatement(withClause *WithClause) (_ *DeleteStatement, err error) { +func (p *Parser) parseDeleteStatement( /*withClause *WithClause*/ ) (_ *DeleteStatement, err error) { assert(p.peek() == DELETE) var stmt DeleteStatement - stmt.WithClause = withClause + //stmt.WithClause = withClause // Parse "DELETE FROM tbl" stmt.Delete, _, _ = p.scan() @@ -1913,12 +1900,15 @@ func (p *Parser) parseDeleteStatement(withClause *WithClause) (_ *DeleteStatemen if err != nil { return &stmt, err } - stmt.Table, err = p.parseQualifiedTableName(ident) + tableName, err := p.parseQualifiedTableName(ident) if err != nil { return &stmt, err } + stmt.Source = tableName + // keep the table name too + stmt.TableName = tableName.Clone() - // Parse WHERE clause. 
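// Illustrative note (a sketch of intent, not normative): after parsing, Source
// and TableName refer to the same table. Source is what the analyzer may later
// rewrite (for example into a JoinClause when an IN (SELECT ...) predicate is
// folded in, see expressionanalyzer.go below), while the cloned TableName
// stays stable so the planner still knows which table the DELETE targets.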
+ // parse WHERE clause. if p.peek() == WHERE { stmt.Where, _, _ = p.scan() if stmt.WhereExpr, err = p.ParseExpr(); err != nil { @@ -1926,31 +1916,6 @@ func (p *Parser) parseDeleteStatement(withClause *WithClause) (_ *DeleteStatemen } } - // Parse ORDER BY clause. This differs from the SELECT parsing in that - // if an ORDER BY is specified then the LIMIT is required. - if p.peek() == ORDER { - if p.peek() == ORDER { - stmt.Order, _, _ = p.scan() - if p.peek() != BY { - return &stmt, p.errorExpected(p.pos, p.tok, "BY") - } - stmt.OrderBy, _, _ = p.scan() - - for { - term, err := p.parseOrderingTerm() - if err != nil { - return &stmt, err - } - stmt.OrderingTerms = append(stmt.OrderingTerms, term) - - if p.peek() != COMMA { - break - } - p.scan() - } - } - } - return &stmt, nil } @@ -3436,7 +3401,7 @@ func (e Error) Error() string { // isTableOptionStartToken returns true if tok is the initial token of a table option. func isTableOptionStartToken(tok Token) bool { switch tok { - case KEYPARTITIONS, SHARDWIDTH, COMMENT: + case KEYPARTITIONS, COMMENT: return true default: return false @@ -3453,7 +3418,7 @@ func isBulkInsertOptionStartToken(tok Token, p *Parser) bool { return false } switch strings.ToUpper(ident.Name) { - case "BATCHSIZE", "ROWSLIMIT", "FORMAT", "INPUT", "HEADER_ROW": + case "BATCHSIZE", "ROWSLIMIT", "FORMAT", "INPUT", "HEADER_ROW", "ALLOW_MISSING_VALUES": return true } } diff --git a/sql3/parser/parser_test.go b/sql3/parser/parser_test.go index 0c4145ff4..74ecaf725 100644 --- a/sql3/parser/parser_test.go +++ b/sql3/parser/parser_test.go @@ -1565,13 +1565,13 @@ func TestParser_ParseStatement(t *testing.T) { AssertParseStatementError(t, `DROP TABLE IF EXISTS`, `1:20: expected table name, found 'EOF'`) }) - /*t.Run("CreateView", func(t *testing.T) { + t.Run("CreateView", func(t *testing.T) { AssertParseStatement(t, `CREATE VIEW vw (col1, col2) AS SELECT x, y`, &parser.CreateViewStatement{ Create: pos(0), View: pos(7), Name: &parser.Ident{NamePos: pos(12), Name: "vw"}, Lparen: pos(15), - Columns: []*sql.Ident{ + Columns: []*parser.Ident{ {NamePos: pos(16), Name: "col1"}, {NamePos: pos(22), Name: "col2"}, }, @@ -1579,7 +1579,7 @@ func TestParser_ParseStatement(t *testing.T) { As: pos(28), Select: &parser.SelectStatement{ Select: pos(31), - Columns: []*sql.ResultColumn{ + Columns: []*parser.ResultColumn{ {Expr: &parser.Ident{NamePos: pos(38), Name: "x"}}, {Expr: &parser.Ident{NamePos: pos(41), Name: "y"}}, }, @@ -1592,7 +1592,7 @@ func TestParser_ParseStatement(t *testing.T) { As: pos(15), Select: &parser.SelectStatement{ Select: pos(18), - Columns: []*sql.ResultColumn{ + Columns: []*parser.ResultColumn{ {Expr: &parser.Ident{NamePos: pos(25), Name: "x"}}, }, }, @@ -1607,7 +1607,7 @@ func TestParser_ParseStatement(t *testing.T) { As: pos(29), Select: &parser.SelectStatement{ Select: pos(32), - Columns: []*sql.ResultColumn{ + Columns: []*parser.ResultColumn{ {Expr: &parser.Ident{NamePos: pos(39), Name: "x"}}, }, }, @@ -1618,11 +1618,11 @@ func TestParser_ParseStatement(t *testing.T) { AssertParseStatementError(t, `CREATE VIEW vw`, `1:14: expected AS, found 'EOF'`) AssertParseStatementError(t, `CREATE VIEW vw (`, `1:16: expected column name, found 'EOF'`) AssertParseStatementError(t, `CREATE VIEW vw (x`, `1:17: expected comma or right paren, found 'EOF'`) - AssertParseStatementError(t, `CREATE VIEW vw AS`, `1:17: expected SELECT or VALUES, found 'EOF'`) + AssertParseStatementError(t, `CREATE VIEW vw AS`, `1:17: expected SELECT, found 'EOF'`) AssertParseStatementError(t, `CREATE VIEW 
vw AS SELECT`, `1:24: expected expression, found 'EOF'`) - })*/ + }) - /*t.Run("DropView", func(t *testing.T) { + t.Run("DropView", func(t *testing.T) { AssertParseStatement(t, `DROP VIEW vw`, &parser.DropViewStatement{ Drop: pos(0), View: pos(5), @@ -1635,11 +1635,11 @@ func TestParser_ParseStatement(t *testing.T) { IfExists: pos(13), Name: &parser.Ident{NamePos: pos(20), Name: "vw"}, }) - AssertParseStatementError(t, `DROP`, `1:1: expected TABLE, VIEW, INDEX, or TRIGGER`) + AssertParseStatementError(t, `DROP`, `1:1: expected TABLE, VIEW or FUNCTION`) AssertParseStatementError(t, `DROP VIEW`, `1:9: expected view name, found 'EOF'`) AssertParseStatementError(t, `DROP VIEW IF`, `1:12: expected EXISTS, found 'EOF'`) AssertParseStatementError(t, `DROP VIEW IF EXISTS`, `1:19: expected view name, found 'EOF'`) - })*/ + }) /*t.Run("CreateIndex", func(t *testing.T) { AssertParseStatement(t, `CREATE INDEX idx ON tbl (x ASC, y DESC, z)`, &parser.CreateIndexStatement{ @@ -2815,14 +2815,20 @@ func TestParser_ParseStatement(t *testing.T) { AssertParseStatement(t, `DELETE FROM tbl`, &parser.DeleteStatement{ Delete: pos(0), From: pos(7), - Table: &parser.QualifiedTableName{ + TableName: &parser.QualifiedTableName{ + Name: &parser.Ident{NamePos: pos(12), Name: "tbl"}, + }, + Source: &parser.QualifiedTableName{ Name: &parser.Ident{NamePos: pos(12), Name: "tbl"}, }, }) AssertParseStatement(t, `DELETE FROM tbl WHERE x = 1`, &parser.DeleteStatement{ Delete: pos(0), From: pos(7), - Table: &parser.QualifiedTableName{ + TableName: &parser.QualifiedTableName{ + Name: &parser.Ident{NamePos: pos(12), Name: "tbl"}, + }, + Source: &parser.QualifiedTableName{ Name: &parser.Ident{NamePos: pos(12), Name: "tbl"}, }, Where: pos(16), @@ -2902,8 +2908,8 @@ func TestParser_ParseStatement(t *testing.T) { AssertParseStatementError(t, `DELETE`, `1:6: expected FROM, found 'EOF'`) AssertParseStatementError(t, `DELETE FROM`, `1:11: expected table name, found 'EOF'`) AssertParseStatementError(t, `DELETE FROM tbl WHERE`, `1:21: expected expression, found 'EOF'`) - AssertParseStatementError(t, `DELETE FROM tbl ORDER `, `1:22: expected BY, found 'EOF'`) - AssertParseStatementError(t, `DELETE FROM tbl ORDER BY`, `1:24: expected expression, found 'EOF'`) + //AssertParseStatementError(t, `DELETE FROM tbl ORDER `, `1:22: expected BY, found 'EOF'`) + //AssertParseStatementError(t, `DELETE FROM tbl ORDER BY`, `1:24: expected expression, found 'EOF'`) //AssertParseStatementError(t, `DELETE FROM tbl ORDER BY x`, `1:26: expected LIMIT, found 'EOF'`) //AssertParseStatementError(t, `DELETE FROM tbl LIMIT`, `1:21: expected expression, found 'EOF'`) //AssertParseStatementError(t, `DELETE FROM tbl LIMIT 1,`, `1:24: expected expression, found 'EOF'`) diff --git a/sql3/parser/scanner.go b/sql3/parser/scanner.go index 573483677..af03dec3a 100644 --- a/sql3/parser/scanner.go +++ b/sql3/parser/scanner.go @@ -185,6 +185,8 @@ func (s *Scanner) scanString() (Pos, Token, string) { ch, _ := s.read() if ch == -1 { return pos, ILLEGAL, `'` + s.buf.String() + } else if ch == '\n' { + return pos, UNTERMSTRING, `'` + s.buf.String() } else if ch == '\'' { if s.peek() == '\'' { // escaped quote s.read() @@ -214,6 +216,11 @@ func (s *Scanner) scanBlob() (Pos, Token, string) { for i := 0; ; i++ { ch, _ := s.read() if ch == '\'' { + if s.peek() == '\'' { // escaped quote + s.read() + s.buf.WriteRune('\'') + continue + } return pos, BLOB, s.buf.String() } else if ch == -1 { return pos, ILLEGAL, string(start) + `'` + s.buf.String() diff --git a/sql3/parser/token.go 
b/sql3/parser/token.go index b79809c4d..bc765a6a1 100644 --- a/sql3/parser/token.go +++ b/sql3/parser/token.go @@ -29,6 +29,7 @@ const ( ILLEGAL Token = iota EOF SPACE + UNTERMSTRING literal_beg IDENT // IDENT @@ -214,7 +215,6 @@ const ( SELECT SELECT_COLUMN SET - SHARDWIDTH SIZE SHOW SPAN @@ -254,9 +254,10 @@ const ( ) var tokens = [...]string{ - ILLEGAL: "ILLEGAL", - EOF: "EOF", - SPACE: "SPACE", + ILLEGAL: "ILLEGAL", + EOF: "EOF", + SPACE: "SPACE", + UNTERMSTRING: "unterminated string literal", IDENT: "IDENT", VARIABLE: "VARIABLE", @@ -438,7 +439,6 @@ var tokens = [...]string{ SELECT_COLUMN: "SELECT_COLUMN", SET: "SET", SIZE: "SIZE", - SHARDWIDTH: "SHARDWIDTH", SHOW: "SHOW", SPAN: "SPAN", TABLE: "TABLE", diff --git a/sql3/parser/walk.go b/sql3/parser/walk.go index 1607fd10c..7db4c35a4 100644 --- a/sql3/parser/walk.go +++ b/sql3/parser/walk.go @@ -165,9 +165,6 @@ func walk(v Visitor, node Node) (_ Node, err error) { if err := walkIdent(v, &n.Name); err != nil { return node, err } - // if err := walkIdentList(v, n.UpdateOfColumns); err != nil { - // return node, err - // } for i := range n.Body { if body, err := walk(v, n.Body[i]); err != nil { return node, err @@ -188,15 +185,6 @@ func walk(v Visitor, node Node) (_ Node, err error) { n.WithClause = nil } } - /*for i := range n.ValueLists { - if list, err := walk(v, n.ValueLists[i]); err != nil { - return node, err - } else if list != nil { - n.ValueLists[i] = list.(*ExprList) - } else { - n.ValueLists[i] = nil - } - }*/ for i := range n.Columns { if col, err := walk(v, n.Columns[i]); err != nil { return node, err @@ -289,15 +277,6 @@ func walk(v Visitor, node Node) (_ Node, err error) { n.Select = nil } }*/ - /*if n.UpsertClause != nil { - if clause, err := walk(v, n.UpsertClause); err != nil { - return node, err - } else if clause != nil { - n.UpsertClause = clause.(*UpsertClause) - } else { - n.UpsertClause = nil - } - }*/ case *UpdateStatement: if n.WithClause != nil { @@ -352,42 +331,27 @@ func walk(v Visitor, node Node) (_ Node, err error) { } case *DeleteStatement: - if n.WithClause != nil { - if clause, err := walk(v, n.WithClause); err != nil { - return node, err - } else if clause != nil { - n.WithClause = clause.(*WithClause) - } else { - n.WithClause = nil - } - } - if n.Table != nil { - if tbl, err := walk(v, n.Table); err != nil { + // if n.WithClause != nil { + // if clause, err := walk(v, n.WithClause); err != nil { + // return node, err + // } else if clause != nil { + // n.WithClause = clause.(*WithClause) + // } else { + // n.WithClause = nil + // } + // } + if n.Source != nil { + if tbl, err := walk(v, n.Source); err != nil { return node, err } else if tbl != nil { - n.Table = tbl.(*QualifiedTableName) + n.Source = tbl.(*QualifiedTableName) } else { - n.Table = nil + n.Source = nil } } if err := walkExpr(v, &n.WhereExpr); err != nil { return node, err } - for i := range n.OrderingTerms { - if term, err := walk(v, n.OrderingTerms[i]); err != nil { - return node, err - } else if term != nil { - n.OrderingTerms[i] = term.(*OrderingTerm) - } else { - n.OrderingTerms[i] = nil - } - } - if err := walkExpr(v, &n.LimitExpr); err != nil { - return node, err - } - if err := walkExpr(v, &n.OffsetExpr); err != nil { - return node, err - } case *PrimaryKeyConstraint: if err := walkIdent(v, &n.Name); err != nil { diff --git a/sql3/planner/compilebulkinsert.go b/sql3/planner/compilebulkinsert.go index 30d881c0d..d934dc61b 100644 --- a/sql3/planner/compilebulkinsert.go +++ b/sql3/planner/compilebulkinsert.go @@ -80,6 +80,13 @@ func (p 
*ExecutionPlanner) compileBulkInsertStatement(stmt *parser.BulkInsertSta } options.hasHeaderRow = bliteral.Value + + // ALLOW_MISSING_VALUES + bliteral, sok = stmt.AllowMissingValues.(*parser.BoolLit) + if !sok { + return nil, sql3.NewErrBoolLiteral(stmt.AllowMissingValues.Pos().Line, stmt.AllowMissingValues.Pos().Column) + } + options.allowMissingValues = bliteral.Value + // batchsize literal, ok := stmt.BatchSize.(*parser.IntegerLit) if !ok { @@ -145,7 +152,7 @@ } } - return NewPlanOpBulkInsert(p, tableName, options), nil + return NewPlanOpQuery(p, NewPlanOpBulkInsert(p, tableName, options), p.sql), nil } // analyzeBulkInsertStatement analyzes a BULK INSERT statement and returns an @@ -263,14 +270,13 @@ } // header row is true if specified, false if not - if stmt.HeaderRow != nil { - stmt.HeaderRow = &parser.BoolLit{ - Value: true, - } - } else { - stmt.HeaderRow = &parser.BoolLit{ - Value: false, - } + stmt.HeaderRow = &parser.BoolLit{ + Value: stmt.HeaderRow != nil, + } + + // allow missing values is true if specified, false if not + stmt.AllowMissingValues = &parser.BoolLit{ + Value: stmt.AllowMissingValues != nil, } // analyze map expressions diff --git a/sql3/planner/compilecreatetable.go b/sql3/planner/compilecreatetable.go index 70d483043..1d34aadae 100644 --- a/sql3/planner/compilecreatetable.go +++ b/sql3/planner/compilecreatetable.go @@ -67,7 +67,11 @@ func (p *ExecutionPlanner) compileCreateTableStatement(stmt *parser.CreateTableS columns = append(columns, column) } - return NewPlanOpQuery(p, NewPlanOpCreateTable(p, tableName, failIfExists, isKeyed, keyPartitions, description, columns), p.sql), nil + cop := NewPlanOpCreateTable(p, tableName, failIfExists, isKeyed, keyPartitions, description, columns) + if keyPartitions > 0 { + cop.AddWarning("The value of KEYPARTITIONS is currently ignored") + } + return NewPlanOpQuery(p, cop, p.sql), nil } // compiles a column def @@ -166,12 +170,14 @@ func (p *ExecutionPlanner) compileColumn(col *parser.ColumnDefinition) (*createT unit := c.Expr.(*parser.StringLit) timeUnit = unit.Value - epochString := c.EpochExpr.(*parser.StringLit) - tm, err := time.ParseInLocation(time.RFC3339, epochString.Value, time.UTC) - if err != nil { - return nil, sql3.NewErrInvalidTimeEpoch(c.EpochExpr.Pos().Line, c.EpochExpr.Pos().Line, epochString.Value) + if c.EpochExpr != nil { + epochString := c.EpochExpr.(*parser.StringLit) + tm, err := time.ParseInLocation(time.RFC3339, epochString.Value, time.UTC) + if err != nil { + return nil, sql3.NewErrInvalidTimeEpoch(c.EpochExpr.Pos().Line, c.EpochExpr.Pos().Column, epochString.Value) + } + epoch = tm } - epoch = tm case *parser.TimeQuantumConstraint: unit := c.Expr.(*parser.StringLit) @@ -289,22 +295,6 @@ func (p *ExecutionPlanner) analyzeCreateTableStatement(stmt *parser.CreateTableS return sql3.NewErrInvalidKeyPartitionsValue(o.Expr.Pos().Line, o.Expr.Pos().Column, i) } - case *parser.ShardWidthOption: - //check the type of the expression - literal, ok := o.Expr.(*parser.IntegerLit) - if !ok { - return sql3.NewErrIntegerLiteral(o.Expr.Pos().Line, o.Expr.Pos().Column) - } - //shardwidth needs to be a power of 2 and > 2^16 - i, err := strconv.ParseInt(literal.Value, 10, 64) - if err != nil { - return err - } - isPwrOf2 := (i & (i - 1)) == 0 - if (i == 0) || !isPwrOf2 || i < (1<<16) { - return sql3.NewErrInvalidShardWidthValue(o.Expr.Pos().Line,
o.Expr.Pos().Column, i) - } - case *parser.CommentOption: _, ok := o.Expr.(*parser.StringLit) diff --git a/sql3/planner/compiledelete.go b/sql3/planner/compiledelete.go new file mode 100644 index 000000000..2e253d4aa --- /dev/null +++ b/sql3/planner/compiledelete.go @@ -0,0 +1,72 @@ +// Copyright 2022 Molecula Corp. All rights reserved. + +package planner + +import ( + "github.com/featurebasedb/featurebase/v3/sql3/parser" + "github.com/featurebasedb/featurebase/v3/sql3/planner/types" +) + +// compileDeleteStatement compiles a parser.DeleteStatement AST into a PlanOperator +func (p *ExecutionPlanner) compileDeleteStatement(stmt *parser.DeleteStatement) (types.PlanOperator, error) { + query := NewPlanOpQuery(p, NewPlanOpNullTable(), p.sql) + + tableName := parser.IdentName(stmt.TableName.Name) + + // source expression + source, err := p.compileSource(query, stmt.Source) + if err != nil { + return nil, err + } + + // handle the where clause + where, err := p.compileExpr(stmt.WhereExpr) + if err != nil { + return nil, err + } + + _, sourceIsScan := source.(*PlanOpPQLTableScan) + + // no where clause and source is a scan so it's a truncate + if where == nil && sourceIsScan { + delOp := NewPlanOpPQLTruncateTable(p, string(tableName)) + + children := []types.PlanOperator{ + delOp, + } + return query.WithChildren(children...) + } + + var delOp types.PlanOperator + + // if we did have a where, insert the filter op + if where != nil { + delOp = NewPlanOpPQLConstRowDelete(p, string(tableName), NewPlanOpFilter(p, where, source)) + } else { + delOp = NewPlanOpPQLConstRowDelete(p, string(tableName), source) + } + + children := []types.PlanOperator{ + delOp, + } + return query.WithChildren(children...) +} + +func (p *ExecutionPlanner) analyzeDeleteStatement(stmt *parser.DeleteStatement) error { + + err := p.analyzeSource(stmt.Source, stmt) + if err != nil { + return err + } + + // if we have a where clause, check that + if stmt.WhereExpr != nil { + expr, err := p.analyzeExpression(stmt.WhereExpr, stmt) + if err != nil { + return err + } + stmt.WhereExpr = expr + } + + return nil +} diff --git a/sql3/planner/compileinsert.go b/sql3/planner/compileinsert.go index 5482097e7..4815e746c 100644 --- a/sql3/planner/compileinsert.go +++ b/sql3/planner/compileinsert.go @@ -68,7 +68,7 @@ func (p *ExecutionPlanner) compileInsertStatement(stmt *parser.InsertStatement) insertValues = append(insertValues, tupleValues) } - return NewPlanOpInsert(p, tableName, targetColumns, insertValues), nil + return NewPlanOpQuery(p, NewPlanOpInsert(p, tableName, targetColumns, insertValues), p.sql), nil } // analyzeInsertStatement analyzes an INSERT statement and returns and error if diff --git a/sql3/planner/compileselect.go b/sql3/planner/compileselect.go index 5d01228ed..abdca8222 100644 --- a/sql3/planner/compileselect.go +++ b/sql3/planner/compileselect.go @@ -17,7 +17,8 @@ import ( // compileSelectStatment compiles a parser.SelectStatment AST into a PlanOperator func (p *ExecutionPlanner) compileSelectStatement(stmt *parser.SelectStatement, isSubquery bool) (types.PlanOperator, error) { query := NewPlanOpQuery(p, NewPlanOpNullTable(), p.sql) - p.scopeStack.push(query) + + aggregates := make([]types.PlanExpression, 0) // handle projections projections := make([]types.PlanExpression, 0) @@ -30,6 +31,7 @@ func (p *ExecutionPlanner) compileSelectStatement(stmt *parser.SelectStatement, planExpr = newAliasPlanExpression(c.Alias.Name, planExpr) } projections = append(projections, planExpr) + aggregates =
p.gatherExprAggregates(planExpr, aggregates) } // group by clause. @@ -44,15 +46,6 @@ } var err error - if stmt.Having.IsValid() { - query.AddWarning("HAVING is not yet supported") - } - - // handle distinct - if stmt.Distinct.IsValid() { - query.AddWarning("DISTINCT not yet implemented") - } - // handle the where clause where, err := p.compileExpr(stmt.WhereExpr) if err != nil { @@ -60,21 +53,89 @@ } // source expression - source, err := p.compileSelectSource(query, stmt.Source) + source, err := p.compileSource(query, stmt.Source) if err != nil { return nil, err } // if we did have a where, insert the filter op if where != nil { + aggregates = p.gatherExprAggregates(where, aggregates) source = NewPlanOpFilter(p, where, source) } + // handle the having clause + having, err := p.compileExpr(stmt.HavingExpr) + if err != nil { + return nil, err + } + + if having != nil { + // gather aggregates + aggregates = p.gatherExprAggregates(having, aggregates) + + // make sure that any references are columns in the group by list, or in an aggregate + + // make a list of group by expressions + aggregateAndGroupByExprs := make([]types.PlanExpression, 0) + aggregateAndGroupByExprs = append(aggregateAndGroupByExprs, groupByExprs...) + // add to that the refs used by all the aggregates. + for _, agg := range aggregates { + InspectExpression(agg, func(expr types.PlanExpression) bool { + switch ex := expr.(type) { + case *sumPlanExpression, *countPlanExpression, *countDistinctPlanExpression, + *avgPlanExpression, *minPlanExpression, *maxPlanExpression, + *percentilePlanExpression: + ch := ex.Children() + // first arg is always the ref + aggregateAndGroupByExprs = append(aggregateAndGroupByExprs, ch[0]) + return false + } + return true + }) + } + + // inspect the having expression, build a list of references that are not + // part of an aggregate + havingReferences := make([]*qualifiedRefPlanExpression, 0) + InspectExpression(having, func(expr types.PlanExpression) bool { + switch ex := expr.(type) { + case *sumPlanExpression, *countPlanExpression, *countDistinctPlanExpression, + *avgPlanExpression, *minPlanExpression, *maxPlanExpression, + *percentilePlanExpression: + return false + case *qualifiedRefPlanExpression: + havingReferences = append(havingReferences, ex) + return false + } + return true + }) + + // check the list of references against the aggregate and group by expressions + for _, nae := range havingReferences { + found := false + for _, pe := range aggregateAndGroupByExprs { + gbe, ok := pe.(*qualifiedRefPlanExpression) + if !ok { + continue + } + if strings.EqualFold(nae.columnName, gbe.columnName) && + strings.EqualFold(nae.tableName, gbe.tableName) { + found = true + break + } + } + if !found { + return nil, sql3.NewErrInvalidUngroupedColumnReferenceInHaving(0, 0, nae.columnName) + } + } + } + + // do we have straight projection or a group by?
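// Illustrative sketch of what the HAVING check above admits, assuming a
// hypothetical table t(city string, age int):
//
//	SELECT city, SUM(age) FROM t GROUP BY city HAVING SUM(age) > 100 -- ok: aggregate
//	SELECT city, SUM(age) FROM t GROUP BY city HAVING city = 'NYC'   -- ok: grouped column
//	SELECT city, SUM(age) FROM t GROUP BY city HAVING age > 21       -- ok: age is a ref used by an aggregate
//	SELECT city FROM t GROUP BY city HAVING age > 21                 -- rejected: ErrInvalidUngroupedColumnReferenceInHaving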
var compiledOp types.PlanOperator - if len(query.aggregates) > 0 { + if len(aggregates) > 0 { //check that any projections that are not aggregates are in the group by list - var nonAggregateReferences []*qualifiedRefPlanExpression + nonAggregateReferences := make([]*qualifiedRefPlanExpression, 0) for _, expr := range projections { InspectExpression(expr, func(expr types.PlanExpression) bool { switch ex := expr.(type) { @@ -108,8 +169,12 @@ func (p *ExecutionPlanner) compileSelectStatement(stmt *parser.SelectStatement, return nil, sql3.NewErrInvalidUngroupedColumnReference(0, 0, nae.columnName) } } - - compiledOp = NewPlanOpProjection(projections, NewPlanOpGroupBy(query.aggregates, groupByExprs, source)) + var groupByOp types.PlanOperator + groupByOp = NewPlanOpGroupBy(aggregates, groupByExprs, source) + if having != nil { + groupByOp = NewPlanOpHaving(p, having, groupByOp) + } + compiledOp = NewPlanOpProjection(projections, groupByOp) } else { compiledOp = NewPlanOpProjection(projections, source) } @@ -153,8 +218,10 @@ func (p *ExecutionPlanner) compileSelectStatement(stmt *parser.SelectStatement, compiledOp = NewPlanOpTop(topExpr, compiledOp) } - // pop the scope - _ = p.scopeStack.pop() + // handle distinct + if stmt.Distinct.IsValid() { + compiledOp = NewPlanOpDistinct(p, compiledOp) + } // if it is a subquery, don't wrap in a PlanOpQuery if isSubquery { @@ -166,7 +233,33 @@ func (p *ExecutionPlanner) compileSelectStatement(stmt *parser.SelectStatement, return query.WithChildren(children...) } -func (p *ExecutionPlanner) compileSelectSource(scope *PlanOpQuery, source parser.Source) (types.PlanOperator, error) { +func (p *ExecutionPlanner) gatherExprAggregates(expr types.PlanExpression, aggregates []types.PlanExpression) []types.PlanExpression { + result := aggregates + InspectExpression(expr, func(expr types.PlanExpression) bool { + switch ex := expr.(type) { + case *sumPlanExpression, *countPlanExpression, *countDistinctPlanExpression, + *avgPlanExpression, *minPlanExpression, *maxPlanExpression, + *percentilePlanExpression: + found := false + for _, ag := range result { + //compare based on string representation + if strings.EqualFold(ag.String(), ex.String()) { + found = true + break + } + } + if !found { + result = append(result, ex) + } + // return false because thats as far down we want to inspect + return false + } + return true + }) + return result +} + +func (p *ExecutionPlanner) compileSource(scope *PlanOpQuery, source parser.Source) (types.PlanOperator, error) { if source == nil { return NewPlanOpNullTable(), nil } @@ -192,11 +285,11 @@ func (p *ExecutionPlanner) compileSelectSource(scope *PlanOpQuery, source parser } } - topOp, err := p.compileSelectSource(scope, sourceExpr.X) + topOp, err := p.compileSource(scope, sourceExpr.X) if err != nil { return nil, err } - bottomOp, err := p.compileSelectSource(scope, sourceExpr.Y) + bottomOp, err := p.compileSource(scope, sourceExpr.Y) if err != nil { return nil, err } @@ -216,22 +309,14 @@ func (p *ExecutionPlanner) compileSelectSource(scope *PlanOpQuery, source parser return NewPlanOpSystemTable(p, st), nil } - // get all the qualified refs that refer to this table + // get all the columns for this table - we will eliminate unused ones + // later on in the optimizer extractColumns := make([]string, 0) - for _, r := range scope.referenceList { - if sourceExpr.MatchesTablenameOrAlias(r.tableName) { - found := false - for _, c := range extractColumns { - if strings.EqualFold(c, r.columnName) { - found = true - break - } - } - if 
!found { - extractColumns = append(extractColumns, r.columnName) - } - } + + for _, oc := range sourceExpr.OutputColumns { + extractColumns = append(extractColumns, oc.ColumnName) } + if sourceExpr.Alias != nil { aliasName := parser.IdentName(sourceExpr.Alias) @@ -255,14 +340,14 @@ func (p *ExecutionPlanner) compileSelectSource(scope *PlanOpQuery, source parser case *parser.ParenSource: if sourceExpr.Alias != nil { aliasName := parser.IdentName(sourceExpr.Alias) - op, err := p.compileSelectSource(scope, sourceExpr.X) + op, err := p.compileSource(scope, sourceExpr.X) if err != nil { return nil, err } return NewPlanOpRelAlias(aliasName, op), nil } - return p.compileSelectSource(scope, sourceExpr.X) + return p.compileSource(scope, sourceExpr.X) case *parser.SelectStatement: subQuery, err := p.compileSelectStatement(sourceExpr, true) @@ -368,7 +453,7 @@ func (p *ExecutionPlanner) analyzeSource(source parser.Source, scope parser.Stat return nil case *parser.SelectStatement: - err := p.analyzeSelectStatement(source) + _, err := p.analyzeSelectStatement(source) if err != nil { return err } @@ -379,21 +464,21 @@ func (p *ExecutionPlanner) analyzeSource(source parser.Source, scope parser.Stat } } -func (p *ExecutionPlanner) analyzeSelectStatement(stmt *parser.SelectStatement) error { +func (p *ExecutionPlanner) analyzeSelectStatement(stmt *parser.SelectStatement) (parser.Expr, error) { // analyze source first - needed for name resolution err := p.analyzeSource(stmt.Source, stmt) if err != nil { - return err + return nil, err } if err := p.analyzeSelectStatementWildcards(stmt); err != nil { - return err + return nil, err } for _, col := range stmt.Columns { expr, err := p.analyzeExpression(col.Expr, stmt) if err != nil { - return err + return nil, err } if expr != nil { col.Expr = expr @@ -402,25 +487,31 @@ func (p *ExecutionPlanner) analyzeSelectStatement(stmt *parser.SelectStatement) expr, err := p.analyzeExpression(stmt.TopExpr, stmt) if err != nil { - return err + return nil, err } if expr != nil { if !(expr.IsLiteral() && typeIsInteger(expr.DataType())) { - return sql3.NewErrIntegerLiteral(stmt.TopExpr.Pos().Line, stmt.TopExpr.Pos().Column) + return nil, sql3.NewErrIntegerLiteral(stmt.TopExpr.Pos().Line, stmt.TopExpr.Pos().Column) } stmt.TopExpr = expr } + expr, err = p.analyzeExpression(stmt.HavingExpr, stmt) + if err != nil { + return nil, err + } + stmt.HavingExpr = expr + expr, err = p.analyzeExpression(stmt.WhereExpr, stmt) if err != nil { - return err + return nil, err } stmt.WhereExpr = expr for i, g := range stmt.GroupByExprs { expr, err = p.analyzeExpression(g, stmt) if err != nil { - return err + return nil, err } if expr != nil { stmt.GroupByExprs[i] = expr @@ -429,7 +520,7 @@ func (p *ExecutionPlanner) analyzeSelectStatement(stmt *parser.SelectStatement) expr, err = p.analyzeExpression(stmt.HavingExpr, stmt) if err != nil { - return err + return nil, err } if expr != nil { stmt.HavingExpr = expr @@ -438,12 +529,12 @@ func (p *ExecutionPlanner) analyzeSelectStatement(stmt *parser.SelectStatement) for _, term := range stmt.OrderingTerms { expr, err := p.analyzeOrderingTermExpression(term.X, stmt) if err != nil { - return err + return nil, err } term.X = expr } - return nil + return stmt, nil } func (p *ExecutionPlanner) analyzeSelectStatementWildcards(stmt *parser.SelectStatement) error { diff --git a/sql3/planner/compileshow.go b/sql3/planner/compileshow.go index ae9d8d7ed..6b9fed8c8 100644 --- a/sql3/planner/compileshow.go +++ b/sql3/planner/compileshow.go @@ -41,7 +41,7 @@ func (p 
*ExecutionPlanner) compileShowTablesStatement(stmt parser.Statement) (ty }, &qualifiedRefPlanExpression{ tableName: "fb_tables", - columnName: "last_updated_user", + columnName: "updated_by", columnIndex: 3, dataType: parser.NewDataTypeString(), }, @@ -53,9 +53,9 @@ func (p *ExecutionPlanner) compileShowTablesStatement(stmt parser.Statement) (ty }, &qualifiedRefPlanExpression{ tableName: "fb_tables", - columnName: "track_existence", + columnName: "updated_at", columnIndex: 5, - dataType: parser.NewDataTypeBool(), + dataType: parser.NewDataTypeTimestamp(), }, &qualifiedRefPlanExpression{ tableName: "fb_tables", @@ -65,7 +65,7 @@ func (p *ExecutionPlanner) compileShowTablesStatement(stmt parser.Statement) (ty }, &qualifiedRefPlanExpression{ tableName: "fb_tables", - columnName: "shard_width", + columnName: "space_used", columnIndex: 7, dataType: parser.NewDataTypeInt(), }, @@ -76,7 +76,7 @@ func (p *ExecutionPlanner) compileShowTablesStatement(stmt parser.Statement) (ty dataType: parser.NewDataTypeString(), }} - return NewPlanOpQuery(p, NewPlanOpProjection(columns, NewPlanOpFeatureBaseTables(pilosa.TablesToIndexInfos(tbls))), p.sql), nil + return NewPlanOpQuery(p, NewPlanOpProjection(columns, NewPlanOpFeatureBaseTables(p, pilosa.TablesToIndexInfos(tbls))), p.sql), nil } func (p *ExecutionPlanner) compileShowColumnsStatement(stmt *parser.ShowColumnsStatement) (_ types.PlanOperator, err error) { diff --git a/sql3/planner/executionplanner.go b/sql3/planner/executionplanner.go index 93c9cb937..5f7f59106 100644 --- a/sql3/planner/executionplanner.go +++ b/sql3/planner/executionplanner.go @@ -12,14 +12,6 @@ import ( "github.com/featurebasedb/featurebase/v3/sql3/planner/types" ) -// PlannerScope holds scope for the planner -// there is a stack of these in the ExecutionPlanner and some corresponding push/pop functions -// this allows us to do scoped operations without passing stuff down into -// every function -type PlannerScope struct { - scope types.PlanOperator -} - // ExecutionPlanner compiles SQL text into a query plan type ExecutionPlanner struct { executor pilosa.Executor @@ -29,7 +21,6 @@ type ExecutionPlanner struct { importer pilosa.Importer logger logger.Logger sql string - scopeStack *scopeStack } func NewExecutionPlanner(executor pilosa.Executor, schemaAPI pilosa.SchemaAPI, systemAPI pilosa.SystemAPI, systemLayerAPI pilosa.SystemLayerAPI, importer pilosa.Importer, logger logger.Logger, sql string) *ExecutionPlanner { @@ -41,7 +32,6 @@ func NewExecutionPlanner(executor pilosa.Executor, schemaAPI pilosa.SchemaAPI, s importer: importer, logger: logger, sql: sql, - scopeStack: newScopeStack(), } } @@ -77,6 +67,8 @@ func (p *ExecutionPlanner) CompilePlan(ctx context.Context, stmt parser.Statemen rootOperator, err = p.compileInsertStatement(stmt) case *parser.BulkInsertStatement: rootOperator, err = p.compileBulkInsertStatement(stmt) + case *parser.DeleteStatement: + rootOperator, err = p.compileDeleteStatement(stmt) default: return nil, sql3.NewErrInternalf("cannot plan statement: %T", stmt) } @@ -90,7 +82,8 @@ func (p *ExecutionPlanner) CompilePlan(ctx context.Context, stmt parser.Statemen func (p *ExecutionPlanner) analyzePlan(stmt parser.Statement) error { switch stmt := stmt.(type) { case *parser.SelectStatement: - return p.analyzeSelectStatement(stmt) + _, err := p.analyzeSelectStatement(stmt) + return err case *parser.ShowTablesStatement: return nil case *parser.ShowColumnsStatement: @@ -107,6 +100,8 @@ func (p *ExecutionPlanner) analyzePlan(stmt parser.Statement) error { return 
p.analyzeInsertStatement(stmt) case *parser.BulkInsertStatement: return p.analyzeBulkInsertStatement(stmt) + case *parser.DeleteStatement: + return p.analyzeDeleteStatement(stmt) default: return sql3.NewErrInternalf("cannot analyze statement: %T", stmt) } @@ -125,73 +120,3 @@ const ( func (p *ExecutionPlanner) checkAccess(ctx context.Context, objectName string, _ accessType) error { return nil } - -// convenience function that allows the planner to keep track of aggregates so we can -// use them during optimization -func (p *ExecutionPlanner) addAggregate(agg types.PlanExpression) error { - table := p.scopeStack.read() - if table == nil { - return sql3.NewErrInternalf("unexpected symbol table state") - } - - switch s := table.scope.(type) { - case *PlanOpQuery: - s.aggregates = append(s.aggregates, agg) - } - return nil -} - -// addReference is a convenience function that allows the planner to keep track -// of references so we can use them during optimization. -func (p *ExecutionPlanner) addReference(ref *qualifiedRefPlanExpression) error { - table := p.scopeStack.read() - if table == nil { - return sql3.NewErrInternalf("unexpected symbol table state") - } - - switch s := table.scope.(type) { - case *PlanOpQuery: - s.referenceList = append(s.referenceList, ref) - } - return nil -} - -// scopeStack is a stack of PlannerScope with the usual push/pop methods. -type scopeStack struct { - st []*PlannerScope -} - -// newScopeStack returns a scope stack initialized with zero elements on the -// stack. -func newScopeStack() *scopeStack { - return &scopeStack{ - st: make([]*PlannerScope, 0), - } -} - -// push adds the provided PlanOperator (as the scope of a PlannerScope) to the -// scope stack. -func (ss *scopeStack) push(scope types.PlanOperator) { - ss.st = append(ss.st, &PlannerScope{ - scope: scope, - }) -} - -// pop removes (and returns) the last scope pushed to the stack. -func (ss *scopeStack) pop() *PlannerScope { - if len(ss.st) == 0 { - return nil - } - ret := ss.st[len(ss.st)-1] - ss.st = ss.st[:len(ss.st)-1] - return ret -} - -// read returns the last scope pushed to the stack, but unlike pop, it does not -// remove it. 
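// Illustrative note (a sketch): the scope stack and its addAggregate and
// addReference hooks removed here are superseded by explicit walks over
// compiled expressions. gatherExprAggregates in compileselect.go collects
// aggregates at each use site and dedupes them by string form, so SUM(age)
// appearing in both the projection list and HAVING is planned once; table
// scans now extract every output column and leave pruning of unused columns
// to the optimizer.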
-func (ss *scopeStack) read() *PlannerScope { - if len(ss.st) == 0 { - return nil - } - return ss.st[len(ss.st)-1] -} diff --git a/sql3/planner/expression.go b/sql3/planner/expression.go index c9b7e0e58..e0ad68c1f 100644 --- a/sql3/planner/expression.go +++ b/sql3/planner/expression.go @@ -173,6 +173,7 @@ func (n *unaryOpPlanExpression) String() string { func (n *unaryOpPlanExpression) Plan() map[string]interface{} { result := make(map[string]interface{}) result["_expr"] = fmt.Sprintf("%T", n) + result["description"] = n.String() result["dataType"] = n.Type().TypeDescription() result["op"] = n.op result["rhs"] = n.rhs.Plan() @@ -343,6 +344,10 @@ func (n *binOpPlanExpression) Evaluate(currentRow []interface{}) (interface{}, e return nl != nr, nil case parser.EQ: return nl == nr, nil + case parser.AND: + return nl && nr, nil + case parser.OR: + return nl || nr, nil default: return nil, sql3.NewErrInternalf("unhandled operator %d", n.op) @@ -664,6 +669,7 @@ func (n *binOpPlanExpression) String() string { func (n *binOpPlanExpression) Plan() map[string]interface{} { result := make(map[string]interface{}) result["_expr"] = fmt.Sprintf("%T", n) + result["description"] = n.String() result["dataType"] = n.Type().TypeDescription() result["op"] = n.op result["lhs"] = n.lhs.Plan() @@ -735,6 +741,7 @@ func (n *rangePlanExpression) String() string { func (n *rangePlanExpression) Plan() map[string]interface{} { result := make(map[string]interface{}) result["_expr"] = fmt.Sprintf("%T", n) + result["description"] = n.String() result["dataType"] = n.Type().TypeDescription() result["lhs"] = n.lhs.Plan() result["rhs"] = n.rhs.Plan() @@ -950,6 +957,7 @@ func (n *casePlanExpression) String() string { func (n *casePlanExpression) Plan() map[string]interface{} { result := make(map[string]interface{}) result["_expr"] = fmt.Sprintf("%T", n) + result["description"] = n.String() result["dataType"] = n.Type().TypeDescription() if n.baseExpr != nil { result["baseExpr"] = n.baseExpr.Plan() @@ -1035,6 +1043,7 @@ func (n *caseBlockPlanExpression) String() string { func (n *caseBlockPlanExpression) Plan() map[string]interface{} { result := make(map[string]interface{}) result["_expr"] = fmt.Sprintf("%T", n) + result["description"] = n.String() result["dataType"] = n.Type().TypeDescription() result["condition"] = n.condition.Plan() result["body"] = n.body.Plan() @@ -1104,6 +1113,7 @@ func (n *subqueryPlanExpression) String() string { func (n *subqueryPlanExpression) Plan() map[string]interface{} { result := make(map[string]interface{}) result["_expr"] = fmt.Sprintf("%T", n) + result["description"] = n.String() result["dataType"] = n.Type().TypeDescription() result["subquery"] = n.op.Plan() return result @@ -1212,6 +1222,7 @@ func (n *betweenOpPlanExpression) String() string { func (n *betweenOpPlanExpression) Plan() map[string]interface{} { result := make(map[string]interface{}) result["_expr"] = fmt.Sprintf("%T", n) + result["description"] = n.String() result["dataType"] = n.Type().TypeDescription() result["lhs"] = n.lhs.Plan() result["rhs"] = n.rhs.Plan() @@ -1430,6 +1441,7 @@ func (n *inOpPlanExpression) String() string { func (n *inOpPlanExpression) Plan() map[string]interface{} { result := make(map[string]interface{}) result["_expr"] = fmt.Sprintf("%T", n) + result["description"] = n.String() result["dataType"] = n.Type().TypeDescription() result["lhs"] = n.lhs.Plan() result["rhs"] = n.rhs.Plan() @@ -1480,8 +1492,30 @@ func (n *callPlanExpression) Evaluate(currentRow []interface{}) (interface{}, er return 
n.EvaluateReverse(currentRow) case "UPPER": return n.EvaluateUpper(currentRow) + case "STRINGSPLIT": + return n.EvaluateStringSplit(currentRow) + case "CHAR": + return n.EvaluateChar(currentRow) + case "ASCII": + return n.EvaluateAscii(currentRow) case "SUBSTRING": return n.EvaluateSubstring(currentRow) + case "LOWER": + return n.EvaluateLower(currentRow) + case "REPLACEALL": + return n.EvaluateReplaceAll(currentRow) + case "TRIM": + return n.EvaluateTrim(currentRow) + case "RTRIM": + return n.EvaluateRTrim(currentRow) + case "LTRIM": + return n.EvaluateLTrim(currentRow) + case "SUFFIX": + return n.EvaluateSuffix(currentRow) + case "PREFIX": + return n.EvaluatePrefix(currentRow) + case "SPACE": + return n.EvaluateSpace(currentRow) default: return nil, sql3.NewErrInternalf("unhandled function name '%s'", n.name) } @@ -1505,6 +1539,7 @@ func (n *callPlanExpression) String() string { func (n *callPlanExpression) Plan() map[string]interface{} { result := make(map[string]interface{}) result["_expr"] = fmt.Sprintf("%T", n) + result["description"] = n.String() result["name"] = n.name result["dataType"] = n.Type().TypeDescription() ps := make([]interface{}, 0) @@ -1561,6 +1596,7 @@ func (n *aliasPlanExpression) String() string { func (n *aliasPlanExpression) Plan() map[string]interface{} { result := make(map[string]interface{}) result["_expr"] = fmt.Sprintf("%T", n) + result["description"] = n.String() result["dataType"] = n.Type().TypeDescription() result["aliasName"] = n.aliasName result["expr"] = n.expr.Plan() @@ -1654,6 +1690,7 @@ func (n *qualifiedRefPlanExpression) String() string { func (n *qualifiedRefPlanExpression) Plan() map[string]interface{} { result := make(map[string]interface{}) result["_expr"] = fmt.Sprintf("%T", n) + result["description"] = n.String() result["tableName"] = n.tableName result["columnName"] = n.columnName result["columnIndex"] = n.columnIndex @@ -1716,6 +1753,7 @@ func (n *variableRefPlanExpression) String() string { func (n *variableRefPlanExpression) Plan() map[string]interface{} { result := make(map[string]interface{}) result["_expr"] = fmt.Sprintf("%T", n) + result["description"] = n.String() result["name"] = n.name result["dataType"] = n.dataType.TypeDescription() return result @@ -1751,6 +1789,7 @@ func (n *nullLiteralPlanExpression) String() string { func (n *nullLiteralPlanExpression) Plan() map[string]interface{} { result := make(map[string]interface{}) result["_expr"] = fmt.Sprintf("%T", n) + result["description"] = n.String() result["dataType"] = n.Type().TypeDescription() return result } @@ -1789,6 +1828,7 @@ func (n *intLiteralPlanExpression) String() string { func (n *intLiteralPlanExpression) Plan() map[string]interface{} { result := make(map[string]interface{}) result["_expr"] = fmt.Sprintf("%T", n) + result["description"] = n.String() result["dataType"] = n.Type().TypeDescription() result["value"] = n.value return result @@ -1829,6 +1869,7 @@ func (n *floatLiteralPlanExpression) String() string { func (n *floatLiteralPlanExpression) Plan() map[string]interface{} { result := make(map[string]interface{}) result["_expr"] = fmt.Sprintf("%T", n) + result["description"] = n.String() result["dataType"] = n.Type().TypeDescription() result["value"] = n.value return result @@ -1868,6 +1909,7 @@ func (n *boolLiteralPlanExpression) String() string { func (n *boolLiteralPlanExpression) Plan() map[string]interface{} { result := make(map[string]interface{}) result["_expr"] = fmt.Sprintf("%T", n) + result["description"] = n.String() result["dataType"] = 
n.Type().TypeDescription() result["value"] = n.value return result @@ -1907,6 +1949,7 @@ func (n *dateLiteralPlanExpression) String() string { func (n *dateLiteralPlanExpression) Plan() map[string]interface{} { result := make(map[string]interface{}) result["_expr"] = fmt.Sprintf("%T", n) + result["description"] = n.String() result["dataType"] = n.Type().TypeDescription() result["value"] = n.value return result @@ -1946,6 +1989,7 @@ func (n *stringLiteralPlanExpression) String() string { func (n *stringLiteralPlanExpression) Plan() map[string]interface{} { result := make(map[string]interface{}) result["_expr"] = fmt.Sprintf("%T", n) + result["description"] = n.String() result["dataType"] = n.Type().TypeDescription() result["value"] = n.value return result @@ -2170,6 +2214,7 @@ func (n *castPlanExpression) String() string { func (n *castPlanExpression) Plan() map[string]interface{} { result := make(map[string]interface{}) result["_expr"] = fmt.Sprintf("%T", n) + result["description"] = n.String() result["dataType"] = n.Type().TypeDescription() result["lhs"] = n.lhs.Plan() return result @@ -2221,6 +2266,7 @@ func (n *exprListPlanExpression) String() string { func (n *exprListPlanExpression) Plan() map[string]interface{} { result := make(map[string]interface{}) result["_expr"] = fmt.Sprintf("%T", n) + result["description"] = n.String() ps := make([]interface{}, 0) for _, e := range n.exprs { ps = append(ps, e.Plan()) @@ -2311,6 +2357,7 @@ func (n *exprSetLiteralPlanExpression) String() string { func (n *exprSetLiteralPlanExpression) Plan() map[string]interface{} { result := make(map[string]interface{}) result["_expr"] = fmt.Sprintf("%T", n) + result["description"] = n.String() ps := make([]interface{}, 0) for _, e := range n.members { ps = append(ps, e.Plan()) @@ -2392,6 +2439,7 @@ func (n *exprTupleLiteralPlanExpression) String() string { func (n *exprTupleLiteralPlanExpression) Plan() map[string]interface{} { result := make(map[string]interface{}) result["_expr"] = fmt.Sprintf("%T", n) + result["description"] = n.String() ps := make([]interface{}, 0) for _, e := range n.members { ps = append(ps, e.Plan()) @@ -2503,7 +2551,6 @@ func (p *ExecutionPlanner) compileExpr(expr parser.Expr) (_ types.PlanExpression case *parser.QualifiedRef: ref := newQualifiedRefPlanExpression(parser.IdentName(expr.Table), parser.IdentName(expr.Column), expr.ColumnIndex, expr.DataType()) - p.addReference(ref) return ref, nil case *parser.Range: @@ -2698,32 +2745,26 @@ func (p *ExecutionPlanner) compileCallExpr(expr *parser.Call) (_ types.PlanExpre } else { agg = newCountPlanExpression(args[0], expr.ResultDataType) } - p.addAggregate(agg) return agg, nil case "SUM": agg := newSumPlanExpression(args[0], expr.ResultDataType) - p.addAggregate(agg) return agg, nil case "AVG": agg := newAvgPlanExpression(args[0], expr.ResultDataType) - p.addAggregate(agg) return agg, nil case "PERCENTILE": agg := newPercentilePlanExpression(args[0], args[1], expr.ResultDataType) - p.addAggregate(agg) return agg, nil case "MIN": agg := newMinPlanExpression(args[0], expr.ResultDataType) - p.addAggregate(agg) return agg, nil case "MAX": agg := newMaxPlanExpression(args[0], expr.ResultDataType) - p.addAggregate(agg) return agg, nil default: diff --git a/sql3/planner/expressionagg.go b/sql3/planner/expressionagg.go index 9eeabe22a..5d5ad01a3 100644 --- a/sql3/planner/expressionagg.go +++ b/sql3/planner/expressionagg.go @@ -126,6 +126,7 @@ func (n *countPlanExpression) String() string { func (n *countPlanExpression) Plan() 
map[string]interface{} { result := make(map[string]interface{}) result["_expr"] = fmt.Sprintf("%T", n) + result["description"] = n.String() result["dataType"] = n.Type().TypeDescription() result["arg"] = n.arg.Plan() return result @@ -194,6 +195,7 @@ func (n *countDistinctPlanExpression) String() string { func (n *countDistinctPlanExpression) Plan() map[string]interface{} { result := make(map[string]interface{}) result["_expr"] = fmt.Sprintf("%T", n) + result["description"] = n.String() result["dataType"] = n.Type().TypeDescription() result["arg"] = n.arg.Plan() return result @@ -257,6 +259,24 @@ func (m *aggregateSum) Update(ctx context.Context, row types.Row) error { } dsum = pql.AddDecimal(dsum, val) m.sum = dsum + + case *parser.DataTypeInt: + val, ok := v.(int64) + if !ok { + return sql3.NewErrInternalf("unexpected type conversion '%T'", v) + } + var dsum int64 + if m.sum != nil { + dsum, ok = m.sum.(int64) + if !ok { + return sql3.NewErrInternalf("unexpected type conversion '%T'", m.sum) + } + } else { + dsum = 0 + } + dsum = dsum + val + m.sum = dsum + default: return sql3.NewErrInternalf("unhandled aggregate expression datatype '%T'", dataType) } @@ -271,6 +291,13 @@ func (m *aggregateSum) Eval(ctx context.Context) (interface{}, error) { return nil, sql3.NewErrInternalf("unexpected type conversion '%T'", m.sum) } return dsum, nil + + case *parser.DataTypeInt: + dsum, ok := m.sum.(int64) + if !ok { + return nil, sql3.NewErrInternalf("unexpected type conversion '%T'", m.sum) + } + return dsum, nil default: return nil, sql3.NewErrInternalf("unhandled aggregate expression datatype '%T'", m.expr.Type()) } @@ -327,6 +354,7 @@ func (n *sumPlanExpression) String() string { func (n *sumPlanExpression) Plan() map[string]interface{} { result := make(map[string]interface{}) result["_expr"] = fmt.Sprintf("%T", n) + result["description"] = n.String() result["dataType"] = n.Type().TypeDescription() result["arg"] = n.arg.Plan() return result @@ -500,6 +528,7 @@ func (n *avgPlanExpression) String() string { func (n *avgPlanExpression) Plan() map[string]interface{} { result := make(map[string]interface{}) result["_expr"] = fmt.Sprintf("%T", n) + result["description"] = n.String() result["dataType"] = n.Type().TypeDescription() result["arg"] = n.arg.Plan() return result @@ -641,6 +670,7 @@ func (n *minPlanExpression) String() string { func (n *minPlanExpression) Plan() map[string]interface{} { result := make(map[string]interface{}) result["_expr"] = fmt.Sprintf("%T", n) + result["description"] = n.String() result["dataType"] = n.Type().TypeDescription() result["arg"] = n.arg.Plan() return result @@ -783,6 +813,7 @@ func (n *maxPlanExpression) String() string { func (n *maxPlanExpression) Plan() map[string]interface{} { result := make(map[string]interface{}) result["_expr"] = fmt.Sprintf("%T", n) + result["description"] = n.String() result["dataType"] = n.Type().TypeDescription() result["arg"] = n.arg.Plan() return result @@ -855,6 +886,7 @@ func (n *percentilePlanExpression) String() string { func (n *percentilePlanExpression) Plan() map[string]interface{} { result := make(map[string]interface{}) result["_expr"] = fmt.Sprintf("%T", n) + result["description"] = n.String() result["dataType"] = n.Type().TypeDescription() result["arg"] = n.arg.Plan() result["ntharg"] = n.nthArg.Plan() diff --git a/sql3/planner/expressionanalyzer.go b/sql3/planner/expressionanalyzer.go index 1b1c2bac5..aedd6b446 100644 --- a/sql3/planner/expressionanalyzer.go +++ b/sql3/planner/expressionanalyzer.go @@ -56,7 +56,6 @@ func 
(p *ExecutionPlanner) analyzeExpression(expr parser.Expr, scope parser.Stat case *parser.Ident: switch sc := scope.(type) { case *parser.SelectStatement: - // turn *parser.Ident into *parser.QualifiedRef if sc.Source == nil { return nil, sql3.NewErrColumnNotFound(e.NamePos.Line, e.NamePos.Column, e.Name) } @@ -69,6 +68,7 @@ func (p *ExecutionPlanner) analyzeExpression(expr parser.Expr, scope parser.Stat return nil, sql3.NewErrColumnNotFound(e.NamePos.Line, e.NamePos.Column, e.Name) } + // now turn *parser.Ident into *parser.QualifiedRef ident := &parser.QualifiedRef{ Table: &parser.Ident{ Name: oc.TableName, @@ -85,6 +85,29 @@ func (p *ExecutionPlanner) analyzeExpression(expr parser.Expr, scope parser.Stat case *parser.InsertStatement: return nil, sql3.NewErrColumnNotFound(e.NamePos.Line, e.NamePos.Column, e.Name) + case *parser.DeleteStatement: + + // go find the first ident in the source that matches + oc, err := sc.Source.OutputColumnNamed(e.Name) + if err != nil { + return nil, err + } else if oc == nil { + return nil, sql3.NewErrColumnNotFound(e.NamePos.Line, e.NamePos.Column, e.Name) + } + + ident := &parser.QualifiedRef{ + Table: &parser.Ident{ + Name: oc.TableName, + NamePos: e.NamePos, + }, + Column: &parser.Ident{ + Name: oc.ColumnName, + NamePos: e.NamePos, + }, + ColumnIndex: oc.ColumnIndex, + } + return p.analyzeExpression(ident, scope) + default: return nil, sql3.NewErrInternalf("unhandled scope type '%T'", sc) } @@ -224,6 +247,19 @@ func (p *ExecutionPlanner) analyzeExpression(expr parser.Expr, scope parser.Stat return nil, sql3.NewErrColumnNotFound(e.Column.NamePos.Line, e.Column.NamePos.Column, e.Column.Name) } + case *parser.DeleteStatement: + oc, err := sc.Source.OutputColumnNamed(e.Column.Name) + if err != nil { + return nil, err + } + if oc != nil { + e.RefDataType = oc.Datatype + e.ColumnIndex = oc.ColumnIndex + return e, nil + + } + return nil, sql3.NewErrColumnNotFound(e.Column.NamePos.Line, e.Column.NamePos.Column, e.Column.Name) + default: return nil, sql3.NewErrInternalf("unhandled scope type '%T'", sc) } @@ -298,7 +334,7 @@ func (p *ExecutionPlanner) analyzeExpression(expr parser.Expr, scope parser.Stat return p.analyzeUnaryExpression(e, scope) case *parser.SelectStatement: - err := p.analyzeSelectStatement(e) + selExpr, err := p.analyzeSelectStatement(e) if err != nil { return nil, err } @@ -306,7 +342,7 @@ func (p *ExecutionPlanner) analyzeExpression(expr parser.Expr, scope parser.Stat if len(e.Columns) > 1 { return nil, sql3.NewErrInternalf("subquery must return only one column") } - return e, nil + return selExpr, nil default: return nil, sql3.NewErrInternalf("unexpected SQL expression type: %T", expr) @@ -368,6 +404,18 @@ func (p *ExecutionPlanner) analyzeBinaryExpression(expr *parser.BinaryExpr, scop } expr.Y = y + // check nil for either of these expressions after they were analyzed; they may have been eliminated, + // in which case we return the remaining one or nil if both have been eliminated + if x == nil && y == nil { + return nil, nil + } + if x == nil { + return y, nil + } + if y == nil { + return x, nil + } + //handle operator switch op := expr.Op; op { @@ -576,45 +624,71 @@ func (p *ExecutionPlanner) analyzeBinaryExpression(expr *parser.BinaryExpr, scop if !typesAreComparable(x.DataType(), sel.Columns[0].Expr.DataType()) { return nil, sql3.NewErrTypesAreNotEquatable(x.Pos().Line, x.Pos().Column, x.DataType().TypeDescription(), ex.DataType().TypeDescription()) } - //need to turn this into an inner join - selStmt, ok :=
scope.(*parser.SelectStatement) - if !ok { - return nil, sql3.NewErrInternalf("unexpected scope type '%T'", scope) - } - operator := &parser.JoinOperator{ Inner: expr.OpPos, } constraint := &parser.OnConstraint{ X: &parser.BinaryExpr{ - X: expr.X, - Op: parser.EQ, - Y: sel.Columns[0].Expr, + X: expr.X, + Op: parser.EQ, + Y: sel.Columns[0].Expr, + ResultDataType: parser.NewDataTypeBool(), }, } - if lhs, ok := selStmt.Source.(*parser.JoinClause); ok { - selStmt.Source = &parser.JoinClause{ - X: lhs.X, - Operator: lhs.Operator, - Y: &parser.JoinClause{ - X: lhs.Y, + switch scopeStmt := scope.(type) { + case *parser.SelectStatement: + + if lhs, ok := scopeStmt.Source.(*parser.JoinClause); ok { + scopeStmt.Source = &parser.JoinClause{ + X: lhs.X, + Operator: lhs.Operator, + Y: &parser.JoinClause{ + X: lhs.Y, + Operator: operator, + Y: sel, + Constraint: constraint, + }, + Constraint: lhs.Constraint, + } + } else { + scopeStmt.Source = &parser.JoinClause{ + X: scopeStmt.Source, Operator: operator, Y: sel, Constraint: constraint, - }, - Constraint: lhs.Constraint, + } } - } else { - selStmt.Source = &parser.JoinClause{ - X: selStmt.Source, - Operator: operator, - Y: sel, - Constraint: constraint, + + case *parser.DeleteStatement: + if lhs, ok := scopeStmt.Source.(*parser.JoinClause); ok { + scopeStmt.Source = &parser.JoinClause{ + X: lhs.X, + Operator: lhs.Operator, + Y: &parser.JoinClause{ + X: lhs.Y, + Operator: operator, + Y: sel, + Constraint: constraint, + }, + Constraint: lhs.Constraint, + } + } else { + scopeStmt.Source = &parser.JoinClause{ + X: scopeStmt.Source, + Operator: operator, + Y: sel, + Constraint: constraint, + } } + + default: + return nil, sql3.NewErrInternalf("unexpected scope type '%T'", scope) } + // we are eliminating this expression, since we moved it into the source, so + // return nil return nil, nil } diff --git a/sql3/planner/expressionanalyzercall.go b/sql3/planner/expressionanalyzercall.go index 6401cb17e..a2b6d04b1 100644 --- a/sql3/planner/expressionanalyzercall.go +++ b/sql3/planner/expressionanalyzercall.go @@ -243,11 +243,32 @@ func (p *ExecutionPlanner) analyzeCallExpression(call *parser.Call, scope parser return p.analyzeFunctionSubtable(call, scope) case "REVERSE": return p.analyseFunctionReverse(call, scope) + case "CHAR": + return p.analyseFunctionChar(call, scope) + case "ASCII": + return p.analyseFunctionAscii(call, scope) case "UPPER": return p.analyzeFunctionUpper(call, scope) + case "STRINGSPLIT": + return p.analyseFunctionStringSplit(call, scope) case "SUBSTRING": return p.analyseFunctionSubstring(call, scope) - + case "LOWER": + return p.analyzeFunctionLower(call, scope) + case "REPLACEALL": + return p.analyseFunctionReplaceAll(call, scope) + case "TRIM": + return p.analyseFunctionTrim(call, scope) + case "RTRIM": + return p.analyseFunctionTrim(call, scope) + case "LTRIM": + return p.analyseFunctionTrim(call, scope) + case "SUFFIX": + return p.analyseFunctionPrefixSuffix(call, scope) + case "PREFIX": + return p.analyseFunctionPrefixSuffix(call, scope) + case "SPACE": + return p.analyseFunctionSpace(call, scope) default: return nil, sql3.NewErrCallUnknownFunction(call.Name.NamePos.Line, call.Name.NamePos.Column, call.Name.Name) } diff --git a/sql3/planner/expressionpql.go b/sql3/planner/expressionpql.go index b425aacfb..4af10a11a 100644 --- a/sql3/planner/expressionpql.go +++ b/sql3/planner/expressionpql.go @@ -4,6 +4,7 @@ package planner import ( "context" + "strconv" "strings" "github.com/featurebasedb/featurebase/v3/pql" @@ -98,6 +99,60 @@ func 
(p *ExecutionPlanner) generatePQLCallFromExpr(ctx context.Context, expr typ return nil, sql3.NewErrInternalf("unsupported scalar function '%s'", expr.name) } + case *inOpPlanExpression: + // lhs will be a qualified ref + lhs, ok := expr.lhs.(*qualifiedRefPlanExpression) + if !ok { + return nil, sql3.NewErrInternalf("unexpected lhs %T", expr.lhs) + } + + // rhs is an expression list - need to convert to a big OR + + list, ok := expr.rhs.(*exprListPlanExpression) + if !ok { + return nil, sql3.NewErrInternalf("unexpected argument type '%T'", expr.rhs) + } + + // if it is the _id column, we can use ConstRow with a list + if strings.EqualFold(lhs.columnName, "_id") { + values := make([]interface{}, len(list.exprs)) + for i, m := range list.exprs { + pqlValue, err := planExprToValue(m) + if err != nil { + return nil, err + } + values[i] = pqlValue + } + call := &pql.Call{ + Name: "ConstRow", + Args: map[string]interface{}{ + "columns": values, + }, + Type: pql.PrecallGlobal, + } + return call, nil + } + // otherwise, OR them all + call := &pql.Call{ + Name: "Union", + Children: []*pql.Call{}, + } + + for _, m := range list.exprs { + pqlValue, err := planExprToValue(m) + if err != nil { + return nil, err + } + rc := &pql.Call{ + Name: "Row", + Args: map[string]interface{}{ + lhs.columnName: pqlValue, + }, + } + call.Children = append(call.Children, rc) + } + return call, nil + default: return nil, sql3.NewErrInternalf("unexpected expression type: %T", expr) } @@ -153,13 +208,26 @@ func (p *ExecutionPlanner) generatePQLCallFromBinaryExpr(ctx context.Context, ex }, nil case *parser.DataTypeID: + // TODO (pok) range queries on _id are not supported if strings.EqualFold(lhs.columnName, "_id") { - return &pql.Call{ + cr := &pql.Call{ Name: "ConstRow", Args: map[string]interface{}{ "columns": []interface{}{pqlValue}, }, Type: pql.PrecallGlobal, + } + // TODO (pok) when we fix FB-1828 (https://molecula.atlassian.net/browse/FB-1828) + // we can remove this - ConstRow returns a ghost record, thus to eliminate it + // we intersect with All + return &pql.Call{ + Name: "Intersect", + Children: []*pql.Call{ + { + Name: "All", + }, + cr, + }, }, nil } return &pql.Call{ @@ -170,13 +238,26 @@ func (p *ExecutionPlanner) generatePQLCallFromBinaryExpr(ctx context.Context, ex }, nil case *parser.DataTypeString: + // TODO (pok) range queries on _id are not supported if strings.EqualFold(lhs.columnName, "_id") { - return &pql.Call{ + cr := &pql.Call{ Name: "ConstRow", Args: map[string]interface{}{ "columns": []interface{}{pqlValue}, }, Type: pql.PrecallGlobal, + } + // TODO (pok) when we fix FB-1828 (https://molecula.atlassian.net/browse/FB-1828) + // we can remove this - ConstRow returns a ghost record, thus to eliminate it + // we intersect with All + return &pql.Call{ + Name: "Intersect", + Children: []*pql.Call{ + { + Name: "All", + }, + cr, + }, }, nil } return &pql.Call{ @@ -194,6 +275,27 @@ func (p *ExecutionPlanner) generatePQLCallFromBinaryExpr(ctx context.Context, ex }, }, nil + case *parser.DataTypeBool: + return &pql.Call{ + Name: "Row", + Args: map[string]interface{}{ + lhs.columnName: pqlValue, + }, + }, nil + + case *parser.DataTypeDecimal: + val, ok := pqlValue.(float64) + if !ok { + return nil, sql3.NewErrInternalf("unexpected type '%T'", pqlValue) + } + d := pql.FromFloat64(val) + return &pql.Call{ + Name: "Row", + Args: map[string]interface{}{ + lhs.columnName: d, + }, + }, nil + default: return nil, sql3.NewErrInternalf("unsupported type for binary expression: %v (%T)", typ, typ) }
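For reference, a minimal sketch of the call tree the IN translation above builds for a non-_id column (the field name "segment" and its values are assumptions for illustration; the struct shapes are the ones constructed above):

    // WHERE segment IN ('a', 'b')  =>  Union(Row(segment='a'), Row(segment='b'))
    inCall := &pql.Call{
        Name: "Union",
        Children: []*pql.Call{
            {Name: "Row", Args: map[string]interface{}{"segment": "a"}},
            {Name: "Row", Args: map[string]interface{}{"segment": "b"}},
        },
    }

For `_id IN (1, 2)` the same code instead emits a single ConstRow(columns=[1, 2]) call with Type pql.PrecallGlobal; only the binary-expression path below additionally wraps ConstRow in Intersect(All(), ...).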
@@ -279,6 +381,14 @@ func planExprToValue(expr types.PlanExpression) (interface{}, error) { return expr.value, nil case *dateLiteralPlanExpression: return expr.value, nil + case *boolLiteralPlanExpression: + return expr.value, nil + case *floatLiteralPlanExpression: + f, err := strconv.ParseFloat(expr.value, 64) + if err != nil { + return nil, err + } + return f, nil default: return nil, sql3.NewErrInternalf("cannot convert SQL expression %T to a literal value", expr) } diff --git a/sql3/planner/expressiontypes.go b/sql3/planner/expressiontypes.go index 03698174b..8be5d2fb1 100644 --- a/sql3/planner/expressiontypes.go +++ b/sql3/planner/expressiontypes.go @@ -475,6 +475,15 @@ func typeIsString(testType parser.ExprDataType) bool { } } +func typeIsVoid(testType parser.ExprDataType) bool { + switch testType.(type) { + case *parser.DataTypeVoid: + return true + default: + return false + } +} + // returns true if the type is timestamp func typeIsTimestamp(testType parser.ExprDataType) bool { switch testType.(type) { diff --git a/sql3/planner/inbuiltfunctionsstring.go b/sql3/planner/inbuiltfunctionsstring.go index 3a10209e6..fda45b18d 100644 --- a/sql3/planner/inbuiltfunctionsstring.go +++ b/sql3/planner/inbuiltfunctionsstring.go @@ -1,8 +1,6 @@ package planner import ( - "fmt" - "strconv" "strings" "github.com/featurebasedb/featurebase/v3/sql3" @@ -15,7 +13,7 @@ func (p *ExecutionPlanner) analyseFunctionReverse(call *parser.Call, scope parse return nil, sql3.NewErrCallParameterCountMismatch(call.Rparen.Line, call.Rparen.Column, call.Name.Name, 1, len(call.Args)) } - if !typeIsString(call.Args[0].DataType()) { + if !typeIsString(call.Args[0].DataType()) && !typeIsVoid(call.Args[0].DataType()) { return nil, sql3.NewErrStringExpressionExpected(call.Args[0].Pos().Line, call.Args[0].Pos().Column) } @@ -24,18 +22,76 @@ func (p *ExecutionPlanner) analyseFunctionReverse(call *parser.Call, scope parse return call, nil } +func (p *ExecutionPlanner) analyzeFunctionLower(call *parser.Call, scope parser.Statement) (parser.Expr, error) { + if len(call.Args) != 1 { + return nil, sql3.NewErrCallParameterCountMismatch(call.Rparen.Line, call.Rparen.Column, call.Name.Name, 1, len(call.Args)) + } + + if !typeIsString(call.Args[0].DataType()) && !typeIsVoid(call.Args[0].DataType()) { + return nil, sql3.NewErrStringExpressionExpected(call.Args[0].Pos().Line, call.Args[0].Pos().Column) + } + call.ResultDataType = parser.NewDataTypeString() + + return call, nil +} + +func (p *ExecutionPlanner) analyzeFunctionUpper(call *parser.Call, scope parser.Statement) (parser.Expr, error) { + // one argument for the UPPER function + if len(call.Args) != 1 { + return nil, sql3.NewErrCallParameterCountMismatch(call.Rparen.Line, call.Rparen.Column, call.Name.Name, 1, len(call.Args)) + } + + if !typeIsString(call.Args[0].DataType()) && !typeIsVoid(call.Args[0].DataType()) { + return nil, sql3.NewErrStringExpressionExpected(call.Args[0].Pos().Line, call.Args[0].Pos().Column) + } + + call.ResultDataType = parser.NewDataTypeString() + + return call, nil +} + +func (p *ExecutionPlanner) analyseFunctionChar(call *parser.Call, scope parser.Statement) (parser.Expr, error) { + // one argument + if len(call.Args) != 1 { + return nil, sql3.NewErrCallParameterCountMismatch(call.Rparen.Line, call.Rparen.Column, call.Name.Name, 1, len(call.Args)) + } + + if !typeIsInteger(call.Args[0].DataType()) && !typeIsVoid(call.Args[0].DataType()) { + return nil, sql3.NewErrIntExpressionExpected(call.Args[0].Pos().Line, call.Args[0].Pos().Column) + } + + call.ResultDataType = parser.NewDataTypeString() + + return call, nil +}
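A short behavior sketch for CHAR and its inverse ASCII (example values are assumptions; the conversions are the ones used by the evaluators later in this file):

    // CHAR(65)   -> "A"   via string(rune(65))
    // ASCII('A') -> 65    via int64 of the single rune; inputs longer than one character error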
+ +func (p *ExecutionPlanner) analyseFunctionAscii(call *parser.Call, scope parser.Statement) (parser.Expr, error) { + // one argument + if len(call.Args) != 1 { + return nil, sql3.NewErrCallParameterCountMismatch(call.Rparen.Line, call.Rparen.Column, call.Name.Name, 1, len(call.Args)) + } + + if !typeIsString(call.Args[0].DataType()) && !typeIsVoid(call.Args[0].DataType()) { + return nil, sql3.NewErrStringExpressionExpected(call.Args[0].Pos().Line, call.Args[0].Pos().Column) + } + + call.ResultDataType = parser.NewDataTypeInt() + + return call, nil +} + func (p *ExecutionPlanner) analyseFunctionSubstring(call *parser.Call, scope parser.Statement) (parser.Expr, error) { if len(call.Args) <= 1 || len(call.Args) > 3 { return nil, sql3.NewErrCallParameterCountMismatch(call.Rparen.Line, call.Rparen.Column, call.Name.Name, 2, len(call.Args)) } - if !typeIsString(call.Args[0].DataType()) { + if !typeIsString(call.Args[0].DataType()) && !typeIsVoid(call.Args[0].DataType()) { return nil, sql3.NewErrStringExpressionExpected(call.Args[0].Pos().Line, call.Args[0].Pos().Column) } // the third parameter is optional for i := 1; i < len(call.Args); i++ { - if !typeIsInteger(call.Args[i].DataType()) { + if !typeIsInteger(call.Args[i].DataType()) && !typeIsVoid(call.Args[i].DataType()) { return nil, sql3.NewErrIntExpressionExpected(call.Args[i].Pos().Line, call.Args[i].Pos().Column) } } @@ -45,98 +101,482 @@ func (p *ExecutionPlanner) analyseFunctionSubstring(call *parser.Call, scope par return call, nil } +func (p *ExecutionPlanner) analyseFunctionReplaceAll(call *parser.Call, scope parser.Statement) (parser.Expr, error) { + if len(call.Args) != 3 { + return nil, sql3.NewErrCallParameterCountMismatch(call.Rparen.Line, call.Rparen.Column, call.Name.Name, 3, len(call.Args)) + } + // input string + if !typeIsString(call.Args[0].DataType()) && !typeIsVoid(call.Args[0].DataType()) { + return nil, sql3.NewErrStringExpressionExpected(call.Args[0].Pos().Line, call.Args[0].Pos().Column) + } + // string to find and replace + if !typeIsString(call.Args[1].DataType()) && !typeIsVoid(call.Args[1].DataType()) { + return nil, sql3.NewErrStringExpressionExpected(call.Args[1].Pos().Line, call.Args[1].Pos().Column) + } + // string to replace with + if !typeIsString(call.Args[2].DataType()) && !typeIsVoid(call.Args[2].DataType()) { + return nil, sql3.NewErrStringExpressionExpected(call.Args[2].Pos().Line, call.Args[2].Pos().Column) + } + + call.ResultDataType = parser.NewDataTypeString() + + return call, nil +}
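Illustrative behavior for the two analyzers above, per the evaluators later in this file (example values are assumptions):

    // SUBSTRING('featurebase', 0, 7) -> "feature"  (zero-based start, optional length)
    // REPLACEALL('a-b-c', '-', '+')  -> "a+b+c"    (strings.ReplaceAll underneath)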
+ +func (p *ExecutionPlanner) analyseFunctionStringSplit(call *parser.Call, scope parser.Statement) (parser.Expr, error) { + if len(call.Args) <= 1 || len(call.Args) > 3 { + return nil, sql3.NewErrCallParameterCountMismatch(call.Rparen.Line, call.Rparen.Column, call.Name.Name, 2, len(call.Args)) + } + + if !typeIsString(call.Args[0].DataType()) && !typeIsVoid(call.Args[0].DataType()) { + return nil, sql3.NewErrStringExpressionExpected(call.Args[0].Pos().Line, call.Args[0].Pos().Column) + } + + // string separator + if !typeIsString(call.Args[1].DataType()) && !typeIsVoid(call.Args[1].DataType()) { + return nil, sql3.NewErrStringExpressionExpected(call.Args[1].Pos().Line, call.Args[1].Pos().Column) + } + + // the third argument is the position; optional, defaults to 0 + if len(call.Args) == 3 && !typeIsInteger(call.Args[2].DataType()) && !typeIsVoid(call.Args[2].DataType()) { + return nil, sql3.NewErrIntExpressionExpected(call.Args[2].Pos().Line, call.Args[2].Pos().Column) + } + + call.ResultDataType = parser.NewDataTypeString() + + return call, nil +} + +// Analyze function for Trim/RTrim/LTrim +func (p *ExecutionPlanner) analyseFunctionTrim(call *parser.Call, scope parser.Statement) (parser.Expr, error) { + // one argument for the trim functions + if len(call.Args) != 1 { + return nil, sql3.NewErrCallParameterCountMismatch(call.Rparen.Line, call.Rparen.Column, call.Name.Name, 1, len(call.Args)) + } + + if !typeIsString(call.Args[0].DataType()) && !typeIsVoid(call.Args[0].DataType()) { + return nil, sql3.NewErrStringExpressionExpected(call.Args[0].Pos().Line, call.Args[0].Pos().Column) + } + + call.ResultDataType = parser.NewDataTypeString() + + return call, nil +} + +func (p *ExecutionPlanner) analyseFunctionPrefixSuffix(call *parser.Call, scope parser.Statement) (parser.Expr, error) { + if len(call.Args) != 2 { + return nil, sql3.NewErrCallParameterCountMismatch(call.Rparen.Line, call.Rparen.Column, call.Name.Name, 2, len(call.Args)) + } + + if !typeIsString(call.Args[0].DataType()) && !typeIsVoid(call.Args[0].DataType()) { + return nil, sql3.NewErrStringExpressionExpected(call.Args[0].Pos().Line, call.Args[0].Pos().Column) + } + + if !typeIsInteger(call.Args[1].DataType()) && !typeIsVoid(call.Args[1].DataType()) { + return nil, sql3.NewErrIntExpressionExpected(call.Args[1].Pos().Line, call.Args[1].Pos().Column) + } + + call.ResultDataType = parser.NewDataTypeString() + + return call, nil +} + +func (p *ExecutionPlanner) analyseFunctionSpace(call *parser.Call, scope parser.Statement) (parser.Expr, error) { + // one argument + if len(call.Args) != 1 { + return nil, sql3.NewErrCallParameterCountMismatch(call.Rparen.Line, call.Rparen.Column, call.Name.Name, 1, len(call.Args)) + } + + if !typeIsInteger(call.Args[0].DataType()) && !typeIsVoid(call.Args[0].DataType()) { + return nil, sql3.NewErrIntExpressionExpected(call.Args[0].Pos().Line, call.Args[0].Pos().Column) + } + + call.ResultDataType = parser.NewDataTypeString() + return call, nil +} + // reverses the string func (n *callPlanExpression) EvaluateReverse(currentRow []interface{}) (interface{}, error) { - argOneEval, err := n.args[0].Evaluate(currentRow) + argEval, err := n.args[0].Evaluate(currentRow) if err != nil { return nil, err } - - stringArgOne, ok := argOneEval.(string) + if argEval == nil { + return nil, nil + } + stringArg, ok := argEval.(string) if !ok { - return nil, sql3.NewErrInternalf("unexpected type converion %T", argOneEval) + return nil, sql3.NewErrInternalf("unexpected type conversion %T", argEval) } // reverse the string - runes := []rune(stringArgOne) + runes := []rune(stringArg) for i, j := 0, len(runes)-1; i < j; i, j = i+1, j-1 { runes[i], runes[j] = runes[j], runes[i] } return string(runes), nil }
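Because EvaluateReverse reverses a []rune rather than raw bytes, multi-byte characters survive intact; a sketch with an assumed input:

    // REVERSE('héllo') -> "olléh"

Note also the nil-propagation pattern used throughout this file: when an argument evaluates to nil, the evaluator returns nil (SQL NULL) rather than an error.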
-func (p *ExecutionPlanner) analyzeFunctionUpper(call *parser.Call, scope parser.Statement) (parser.Expr, error) { - //one argument for Upper Function - if len(call.Args) != 1 { - return nil, sql3.NewErrCallParameterCountMismatch(call.Rparen.Line, call.Rparen.Column, call.Name.Name, 1, len(call.Args)) +func (n *callPlanExpression) EvaluateLower(currentRow []interface{}) (interface{}, error) { + argEval, err := n.args[0].Evaluate(currentRow) + if err != nil { + return nil, err } - - if !typeIsString(call.Args[0].DataType()) { - return nil, sql3.NewErrStringExpressionExpected(call.Args[0].Pos().Line, call.Args[0].Pos().Column) + if argEval == nil { + return nil, nil + } + stringArg, ok := argEval.(string) + if !ok { + return nil, sql3.NewErrInternalf("unexpected type conversion %T", argEval) } - call.ResultDataType = parser.NewDataTypeString() - - return call, nil + return strings.ToLower(stringArg), nil } // Convert string to Upper case func (n *callPlanExpression) EvaluateUpper(currentRow []interface{}) (interface{}, error) { - argOneEval, err := n.args[0].Evaluate(currentRow) + argEval, err := n.args[0].Evaluate(currentRow) if err != nil { return nil, err } - - stringArgOne, ok := argOneEval.(string) + if argEval == nil { + return nil, nil + } + stringArg, ok := argEval.(string) if !ok { - return nil, sql3.NewErrInternalf("unexpected type converion %T", argOneEval) + return nil, sql3.NewErrInternalf("unexpected type conversion %T", argEval) } + // convert to Upper - res := strings.ToUpper(stringArgOne) - return fmt.Sprintf("%s", res), nil + return strings.ToUpper(stringArg), nil } -// Takes string, startIndex and length and returns the substring. -func (n *callPlanExpression) EvaluateSubstring(currentRow []interface{}) (interface{}, error) { +func (n *callPlanExpression) EvaluateChar(currentRow []interface{}) (interface{}, error) { + // Get the integer argument from the function call + argEval, err := n.args[0].Evaluate(currentRow) + if err != nil { + return 0, err + } + if argEval == nil { + return nil, nil + } + intArg, ok := argEval.(int64) + if !ok { + return 0, sql3.NewErrInternalf("unexpected type conversion %T", argEval) + } - argOneEval, err := n.args[0].Evaluate(currentRow) + // Return the character that corresponds to the integer value + return string(rune(intArg)), nil +} + +// this takes a string and returns the ascii value; +// the string should be of length 1. +func (n *callPlanExpression) EvaluateAscii(currentRow []interface{}) (interface{}, error) { + // Get the string argument from the function call + argEval, err := n.args[0].Evaluate(currentRow) if err != nil { return nil, err } + if argEval == nil { + return nil, nil + } + stringArg, ok := argEval.(string) + if !ok { + return nil, sql3.NewErrInternalf("unexpected type conversion %T", argEval) + } - stringArgOne, ok := argOneEval.(string) + if len(stringArg) == 0 { + return nil, nil + } + + if len(stringArg) != 1 { + return nil, sql3.NewErrStringLengthMismatch(0, 0, 1, stringArg) + } + + res := []rune(stringArg) + return int64(res[0]), nil +}
+ +// Takes string, startIndex and length and returns the substring. +func (n *callPlanExpression) EvaluateSubstring(currentRow []interface{}) (interface{}, error) { + argEval, err := n.args[0].Evaluate(currentRow) + if err != nil { + return nil, err + } + if argEval == nil { + return nil, nil + } + stringArgOne, ok := argEval.(string) if !ok { - return nil, sql3.NewErrInternalf("unexpected type converion %T", argOneEval) + return nil, sql3.NewErrInternalf("unexpected type conversion %T", argEval) } // this takes a sliding window approach to evaluate substring. - startIndex, err := strconv.Atoi(n.args[1].String()) + argEval, err = n.args[1].Evaluate(currentRow) if err != nil { - return nil, sql3.NewErrInternalf("unexpected type converion %T", n.args[1]) + return 0, err } - if startIndex >= len(stringArgOne) { - return "", nil + if argEval == nil { + return nil, nil + } + + startIndex, ok := argEval.(int64) + if !ok { + return 0, sql3.NewErrInternalf("unexpected type conversion %T", argEval) } - endIndex := len(stringArgOne) + if startIndex < 0 || startIndex >= int64(len(stringArgOne)) { + return nil, sql3.NewErrValueOutOfRange(0, 0, startIndex) + } + + endIndex := int64(len(stringArgOne)) if len(n.args) > 2 { - ln, err := strconv.Atoi(n.args[2].String()) + argEval, err = n.args[2].Evaluate(currentRow) if err != nil { - return nil, sql3.NewErrInternalf("unexpected type converion %T", n.args[1]) + return 0, err + } + if argEval == nil { + return nil, nil + } + ln, ok := argEval.(int64) + if !ok { + return 0, sql3.NewErrInternalf("unexpected type conversion %T", argEval) } endIndex = startIndex + ln } - if endIndex < 0 { - return "", nil + + if endIndex < startIndex || endIndex > int64(len(stringArgOne)) { + return nil, sql3.NewErrValueOutOfRange(0, 0, endIndex) + } + + return stringArgOne[startIndex:endIndex], nil +} + +// takes string, findstring, replacestring. +// replaces all occurrences of findstring with replacestring +func (n *callPlanExpression) EvaluateReplaceAll(currentRow []interface{}) (interface{}, error) { + argEval, err := n.args[0].Evaluate(currentRow) + if err != nil { + return nil, err + } + if argEval == nil { + return nil, nil + } + stringArgOne, ok := argEval.(string) + if !ok { + return nil, sql3.NewErrInternalf("unexpected type conversion %T", argEval) + } + argEval, err = n.args[1].Evaluate(currentRow) + if err != nil { + return nil, err + } + if argEval == nil { + return nil, nil + } + stringArgTwo, ok := argEval.(string) + if !ok { + return nil, sql3.NewErrInternalf("unexpected type conversion %T", argEval) + } + argEval, err = n.args[2].Evaluate(currentRow) + if err != nil { + return nil, err + } + if argEval == nil { + return nil, nil } + stringArgThree, ok := argEval.(string) + if !ok { + return nil, sql3.NewErrInternalf("unexpected type conversion %T", argEval) + } + return strings.ReplaceAll(stringArgOne, stringArgTwo, stringArgThree), nil +}
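The STRINGSPLIT evaluator that follows returns the n'th piece of the split, with an out-of-range position yielding an empty string; a sketch with assumed inputs:

    // STRINGSPLIT('a,b,c', ',')    -> "a"  (position defaults to 0)
    // STRINGSPLIT('a,b,c', ',', 1) -> "b"
    // STRINGSPLIT('a,b,c', ',', 9) -> ""   (position past the last piece)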
- if startIndex < 0 { - startIndex = 0 +// takes a string, separator and the position `n`, splits the string and returns the n'th substring +func (n *callPlanExpression) EvaluateStringSplit(currentRow []interface{}) (interface{}, error) { + argEval, err := n.args[0].Evaluate(currentRow) + if err != nil { + return nil, err + } + if argEval == nil { + return nil, nil + } + inputString, ok := argEval.(string) + if !ok { + return nil, sql3.NewErrInternalf("unexpected type conversion %T", argEval) } - if endIndex > len(stringArgOne) { - return stringArgOne[startIndex:], nil + argEval, err = n.args[1].Evaluate(currentRow) + if err != nil { + return nil, err + } + if argEval == nil { + return nil, nil + } + separator, ok := argEval.(string) + if !ok { + return nil, sql3.NewErrInternalf("unexpected type conversion %T", argEval) } - return stringArgOne[startIndex:endIndex], nil + if len(n.args) == 2 { + return strings.Split(inputString, separator)[0], nil + } + argEval, err = n.args[2].Evaluate(currentRow) + if err != nil { + return nil, err + } + if argEval == nil { + return nil, nil + } + pos, ok := argEval.(int64) + if !ok { + return nil, sql3.NewErrInternalf("unexpected type conversion %T", argEval) + } + + res := strings.Split(inputString, separator) + if pos <= 0 { + return res[0], nil + } else if int64(len(res)) > pos { + return res[pos], nil + } + return "", nil +} + +// Execute Trim function to remove whitespace from a string +func (n *callPlanExpression) EvaluateTrim(currentRow []interface{}) (interface{}, error) { + argEval, err := n.args[0].Evaluate(currentRow) + if err != nil { + return nil, err + } + if argEval == nil { + return nil, nil + } + stringArg, ok := argEval.(string) + if !ok { + return nil, sql3.NewErrInternalf("unexpected type conversion %T", argEval) + } + + // Trim the whitespace from string + return strings.TrimSpace(stringArg), nil +} + +// Execute RTrim function to remove trailing whitespace from a string +func (n *callPlanExpression) EvaluateRTrim(currentRow []interface{}) (interface{}, error) { + argEval, err := n.args[0].Evaluate(currentRow) + if err != nil { + return nil, err + } + if argEval == nil { + return nil, nil + } + stringArg, ok := argEval.(string) + if !ok { + return nil, sql3.NewErrInternalf("unexpected type conversion %T", argEval) + } + + // Trim the trailing whitespace from string + return strings.TrimRight(stringArg, " "), nil +} + +// Execute LTrim function to remove leading whitespace from a string +func (n *callPlanExpression) EvaluateLTrim(currentRow []interface{}) (interface{}, error) { + argEval, err := n.args[0].Evaluate(currentRow) + if err != nil { + return nil, err + } + if argEval == nil { + return nil, nil + } + stringArg, ok := argEval.(string) + if !ok { + return nil, sql3.NewErrInternalf("unexpected type conversion %T", argEval) + } + + // Trim the leading whitespace from string + return strings.TrimLeft(stringArg, " "), nil +} + +func (n *callPlanExpression) EvaluatePrefix(currentRow []interface{}) (interface{}, error) { + argEval, err := n.args[0].Evaluate(currentRow) + if err != nil { + return nil, err + } + if argEval == nil { + return nil, nil + } + stringArgOne, ok := argEval.(string) + if !ok { + return nil, sql3.NewErrInternalf("unexpected type conversion %T", argEval) + } + + argEval, err = n.args[1].Evaluate(currentRow) + if err != nil { + return nil, err + } + if argEval == nil { + return nil, nil + } + intArgTwo, ok := argEval.(int64) + if !ok { + return nil, sql3.NewErrInternalf("unexpected type conversion %T", argEval) + } + + if intArgTwo < 0 || intArgTwo > int64(len(stringArgOne)) { + return nil, sql3.NewErrValueOutOfRange(0, 0, intArgTwo) + } + + return stringArgOne[:intArgTwo], nil +} + +func (n *callPlanExpression) EvaluateSuffix(currentRow []interface{}) (interface{}, error) { + argEval, err := n.args[0].Evaluate(currentRow) + if err != nil { + return nil, err + } + if argEval == nil { + return nil, nil + } + stringArgOne, ok := argEval.(string) + if !ok { + return nil, sql3.NewErrInternalf("unexpected type conversion %T", argEval) + } + + argEval, err = n.args[1].Evaluate(currentRow) + if err != nil { + return nil, err + } + if argEval == nil { + return nil, nil + } + intArgTwo, ok := argEval.(int64) + if !ok { + return nil, sql3.NewErrInternalf("unexpected type conversion %T", argEval) + } + + if intArgTwo < 0 || intArgTwo > int64(len(stringArgOne)) { + return nil, sql3.NewErrValueOutOfRange(0, 0, intArgTwo) + } + + return stringArgOne[int64(len(stringArgOne))-intArgTwo:], nil +}
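PREFIX and SUFFIX, as implemented above, slice from the front and back of the string respectively, erroring rather than clamping on out-of-range lengths; a sketch with assumed inputs:

    // PREFIX('featurebase', 7)  -> "feature"   (stringArgOne[:7])
    // SUFFIX('featurebase', 4)  -> "base"      (stringArgOne[len-4:])
    // PREFIX('featurebase', 99) -> ErrValueOutOfRange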
+ +func (n *callPlanExpression) EvaluateSpace(currentRow []interface{}) (interface{}, error) { + // Get the integer argument from the function call + argEval, err := n.args[0].Evaluate(currentRow) + if err != nil { + return nil, err + } + if argEval == nil { + return nil, nil + } + intArg, ok := argEval.(int64) + if !ok { + return nil, sql3.NewErrInternalf("unexpected type conversion %T", argEval) + } + + // Return a string containing a number of spaces equal to the integer value + spaces := "" + for i := int64(0); i < intArg; i++ { + spaces += " " + } + return spaces, nil } diff --git a/sql3/planner/memoryobj.go b/sql3/planner/memoryobj.go deleted file mode 100644 index 784127149..000000000 --- a/sql3/planner/memoryobj.go +++ /dev/null @@ -1,122 +0,0 @@ -// Copyright 2022 Molecula Corp. All rights reserved. - -package planner - -import ( - "hash/maphash" - - "github.com/featurebasedb/featurebase/v3/sql3" - "github.com/featurebasedb/featurebase/v3/sql3/planner/types" -) - -var prototypeHash maphash.Hash - -// ObjectCache is a cache of interface{} values -type ObjectCache interface { - // Put a new value in the cache - PutObject(uint64, interface{}) error - - // Get the value with the given key - GetObject(uint64) (interface{}, error) - - // Size returns the number of values in the cache - Size() int -} - -// RowCache is a cache of rows used during row iteration -type RowCache interface { - Add(row types.Row) error - - // AllRows returns all rows. - AllRows() []types.Row -} - -// KeyedRowCache is a cache of keyed rows used during row iteration -type KeyedRowCache interface { - // Put adds row to the cache at the given key. - Put(key uint64, row types.Row) error - - // Get returns the rows specified by key. - Get(key uint64) (types.Row, error) - - // Size returns the number of rows in the cache. - Size() int -} - -// Ensure type implements interface -var _ KeyedRowCache = (*inMemoryKeyedRowCache)(nil) - -// default implementation of KeyedRowCache (in memory) -type inMemoryKeyedRowCache struct { - store map[uint64][]interface{} -} - -func newinMemoryKeyedRowCache() *inMemoryKeyedRowCache { - return &inMemoryKeyedRowCache{ - store: make(map[uint64][]interface{}), - } -} - -func (m inMemoryKeyedRowCache) Put(u uint64, i types.Row) error { - m.store[u] = i - return nil -} - -func (m inMemoryKeyedRowCache) Get(u uint64) (types.Row, error) { - return m.store[u], nil -} - -func (m inMemoryKeyedRowCache) Size() int { - return len(m.store) -} - -// Ensure type implements interface -var _ RowCache = (*inMemoryRowCache)(nil) - -type inMemoryRowCache struct { - rows []types.Row -} - -func newInMemoryRowCache() *inMemoryRowCache { - return &inMemoryRowCache{} -} - -func (c *inMemoryRowCache) Add(row types.Row) error { - c.rows = append(c.rows, row) - return nil -} - -func (c *inMemoryRowCache) AllRows() []types.Row { - return c.rows -} - -// Ensure type implements interface -var _ ObjectCache = (*mapObjectCache)(nil) - -// mapObjectCache is a simple in-memory implementation of a cache -type mapObjectCache struct { - cache map[uint64]interface{} -} - -func (m mapObjectCache) PutObject(u uint64, i interface{}) error { - m.cache[u] = i - return nil -} - -func (m mapObjectCache) GetObject(u uint64) (interface{}, error) { - v, ok := m.cache[u] - if !ok { - return nil, sql3.NewErrCacheKeyNotFound(u) - } - return v, nil -} - -func (m mapObjectCache) Size() int { - return len(m.cache) -} - -func NewMapObjectCache() mapObjectCache { - return mapObjectCache{ - cache: make(map[uint64]interface{}), - } -} diff --git a/sql3/planner/opbulkinsert.go b/sql3/planner/opbulkinsert.go index 0177ec615..ca36b6b54 100644 --- a/sql3/planner/opbulkinsert.go +++ b/sql3/planner/opbulkinsert.go @@ -41,6 +41,8 @@ type bulkInsertOptions struct { format 
string // whether the source has a header row hasHeaderRow bool + // whether we allow missing values for NDJSON jsonpath expressions + allowMissingValues bool // input specifier (FILE is the only one right now) input string @@ -74,11 +76,7 @@ func NewPlanOpBulkInsert(p *ExecutionPlanner, tableName string, options *bulkIns func (p *PlanOpBulkInsert) Plan() map[string]interface{} { result := make(map[string]interface{}) result["_op"] = fmt.Sprintf("%T", p) - sc := make([]string, 0) - for _, e := range p.Schema() { - sc = append(sc, fmt.Sprintf("'%s', '%s', '%s'", e.ColumnName, e.RelationName, e.Type.TypeDescription())) - } - result["_schema"] = sc + result["_schema"] = p.Schema().Plan() result["tableName"] = p.tableName options := make(map[string]interface{}) @@ -88,6 +86,7 @@ func (p *PlanOpBulkInsert) Plan() map[string]interface{} { options["format"] = p.options.format options["input"] = p.options.input options["hasHeaderRow"] = p.options.hasHeaderRow + options["allowMissingValues"] = p.options.allowMissingValues colMap := make([]interface{}, 0) for _, m := range p.options.targetColumns { @@ -279,10 +278,14 @@ func (i *bulkInsertSourceCSVRowIter) Next(ctx context.Context) (types.Row, error result[idx] = intVal case *parser.DataTypeIDSet: - return nil, sql3.NewErrTypeConversionOnMap(0, 0, evalValue, mapColumn.colType.TypeDescription()) + intVal, err := strconv.ParseInt(evalValue, 10, 64) + if err != nil { + return nil, sql3.NewErrTypeConversionOnMap(0, 0, evalValue, mapColumn.colType.TypeDescription()) + } + result[idx] = []int64{intVal} case *parser.DataTypeStringSet: - return nil, sql3.NewErrTypeConversionOnMap(0, 0, evalValue, mapColumn.colType.TypeDescription()) + result[idx] = []string{evalValue} case *parser.DataTypeTimestamp: intVal, err := strconv.ParseInt(evalValue, 10, 64) @@ -457,258 +460,280 @@ func (i *bulkInsertSourceNDJsonRowIter) Next(ctx context.Context) (types.Row, er } } - if i.reader.Scan() { - if err := i.reader.Err(); err != nil { - return nil, err - } - - jsonValue := i.reader.Text() - - // now we do the mapping to the output row - result := make([]interface{}, len(i.options.mapExpressions)) - - // parse the json - v := interface{}(nil) - err := json.Unmarshal([]byte(jsonValue), &v) + for { + if i.reader.Scan() { + if err := i.reader.Err(); err != nil { + return nil, err + } - if err != nil { - return nil, sql3.NewErrParsingJSON(0, 0, jsonValue, err.Error()) - } + jsonValue := i.reader.Text() + jsonValue = strings.TrimSpace(jsonValue) + if len(jsonValue) == 0 { + continue + } - // type check against the output type of the map operation + // now we do the mapping to the output row + result := make([]interface{}, len(i.options.mapExpressions)) - for idx, expr := range i.pathExpressions { + // parse the json + v := interface{}(nil) + err := json.Unmarshal([]byte(jsonValue), &v) - evalValue, err := expr(ctx, v) if err != nil { - return nil, sql3.NewErrEvaluatingJSONPathExpr(0, 0, i.mapExpressionResults[idx], jsonValue, err.Error()) + return nil, sql3.NewErrParsingJSON(0, 0, jsonValue, err.Error()) } - // if nil (null) then return nil - if evalValue == nil { - result[idx] = nil - continue - } + // type check against the output type of the map operation - mapColumn := i.options.mapExpressions[idx] - switch mapColumn.colType.(type) { - case *parser.DataTypeID, *parser.DataTypeInt: + for idx, expr := range i.pathExpressions { - switch v := evalValue.(type) { - case float64: - // if v is a whole number then make it an int - if v == float64(int64(v)) { - result[idx] = int64(v) 
+ evalValue, err := expr(ctx, v) + if err != nil { + if i.options.allowMissingValues && (strings.HasPrefix(err.Error(), "unknown key") || strings.HasPrefix(err.Error(), "unknown parameter")) { + evalValue = nil } else { - return nil, sql3.NewErrTypeConversionOnMap(0, 0, v, mapColumn.colType.TypeDescription()) + return nil, sql3.NewErrEvaluatingJSONPathExpr(0, 0, i.mapExpressionResults[idx], jsonValue, err.Error()) } + } - case []interface{}: - return nil, sql3.NewErrTypeConversionOnMap(0, 0, v, mapColumn.colType.TypeDescription()) + // if nil (null) then return nil + if evalValue == nil { + result[idx] = nil + continue + } - case string: - intVal, err := strconv.ParseInt(v, 10, 64) - if err != nil { - return nil, sql3.NewErrTypeConversionOnMap(0, 0, v, mapColumn.colType.TypeDescription()) - } - result[idx] = intVal + mapColumn := i.options.mapExpressions[idx] + switch mapColumn.colType.(type) { + case *parser.DataTypeID, *parser.DataTypeInt: - case bool: - return nil, sql3.NewErrTypeConversionOnMap(0, 0, v, mapColumn.colType.TypeDescription()) + switch v := evalValue.(type) { + case float64: + // if v is a whole number then make it an int + if v == float64(int64(v)) { + result[idx] = int64(v) + } else { + return nil, sql3.NewErrTypeConversionOnMap(0, 0, v, mapColumn.colType.TypeDescription()) + } - case interface{}: - return nil, sql3.NewErrTypeConversionOnMap(0, 0, v, mapColumn.colType.TypeDescription()) + case []interface{}: + return nil, sql3.NewErrTypeConversionOnMap(0, 0, v, mapColumn.colType.TypeDescription()) - default: - return nil, sql3.NewErrInternalf("unhandled type '%T'", evalValue) - } + case string: + intVal, err := strconv.ParseInt(v, 10, 64) + if err != nil { + return nil, sql3.NewErrTypeConversionOnMap(0, 0, v, mapColumn.colType.TypeDescription()) + } + result[idx] = intVal - case *parser.DataTypeIDSet: - switch v := evalValue.(type) { - case float64: - // if v is a whole number then make it an int, and then turn that into an idset - if v == float64(int64(v)) { - result[idx] = []int64{int64(v)} - } else { + case bool: return nil, sql3.NewErrTypeConversionOnMap(0, 0, v, mapColumn.colType.TypeDescription()) + + case interface{}: + return nil, sql3.NewErrTypeConversionOnMap(0, 0, v, mapColumn.colType.TypeDescription()) + + default: + return nil, sql3.NewErrInternalf("unhandled type '%T'", evalValue) } - case []interface{}: - setValue := make([]int64, 0) - for _, i := range v { - f, ok := i.(float64) - if !ok { - return nil, sql3.NewErrTypeConversionOnMap(0, 0, v, mapColumn.colType.TypeDescription()) - } - if f == float64(int64(f)) { - setValue = append(setValue, int64(f)) + case *parser.DataTypeIDSet: + switch v := evalValue.(type) { + case float64: + // if v is a whole number then make it an int, and then turn that into an idset + if v == float64(int64(v)) { + result[idx] = []int64{int64(v)} } else { return nil, sql3.NewErrTypeConversionOnMap(0, 0, v, mapColumn.colType.TypeDescription()) } - } - result[idx] = setValue - case string: - return nil, sql3.NewErrTypeConversionOnMap(0, 0, v, mapColumn.colType.TypeDescription()) - - case bool: - return nil, sql3.NewErrTypeConversionOnMap(0, 0, v, mapColumn.colType.TypeDescription()) + case []interface{}: + setValue := make([]int64, 0) + for _, i := range v { + switch v := i.(type) { + case float64: + if v == float64(int64(v)) { + setValue = append(setValue, int64(v)) + } else { + return nil, sql3.NewErrTypeConversionOnMap(0, 0, v, mapColumn.colType.TypeDescription()) + } + case string: + intVal, err := strconv.ParseInt(v, 10, 64) + 
if err != nil { + return nil, sql3.NewErrTypeConversionOnMap(0, 0, v, mapColumn.colType.TypeDescription()) + } + setValue = append(setValue, int64(intVal)) + + default: + return nil, sql3.NewErrTypeConversionOnMap(0, 0, v, mapColumn.colType.TypeDescription()) + } + } + result[idx] = setValue - case interface{}: - return nil, sql3.NewErrTypeConversionOnMap(0, 0, v, mapColumn.colType.TypeDescription()) + case string: + intVal, err := strconv.ParseInt(v, 10, 64) + if err != nil { + return nil, sql3.NewErrTypeConversionOnMap(0, 0, v, mapColumn.colType.TypeDescription()) + } + result[idx] = []int64{int64(intVal)} - default: - return nil, sql3.NewErrInternalf("unhandled type '%T'", evalValue) - } + case bool: + return nil, sql3.NewErrTypeConversionOnMap(0, 0, v, mapColumn.colType.TypeDescription()) - case *parser.DataTypeStringSet: - switch v := evalValue.(type) { - case float64: - return nil, sql3.NewErrTypeConversionOnMap(0, 0, v, mapColumn.colType.TypeDescription()) + case interface{}: + return nil, sql3.NewErrTypeConversionOnMap(0, 0, v, mapColumn.colType.TypeDescription()) - case []interface{}: - setValue := make([]string, 0) - for _, i := range v { - f, ok := i.(string) - if !ok { - return nil, sql3.NewErrTypeConversionOnMap(0, 0, v, mapColumn.colType.TypeDescription()) - } - setValue = append(setValue, f) + default: + return nil, sql3.NewErrInternalf("unhandled type '%T'", evalValue) } - result[idx] = setValue - case string: - result[idx] = []string{v} + case *parser.DataTypeStringSet: + switch v := evalValue.(type) { + case float64: + return nil, sql3.NewErrTypeConversionOnMap(0, 0, v, mapColumn.colType.TypeDescription()) - case bool: - return nil, sql3.NewErrTypeConversionOnMap(0, 0, v, mapColumn.colType.TypeDescription()) + case []interface{}: + setValue := make([]string, 0) + for _, i := range v { + f, ok := i.(string) + if !ok { + return nil, sql3.NewErrTypeConversionOnMap(0, 0, v, mapColumn.colType.TypeDescription()) + } + setValue = append(setValue, f) + } + result[idx] = setValue - case interface{}: - return nil, sql3.NewErrTypeConversionOnMap(0, 0, v, mapColumn.colType.TypeDescription()) + case string: + result[idx] = []string{v} - default: - return nil, sql3.NewErrInternalf("unhandled type '%T'", evalValue) - } + case bool: + return nil, sql3.NewErrTypeConversionOnMap(0, 0, v, mapColumn.colType.TypeDescription()) - case *parser.DataTypeTimestamp: - switch v := evalValue.(type) { - case float64: - // if v is a whole number then make it an int - if v == float64(int64(v)) { - result[idx] = time.UnixMilli(int64(v)).UTC() - } else { + case interface{}: return nil, sql3.NewErrTypeConversionOnMap(0, 0, v, mapColumn.colType.TypeDescription()) + + default: + return nil, sql3.NewErrInternalf("unhandled type '%T'", evalValue) } - case []interface{}: - return nil, sql3.NewErrTypeConversionOnMap(0, 0, v, mapColumn.colType.TypeDescription()) + case *parser.DataTypeTimestamp: + switch v := evalValue.(type) { + case float64: + // if v is a whole number then make it an int + if v == float64(int64(v)) { + result[idx] = time.UnixMilli(int64(v)).UTC() + } else { + return nil, sql3.NewErrTypeConversionOnMap(0, 0, v, mapColumn.colType.TypeDescription()) + } - case string: - if tm, err := time.ParseInLocation(time.RFC3339Nano, v, time.UTC); err == nil { - result[idx] = tm - } else if tm, err := time.ParseInLocation(time.RFC3339, v, time.UTC); err == nil { - result[idx] = tm - } else if tm, err := time.ParseInLocation("2006-01-02", v, time.UTC); err == nil { - result[idx] = tm - } else { + case 
[]interface{}: return nil, sql3.NewErrTypeConversionOnMap(0, 0, v, mapColumn.colType.TypeDescription()) - } - case bool: - return nil, sql3.NewErrTypeConversionOnMap(0, 0, v, mapColumn.colType.TypeDescription()) + case string: + if tm, err := time.ParseInLocation(time.RFC3339Nano, v, time.UTC); err == nil { + result[idx] = tm + } else if tm, err := time.ParseInLocation(time.RFC3339, v, time.UTC); err == nil { + result[idx] = tm + } else if tm, err := time.ParseInLocation("2006-01-02", v, time.UTC); err == nil { + result[idx] = tm + } else { + return nil, sql3.NewErrTypeConversionOnMap(0, 0, v, mapColumn.colType.TypeDescription()) + } - case interface{}: - return nil, sql3.NewErrTypeConversionOnMap(0, 0, v, mapColumn.colType.TypeDescription()) + case bool: + return nil, sql3.NewErrTypeConversionOnMap(0, 0, v, mapColumn.colType.TypeDescription()) - default: - return nil, sql3.NewErrInternalf("unhandled type '%T'", evalValue) - } + case interface{}: + return nil, sql3.NewErrTypeConversionOnMap(0, 0, v, mapColumn.colType.TypeDescription()) - case *parser.DataTypeString: - switch v := evalValue.(type) { - case float64: - // if a whole number make it an int - if v == float64(int64(v)) { - result[idx] = fmt.Sprintf("%d", int64(v)) - } else { - result[idx] = fmt.Sprintf("%f", v) + default: + return nil, sql3.NewErrInternalf("unhandled type '%T'", evalValue) } - case []interface{}: - return nil, sql3.NewErrTypeConversionOnMap(0, 0, v, mapColumn.colType.TypeDescription()) - - case string: - result[idx] = v + case *parser.DataTypeString: + switch v := evalValue.(type) { + case float64: + // if a whole number make it an int + if v == float64(int64(v)) { + result[idx] = fmt.Sprintf("%d", int64(v)) + } else { + result[idx] = fmt.Sprintf("%f", v) + } - case bool: - return nil, sql3.NewErrTypeConversionOnMap(0, 0, v, mapColumn.colType.TypeDescription()) + case []interface{}: + return nil, sql3.NewErrTypeConversionOnMap(0, 0, v, mapColumn.colType.TypeDescription()) - case interface{}: - return nil, sql3.NewErrTypeConversionOnMap(0, 0, v, mapColumn.colType.TypeDescription()) + case string: + result[idx] = v - default: - return nil, sql3.NewErrInternalf("unhandled type '%T'", evalValue) - } + case bool: + return nil, sql3.NewErrTypeConversionOnMap(0, 0, v, mapColumn.colType.TypeDescription()) - case *parser.DataTypeBool: - switch v := evalValue.(type) { - case float64: - // if a whole number make it an int, and convert to a bool - if v == float64(int64(v)) { - result[idx] = v > 0 - } else { + case interface{}: return nil, sql3.NewErrTypeConversionOnMap(0, 0, v, mapColumn.colType.TypeDescription()) + + default: + return nil, sql3.NewErrInternalf("unhandled type '%T'", evalValue) } - case []interface{}: - return nil, sql3.NewErrTypeConversionOnMap(0, 0, v, mapColumn.colType.TypeDescription()) + case *parser.DataTypeBool: + switch v := evalValue.(type) { + case float64: + // if a whole number make it an int, and convert to a bool + if v == float64(int64(v)) { + result[idx] = v > 0 + } else { + return nil, sql3.NewErrTypeConversionOnMap(0, 0, v, mapColumn.colType.TypeDescription()) + } - case string: - return nil, sql3.NewErrTypeConversionOnMap(0, 0, v, mapColumn.colType.TypeDescription()) + case []interface{}: + return nil, sql3.NewErrTypeConversionOnMap(0, 0, v, mapColumn.colType.TypeDescription()) - case bool: - result[idx] = v + case string: + return nil, sql3.NewErrTypeConversionOnMap(0, 0, v, mapColumn.colType.TypeDescription()) - case interface{}: - return nil, sql3.NewErrTypeConversionOnMap(0, 0, v, 
mapColumn.colType.TypeDescription()) + case bool: + result[idx] = v - default: - return nil, sql3.NewErrInternalf("unhandled type '%T'", evalValue) - } + case interface{}: + return nil, sql3.NewErrTypeConversionOnMap(0, 0, v, mapColumn.colType.TypeDescription()) - case *parser.DataTypeDecimal: - switch v := evalValue.(type) { - case float64: - result[idx] = pql.FromFloat64(v) + default: + return nil, sql3.NewErrInternalf("unhandled type '%T'", evalValue) + } - case []interface{}: - return nil, sql3.NewErrTypeConversionOnMap(0, 0, v, mapColumn.colType.TypeDescription()) + case *parser.DataTypeDecimal: + switch v := evalValue.(type) { + case float64: + result[idx] = pql.FromFloat64(v) - case string: - // try to parse from a string - dv, err := pql.ParseDecimal(v) - if err != nil { + case []interface{}: + return nil, sql3.NewErrTypeConversionOnMap(0, 0, v, mapColumn.colType.TypeDescription()) + + case string: + // try to parse from a string + dv, err := pql.ParseDecimal(v) + if err != nil { + return nil, sql3.NewErrTypeConversionOnMap(0, 0, v, mapColumn.colType.TypeDescription()) + } + result[idx] = dv + + case bool: return nil, sql3.NewErrTypeConversionOnMap(0, 0, v, mapColumn.colType.TypeDescription()) - } - result[idx] = dv - case bool: - return nil, sql3.NewErrTypeConversionOnMap(0, 0, v, mapColumn.colType.TypeDescription()) + case interface{}: + return nil, sql3.NewErrTypeConversionOnMap(0, 0, v, mapColumn.colType.TypeDescription()) - case interface{}: - return nil, sql3.NewErrTypeConversionOnMap(0, 0, v, mapColumn.colType.TypeDescription()) + default: + return nil, sql3.NewErrInternalf("unhandled type '%T'", evalValue) + } default: - return nil, sql3.NewErrInternalf("unhandled type '%T'", evalValue) + return nil, sql3.NewErrInternalf("unhandled type '%T'", mapColumn.colType) } - - default: - return nil, sql3.NewErrInternalf("unhandled type '%T'", mapColumn.colType) } + return result, nil } - return result, nil + return nil, types.ErrNoMoreRows } - return nil, types.ErrNoMoreRows } func (i *bulkInsertSourceNDJsonRowIter) Close(ctx context.Context) { diff --git a/sql3/planner/opcreatetable.go b/sql3/planner/opcreatetable.go index e16e7e76b..0409d5176 100644 --- a/sql3/planner/opcreatetable.go +++ b/sql3/planner/opcreatetable.go @@ -8,6 +8,7 @@ import ( pilosa "github.com/featurebasedb/featurebase/v3" "github.com/featurebasedb/featurebase/v3/dax" + "github.com/featurebasedb/featurebase/v3/sql3" "github.com/featurebasedb/featurebase/v3/sql3/planner/types" "github.com/pkg/errors" ) @@ -41,11 +42,6 @@ func NewPlanOpCreateTable(p *ExecutionPlanner, tableName string, failIfExists bo func (p *PlanOpCreateTable) Plan() map[string]interface{} { result := make(map[string]interface{}) result["_op"] = fmt.Sprintf("%T", p) - ps := make([]string, 0) - for _, e := range p.Schema() { - ps = append(ps, fmt.Sprintf("'%s', '%s', '%s'", e.ColumnName, e.RelationName, e.Type.TypeDescription())) - } - result["_schema"] = ps result["name"] = p.tableName result["failIfExists"] = p.failIfExists return result @@ -135,7 +131,7 @@ func (i *createTableRowIter) Next(ctx context.Context) (types.Row, error) { if err := i.planner.schemaAPI.CreateTable(ctx, tbl); err != nil { if _, ok := errors.Cause(err).(pilosa.ConflictError); ok { if i.failIfExists { - return nil, err + return nil, sql3.NewErrTableExists(0, 0, i.tableName) } } else { return nil, err diff --git a/sql3/planner/opdistinct.go b/sql3/planner/opdistinct.go index 288ff6346..e3715ac61 100644 --- a/sql3/planner/opdistinct.go +++ b/sql3/planner/opdistinct.go @@ 
-3,39 +3,166 @@ package planner import ( + "bytes" + "context" "fmt" + "github.com/featurebasedb/featurebase/v3/bufferpool" + "github.com/featurebasedb/featurebase/v3/extendiblehash" + "github.com/featurebasedb/featurebase/v3/sql3" "github.com/featurebasedb/featurebase/v3/sql3/planner/types" ) // PlanOpDistinct plan operator handles DISTINCT +// DISTINCT returns unique rows from its iterator and does this by +// creating a hash table and probing new rows against that hash table; +// if the row has already been seen it is skipped, and if it has not been +// seen, a 'key' is created from all the values in the row and this is +// inserted into the hash table. +// The hash table is implemented using Extendible Hashing and is backed +// by a buffer pool. The buffer pool is allocated 128 pages (or 1MB), +// and the disk manager used by the buffer pool will use an in-memory +// implementation up to 128 pages and thereafter spill to disk type PlanOpDistinct struct { planner *ExecutionPlanner - source types.PlanOperator + ChildOp types.PlanOperator warnings []string } -func NewPlanOpDistinct(p *ExecutionPlanner, source types.PlanOperator) *PlanOpDistinct { +func NewPlanOpDistinct(p *ExecutionPlanner, child types.PlanOperator) *PlanOpDistinct { return &PlanOpDistinct{ planner: p, - source: source, + ChildOp: child, warnings: make([]string, 0), } } -func (n *PlanOpDistinct) Plan() map[string]interface{} { +func (p *PlanOpDistinct) Schema() types.Schema { + return p.ChildOp.Schema() +} + +func (p *PlanOpDistinct) Iterator(ctx context.Context, row types.Row) (types.RowIterator, error) { + i, err := p.ChildOp.Iterator(ctx, row) + if err != nil { + return nil, err + } + return newDistinctIterator(p.Schema(), i), nil +} + +func (p *PlanOpDistinct) WithChildren(children ...types.PlanOperator) (types.PlanOperator, error) { + if len(children) != 1 { + return nil, sql3.NewErrInternalf("unexpected number of children '%d'", len(children)) + } + return NewPlanOpDistinct(p.planner, children[0]), nil +} + +func (p *PlanOpDistinct) Children() []types.PlanOperator { + return []types.PlanOperator{ + p.ChildOp, + } +} + +func (p *PlanOpDistinct) Plan() map[string]interface{} { result := make(map[string]interface{}) - result["_op"] = fmt.Sprintf("%T", n) + result["_op"] = fmt.Sprintf("%T", p) + result["_schema"] = p.Schema().Plan() + result["child"] = p.ChildOp.Plan() return result } -func (n *PlanOpDistinct) AddWarning(warning string) { - n.warnings = append(n.warnings, warning) +func (p *PlanOpDistinct) String() string { + return "" +} + +func (p *PlanOpDistinct) AddWarning(warning string) { + p.warnings = append(p.warnings, warning) } -func (n *PlanOpDistinct) Warnings() []string { +func (p *PlanOpDistinct) Warnings() []string { var w []string - w = append(w, n.warnings...) - w = append(w, n.source.Warnings()...) + w = append(w, p.warnings...) + w = append(w, p.ChildOp.Warnings()...) return w }
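A note on the key encoding used by generateRowKey below (and by groupingKey in opgroupby.go): each value is rendered with %#v plus a trailing "," separator, because plain concatenation would let distinct rows collide. A minimal sketch with assumed values:

    // without a separator, rows {1, 23} and {12, 3} both encode as "123";
    // with a separator per value they stay distinct:
    //   {1, 23} -> "1,23,"
    //   {12, 3} -> "12,3,"

Keys are also capped at 128 bytes by the hash table configuration below, so very wide rows are not yet supported.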
+ +type distinctIterator struct { + child types.RowIterator + schema types.Schema + hasStarted *struct{} + hashTable *extendiblehash.ExtendibleHashTable +} + +func newDistinctIterator(schema types.Schema, child types.RowIterator) *distinctIterator { + return &distinctIterator{ + schema: schema, + child: child, + } +} + +func (i *distinctIterator) rowSeen(ctx context.Context, row types.Row) (bool, error) { + keyBytes := generateRowKey(row) + _, found, err := i.hashTable.Get(keyBytes) + if err != nil { + return false, err + } + // put the row in the hash table to record that we've seen it + if !found { + i.hashTable.Put(keyBytes, []byte{1}) + } + return found, nil +} + +func (i *distinctIterator) Next(ctx context.Context) (types.Row, error) { + if i.hasStarted == nil { + // create the hash table + + // ask the diskmanager to spill after 1MB (128 8KB pages) + diskManager := bufferpool.NewInMemDiskSpillingDiskManager(128) + // use 1MB (128 8KB pages) + bufferPool := bufferpool.NewBufferPool(128, diskManager) + + // we're going to use something pretty conservative here - we could have sets, strings, ... all sorts of stuff + // and we don't yet support long key overflowing + keyLength := 128 // bytes + + valueLength := 1 // we're going to store a 1 (byte) for every key in the table + + ht, err := extendiblehash.NewExtendibleHashTable(keyLength, valueLength, bufferPool) + if err != nil { + return nil, err + } + i.hashTable = ht + i.hasStarted = &struct{}{} + } + + for { + row, err := i.child.Next(ctx) + if err != nil { + // clean up + // TODO(pok) - we need to move clean up to a higher level, and + // implement it at the operator level + if err == types.ErrNoMoreRows { + i.hashTable.Close() + } + return nil, err + } + // check whether the row already exists in the hash table + seen, err := i.rowSeen(ctx, row) + if err != nil { + return nil, err + } + // if we've seen it before, go to the next row + if seen { + continue + } + return row, nil + } +} + +func generateRowKey(row types.Row) []byte { + var buf bytes.Buffer + for _, v := range row { + buf.WriteString(fmt.Sprintf("%#v,", v)) + } + return buf.Bytes() +} diff --git a/sql3/planner/opdroptable.go b/sql3/planner/opdroptable.go index a25891ea5..f611478ea 100644 --- a/sql3/planner/opdroptable.go +++ b/sql3/planner/opdroptable.go @@ -29,11 +29,6 @@ func NewPlanOpDropTable(p *ExecutionPlanner, index *pilosa.IndexInfo) *PlanOpDro func (p *PlanOpDropTable) Plan() map[string]interface{} { result := make(map[string]interface{}) result["_op"] = fmt.Sprintf("%T", p) - ps := make([]string, 0) - for _, e := range p.Schema() { - ps = append(ps, fmt.Sprintf("'%s', '%s', '%s'", e.ColumnName, e.RelationName, e.Type.TypeDescription())) - } - result["_schema"] = ps result["tableName"] = p.index.Name return result } diff --git a/sql3/planner/opfeaturebasecolumns.go b/sql3/planner/opfeaturebasecolumns.go index 9e8aa23c3..d0d73b5ee 100644 --- a/sql3/planner/opfeaturebasecolumns.go +++ b/sql3/planner/opfeaturebasecolumns.go @@ -29,11 +29,7 @@ func NewPlanOpFeatureBaseColumns(tbl *dax.Table) *PlanOpFeatureBaseColumns { func (p *PlanOpFeatureBaseColumns) Plan() map[string]interface{} { result := make(map[string]interface{}) result["_op"] = fmt.Sprintf("%T", p) - ps := make([]string, 0) - for _, e := range p.Schema() { - ps = append(ps, fmt.Sprintf("'%s', '%s', '%s'", e.ColumnName, e.RelationName, e.Type.TypeDescription())) - } - result["_schema"] = ps + result["_schema"] = p.Schema().Plan() return result } diff --git a/sql3/planner/opfeaturebasetables.go 
b/sql3/planner/opfeaturebasetables.go index 64373af16..59b03375d 100644 --- a/sql3/planner/opfeaturebasetables.go +++ b/sql3/planner/opfeaturebasetables.go @@ -5,6 +5,7 @@ package planner import ( "context" "fmt" + "strings" "time" pilosa "github.com/featurebasedb/featurebase/v3" @@ -15,12 +16,14 @@ import ( // PlanOpFeatureBaseTables wraps a []*IndexInfo that is returned from // schemaAPI.Schema(). type PlanOpFeatureBaseTables struct { + planner *ExecutionPlanner indexInfo []*pilosa.IndexInfo warnings []string } -func NewPlanOpFeatureBaseTables(indexInfo []*pilosa.IndexInfo) *PlanOpFeatureBaseTables { +func NewPlanOpFeatureBaseTables(planner *ExecutionPlanner, indexInfo []*pilosa.IndexInfo) *PlanOpFeatureBaseTables { return &PlanOpFeatureBaseTables{ + planner: planner, indexInfo: indexInfo, warnings: make([]string, 0), } @@ -29,11 +32,7 @@ func NewPlanOpFeatureBaseTables(indexInfo []*pilosa.IndexInfo) *PlanOpFeatureBas func (p *PlanOpFeatureBaseTables) Plan() map[string]interface{} { result := make(map[string]interface{}) result["_op"] = fmt.Sprintf("%T", p) - ps := make([]string, 0) - for _, e := range p.Schema() { - ps = append(ps, fmt.Sprintf("'%s', '%s', '%s'", e.ColumnName, e.RelationName, e.Type.TypeDescription())) - } - result["_schema"] = ps + result["_schema"] = p.Schema().Plan() return result } @@ -68,7 +67,7 @@ func (p *PlanOpFeatureBaseTables) Schema() types.Schema { }, &types.PlannerColumn{ RelationName: "fb_tables", - ColumnName: "last_updated_user", + ColumnName: "updated_by", Type: parser.NewDataTypeString(), }, &types.PlannerColumn{ @@ -78,8 +77,8 @@ func (p *PlanOpFeatureBaseTables) Schema() types.Schema { }, &types.PlannerColumn{ RelationName: "fb_tables", - ColumnName: "track_existence", - Type: parser.NewDataTypeBool(), + ColumnName: "updated_at", + Type: parser.NewDataTypeTimestamp(), }, &types.PlannerColumn{ RelationName: "fb_tables", @@ -88,7 +87,7 @@ func (p *PlanOpFeatureBaseTables) Schema() types.Schema { }, &types.PlannerColumn{ RelationName: "fb_tables", - ColumnName: "shard_width", + ColumnName: "space_used", Type: parser.NewDataTypeInt(), }, &types.PlannerColumn{ @@ -105,15 +104,17 @@ func (p *PlanOpFeatureBaseTables) Children() []types.PlanOperator { func (p *PlanOpFeatureBaseTables) Iterator(ctx context.Context, row types.Row) (types.RowIterator, error) { return &showTablesRowIter{ + planner: p.planner, indexInfo: p.indexInfo, }, nil } func (p *PlanOpFeatureBaseTables) WithChildren(children ...types.PlanOperator) (types.PlanOperator, error) { - return NewPlanOpFeatureBaseTables(p.indexInfo), nil + return NewPlanOpFeatureBaseTables(p.planner, p.indexInfo), nil } type showTablesRowIter struct { + planner *ExecutionPlanner indexInfo []*pilosa.IndexInfo rowIndex int } @@ -122,16 +123,37 @@ var _ types.RowIterator = (*showTablesRowIter)(nil) func (i *showTablesRowIter) Next(ctx context.Context) (types.Row, error) { if i.rowIndex < len(i.indexInfo) { - tm := time.Unix(0, i.indexInfo[i.rowIndex].CreatedAt) + + indexName := i.indexInfo[i.rowIndex].Name + + var err error + var spaceUsed pilosa.DiskUsage + switch strings.ToLower(indexName) { + case "fb_cluster_info", "fb_cluster_nodes", "fb_performance_counters", "fb_exec_requests", "fb_table_ddl": + spaceUsed = pilosa.DiskUsage{ + Usage: 0, + } + default: + u := i.planner.systemAPI.DataDir() + u = fmt.Sprintf("%s/indexes/%s", u, indexName) + + spaceUsed, err = pilosa.GetDiskUsage(u) + if err != nil { + return nil, err + } + } + + createdAt := time.Unix(0, i.indexInfo[i.rowIndex].CreatedAt) + updatedAt := 
time.Unix(0, i.indexInfo[i.rowIndex].UpdatedAt) row := []interface{}{ - i.indexInfo[i.rowIndex].Name, - i.indexInfo[i.rowIndex].Name, + indexName, + indexName, i.indexInfo[i.rowIndex].Owner, i.indexInfo[i.rowIndex].LastUpdateUser, - tm.Format(time.RFC3339), - i.indexInfo[i.rowIndex].Options.TrackExistence, + createdAt.Format(time.RFC3339), + updatedAt.Format(time.RFC3339), i.indexInfo[i.rowIndex].Options.Keys, - i.indexInfo[i.rowIndex].ShardWidth, + spaceUsed.Usage, i.indexInfo[i.rowIndex].Options.Description, } i.rowIndex += 1 diff --git a/sql3/planner/opfilter.go b/sql3/planner/opfilter.go index b4329122f..37a249f7b 100644 --- a/sql3/planner/opfilter.go +++ b/sql3/planner/opfilter.go @@ -56,11 +56,7 @@ func (p *PlanOpFilter) Children() []types.PlanOperator { func (p *PlanOpFilter) Plan() map[string]interface{} { result := make(map[string]interface{}) result["_op"] = fmt.Sprintf("%T", p) - ps := make([]string, 0) - for _, e := range p.Schema() { - ps = append(ps, fmt.Sprintf("'%s', '%s', '%s'", e.ColumnName, e.RelationName, e.Type.TypeDescription())) - } - result["_schema"] = ps + result["_schema"] = p.Schema().Plan() result["predicate"] = p.Predicate.Plan() result["child"] = p.ChildOp.Plan() return result @@ -91,8 +87,7 @@ func (p *PlanOpFilter) WithUpdatedExpressions(exprs ...types.PlanExpression) (ty if len(exprs) != 1 { return nil, sql3.NewErrInternalf("unexpected number of exprs '%d'", len(exprs)) } - p.Predicate = exprs[0] - return p, nil + return NewPlanOpFilter(p.planner, exprs[0], p.ChildOp), nil } type filterIterator struct { diff --git a/sql3/planner/opgroupby.go b/sql3/planner/opgroupby.go index aac4c811b..6263cfd11 100644 --- a/sql3/planner/opgroupby.go +++ b/sql3/planner/opgroupby.go @@ -3,11 +3,10 @@ package planner import ( + "bytes" "context" "fmt" - "hash/maphash" - "github.com/featurebasedb/featurebase/v3/errors" "github.com/featurebasedb/featurebase/v3/sql3" "github.com/featurebasedb/featurebase/v3/sql3/planner/types" ) @@ -100,11 +99,7 @@ func (p *PlanOpGroupBy) WithUpdatedExpressions(exprs ...types.PlanExpression) (t func (p *PlanOpGroupBy) Plan() map[string]interface{} { result := make(map[string]interface{}) result["_op"] = fmt.Sprintf("%T", p) - sc := make([]string, 0) - for _, e := range p.Schema() { - sc = append(sc, fmt.Sprintf("'%s', '%s', '%s'", e.ColumnName, e.RelationName, e.Type.TypeDescription())) - } - result["_schema"] = sc + result["_schema"] = p.Schema().Plan() result["child"] = p.ChildOp.Plan() ps := make([]interface{}, 0) for _, e := range p.Aggregates { @@ -193,8 +188,8 @@ type keysAndAggregations struct { type groupByGroupingIter struct { aggregates []types.PlanExpression groupByExprs []types.PlanExpression - aggregations ObjectCache - keys []uint64 + aggregations map[string]*keysAndAggregations + keys []string child types.RowIterator } @@ -208,16 +203,16 @@ func newGroupByGroupingIter(ctx context.Context, aggregates, groupByExprs []type func (i *groupByGroupingIter) Next(ctx context.Context) (types.Row, error) { if i.aggregations == nil { - i.aggregations = NewMapObjectCache() + i.aggregations = make(map[string]*keysAndAggregations) if err := i.compute(ctx); err != nil { return nil, err } } if len(i.keys) > 0 { - buffers, err := i.get(i.keys[0]) - if err != nil { - return nil, err + buffers, ok := i.aggregations[i.keys[0]] + if !ok { + return nil, sql3.NewErrInternalf("unexpected absence of key") } i.keys = i.keys[1:] @@ -245,13 +240,13 @@ func (i *groupByGroupingIter) compute(ctx context.Context) error { return err } - key, keyValues, err := 
groupingKeyHash(ctx, i.groupByExprs, row) + key, keyValues, err := groupingKey(ctx, i.groupByExprs, row) if err != nil { return err } - b, err := i.get(key) - if errors.Is(err, sql3.ErrCacheKeyNotFound) { + b, ok := i.aggregations[key] + if !ok { b = &keysAndAggregations{} b.buffers = make([]types.AggregationBuffer, len(i.aggregates)) for j, a := range i.aggregates { @@ -261,9 +256,7 @@ func (i *groupByGroupingIter) compute(ctx context.Context) error { } } b.groupByKeys = keyValues - if err := i.aggregations.PutObject(key, b); err != nil { - return err - } + i.aggregations[key] = b i.keys = append(i.keys, key) } else if err != nil { return err @@ -277,17 +270,6 @@ func (i *groupByGroupingIter) compute(ctx context.Context) error { return nil } -func (i *groupByGroupingIter) get(key uint64) (*keysAndAggregations, error) { - v, err := i.aggregations.GetObject(key) - if err != nil { - return nil, err - } - if v == nil { - return nil, nil - } - return v.(*keysAndAggregations), err -} - func newAggregationBuffer(expr types.PlanExpression) (types.AggregationBuffer, error) { switch n := expr.(type) { case types.Aggregable: @@ -318,21 +300,16 @@ func evalBuffers(ctx context.Context, aggregationBuffers *keysAndAggregations) ( return row, nil } -func groupingKeyHash(ctx context.Context, groupByExprs []types.PlanExpression, row types.Row) (uint64, types.Row, error) { +func groupingKey(ctx context.Context, groupByExprs []types.PlanExpression, row types.Row) (string, types.Row, error) { + var buf bytes.Buffer rowKeys := make([]interface{}, len(groupByExprs)) - var hash maphash.Hash - hash.SetSeed(prototypeHash.Seed()) for i, expr := range groupByExprs { v, err := expr.Evaluate(row) if err != nil { - return 0, nil, err - } - _, err = hash.Write(([]byte)(fmt.Sprintf("%#v,", v))) - if err != nil { - return 0, nil, err + return "", nil, err } + buf.WriteString(fmt.Sprintf("%#v", v)) rowKeys[i] = v } - result := hash.Sum64() - return result, rowKeys, nil + return buf.String(), rowKeys, nil } diff --git a/sql3/planner/ophaving.go b/sql3/planner/ophaving.go new file mode 100644 index 000000000..a4f71a54c --- /dev/null +++ b/sql3/planner/ophaving.go @@ -0,0 +1,91 @@ +// Copyright 2022 Molecula Corp. All rights reserved. 
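
The opgroupby.go change above swaps the old maphash-based grouping key for a plain string key: every GROUP BY value is rendered with %#v and concatenated, so two distinct value tuples can no longer collide the way two 64-bit hashes could. A standalone sketch of the key construction (illustrative values only, not code from this commit):

package main

import (
	"bytes"
	"fmt"
)

// groupingKey mirrors the planner's new key construction: concatenate the
// Go-syntax representation of each group-by value.
func groupingKey(values ...interface{}) string {
	var buf bytes.Buffer
	for _, v := range values {
		buf.WriteString(fmt.Sprintf("%#v", v))
	}
	return buf.String()
}

func main() {
	// two different groups produce two different map keys
	fmt.Println(groupingKey("us-east", int64(42))) // "us-east"42
	fmt.Println(groupingKey("us-west", int64(42))) // "us-west"42
}
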
+ +package planner + +import ( + "context" + "fmt" + + "github.com/featurebasedb/featurebase/v3/sql3" + "github.com/featurebasedb/featurebase/v3/sql3/planner/types" +) + +// PlanOpHaving is a filter operator for the HAVING clause +type PlanOpHaving struct { + planner *ExecutionPlanner + ChildOp types.PlanOperator + Predicate types.PlanExpression + + warnings []string +} + +func NewPlanOpHaving(planner *ExecutionPlanner, predicate types.PlanExpression, child types.PlanOperator) *PlanOpHaving { + return &PlanOpHaving{ + planner: planner, + Predicate: predicate, + ChildOp: child, + warnings: make([]string, 0), + } +} + +func (p *PlanOpHaving) Schema() types.Schema { + return p.ChildOp.Schema() +} + +func (p *PlanOpHaving) Iterator(ctx context.Context, row types.Row) (types.RowIterator, error) { + i, err := p.ChildOp.Iterator(ctx, row) + if err != nil { + return nil, err + } + return newFilterIterator(ctx, p.Predicate, i), nil +} + +func (p *PlanOpHaving) WithChildren(children ...types.PlanOperator) (types.PlanOperator, error) { + if len(children) != 1 { + return nil, sql3.NewErrInternalf("unexpected number of children '%d'", len(children)) + } + return NewPlanOpHaving(p.planner, p.Predicate, children[0]), nil +} + +func (p *PlanOpHaving) Children() []types.PlanOperator { + return []types.PlanOperator{ + p.ChildOp, + } +} + +func (p *PlanOpHaving) Plan() map[string]interface{} { + result := make(map[string]interface{}) + result["_op"] = fmt.Sprintf("%T", p) + result["_schema"] = p.Schema().Plan() + result["predicate"] = p.Predicate.Plan() + result["child"] = p.ChildOp.Plan() + return result +} + +func (p *PlanOpHaving) String() string { + return "" +} + +func (p *PlanOpHaving) AddWarning(warning string) { + p.warnings = append(p.warnings, warning) +} + +func (p *PlanOpHaving) Warnings() []string { + return p.warnings +} + +func (p *PlanOpHaving) Expressions() []types.PlanExpression { + if p.Predicate != nil { + return []types.PlanExpression{ + p.Predicate, + } + } + return []types.PlanExpression{} +} + +func (p *PlanOpHaving) WithUpdatedExpressions(exprs ...types.PlanExpression) (types.PlanOperator, error) { + if len(exprs) != 1 { + return nil, sql3.NewErrInternalf("unexpected number of exprs '%d'", len(exprs)) + } + return NewPlanOpHaving(p.planner, exprs[0], p.ChildOp), nil +} diff --git a/sql3/planner/opinsert.go b/sql3/planner/opinsert.go index 5819bb2fe..33c20981d 100644 --- a/sql3/planner/opinsert.go +++ b/sql3/planner/opinsert.go @@ -39,26 +39,14 @@ func NewPlanOpInsert(p *ExecutionPlanner, tableName string, targetColumns []*qua func (p *PlanOpInsert) Plan() map[string]interface{} { result := make(map[string]interface{}) result["_op"] = fmt.Sprintf("%T", p) - sc := make([]string, 0) - for _, e := range p.Schema() { - sc = append(sc, fmt.Sprintf("'%s', '%s', '%s'", e.ColumnName, e.RelationName, e.Type.TypeDescription())) - } - result["_schema"] = sc + result["_schema"] = p.Schema().Plan() result["tableName"] = p.tableName ps := make([]interface{}, 0) for _, e := range p.targetColumns { ps = append(ps, e.Plan()) } result["targetColumns"] = ps - pps := make([]interface{}, 0) - for _, tuple := range p.insertValues { - ps := make([]interface{}, 0) - for _, e := range tuple { - ps = append(ps, e.Plan()) - } - pps = append(pps, ps) - } - result["insertValues"] = pps + result["insertTupleCount"] = len(p.insertValues) return result } @@ -180,6 +168,9 @@ func (i *insertRowIter) Next(ctx context.Context) (types.Row, error) { return nil, errors.Wrap(err, "setting up batch") } + var qbatchTime 
fbbatch.QuantizedTime + qbatchTime.Set(time.Now().UTC()) + // row is the single instance of batch.Row allocated. It is re-used // throughout the for loop to minimize memory allocation. var row fbbatch.Row @@ -261,6 +252,22 @@ func (i *insertRowIter) Next(ctx context.Context) (types.Row, error) { row.Values[posVals[idx]] = eval } + case pilosa.FieldTypeTime: + row.Time = qbatchTime + switch v := eval.(type) { + case []int64: + uint64s := make([]uint64, len(v)) + for i := range v { + if v[i] < 0 { + return nil, sql3.NewErrInternalf("converting negative slice value to uint64: %d", v[i]) + } + uint64s[i] = uint64(v[i]) + } + row.Values[posVals[idx]] = uint64s + default: + row.Values[posVals[idx]] = eval + } + case pilosa.FieldTypeInt: if eval != nil { v, ok := eval.(int64) diff --git a/sql3/planner/opnestedloops.go b/sql3/planner/opnestedloops.go index 7fe91dcca..71df5894b 100644 --- a/sql3/planner/opnestedloops.go +++ b/sql3/planner/opnestedloops.go @@ -31,11 +31,7 @@ func NewPlanOpNestedLoops(top, bottom types.PlanOperator, condition types.PlanEx func (p *PlanOpNestedLoops) Plan() map[string]interface{} { result := make(map[string]interface{}) result["_op"] = fmt.Sprintf("%T", p) - ps := make([]string, 0) - for _, e := range p.Schema() { - ps = append(ps, fmt.Sprintf("'%s', '%s', '%s'", e.ColumnName, e.RelationName, e.Type.TypeDescription())) - } - result["_schema"] = ps + result["_schema"] = p.Schema().Plan() result["top"] = p.top.Plan() result["bottom"] = p.bottom.Plan() result["condition"] = p.cond.Plan() diff --git a/sql3/planner/opnulltable.go b/sql3/planner/opnulltable.go index f6a861106..304167135 100644 --- a/sql3/planner/opnulltable.go +++ b/sql3/planner/opnulltable.go @@ -40,11 +40,7 @@ func (p *PlanOpNullTable) Children() []types.PlanOperator { func (p *PlanOpNullTable) Plan() map[string]interface{} { result := make(map[string]interface{}) result["_op"] = fmt.Sprintf("%T", p) - ps := make([]string, 0) - for _, e := range p.Schema() { - ps = append(ps, fmt.Sprintf("'%s', '%s', '%s'", e.ColumnName, e.RelationName, e.Type.TypeDescription())) - } - result["_schema"] = ps + result["_schema"] = p.Schema().Plan() return result } diff --git a/sql3/planner/oporderby.go b/sql3/planner/oporderby.go index c9a6b6c9b..b11c431a1 100644 --- a/sql3/planner/oporderby.go +++ b/sql3/planner/oporderby.go @@ -86,11 +86,7 @@ func (n *PlanOpOrderBy) String() string { func (n *PlanOpOrderBy) Plan() map[string]interface{} { result := make(map[string]interface{}) result["_op"] = fmt.Sprintf("%T", n) - sc := make([]string, 0) - for _, e := range n.Schema() { - sc = append(sc, fmt.Sprintf("'%s', '%s', '%s'", e.ColumnName, e.RelationName, e.Type.TypeDescription())) - } - result["_schema"] = sc + result["_schema"] = n.Schema().Plan() result["child"] = n.ChildOp.Plan() ps := make([]interface{}, 0) @@ -150,7 +146,7 @@ func (i *orderByIter) Next(ctx context.Context) (types.Row, error) { } func (i *orderByIter) computeOrderByRows(ctx context.Context) error { - cache := newInMemoryRowCache() + cache := make([]types.Row, 0) for { row, err := i.childIter.Next(ctx) @@ -162,15 +158,12 @@ func (i *orderByIter) computeOrderByRows(ctx context.Context) error { return err } - if err := cache.Add(row); err != nil { - return err - } + cache = append(cache, row) } - rows := cache.AllRows() sorter := &OrderBySorter{ SortFields: i.s.orderByFields, - Rows: rows, + Rows: cache, LastError: nil, Ctx: ctx, } @@ -178,7 +171,7 @@ func (i *orderByIter) computeOrderByRows(ctx context.Context) error { if sorter.LastError != nil { return 
sorter.LastError } - i.sortedRows = rows + i.sortedRows = cache return nil } diff --git a/sql3/planner/oppqlaggregate.go b/sql3/planner/oppqlaggregate.go index 0867ce24f..d1c85daf8 100644 --- a/sql3/planner/oppqlaggregate.go +++ b/sql3/planner/oppqlaggregate.go @@ -7,6 +7,7 @@ import ( "fmt" pilosa "github.com/featurebasedb/featurebase/v3" + "github.com/featurebasedb/featurebase/v3/dax" "github.com/featurebasedb/featurebase/v3/pql" "github.com/featurebasedb/featurebase/v3/sql3" "github.com/featurebasedb/featurebase/v3/sql3/parser" @@ -36,11 +37,7 @@ func NewPlanOpPQLAggregate(p *ExecutionPlanner, tableName string, aggregate type func (p *PlanOpPQLAggregate) Plan() map[string]interface{} { result := make(map[string]interface{}) result["_op"] = fmt.Sprintf("%T", p) - ps := make([]string, 0) - for _, e := range p.Schema() { - ps = append(ps, fmt.Sprintf("'%s', '%s', '%s'", e.ColumnName, e.RelationName, e.Type.TypeDescription())) - } - result["_schema"] = ps + result["_schema"] = p.Schema().Plan() result["tableName"] = p.tableName if p.filter != nil { result["filter"] = p.filter.Plan() @@ -251,7 +248,12 @@ func (i *pqlAggregateRowIter) Next(ctx context.Context) (types.Row, error) { return nil, sql3.NewErrInternalf("unhandled aggregate type '%d'", i.aggregate.AggType()) } - queryResponse, err := i.planner.executor.Execute(ctx, i.tableName, &pql.Query{Calls: []*pql.Call{call}}, nil, nil) + tbl, err := i.planner.schemaAPI.TableByName(ctx, dax.TableName(i.tableName)) + if err != nil { + return nil, sql3.NewErrTableNotFound(0, 0, i.tableName) + } + + queryResponse, err := i.planner.executor.Execute(ctx, tbl, &pql.Query{Calls: []*pql.Call{call}}, nil, nil) if err != nil { return nil, err } diff --git a/sql3/planner/oppqldelete.go b/sql3/planner/oppqldelete.go new file mode 100644 index 000000000..b4a6e40d8 --- /dev/null +++ b/sql3/planner/oppqldelete.go @@ -0,0 +1,171 @@ +// Copyright 2022 Molecula Corp. All rights reserved. + +package planner + +import ( + "context" + "fmt" + + "github.com/featurebasedb/featurebase/v3/dax" + "github.com/featurebasedb/featurebase/v3/pql" + "github.com/featurebasedb/featurebase/v3/sql3" + "github.com/featurebasedb/featurebase/v3/sql3/parser" + "github.com/featurebasedb/featurebase/v3/sql3/planner/types" +) + +// PlanOpPQLConstRowDelete plan operator to delete rows from a table based on a set of constant row keys.
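
The iterator in this operator drains its child of _id values and then issues a single Delete over a ConstRow precall. A sketch of the query shape it builds, using the pql types imported above (the key list is illustrative):

// constRowDeleteQuery shows the shape of the PQL the iterator executes,
// e.g. Delete(ConstRow(columns=[1, 7])).
func constRowDeleteQuery(keys []interface{}) *pql.Query {
	cond := &pql.Call{
		Name: "ConstRow",
		Args: map[string]interface{}{"columns": keys},
		Type: pql.PrecallGlobal,
	}
	del := &pql.Call{Name: "Delete", Children: []*pql.Call{cond}}
	return &pql.Query{Calls: []*pql.Call{del}}
}
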
+type PlanOpPQLConstRowDelete struct { + planner *ExecutionPlanner + ChildOp types.PlanOperator + tableName string + warnings []string +} + +func NewPlanOpPQLConstRowDelete(p *ExecutionPlanner, tableName string, child types.PlanOperator) *PlanOpPQLConstRowDelete { + return &PlanOpPQLConstRowDelete{ + planner: p, + ChildOp: child, + tableName: tableName, + warnings: make([]string, 0), + } +} + +func (p *PlanOpPQLConstRowDelete) Plan() map[string]interface{} { + result := make(map[string]interface{}) + result["_op"] = fmt.Sprintf("%T", p) + result["_schema"] = p.Schema().Plan() + result["child"] = p.ChildOp.Plan() + result["tableName"] = p.tableName + return result +} + +func (p *PlanOpPQLConstRowDelete) String() string { + return "" +} + +func (p *PlanOpPQLConstRowDelete) AddWarning(warning string) { + p.warnings = append(p.warnings, warning) +} + +func (p *PlanOpPQLConstRowDelete) Warnings() []string { + return p.warnings +} + +func (p *PlanOpPQLConstRowDelete) Schema() types.Schema { + return types.Schema{} +} + +func (p *PlanOpPQLConstRowDelete) Children() []types.PlanOperator { + return []types.PlanOperator{ + p.ChildOp, + } +} + +func (p *PlanOpPQLConstRowDelete) Iterator(ctx context.Context, row types.Row) (types.RowIterator, error) { + childIter, err := p.ChildOp.Iterator(ctx, row) + if err != nil { + return nil, err + } + + return &constRowDeleteRowIter{ + planner: p.planner, + childIter: childIter, + tableName: p.tableName, + }, nil +} + +func (p *PlanOpPQLConstRowDelete) WithChildren(children ...types.PlanOperator) (types.PlanOperator, error) { + if len(children) != 1 { + return nil, sql3.NewErrInternalf("unexpected number of children '%d'", len(children)) + } + return NewPlanOpPQLConstRowDelete(p.planner, p.tableName, children[0]), nil +} + +func (p *PlanOpPQLConstRowDelete) Expressions() []types.PlanExpression { + // since we have to do const row lookups, we should always reference the _id column in the + // table we are deleting from + + tbl, err := p.planner.schemaAPI.TableByName(context.Background(), dax.TableName(p.tableName)) + if err != nil { + return []types.PlanExpression{} + } + + var colType parser.ExprDataType + if tbl.StringKeys() { + colType = parser.NewDataTypeString() + } else { + colType = parser.NewDataTypeID() + } + + return []types.PlanExpression{ + &qualifiedRefPlanExpression{ + tableName: p.tableName, + columnIndex: 0, + dataType: colType, + columnName: "_id", + }, + } +} + +func (p *PlanOpPQLConstRowDelete) WithUpdatedExpressions(exprs ...types.PlanExpression) (types.PlanOperator, error) { + // just return ourselves + return p, nil +} + +type constRowDeleteRowIter struct { + planner *ExecutionPlanner + childIter types.RowIterator + tableName string +} + +var _ types.RowIterator = (*constRowDeleteRowIter)(nil) + +func (i *constRowDeleteRowIter) Next(ctx context.Context) (types.Row, error) { + var err error + + err = i.planner.checkAccess(ctx, i.tableName, accessTypeWriteData) + if err != nil { + return nil, err + } + + var row []interface{} + row, err = i.childIter.Next(ctx) + if err != nil { + return nil, err + } + keys := make([]interface{}, 0) + for { + keys = append(keys, row[0]) + + row, err = i.childIter.Next(ctx) + if err == types.ErrNoMoreRows { + break + } + if err != nil { + return nil, err + } + } + + if len(keys) > 0 { + // this row should contain the key to delete + cond := &pql.Call{ + Name: "ConstRow", + Args: map[string]interface{}{ + "columns": keys, + }, + Type: pql.PrecallGlobal, + } + call := &pql.Call{Name: "Delete", Children: 
[]*pql.Call{cond}} + + tbl, err := i.planner.schemaAPI.TableByName(ctx, dax.TableName(i.tableName)) + if err != nil { + return nil, sql3.NewErrTableNotFound(0, 0, i.tableName) + } + + _, err = i.planner.executor.Execute(ctx, tbl, &pql.Query{Calls: []*pql.Call{call}}, nil, nil) + if err != nil { + return nil, err + } + } + return nil, types.ErrNoMoreRows +} diff --git a/sql3/planner/oppqldistinctscan.go b/sql3/planner/oppqldistinctscan.go new file mode 100644 index 000000000..ef4299cb4 --- /dev/null +++ b/sql3/planner/oppqldistinctscan.go @@ -0,0 +1,293 @@ +// Copyright 2023 Molecula Corp. All rights reserved. + +package planner + +import ( + "context" + "fmt" + "strings" + "time" + + pilosa "github.com/featurebasedb/featurebase/v3" + "github.com/featurebasedb/featurebase/v3/dax" + "github.com/featurebasedb/featurebase/v3/pql" + "github.com/featurebasedb/featurebase/v3/sql3" + "github.com/featurebasedb/featurebase/v3/sql3/parser" + "github.com/featurebasedb/featurebase/v3/sql3/planner/types" + "github.com/pkg/errors" +) + +// PlanOpPQLDistinctScan plan operator handles a PQL distinct scan +// i.e. a scan with only one column that is used in a distinct query +type PlanOpPQLDistinctScan struct { + planner *ExecutionPlanner + tableName string + column string + filter types.PlanExpression + topExpr types.PlanExpression + warnings []string +} + +func NewPlanOpPQLDistinctScan(p *ExecutionPlanner, tableName string, column string) (*PlanOpPQLDistinctScan, error) { + if strings.EqualFold("_id", column) { + return nil, sql3.NewErrInternalf("non _id column required") + } + return &PlanOpPQLDistinctScan{ + planner: p, + tableName: tableName, + column: column, + warnings: make([]string, 0), + }, nil +} + +func (p *PlanOpPQLDistinctScan) Plan() map[string]interface{} { + result := make(map[string]interface{}) + result["_op"] = fmt.Sprintf("%T", p) + result["_schema"] = p.Schema().Plan() + result["tableName"] = p.tableName + + if p.topExpr != nil { + result["topExpr"] = p.topExpr.Plan() + } + if p.filter != nil { + result["filter"] = p.filter.Plan() + } + result["column"] = p.column + return result +} + +func (p *PlanOpPQLDistinctScan) String() string { + return "" +} + +func (p *PlanOpPQLDistinctScan) AddWarning(warning string) { + p.warnings = append(p.warnings, warning) +} + +func (p *PlanOpPQLDistinctScan) Warnings() []string { + return p.warnings +} + +func (p *PlanOpPQLDistinctScan) Name() string { + return p.tableName +} + +func (p *PlanOpPQLDistinctScan) UpdateFilters(filterCondition types.PlanExpression) (types.PlanOperator, error) { + p.filter = filterCondition + return p, nil +} + +func (p *PlanOpPQLDistinctScan) Schema() types.Schema { + result := make(types.Schema, 0) + + tname := dax.TableName(p.tableName) + table, err := p.planner.schemaAPI.TableByName(context.Background(), tname) + if err != nil { + return result + } + + for _, fld := range table.Fields { + if strings.EqualFold(string(fld.Name), p.column) { + result = append(result, &types.PlannerColumn{ + ColumnName: string(fld.Name), + RelationName: p.tableName, + Type: fieldSQLDataType(pilosa.FieldToFieldInfo(fld)), + }) + break + } + } + return result +} + +func (p *PlanOpPQLDistinctScan) Children() []types.PlanOperator { + return []types.PlanOperator{} +} + +func (p *PlanOpPQLDistinctScan) Iterator(ctx context.Context, row types.Row) (types.RowIterator, error) { + return &distinctScanRowIter{ + planner: p.planner, + tableName: p.tableName, + column: p.column, + predicate: p.filter, + topExpr: p.topExpr, + }, nil +} + +func (p 
*PlanOpPQLDistinctScan) WithChildren(children ...types.PlanOperator) (types.PlanOperator, error) { + if len(children) != 0 { + return nil, sql3.NewErrInternalf("unexpected number of children '%d'", len(children)) + } + return p, nil +} + +type distinctScanRowIter struct { + planner *ExecutionPlanner + tableName string + column string + predicate types.PlanExpression + topExpr types.PlanExpression + + result []interface{} + rowWidth int + columnDataType parser.ExprDataType +} + +var _ types.RowIterator = (*distinctScanRowIter)(nil) + +func (i *distinctScanRowIter) Next(ctx context.Context) (types.Row, error) { + if i.result == nil { + err := i.planner.checkAccess(ctx, i.tableName, accessTypeReadData) + if err != nil { + return nil, err + } + + // go get the schema def and map names to indexes in the resultant row + tname := dax.TableName(i.tableName) + table, err := i.planner.schemaAPI.TableByName(ctx, tname) + if err != nil { + if errors.Is(err, pilosa.ErrIndexNotFound) { + return nil, sql3.NewErrInternalf("table not found '%s'", i.tableName) + } + return nil, err + } + i.rowWidth = 1 + + for _, fld := range table.Fields { + if strings.EqualFold(i.column, string(fld.Name)) { + i.columnDataType = fieldSQLDataType(pilosa.FieldToFieldInfo(fld)) + break + } + } + + var cond *pql.Call + + cond, err = i.planner.generatePQLCallFromExpr(ctx, i.predicate) + if err != nil { + return nil, err + } + if cond == nil { + cond = &pql.Call{Name: "All"} + } + + if i.topExpr != nil { + _, ok := i.topExpr.(*intLiteralPlanExpression) + if !ok { + return nil, sql3.NewErrInternalf("unexpected top expression type: %T", i.topExpr) + } + pqlValue, err := planExprToValue(i.topExpr) + if err != nil { + return nil, err + } + cond = &pql.Call{ + Name: "Limit", + Children: []*pql.Call{cond}, + Args: map[string]interface{}{"limit": pqlValue}, + Type: pql.PrecallGlobal, + } + } + call := &pql.Call{ + Name: "Distinct", + Args: map[string]interface{}{"field": i.column}, + Children: []*pql.Call{cond}, + } + + queryResponse, err := i.planner.executor.Execute(ctx, table, &pql.Query{Calls: []*pql.Call{call}}, nil, nil) + if err != nil { + return nil, err + } + + switch res := queryResponse.Results[0].(type) { + case *pilosa.Row: + result := make([]interface{}, 0) + if len(res.Keys) > 0 { + for _, n := range res.Keys { + result = append(result, n) + } + } else { + for _, n := range res.Columns() { + result = append(result, int64(n)) + } + } + i.result = result + + case pilosa.SignedRow: + result := make([]interface{}, 0) + + negs := res.Neg.Columns() + pos := res.Pos.Columns() + for _, n := range negs { + result = append(result, -(int64(n))) + } + for _, n := range pos { + result = append(result, int64(n)) + } + i.result = result + + case pilosa.DistinctTimestamp: + result := make([]interface{}, 0) + for _, n := range res.Values { + if tm, err := time.ParseInLocation(time.RFC3339Nano, n, time.UTC); err == nil { + result = append(result, tm) + } else { + return nil, sql3.NewErrInternalf("unable to convert to time.Time: %v", n) + } + } + i.result = result + + default: + return nil, sql3.NewErrInternalf("unexpected Distinct() result type: %T", res) + } + } + + if len(i.result) > 0 { + row := make([]interface{}, i.rowWidth) + + result := i.result[0] + + switch t := i.columnDataType.(type) { + + case *parser.DataTypeBool: + val, ok := result.(int64) + if !ok { + return nil, sql3.NewErrInternalf("unexpected type for column value '%T'", result) + } + row[0] = val == 1 + + case *parser.DataTypeDecimal: + val, ok := result.(int64) + if !ok { + return nil, sql3.NewErrInternalf("unexpected type for column value '%T'", result) + } +
row[0] = pql.NewDecimal(val, t.Scale) + + case *parser.DataTypeIDSet: + //empty sets are null + val, ok := result.([]uint64) + if !ok { + return nil, sql3.NewErrInternalf("unexpected type for column value '%T'", result) + } + if len(val) == 0 { + row[0] = nil + } else { + row[0] = val + } + + case *parser.DataTypeStringSet: + //empty sets are null + val, ok := result.([]string) + if !ok { + return nil, sql3.NewErrInternalf("unexpected type for column value '%T'", result) + } + if len(val) == 0 { + row[0] = nil + } else { + row[0] = val + } + + default: + row[0] = result + } + + // Move to next result element. + i.result = i.result[1:] + return row, nil + } + return nil, types.ErrNoMoreRows +} diff --git a/sql3/planner/oppqlfiltereddelete.go b/sql3/planner/oppqlfiltereddelete.go new file mode 100644 index 000000000..d76feaca4 --- /dev/null +++ b/sql3/planner/oppqlfiltereddelete.go @@ -0,0 +1,109 @@ +// Copyright 2022 Molecula Corp. All rights reserved. + +package planner + +import ( + "context" + "fmt" + + "github.com/featurebasedb/featurebase/v3/dax" + "github.com/featurebasedb/featurebase/v3/pql" + "github.com/featurebasedb/featurebase/v3/sql3" + "github.com/featurebasedb/featurebase/v3/sql3/planner/types" +) + +// PlanOpPQLFilteredDelete plan operator to delete rows from a table based on a filter expression. +type PlanOpPQLFilteredDelete struct { + planner *ExecutionPlanner + tableName string + filter types.PlanExpression + warnings []string +} + +func NewPlanOpPQLFilteredDelete(p *ExecutionPlanner, tableName string, filter types.PlanExpression) *PlanOpPQLFilteredDelete { + return &PlanOpPQLFilteredDelete{ + planner: p, + tableName: tableName, + filter: filter, + warnings: make([]string, 0), + } +} + +func (p *PlanOpPQLFilteredDelete) Plan() map[string]interface{} { + result := make(map[string]interface{}) + result["_op"] = fmt.Sprintf("%T", p) + result["_schema"] = p.Schema().Plan() + result["filter"] = p.filter.Plan() + result["tableName"] = p.tableName + return result +} + +func (p *PlanOpPQLFilteredDelete) String() string { + return "" +} + +func (p *PlanOpPQLFilteredDelete) AddWarning(warning string) { + p.warnings = append(p.warnings, warning) +} + +func (p *PlanOpPQLFilteredDelete) Warnings() []string { + return p.warnings +} + +func (p *PlanOpPQLFilteredDelete) Schema() types.Schema { + return types.Schema{} +} + +func (p *PlanOpPQLFilteredDelete) Children() []types.PlanOperator { + return []types.PlanOperator{} +} + +func (p *PlanOpPQLFilteredDelete) Iterator(ctx context.Context, row types.Row) (types.RowIterator, error) { + return &filteredDeleteRowIter{ + planner: p.planner, + tableName: p.tableName, + filter: p.filter, + }, nil +} + +func (p *PlanOpPQLFilteredDelete) WithChildren(children ...types.PlanOperator) (types.PlanOperator, error) { + if len(children) != 0 { + return nil, sql3.NewErrInternalf("unexpected number of children '%d'", len(children)) + } + return NewPlanOpPQLFilteredDelete(p.planner, p.tableName, p.filter), nil +} + +type filteredDeleteRowIter struct { + planner *ExecutionPlanner + tableName string + filter types.PlanExpression +} + +var _ types.RowIterator = (*filteredDeleteRowIter)(nil) + +func (i *filteredDeleteRowIter) Next(ctx context.Context) (types.Row, error) { + var err error + + err = i.planner.checkAccess(ctx, i.tableName, accessTypeWriteData) + if err != nil { + return nil, err + } + + cond, err := i.planner.generatePQLCallFromExpr(ctx, i.filter) + if err != nil { + return nil, err + } + call := &pql.Call{Name: "Delete", Children: 
[]*pql.Call{cond}} + + tbl, err := i.planner.schemaAPI.TableByName(ctx, dax.TableName(i.tableName)) + if err != nil { + return nil, sql3.NewErrTableNotFound(0, 0, i.tableName) + } + + _, err = i.planner.executor.Execute(ctx, tbl, &pql.Query{Calls: []*pql.Call{call}}, nil, nil) + if err != nil { + return nil, err + } + + return nil, types.ErrNoMoreRows +} diff --git a/sql3/planner/oppqlgroupby.go b/sql3/planner/oppqlgroupby.go index 71f01f502..86b444f94 100644 --- a/sql3/planner/oppqlgroupby.go +++ b/sql3/planner/oppqlgroupby.go @@ -7,6 +7,7 @@ import ( "fmt" pilosa "github.com/featurebasedb/featurebase/v3" + "github.com/featurebasedb/featurebase/v3/dax" "github.com/featurebasedb/featurebase/v3/pql" "github.com/featurebasedb/featurebase/v3/sql3" "github.com/featurebasedb/featurebase/v3/sql3/parser" @@ -38,15 +39,10 @@ func NewPlanOpPQLGroupBy(p *ExecutionPlanner, tableName string, groupByExprs []t func (p *PlanOpPQLGroupBy) Plan() map[string]interface{} { result := make(map[string]interface{}) result["_op"] = fmt.Sprintf("%T", p) - sc := make([]string, 0) - for _, e := range p.Schema() { - sc = append(sc, fmt.Sprintf("'%s', '%s', '%s'", e.ColumnName, e.RelationName, e.Type.TypeDescription())) - } - result["_schema"] = sc + result["_schema"] = p.Schema().Plan() result["tableName"] = p.tableName if p.filter != nil { result["filter"] = p.filter.Plan() - } result["aggregate"] = p.aggregate.AggExpression().Plan() ps := make([]interface{}, 0) @@ -205,15 +201,22 @@ func (i *pqlGroupByRowIter) Next(ctx context.Context) (types.Row, error) { call.Args["filter"] = cond } - queryResponse, err := i.planner.executor.Execute(ctx, i.tableName, &pql.Query{Calls: []*pql.Call{call}}, nil, nil) + tbl, err := i.planner.schemaAPI.TableByName(ctx, dax.TableName(i.tableName)) + if err != nil { + return nil, sql3.NewErrTableNotFound(0, 0, i.tableName) + } + + queryResponse, err := i.planner.executor.Execute(ctx, tbl, &pql.Query{Calls: []*pql.Call{call}}, nil, nil) if err != nil { return nil, err } - tbl, ok := queryResponse.Results[0].(*pilosa.GroupCounts) + + gcs, ok := queryResponse.Results[0].(*pilosa.GroupCounts) if !ok { return nil, sql3.NewErrInternalf("unexpected Extract() result type: %T", queryResponse.Results[0]) } - i.result = tbl.Groups() + + i.result = gcs.Groups() } if len(i.result) > 0 { diff --git a/sql3/planner/oppqlmultiaggregate.go b/sql3/planner/oppqlmultiaggregate.go index 53712fdf0..634cfa185 100644 --- a/sql3/planner/oppqlmultiaggregate.go +++ b/sql3/planner/oppqlmultiaggregate.go @@ -27,12 +27,7 @@ func NewPlanOpPQLMultiAggregate(p *ExecutionPlanner, operators []*PlanOpPQLAggre func (p *PlanOpPQLMultiAggregate) Plan() map[string]interface{} { result := make(map[string]interface{}) result["_op"] = fmt.Sprintf("%T", p) - sc := make([]string, 0) - for _, e := range p.Schema() { - sc = append(sc, fmt.Sprintf("'%s', '%s', '%s'", e.ColumnName, e.RelationName, e.Type.TypeDescription())) - } - result["_schema"] = sc - + result["_schema"] = p.Schema().Plan() ps := make([]interface{}, 0) for _, e := range p.operators { ps = append(ps, e.Plan()) @@ -59,7 +54,7 @@ func (p *PlanOpPQLMultiAggregate) Schema() types.Schema { s := &types.PlannerColumn{ ColumnName: aggOp.aggregate.String(), RelationName: "", - Type: aggOp.aggregate.AggExpression().Type(), + Type: aggOp.aggregate.Type(), } result[idx] = s } diff --git a/sql3/planner/oppqlmultigroupby.go b/sql3/planner/oppqlmultigroupby.go index 0719dbc7c..3ad787b2e 100644 --- a/sql3/planner/oppqlmultigroupby.go +++ b/sql3/planner/oppqlmultigroupby.go @@ -33,12 
+33,7 @@ func NewPlanOpPQLMultiGroupBy(p *ExecutionPlanner, operators []*PlanOpPQLGroupBy func (p *PlanOpPQLMultiGroupBy) Plan() map[string]interface{} { result := make(map[string]interface{}) result["_op"] = fmt.Sprintf("%T", p) - sc := make([]string, 0) - for _, e := range p.Schema() { - sc = append(sc, fmt.Sprintf("'%s', '%s', '%s'", e.ColumnName, e.RelationName, e.Type.TypeDescription())) - } - result["_schema"] = sc - + result["_schema"] = p.Schema().Plan() ps := make([]interface{}, 0) for _, e := range p.operators { ps = append(ps, e.Plan()) @@ -135,9 +130,9 @@ type pqlMultiGroupByRowIter struct { planner *ExecutionPlanner groupByColumns []types.PlanExpression iterators []types.RowIterator - groupCache KeyedRowCache + groupCache map[string]types.Row - groupKeys []uint64 + groupKeys []string } var _ types.RowIterator = (*pqlMultiGroupByRowIter)(nil) @@ -145,7 +140,7 @@ var _ types.RowIterator = (*pqlMultiGroupByRowIter)(nil) func (i *pqlMultiGroupByRowIter) Next(ctx context.Context) (types.Row, error) { if i.groupCache == nil { //consume all the rows from the child iterators - i.groupCache = newinMemoryKeyedRowCache() + i.groupCache = make(map[string]types.Row) if err := i.computeMultiGroupBy(ctx); err != nil { return nil, err } @@ -154,9 +149,9 @@ func (i *pqlMultiGroupByRowIter) Next(ctx context.Context) (types.Row, error) { if len(i.groupKeys) > 0 { key := i.groupKeys[0] - row, err := i.groupCache.Get(key) - if err != nil { - return nil, err + row, ok := i.groupCache[key] + if !ok { + return nil, sql3.NewErrInternalf("unexpected absence of key") } // Move to next result element. i.groupKeys = i.groupKeys[1:] @@ -180,19 +175,16 @@ func (i *pqlMultiGroupByRowIter) computeMultiGroupBy(ctx context.Context) error for { //build a key for the group by columns for this row - key, _, err := groupingKeyHash(ctx, i.groupByColumns, irow) - if err != nil { - return err - } - - // get the group from the cache - cachedRow, err := i.groupCache.Get(key) + key, _, err := groupingKey(ctx, i.groupByColumns, irow) if err != nil { return err } aggIndex := iteratorIdx + len(i.groupByColumns) - if cachedRow != nil { + + // get the group from the cache + cachedRow, ok := i.groupCache[key] + if ok { // if the group exists then update the row // NB: the aggregate for this iterator is at the end of irow cachedRow[aggIndex] = irow[len(irow)-1] @@ -207,7 +199,7 @@ func (i *pqlMultiGroupByRowIter) computeMultiGroupBy(ctx context.Context) error cachedRow[aggIndex] = irow[len(irow)-1] // write the row to the cache - err = i.groupCache.Put(key, cachedRow) + i.groupCache[key] = cachedRow if err != nil { return err } diff --git a/sql3/planner/oppqltablescan.go b/sql3/planner/oppqltablescan.go index db57adc50..542b513a2 100644 --- a/sql3/planner/oppqltablescan.go +++ b/sql3/planner/oppqltablescan.go @@ -38,12 +38,7 @@ func NewPlanOpPQLTableScan(p *ExecutionPlanner, tableName string, columns []stri func (p *PlanOpPQLTableScan) Plan() map[string]interface{} { result := make(map[string]interface{}) result["_op"] = fmt.Sprintf("%T", p) - sc := make([]string, 0) - for _, e := range p.Schema() { - sc = append(sc, fmt.Sprintf("'%s', '%s', '%s'", e.ColumnName, e.RelationName, e.Type.TypeDescription())) - } - result["_schema"] = sc - + result["_schema"] = p.Schema().Plan() result["tableName"] = p.tableName if p.topExpr != nil { @@ -52,7 +47,6 @@ func (p *PlanOpPQLTableScan) Plan() map[string]interface{} { if p.filter != nil { result["filter"] = p.filter.Plan() } - result["columns"] = p.columns return result } @@ -216,18 
+210,26 @@ func (i *tableScanRowIter) Next(ctx context.Context) (types.Row, error) { }, ) } - queryResponse, err := i.planner.executor.Execute(ctx, i.tableName, &pql.Query{Calls: []*pql.Call{call}}, nil, nil) + + tbl, err := i.planner.schemaAPI.TableByName(ctx, dax.TableName(i.tableName)) + if err != nil { + return nil, sql3.NewErrTableNotFound(0, 0, i.tableName) + } + + queryResponse, err := i.planner.executor.Execute(ctx, tbl, &pql.Query{Calls: []*pql.Call{call}}, nil, nil) if err != nil { return nil, err } - tbl, ok := queryResponse.Results[0].(pilosa.ExtractedTable) + + extbl, ok := queryResponse.Results[0].(pilosa.ExtractedTable) if !ok { return nil, sql3.NewErrInternalf("unexpected Extract() result type: %T", queryResponse.Results[0]) } - i.result = tbl.Columns + + i.result = extbl.Columns // set the source index - for idx, fld := range tbl.Fields { + for idx, fld := range extbl.Fields { mappedColumn, ok := i.columnMap[fld.Name] if !ok { return nil, sql3.NewErrInternalf("mapped column not found for column named '%s'", fld.Name) diff --git a/sql3/planner/opprojection.go b/sql3/planner/opprojection.go index d3c99fd37..8aee7a879 100644 --- a/sql3/planner/opprojection.go +++ b/sql3/planner/opprojection.go @@ -60,12 +60,8 @@ func (p *PlanOpProjection) WithChildren(children ...types.PlanOperator) (types.P func (p *PlanOpProjection) Plan() map[string]interface{} { result := make(map[string]interface{}) - result["__op"] = fmt.Sprintf("%T", p) - sc := make([]string, 0) - for _, e := range p.Schema() { - sc = append(sc, fmt.Sprintf("'%s', '%s', '%s'", e.ColumnName, e.RelationName, e.Type.TypeDescription())) - } - result["__schema"] = sc + result["_op"] = fmt.Sprintf("%T", p) + result["_schema"] = p.Schema().Plan() result["child"] = p.ChildOp.Plan() @@ -73,7 +69,7 @@ func (p *PlanOpProjection) Plan() map[string]interface{} { for _, e := range p.Projections { ps = append(ps, e.Plan()) } - result["_projections"] = ps + result["projections"] = ps return result } @@ -107,22 +103,22 @@ func (p *PlanOpProjection) WithUpdatedExpressions(exprs ...types.PlanExpression) } func ExpressionToColumn(e types.PlanExpression) *types.PlannerColumn { - var name string - if n, ok := e.(types.IdentifiableByName); ok { - name = n.Name() - } else { - name = "" //e.String() - } + name := "" + relationName := "" + + switch thisExpr := e.(type) { + case *qualifiedRefPlanExpression: + name = thisExpr.columnName + relationName = thisExpr.tableName - var table string - if t, ok := e.(types.IdentifiableByName); ok { - table = t.Name() + case *aliasPlanExpression: + name = thisExpr.aliasName } return &types.PlannerColumn{ ColumnName: name, + RelationName: relationName, Type: e.Type(), - RelationName: table, } } diff --git a/sql3/planner/opqltruncate.go b/sql3/planner/opqltruncate.go new file mode 100644 index 000000000..3e89abf3f --- /dev/null +++ b/sql3/planner/opqltruncate.go @@ -0,0 +1,100 @@ +// Copyright 2022 Molecula Corp. All rights reserved. + +package planner + +import ( + "context" + "fmt" + + "github.com/featurebasedb/featurebase/v3/dax" + "github.com/featurebasedb/featurebase/v3/pql" + "github.com/featurebasedb/featurebase/v3/sql3" + "github.com/featurebasedb/featurebase/v3/sql3/planner/types" +) + +// TODO (pok) we should look at a drop and recreate, or an actual truncate PQL op + +// PlanOpPQLTruncateTable plan operator to delete all rows from a table.
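
Per the TODO above, truncation is currently implemented as a mass delete rather than a dedicated PQL op: the iterator in this operator wraps an All() precall in a Delete. For comparison with the keyed delete sketch earlier, the equivalent query construction:

// truncateQuery is the whole-table variant of the delete: Delete(All()).
func truncateQuery() *pql.Query {
	all := &pql.Call{Name: "All"}
	del := &pql.Call{Name: "Delete", Children: []*pql.Call{all}}
	return &pql.Query{Calls: []*pql.Call{del}}
}
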
+type PlanOpPQLTruncateTable struct { + planner *ExecutionPlanner + tableName string + warnings []string +} + +func NewPlanOpPQLTruncateTable(p *ExecutionPlanner, tableName string) *PlanOpPQLTruncateTable { + return &PlanOpPQLTruncateTable{ + planner: p, + tableName: tableName, + warnings: make([]string, 0), + } +} + +func (p *PlanOpPQLTruncateTable) Plan() map[string]interface{} { + result := make(map[string]interface{}) + result["_op"] = fmt.Sprintf("%T", p) + result["_schema"] = p.Schema().Plan() + result["tableName"] = p.tableName + return result +} + +func (p *PlanOpPQLTruncateTable) String() string { + return "" +} + +func (p *PlanOpPQLTruncateTable) AddWarning(warning string) { + p.warnings = append(p.warnings, warning) +} + +func (p *PlanOpPQLTruncateTable) Warnings() []string { + return p.warnings +} + +func (p *PlanOpPQLTruncateTable) Schema() types.Schema { + return types.Schema{} +} + +func (p *PlanOpPQLTruncateTable) Children() []types.PlanOperator { + return []types.PlanOperator{} +} + +func (p *PlanOpPQLTruncateTable) Iterator(ctx context.Context, row types.Row) (types.RowIterator, error) { + return &truncateTableRowIter{ + planner: p.planner, + tableName: p.tableName, + }, nil +} + +func (p *PlanOpPQLTruncateTable) WithChildren(children ...types.PlanOperator) (types.PlanOperator, error) { + return p, nil +} + +type truncateTableRowIter struct { + planner *ExecutionPlanner + tableName string +} + +var _ types.RowIterator = (*truncateTableRowIter)(nil) + +func (i *truncateTableRowIter) Next(ctx context.Context) (types.Row, error) { + err := i.planner.checkAccess(ctx, i.tableName, accessTypeWriteData) + if err != nil { + return nil, err + } + + cond := &pql.Call{ + Name: "All", + } + call := &pql.Call{Name: "Delete", Children: []*pql.Call{cond}} + + tbl, err := i.planner.schemaAPI.TableByName(ctx, dax.TableName(i.tableName)) + if err != nil { + return nil, sql3.NewErrTableNotFound(0, 0, i.tableName) + } + + _, err = i.planner.executor.Execute(ctx, tbl, &pql.Query{Calls: []*pql.Call{call}}, nil, nil) + if err != nil { + return nil, err + } + + return nil, types.ErrNoMoreRows +} diff --git a/sql3/planner/opquery.go b/sql3/planner/opquery.go index 6acfca7bc..6b34a2327 100644 --- a/sql3/planner/opquery.go +++ b/sql3/planner/opquery.go @@ -18,12 +18,6 @@ type PlanOpQuery struct { ChildOp types.PlanOperator - // the list of aggregate terms - aggregates []types.PlanExpression - - // all the identifiers that are referenced - referenceList []*qualifiedRefPlanExpression - sql string warnings []string } @@ -75,12 +69,7 @@ func (p *PlanOpQuery) WithChildren(children ...types.PlanOperator) (types.PlanOp func (p *PlanOpQuery) Plan() map[string]interface{} { result := make(map[string]interface{}) result["_op"] = fmt.Sprintf("%T", p) - sc := make([]string, 0) - for _, e := range p.Schema() { - sc = append(sc, fmt.Sprintf("'%s', '%s', '%s'", e.ColumnName, e.RelationName, e.Type.TypeDescription())) - } - result["_schema"] = sc - + result["_schema"] = p.Schema().Plan() result["sql"] = p.sql result["warnings"] = p.warnings result["child"] = p.ChildOp.Plan() diff --git a/sql3/planner/oprelalias.go b/sql3/planner/oprelalias.go index 98fd7aa41..2e4d5610c 100644 --- a/sql3/planner/oprelalias.go +++ b/sql3/planner/oprelalias.go @@ -53,12 +53,7 @@ func (p *PlanOpRelAlias) WithChildren(children ...types.PlanOperator) (types.Pla func (p *PlanOpRelAlias) Plan() map[string]interface{} { result := make(map[string]interface{}) result["_op"] = fmt.Sprintf("%T", p) - sc := make([]string, 0) - for _, e := range 
p.Schema() { - sc = append(sc, fmt.Sprintf("'%s', '%s', '%s'", e.ColumnName, e.RelationName, e.Type.TypeDescription())) - } - result["_schema"] = sc - + result["_schema"] = p.Schema().Plan() result["alias"] = p.alias result["child"] = p.ChildOp.Plan() return result diff --git a/sql3/planner/opsubquery.go b/sql3/planner/opsubquery.go index d2259663c..e056df4ea 100644 --- a/sql3/planner/opsubquery.go +++ b/sql3/planner/opsubquery.go @@ -6,6 +6,7 @@ import ( "context" "fmt" + "github.com/featurebasedb/featurebase/v3/sql3" "github.com/featurebasedb/featurebase/v3/sql3/planner/types" ) @@ -37,18 +38,16 @@ func (p *PlanOpSubquery) Children() []types.PlanOperator { } func (p *PlanOpSubquery) WithChildren(children ...types.PlanOperator) (types.PlanOperator, error) { - return nil, nil + if len(children) != 1 { + return nil, sql3.NewErrInternalf("unexpected number of children '%d'", len(children)) + } + return NewPlanOpSubquery(children[0]), nil } func (p *PlanOpSubquery) Plan() map[string]interface{} { result := make(map[string]interface{}) result["_op"] = fmt.Sprintf("%T", p) - sc := make([]string, 0) - for _, e := range p.Schema() { - sc = append(sc, fmt.Sprintf("'%s', '%s', '%s'", e.ColumnName, e.RelationName, e.Type.TypeDescription())) - } - result["_schema"] = sc - + result["_schema"] = p.Schema().Plan() result["child"] = p.ChildOp.Plan() return result } diff --git a/sql3/planner/opsystemtable.go b/sql3/planner/opsystemtable.go index 0e41e64b4..949b63bfe 100644 --- a/sql3/planner/opsystemtable.go +++ b/sql3/planner/opsystemtable.go @@ -68,11 +68,6 @@ var systemTables = map[string]*systemTable{ ColumnName: "node_count", Type: parser.NewDataTypeInt(), }, - &types.PlannerColumn{ - RelationName: fbClusterInfo, - ColumnName: "shard_width", - Type: parser.NewDataTypeInt(), - }, &types.PlannerColumn{ RelationName: fbClusterInfo, ColumnName: "replica_count", @@ -108,6 +103,11 @@ ColumnName: "is_primary", Type: parser.NewDataTypeBool(), }, + &types.PlannerColumn{ + RelationName: fbClusterNodes, + ColumnName: "space_used", + Type: parser.NewDataTypeInt(), + }, }, }, @@ -237,11 +237,7 @@ func NewPlanOpSystemTable(p *ExecutionPlanner, table *systemTable) *PlanOpSystem func (p *PlanOpSystemTable) Plan() map[string]interface{} { result := make(map[string]interface{}) result["_op"] = fmt.Sprintf("%T", p) - ps := make([]string, 0) - for _, e := range p.Schema() { - ps = append(ps, fmt.Sprintf("'%s', '%s', '%s'", e.ColumnName, e.RelationName, e.Type.TypeDescription())) - } - result["_schema"] = ps + result["_schema"] = p.Schema().Plan() return result } @@ -312,7 +308,6 @@ func (i *fbClusterInfoRowIter) Next(ctx context.Context) (types.Row, error) { i.planner.systemAPI.Version(), i.planner.systemAPI.ClusterState(), i.planner.systemAPI.ClusterNodeCount(), - i.planner.systemAPI.ShardWidth(), i.planner.systemAPI.ClusterReplicaCount(), } i.rowIndex += 1 @@ -333,6 +328,12 @@ func (i *fbClusterNodesRowIter) Next(ctx context.Context) (types.Row, error) { i.result = i.planner.systemAPI.ClusterNodes() } + u := i.planner.systemAPI.DataDir() + spaceUsed, err := pilosa.GetDiskUsage(u) + if err != nil { + return nil, err + } + if len(i.result) > 0 { n := i.result[0] row := []interface{}{ @@ -341,6 +342,7 @@ n.URI, n.GRPCURI, n.IsPrimary, + spaceUsed.Usage, } // Move to next result element.
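
The space_used value above comes from pilosa.GetDiskUsage, whose implementation is not shown in this diff; something along these lines, a recursive walk summing regular-file sizes under the node's data directory, would produce an equivalent figure. This is a hypothetical stand-in, not the library's implementation:

package main

import (
	"fmt"
	"io/fs"
	"path/filepath"
)

// diskUsage sums the sizes of regular files under dir.
func diskUsage(dir string) (uint64, error) {
	var total uint64
	err := filepath.WalkDir(dir, func(path string, d fs.DirEntry, err error) error {
		if err != nil {
			return err
		}
		if d.Type().IsRegular() {
			info, err := d.Info()
			if err != nil {
				return err
			}
			total += uint64(info.Size())
		}
		return nil
	})
	return total, err
}

func main() {
	usage, err := diskUsage(".")
	fmt.Println(usage, err)
}
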
i.result = i.result[1:] @@ -436,7 +438,11 @@ func (i *fbTableDDLRowIter) Next(ctx context.Context) (types.Row, error) { fmt.Fprintf(&buf, " cachetype %s", col.Options.CacheType) } if col.Options.CacheSize != pilosa.DefaultCacheSize && col.Options.CacheSize > 0 { - fmt.Fprintf(&buf, " cachesize %d", col.Options.CacheSize) + // if we still have the default, we need to print that out if we have a non-default size + if col.Options.CacheType == pilosa.DefaultCacheType && len(col.Options.CacheType) > 0 { + fmt.Fprintf(&buf, " cachetype %s", col.Options.CacheType) + } + fmt.Fprintf(&buf, " size %d", col.Options.CacheSize) } case *parser.DataTypeIDSet, *parser.DataTypeStringSet: @@ -444,7 +450,11 @@ func (i *fbTableDDLRowIter) Next(ctx context.Context) (types.Row, error) { fmt.Fprintf(&buf, " cachetype %s", col.Options.CacheType) } if col.Options.CacheSize != pilosa.DefaultCacheSize && col.Options.CacheSize > 0 { - fmt.Fprintf(&buf, " cachesize %d", col.Options.CacheSize) + // if we still have the default, we need to print that out if we have a non-default size + if col.Options.CacheType == pilosa.DefaultCacheType && len(col.Options.CacheType) > 0 { + fmt.Fprintf(&buf, " cachetype %s", col.Options.CacheType) + } + fmt.Fprintf(&buf, " size %d", col.Options.CacheSize) } case *parser.DataTypeIDSetQuantum, *parser.DataTypeStringSetQuantum: @@ -452,7 +462,11 @@ func (i *fbTableDDLRowIter) Next(ctx context.Context) (types.Row, error) { fmt.Fprintf(&buf, " cachetype %s", col.Options.CacheType) } if col.Options.CacheSize != pilosa.DefaultCacheSize && col.Options.CacheSize > 0 { - fmt.Fprintf(&buf, " cachesize %d", col.Options.CacheSize) + // if we still have the default, we need to print that out if we have a non-default size + if col.Options.CacheType == pilosa.DefaultCacheType && len(col.Options.CacheType) > 0 { + fmt.Fprintf(&buf, " cachetype %s", col.Options.CacheType) + } + fmt.Fprintf(&buf, " size %d", col.Options.CacheSize) } if !col.Options.TimeQuantum.IsEmpty() { fmt.Fprintf(&buf, " timequantum '%s'", col.Options.TimeQuantum) diff --git a/sql3/planner/optablevaluedfunction.go b/sql3/planner/optablevaluedfunction.go index 9f7ff23d0..1619fcdac 100644 --- a/sql3/planner/optablevaluedfunction.go +++ b/sql3/planner/optablevaluedfunction.go @@ -57,12 +57,7 @@ func (p *PlanOpTableValuedFunction) WithChildren(children ...types.PlanOperator) func (p *PlanOpTableValuedFunction) Plan() map[string]interface{} { result := make(map[string]interface{}) result["_op"] = fmt.Sprintf("%T", p) - sc := make([]string, 0) - for _, e := range p.Schema() { - sc = append(sc, fmt.Sprintf("'%s', '%s', '%s'", e.ColumnName, e.RelationName, e.Type.TypeDescription())) - } - result["_schema"] = sc - + result["_schema"] = p.Schema().Plan() return result } diff --git a/sql3/planner/optop.go b/sql3/planner/optop.go index 2d6814c87..0e7d44269 100644 --- a/sql3/planner/optop.go +++ b/sql3/planner/optop.go @@ -53,12 +53,7 @@ func (p *PlanOpTop) WithChildren(children ...types.PlanOperator) (types.PlanOper func (p *PlanOpTop) Plan() map[string]interface{} { result := make(map[string]interface{}) result["_op"] = fmt.Sprintf("%T", p) - sc := make([]string, 0) - for _, e := range p.Schema() { - sc = append(sc, fmt.Sprintf("'%s', '%s', '%s'", e.ColumnName, e.RelationName, e.Type.TypeDescription())) - } - result["_schema"] = sc - + result["_schema"] = p.Schema().Plan() result["expr"] = p.expr result["child"] = p.ChildOp.Plan() return result diff --git a/sql3/planner/planoptimizer.go b/sql3/planner/planoptimizer.go index 3c004cf1c..7b8d44b3b 
100644 --- a/sql3/planner/planoptimizer.go +++ b/sql3/planner/planoptimizer.go @@ -23,9 +23,22 @@ type OptimizerFunc func(context.Context, *ExecutionPlanner, types.PlanOperator, // a list of optimizer rules; order can be important var optimizerFunctions = []OptimizerFunc{ + // remove unused column references from table scan extracts + removeUnusedExtractColumnReferences, + + // if we have a distinct operator over a single projection, + // where the projection is on a table scan, use a PQL Distinct scan operator + tryToReplaceDistinctWithPQLDistinct, + + // fix expression references for having + fixHavingReferences, + // push down filter predicates as far as possible, pushdownFilters, + // try to use a PlanOpPQLFilteredDelete instead of PlanOpPQLConstRowDelete + tryToReplaceConstRowDeleteWithFilteredDelete, + // if we have a group by that has one TableScanOperator, // try to use a PQL(multi)groupby operator instead tryToReplaceGroupByWithPQLGroupBy, @@ -55,15 +68,26 @@ type OptimizerScope struct { } +func dumpPlan(prefix []string, root types.PlanOperator, suffix string) { + // DEBUG !! + // for _, s := range prefix { + // log.Println(s) + // } + // jplan := root.Plan() + // a, _ := json.MarshalIndent(jplan, "", " ") + // log.Println(string(a)) + // log.Println() + // DEBUG !! +} + // optimizePlan takes a plan from the compiler and executes a series of transforms on it to optimize it func (p *ExecutionPlanner) optimizePlan(ctx context.Context, plan types.PlanOperator) (types.PlanOperator, error) { - // log.Println("================================================================================") - // log.Println("plan pre-optimzation") - // jplan := plan.Plan() - // a, _ := json.MarshalIndent(jplan, "", " ") - // log.Println(string(a)) - // log.Println("--------------------------------------------------------------------------------") + dumpPlan( + []string{"================================================================================", "plan pre-optimization"}, + plan, + "--------------------------------------------------------------------------------", + ) var err error var result = plan @@ -74,12 +98,17 @@ } } - // log.Println("================================================================================") - // log.Println("plan ppst-optimzation") - // jplan = result.Plan() - // a, _ = json.MarshalIndent(jplan, "", " ") - // log.Println(string(a)) - // log.Println("--------------------------------------------------------------------------------") + dumpPlan( + []string{"================================================================================", "plan post-optimization"}, + result, + "--------------------------------------------------------------------------------", + ) + + // check that result is a PlanOpQuery + _, ok := result.(*PlanOpQuery) + if !ok { + return nil, sql3.NewErrInternalf("unexpected root operator type '%T'", result) + } return result, nil } @@ -165,38 +194,42 @@ func (ta RelationAliasesMap) addAlias(alias types.IdentifiableByName, target typ return nil } +// build a map of alias names to relations func getRelationAliases(n types.PlanOperator, scope *OptimizerScope) (RelationAliasesMap, error) { - var aliases RelationAliasesMap - var aliasFn func(node types.PlanOperator) bool var inspectErr error - aliasFn = func(node types.PlanOperator) bool { + + aliases := make(RelationAliasesMap) + InspectPlan(n, func(node types.PlanOperator) bool { if node == nil {
return false } - if at, ok := node.(*PlanOpRelAlias); ok { - switch t := at.ChildOp.(type) { + switch node := node.(type) { + case *PlanOpRelAlias: + switch t := node.ChildOp.(type) { case *PlanOpPQLTableScan: - inspectErr = aliases.addAlias(at, t) + inspectErr = aliases.addAlias(node, t) + case *PlanOpPQLDistinctScan: + inspectErr = aliases.addAlias(node, t) case *PlanOpSubquery: - inspectErr = aliases.addAlias(at, t) + inspectErr = aliases.addAlias(node, t) default: - panic(fmt.Sprintf("unexpected child node '%T'", at.ChildOp)) + inspectErr = sql3.NewErrInternalf("unexpected alias child type '%T'", node.ChildOp) } return false - } - switch node := node.(type) { case *PlanOpPQLTableScan: inspectErr = aliases.addAlias(node, node) return false - } + case *PlanOpPQLDistinctScan: + inspectErr = aliases.addAlias(node, node) + return false + + } return true - } + }) - aliases = make(RelationAliasesMap) - InspectPlan(n, aliasFn) if inspectErr != nil { return nil, inspectErr } @@ -221,7 +254,7 @@ func filterPushdownAboveTablesChildSelector(c ParentContext) bool { switch c.Parent.(type) { case *PlanOpFilter: switch c.Operator.(type) { - case *PlanOpRelAlias, *PlanOpPQLTableScan: + case *PlanOpRelAlias, *PlanOpPQLTableScan, *PlanOpPQLDistinctScan: return false } } @@ -229,6 +262,61 @@ return true } +// when we compile and create a PlanOpPQLTableScan we just add all the columns to the underlying extract. This is a bad idea, since +// extracts are expensive, more so when we are asking for columns we don't actually need. This function removes those unneeded references. +func removeUnusedExtractColumnReferences(ctx context.Context, a *ExecutionPlanner, n types.PlanOperator, scope *OptimizerScope) (types.PlanOperator, bool, error) { + // get all the qualifiedRefs across the plan + // using a map to eliminate dupes and we don't + // care about the order when iterating + refs := make(map[string]*qualifiedRefPlanExpression) + InspectOperatorExpressions(n, func(pe types.PlanExpression) bool { + switch qref := pe.(type) { + case *qualifiedRefPlanExpression: + refs[qref.String()] = qref + return false + } + return true + }) + + return TransformPlanOpWithParent(n, func(c ParentContext) bool { return true }, func(c ParentContext) (types.PlanOperator, bool, error) { + switch thisNode := c.Operator.(type) { + case *PlanOpPQLTableScan: + + newExtractList := make([]string, 0) + + // loop thru the extract list and make a new extract list + // with just the columns we need + alias, ok := c.Parent.(*PlanOpRelAlias) + if ok { + // handle the case where the parent is an alias + for _, ex := range thisNode.columns { + for _, ref := range refs { + if (strings.EqualFold(ref.tableName, thisNode.tableName) || strings.EqualFold(ref.tableName, alias.alias)) && strings.EqualFold(ex, ref.columnName) { + newExtractList = append(newExtractList, ex) + break + } + } + } + } else { + for _, ex := range thisNode.columns { + for _, ref := range refs { + if strings.EqualFold(ref.tableName, thisNode.tableName) && strings.EqualFold(ex, ref.columnName) { + newExtractList = append(newExtractList, ex) + break + } + } + } + } + + // newExtractList should now contain just the cols that are referenced + return NewPlanOpPQLTableScan(a, thisNode.tableName, newExtractList), false, nil + + default: + return thisNode, true, nil + } + }) +} + // returns an expression given a list of expressions, if the list is > 1 expression, all the individual // expressions are ANDed together func
joinExprsWithAnd(exprs ...types.PlanExpression) types.PlanExpression { @@ -260,43 +348,45 @@ func removePushedDownConditions(ctx context.Context, a *ExecutionPlanner, node * return NewPlanOpFilter(a, joinedExpr, node.ChildOp), false, nil } -func getRelation(node types.PlanOperator) types.IdentifiableByName { - var relation types.IdentifiableByName - InspectPlan(node, func(node types.PlanOperator) bool { - switch n := node.(type) { - case *PlanOpPQLTableScan: - relation = n - return false - } - return true - }) - return relation -} - func pushdownFiltersToFilterableRelations(ctx context.Context, a *ExecutionPlanner, tableNode types.PlanOperator, scope *OptimizerScope, filters *filterSet, tableAliases RelationAliasesMap) (types.PlanOperator, bool, error) { - // only do this if it is an alias or a pql table scan - switch tableNode.(type) { - case *PlanOpRelAlias, *PlanOpPQLTableScan: - // continue + var table types.IdentifiableByName + + // only do this if it is a pql table scan or a pql distinct scan + switch rel := tableNode.(type) { + case *PlanOpPQLTableScan: + table = rel + case *PlanOpPQLDistinctScan: + table = rel default: - return nil, true, sql3.NewErrInternalf("unexpected op type '%T'", tableNode) - } - - table := getRelation(tableNode) - if table == nil { return tableNode, true, nil } + // is the relation filterable? ft, ok := table.(types.FilteredRelation) if !ok { return tableNode, true, nil } // do we have any filters for this table? if not, bail... - tableFilters := filters.availableFiltersForTable(table.Name()) + availableFilters := filters.availableFiltersForTable(table.Name()) + if len(availableFilters) == 0 { + return tableNode, true, nil + } + + tableFilters := make([]types.PlanExpression, 0) + // can the filters be pushed down? + for _, tf := range availableFilters { + // try to generate a pql call graph; if we can't, we can't push the filter down + _, err := a.generatePQLCallFromExpr(ctx, tf) + if err == nil { + tableFilters = append(tableFilters, tf) + } + } + // did we end up with any filters? if len(tableFilters) == 0 { return tableNode, true, nil } + filters.markFiltersHandled(tableFilters...)
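+ // note: only the predicates that generatePQLCallFromExpr could translate were marked handled above; anything it rejected stays in the parent PlanOpFilter and is evaluated row by row after the scan. As a hypothetical example, for "select _id from t where a = 1 and reverse(s) = 'oof'" the "a = 1" term could be pushed into the scan as a PQL row filter while the reverse() comparison would remain in the filter operator above it.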
// fix the field refs @@ -313,8 +403,15 @@ } func pushdownFiltersToAboveRelation(ctx context.Context, a *ExecutionPlanner, tableNode types.PlanOperator, scope *OptimizerScope, filters *filterSet) (types.PlanOperator, bool, error) { - table := getRelation(tableNode) - if table == nil { + var table types.IdentifiableByName + + // only do this if it is a pql table scan or a pql distinct scan + switch rel := tableNode.(type) { + case *PlanOpPQLTableScan: + table = rel + case *PlanOpPQLDistinctScan: + table = rel + default: return tableNode, true, nil } @@ -333,12 +430,12 @@ func pushdownFiltersToAboveRelation(ctx context.Context, a *ExecutionPlanner, ta } switch tableNode.(type) { - case *PlanOpRelAlias, *PlanOpPQLTableScan: + case *PlanOpRelAlias, *PlanOpPQLTableScan, *PlanOpPQLDistinctScan: node := tableNode if pushedDownFilterExpression != nil { return NewPlanOpFilter(a, pushedDownFilterExpression, node), false, nil } - return node, false, nil + return node, true, nil default: return nil, true, sql3.NewErrInternalf("unexpected op type '%T'", tableNode) } @@ -351,9 +448,12 @@ func pushdownFilters(ctx context.Context, a *ExecutionPlanner, n types.PlanOpera return nil, true, err } + // push filter terms down into anything that supports being filtered directly pushdownFiltersForFilterableRelations := func(n *PlanOpFilter, filters *filterSet) (types.PlanOperator, bool, error) { return TransformPlanOpWithParent(n, filterPushdownChildSelector, func(c ParentContext) (types.PlanOperator, bool, error) { switch node := c.Operator.(type) { + + // for the filter in question, remove any terms that have been pushed down case *PlanOpFilter: n, samePred, err := removePushedDownConditions(ctx, a, node, filters) if err != nil { @@ -361,7 +461,8 @@ func pushdownFilters(ctx context.Context, a *ExecutionPlanner, n types.PlanOpera } return n, samePred, nil - case *PlanOpRelAlias, *PlanOpPQLTableScan: + // PlanOpPQLTableScan and PlanOpPQLDistinctScan support being filtered; PlanOpRelAlias is included here as a "transparent" op + case *PlanOpRelAlias, *PlanOpPQLTableScan, *PlanOpPQLDistinctScan: n, samePred, err := pushdownFiltersToFilterableRelations(ctx, a, node, scope, filters, tableAliases) if err != nil { return nil, true, err @@ -376,6 +477,7 @@ func pushdownFilters(ctx context.Context, a *ExecutionPlanner, n types.PlanOpera pushdownFiltersCloseToRelations := func(n types.PlanOperator, filters *filterSet) (types.PlanOperator, bool, error) { return TransformPlanOpWithParent(n, filterPushdownAboveTablesChildSelector, func(c ParentContext) (types.PlanOperator, bool, error) { switch node := c.Operator.(type) { + case *PlanOpFilter: n, same, err := removePushedDownConditions(ctx, a, node, filters) if err != nil { @@ -385,7 +487,8 @@ func pushdownFilters(ctx context.Context, a *ExecutionPlanner, n types.PlanOpera return n, true, nil } return n, false, nil - case *PlanOpRelAlias, *PlanOpPQLTableScan: + + case *PlanOpRelAlias, *PlanOpPQLTableScan, *PlanOpPQLDistinctScan: _, same, err := pushdownFiltersToAboveRelation(ctx, a, node, scope, filters) if err != nil { return nil, true, err @@ -400,27 +503,34 @@ func pushdownFilters(ctx context.Context, a *ExecutionPlanner, n types.PlanOpera }) } + // look for filter ops and push the conditions within them down to things that can be filtered return TransformPlanOp(n, func(node types.PlanOperator) (types.PlanOperator, bool, error) { - switch n := node.(type) { + switch thisNode := node.(type) { case *PlanOpFilter: + + // get the filter
conditions from this filter in a map by table filtersByTable := getFiltersByRelation(n) - filters := newFilterSet(n.Predicate, filtersByTable, tableAliases) + + // make a struct to hold the expression for this filter, the broken-up filter conditions, + // and a map of alias names to relations + filters := newFilterSet(thisNode.Predicate, filtersByTable, tableAliases) // first push down filters to any op that supports a filter - node, sameA, err := pushdownFiltersForFilterableRelations(n, filters) + newNode, sameA, err := pushdownFiltersForFilterableRelations(thisNode, filters) if err != nil { return nil, true, err } // second push down filters as close as possible to the relations they apply to - node, sameB, err := pushdownFiltersCloseToRelations(node, filters) + var sameB bool + newNode, sameB, err = pushdownFiltersCloseToRelations(newNode, filters) if err != nil { return nil, true, err } - return node, sameA && sameB, nil + return newNode, sameA && sameB, nil default: - return n, true, nil + return node, true, nil } }) } @@ -430,9 +540,9 @@ func getFiltersByRelation(n types.PlanOperator) map[string][]types.PlanExpressio filters := make(map[string][]types.PlanExpression) InspectPlan(n, func(node types.PlanOperator) bool { - switch nd := node.(type) { + switch thisNode := node.(type) { case *PlanOpFilter: - fs := exprToRelationFilters(nd.Predicate) + fs := exprToRelationFilters(thisNode.Predicate) for k, exprs := range fs { filters[k] = append(filters[k], exprs...) @@ -455,13 +565,13 @@ func exprToRelationFilters(expr types.PlanExpression) map[string][]types.PlanExp hasSubquery := false InspectExpression(expr, func(e types.PlanExpression) bool { - f, ok := e.(*qualifiedRefPlanExpression) - if ok { - if !seenTables[f.tableName] { - seenTables[f.tableName] = true - lastTable = f.tableName + switch thisExpr := e.(type) { + case *qualifiedRefPlanExpression: + if !seenTables[thisExpr.tableName] { + seenTables[thisExpr.tableName] = true + lastTable = thisExpr.tableName } - } else if _, isSubquery := e.(*subqueryPlanExpression); isSubquery { + case *subqueryPlanExpression: + hasSubquery = true return false } @@ -473,7 +583,6 @@ func exprToRelationFilters(expr types.PlanExpression) map[string][]types.PlanExp filters[lastTable] = append(filters[lastTable], expr) } } - return filters } @@ -541,6 +650,113 @@ func tryToReplaceGroupByWithPQLAggregate(ctx context.Context, a *ExecutionPlanne return n, true, nil } +func tryToReplaceDistinctWithPQLDistinct(ctx context.Context, a *ExecutionPlanner, n types.PlanOperator, scope *OptimizerScope) (types.PlanOperator, bool, error) { + // bail if no distinct + hasDistinct := false + InspectPlan(n, func(node types.PlanOperator) bool { + switch node.(type) { + case *PlanOpDistinct: + hasDistinct = true + return false + } + return true + }) + if !hasDistinct { + return n, true, nil + } + + // bail if there is a group by + hasGroupBy := false + InspectPlan(n, func(node types.PlanOperator) bool { + switch node.(type) { + case *PlanOpGroupBy: + hasGroupBy = true + return false + } + return true + }) + if hasGroupBy { + return n, true, nil + } + + // bail if there are any joins + joins, err := hasJoins(ctx, a, n, scope) + if err != nil { + return nil, false, err + } + if joins { + return n, true, nil + } + + // go find the table scan operators + tables := getTableScanOperators(ctx, a, n, scope) + + // only do this if we have one TableScanOperator + if len(tables) == 1 { + replacedWithDistinct := false + // replace the scan with the distinct scan + return TransformPlanOp(n, 
func(node types.PlanOperator) (types.PlanOperator, bool, error) { + switch thisNode := node.(type) { + case *PlanOpDistinct: + if replacedWithDistinct { + return thisNode.ChildOp, false, nil + } + return thisNode, true, nil + + case *PlanOpPQLTableScan: + // bail if there is more than one output column + if len(thisNode.columns) != 1 { + return thisNode, true, nil + } + + // make sure it's not the _id column + if strings.EqualFold(thisNode.columns[0], "_id") { + return thisNode, true, nil + } + + // make sure it's not a set type + s := thisNode.Schema() + switch s[0].Type.(type) { + case *parser.DataTypeIDSet, *parser.DataTypeStringSet: + return thisNode, true, nil + } + + newOp, err := NewPlanOpPQLDistinctScan(a, thisNode.tableName, thisNode.columns[0]) + if err != nil { + return nil, false, err + } + replacedWithDistinct = true + return newOp, false, nil + default: + return thisNode, true, nil + } + }) + } + return n, true, nil +} + +func tryToReplaceConstRowDeleteWithFilteredDelete(ctx context.Context, a *ExecutionPlanner, n types.PlanOperator, scope *OptimizerScope) (types.PlanOperator, bool, error) { + return TransformPlanOp(n, func(node types.PlanOperator) (types.PlanOperator, bool, error) { + switch node := node.(type) { + case *PlanOpPQLConstRowDelete: + switch child := node.ChildOp.(type) { + case *PlanOpPQLTableScan: + if child.filter != nil { + _, err := a.generatePQLCallFromExpr(ctx, child.filter) + if err == nil { + return NewPlanOpPQLFilteredDelete(a, node.tableName, child.filter), false, nil + } + } + return node, true, nil + default: + return node, true, nil + } + default: + return node, true, nil + } + }) +} + func tryToReplaceGroupByWithPQLGroupBy(ctx context.Context, a *ExecutionPlanner, n types.PlanOperator, scope *OptimizerScope) (types.PlanOperator, bool, error) { //bail if there are any joins joins, err := hasJoins(ctx, a, n, scope) @@ -716,7 +932,7 @@ func fixProjectionReferences(ctx context.Context, a *ExecutionPlanner, n types.P case *PlanOpProjection: switch childOp := thisNode.ChildOp.(type) { - case *PlanOpGroupBy, *PlanOpPQLGroupBy, *PlanOpPQLMultiAggregate, *PlanOpPQLMultiGroupBy: + case *PlanOpGroupBy, *PlanOpHaving, *PlanOpPQLGroupBy, *PlanOpPQLMultiAggregate, *PlanOpPQLMultiGroupBy: childSchema := childOp.Schema() for idx, pj := range thisNode.Projections { @@ -724,7 +940,7 @@ func fixProjectionReferences(ctx context.Context, a *ExecutionPlanner, n types.P switch thisAggregate := e.(type) { case types.Aggregable: // if we have a Aggregable, the AggExpression() will be a qualified ref - // given we are in the context of a PlanOpProjection with a PlanOpGroupBy + // given we are in the context of a PlanOpProjection with a PlanOpGroupBy/Having // we can use the ordinal position of the projection as the column index for idx, sc := range childSchema { if strings.EqualFold(thisAggregate.String(), sc.ColumnName) { @@ -770,7 +986,7 @@ func fixProjectionReferences(ctx context.Context, a *ExecutionPlanner, n types.P return thisNode, false, nil // everything else that can be a child of projection - case *PlanOpRelAlias, *PlanOpFilter, *PlanOpPQLTableScan, *PlanOpNestedLoops: + case *PlanOpRelAlias, *PlanOpFilter, *PlanOpPQLTableScan, *PlanOpPQLDistinctScan, *PlanOpNestedLoops: exprs, same, err := fixFieldRefIndexesOnExpressions(ctx, scope, a, childOp.Schema(), thisNode.Projections...) 
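// per the transform convention used throughout this file, the returned bool means "unchanged": true leaves the operator as-is, false signals that a rewrite happened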
if err != nil { return thisNode, true, err @@ -865,26 +1081,27 @@ func fixFieldRefs(ctx context.Context, a *ExecutionPlanner, n types.PlanOperator }) } -// hasTop inspects a plan op tree and returns true (or error) if there are Top -// operators. -func hasTop(ctx context.Context, a *ExecutionPlanner, n types.PlanOperator, scope *OptimizerScope) (bool, error) { - result := false - InspectPlan(n, func(node types.PlanOperator) bool { - switch node.(type) { - case *PlanOpTop: - result = true - return false +func fixHavingReferences(ctx context.Context, a *ExecutionPlanner, n types.PlanOperator, scope *OptimizerScope) (types.PlanOperator, bool, error) { + return TransformPlanOp(n, func(node types.PlanOperator) (types.PlanOperator, bool, error) { + switch thisNode := node.(type) { + case *PlanOpHaving: + // fix references for the expressions referenced in the having predicate expression + schema := thisNode.Schema() + expressions := thisNode.Expressions() + fixed, same, err := fixFieldRefIndexesOnExpressionsForHaving(ctx, scope, a, schema, expressions...) + if err != nil { + return nil, true, err + } + newNode, err := thisNode.WithUpdatedExpressions(fixed...) + if err != nil { + return nil, true, err + } + return newNode, same, nil + + default: + return node, true, nil } - return true }) - return result, nil -} - -// hasTopN inspects a plan op tree and returns true (or error) if there are TopN -// operators. -func hasTopN(ctx context.Context, a *ExecutionPlanner, n types.PlanOperator, scope *OptimizerScope) (bool, error) { - //TODO(pok) implement this - return false, nil } // inspects a plan op tree and returns false (or error) if there are read join operators @@ -1004,3 +1221,69 @@ func fixFieldRefIndexes(ctx context.Context, scope *OptimizerScope, a *Execution return true }) } + +// for a list of expressions and an operator schema, fix the references for any qualifiedRef expressions +func fixFieldRefIndexesOnExpressionsForHaving(ctx context.Context, scope *OptimizerScope, a *ExecutionPlanner, schema types.Schema, expressions ...types.PlanExpression) ([]types.PlanExpression, bool, error) { + var result []types.PlanExpression + var res types.PlanExpression + var same bool + var err error + for i := range expressions { + e := expressions[i] + res, same, err = fixFieldRefIndexesForHaving(ctx, scope, a, schema, e) + if err != nil { + return nil, true, err + } + if !same { + if result == nil { + result = make([]types.PlanExpression, len(expressions)) + copy(result, expressions) + } + result[i] = res + } + } + if len(result) > 0 { + return result, false, nil + } + return expressions, true, nil +} + +func fixFieldRefIndexesForHaving(ctx context.Context, scope *OptimizerScope, a *ExecutionPlanner, schema types.Schema, exp types.PlanExpression) (types.PlanExpression, bool, error) { + return TransformExpr(exp, func(e types.PlanExpression) (types.PlanExpression, bool, error) { + switch typedExpr := e.(type) { + case *sumPlanExpression, *countPlanExpression, *countDistinctPlanExpression, + *avgPlanExpression, *minPlanExpression, *maxPlanExpression, + *percentilePlanExpression: + for i, col := range schema { + if strings.EqualFold(typedExpr.String(), col.ColumnName) { + e := newQualifiedRefPlanExpression("", "", i, typedExpr.Type()) + return e, false, nil + } + } + return nil, true, sql3.NewErrColumnNotFound(0, 0, typedExpr.String()) + + case *qualifiedRefPlanExpression: + for i, col := range schema { + newIndex := i + if matchesSchema(typedExpr, col) { + if newIndex != typedExpr.columnIndex { + // update 
the column index + return newQualifiedRefPlanExpression(typedExpr.tableName, typedExpr.columnName, newIndex, typedExpr.dataType), false, nil + } + return e, true, nil + } + } + return nil, true, sql3.NewErrColumnNotFound(0, 0, typedExpr.Name()) + } + return e, true, nil + }, func(parentExpr, childExpr types.PlanExpression) bool { + switch parentExpr.(type) { + case *sumPlanExpression, *countPlanExpression, *countDistinctPlanExpression, + *avgPlanExpression, *minPlanExpression, *maxPlanExpression, + *percentilePlanExpression: + return false + default: + return true + } + }) +} diff --git a/sql3/planner/planwalker.go b/sql3/planner/planwalker.go index bc71b82cb..8e7e50c29 100644 --- a/sql3/planner/planwalker.go +++ b/sql3/planner/planwalker.go @@ -78,8 +78,8 @@ func (f exprInspector) VisitExpr(e types.PlanExpression) ExprVisitor { return nil } -// WalkExpressions traverses the plan and calls ExprWalk on any expression it finds -func WalkExpressions(v ExprVisitor, op types.PlanOperator) { +// walkExpressions traverses the plan and calls ExprWalk on any expression it finds +func walkExpressions(v ExprVisitor, op types.PlanOperator) { InspectPlan(op, func(operator types.PlanOperator) bool { if n, ok := operator.(types.ContainsExpressions); ok { for _, e := range n.Expressions() { @@ -90,13 +90,13 @@ func (f exprInspector) VisitExpr(e types.PlanExpression) ExprVisitor { return nil } -// InspectExpressions traverses the plan and calls WalkExpressions on any +// InspectOperatorExpressions traverses the plan and calls walkExpressions on any // expression it finds. -func InspectExpressions(op types.PlanOperator, f exprInspector) { - WalkExpressions(f, op) +func InspectOperatorExpressions(op types.PlanOperator, f exprInspector) { + walkExpressions(f, op) } -// InspectExpression traverses expressoins in depth-first order +// InspectExpression traverses expressions in depth-first order func InspectExpression(expr types.PlanExpression, f func(expr types.PlanExpression) bool) { ExprWalk(exprInspector(f), expr) } diff --git a/sql3/planner/types/operator.go b/sql3/planner/types/operator.go index dac15002c..ef36f1650 100644 --- a/sql3/planner/types/operator.go +++ b/sql3/planner/types/operator.go @@ -67,12 +67,26 @@ type FilteredRelation interface { // Schema is the definition of a set of columns from each operator type Schema []*PlannerColumn +func (r Schema) Plan() []map[string]interface{} { + result := make([]map[string]interface{}, len(r)) + for i, s := range r { + m := make(map[string]interface{}) + m["name"] = s.ColumnName + m["alias"] = s.AliasName + m["relation"] = s.RelationName + m["type"] = s.Type.TypeDescription() + result[i] = m + } + return result +} + // Row is a tuple of values type Row []interface{} // Append appends all the values in r2 to this row and returns the result func (r Row) Append(r2 Row) Row { row := make(Row, len(r)+len(r2)) + // TODO(pok) use a copy here for i := range r { row[i] = r[i] } diff --git a/sql3/planner/types/planexpression.go b/sql3/planner/types/planexpression.go index 2a49677d7..49e6a6069 100644 --- a/sql3/planner/types/planexpression.go +++ b/sql3/planner/types/planexpression.go @@ -51,7 +51,7 @@ type AggregationBuffer interface { Update(ctx context.Context, row Row) error } -// Interface to an expression that is a an aggregate +// interface to an expression that is an aggregate type Aggregable interface { fmt.Stringer @@ -59,9 +59,10 @@ type Aggregable interface { AggType() AggregateFunctionType AggExpression() PlanExpression AggAdditionalExpr() []PlanExpression + Type() 
parser.ExprDataType } -// Interface to something that can be identified by a name +// interface to something that can be identified by a name type IdentifiableByName interface { Name() string } diff --git a/sql3/sql_complex_test.go b/sql3/sql_complex_test.go index 5169e5ae4..38bd3dac4 100644 --- a/sql3/sql_complex_test.go +++ b/sql3/sql_complex_test.go @@ -5,16 +5,12 @@ import ( "context" "fmt" "os" + "reflect" + "sort" "strings" "testing" "time" - pilosa "github.com/featurebasedb/featurebase/v3" - "github.com/featurebasedb/featurebase/v3/pql" - "github.com/featurebasedb/featurebase/v3/sql3/parser" - planner_types "github.com/featurebasedb/featurebase/v3/sql3/planner/types" - sql_test "github.com/featurebasedb/featurebase/v3/sql3/test" - "github.com/featurebasedb/featurebase/v3/test" "github.com/google/go-cmp/cmp" pilosa "github.com/featurebasedb/featurebase/v3" "github.com/featurebasedb/featurebase/v3/dax" @@ -22,10 +18,9 @@ import ( sql_test "github.com/featurebasedb/featurebase/v3/sql3/test" "github.com/featurebasedb/featurebase/v3/test" "github.com/stretchr/testify/assert" -) + ) func TestPlanner_Misc(t *testing.T) { - d, err := pql.ParseDecimal("12.345678") if err != nil { t.Fatal(err) @@ -68,7 +63,7 @@ func TestPlanner_Show(t *testing.T) { } t.Run("SystemTablesInfo", func(t *testing.T) { - results, columns, err := sql_test.MustQueryRows(t, c.GetNode(0).Server, `select name, platform, platform_version, db_version, state, node_count, shard_width, replica_count from fb_cluster_info`) + results, columns, err := sql_test.MustQueryRows(t, c.GetNode(0).Server, `select name, platform, platform_version, db_version, state, node_count, replica_count from fb_cluster_info`) if err != nil { t.Fatal(err) } @@ -83,7 +78,6 @@ func TestPlanner_Show(t *testing.T) { wireQueryFieldString("db_version"), wireQueryFieldString("state"), wireQueryFieldInt("node_count"), - wireQueryFieldInt("shard_width"), wireQueryFieldInt("replica_count"), }, columns); diff != "" { t.Fatal(diff) @@ -102,6 +96,7 @@ func TestPlanner_Show(t *testing.T) { wireQueryFieldString("uri"), wireQueryFieldString("grpc_uri"), wireQueryFieldBool("is_primary"), + wireQueryFieldBool("space_used"), }, columns); diff != "" { t.Fatal(diff) } @@ -172,11 +167,11 @@ func TestPlanner_Show(t *testing.T) { wireQueryFieldString("_id"), wireQueryFieldString("name"), wireQueryFieldString("owner"), - wireQueryFieldString("last_updated_user"), + wireQueryFieldString("updated_by"), wireQueryFieldTimestamp("created_at"), - wireQueryFieldBool("track_existence"), + wireQueryFieldTimestamp("updated_at"), wireQueryFieldBool("keys"), - wireQueryFieldInt("shard_width"), + wireQueryFieldInt("space_used"), wireQueryFieldString("description"), }, columns); diff != "" { t.Fatal(diff) @@ -205,6 +200,42 @@ func TestPlanner_Show(t *testing.T) { } }) + t.Run("ShowCreateTableCacheTypes", func(t *testing.T) { + _, _, err := sql_test.MustQueryRows(t, c.GetNode(0).Server, `create table iris1 ( + _id id, + speciesid id cachetype ranked size 1000 + species string cachetype ranked size 1000 + speciesids idset cachetype ranked size 1000 + speciess stringset cachetype ranked size 1000 + speciesidsq idset timequantum 'YMD' + speciessq stringset timequantum 'YMD' + ) keypartitions 12 + `) + if err != nil { + t.Fatal(err) + } + + results, columns, err := sql_test.MustQueryRows(t, c.GetNode(0).Server, `SHOW CREATE TABLE iris1`) + if err != nil { + t.Fatal(err) + } + if len(results) != 1 { + t.Fatal(fmt.Errorf("unexpected result set length: %d", len(results))) + } + + if diff := 
cmp.Diff([][]interface{}{ + {string("create table iris1 (_id id, speciesid id cachetype ranked size 1000, species string cachetype ranked size 1000, speciesids idset cachetype ranked size 1000, speciess stringset cachetype ranked size 1000, speciesidsq idset timequantum 'YMD', speciessq stringset timequantum 'YMD');")}, + }, results); diff != "" { + t.Fatal(diff) + } + + if diff := cmp.Diff([]*pilosa.WireQueryField{ + wireQueryFieldString("ddl"), + }, columns); diff != "" { + t.Fatal(diff) + } + }) + t.Run("ShowColumns", func(t *testing.T) { results, columns, err := sql_test.MustQueryRows(t, c.GetNode(0).Server, fmt.Sprintf(`SHOW COLUMNS FROM %i`, c)) if err != nil { @@ -274,6 +305,7 @@ func TestPlanner_Show(t *testing.T) { } }) } + func TestPlanner_CoverCreateTable(t *testing.T) { c := test.MustRunCluster(t, 1) defer c.Close() @@ -314,13 +346,13 @@ func TestPlanner_CoverCreateTable(t *testing.T) { // Build the create table statement based on the fields slice above. sql := "create table " + tableName + "_" + fld.name + " (_id id, " sql += fld.name + " " + fld.typ + " " + fld.constraints - sql += `) keypartitions 12 shardwidth 1024` + sql += `) keypartitions 12` // Run the create table statement. _, _, err := sql_test.MustQueryRows(t, server, sql) if assert.Error(t, err) { assert.Equal(t, fld.expErr, err.Error()) - //sql3.SQLErrConflictingColumnConstraint.Message + // sql3.SQLErrConflictingColumnConstraint.Message } } }) @@ -479,7 +511,7 @@ func TestPlanner_CoverCreateTable(t *testing.T) { } } sql += strings.Join(fieldDefs, ", ") - sql += `) keypartitions 12 shardwidth 65536` + sql += `) keypartitions 12` // Run the create table statement. results, columns, err := sql_test.MustQueryRows(t, server, sql) @@ -494,7 +526,7 @@ func TestPlanner_CoverCreateTable(t *testing.T) { schema, err := api.Schema(ctx, false) assert.NoError(t, err) - //spew.Dump(schema) + // spew.Dump(schema) // Get the fields from the FeatureBase schema. // fbFields is a map of fieldName to FieldInfo. 
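// A minimal sketch (assumed shape, not the test's verbatim code) of how a fieldName-to-FieldInfo lookup like fbFields can be built from the schema returned by api.Schema: // // fbFields := make(map[string]*pilosa.FieldInfo) // for _, idx := range schema { // for _, fld := range idx.Fields { // fbFields[fld.Name] = fld // } // } // // (the idx.Fields slice and pilosa.FieldInfo element type are assumptions based on the surrounding test code)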
@@ -554,7 +586,7 @@ func TestPlanner_CreateTable(t *testing.T) { stringcol string, stringsetcol stringset, idcol id, - idsetcol idset) keypartitions 12 shardwidth 65536`) + idsetcol idset) keypartitions 12`) if err != nil { t.Fatal(err) } @@ -577,11 +609,11 @@ func TestPlanner_CreateTable(t *testing.T) { stringcol string, stringsetcol stringset, idcol id, - idsetcol idset) keypartitions 12 shardwidth 65536`) + idsetcol idset) keypartitions 12`) if err == nil { t.Fatal("expected error") } else { - if err.Error() != "creating index: index already exists" { + if err.Error() != "[0:0] table 'allcoltypes' already exists" { t.Fatal(err) } } @@ -607,7 +639,7 @@ func TestPlanner_CreateTable(t *testing.T) { idcol id cachetype ranked size 1000, idsetcol idset cachetype lru, idsetcolsz idset cachetype lru size 1000, - idsetcolq idset timequantum 'YMD' ttl '24h') keypartitions 12 shardwidth 65536`) + idsetcolq idset timequantum 'YMD' ttl '24h') keypartitions 12`) if err != nil { t.Fatal(err) } @@ -731,7 +763,6 @@ func TestPlanner_AlterTable(t *testing.T) { t.Fatal(diff) } }) - } func TestPlanner_DropTable(t *testing.T) { @@ -791,7 +822,8 @@ func TestPlanner_ExpressionsInSelectListParen(t *testing.T) { Set(1, b=100) Set(2, a=20) Set(2, b=200) - `}); err != nil { + `, + }); err != nil { t.Fatal(err) } @@ -870,7 +902,8 @@ func TestPlanner_ExpressionsInSelectListLiterals(t *testing.T) { Set(1, d=10.3) Set(1, ts='2022-02-22T22:22:22Z') Set(1, str='foo') - `}); err != nil { + `, + }); err != nil { t.Fatal(err) } @@ -1016,7 +1049,8 @@ func TestPlanner_ExpressionsInSelectListCase(t *testing.T) { Set(1, d=10.3) Set(1, ts='2022-02-22T22:22:22Z') Set(1, str='foo') - `}); err != nil { + `, + }); err != nil { t.Fatal(err) } @@ -1099,7 +1133,8 @@ func TestPlanner_Select(t *testing.T) { Set(1, b=100) Set(2, a=20) Set(2, b=200) - `}); err != nil { + `, + }); err != nil { t.Fatal(err) } @@ -1287,7 +1322,8 @@ func TestPlanner_SelectOrderBy(t *testing.T) { Set(1, b=100) Set(2, a=20) Set(2, b=200) - `}); err != nil { + `, + }); err != nil { t.Fatal(err) } @@ -1364,7 +1400,7 @@ func TestPlanner_BulkInsert(t *testing.T) { t.Run("BulkBadWith", func(t *testing.T) { _, _, err = sql_test.MustQueryRows(t, c.GetNode(0).Server, `bulk insert into j (_id, a, b) map (0 id, 1 int, 2 int) from '/Users/bar/foo.csv' WITH UNICORNS AND RAINBOWS;`) - if err == nil || !strings.Contains(err.Error(), `expected BATCHSIZE, ROWSLIMIT, FORMAT, INPUT or HEADER_ROW, found UNICORNS`) { + if err == nil || !strings.Contains(err.Error(), `expected BATCHSIZE, ROWSLIMIT, FORMAT, INPUT, ALLOW_MISSING_VALUES or HEADER_ROW, found UNICORNS`) { t.Fatalf("unexpected error: %v", err) } }) @@ -1472,7 +1508,6 @@ func TestPlanner_BulkInsert(t *testing.T) { }) t.Run("BulkCSVBadMap", func(t *testing.T) { - _, _, err = sql_test.MustQueryRows(t, c.GetNode(0).Server, `bulk insert into j (_id, a, b) map (0 id, 1 int, 10 int) from x'1,10,20 2,11,21 3,12,22 @@ -1489,7 +1524,6 @@ func TestPlanner_BulkInsert(t *testing.T) { }) t.Run("BulkCSVFileDefault", func(t *testing.T) { - tmpfile, err := os.CreateTemp("", "BulkCSVFileDefault.*.csv") if err != nil { t.Fatal(err) @@ -1512,7 +1546,6 @@ func TestPlanner_BulkInsert(t *testing.T) { }) t.Run("BulkCSVFileNoColumns", func(t *testing.T) { - tmpfile, err := os.CreateTemp("", "BulkCSVFileNoColumns.*.csv") if err != nil { t.Fatal(err) @@ -1542,7 +1575,6 @@ func TestPlanner_BulkInsert(t *testing.T) { }) t.Run("BulkCSVFileRowsLimit", func(t *testing.T) { - tmpfile, err := os.CreateTemp("", "BulkCSVFileDefault.*.csv") if err != nil { 
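// temp file creation failed; abort this subtest before exercising the rows limit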
t.Fatal(err) @@ -1579,7 +1611,6 @@ func TestPlanner_BulkInsert(t *testing.T) { }, columns); diff != "" { t.Fatal(diff) } - }) t.Run("BulkCSVBlobDefault", func(t *testing.T) { @@ -1590,7 +1621,6 @@ func TestPlanner_BulkInsert(t *testing.T) { }) t.Run("BulkNDJsonBlobDefault", func(t *testing.T) { - _, _, err = sql_test.MustQueryRows(t, c.GetNode(0).Server, `bulk insert into j (_id, a, b) map ('$._id' id, '$.a' int, '$.b' int) from x'{ "_id": 1, "a": 10, "b": 20 } { "_id": 2, "a": 10, "b": 20 } @@ -1625,7 +1655,6 @@ func TestPlanner_BulkInsert(t *testing.T) { }) t.Run("BulkNDJsonFileDefault", func(t *testing.T) { - tmpfile, err := os.CreateTemp("", "BulkNDJsonFileDefault.*.csv") if err != nil { t.Fatal(err) @@ -1649,7 +1678,6 @@ func TestPlanner_BulkInsert(t *testing.T) { }) t.Run("BulkNDJsonFileTransform", func(t *testing.T) { - tmpfile, err := os.CreateTemp("", "BulkNDJsonFileTransform.*.csv") if err != nil { t.Fatal(err) @@ -1673,7 +1701,6 @@ func TestPlanner_BulkInsert(t *testing.T) { }) t.Run("BulkNDJsonAllTypes", func(t *testing.T) { - _, _, err = sql_test.MustQueryRows(t, c.GetNode(0).Server, `bulk insert into alltypes (_id, id1, i1, ids1, ss1, ts1, s1, b1, d1) @@ -1693,7 +1720,6 @@ func TestPlanner_BulkInsert(t *testing.T) { }) t.Run("BulkNDJsonBadJsonPath", func(t *testing.T) { - _, _, err = sql_test.MustQueryRows(t, c.GetNode(0).Server, `bulk insert into alltypes (_id, id1, i1, ids1, ss1, ts1, s1, b1, d1) @@ -1713,7 +1739,6 @@ func TestPlanner_BulkInsert(t *testing.T) { }) t.Run("BulkNDJsonBadJson", func(t *testing.T) { - _, _, err = sql_test.MustQueryRows(t, c.GetNode(0).Server, `bulk insert into alltypes (_id, id1, i1, ids1, ss1, ts1, s1, b1, d1) @@ -1733,7 +1758,6 @@ func TestPlanner_BulkInsert(t *testing.T) { }) t.Run("BulkInsertDecimals", func(t *testing.T) { - _, _, err = sql_test.MustQueryRows(t, c.GetNode(0).Server, `create table iris ( _id id, sepallength decimal(2), @@ -1741,7 +1765,7 @@ func TestPlanner_BulkInsert(t *testing.T) { petallength decimal(2), petalwidth decimal(2), species string cachetype ranked size 1000 - ) keypartitions 12 shardwidth 65536;`) + ) keypartitions 12;`) if err != nil { t.Fatal(err) } @@ -1755,7 +1779,7 @@ func TestPlanner_BulkInsert(t *testing.T) { 'petalWidth' DECIMAL, 'species' STRING) from - '{"id": 1, "sepalLength": "5.1", "sepalWidth": "3.5", "petalLength": "1.4", "petalWidth": "0.2", "species": "setosa"} + x'{"id": 1, "sepalLength": "5.1", "sepalWidth": "3.5", "petalLength": "1.4", "petalWidth": "0.2", "species": "setosa"} {"id": 2, "sepalLength": "4.9", "sepalWidth": "3.0", "petalLength": "1.4", "petalWidth": "0.2", "species": "setosa"} {"id": 3, "sepalLength": "4.7", "sepalWidth": "3.2", "petalLength": "1.3", "petalWidth": "0.2", "species": "setosa"}' with @@ -1774,7 +1798,7 @@ func TestPlanner_BulkInsert(t *testing.T) { 'petalWidth' DECIMAL(2), 'species' STRING) from - '{"id": 1, "sepalLength": "5.1", "sepalWidth": "3.5", "petalLength": "1.4", "petalWidth": "0.2", "species": "setosa"} + x'{"id": 1, "sepalLength": "5.1", "sepalWidth": "3.5", "petalLength": "1.4", "petalWidth": "0.2", "species": "setosa"} {"id": 2, "sepalLength": "4.9", "sepalWidth": "3.0", "petalLength": "1.4", "petalWidth": "0.2", "species": "setosa"} {"id": 3, "sepalLength": "4.7", "sepalWidth": "3.2", "petalLength": "1.3", "petalWidth": "0.2", "species": "setosa"}' with @@ -1783,7 +1807,6 @@ func TestPlanner_BulkInsert(t *testing.T) { if err != nil { t.Fatal(err) } - }) t.Run("BulkInsertDupeColumnPlusNullsInJson", func(t *testing.T) { @@ -1810,6 +1833,213 @@ func 
TestPlanner_BulkInsert(t *testing.T) { } }) + t.Run("BulkInsertCSVStringIDSet", func(t *testing.T) { + _, _, err = sql_test.MustQueryRows(t, c.GetNode(0).Server, `create table greg-test ( + _id STRING, + id_col ID, + string_col STRING cachetype ranked size 1000, + int_col int, + decimal_col DECIMAL(2), + bool_col BOOL + time_col TIMESTAMP, + stringset_col STRINGSET, + ideset_col IDSET + );`) + if err != nil { + t.Fatal(err) + } + + _, _, err = sql_test.MustQueryRows(t, c.GetNode(0).Server, `BULK INSERT INTO greg-test ( + _id, + id_col, + string_col, + int_col, + decimal_col, + bool_col, + time_col, + stringset_col, + ideset_col) + map ( + 0 ID, + 1 STRING, + 2 INT, + 3 DECIMAL(2), + 4 BOOL, + 5 TIMESTAMP, + 6 STRINGSET, + 7 IDSET) + transform( + @1, + @0, + @1, + @2, + @3, + @4, + @5, + @6, + @7) + FROM + x'8924809397503602651,TEST,-123,1.12,0,2013-07-15T01:18:46Z,stringset1,1 + 64575677503602651,TEST2,321,31.2,1,2014-07-15T01:18:46Z,stringset1,1 + 8924809397503602651,TEST,-123,1.12,0,2013-07-15T01:18:46Z,stringset2,2' + with + BATCHSIZE 10000 + format 'CSV' + input 'STREAM';`) + if err != nil { + t.Fatal(err) + } + }) + + t.Run("BulkInsertAllowMissingValues", func(t *testing.T) { + _, _, err = sql_test.MustQueryRows(t, c.GetNode(0).Server, `create table greg-test-amv ( + _id STRING, + id_col ID, + string_col STRING cachetype ranked size 1000, + int_col int, + decimal_col DECIMAL(2), + bool_col BOOL + time_col TIMESTAMP, + stringset_col STRINGSET, + ideset_col IDSET + );`) + if err != nil { + t.Fatal(err) + } + + _, _, err = sql_test.MustQueryRows(t, c.GetNode(0).Server, `BULK INSERT INTO greg-test-amv ( + _id, + id_col, + string_col, + int_col, + decimal_col, + bool_col, + time_col, + stringset_col, + ideset_col) + map ( + '$.id_col' ID, + '$.string_col' STRING, + '$.int_col' INT, + '$.decimal_col' DECIMAL(2), + '$.bool_col' BOOL, + '$.time_col' TIMESTAMP, + '$.stringset_col' STRINGSET, + '$.ideset_col' IDSET) + transform( + @1, + @0, + @1, + @2, + @3, + @4, + @5, + @6, + @7) + FROM x'{"id_col": "3", "string_col": "TEST", "decimal_col": "1.12", "bool_col": false, "time_col": "2013-07-15T01:18:46Z", "stringset_col": "stringset1","ideset_col": 1} + {"id_col": "4", "string_col": "TEST2", "decimal_col": "1.12", "bool_col": false, "time_col": "2013-07-15T01:18:46Z", "stringset_col": ["stringset1","stringset3"],"ideset_col": [1,2]} + {"id_col": "5", "string_col": "TEST", "int_col": "321", "decimal_col": "12.1", "bool_col": 1, "time_col": "2014-07-15T01:18:46Z", "stringset_col": "stringset2","ideset_col": [1,3]}' + with + BATCHSIZE 10000 + format 'NDJSON' + input 'STREAM' + allow_missing_values;`) + if err != nil { + t.Fatal(err) + } + }) + t.Run("BulkInsertNDJSONStringIDSet", func(t *testing.T) { + _, _, err = sql_test.MustQueryRows(t, c.GetNode(0).Server, `create table greg-test-01 ( + _id STRING, + id_col ID, + string_col STRING cachetype ranked size 1000, + int_col int, + decimal_col DECIMAL(2), + bool_col BOOL + time_col TIMESTAMP, + stringset_col STRINGSET, + ideset_col IDSET + );`) + if err != nil { + t.Fatal(err) + } + + _, _, err = sql_test.MustQueryRows(t, c.GetNode(0).Server, `BULK INSERT INTO greg-test-01 ( + _id, + id_col, + string_col, + int_col, + decimal_col, + bool_col, + time_col, + stringset_col, + ideset_col) + map ( + 'id_col' ID, + 'string_col' STRING, + 'int_col' INT, + 'decimal_col' DECIMAL(2), + 'bool_col' BOOL, + 'time_col' TIMESTAMP, + 'stringset_col' STRINGSET, + 'ideset_col' IDSET) + transform( + @1, + @0, + @1, + @2, + @3, + @4, + @5, + @6, + @7) + FROM '{"id_col": 
"3", "string_col": "TEST", "int_col": "-123", "decimal_col": "1.12", "bool_col": false, "time_col": "2013-07-15T01:18:46Z", "stringset_col": "stringset1","ideset_col": "1"}' + with + BATCHSIZE 10000 + format 'NDJSON' + input 'STREAM';`) + if err != nil { + t.Fatal(err) + } + + _, _, err = sql_test.MustQueryRows(t, c.GetNode(0).Server, `BULK INSERT INTO greg-test-01 ( + _id, + id_col, + string_col, + int_col, + decimal_col, + bool_col, + time_col, + stringset_col, + ideset_col) + map ( + 'id_col' ID, + 'string_col' STRING, + 'int_col' INT, + 'decimal_col' DECIMAL(2), + 'bool_col' BOOL, + 'time_col' TIMESTAMP, + 'stringset_col' STRINGSET, + 'ideset_col' IDSET) + transform( + @1, + @0, + @1, + @2, + @3, + @4, + @5, + @6, + @7) + FROM '{"id_col": "3", "string_col": "TEST", "int_col": "-123", "decimal_col": "1.12", "bool_col": false, "time_col": "2013-07-15T01:18:46Z", "stringset_col": "stringset1","ideset_col": ["1","2"]}' + with + BATCHSIZE 10000 + format 'NDJSON' + input 'STREAM';`) + if err != nil { + t.Fatal(err) + } + }) } func TestPlanner_SelectSelectSource(t *testing.T) { @@ -1835,7 +2065,8 @@ func TestPlanner_SelectSelectSource(t *testing.T) { Set(1, b=100) Set(2, a=20) Set(2, b=200) - `}); err != nil { + `, + }); err != nil { t.Fatal(err) } @@ -1915,7 +2146,8 @@ func TestPlanner_In(t *testing.T) { Set(1, a=10) Set(2, a=20) Set(3, a=30) - `}); err != nil { + `, + }); err != nil { t.Fatal(err) } @@ -1930,15 +2162,16 @@ func TestPlanner_In(t *testing.T) { Set(3, parentid=2) Set(3, x=300) - `}); err != nil { + `, + }); err != nil { t.Fatal(err) } t.Run("Count", func(t *testing.T) { t.Skip("Need to add join conditions to get this to pass") results, columns, err := sql_test.MustQueryRows(t, c.GetNode(0).Server, fmt.Sprintf(`SELECT %j._id, %j.a, %k._id, %k.parentid, %k.x FROM %j INNER JOIN %k ON %j._id = %k.parentid`, c, c, c, c, c, c, c, c, c)) - //results, columns, err := sql_test.MustQueryRows(t, c.GetNode(0).Server, fmt.Sprintf(`SELECT COUNT(*) FROM %j INNER JOIN %k ON %j._id = %k.parentid`, c, c, c, c)) - //results, columns, err := sql_test.MustQueryRows(t, c.GetNode(0).Server, fmt.Sprintf(`SELECT a FROM %j where a = 20`, c)) // SELECT COUNT(*) FROM %j INNER JOIN %k ON %j._id = %k.parentid + // results, columns, err := sql_test.MustQueryRows(t, c.GetNode(0).Server, fmt.Sprintf(`SELECT COUNT(*) FROM %j INNER JOIN %k ON %j._id = %k.parentid`, c, c, c, c)) + // results, columns, err := sql_test.MustQueryRows(t, c.GetNode(0).Server, fmt.Sprintf(`SELECT a FROM %j where a = 20`, c)) // SELECT COUNT(*) FROM %j INNER JOIN %k ON %j._id = %k.parentid if err != nil { t.Fatal(err) } @@ -2046,7 +2279,8 @@ func TestPlanner_Distinct(t *testing.T) { Set(1, a=10) Set(2, a=20) Set(3, a=30) - `}); err != nil { + `, + }); err != nil { t.Fatal(err) } @@ -2061,7 +2295,8 @@ func TestPlanner_Distinct(t *testing.T) { Set(3, parentid=2) Set(3, x=300) - `}); err != nil { + `, + }); err != nil { t.Fatal(err) } @@ -2158,7 +2393,8 @@ func TestPlanner_SelectTop(t *testing.T) { Set(1, b=100) Set(2, b=200) Set(3, b=300) - `}); err != nil { + `, + }); err != nil { t.Fatal(err) } @@ -2216,6 +2452,7 @@ func wireQueryFieldID(name string) *pilosa.WireQueryField { BaseType: dax.BaseTypeID, } } + func wireQueryFieldBool(name string) *pilosa.WireQueryField { return &pilosa.WireQueryField{ Name: dax.FieldName(name), @@ -2223,6 +2460,7 @@ func wireQueryFieldBool(name string) *pilosa.WireQueryField { BaseType: dax.BaseTypeBool, } } + func wireQueryFieldString(name string) *pilosa.WireQueryField { return &pilosa.WireQueryField{ 
Name: dax.FieldName(name), @@ -2230,6 +2468,7 @@ func wireQueryFieldString(name string) *pilosa.WireQueryField { BaseType: dax.BaseTypeString, } } + func wireQueryFieldInt(name string) *pilosa.WireQueryField { return &pilosa.WireQueryField{ Name: dax.FieldName(name), @@ -2237,6 +2476,7 @@ func wireQueryFieldInt(name string) *pilosa.WireQueryField { BaseType: dax.BaseTypeInt, } } + func wireQueryFieldTimestamp(name string) *pilosa.WireQueryField { return &pilosa.WireQueryField{ Name: dax.FieldName(name), @@ -2244,6 +2484,7 @@ func wireQueryFieldTimestamp(name string) *pilosa.WireQueryField { BaseType: dax.BaseTypeTimestamp, } } + func wireQueryFieldDecimal(name string, scale int64) *pilosa.WireQueryField { return &pilosa.WireQueryField{ Name: dax.FieldName(name), @@ -2254,3 +2495,102 @@ func wireQueryFieldDecimal(name string, scale int64) *pilosa.WireQueryField { }, } } + +// This test verifies that data sent to all nodes shows up in the results +func TestPlanner_BulkInsert_FB1831(t *testing.T) { + c := test.MustRunCluster(t, 3) + defer c.Close() + + _, _, err := sql_test.MustQueryRows(t, c.GetNode(0).Server, `create table iris (_id id, sepallength decimal(2), sepalwidth decimal(2), petallength decimal(2), petalwidth decimal(2), species string cachetype ranked size 1000);`) + if err != nil { + t.Fatal(err) + } + + _, _, err = sql_test.MustQueryRows(t, c.GetNode(0).Server, `bulk insert + into iris (_id, sepallength, sepalwidth, petallength, petalwidth, species) + map('id' id, + 'sepalLength' DECIMAL(2), + 'sepalWidth' DECIMAL(2), + 'petalLength' DECIMAL(2), + 'petalWidth' DECIMAL(2), + 'species' STRING) + from + x'{"id": 1, "sepalLength": "5.1", "sepalWidth": "3.5", "petalLength": "1.4", "petalWidth": "0.2", "species": "setosa"} + {"id": 2, "sepalLength": "4.9", "sepalWidth": "3.0", "petalLength": "1.4", "petalWidth": "0.2", "species": "setosa"} + {"id": 3, "sepalLength": "4.7", "sepalWidth": "3.2", "petalLength": "1.3", "petalWidth": "0.2", "species": "setosa"}' + with + format 'NDJSON' + input 'STREAM';`) + if err != nil { + t.Fatal(err) + } + _, _, err = sql_test.MustQueryRows(t, c.GetNode(1).Server, `bulk insert + into iris (_id, sepallength, sepalwidth, petallength, petalwidth, species) + map('id' id, + 'sepalLength' DECIMAL(2), + 'sepalWidth' DECIMAL(2), + 'petalLength' DECIMAL(2), + 'petalWidth' DECIMAL(2), + 'species' STRING) + from + x'{"id": 4, "sepalLength": "5.1", "sepalWidth": "3.5", "petalLength": "1.4", "petalWidth": "0.2", "species": "setosa"} + {"id": 5, "sepalLength": "4.9", "sepalWidth": "3.0", "petalLength": "1.4", "petalWidth": "0.2", "species": "setosa"} + {"id": 6, "sepalLength": "4.7", "sepalWidth": "3.2", "petalLength": "1.3", "petalWidth": "0.2", "species": "setosa"}' + with + format 'NDJSON' + input 'STREAM';`) + if err != nil { + t.Fatal(err) + } + _, _, err = sql_test.MustQueryRows(t, c.GetNode(2).Server, `bulk insert + into iris (_id, sepallength, sepalwidth, petallength, petalwidth, species) + map('id' id, + 'sepalLength' DECIMAL(2), + 'sepalWidth' DECIMAL(2), + 'petalLength' DECIMAL(2), + 'petalWidth' DECIMAL(2), + 'species' STRING) + from + x'{"id": 7, "sepalLength": "5.1", "sepalWidth": "3.5", "petalLength": "1.4", "petalWidth": "0.2", "species": "setosa"} + {"id": 8, "sepalLength": "4.9", "sepalWidth": "3.0", "petalLength": "1.4", "petalWidth": "0.2", "species": "setosa"} + {"id": 9, "sepalLength": "4.7", "sepalWidth": "3.2", "petalLength": "1.3", "petalWidth": "0.2", "species": "setosa"}' + with + format 'NDJSON' + input 'STREAM';`) + if err != nil { + 
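+ // a failed insert on any node would invalidate the cross-node visibility check at the end of this test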
t.Fatal(err) + } + _, _, err = sql_test.MustQueryRows(t, c.GetNode(0).Server, `bulk insert + into iris (_id, sepallength, sepalwidth, petallength, petalwidth, species) + map('id' id, + 'sepalLength' DECIMAL(2), + 'sepalWidth' DECIMAL(2), + 'petalLength' DECIMAL(2), + 'petalWidth' DECIMAL(2), + 'species' STRING) + from + x'{"id": 1048577, "sepalLength": "5.1", "sepalWidth": "3.5", "petalLength": "1.4", "petalWidth": "0.2", "species": "setosa"} + {"id": 2097153, "sepalLength": "4.9", "sepalWidth": "3.0", "petalLength": "1.4", "petalWidth": "0.2", "species": "setosa"} + {"id": 3145729, "sepalLength": "4.7", "sepalWidth": "3.2", "petalLength": "1.3", "petalWidth": "0.2", "species": "setosa"}' + with + format 'NDJSON' + input 'STREAM';`) + if err != nil { + t.Fatal(err) + } + results, _, err := sql_test.MustQueryRows(t, c.GetNode(0).Server, `select _id from iris`) + if err != nil { + t.Fatal(err) + } + got := make([]int64, 0) + for i := range results { + got = append(got, results[i][0].(int64)) + } + sort.Slice(got, func(i, j int) bool { + return got[i] < got[j] + }) + expected := []int64{1, 2, 3, 4, 5, 6, 7, 8, 9, 1048577, 2097153, 3145729} + if !reflect.DeepEqual(got, expected) { + t.Fatal("Expecting to be equal") + } +} diff --git a/sql3/test/defs/defs.go b/sql3/test/defs/defs.go index c6c8d0d69..e25e14510 100644 --- a/sql3/test/defs/defs.go +++ b/sql3/test/defs/defs.go @@ -15,10 +15,16 @@ var TableTests []TableTest = []TableTest{ selectTests, selectKeyedTests, + selectHavingTests, orderByTests, + distinctTests, + + subqueryTests, topTests, + deleteTests, + setLiteralTests, setFunctionTests, setParameterTests, diff --git a/sql3/test/defs/defs_create_table.go b/sql3/test/defs/defs_create_table.go index 7d9b8f22f..724dd03f2 100644 --- a/sql3/test/defs/defs_create_table.go +++ b/sql3/test/defs/defs_create_table.go @@ -17,33 +17,6 @@ var createTable = TableTest{ ), ExpErr: "invalid value '10001' for key partitions (should be a number between 1-10000)", }, - { - name: "shardWidthSetTo0", - SQLs: sqls( - "create table foo (_id id, i1 int) shardwidth 0", - ), - ExpErr: "invalid value '0' for shardwidth (should be a number that is a power of 2 and greater or equal to 2^16)", - }, - { - name: "shardWidthSetTo11", - SQLs: sqls( - "create table foo (_id id, i1 int) shardwidth 11", - ), - ExpErr: "invalid value '11' for shardwidth (should be a number that is a power of 2 and greater or equal to 2^16)", - }, - { - name: "shardWidthSetTo11", - SQLs: sqls( - "create table foo (_id id, i1 int) shardwidth 32", - ), - ExpErr: "invalid value '32' for shardwidth (should be a number that is a power of 2 and greater or equal to 2^16)", - }, - { - name: "shardWidthSetTo131072", - SQLs: sqls( - "create table foo (_id id, i1 int) shardwidth 131072", - ), - }, { name: "commentInt", SQLs: sqls( diff --git a/sql3/test/defs/defs_delete.go b/sql3/test/defs/defs_delete.go new file mode 100644 index 000000000..68d2ff851 --- /dev/null +++ b/sql3/test/defs/defs_delete.go @@ -0,0 +1,389 @@ +// Copyright 2021 Molecula Corp. All rights reserved. 
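+// defs_delete.go exercises DELETE statements against a table covering the scalar and set column types; each delete is followed by a select that verifies the targeted rows are gone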
+package defs + +import "time" + +func earlyMay2022() time.Time { + tm, err := time.ParseInLocation(time.RFC3339, "2022-05-05T13:00:00+00:00", time.UTC) + if err != nil { + panic(err.Error()) + } + return tm +} + +func lateMay2022() time.Time { + tm, err := time.ParseInLocation(time.RFC3339, "2022-05-28T13:00:00+00:00", time.UTC) + if err != nil { + panic(err.Error()) + } + return tm +} + +// DELETE tests +var deleteTests = TableTest{ + name: "delete_tests", + Table: tbl( + "del_all_types", + srcHdrs( + srcHdr("_id", fldTypeID), + srcHdr("i1", fldTypeInt, "min 0", "max 1000"), + srcHdr("b1", fldTypeBool), + srcHdr("d1", fldTypeDecimal2), + srcHdr("id1", fldTypeID), + srcHdr("ids1", fldTypeIDSet), + srcHdr("s1", fldTypeString), + srcHdr("ss1", fldTypeStringSet), + srcHdr("t1", fldTypeTimestamp), + ), + srcRows( + srcRow(int64(1), int64(1000), bool(true), float64(12.34), int64(20), []int64{101, 102}, string("foo"), []string{"101", "102"}, earlyMay2022()), + srcRow(int64(2), int64(1000), bool(true), float64(12.34), int64(20), []int64{101, 102}, string("foo"), []string{"101", "102"}, earlyMay2022()), + srcRow(int64(3), int64(1000), bool(true), float64(12.34), int64(20), []int64{101, 102}, string("foo"), []string{"101", "102"}, earlyMay2022()), + srcRow(int64(4), int64(1000), bool(true), float64(12.34), int64(20), []int64{101, 102}, string("foo"), []string{"101", "102"}, earlyMay2022()), + srcRow(int64(5), int64(1000), bool(true), float64(12.34), int64(20), []int64{101, 102}, string("foo"), []string{"101", "102"}, lateMay2022()), + ), + ), + SQLTests: []SQLTest{ + { + SQLs: sqls( + "delete from del_all_types where _id = 1;", + ), + ExpHdrs: hdrs(), + ExpRows: rows(), + Compare: CompareExactUnordered, + }, + { + // ordering is important here - this test validates the previous delete happened + SQLs: sqls( + "select _id from del_all_types where _id = 1;", + ), + ExpHdrs: hdrs( + hdr("_id", fldTypeID), + ), + ExpRows: rows(), + Compare: CompareExactUnordered, + }, + { + SQLs: sqls( + "delete from del_all_types where _id in (2, 3);", + ), + ExpHdrs: hdrs(), + ExpRows: rows(), + Compare: CompareExactUnordered, + }, + { + // ordering is important here - this test validates the previous delete happened + SQLs: sqls( + "select _id from del_all_types where _id = 2 or _id = 3;", + ), + ExpHdrs: hdrs( + hdr("_id", fldTypeID), + ), + ExpRows: rows(), + Compare: CompareExactUnordered, + }, + + // delete with in + { + SQLs: sqls( + "create table sub_query (_id id, i1 int min 0 max 1000);", + ), + ExpHdrs: hdrs(), + ExpRows: rows(), + Compare: CompareExactUnordered, + }, + { + SQLs: sqls( + "insert into sub_query values (1, 1), (2, 2), (3, 3), (4, 4), (5, 5), (6, 6);", + ), + ExpHdrs: hdrs(), + ExpRows: rows(), + Compare: CompareExactUnordered, + }, + { + SQLs: sqls( + "delete from del_all_types where _id in (select _id from sub_query where i1 > 3) and i1 > 10;", + ), + ExpHdrs: hdrs(), + ExpRows: rows(), + Compare: CompareExactUnordered, + }, + { + SQLs: sqls( + "select _id from del_all_types where _id > 4;", + ), + ExpHdrs: hdrs( + hdr("_id", fldTypeID), + ), + ExpRows: rows(), + Compare: CompareExactUnordered, + }, + + // dates + { + SQLs: sqls( + `insert into del_all_types + values + (1,1000,true,12.34,20,[101,102],'foo',['101','102'],'2010-01-01T00:00:00Z'), + (2,1000,true,12.34,20,[101,102],'foo',['101','102'],'2012-11-01T22:08:41Z'), + (3,1000,true,12.34,20,[101,102],'foo',['101','102'],'2012-11-01T22:08:41Z'), + (4,1000,true,12.34,20,[101,102],'foo',['101','102'],'2020-01-01T00:00:00Z');`, + ), + 
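+ // (reload rows spanning 2010 to 2020 so the timestamp-range delete below has targets)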
ExpHdrs: hdrs(), + ExpRows: rows(), + Compare: CompareExactUnordered, + }, + { + SQLs: sqls( + "delete from del_all_types where t1 > '2010-01-01T00:00:00Z';", + ), + ExpHdrs: hdrs(), + ExpRows: rows(), + Compare: CompareExactUnordered, + }, + { + SQLs: sqls( + "select _id from del_all_types where _id > 1;", + ), + ExpHdrs: hdrs( + hdr("_id", fldTypeID), + ), + ExpRows: rows(), + Compare: CompareExactUnordered, + }, + + // ints + { + SQLs: sqls( + `insert into del_all_types + values + (1,100,true,12.34,20,[101,102],'foo',['101','102'],'2010-01-01T00:00:00Z'), + (2,200,true,12.34,20,[101,102],'foo',['101','102'],'2012-11-01T22:08:41Z'), + (3,300,true,12.34,20,[101,102],'foo',['101','102'],'2012-11-01T22:08:41Z'), + (4,400,true,12.34,20,[101,102],'foo',['101','102'],'2020-01-01T00:00:00Z');`, + ), + ExpHdrs: hdrs(), + ExpRows: rows(), + Compare: CompareExactUnordered, + }, + { + SQLs: sqls( + "delete from del_all_types where i1 > 200;", + ), + ExpHdrs: hdrs(), + ExpRows: rows(), + Compare: CompareExactUnordered, + }, + { + SQLs: sqls( + "select _id from del_all_types where i1 > 200;", + ), + ExpHdrs: hdrs( + hdr("_id", fldTypeID), + ), + ExpRows: rows(), + Compare: CompareExactUnordered, + }, + { + SQLs: sqls( + "delete from del_all_types where i1 < 300;", + ), + ExpHdrs: hdrs(), + ExpRows: rows(), + Compare: CompareExactUnordered, + }, + { + SQLs: sqls( + "select _id from del_all_types;", + ), + ExpHdrs: hdrs( + hdr("_id", fldTypeID), + ), + ExpRows: rows(), + Compare: CompareExactUnordered, + }, + + // bool + { + SQLs: sqls( + `insert into del_all_types + values + (1,100,true,12.34,20,[101,102],'foo',['101','102'],'2010-01-01T00:00:00Z'), + (2,200,true,12.34,20,[101,102],'foo',['101','102'],'2012-11-01T22:08:41Z'), + (3,300,true,12.34,20,[101,102],'foo',['101','102'],'2012-11-01T22:08:41Z'), + (4,400,true,12.34,20,[101,102],'foo',['101','102'],'2020-01-01T00:00:00Z');`, + ), + ExpHdrs: hdrs(), + ExpRows: rows(), + Compare: CompareExactUnordered, + }, + { + SQLs: sqls( + "delete from del_all_types where b1 = true;", + ), + ExpHdrs: hdrs(), + ExpRows: rows(), + Compare: CompareExactUnordered, + }, + { + SQLs: sqls( + "select _id from del_all_types;", + ), + ExpHdrs: hdrs( + hdr("_id", fldTypeID), + ), + ExpRows: rows(), + Compare: CompareExactUnordered, + }, + + // id sets + { + SQLs: sqls( + `insert into del_all_types + values + (1,100,true,12.34,20,[101,102],'foo',['101','102'],'2010-01-01T00:00:00Z'), + (2,200,true,12.34,20,[101,102],'foo',['101','102'],'2012-11-01T22:08:41Z'), + (3,300,true,12.34,20,[101,102],'foo',['101','102'],'2012-11-01T22:08:41Z'), + (4,400,true,12.34,20,[101,102,103],'foo',['101','102'],'2020-01-01T00:00:00Z');`, + ), + ExpHdrs: hdrs(), + ExpRows: rows(), + Compare: CompareExactUnordered, + }, + { + SQLs: sqls( + "delete from del_all_types where setcontains(ids1, 103);", + ), + ExpHdrs: hdrs(), + ExpRows: rows(), + Compare: CompareExactUnordered, + }, + { + SQLs: sqls( + "select _id from del_all_types where _id = 4;", + ), + ExpHdrs: hdrs( + hdr("_id", fldTypeID), + ), + ExpRows: rows(), + Compare: CompareExactUnordered, + }, + + // compound expressions + { + SQLs: sqls( + `insert into del_all_types + values + (1,100,true,12.34,20,[101,102],'foo',['101','102'],'2010-01-01T00:00:00Z'), + (2,200,true,12.35,20,[101,102],'foo',['101','102'],'2012-11-01T22:08:41Z'), + (3,300,true,12.36,20,[101,102],'foo',['101','102'],'2012-11-01T22:08:41Z'), + (4,400,true,12.37,20,[101,102,103],'foo',['101','102'],'2020-01-01T00:00:00Z');`, + ), + ExpHdrs: hdrs(), + ExpRows: rows(), + 
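+ // (like DELETE, INSERT returns an empty result set, hence the empty expected headers and rows)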
Compare: CompareExactUnordered, + }, + { + SQLs: sqls( + "delete from del_all_types where d1 = 12.36 and i1 = 300;", + ), + ExpHdrs: hdrs(), + ExpRows: rows(), + Compare: CompareExactUnordered, + }, + { + SQLs: sqls( + "select _id from del_all_types where _id = 3;", + ), + ExpHdrs: hdrs( + hdr("_id", fldTypeID), + ), + ExpRows: rows(), + Compare: CompareExactUnordered, + }, + { + SQLs: sqls( + "delete from del_all_types where d1 = 12.34 or i1 = 200;", + ), + ExpHdrs: hdrs(), + ExpRows: rows(), + Compare: CompareExactUnordered, + }, + { + SQLs: sqls( + "select _id from del_all_types where _id = 1 or _id = 2;", + ), + ExpHdrs: hdrs( + hdr("_id", fldTypeID), + ), + ExpRows: rows(), + Compare: CompareExactUnordered, + }, + + // scalar function filters + { + SQLs: sqls( + `insert into del_all_types + values + (1,100,true,12.34,20,[101,102],'foo',['101','102'],'2010-01-01T00:00:00Z'), + (2,200,true,12.35,20,[101,102],'bar',['101','102'],'2012-11-01T22:08:41Z'), + (3,300,true,12.36,20,[101,102],'zoo',['101','102'],'2012-11-01T22:08:41Z'), + (4,400,true,12.37,20,[101,102,103],'raz',['101','102','103'],'2020-01-01T00:00:00Z');`, + ), + ExpHdrs: hdrs(), + ExpRows: rows(), + Compare: CompareExactUnordered, + }, + { + SQLs: sqls( + "delete from del_all_types where substring(s1, 0, 1) = 'f';", + ), + ExpHdrs: hdrs(), + ExpRows: rows(), + Compare: CompareExactUnordered, + }, + { + SQLs: sqls( + "select _id from del_all_types where _id = 1;", + ), + ExpHdrs: hdrs( + hdr("_id", fldTypeID), + ), + ExpRows: rows(), + Compare: CompareExactUnordered, + }, + + // delete everything + { + SQLs: sqls( + `insert into del_all_types + values + (1,100,true,12.34,20,[101,102],'foo',['101','102'],'2010-01-01T00:00:00Z'), + (2,200,true,12.34,20,[101,102],'foo',['101','102'],'2012-11-01T22:08:41Z'), + (3,300,true,12.34,20,[101,102],'foo',['101','102'],'2012-11-01T22:08:41Z'), + (4,400,true,12.34,20,[101,102],'foo',['101','102'],'2020-01-01T00:00:00Z');`, + ), + ExpHdrs: hdrs(), + ExpRows: rows(), + Compare: CompareExactUnordered, + }, + { + SQLs: sqls( + "delete from del_all_types;", + ), + ExpHdrs: hdrs(), + ExpRows: rows(), + Compare: CompareExactUnordered, + }, + { + SQLs: sqls( + "select _id from del_all_types;", + ), + ExpHdrs: hdrs( + hdr("_id", fldTypeID), + ), + ExpRows: rows(), + Compare: CompareExactUnordered, + }, + }, +} diff --git a/sql3/test/defs/defs_distinct.go b/sql3/test/defs/defs_distinct.go new file mode 100644 index 000000000..9bd80ca24 --- /dev/null +++ b/sql3/test/defs/defs_distinct.go @@ -0,0 +1,157 @@ +package defs + +import "github.com/featurebasedb/featurebase/v3/pql" + +// distinct tests +var distinctTests = TableTest{ + Table: tbl( + "distinct_test", + srcHdrs( + srcHdr("_id", fldTypeID), + srcHdr("i1", fldTypeInt), + srcHdr("b1", fldTypeBool), + srcHdr("id1", fldTypeID), + srcHdr("ids1", fldTypeIDSet), + srcHdr("d1", fldTypeDecimal2), + srcHdr("s1", fldTypeString), + srcHdr("ss1", fldTypeStringSet), + srcHdr("ts1", fldTypeTimestamp), + ), + srcRows( + srcRow(int64(1), int64(10), bool(false), int64(1), []int64{10, 20, 30}, float64(10.00), string("10"), []string{"10", "20", "30"}, knownTimestamp()), + srcRow(int64(2), int64(20), bool(true), int64(2), []int64{11, 21, 31}, float64(20.00), string("20"), []string{"11", "21", "31"}, knownTimestamp()), + srcRow(int64(3), int64(30), bool(false), int64(3), []int64{12, 22, 32}, float64(30.00), string("30"), []string{"12", "22", "32"}, knownTimestamp()), + srcRow(int64(4), int64(10), bool(false), int64(1), []int64{10, 20, 30}, float64(10.00), 
string("10"), []string{"10", "20", "30"}, knownTimestamp()), + srcRow(int64(5), int64(20), bool(true), int64(2), []int64{11, 21, 31}, float64(20.00), string("20"), []string{"11", "21", "31"}, knownTimestamp()), + srcRow(int64(6), int64(30), bool(false), int64(3), []int64{12, 22, 32}, float64(30.00), string("30"), []string{"12", "22", "32"}, knownTimestamp()), + ), + ), + SQLTests: []SQLTest{ + { + SQLs: sqls( + "select distinct i1, b1, id1 from distinct_test", + ), + ExpHdrs: hdrs( + hdr("i1", fldTypeInt), + hdr("b1", fldTypeBool), + hdr("id1", fldTypeID), + ), + ExpRows: rows( + row(int64(10), bool(false), int64(1)), + row(int64(20), bool(true), int64(2)), + row(int64(30), bool(false), int64(3)), + ), + Compare: CompareExactUnordered, + }, + { + SQLs: sqls( + "select distinct i1 from distinct_test", + ), + ExpHdrs: hdrs( + hdr("i1", fldTypeInt), + ), + ExpRows: rows( + row(int64(10)), + row(int64(20)), + row(int64(30)), + ), + Compare: CompareExactUnordered, + }, + { + SQLs: sqls( + "select distinct b1 from distinct_test", + ), + ExpHdrs: hdrs( + hdr("b1", fldTypeBool), + ), + ExpRows: rows( + row(bool(false)), + row(bool(true)), + ), + Compare: CompareExactUnordered, + }, + { + SQLs: sqls( + "select distinct id1 from distinct_test", + ), + ExpHdrs: hdrs( + hdr("id1", fldTypeID), + ), + ExpRows: rows( + row(int64(1)), + row(int64(2)), + row(int64(3)), + ), + Compare: CompareExactUnordered, + }, + { + SQLs: sqls( + "select distinct ids1 from distinct_test", + ), + ExpHdrs: hdrs( + hdr("ids1", fldTypeIDSet), + ), + ExpRows: rows( + row([]int64{10, 20, 30}), + row([]int64{11, 21, 31}), + row([]int64{12, 22, 32}), + ), + Compare: CompareExactUnordered, + }, + { + SQLs: sqls( + "select distinct d1 from distinct_test", + ), + ExpHdrs: hdrs( + hdr("d1", fldTypeDecimal2), + ), + ExpRows: rows( + row(pql.NewDecimal(1000, 2)), + row(pql.NewDecimal(2000, 2)), + row(pql.NewDecimal(3000, 2)), + ), + Compare: CompareExactUnordered, + }, + { + SQLs: sqls( + "select distinct s1 from distinct_test", + ), + ExpHdrs: hdrs( + hdr("s1", fldTypeString), + ), + ExpRows: rows( + row(string("10")), + row(string("20")), + row(string("30")), + ), + Compare: CompareExactUnordered, + }, + { + SQLs: sqls( + "select distinct ss1 from distinct_test", + ), + ExpHdrs: hdrs( + hdr("ss1", fldTypeStringSet), + ), + ExpRows: rows( + row([]string{"10", "20", "30"}), + row([]string{"11", "21", "31"}), + row([]string{"12", "22", "32"}), + ), + Compare: CompareExactUnordered, + SortStringKeys: true, + }, + { + SQLs: sqls( + "select distinct ts1 from distinct_test", + ), + ExpHdrs: hdrs( + hdr("ts1", fldTypeTimestamp), + ), + ExpRows: rows( + row(knownTimestamp()), + ), + Compare: CompareExactUnordered, + }, + }, +} diff --git a/sql3/test/defs/defs_having.go b/sql3/test/defs/defs_having.go new file mode 100644 index 000000000..a9aa2abec --- /dev/null +++ b/sql3/test/defs/defs_having.go @@ -0,0 +1,43 @@ +package defs + +var selectHavingTests = TableTest{ + name: "select-having", + Table: tbl( + "having_test", + srcHdrs( + srcHdr("_id", fldTypeID), + srcHdr("an_int", fldTypeInt, "min 0", "max 100"), + srcHdr("an_id_set", fldTypeIDSet), + srcHdr("an_id", fldTypeID), + srcHdr("a_string", fldTypeString), + srcHdr("a_string_set", fldTypeStringSet), + srcHdr("a_decimal", fldTypeDecimal2), + ), + srcRows( + srcRow(int64(1), int64(11), []int64{11, 12, 13}, int64(101), "str1", []string{"a1", "b1", "c1"}, float64(123.45)), + srcRow(int64(2), int64(22), []int64{21, 22, 23}, int64(201), "str2", []string{"a2", "b2", "c2"}, float64(234.56)), + 
srcRow(int64(3), int64(33), []int64{31, 32, 33}, int64(301), "str3", []string{"a3", "b3", "c3"}, float64(345.67)), + srcRow(int64(4), int64(44), []int64{41, 42, 43}, int64(401), "str4", []string{"a4", "b4", "c4"}, float64(456.78)), + ), + ), + SQLTests: []SQLTest{ + { + name: "select-having", + SQLs: sqls( + "select count(*), an_int from having_test group by an_int having count(*) > 0", + ), + ExpHdrs: hdrs( + hdr("", fldTypeInt), + hdr("an_int", fldTypeInt), + ), + ExpRows: rows( + row(int64(1), int64(11)), + row(int64(1), int64(22)), + row(int64(1), int64(33)), + row(int64(1), int64(44)), + ), + Compare: CompareExactUnordered, + SortStringKeys: true, + }, + }, +} diff --git a/sql3/test/defs/defs_join.go b/sql3/test/defs/defs_join.go index 49d156070..d1269fb18 100644 --- a/sql3/test/defs/defs_join.go +++ b/sql3/test/defs/defs_join.go @@ -28,7 +28,7 @@ var joinTestsOrders = TableTest{ "orders", srcHdrs( srcHdr("_id", fldTypeID), - srcHdr("userid", fldTypeID), + srcHdr("userid", fldTypeInt), srcHdr("price", fldTypeDecimal2), ), srcRows( @@ -103,4 +103,18 @@ var joinTests = TableTest{ Compare: CompareExactOrdered, }, }, + PQLTests: []PQLTest{ + { + name: "distinctjoin", + Table: "users", + PQLs: []string{"Intersect(Distinct(Row(price > 10), index=orders, field=userid))"}, + ExpHdrs: hdrs( + hdr("_id", fldTypeID), + ), + ExpRows: rows( + row(int64(1)), + row(int64(2)), + ), + }, + }, } diff --git a/sql3/test/defs/defs_null.go b/sql3/test/defs/defs_null.go index 5b65478d1..1d8ec6c5d 100644 --- a/sql3/test/defs/defs_null.go +++ b/sql3/test/defs/defs_null.go @@ -302,13 +302,24 @@ var nullFilterTests = TableTest{ SQLs: sqls( "select _id from null_filter_all_types where _id is null", ), - ExpErr: "'_id' column cannot be used in a is/is not null filter expression", + ExpHdrs: hdrs( + hdr("_id", fldTypeID), + ), + ExpRows: rows(), + Compare: CompareExactUnordered, }, { SQLs: sqls( "select _id from null_filter_all_types where _id is not null", ), - ExpErr: "'_id' column cannot be used in a is/is not null filter expression", + ExpHdrs: hdrs( + hdr("_id", fldTypeID), + ), + ExpRows: rows( + row(int64(1)), + row(int64(2)), + ), + Compare: CompareExactUnordered, }, { SQLs: sqls( @@ -338,13 +349,25 @@ var nullFilterTests = TableTest{ SQLs: sqls( "select _id from null_filter_all_types where b1 is null", ), - ExpErr: "unsupported type 'bool' for is/is not null filter expression", + ExpHdrs: hdrs( + hdr("_id", fldTypeID), + ), + ExpRows: rows( + row(int64(1)), + ), + Compare: CompareExactUnordered, }, { SQLs: sqls( "select _id from null_filter_all_types where b1 is not null", ), - ExpErr: "unsupported type 'bool' for is/is not null filter expression", + ExpHdrs: hdrs( + hdr("_id", fldTypeID), + ), + ExpRows: rows( + row(int64(2)), + ), + Compare: CompareExactUnordered, }, { SQLs: sqls( @@ -374,49 +397,97 @@ var nullFilterTests = TableTest{ SQLs: sqls( "select _id from null_filter_all_types where id1 is null", ), - ExpErr: "unsupported type 'id' for is/is not null filter expression", + ExpHdrs: hdrs( + hdr("_id", fldTypeID), + ), + ExpRows: rows( + row(int64(1)), + ), + Compare: CompareExactUnordered, }, { SQLs: sqls( "select _id from null_filter_all_types where id1 is not null", ), - ExpErr: "unsupported type 'id' for is/is not null filter expression", + ExpHdrs: hdrs( + hdr("_id", fldTypeID), + ), + ExpRows: rows( + row(int64(2)), + ), + Compare: CompareExactUnordered, }, { SQLs: sqls( "select _id from null_filter_all_types where ids1 is null", ), - ExpErr: "unsupported type 'idset' for is/is not null 
filter expression", + ExpHdrs: hdrs( + hdr("_id", fldTypeID), + ), + ExpRows: rows( + row(int64(1)), + ), + Compare: CompareExactUnordered, }, { SQLs: sqls( "select _id from null_filter_all_types where ids1 is not null", ), - ExpErr: "unsupported type 'idset' for is/is not null filter expression", + ExpHdrs: hdrs( + hdr("_id", fldTypeID), + ), + ExpRows: rows( + row(int64(2)), + ), + Compare: CompareExactUnordered, }, { SQLs: sqls( "select _id from null_filter_all_types where s1 is null", ), - ExpErr: "unsupported type 'string' for is/is not null filter expression", + ExpHdrs: hdrs( + hdr("_id", fldTypeID), + ), + ExpRows: rows( + row(int64(1)), + ), + Compare: CompareExactUnordered, }, { SQLs: sqls( "select _id from null_filter_all_types where s1 is not null", ), - ExpErr: "unsupported type 'string' for is/is not null filter expression", + ExpHdrs: hdrs( + hdr("_id", fldTypeID), + ), + ExpRows: rows( + row(int64(2)), + ), + Compare: CompareExactUnordered, }, { SQLs: sqls( "select _id from null_filter_all_types where ss1 is null", ), - ExpErr: "unsupported type 'stringset' for is/is not null filter expression", + ExpHdrs: hdrs( + hdr("_id", fldTypeID), + ), + ExpRows: rows( + row(int64(1)), + ), + Compare: CompareExactUnordered, }, { SQLs: sqls( "select _id from null_filter_all_types where ss1 is not null", ), - ExpErr: "unsupported type 'stringset' for is/is not null filter expression", + ExpHdrs: hdrs( + hdr("_id", fldTypeID), + ), + ExpRows: rows( + row(int64(2)), + ), + Compare: CompareExactUnordered, }, { SQLs: sqls( diff --git a/sql3/test/defs/defs_string_functions.go b/sql3/test/defs/defs_string_functions.go index 596199cd3..1790fd8e4 100644 --- a/sql3/test/defs/defs_string_functions.go +++ b/sql3/test/defs/defs_string_functions.go @@ -17,6 +17,32 @@ var stringScalarFunctionsTests = TableTest{ ), ), SQLTests: []SQLTest{ + { + name: "ReverseNull", + SQLs: sqls( + "select reverse(null)", + ), + ExpHdrs: hdrs( + hdr("", fldTypeString), + ), + ExpRows: rows( + row(nil), + ), + Compare: CompareExactUnordered, + }, + { + name: "ReverseEmpty", + SQLs: sqls( + "select reverse('')", + ), + ExpHdrs: hdrs( + hdr("", fldTypeString), + ), + ExpRows: rows( + row(string("")), + ), + Compare: CompareExactUnordered, + }, { name: "ReverseString", SQLs: sqls( @@ -44,44 +70,58 @@ var stringScalarFunctionsTests = TableTest{ Compare: CompareExactUnordered, }, { - name: "SubstringPositiveIndex", + name: "SubstringNull", SQLs: sqls( - "select substring('testing', 1, 3)", + "select substring(null, 1, 3)", ), ExpHdrs: hdrs( hdr("", fldTypeString), ), ExpRows: rows( - row(string("est")), + row(nil), ), Compare: CompareExactUnordered, }, { - name: "SubstringNegativeIndex", + name: "SubstringNullInt", SQLs: sqls( - "select substring('testing', -10, 14)", + "select substring('some_string', null, 3)", ), ExpHdrs: hdrs( hdr("", fldTypeString), ), ExpRows: rows( - row(string("test")), + row(nil), ), Compare: CompareExactUnordered, }, { - name: "SubstringNoLength", + name: "SubstringPositiveIndex", SQLs: sqls( - "select substring('testing', -5)", + "select substring('testing', 1, 3)", ), ExpHdrs: hdrs( hdr("", fldTypeString), ), ExpRows: rows( - row(string("testing")), + row(string("est")), ), Compare: CompareExactUnordered, }, + { + name: "SubstringNegativeIndex", + SQLs: sqls( + "select substring('testing', -10, 14)", + ), + ExpErr: "[0:0] value '-10' out of range", + }, + { + name: "SubstringNoLength", + SQLs: sqls( + "select substring('testing', -5)", + ), + ExpErr: "[0:0] value '-5' out of range", + }, { name: 
"ReverseSubstring", SQLs: sqls( @@ -108,6 +148,131 @@ var stringScalarFunctionsTests = TableTest{ ), Compare: CompareExactUnordered, }, + { + name: "StringSplitNull", + SQLs: sqls( + "select stringsplit(null, ',')", + ), + ExpHdrs: hdrs( + hdr("", fldTypeString), + ), + ExpRows: rows( + row(nil), + ), + Compare: CompareExactUnordered, + }, + { + name: "StringSplitNoPos", + SQLs: sqls( + "select stringsplit('string,split', ',')", + ), + ExpHdrs: hdrs( + hdr("", fldTypeString), + ), + ExpRows: rows( + row(string("string")), + ), + Compare: CompareExactUnordered, + }, + { + name: "StringSplitPos", + SQLs: sqls( + "select stringsplit('string,split,now', stringsplit(',mid,', 'mid', 1), 2)", + ), + ExpHdrs: hdrs( + hdr("", fldTypeString), + ), + ExpRows: rows( + row(string("now")), + ), + Compare: CompareExactUnordered, + }, + { + name: "CharNull", + SQLs: sqls( + "select char(null)", + ), + ExpHdrs: hdrs( + hdr("", fldTypeString), + ), + ExpRows: rows( + row(nil), + ), + Compare: CompareExactUnordered, + }, + { + name: "CharInt", + SQLs: sqls( + "select char(82)", + ), + ExpHdrs: hdrs( + hdr("", fldTypeString), + ), + ExpRows: rows( + row(string("R")), + ), + Compare: CompareExactUnordered, + }, + { + name: "CharString", + SQLs: sqls( + "select char('R')", + ), + ExpErr: "integer expression expected", + }, + { + name: "ASCIINull", + SQLs: sqls( + "select ascii(null)", + ), + ExpHdrs: hdrs( + hdr("", fldTypeInt), + ), + ExpRows: rows( + row(nil), + ), + Compare: CompareExactUnordered, + }, + { + name: "ASCIILengthMisMatch", + SQLs: sqls( + "select ascii('longer')", + ), + ExpErr: "[0:0] value 'longer' should be of the length 1", + }, + { + name: "ASCIIString", + SQLs: sqls( + "select ascii('R')", + ), + ExpHdrs: hdrs( + hdr("", fldTypeInt), + ), + ExpRows: rows( + row(int64(82)), + ), + Compare: CompareExactUnordered, + }, + { + name: "ASCIIInt", + SQLs: sqls( + "select ascii(32)", + ), + ExpErr: "string expression expected", + }, + { + name: "UpperNull", + SQLs: sqls( + "select upper(null)", + ), + ExpHdrs: hdrs( + hdr("", fldTypeString), + ), + ExpRows: rows( + row(nil), + ), + Compare: CompareExactOrdered, + }, { name: "ConvertingStringtoUpper", SQLs: sqls( @@ -135,5 +300,432 @@ var stringScalarFunctionsTests = TableTest{ ), ExpErr: "string expression expected", }, + { + name: "LowerNull", + SQLs: sqls( + "select lower(null)", + ), + ExpHdrs: hdrs( + hdr("", fldTypeString), + ), + ExpRows: rows( + row(nil), + ), + Compare: CompareExactOrdered, + }, + { + name: "StringLower", + SQLs: sqls( + "select lower('AaBbCcDdEeFfGg-_0123')", + ), + ExpHdrs: hdrs( + hdr("", fldTypeString), + ), + ExpRows: rows( + row(string("aabbccddeeffgg-_0123")), + ), + Compare: CompareExactOrdered, + }, + { + name: "IncorrectArgumentsforLower", + SQLs: sqls( + "select lower('LOWER','lower')", + ), + ExpErr: "'lower': count of formal parameters (1) does not match count of actual parameters (2)", + }, + { + name: "IncorrectInputforLower", + SQLs: sqls( + "select lower(1234)", + ), + ExpErr: "string expression expected", + }, + { + name: "ReplaceAllNullString", + SQLs: sqls( + "select replaceall(null,'data','feature')", + ), + ExpHdrs: hdrs( + hdr("", fldTypeString), + ), + ExpRows: rows( + row(nil), + ), + Compare: CompareExactUnordered, + }, + { + name: "ReplaceAllNullArg", + SQLs: sqls( + "select replaceall('hello database',null,'feature')", + ), + ExpHdrs: hdrs( + hdr("", fldTypeString), + ), + ExpRows: rows( + row(nil), + ), + Compare: CompareExactUnordered, + }, + { + name: "ReplaceAllString", + SQLs: sqls( + 
"select replaceall('hello database','data','feature')", + ), + ExpHdrs: hdrs( + hdr("", fldTypeString), + ), + ExpRows: rows( + row(string("hello featurebase")), + ), + Compare: CompareExactUnordered, + }, + { + name: "ReplaceAllStringMultiple", + SQLs: sqls( + "select replaceall('Buffalo Buffalo buffalo buffalo Buffalo', 'Buffalo', 'Buffalo buffalo');", + ), + ExpHdrs: hdrs( + hdr("", fldTypeString), + ), + ExpRows: rows( + row(string("Buffalo buffalo Buffalo buffalo buffalo buffalo Buffalo buffalo")), + ), + Compare: CompareExactUnordered, + }, + { + name: "ReplaceAllReverseSubstringUpper", + SQLs: sqls( + "select replaceall(reverse('gnitset'),substring('testing',4),upper('ed'));", + ), + ExpHdrs: hdrs( + hdr("", fldTypeString), + ), + ExpRows: rows( + row(string("testED")), + ), + Compare: CompareExactUnordered, + }, + { + name: "IncorrectArgumentsReplaceAll", + SQLs: sqls( + "select replaceall('ab','b')", + ), + ExpErr: "'replaceall': count of formal parameters (3) does not match count of actual parameters (2)", + }, + { + name: "IncorrectInputforReplaceAll", + SQLs: sqls( + "select replaceall('test','e',1)", + ), + ExpErr: "string expression expected", + }, + { + name: "TrimNull", + SQLs: sqls( + "select trim(null)", + ), + ExpHdrs: hdrs( + hdr("", fldTypeString), + ), + ExpRows: rows( + row(nil), + ), + Compare: CompareExactOrdered, + }, + { + name: "RemovingWhitespacefromStringusingTrim", + SQLs: sqls( + "select trim(' this ')", + ), + ExpHdrs: hdrs( + hdr("", fldTypeString), + ), + ExpRows: rows( + row(string("this")), + ), + Compare: CompareExactOrdered, + }, + { + name: "IncorrectArgumentsforTrim", + SQLs: sqls( + "select trim(' a ','b')", + ), + ExpErr: "'trim': count of formal parameters (1) does not match count of actual parameters (2)", + }, + { + name: "IncorrectInputforTrim", + SQLs: sqls( + "select trim(1)", + ), + ExpErr: "string expression expected", + }, + //Prefix() + { + name: "PrefixNull", + SQLs: sqls( + "SELECT PREFIX(NULL, 34)", + ), + ExpHdrs: hdrs( + hdr("", fldTypeString), + ), + ExpRows: rows( + row(nil), + ), + Compare: CompareExactOrdered, + }, + { + name: "IncorrectArgumentsforPrefix", + SQLs: sqls( + "SELECT PREFIX('string')", + ), + ExpErr: "'PREFIX': count of formal parameters (2) does not match count of actual parameters (1)", + }, + { + name: "IncorrectInputforPrefix", + SQLs: sqls( + "SELECT PREFIX(1,'string')", + ), + ExpErr: "string expression expected", + }, + { + name: "LengthLargerThanStringforPrefix", + SQLs: sqls( + "SELECT PREFIX('string', 7)", + ), + ExpErr: "[0:0] value '7' out of range", + }, + { + name: "NegativeLengthforPrefix", + SQLs: sqls( + "SELECT PREFIX('string', -1)", + ), + ExpErr: "[0:0] value '-1' out of range", + }, + { + name: "ZeroLengthforPrefix", + SQLs: sqls( + "SELECT PREFIX('string', 0)", + ), + ExpHdrs: hdrs( + hdr("", fldTypeString), + ), + ExpRows: rows( + row(string("")), + ), + Compare: CompareExactOrdered, + }, + { + name: "GetFirstThreeforPrefix", + SQLs: sqls( + "SELECT PREFIX('string', 3)", + ), + ExpHdrs: hdrs( + hdr("", fldTypeString), + ), + ExpRows: rows( + row(string("str")), + ), + Compare: CompareExactOrdered, + }, + { + name: "FullStringforPrefix", + SQLs: sqls( + "SELECT PREFIX('string', 6)", + ), + ExpHdrs: hdrs( + hdr("", fldTypeString), + ), + ExpRows: rows( + row(string("string")), + ), + Compare: CompareExactOrdered, + }, + //Suffix() + { + name: "SuffixNull", + SQLs: sqls( + "SELECT SUFFIX(NULL, 23)", + ), + ExpHdrs: hdrs( + hdr("", fldTypeString), + ), + ExpRows: rows( + row(nil), + ), + Compare: 
CompareExactOrdered, + }, + { + name: "IncorrectArgumentsforSuffix", + SQLs: sqls( + "SELECT SUFFIX('string')", + ), + ExpErr: "'SUFFIX': count of formal parameters (2) does not match count of actual parameters (1)", + }, + { + name: "IncorrectInputforSuffix", + SQLs: sqls( + "SELECT SUFFIX(1,'string')", + ), + ExpErr: "string expression expected", + }, + { + name: "LengthLargerThanStringforSuffix", + SQLs: sqls( + "SELECT SUFFIX('string', 7)", + ), + ExpErr: "[0:0] value '7' out of range", + }, + { + name: "NegativeLengthforSuffix", + SQLs: sqls( + "SELECT SUFFIX('string', -1)", + ), + ExpErr: "[0:0] value '-1' out of range", + }, + { + name: "ZeroLengthforSuffix", + SQLs: sqls( + "SELECT SUFFIX('string', 0)", + ), + ExpHdrs: hdrs( + hdr("", fldTypeString), + ), + ExpRows: rows( + row(string("")), + ), + Compare: CompareExactOrdered, + }, + { + name: "GetLastThreeforSuffix", + SQLs: sqls( + "SELECT SUFFIX('string', 3)", + ), + ExpHdrs: hdrs( + hdr("", fldTypeString), + ), + ExpRows: rows( + row(string("ing")), + ), + Compare: CompareExactOrdered, + }, + { + name: "FullStringforSuffix", + SQLs: sqls( + "SELECT SUFFIX('string', 6)", + ), + ExpHdrs: hdrs( + hdr("", fldTypeString), + ), + ExpRows: rows( + row(string("string")), + ), + Compare: CompareExactOrdered, + }, + { + name: "RTrimNull", + SQLs: sqls( + "select rtrim(null)", + ), + ExpHdrs: hdrs( + hdr("", fldTypeString), + ), + ExpRows: rows( + row(nil), + ), + Compare: CompareExactOrdered, + }, + { + name: "RemovingTrailingspacefromStringusingRTrim", + SQLs: sqls( + "select rtrim(' this ')", + ), + ExpHdrs: hdrs( + hdr("", fldTypeString), + ), + ExpRows: rows( + row(string(" this")), + ), + Compare: CompareExactOrdered, + }, + { + name: "IncorrectArgumentsforRTrim", + SQLs: sqls( + "select rtrim(' a ',' b ')", + ), + ExpErr: "'rtrim': count of formal parameters (1) does not match count of actual parameters (2)", + }, + { + name: "IncorrectInputforRTrim", + SQLs: sqls( + "select rtrim(1)", + ), + ExpErr: "string expression expected", + }, + { + name: "LTrimNull", + SQLs: sqls( + "select ltrim(null)", + ), + ExpHdrs: hdrs( + hdr("", fldTypeString), + ), + ExpRows: rows( + row(nil), + ), + Compare: CompareExactOrdered, + }, + { + name: "RemovingLeadingspacefromStringusingLTrim", + SQLs: sqls( + "select ltrim(' this ')", + ), + ExpHdrs: hdrs( + hdr("", fldTypeString), + ), + ExpRows: rows( + row(string("this ")), + ), + Compare: CompareExactOrdered, + }, + { + name: "IncorrectArgumentsforLTrim", + SQLs: sqls( + "select ltrim(' a ',' b ')", + ), + ExpErr: "'ltrim': count of formal parameters (1) does not match count of actual parameters (2)", + }, + { + name: "IncorrectInputforLTrim", + SQLs: sqls( + "select ltrim(1)", + ), + ExpErr: "string expression expected", + }, + { + name: "SpaceZero", + SQLs: sqls( + "select space(0)", + ), + ExpHdrs: hdrs( + hdr("", fldTypeString), + ), + ExpRows: rows( + row(string("")), + ), + Compare: CompareExactOrdered, + }, + { + name: "SpaceFive", + SQLs: sqls( + "select space(5)", + ), + ExpHdrs: hdrs( + hdr("", fldTypeString), + ), + ExpRows: rows( + row(string("     ")), + ), + Compare: CompareExactOrdered, + }, }, } diff --git a/sql3/test/defs/defs_subquery.go b/sql3/test/defs/defs_subquery.go new file mode 100644 index 000000000..007046b56 --- /dev/null +++ b/sql3/test/defs/defs_subquery.go @@ -0,0 +1,49 @@ +package defs + +var subqueryTests = TableTest{ + name: "subquerytable", + Table: tbl( + "subquerytable", + srcHdrs( + srcHdr("_id", fldTypeID), + srcHdr("a_string", fldTypeString), + ), + srcRows( +
srcRow(int64(1), "str1"), + srcRow(int64(2), "str1"), + srcRow(int64(3), "str2"), + srcRow(int64(4), "str2"), + srcRow(int64(5), "str3"), + ), + ), + SQLTests: []SQLTest{ + { + name: "select-count", + SQLs: sqls( + "select sum(mycount) as thecount from (select count(a_string) as mycount, a_string from subquerytable group BY a_string);", + ), + ExpHdrs: hdrs( + hdr("thecount", fldTypeInt), + ), + ExpRows: rows( + row(int64(5)), + ), + Compare: CompareExactUnordered, + SortStringKeys: true, + }, + { + name: "select-count-distinct", + SQLs: sqls( + "select sum(mycount) as thecount from (select count(distinct a_string) as mycount, a_string from subquerytable group BY a_string);", + ), + ExpHdrs: hdrs( + hdr("thecount", fldTypeInt), + ), + ExpRows: rows( + row(int64(3)), + ), + Compare: CompareExactUnordered, + SortStringKeys: true, + }, + }, +} diff --git a/translate.go b/translate.go index 5d00d3eeb..852420abc 100644 --- a/translate.go +++ b/translate.go @@ -4,10 +4,9 @@ package pilosa import ( "context" - "encoding/json" "fmt" "io" - "sort" + "os" "sync" "github.com/featurebasedb/featurebase/v3/disco" @@ -77,9 +76,7 @@ type TranslateStore interface { // TODO: refactor this interface; readonly shoul // Returns a reader from the given ID offset. EntryReader(ctx context.Context, offset uint64) (TranslateEntryReader, error) - // WriteTo ensures that the TranslateStore implements io.WriterTo. - // It should write the contents of the store to the writer. - WriteTo(io.Writer) (int64, error) + Begin(write bool) (TranslatorTx, error) // ReadFrom ensures that the TranslateStore implements io.ReaderFrom. // It should read from the reader and replace the data store with @@ -89,68 +86,14 @@ type TranslateStore interface { // TODO: refactor this interface; readonly shoul Delete(records *roaring.Bitmap) (Commitor, error) } -// TranslatorSummary is returned, for example from the boltdb string key translators, -// by calling ComputeTranslatorSummary(). Non-boltdb mocks, etc no-op that method. -type TranslatorSummary struct { - Index string - - // ParitionID is filled for column keys - PartitionID int - - NodeID string - StorePath string - IsPrimary bool - IsReplica bool - - // PrimaryNodeIndex indexes into the cluster []node array to find the primary - PrimaryNodeIndex int - - // Field is filled for row keys - Field string - - // Checksum has a blake3 crypto hash of all the keys->ID and all the ID->key mappings - Checksum string - - // KeyCount has the number of Key->ID mappings - KeyCount int - - // IDCount has the number of ID->Key mappings - IDCount int - - // false for RowIDs, true for string-Key column IDs. - IsColKey bool -} - -func (s *TranslatorSummary) String() string { - return fmt.Sprintf(` -TranslatorSummary{ - Index : %v - PartitionID: %v - NodeID : %v - StorePath : %v - IsPrimary : %v - IsReplica : %v - PrimaryNodeIndex: %v - Field : %v - Checksum: %v - KeyCount: %v - IDCount : %v - IsColKey: %v -} -`, - s.Index, - s.PartitionID, - s.NodeID, - s.StorePath, - s.IsPrimary, - s.IsReplica, - s.PrimaryNodeIndex, - s.Field, - s.Checksum, - s.KeyCount, - s.IDCount, - s.IsColKey, - ) +// TranslatorTx reproduces a subset of the methods on the BoltDB Tx +// object. Others may be needed in the future and we should just add +// them here. The idea is not to scatter direct references to bolt +// stuff throughout the whole codebase. +type TranslatorTx interface { + WriteTo(io.Writer) (int64, error) + Rollback() error + // e.g. 
Commit() error } // OpenTranslateStoreFunc represents a function for instantiating and opening a TranslateStore. @@ -329,307 +272,44 @@ func NewIndexTranslateOffsetMap() *IndexTranslateOffsetMap { } } -// Ensure type implements interface. -var _ TranslateStore = &InMemTranslateStore{} - -// InMemTranslateStore is an in-memory storage engine for mapping keys to int values. -type InMemTranslateStore struct { - mu sync.RWMutex - index string - field string - partitionID int - partitionN int - readOnly bool - keysByID map[uint64]string - idsByKey map[string]uint64 - maxID uint64 - - writeNotify chan struct{} -} - -// NewInMemTranslateStore returns a new instance of InMemTranslateStore. -func NewInMemTranslateStore(index, field string, partitionID, partitionN int) *InMemTranslateStore { - return &InMemTranslateStore{ - index: index, - field: field, - partitionID: partitionID, - partitionN: partitionN, - keysByID: make(map[uint64]string), - idsByKey: make(map[string]uint64), - writeNotify: make(chan struct{}), - } -} - var _ OpenTranslateStoreFunc = OpenInMemTranslateStore -// OpenInMemTranslateStore returns a new instance of InMemTranslateStore. -// Implements OpenTranslateStoreFunc. +// OpenInMemTranslateStore returns a new instance of a BoltDB based +// TranslateStore which removes all its files when it's closed, and +// tries to operate off a RAM disk if one is configured and set in the +// environment. Implements OpenTranslateStoreFunc. func OpenInMemTranslateStore(rawurl, index, field string, partitionID, partitionN int, fsyncEnabled bool) (TranslateStore, error) { - return NewInMemTranslateStore(index, field, partitionID, partitionN), nil -} - -func (s *InMemTranslateStore) Close() error { - return nil -} - -// PartitionID returns the partition id the store was initialized with. -func (s *InMemTranslateStore) PartitionID() int { - return s.partitionID -} - -// ReadOnly returns true if the store is in read-only mode. -func (s *InMemTranslateStore) ReadOnly() bool { - s.mu.Lock() - defer s.mu.Unlock() - return s.readOnly -} - -// SetReadOnly toggles the read-only mode of the store. -func (s *InMemTranslateStore) SetReadOnly(v bool) { - s.mu.Lock() - defer s.mu.Unlock() - s.readOnly = v -} -func (s *InMemTranslateStore) Delete(records *roaring.Bitmap) (Commitor, error) { - s.mu.Lock() - defer s.mu.Unlock() - for _, id := range records.Slice() { - key := s.keysByID[id] - delete(s.keysByID, id) - delete(s.idsByKey, key) - } - return &NopCommitor{}, nil -} - -// FindKeys looks up the ID for each key. -// Keys are not created if they do not exist. -// Missing keys are not considered errors, so the length of the result may be less than that of the input. -func (s *InMemTranslateStore) FindKeys(keys ...string) (map[string]uint64, error) { - s.mu.RLock() - defer s.mu.RUnlock() - - result := make(map[string]uint64, len(keys)) - for _, key := range keys { - id, ok := s.idsByKey[key] - if !ok { - // The key does not exist. - continue - } - - result[key] = id - } - - return result, nil -} - -// CreateKeys maps all keys to IDs, creating the IDs if they do not exist. -// If the translator is read-only, this will return an error. -func (s *InMemTranslateStore) CreateKeys(keys ...string) (map[string]uint64, error) { - s.mu.Lock() - defer s.mu.Unlock() - - if s.readOnly { - return nil, ErrTranslateStoreReadOnly - } - - result := make(map[string]uint64, len(keys)) - for _, key := range keys { - id, ok := s.idsByKey[key] - if !ok { - // The key does not exist. - // Generate a new id and update db. 
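// A minimal usage sketch for the TranslatorTx interface above (illustrative
// only; `store` is assumed to be a TranslateStore and `w` an io.Writer):
// snapshotting now goes through an explicit read transaction rather than a
// WriteTo method on the store itself, e.g.
//
//	tx, err := store.Begin(false) // read-only transaction
//	if err != nil {
//		return err
//	}
//	defer tx.Rollback()     // release the underlying bolt read tx
//	n, err := tx.WriteTo(w) // stream a consistent snapshot to w
//
// This mirrors bolt.Tx.WriteTo, the mechanism bbolt documents for hot backups,
// so a read transaction is sufficient for a consistent copy.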
- if s.field == "" { - id = GenerateNextPartitionedID(s.index, s.maxID, s.partitionID, s.partitionN) - } else { - id = s.maxID + 1 - } - s.set(id, key) - } - - result[key] = id + bt := NewBoltTranslateStore(index, field, partitionID, partitionN, false) + iname := index + if len(iname) > 10 { + iname = iname[:10] } - - return result, nil -} - -func (s *InMemTranslateStore) Match(filter func([]byte) bool) ([]uint64, error) { - s.mu.RLock() - defer s.mu.RUnlock() - - var matches []uint64 - for key, id := range s.idsByKey { - if filter([]byte(key)) { - matches = append(matches, id) - } + fname := field + if len(fname) > 10 { + fname = fname[:10] } - sort.Slice(matches, func(i, j int) bool { - return matches[i] < matches[j] - }) - return matches, nil -} - -// TranslateID converts an integer ID to a string key. -// Returns a blank string if ID does not exist. -func (s *InMemTranslateStore) TranslateID(id uint64) (string, error) { - s.mu.Lock() - defer s.mu.Unlock() - return s.translateID(id), nil -} - -// TranslateIDs converts a list of integer IDs to a list of string keys. -func (s *InMemTranslateStore) TranslateIDs(ids []uint64) ([]string, error) { - s.mu.Lock() - defer s.mu.Unlock() - - keys := make([]string, len(ids)) - for i := range ids { - keys[i] = s.translateID(ids[i]) - } - return keys, nil -} - -func (s *InMemTranslateStore) translateID(id uint64) string { - return s.keysByID[id] -} - -// ForceSet writes the id/key pair to the db. Used by replication. -func (s *InMemTranslateStore) ForceSet(id uint64, key string) error { - s.mu.Lock() - defer s.mu.Unlock() - s.set(id, key) - return nil -} - -// set assigns the id/key pair to the store. -func (s *InMemTranslateStore) set(id uint64, key string) { - s.keysByID[id] = key - s.idsByKey[key] = id - if id > s.maxID { - s.maxID = id - } - s.notifyWrite() -} - -// WriteNotify returns a channel that is closed when a new entry is written. -func (s *InMemTranslateStore) WriteNotify() <-chan struct{} { - s.mu.RLock() - ch := s.writeNotify - s.mu.RUnlock() - return ch -} - -// notifyWrite sends a write notification under write lock. -func (s *InMemTranslateStore) notifyWrite() { - close(s.writeNotify) - s.writeNotify = make(chan struct{}) -} - -// EntryReader returns an error. Replication is not supported. -func (s *InMemTranslateStore) EntryReader(ctx context.Context, offset uint64) (TranslateEntryReader, error) { - s.mu.Lock() - defer s.mu.Unlock() - return newInMemTranslateEntryReader(ctx, s, offset), nil -} - -// WriteTo implements io.WriterTo. It's not efficient or careful, but we -// don't expect to use InMemTranslateStore much, it's mostly there to -// avoid disk load during testing. -func (s *InMemTranslateStore) WriteTo(w io.Writer) (int64, error) { - bytes, err := json.Marshal(s.keysByID) + tf, err := os.CreateTemp(os.Getenv("RAMDISK"), fmt.Sprintf("bolt-i%s-f%s-%d-%d-", iname, fname, partitionID, partitionN)) if err != nil { - return 0, err + return nil, errors.Wrap(err, "making temp file for boltdb key translation") } - n, err := w.Write(bytes) - return int64(n), err -} - -// ReadFrom implements io.ReaderFrom. It's not efficient or careful, but we -// don't expect to use InMemTranslateStore much, it's mostly there to -// avoid disk load during testing. 
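// Usage note for the os.CreateTemp(os.Getenv("RAMDISK"), ...) call above:
// pointing RAMDISK at a mounted tmpfs keeps these throwaway bolt files out of
// disk I/O during tests (the mount path below is only an example):
//
//	RAMDISK=/mnt/ramdisk go test ./...
//
// When RAMDISK is unset, os.Getenv returns "" and os.CreateTemp falls back to
// the default temp directory (os.TempDir()).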
-func (s *InMemTranslateStore) ReadFrom(r io.Reader) (count int64, err error) { - s.mu.Lock() - defer s.mu.Unlock() - var bytes []byte - bytes, err = io.ReadAll(r) - count = int64(len(bytes)) + bt.Path = tf.Name() + err = bt.Open() if err != nil { - return count, err + return nil, errors.Wrap(err, "opening in mem boltdb") } - var keysByID map[uint64]string - err = json.Unmarshal(bytes, &keysByID) - if err != nil { - return count, err - } - s.maxID = 0 - s.keysByID = keysByID - s.idsByKey = make(map[string]uint64, len(s.keysByID)) - for k, v := range s.keysByID { - s.idsByKey[v] = k - if k > s.maxID { - s.maxID = k - } - } - return count, nil + return &BoltInMemTranslateStore{bt}, err } -// MaxID returns the highest identifier in the store. -func (s *InMemTranslateStore) MaxID() (uint64, error) { - s.mu.RLock() - defer s.mu.RUnlock() - return s.maxID, nil -} - -// inMemEntryReader represents a stream of translation entries for an inmem translation store. -type inMemTranslateEntryReader struct { - ctx context.Context - cancel func() - - store *InMemTranslateStore - offset uint64 +type BoltInMemTranslateStore struct { + *BoltTranslateStore } -func newInMemTranslateEntryReader(ctx context.Context, store *InMemTranslateStore, offset uint64) *inMemTranslateEntryReader { - r := &inMemTranslateEntryReader{ - store: store, - offset: offset, +func (b *BoltInMemTranslateStore) Close() error { + defer os.RemoveAll(b.BoltTranslateStore.Path) + err := b.BoltTranslateStore.Close() + if err != nil { + return errors.Wrap(err, "closing in mem bolt translate store") } - r.ctx, r.cancel = context.WithCancel(ctx) - return r -} - -// Close stops the reader. -func (r *inMemTranslateEntryReader) Close() error { - r.cancel() return nil } - -// ReadEntry reads the next available entry. -func (r *inMemTranslateEntryReader) ReadEntry(entry *TranslateEntry) error { - for { - // Wait until our offset is less than the max id. - notify := r.store.WriteNotify() - if maxID, err := r.store.MaxID(); err != nil { - return err - } else if r.offset > maxID { - select { - case <-r.ctx.Done(): - return io.EOF - case <-notify: - continue // restart loop - } - } - - // Translate key for offset. - key, err := r.store.TranslateID(r.offset) - if err != nil { - return err - } - - // Copy id/key pair to entry argument and increment offset for next read. - entry.Index, entry.Field = r.store.index, r.store.field - entry.ID, entry.Key = r.offset, key - r.offset++ - return nil - } -} diff --git a/boltdb/translate.go b/translate_boltdb.go similarity index 85% rename from boltdb/translate.go rename to translate_boltdb.go index 55f4c43fd..7a3ccfbe7 100644 --- a/boltdb/translate.go +++ b/translate_boltdb.go @@ -1,6 +1,6 @@ // Copyright 2022 Molecula Corp. (DBA FeatureBase). // SPDX-License-Identifier: Apache-2.0 -package boltdb +package pilosa import ( "bytes" @@ -13,7 +13,6 @@ import ( "sync" "time" - pilosa "github.com/featurebasedb/featurebase/v3" "github.com/featurebasedb/featurebase/v3/roaring" "github.com/pkg/errors" bolt "go.etcd.io/bbolt" @@ -26,7 +25,7 @@ var _ = pprof.StartCPUProfile var ( // ErrBoltTranslateStoreClosed is returned when reading from a BoltTranslateEntryReader // and the underlying store is closed.
- ErrTranslateStoreClosed = errors.New("boltdb: translate store closing") + ErrBoltTranslateStoreClosed = errors.New("boltdb: translate store closing") // ErrTranslateKeyNotFound is returned when translating key // and the underlying store returns an empty set @@ -46,8 +45,8 @@ const ( ) // OpenTranslateStore opens and initializes a boltdb translation store. -func OpenTranslateStore(path, index, field string, partitionID, partitionN int, fsyncEnabled bool) (pilosa.TranslateStore, error) { - s := NewTranslateStore(index, field, partitionID, partitionN, fsyncEnabled) +func OpenTranslateStore(path, index, field string, partitionID, partitionN int, fsyncEnabled bool) (TranslateStore, error) { + s := NewBoltTranslateStore(index, field, partitionID, partitionN, fsyncEnabled) s.Path = path if err := s.Open(); err != nil { return nil, err @@ -56,9 +55,9 @@ } // Ensure type implements interface. -var _ pilosa.TranslateStore = &TranslateStore{} +var _ TranslateStore = &BoltTranslateStore{} -// TranslateStore is an on-disk storage engine for translating string-to-uint64 values. +// BoltTranslateStore is an on-disk storage engine for translating string-to-uint64 values. // An empty string will be converted into the sentinel byte slice: // // var emptyKey = []byte{ @@ -68,7 +67,7 @@ var _ pilosa.TranslateStore = &TranslateStore{} // 0xc2, 0xa0, // NO-BREAK SPACE // 0x00, // } -type TranslateStore struct { +type BoltTranslateStore struct { mu sync.RWMutex db *bolt.DB @@ -88,9 +87,9 @@ type TranslateStore struct { Path string } -// NewTranslateStore returns a new instance of TranslateStore. -func NewTranslateStore(index, field string, partitionID, partitionN int, fsyncEnabled bool) *TranslateStore { - return &TranslateStore{ +// NewBoltTranslateStore returns a new instance of BoltTranslateStore. +func NewBoltTranslateStore(index, field string, partitionID, partitionN int, fsyncEnabled bool) *BoltTranslateStore { + return &BoltTranslateStore{ index: index, field: field, partitionID: partitionID, @@ -102,7 +101,7 @@ func NewTranslateStore(index, field string, partitionID, partitionN int, fsyncEn } // Open opens the translate file. -func (s *TranslateStore) Open() (err error) { +func (s *BoltTranslateStore) Open() (err error) { // add the path to the problem database if we panic handling it. defer func() { @@ -114,7 +113,7 @@ func (s *TranslateStore) Open() (err error) { if err := os.MkdirAll(filepath.Dir(s.Path), 0750); err != nil { return errors.Wrapf(err, "mkdir %s", filepath.Dir(s.Path)) - } else if s.db, err = bolt.Open(s.Path, 0600, &bolt.Options{Timeout: 1 * time.Second, NoSync: !s.fsyncEnabled}); err != nil { + } else if s.db, err = bolt.Open(s.Path, 0600, &bolt.Options{Timeout: 1 * time.Second, NoSync: !s.fsyncEnabled, InitialMmapSize: 0}); err != nil { return errors.Wrapf(err, "open file: %s", err) } @@ -137,7 +136,7 @@ } // Close closes the underlying database. -func (s *TranslateStore) Close() (err error) { +func (s *BoltTranslateStore) Close() (err error) { s.once.Do(func() { close(s.closing) }) if s.db != nil { @@ -149,26 +148,26 @@ } // PartitionID returns the partition id the store was initialized with. -func (s *TranslateStore) PartitionID() int { +func (s *BoltTranslateStore) PartitionID() int { return s.partitionID } // ReadOnly returns true if the store is in read-only mode.
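// For context on the bolt.Options used in Open above (standard bbolt
// semantics, not specific to this change): NoSync skips the fsync after each
// commit, trading crash durability for speed, and Timeout bounds how long
// Open waits on the file lock, e.g.
//
//	db, err := bolt.Open(path, 0600, &bolt.Options{
//		Timeout: 1 * time.Second, // give up if another process holds the lock
//		NoSync:  true,            // skip fsync on commit; unsafe across crashes
//	})
//
// which is why the throwaway RAM-disk-backed test stores can afford to run
// with fsync disabled.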
-func (s *TranslateStore) ReadOnly() bool { +func (s *BoltTranslateStore) ReadOnly() bool { s.mu.RLock() defer s.mu.RUnlock() return s.readOnly } // SetReadOnly toggles whether store is in read-only mode. -func (s *TranslateStore) SetReadOnly(v bool) { +func (s *BoltTranslateStore) SetReadOnly(v bool) { s.mu.Lock() defer s.mu.Unlock() s.readOnly = v } // Size returns the number of bytes in the data file. -func (s *TranslateStore) Size() int64 { +func (s *BoltTranslateStore) Size() int64 { if s.db == nil { return 0 } @@ -183,7 +182,7 @@ func (s *TranslateStore) Size() int64 { // FindKeys looks up the ID for each key. // Keys are not created if they do not exist. // Missing keys are not considered errors, so the length of the result may be less than that of the input. -func (s *TranslateStore) FindKeys(keys ...string) (map[string]uint64, error) { +func (s *BoltTranslateStore) FindKeys(keys ...string) (map[string]uint64, error) { result := make(map[string]uint64, len(keys)) err := s.db.View(func(tx *bolt.Tx) error { bkt := tx.Bucket(bucketKeys) @@ -217,9 +216,9 @@ const translateTransactionSize = 16384 // CreateKeys maps all keys to IDs, creating the IDs if they do not exist. // If the translator is read-only, this will return an error. -func (s *TranslateStore) CreateKeys(keys ...string) (map[string]uint64, error) { +func (s *BoltTranslateStore) CreateKeys(keys ...string) (map[string]uint64, error) { if s.ReadOnly() { - return nil, pilosa.ErrTranslateStoreReadOnly + return nil, ErrTranslateStoreReadOnly } written := false @@ -255,7 +254,7 @@ func (s *TranslateStore) CreateKeys(keys ...string) (map[string]uint64, error) { } // see if we can re-use any IDs first if id = getter.GetFreeID(); id == 0 { - id = pilosa.GenerateNextPartitionedID(s.index, maxID(tx), s.partitionID, s.partitionN) + id = GenerateNextPartitionedID(s.index, maxID(tx), s.partitionID, s.partitionN) } idBytes := idScratch[puts*8 : puts*8+8] binary.BigEndian.PutUint64(idBytes, id) @@ -287,7 +286,7 @@ func (s *TranslateStore) CreateKeys(keys ...string) (map[string]uint64, error) { } // Match finds the IDs of all keys matching a filter. -func (s *TranslateStore) Match(filter func([]byte) bool) ([]uint64, error) { +func (s *BoltTranslateStore) Match(filter func([]byte) bool) ([]uint64, error) { var matches []uint64 err := s.db.View(func(tx *bolt.Tx) error { // This uses the id bucket instead of the key bucket so that matches are produced in sorted order. @@ -317,7 +316,7 @@ func (s *TranslateStore) Match(filter func([]byte) bool) ([]uint64, error) { // TranslateID converts an integer ID to a string key. // Returns a blank string if ID does not exist. -func (s *TranslateStore) TranslateID(id uint64) (string, error) { +func (s *BoltTranslateStore) TranslateID(id uint64) (string, error) { tx, err := s.db.Begin(false) if err != nil { return "", err @@ -327,7 +326,7 @@ func (s *TranslateStore) TranslateID(id uint64) (string, error) { } // TranslateIDs converts a list of integer IDs to a list of string keys. -func (s *TranslateStore) TranslateIDs(ids []uint64) ([]string, error) { +func (s *BoltTranslateStore) TranslateIDs(ids []uint64) ([]string, error) { if len(ids) == 0 { return nil, nil } @@ -348,7 +347,7 @@ func (s *TranslateStore) TranslateIDs(ids []uint64) ([]string, error) { } // ForceSet writes the id/key pair to the store even if read only. Used by replication. 
-func (s *TranslateStore) ForceSet(id uint64, key string) error { +func (s *BoltTranslateStore) ForceSet(id uint64, key string) error { if err := s.db.Update(func(tx *bolt.Tx) (err error) { if err := tx.Bucket(bucketKeys).Put([]byte(key), u64tob(id)); err != nil { return err @@ -364,13 +363,13 @@ func (s *TranslateStore) ForceSet(id uint64, key string) error { } // EntryReader returns a reader that streams the underlying data file. -func (s *TranslateStore) EntryReader(ctx context.Context, offset uint64) (pilosa.TranslateEntryReader, error) { +func (s *BoltTranslateStore) EntryReader(ctx context.Context, offset uint64) (TranslateEntryReader, error) { ctx, cancel := context.WithCancel(ctx) - return &TranslateEntryReader{ctx: ctx, cancel: cancel, store: s, offset: offset}, nil + return &BoltTranslateEntryReader{ctx: ctx, cancel: cancel, store: s, offset: offset}, nil } // WriteNotify returns a channel that is closed when a new entry is written. -func (s *TranslateStore) WriteNotify() <-chan struct{} { +func (s *BoltTranslateStore) WriteNotify() <-chan struct{} { s.mu.RLock() ch := s.writeNotify s.mu.RUnlock() @@ -378,7 +377,7 @@ func (s *TranslateStore) WriteNotify() <-chan struct{} { } // notifyWrite sends a write notification under write lock. -func (s *TranslateStore) notifyWrite() { +func (s *BoltTranslateStore) notifyWrite() { s.mu.Lock() defer s.mu.Unlock() close(s.writeNotify) @@ -386,7 +385,7 @@ func (s *TranslateStore) notifyWrite() { } // MaxID returns the highest id in the store. -func (s *TranslateStore) MaxID() (max uint64, err error) { +func (s *BoltTranslateStore) MaxID() (max uint64, err error) { if err := s.db.View(func(tx *bolt.Tx) error { max = maxID(tx) return nil @@ -396,18 +395,13 @@ func (s *TranslateStore) MaxID() (max uint64, err error) { return max, nil } -// WriteTo writes the contents of the store to the writer. -func (s *TranslateStore) WriteTo(w io.Writer) (int64, error) { - tx, err := s.db.Begin(false) - if err != nil { - return 0, err - } - defer func() { _ = tx.Rollback() }() - return tx.WriteTo(w) +// Begin starts and returns a transaction on the underlying store. +func (s *BoltTranslateStore) Begin(write bool) (TranslatorTx, error) { + return s.db.Begin(write) } // ReadFrom reads the content and overwrites the existing store. -func (s *TranslateStore) ReadFrom(r io.Reader) (n int64, err error) { +func (s *BoltTranslateStore) ReadFrom(r io.Reader) (n int64, err error) { // Close store. if err := s.Close(); err != nil { return 0, errors.Wrap(err, "closing store") @@ -451,27 +445,27 @@ func maxID(tx *bolt.Tx) uint64 { return 0 } -type TranslateEntryReader struct { +type BoltTranslateEntryReader struct { ctx context.Context - store *TranslateStore + store *BoltTranslateStore offset uint64 cancel func() } // Close closes the reader. -func (r *TranslateEntryReader) Close() error { +func (r *BoltTranslateEntryReader) Close() error { r.cancel() return nil } // ReadEntry reads the next entry from the underlying translate store. -func (r *TranslateEntryReader) ReadEntry(entry *pilosa.TranslateEntry) error { +func (r *BoltTranslateEntryReader) ReadEntry(entry *TranslateEntry) error { // Ensure reader has not been closed before read. 
select { case <-r.ctx.Done(): return r.ctx.Err() case <-r.store.closing: - return ErrTranslateStoreClosed + return ErrBoltTranslateStoreClosed default: } @@ -511,7 +505,7 @@ case <-r.ctx.Done(): return r.ctx.Err() case <-r.store.closing: - return ErrTranslateStoreClosed + return ErrBoltTranslateStoreClosed case <-writeNotify: } } @@ -534,7 +528,7 @@ func (w *boltWrapper) Rollback() { w.tx.Rollback() } } -func (s *TranslateStore) FreeIDs() (*roaring.Bitmap, error) { +func (s *BoltTranslateStore) FreeIDs() (*roaring.Bitmap, error) { result := roaring.NewBitmap() err := s.db.View(func(tx *bolt.Tx) error { bkt := tx.Bucket(bucketFree) @@ -550,7 +544,7 @@ }) return result, err } -func (s *TranslateStore) MergeFree(tx *bolt.Tx, newIDs *roaring.Bitmap) error { +func (s *BoltTranslateStore) MergeFree(tx *bolt.Tx, newIDs *roaring.Bitmap) error { bkt := tx.Bucket(bucketFree) b := bkt.Get(freeKey) buf := new(bytes.Buffer) @@ -573,7 +567,7 @@ // Delete removes the lookup pairs to make those IDs available for reuse, but doesn't commit the // transaction, since that is tied to the associated rbf transaction being successful -func (s *TranslateStore) Delete(records *roaring.Bitmap) (pilosa.Commitor, error) { +func (s *BoltTranslateStore) Delete(records *roaring.Bitmap) (Commitor, error) { tx, err := s.db.Begin(true) if err != nil { return nil, err diff --git a/boltdb/translate_internal_test.go b/translate_boltdb_internal_test.go similarity index 99% rename from boltdb/translate_internal_test.go rename to translate_boltdb_internal_test.go index 9faa3a7d7..c6e4577e6 100644 --- a/boltdb/translate_internal_test.go +++ b/translate_boltdb_internal_test.go @@ -1,4 +1,4 @@ -package boltdb +package pilosa import ( "path/filepath" diff --git a/boltdb/translate_test.go b/translate_boltdb_test.go similarity index 93% rename from boltdb/translate_test.go rename to translate_boltdb_test.go index a31d874cc..4dc92b2e9 100644 --- a/boltdb/translate_test.go +++ b/translate_boltdb_test.go @@ -1,6 +1,6 @@ // Copyright 2022 Molecula Corp. (DBA FeatureBase). // SPDX-License-Identifier: Apache-2.0 -package boltdb_test +package pilosa_test import ( "bytes" @@ -12,10 +12,10 @@ import ( "time" pilosa "github.com/featurebasedb/featurebase/v3" - "github.com/featurebasedb/featurebase/v3/boltdb" "github.com/featurebasedb/featurebase/v3/disco" "github.com/featurebasedb/featurebase/v3/roaring" "github.com/featurebasedb/featurebase/v3/testhook" + "github.com/stretchr/testify/require" ) //var vv = pilosa.VV @@ -204,7 +204,7 @@ func TestTranslateStore_MaxID(t *testing.T) { } } -func TestTranslateStore_EntryReader(t *testing.T) { +func TestBoltTranslateStore_EntryReader(t *testing.T) { t.Run("OK", func(t *testing.T) { s := MustOpenNewTranslateStore(t) defer MustCloseTranslateStore(s) @@ -362,7 +362,7 @@ func TestTranslateStore_EntryReader(t *testing.T) { }() var entry pilosa.TranslateEntry - if err := r.ReadEntry(&entry); err != boltdb.ErrTranslateStoreClosed { + if err := r.ReadEntry(&entry); err != pilosa.ErrBoltTranslateStoreClosed { t.Fatalf("unexpected error: %#v", err) } @@ -375,7 +375,7 @@ } // MustNewTranslateStore returns a new TranslateStore with a temporary path.
-func MustNewTranslateStore(tb testing.TB) *boltdb.TranslateStore { +func MustNewTranslateStore(tb testing.TB) *pilosa.BoltTranslateStore { f, err := testhook.TempFile(tb, "translate-store") if err != nil { panic(err) @@ -383,7 +383,7 @@ panic(err) } - s := boltdb.NewTranslateStore("I", "F", 0, disco.DefaultPartitionN, false) + s := pilosa.NewBoltTranslateStore("I", "F", 0, disco.DefaultPartitionN, false) s.Path = f.Name() return s } @@ -458,12 +458,19 @@ func TestTranslateStore_ReadWrite(t *testing.T) { buf := bytes.NewBuffer(nil) expN := s.Size() - // After this, the buffer should contain batch0. - if n, err := s.WriteTo(buf); err != nil { - t.Fatalf("writing to buffer: %s", err) - } else if n != expN { - t.Fatalf("expected buffer size: %d, but got: %d", expN, n) - } + // Wrap in a func so we can defer the rollback. The rollback must + // happen before the end of the test: bbolt's DB.Close blocks until + // every open transaction finishes, so leaving this read tx open + // would hang when the store is closed at the end of the test. + func() { + tx, err := s.Begin(false) + require.NoError(t, err) + defer tx.Rollback() + + // After this, the buffer should contain batch0. + n, err := tx.WriteTo(buf) + require.NoError(t, err) + require.Equal(t, expN, n) + }() // Populate the store with the keys in batch1. batch1IDs, err := s.CreateKeys(batch1...) @@ -503,7 +510,7 @@ } // MustOpenNewTranslateStore returns a new, opened TranslateStore. -func MustOpenNewTranslateStore(tb testing.TB) *boltdb.TranslateStore { +func MustOpenNewTranslateStore(tb testing.TB) *pilosa.BoltTranslateStore { s := MustNewTranslateStore(tb) if err := s.Open(); err != nil { tb.Fatalf("opening s: %v", err) @@ -512,7 +519,7 @@ } // MustCloseTranslateStore closes s and removes the underlying data file. -func MustCloseTranslateStore(s *boltdb.TranslateStore) { +func MustCloseTranslateStore(s *pilosa.BoltTranslateStore) { if err := s.Close(); err != nil { panic(err) } diff --git a/translator_test.go b/translator_test.go index d3d2fac67..4e2255762 100644 --- a/translator_test.go +++ b/translator_test.go @@ -12,7 +12,6 @@ import ( "time" pilosa "github.com/featurebasedb/featurebase/v3" - "github.com/featurebasedb/featurebase/v3/disco" "github.com/featurebasedb/featurebase/v3/mock" "github.com/featurebasedb/featurebase/v3/test" "github.com/google/go-cmp/cmp" @@ -20,30 +19,6 @@ "golang.org/x/sync/errgroup" ) -func TestInMemTranslateStore_TranslateID(t *testing.T) { - s := pilosa.NewInMemTranslateStore("IDX", "FLD", 0, disco.DefaultPartitionN) - - // Setup initial keys. - if _, err := s.CreateKeys("foo"); err != nil { - t.Fatal(err) - } else if _, err := s.CreateKeys("bar"); err != nil { - t.Fatal(err) - } - - // Ensure IDs can be translated back to keys.
- if key, err := s.TranslateID(1); err != nil { - t.Fatal(err) - } else if got, want := key, "foo"; got != want { - t.Fatalf("TranslateID()=%s, want %s", got, want) - } - - if key, err := s.TranslateID(2); err != nil { - t.Fatal(err) - } else if got, want := key, "bar"; got != want { - t.Fatalf("TranslateID()=%s, want %s", got, want) - } -} - func TestMultiTranslateEntryReader(t *testing.T) { t.Run("None", func(t *testing.T) { r := pilosa.NewMultiTranslateEntryReader(context.Background(), nil) @@ -245,97 +220,6 @@ func TestTranslation_KeyNotFound(t *testing.T) { } } -func TestInMemTranslateStore_ReadKey(t *testing.T) { - s := pilosa.NewInMemTranslateStore("IDX", "FLD", 0, disco.DefaultPartitionN) - - ids, err := s.FindKeys("foo") - if err != nil { - t.Fatal(err) - } - if len(ids) != 0 { - t.Errorf("unexpected IDs: %v", ids) - } - - // Ensure next key autoincrements. - ids, err = s.CreateKeys("foo") - if err != nil { - t.Fatal(err) - } - if got, want := ids["foo"], uint64(1); got != want { - t.Fatalf("TranslateKey()=%d, want %d", got, want) - } - - ids, err = s.FindKeys("foo") - if err != nil { - t.Fatal(err) - } - if got, want := ids["foo"], uint64(1); got != want { - t.Fatalf("TranslateKey()=%d, want %d", got, want) - } -} - -// Test key translation with multiple nodes. -func TestTranslation_Primary(t *testing.T) { - // Ensure that field key translations requests sent to - // non-primary nodes are forwarded to the primary. - t.Run("ForwardFieldKey", func(t *testing.T) { - // Start a 2-node cluster. - c := test.MustRunCluster(t, 3) - defer c.Close() - - node0 := c.GetPrimary() - node1 := c.GetNonPrimary() - - ctx := context.Background() - index := c.Idx() - fld := "f" - - // Create an index without keys. - if _, err := node1.API.CreateIndex(ctx, index, - pilosa.IndexOptions{ - Keys: false, - }); err != nil { - t.Fatal(err) - } - - // Create a field with keys. - if _, err := node1.API.CreateField(ctx, index, fld, - pilosa.OptFieldKeys(), - ); err != nil { - t.Fatal(err) - } - - keys := []string{"one", "two", "three"} - for i := range keys { - pql := fmt.Sprintf(`Set(%d, %s="%s")`, i+1, fld, keys[i]) - - // Send a translation request to node1 (non-primary). - _, err := node1.API.Query(ctx, - &pilosa.QueryRequest{Index: index, Query: pql}, - ) - if err != nil { - t.Fatal(err) - } - } - - for i := len(keys) - 1; i >= 0; i-- { - // Read the row and ensure the key was set. 
- qry := fmt.Sprintf(`Row(%s="%s")`, fld, keys[i]) - resp, err := node0.API.Query(ctx, - &pilosa.QueryRequest{Index: index, Query: qry}, - ) - if err != nil { - t.Fatal(err) - } - row := resp.Results[0].(*pilosa.Row) - val := uint64(i + 1) - if cols := row.Columns(); !reflect.DeepEqual(cols, []uint64{val}) { - t.Fatalf("unexpected columns: %+v", cols) - } - } - }) -} - func TestTranslation_TranslateIDsOnCluster(t *testing.T) { c := test.MustRunCluster(t, 4) defer c.Close() diff --git a/view.go b/view.go index df9b269e2..bba4927c2 100644 --- a/view.go +++ b/view.go @@ -213,8 +213,6 @@ func (v *view) openEmpty() error { return errors.Wrap(err, "creating fragments directory") } - v.holder.Logger.Debugf("open fragments for index/field/view: %s/%s/%s", v.index, v.field, v.name) - return nil }(); err != nil { v.close() @@ -222,7 +220,6 @@ func (v *view) openEmpty() error { } _ = testhook.Opened(v.holder.Auditor, v, nil) - v.holder.Logger.Debugf("successfully opened index/field/view: %s/%s/%s", v.index, v.field, v.name) return nil } @@ -533,7 +530,7 @@ func (v *view) clearBit(qcx *Qcx, rowID, columnID uint64) (changed bool, err err // value uses a column of bits to read a multi-bit value. func (v *view) value(qcx *Qcx, columnID uint64, bitDepth uint64) (value int64, exists bool, err error) { shard := columnID / ShardWidth - tx, finisher, err := qcx.GetTx(Txo{Write: true, Index: v.idx, Shard: shard}) + tx, finisher, err := qcx.GetTx(Txo{Write: false, Index: v.idx, Shard: shard}) defer finisher(&err) frag, err := v.CreateFragmentIfNotExists(shard) if err != nil {