From c898e4d0c0c1ed1b6c886fec97cfe886f4558c94 Mon Sep 17 00:00:00 2001 From: Jeremy Evans Date: Thu, 15 Apr 2010 15:27:29 -0700 Subject: [PATCH] Move Dataset code into appropriate files, add RDoc sections Before, the Sequel::Dataset RDoc page was pretty messy, as it just listed the methods in alphabetical order without any sort of grouping. I just recently discovered RDoc sections, which allow you to group related methods into different sections on the RDoc page, which allows the user to easily focus on only the methods they are probably interested in. This commit adds sections to all of the Dataset code, separating the methods the users probably don't care about from the methods they probably do care about. Unfortunately, I haven't yet discovered how to set the order of the sections, so that the most important sections are first, but hopefully that can be added later. Even just this is a big improvement. To make sure each method was in the correct section, I moved all of the methods out of dataset.rb and dataset/convenience.rb and placed them in the appropriate file. I also added dataset/misc.rb and dataset/mutation.rb to group the related methods together. I discovered some more methods in the wrong section, and moved those to the appropriate section. 
--- lib/sequel/core.rb | 1 - lib/sequel/dataset.rb | 192 +--------- lib/sequel/dataset/actions.rb | 324 ++++++++++++++++- lib/sequel/dataset/convenience.rb | 326 ----------------- lib/sequel/dataset/features.rb | 9 + lib/sequel/dataset/graph.rb | 7 + lib/sequel/dataset/misc.rb | 119 +++++++ lib/sequel/dataset/mutation.rb | 64 ++++ lib/sequel/dataset/prepared_statements.rb | 6 + lib/sequel/dataset/query.rb | 84 +++++ lib/sequel/dataset/sql.rb | 416 ++++++++++------------ 11 files changed, 799 insertions(+), 749 deletions(-) delete mode 100644 lib/sequel/dataset/convenience.rb create mode 100644 lib/sequel/dataset/misc.rb create mode 100644 lib/sequel/dataset/mutation.rb diff --git a/lib/sequel/core.rb b/lib/sequel/core.rb index e8ed5b0023..b1dd6d1740 100644 --- a/lib/sequel/core.rb +++ b/lib/sequel/core.rb @@ -282,7 +282,6 @@ def self.def_adapter_method(*adapters) # :nodoc: require(%w"metaprogramming sql connection_pool exceptions dataset database timezones version") require(%w"schema_generator schema_methods schema_sql", 'database') - require(%w"actions convenience features graph prepared_statements query sql", 'dataset') require('core_sql') if !defined?(::SEQUEL_NO_CORE_EXTENSIONS) && !ENV.has_key?('SEQUEL_NO_CORE_EXTENSIONS') # Add the database adapter class methods to Sequel via metaprogramming diff --git a/lib/sequel/dataset.rb b/lib/sequel/dataset.rb index f09b118ec1..fe9893f598 100644 --- a/lib/sequel/dataset.rb +++ b/lib/sequel/dataset.rb @@ -24,195 +24,7 @@ class Dataset extend Metaprogramming include Metaprogramming include Enumerable - - # The dataset options that require the removal of cached columns - # if changed. - COLUMN_CHANGE_OPTS = [:select, :sql, :from, :join].freeze - - # All methods that should have a ! method added that modifies - # the receiver. 
- MUTATION_METHODS = %w'add_graph_aliases and cross_join distinct except exclude - filter for_update from from_self full_join full_outer_join graph - group group_and_count group_by having inner_join intersect invert join join_table left_join - left_outer_join limit lock_style naked natural_full_join natural_join - natural_left_join natural_right_join or order order_by order_more paginate qualify query - reverse reverse_order right_join right_outer_join select select_all select_append select_more server - set_defaults set_graph_aliases set_overrides unfiltered ungraphed ungrouped union - unlimited unordered where with with_recursive with_sql'.collect{|x| x.to_sym} - - # Which options don't affect the SQL generation. Used by simple_select_all? - # to determine if this is a simple SELECT * FROM table. - NON_SQL_OPTIONS = [:server, :defaults, :overrides, :graph, :eager_graph, :graph_aliases] - - NOTIMPL_MSG = "This method must be overridden in Sequel adapters".freeze - WITH_SUPPORTED=:select_with_sql - - # The database that corresponds to this dataset - attr_accessor :db - - # Set the method to call on identifiers going into the database for this dataset - attr_accessor :identifier_input_method - - # Set the method to call on identifiers coming the database for this dataset - attr_accessor :identifier_output_method - - # The hash of options for this dataset, keys are symbols. - attr_accessor :opts - - # Whether to quote identifiers for this dataset - attr_writer :quote_identifiers - - # The row_proc for this database, should be a Proc that takes - # a single hash argument and returns the object you want - # each to return. - attr_accessor :row_proc - - # Constructs a new Dataset instance with an associated database and - # options. Datasets are usually constructed by invoking the Database#[] method: - # - # DB[:posts] - # - # Sequel::Dataset is an abstract class that is not useful by itself. 
Each - # database adaptor should provide a subclass of Sequel::Dataset, and have - # the Database#dataset method return an instance of that class. - def initialize(db, opts = nil) - @db = db - @quote_identifiers = db.quote_identifiers? if db.respond_to?(:quote_identifiers?) - @identifier_input_method = db.identifier_input_method if db.respond_to?(:identifier_input_method) - @identifier_output_method = db.identifier_output_method if db.respond_to?(:identifier_output_method) - @opts = opts || {} - @row_proc = nil - end - - ### Class Methods ### - - # Setup mutation (e.g. filter!) methods. These operate the same as the - # non-! methods, but replace the options of the current dataset with the - # options of the resulting dataset. - def self.def_mutation_method(*meths) - meths.each do |meth| - class_eval("def #{meth}!(*args, &block); mutation_method(:#{meth}, *args, &block) end", __FILE__, __LINE__) - end - end - - ### Instance Methods ### - - # Return the dataset as an aliased expression with the given alias. You can - # use this as a FROM or JOIN dataset, or as a column if this dataset - # returns a single row and column. - def as(aliaz) - ::Sequel::SQL::AliasedExpression.new(self, aliaz) - end - - # Returns a new clone of the dataset with with the given options merged. - # If the options changed include options in COLUMN_CHANGE_OPTS, the cached - # columns are deleted. - def clone(opts = {}) - c = super() - c.opts = @opts.merge(opts) - c.instance_variable_set(:@columns, nil) if opts.keys.any?{|o| COLUMN_CHANGE_OPTS.include?(o)} - c - end - - # Add a mutation method to this dataset instance. - def def_mutation_method(*meths) - meths.each do |meth| - instance_eval("def #{meth}!(*args, &block); mutation_method(:#{meth}, *args, &block) end", __FILE__, __LINE__) - end - end - - # Yield a dataset for each server in the connection pool that is tied to that server. 
- # Intended for use in sharded environments where all servers need to be modified - # with the same data: - # - # DB[:configs].where(:key=>'setting').each_server{|ds| ds.update(:value=>'new_value')} - def each_server - db.servers.each{|s| yield server(s)} - end - - # Returns a string representation of the dataset including the class name - # and the corresponding SQL select statement. - def inspect - "#<#{self.class}: #{sql.inspect}>" - end - - # Returns a naked dataset clone - i.e. a dataset that returns records as - # hashes instead of calling the row proc. - def naked - ds = clone - ds.row_proc = nil - ds - end - - # Set the server for this dataset to use. Used to pick a specific database - # shard to run a query against, or to override the default (which is SELECT uses - # :read_only database and all other queries use the :default database). - def server(servr) - clone(:server=>servr) - end - - # Set the default values for insert and update statements. The values hash passed - # to insert or update are merged into this hash. - def set_defaults(hash) - clone(:defaults=>(@opts[:defaults]||{}).merge(hash)) - end - - # Set values that override hash arguments given to insert and update statements. - # This hash is merged into the hash provided to insert or update. - def set_overrides(hash) - clone(:overrides=>hash.merge(@opts[:overrides]||{})) - end - - # Add the mutation methods via metaprogramming - def_mutation_method(*MUTATION_METHODS) - - protected - - # Return true if the dataset has a non-nil value for any key in opts. - def options_overlap(opts) - !(@opts.collect{|k,v| k unless v.nil?}.compact & opts).empty? - end - - # Whether this dataset is a simple SELECT * FROM table. - def simple_select_all? - o = @opts.reject{|k,v| v.nil? 
|| NON_SQL_OPTIONS.include?(k)} - o.length == 1 && (f = o[:from]) && f.length == 1 && f.first.is_a?(Symbol) - end - - private - - # Set the server to use to :default unless it is already set in the passed opts - def default_server_opts(opts) - {:server=>@opts[:server] || :default}.merge(opts) - end - - # Modify the identifier returned from the database based on the - # identifier_output_method. - def input_identifier(v) - (i = identifier_input_method) ? v.to_s.send(i) : v.to_s - end - - # Modify the receiver with the results of sending the meth, args, and block - # to the receiver and merging the options of the resulting dataset into - # the receiver's options. - def mutation_method(meth, *args, &block) - copy = send(meth, *args, &block) - @opts.merge!(copy.opts) - self - end - - # Modify the identifier returned from the database based on the - # identifier_output_method. - def output_identifier(v) - v = 'untitled' if v == '' - (i = identifier_output_method) ? v.to_s.send(i).to_sym : v.to_sym - end - - # This is run inside .all, after all of the records have been loaded - # via .each, but before any block passed to all is called. It is called with - # a single argument, an array of all returned records. Does nothing by - # default, added to make the model eager loading code simpler. - def post_load(all_records) - end end + + require(%w"query actions features graph prepared_statements misc mutation sql", 'dataset') end diff --git a/lib/sequel/dataset/actions.rb b/lib/sequel/dataset/actions.rb index 5f297d3029..225dfab402 100644 --- a/lib/sequel/dataset/actions.rb +++ b/lib/sequel/dataset/actions.rb @@ -1,10 +1,33 @@ module Sequel class Dataset + # --------------------- + # :section: Methods that execute code on the database + # These methods all execute the dataset's SQL on the database. + # They don't return modified datasets, so if used in a method chain + # they should be the last method called. 
+ # --------------------- + # Alias for insert, but not aliased directly so subclasses # don't have to override both methods. def <<(*args) insert(*args) end + + # Returns the first record matching the conditions. Examples: + # + # ds[:id=>1] => {:id=1} + def [](*conditions) + raise(Error, ARRAY_ACCESS_ERROR_MSG) if (conditions.length == 1 and conditions.first.is_a?(Integer)) or conditions.length == 0 + first(*conditions) + end + + # Update all records matching the conditions + # with the values specified. Examples: + # + # ds[:id=>1] = {:id=>2} # SQL: UPDATE ... SET id = 2 WHERE id = 1 + def []=(conditions, values) + filter(conditions).update(values) + end # Returns an array with all records in the dataset. If a block is given, # the array is iterated over after all items have been loaded. @@ -15,6 +38,11 @@ def all(&block) a.each(&block) if block a end + + # Returns the average value for the given column. + def avg(column) + aggregate_dataset.get{avg(column)} + end # Returns the columns in the result set in order. # If the columns are currently cached, returns the cached value. Otherwise, @@ -32,7 +60,7 @@ def columns @columns = ds.instance_variable_get(:@columns) @columns || [] end - + # Remove the cached list of columns and do a SELECT query to find # the columns. def columns! @@ -40,6 +68,11 @@ def columns! columns end + # Returns the number of records in the dataset. + def count + aggregate_dataset.get{COUNT(:*){}.as(count)}.to_i + end + # Deletes the records in the dataset. The returned value is generally the # number of records deleted, but that is adapter dependent. See delete_sql. def delete @@ -62,12 +95,102 @@ def each(&block) end self end + + # Returns true if no records exist in the dataset, false otherwise + def empty? + get(1).nil? + end # Executes a select query and fetches records, passing each record to the # supplied block. The yielded records should be hashes with symbol keys. 
def fetch_rows(sql, &block) raise NotImplementedError, NOTIMPL_MSG end + + # If a integer argument is + # given, it is interpreted as a limit, and then returns all + # matching records up to that limit. If no argument is passed, + # it returns the first matching record. If any other type of + # argument(s) is passed, it is given to filter and the + # first matching record is returned. If a block is given, it is used + # to filter the dataset before returning anything. Examples: + # + # ds.first => {:id=>7} + # ds.first(2) => [{:id=>6}, {:id=>4}] + # ds.order(:id).first(2) => [{:id=>1}, {:id=>2}] + # ds.first(:id=>2) => {:id=>2} + # ds.first("id = 3") => {:id=>3} + # ds.first("id = ?", 4) => {:id=>4} + # ds.first{|o| o.id > 2} => {:id=>5} + # ds.order(:id).first{|o| o.id > 2} => {:id=>3} + # ds.first{|o| o.id > 2} => {:id=>5} + # ds.first("id > ?", 4){|o| o.id < 6} => {:id=>5} + # ds.order(:id).first(2){|o| o.id < 2} => [{:id=>1}] + def first(*args, &block) + ds = block ? filter(&block) : self + + if args.empty? + ds.single_record + else + args = (args.size == 1) ? args.first : args + if Integer === args + ds.limit(args).all + else + ds.filter(args).single_record + end + end + end + + # Return the column value for the first matching record in the dataset. + # Raises an error if both an argument and block is given. + # + # ds.get(:id) + # ds.get{|o| o.sum(:id)} + def get(column=nil, &block) + if column + raise(Error, ARG_BLOCK_ERROR_MSG) if block + select(column).single_value + else + select(&block).single_value + end + end + + # Inserts multiple records into the associated table. This method can be + # to efficiently insert a large amounts of records into a table. Inserts + # are automatically wrapped in a transaction. 
+ # + # This method is called with a columns array and an array of value arrays: + # + # dataset.import([:x, :y], [[1, 2], [3, 4]]) + # + # This method also accepts a dataset instead of an array of value arrays: + # + # dataset.import([:x, :y], other_dataset.select(:a___x, :b___y)) + # + # The method also accepts a :slice or :commit_every option that specifies + # the number of records to insert per transaction. This is useful especially + # when inserting a large number of records, e.g.: + # + # # this will commit every 50 records + # dataset.import([:x, :y], [[1, 2], [3, 4], ...], :slice => 50) + def import(columns, values, opts={}) + return @db.transaction{insert(columns, values)} if values.is_a?(Dataset) + + return if values.empty? + raise(Error, IMPORT_ERROR_MSG) if columns.empty? + + if slice_size = opts[:commit_every] || opts[:slice] + offset = 0 + loop do + @db.transaction(opts){multi_insert_sql(columns, values[offset, slice_size]).each{|st| execute_dui(st)}} + offset += slice_size + break if offset >= values.length + end + else + statements = multi_insert_sql(columns, values) + @db.transaction{statements.each{|st| execute_dui(st)}} + end + end # Inserts values into the associated table. The returned value is generally # the value of the primary key for the inserted row, but that is adapter dependent. @@ -75,12 +198,168 @@ def fetch_rows(sql, &block) def insert(*values) execute_insert(insert_sql(*values)) end + + # Inserts multiple values. If a block is given it is invoked for each + # item in the given array before inserting it. See #multi_insert as + # a possible faster version that inserts multiple records in one + # SQL statement. + def insert_multiple(array, &block) + if block + array.each {|i| insert(block[i])} + else + array.each {|i| insert(i)} + end + end + + # Returns the interval between minimum and maximum values for the given + # column. 
+ def interval(column) + aggregate_dataset.get{max(column) - min(column)} + end + + # Reverses the order and then runs first. Note that this + # will not necessarily give you the last record in the dataset, + # unless you have an unambiguous order. If there is not + # currently an order for this dataset, raises an Error. + def last(*args, &block) + raise(Error, 'No order specified') unless @opts[:order] + reverse.first(*args, &block) + end + + # Maps column values for each record in the dataset (if a column name is + # given), or performs the stock mapping functionality of Enumerable. + # Raises an error if both an argument and block are given. Examples: + # + # ds.map(:id) => [1, 2, 3, ...] + # ds.map{|r| r[:id] * 2} => [2, 4, 6, ...] + def map(column=nil, &block) + if column + raise(Error, ARG_BLOCK_ERROR_MSG) if block + super(){|r| r[column]} + else + super(&block) + end + end + + # Returns the maximum value for the given column. + def max(column) + aggregate_dataset.get{max(column)} + end + + # Returns the minimum value for the given column. + def min(column) + aggregate_dataset.get{min(column)} + end + + # This is a front end for import that allows you to submit an array of + # hashes instead of arrays of columns and values: + # + # dataset.multi_insert([{:x => 1}, {:x => 2}]) + # + # Be aware that all hashes should have the same keys if you use this calling method, + # otherwise some columns could be missed or set to null instead of to default + # values. + # + # You can also use the :slice or :commit_every option that import accepts. + def multi_insert(hashes, opts={}) + return if hashes.empty? + columns = hashes.first.keys + import(columns, hashes.map{|h| columns.map{|c| h[c]}}, opts) + end + + # Returns a Range object made from the minimum and maximum values for the + # given column. 
+ def range(column) + if r = aggregate_dataset.select{[min(column).as(v1), max(column).as(v2)]}.first + (r[:v1]..r[:v2]) + end + end + + # Returns a hash with key_column values as keys and value_column values as + # values. Similar to to_hash, but only selects the two columns. + def select_hash(key_column, value_column) + select(key_column, value_column).to_hash(hash_key_symbol(key_column), hash_key_symbol(value_column)) + end + + # Selects the column given (either as an argument or as a block), and + # returns an array of all values of that column in the dataset. If you + # give a block argument that returns an array with multiple entries, + # the contents of the resulting array are undefined. + def select_map(column=nil, &block) + ds = naked.ungraphed + ds = if column + raise(Error, ARG_BLOCK_ERROR_MSG) if block + ds.select(column) + else + ds.select(&block) + end + ds.map{|r| r.values.first} + end + + # The same as select_map, but in addition orders the array by the column. + def select_order_map(column=nil, &block) + ds = naked.ungraphed + ds = if column + raise(Error, ARG_BLOCK_ERROR_MSG) if block + ds.select(column).order(unaliased_identifier(column)) + else + ds.select(&block).order(&block) + end + ds.map{|r| r.values.first} + end # Alias for update, but not aliased directly so subclasses # don't have to override both methods. def set(*args) update(*args) end + + # Returns the first record in the dataset. + def single_record + clone(:limit=>1).each{|r| return r} + nil + end + + # Returns the first value of the first record in the dataset. + # Returns nil if dataset is empty. + def single_value + if r = naked.ungraphed.single_record + r.values.first + end + end + + # Returns the sum for the given column. + def sum(column) + aggregate_dataset.get{sum(column)} + end + + # Returns a string in CSV format containing the dataset records. By + # default the CSV representation includes the column titles in the + # first line. 
You can turn that off by passing false as the + # include_column_titles argument. + # + # This does not use a CSV library or handle quoting of values in + # any way. If any values in any of the rows could include commas or line + # endings, you shouldn't use this. + def to_csv(include_column_titles = true) + n = naked + cols = n.columns + csv = '' + csv << "#{cols.join(COMMA_SEPARATOR)}\r\n" if include_column_titles + n.each{|r| csv << "#{cols.collect{|c| r[c]}.join(COMMA_SEPARATOR)}\r\n"} + csv + end + + # Returns a hash with one column used as key and another used as value. + # If rows have duplicate values for the key column, the latter row(s) + # will overwrite the value of the previous row(s). If the value_column + # is not given or nil, uses the entire hash as the value. + def to_hash(key_column, value_column = nil) + inject({}) do |m, r| + m[r[key_column]] = value_column ? r[value_column] : r + m + end + end # Truncates the dataset. Returns nil. def truncate @@ -94,6 +373,11 @@ def update(values={}) end private + + # Set the server to use to :default unless it is already set in the passed opts + def default_server_opts(opts) + {:server=>@opts[:server] || :default}.merge(opts) + end # Execute the given SQL on the database using execute. def execute(sql, opts={}, &block) @@ -115,5 +399,43 @@ def execute_dui(sql, opts={}, &block) def execute_insert(sql, opts={}, &block) @db.execute_insert(sql, default_server_opts(opts), &block) end + + # Return a plain symbol given a potentially qualified or aliased symbol, + # specifying the symbol that is likely to be used as the hash key + # for the column when records are returned. + def hash_key_symbol(s) + raise(Error, "#{s.inspect} is not a symbol") unless s.is_a?(Symbol) + _, c, a = split_symbol(s) + (a || c).to_sym + end + + # Modify the identifier returned from the database based on the + # identifier_output_method. + def output_identifier(v) + v = 'untitled' if v == '' + (i = identifier_output_method) ? 
v.to_s.send(i).to_sym : v.to_sym + end + + # This is run inside .all, after all of the records have been loaded + # via .each, but before any block passed to all is called. It is called with + # a single argument, an array of all returned records. Does nothing by + # default, added to make the model eager loading code simpler. + def post_load(all_records) + end + + # Return the unaliased part of the identifier. Handles both + # implicit aliases in symbols, as well as SQL::AliasedExpression + # objects. Other objects are returned as is. + def unaliased_identifier(c) + case c + when Symbol + c_table, column, _ = split_symbol(c) + c_table ? column.to_sym.qualify(c_table) : column.to_sym + when SQL::AliasedExpression + c.expression + else + c + end + end end end diff --git a/lib/sequel/dataset/convenience.rb b/lib/sequel/dataset/convenience.rb deleted file mode 100644 index 3c010db8ae..0000000000 --- a/lib/sequel/dataset/convenience.rb +++ /dev/null @@ -1,326 +0,0 @@ -module Sequel - class Dataset - COMMA_SEPARATOR = ', '.freeze - COUNT_OF_ALL_AS_COUNT = SQL::Function.new(:count, LiteralString.new('*'.freeze)).as(:count) - ARRAY_ACCESS_ERROR_MSG = 'You cannot call Dataset#[] with an integer or with no arguments.'.freeze - ARG_BLOCK_ERROR_MSG = 'Must use either an argument or a block, not both'.freeze - IMPORT_ERROR_MSG = 'Using Sequel::Dataset#import an empty column array is not allowed'.freeze - - # Returns the first record matching the conditions. Examples: - # - # ds[:id=>1] => {:id=1} - def [](*conditions) - raise(Error, ARRAY_ACCESS_ERROR_MSG) if (conditions.length == 1 and conditions.first.is_a?(Integer)) or conditions.length == 0 - first(*conditions) - end - - # Update all records matching the conditions - # with the values specified. Examples: - # - # ds[:id=>1] = {:id=>2} # SQL: UPDATE ... SET id = 2 WHERE id = 1 - def []=(conditions, values) - filter(conditions).update(values) - end - - # Returns the average value for the given column. 
- def avg(column) - aggregate_dataset.get{avg(column)} - end - - # Returns true if no records exist in the dataset, false otherwise - def empty? - get(1).nil? - end - - # If a integer argument is - # given, it is interpreted as a limit, and then returns all - # matching records up to that limit. If no argument is passed, - # it returns the first matching record. If any other type of - # argument(s) is passed, it is given to filter and the - # first matching record is returned. If a block is given, it is used - # to filter the dataset before returning anything. Examples: - # - # ds.first => {:id=>7} - # ds.first(2) => [{:id=>6}, {:id=>4}] - # ds.order(:id).first(2) => [{:id=>1}, {:id=>2}] - # ds.first(:id=>2) => {:id=>2} - # ds.first("id = 3") => {:id=>3} - # ds.first("id = ?", 4) => {:id=>4} - # ds.first{|o| o.id > 2} => {:id=>5} - # ds.order(:id).first{|o| o.id > 2} => {:id=>3} - # ds.first{|o| o.id > 2} => {:id=>5} - # ds.first("id > ?", 4){|o| o.id < 6} => {:id=>5} - # ds.order(:id).first(2){|o| o.id < 2} => [{:id=>1}] - def first(*args, &block) - ds = block ? filter(&block) : self - - if args.empty? - ds.single_record - else - args = (args.size == 1) ? args.first : args - if Integer === args - ds.limit(args).all - else - ds.filter(args).single_record - end - end - end - - # Return the column value for the first matching record in the dataset. - # Raises an error if both an argument and block is given. - # - # ds.get(:id) - # ds.get{|o| o.sum(:id)} - def get(column=nil, &block) - if column - raise(Error, ARG_BLOCK_ERROR_MSG) if block - select(column).single_value - else - select(&block).single_value - end - end - - # Returns a dataset grouped by the given column with count by group, - # order by the count of records. Column aliases may be supplied, and will - # be included in the select clause. - # - # Examples: - # - # ds.group_and_count(:name).all => [{:name=>'a', :count=>1}, ...] 
- # ds.group_and_count(:first_name, :last_name).all => [{:first_name=>'a', :last_name=>'b', :count=>1}, ...] - # ds.group_and_count(:first_name___name).all => [{:name=>'a', :count=>1}, ...] - def group_and_count(*columns) - group(*columns.map{|c| unaliased_identifier(c)}).select(*(columns + [COUNT_OF_ALL_AS_COUNT])) - end - - # Inserts multiple records into the associated table. This method can be - # to efficiently insert a large amounts of records into a table. Inserts - # are automatically wrapped in a transaction. - # - # This method is called with a columns array and an array of value arrays: - # - # dataset.import([:x, :y], [[1, 2], [3, 4]]) - # - # This method also accepts a dataset instead of an array of value arrays: - # - # dataset.import([:x, :y], other_dataset.select(:a___x, :b___y)) - # - # The method also accepts a :slice or :commit_every option that specifies - # the number of records to insert per transaction. This is useful especially - # when inserting a large number of records, e.g.: - # - # # this will commit every 50 records - # dataset.import([:x, :y], [[1, 2], [3, 4], ...], :slice => 50) - def import(columns, values, opts={}) - return @db.transaction{insert(columns, values)} if values.is_a?(Dataset) - - return if values.empty? - raise(Error, IMPORT_ERROR_MSG) if columns.empty? - - if slice_size = opts[:commit_every] || opts[:slice] - offset = 0 - loop do - @db.transaction(opts){multi_insert_sql(columns, values[offset, slice_size]).each{|st| execute_dui(st)}} - offset += slice_size - break if offset >= values.length - end - else - statements = multi_insert_sql(columns, values) - @db.transaction{statements.each{|st| execute_dui(st)}} - end - end - - # Returns the interval between minimum and maximum values for the given - # column. - def interval(column) - aggregate_dataset.get{max(column) - min(column)} - end - - # Reverses the order and then runs first. 
Note that this - # will not necessarily give you the last record in the dataset, - # unless you have an unambiguous order. If there is not - # currently an order for this dataset, raises an Error. - def last(*args, &block) - raise(Error, 'No order specified') unless @opts[:order] - reverse.first(*args, &block) - end - - # Maps column values for each record in the dataset (if a column name is - # given), or performs the stock mapping functionality of Enumerable. - # Raises an error if both an argument and block are given. Examples: - # - # ds.map(:id) => [1, 2, 3, ...] - # ds.map{|r| r[:id] * 2} => [2, 4, 6, ...] - def map(column=nil, &block) - if column - raise(Error, ARG_BLOCK_ERROR_MSG) if block - super(){|r| r[column]} - else - super(&block) - end - end - - # Returns the maximum value for the given column. - def max(column) - aggregate_dataset.get{max(column)} - end - - # Returns the minimum value for the given column. - def min(column) - aggregate_dataset.get{min(column)} - end - - # This is a front end for import that allows you to submit an array of - # hashes instead of arrays of columns and values: - # - # dataset.multi_insert([{:x => 1}, {:x => 2}]) - # - # Be aware that all hashes should have the same keys if you use this calling method, - # otherwise some columns could be missed or set to null instead of to default - # values. - # - # You can also use the :slice or :commit_every option that import accepts. - def multi_insert(hashes, opts={}) - return if hashes.empty? - columns = hashes.first.keys - import(columns, hashes.map{|h| columns.map{|c| h[c]}}, opts) - end - - # Returns a Range object made from the minimum and maximum values for the - # given column. - def range(column) - if r = aggregate_dataset.select{[min(column).as(v1), max(column).as(v2)]}.first - (r[:v1]..r[:v2]) - end - end - - # Returns a hash with key_column values as keys and value_column values as - # values. Similar to to_hash, but only selects the two columns. 
- def select_hash(key_column, value_column) - select(key_column, value_column).to_hash(hash_key_symbol(key_column), hash_key_symbol(value_column)) - end - - # Selects the column given (either as an argument or as a block), and - # returns an array of all values of that column in the dataset. If you - # give a block argument that returns an array with multiple entries, - # the contents of the resulting array are undefined. - def select_map(column=nil, &block) - ds = naked.ungraphed - ds = if column - raise(Error, ARG_BLOCK_ERROR_MSG) if block - ds.select(column) - else - ds.select(&block) - end - ds.map{|r| r.values.first} - end - - # The same as select_map, but in addition orders the array by the column. - def select_order_map(column=nil, &block) - ds = naked.ungraphed - ds = if column - raise(Error, ARG_BLOCK_ERROR_MSG) if block - ds.select(column).order(unaliased_identifier(column)) - else - ds.select(&block).order(&block) - end - ds.map{|r| r.values.first} - end - - # Returns the first record in the dataset. - def single_record - clone(:limit=>1).each{|r| return r} - nil - end - - # Returns the first value of the first record in the dataset. - # Returns nil if dataset is empty. - def single_value - if r = naked.ungraphed.single_record - r.values.first - end - end - - # Returns the sum for the given column. - def sum(column) - aggregate_dataset.get{sum(column)} - end - - # Returns a string in CSV format containing the dataset records. By - # default the CSV representation includes the column titles in the - # first line. You can turn that off by passing false as the - # include_column_titles argument. - # - # This does not use a CSV library or handle quoting of values in - # any way. If any values in any of the rows could include commas or line - # endings, you shouldn't use this. 
- def to_csv(include_column_titles = true) - n = naked - cols = n.columns - csv = '' - csv << "#{cols.join(COMMA_SEPARATOR)}\r\n" if include_column_titles - n.each{|r| csv << "#{cols.collect{|c| r[c]}.join(COMMA_SEPARATOR)}\r\n"} - csv - end - - # Returns a hash with one column used as key and another used as value. - # If rows have duplicate values for the key column, the latter row(s) - # will overwrite the value of the previous row(s). If the value_column - # is not given or nil, uses the entire hash as the value. - def to_hash(key_column, value_column = nil) - inject({}) do |m, r| - m[r[key_column]] = value_column ? r[value_column] : r - m - end - end - - # Creates a unique table alias that hasn't already been used in the dataset. - # table_alias can be any type of object accepted by alias_symbol. - # The symbol returned will be the implicit alias in the argument, - # possibly appended with "_N" if the implicit alias has already been - # used, where N is an integer starting at 0 and increasing until an - # unused one is found. - def unused_table_alias(table_alias) - table_alias = alias_symbol(table_alias) - used_aliases = [] - used_aliases += opts[:from].map{|t| alias_symbol(t)} if opts[:from] - used_aliases += opts[:join].map{|j| j.table_alias ? alias_alias_symbol(j.table_alias) : alias_symbol(j.table)} if opts[:join] - if used_aliases.include?(table_alias) - i = 0 - loop do - ta = :"#{table_alias}_#{i}" - return ta unless used_aliases.include?(ta) - i += 1 - end - else - table_alias - end - end - - private - - # Return a plain symbol given a potentially qualified or aliased symbol, - # specifying the symbol that is likely to be used as the hash key - # for the column when records are returned. - def hash_key_symbol(s) - raise(Error, "#{s.inspect} is not a symbol") unless s.is_a?(Symbol) - _, c, a = split_symbol(s) - (a || c).to_sym - end - - # Return the unaliased part of the identifier. 
Handles both - # implicit aliases in symbols, as well as SQL::AliasedExpression - # objects. Other objects are returned as is. - def unaliased_identifier(c) - case c - when Symbol - c_table, column, _ = split_symbol(c) - c_table ? column.to_sym.qualify(c_table) : column.to_sym - when SQL::AliasedExpression - c.expression - else - c - end - end - end -end diff --git a/lib/sequel/dataset/features.rb b/lib/sequel/dataset/features.rb index 6df0939e54..868c811310 100644 --- a/lib/sequel/dataset/features.rb +++ b/lib/sequel/dataset/features.rb @@ -1,5 +1,14 @@ module Sequel class Dataset + # --------------------- + # :section: Methods that describe what the dataset supports + # These methods all return booleans, with most describing whether or not the + # dataset supports a feature. + # --------------------- + + # Method used to check if WITH is supported + WITH_SUPPORTED=:select_with_sql + # Whether this dataset quotes identifiers. def quote_identifiers? @quote_identifiers diff --git a/lib/sequel/dataset/graph.rb b/lib/sequel/dataset/graph.rb index cca7a16d63..760fb8316a 100644 --- a/lib/sequel/dataset/graph.rb +++ b/lib/sequel/dataset/graph.rb @@ -1,5 +1,12 @@ module Sequel class Dataset + # --------------------- + # :section: Methods related to dataset graphing + # Dataset graphing changes the dataset to yield hashes where keys are table + # name symbols and columns are hashes representing the values related to + # that table. All of these methods return modified copies of the receiver. + # --------------------- + # Adds the given graph aliases to the list of graph aliases to use, # unlike #set_graph_aliases, which replaces the list. See # #set_graph_aliases. 
diff --git a/lib/sequel/dataset/misc.rb b/lib/sequel/dataset/misc.rb new file mode 100644 index 0000000000..26af126b64 --- /dev/null +++ b/lib/sequel/dataset/misc.rb @@ -0,0 +1,119 @@ +module Sequel + class Dataset + # --------------------- + # :section: Miscellaneous methods + # These methods don't fit cleanly into another section. + # --------------------- + + NOTIMPL_MSG = "This method must be overridden in Sequel adapters".freeze + ARRAY_ACCESS_ERROR_MSG = 'You cannot call Dataset#[] with an integer or with no arguments.'.freeze + ARG_BLOCK_ERROR_MSG = 'Must use either an argument or a block, not both'.freeze + IMPORT_ERROR_MSG = 'Using Sequel::Dataset#import an empty column array is not allowed'.freeze + + # The database that corresponds to this dataset + attr_accessor :db + + # The hash of options for this dataset, keys are symbols. + attr_accessor :opts + + # Constructs a new Dataset instance with an associated database and + # options. Datasets are usually constructed by invoking the Database#[] method: + # + # DB[:posts] + # + # Sequel::Dataset is an abstract class that is not useful by itself. Each + # database adaptor should provide a subclass of Sequel::Dataset, and have + # the Database#dataset method return an instance of that class. + def initialize(db, opts = nil) + @db = db + @quote_identifiers = db.quote_identifiers? if db.respond_to?(:quote_identifiers?) + @identifier_input_method = db.identifier_input_method if db.respond_to?(:identifier_input_method) + @identifier_output_method = db.identifier_output_method if db.respond_to?(:identifier_output_method) + @opts = opts || {} + @row_proc = nil + end + + # Return the dataset as an aliased expression with the given alias. You can + # use this as a FROM or JOIN dataset, or as a column if this dataset + # returns a single row and column. + def as(aliaz) + ::Sequel::SQL::AliasedExpression.new(self, aliaz) + end + + # Yield a dataset for each server in the connection pool that is tied to that server. 
+ # Intended for use in sharded environments where all servers need to be modified + # with the same data: + # + # DB[:configs].where(:key=>'setting').each_server{|ds| ds.update(:value=>'new_value')} + def each_server + db.servers.each{|s| yield server(s)} + end + + # The first source (primary table) for this dataset. If the dataset doesn't + # have a table, raises an error. If the table is aliased, returns the aliased name. + def first_source_alias + source = @opts[:from] + if source.nil? || source.empty? + raise Error, 'No source specified for query' + end + case s = source.first + when SQL::AliasedExpression + s.aliaz + when Symbol + sch, table, aliaz = split_symbol(s) + aliaz ? aliaz.to_sym : s + else + s + end + end + alias first_source first_source_alias + + # The first source (primary table) for this dataset. If the dataset doesn't + # have a table, raises an error. If the table is aliased, returns the original + # table, not the alias + def first_source_table + source = @opts[:from] + if source.nil? || source.empty? + raise Error, 'No source specified for query' + end + case s = source.first + when SQL::AliasedExpression + s.expression + when Symbol + sch, table, aliaz = split_symbol(s) + aliaz ? (sch ? SQL::QualifiedIdentifier.new(sch, table) : table.to_sym) : s + else + s + end + end + + # Returns a string representation of the dataset including the class name + # and the corresponding SQL select statement. + def inspect + "#<#{self.class}: #{sql.inspect}>" + end + + # Creates a unique table alias that hasn't already been used in the dataset. + # table_alias can be any type of object accepted by alias_symbol. + # The symbol returned will be the implicit alias in the argument, + # possibly appended with "_N" if the implicit alias has already been + # used, where N is an integer starting at 0 and increasing until an + # unused one is found. 
+ def unused_table_alias(table_alias) + table_alias = alias_symbol(table_alias) + used_aliases = [] + used_aliases += opts[:from].map{|t| alias_symbol(t)} if opts[:from] + used_aliases += opts[:join].map{|j| j.table_alias ? alias_alias_symbol(j.table_alias) : alias_symbol(j.table)} if opts[:join] + if used_aliases.include?(table_alias) + i = 0 + loop do + ta = :"#{table_alias}_#{i}" + return ta unless used_aliases.include?(ta) + i += 1 + end + else + table_alias + end + end + end +end \ No newline at end of file diff --git a/lib/sequel/dataset/mutation.rb b/lib/sequel/dataset/mutation.rb new file mode 100644 index 0000000000..19d1064121 --- /dev/null +++ b/lib/sequel/dataset/mutation.rb @@ -0,0 +1,64 @@ +module Sequel + class Dataset + # --------------------- + # :section: Mutation methods + # These methods modify the receiving dataset and should be used with care. + # --------------------- + + # All methods that should have a ! method added that modifies + # the receiver. + MUTATION_METHODS = %w'add_graph_aliases and cross_join distinct except exclude + filter for_update from from_self full_join full_outer_join graph + group group_and_count group_by having inner_join intersect invert join join_table left_join + left_outer_join limit lock_style naked natural_full_join natural_join + natural_left_join natural_right_join or order order_by order_more paginate qualify query + reverse reverse_order right_join right_outer_join select select_all select_append select_more server + set_defaults set_graph_aliases set_overrides unfiltered ungraphed ungrouped union + unlimited unordered where with with_recursive with_sql'.collect{|x| x.to_sym} + + # Setup mutation (e.g. filter!) methods. These operate the same as the + # non-! methods, but replace the options of the current dataset with the + # options of the resulting dataset. 
+ def self.def_mutation_method(*meths) + meths.each do |meth| + class_eval("def #{meth}!(*args, &block); mutation_method(:#{meth}, *args, &block) end", __FILE__, __LINE__) + end + end + + # Add the mutation methods via metaprogramming + def_mutation_method(*MUTATION_METHODS) + + + # Set the method to call on identifiers going into the database for this dataset + attr_accessor :identifier_input_method + + # Set the method to call on identifiers coming from the database for this dataset + attr_accessor :identifier_output_method + + # Whether to quote identifiers for this dataset + attr_writer :quote_identifiers + + # The row_proc for this dataset, should be a Proc that takes + # a single hash argument and returns the object you want + # each to return. + attr_accessor :row_proc + + # Add a mutation method to this dataset instance. + def def_mutation_method(*meths) + meths.each do |meth| + instance_eval("def #{meth}!(*args, &block); mutation_method(:#{meth}, *args, &block) end", __FILE__, __LINE__) + end + end + + private + + # Modify the receiver with the results of sending the meth, args, and block + # to the receiver and merging the options of the resulting dataset into + # the receiver's options. + def mutation_method(meth, *args, &block) + copy = send(meth, *args, &block) + @opts.merge!(copy.opts) + self + end + end +end \ No newline at end of file diff --git a/lib/sequel/dataset/prepared_statements.rb b/lib/sequel/dataset/prepared_statements.rb index e30cc6dbef..95c69c28fa 100644 --- a/lib/sequel/dataset/prepared_statements.rb +++ b/lib/sequel/dataset/prepared_statements.rb @@ -1,5 +1,11 @@ module Sequel class Dataset + # --------------------- + # :section: Methods related to prepared statements or bound variables + # On some adapters, these use native prepared statements and bound variables, on others + # support is emulated.
+ # --------------------- + PREPARED_ARG_PLACEHOLDER = LiteralString.new('?').freeze # Default implementation of the argument mapper to allow diff --git a/lib/sequel/dataset/query.rb b/lib/sequel/dataset/query.rb index 1e9bf5d48b..3de56e4159 100644 --- a/lib/sequel/dataset/query.rb +++ b/lib/sequel/dataset/query.rb @@ -1,5 +1,17 @@ module Sequel class Dataset + # --------------------- + # :section: Methods that return modified datasets + # These methods all return modified copies of the receiver. + # --------------------- + # The dataset options that require the removal of cached columns + # if changed. + COLUMN_CHANGE_OPTS = [:select, :sql, :from, :join].freeze + + # Which options don't affect the SQL generation. Used by simple_select_all? + # to determine if this is a simple SELECT * FROM table. + NON_SQL_OPTIONS = [:server, :defaults, :overrides, :graph, :eager_graph, :graph_aliases] + # These symbols have _join methods created (e.g. inner_join) that # call join_table with the symbol, passing along the arguments and # block from the method call. @@ -20,6 +32,16 @@ def and(*cond, &block) raise(InvalidOperation, "No existing filter found.") unless @opts[:having] || @opts[:where] filter(*cond, &block) end + + # Returns a new clone of the dataset with the given options merged. + # If the options changed include options in COLUMN_CHANGE_OPTS, the cached + # columns are deleted. + def clone(opts = {}) + c = super() + c.opts = @opts.merge(opts) + c.instance_variable_set(:@columns, nil) if opts.keys.any?{|o| COLUMN_CHANGE_OPTS.include?(o)} + c + end # Returns a copy of the dataset with the SQL DISTINCT clause. # The DISTINCT clause is used to remove duplicate rows from the @@ -186,6 +208,19 @@ def group(*columns) clone(:group => (columns.compact.empty? ? nil : columns)) end alias group_by group + + # Returns a dataset grouped by the given columns, with a count of the + # records in each group.
Column aliases may be supplied, and will + # be included in the select clause. + # + # Examples: + # + # ds.group_and_count(:name).all => [{:name=>'a', :count=>1}, ...] + # ds.group_and_count(:first_name, :last_name).all => [{:first_name=>'a', :last_name=>'b', :count=>1}, ...] + # ds.group_and_count(:first_name___name).all => [{:name=>'a', :count=>1}, ...] + def group_and_count(*columns) + group(*columns.map{|c| unaliased_identifier(c)}).select(*(columns + [COUNT_OF_ALL_AS_COUNT])) + end # Returns a copy of the dataset with the HAVING conditions changed. See #filter for argument types. # @@ -354,6 +389,14 @@ def lock_style(style) clone(:lock => style) end + # Returns a naked dataset clone - i.e. a dataset that returns records as + # hashes instead of calling the row proc. + def naked + ds = clone + ds.row_proc = nil + ds + end + # Adds an alternate filter to an existing filter using OR. If no filter # exists an error is raised. # @@ -478,6 +521,25 @@ def select_more(*columns, &block) select(*columns, &block) end + # Set the server for this dataset to use. Used to pick a specific database + # shard to run a query against, or to override the default (which is SELECT uses + # :read_only database and all other queries use the :default database). + def server(servr) + clone(:server=>servr) + end + + # Set the default values for insert and update statements. The values hash passed + # to insert or update are merged into this hash. + def set_defaults(hash) + clone(:defaults=>(@opts[:defaults]||{}).merge(hash)) + end + + # Set values that override hash arguments given to insert and update statements. + # This hash is merged into the hash provided to insert or update. + def set_overrides(hash) + clone(:overrides=>hash.merge(@opts[:overrides]||{})) + end + # Returns a copy of the dataset with no filters (HAVING or WHERE clause) applied. 
# # dataset.group(:a).having(:a=>1).where(:b).unfiltered # SELECT * FROM items GROUP BY a @@ -547,6 +609,28 @@ def with_recursive(name, nonrecursive, recursive, opts={}) raise(Error, 'This datatset does not support common table expressions') unless supports_cte? clone(:with=>(@opts[:with]||[]) + [opts.merge(:recursive=>true, :name=>name, :dataset=>nonrecursive.union(recursive, {:all=>opts[:union_all] != false, :from_self=>false}))]) end + + # Returns a copy of the dataset with the static SQL used. This is useful if you want + # to keep the same row_proc/graph, but change the SQL used to custom SQL. + # + # dataset.with_sql('SELECT * FROM foo') # SELECT * FROM foo + def with_sql(sql, *args) + sql = SQL::PlaceholderLiteralString.new(sql, args) unless args.empty? + clone(:sql=>sql) + end + + protected + + # Return true if the dataset has a non-nil value for any key in opts. + def options_overlap(opts) + !(@opts.collect{|k,v| k unless v.nil?}.compact & opts).empty? + end + + # Whether this dataset is a simple SELECT * FROM table. + def simple_select_all? + o = @opts.reject{|k,v| v.nil? || NON_SQL_OPTIONS.include?(k)} + o.length == 1 && (f = o[:from]) && f.length == 1 && f.first.is_a?(Symbol) + end private diff --git a/lib/sequel/dataset/sql.rb b/lib/sequel/dataset/sql.rb index 66c4dcc80b..e450a27c86 100644 --- a/lib/sequel/dataset/sql.rb +++ b/lib/sequel/dataset/sql.rb @@ -1,5 +1,182 @@ module Sequel class Dataset + # --------------------- + # :section: User Methods relating to SQL Creation + # These are methods you can call to see what SQL will be generated by the dataset. + # --------------------- + + # Formats a DELETE statement using the given options and dataset options. + # + # dataset.filter{|o| o.price >= 100}.delete_sql #=> + # "DELETE FROM items WHERE (price >= 100)" + def delete_sql + return static_sql(opts[:sql]) if opts[:sql] + check_modification_allowed! + clause_sql(:delete) + end + + # Returns an EXISTS clause for the dataset as a LiteralString. 
+ # + # DB.select(1).where(DB[:items].exists).sql + # #=> "SELECT 1 WHERE (EXISTS (SELECT * FROM items))" + def exists + LiteralString.new("EXISTS (#{select_sql})") + end + + # Formats an INSERT statement using the given values. The API is a little + # complex, and best explained by example: + # + # # Default values + # DB[:items].insert_sql #=> 'INSERT INTO items DEFAULT VALUES' + # DB[:items].insert_sql({}) #=> 'INSERT INTO items DEFAULT VALUES' + # # Values without columns + # DB[:items].insert_sql(1,2,3) #=> 'INSERT INTO items VALUES (1, 2, 3)' + # DB[:items].insert_sql([1,2,3]) #=> 'INSERT INTO items VALUES (1, 2, 3)' + # # Values with columns + # DB[:items].insert_sql([:a, :b], [1,2]) #=> 'INSERT INTO items (a, b) VALUES (1, 2)' + # DB[:items].insert_sql(:a => 1, :b => 2) #=> 'INSERT INTO items (a, b) VALUES (1, 2)' + # # Using a subselect + # DB[:items].insert_sql(DB[:old_items]) #=> 'INSERT INTO items SELECT * FROM old_items' + # # Using a subselect with columns + # DB[:items].insert_sql([:a, :b], DB[:old_items]) #=> 'INSERT INTO items (a, b) SELECT * FROM old_items' + def insert_sql(*values) + return static_sql(@opts[:sql]) if @opts[:sql] + + check_modification_allowed!
+ + columns = [] + + case values.size + when 0 + return insert_sql({}) + when 1 + case vals = values.at(0) + when Hash + vals = @opts[:defaults].merge(vals) if @opts[:defaults] + vals = vals.merge(@opts[:overrides]) if @opts[:overrides] + values = [] + vals.each do |k,v| + columns << k + values << v + end + when Dataset, Array, LiteralString + values = vals + else + if vals.respond_to?(:values) && (v = vals.values).is_a?(Hash) + return insert_sql(v) + end + end + when 2 + if (v0 = values.at(0)).is_a?(Array) && ((v1 = values.at(1)).is_a?(Array) || v1.is_a?(Dataset) || v1.is_a?(LiteralString)) + columns, values = v0, v1 + raise(Error, "Different number of values and columns given to insert_sql") if values.is_a?(Array) and columns.length != values.length + end + end + + columns = columns.map{|k| literal(String === k ? k.to_sym : k)} + clone(:columns=>columns, :values=>values)._insert_sql + end + + # Returns a literal representation of a value to be used as part + # of an SQL expression. + # + # dataset.literal("abc'def\\") #=> "'abc''def\\\\'" + # dataset.literal(:items__id) #=> "items.id" + # dataset.literal([1, 2, 3]) => "(1, 2, 3)" + # dataset.literal(DB[:items]) => "(SELECT * FROM items)" + # dataset.literal(:x + 1 > :y) => "((x + 1) > y)" + # + # If an unsupported object is given, an exception is raised. + def literal(v) + case v + when String + return v if v.is_a?(LiteralString) + v.is_a?(SQL::Blob) ? 
literal_blob(v) : literal_string(v) + when Symbol + literal_symbol(v) + when Integer + literal_integer(v) + when Hash + literal_hash(v) + when SQL::Expression + literal_expression(v) + when Float + literal_float(v) + when BigDecimal + literal_big_decimal(v) + when NilClass + literal_nil + when TrueClass + literal_true + when FalseClass + literal_false + when Array + literal_array(v) + when Time + literal_time(v) + when DateTime + literal_datetime(v) + when Date + literal_date(v) + when Dataset + literal_dataset(v) + else + literal_other(v) + end + end + + # Returns an array of insert statements for inserting multiple records. + # This method is used by #multi_insert to format insert statements and + # expects a keys array and an array of value arrays. + # + # This method should be overridden by descendants if they support + # inserting multiple records in a single SQL statement. + def multi_insert_sql(columns, values) + values.map{|r| insert_sql(columns, r)} + end + + # Formats a SELECT statement + # + # dataset.select_sql # => "SELECT * FROM items" + def select_sql + return static_sql(@opts[:sql]) if @opts[:sql] + clause_sql(:select) + end + + # Same as select_sql, not aliased directly to make subclassing simpler. + def sql + select_sql + end + + # SQL query to truncate the table + def truncate_sql + if opts[:sql] + static_sql(opts[:sql]) + else + check_modification_allowed! + raise(InvalidOperation, "Can't truncate filtered datasets") if opts[:where] + _truncate_sql(source_list(opts[:from])) + end + end + + # Formats an UPDATE statement using the given values. + # + # dataset.update_sql(:price => 100, :category => 'software') #=> + # "UPDATE items SET price = 100, category = 'software'" + # + # Raises an error if the dataset is grouped or includes more + # than one table. + def update_sql(values = {}) + return static_sql(opts[:sql]) if opts[:sql] + check_modification_allowed!
+ clone(:values=>values)._update_sql + end + + # --------------------- + # :section: Internal Methods relating to SQL Creation + # These methods, while public, are not designed to be used directly by the end user. + # --------------------- + # Given a type (e.g. select) and an array of clauses, # return an array of methods to call to build the SQL string. def self.clause_methods(type, clauses) @@ -9,10 +186,12 @@ def self.clause_methods(type, clauses) AND_SEPARATOR = " AND ".freeze BOOL_FALSE = "'f'".freeze BOOL_TRUE = "'t'".freeze + COMMA_SEPARATOR = ', '.freeze COLUMN_REF_RE1 = /\A([\w ]+)__([\w ]+)___([\w ]+)\z/.freeze COLUMN_REF_RE2 = /\A([\w ]+)___([\w ]+)\z/.freeze COLUMN_REF_RE3 = /\A([\w ]+)__([\w ]+)\z/.freeze COUNT_FROM_SELF_OPTS = [:distinct, :group, :sql, :limit, :compounds] + COUNT_OF_ALL_AS_COUNT = SQL::Function.new(:count, LiteralString.new('*'.freeze)).as(:count) DATASET_ALIAS_BASE_NAME = 't'.freeze FOR_UPDATE = ' FOR UPDATE'.freeze IS_LITERALS = {nil=>'NULL'.freeze, true=>'TRUE'.freeze, false=>'FALSE'.freeze}.freeze @@ -138,139 +317,12 @@ def constant_sql(constant) constant.to_s end - # Returns the number of records in the dataset. - def count - aggregate_dataset.get{COUNT(:*){}.as(count)}.to_i - end - - # Formats a DELETE statement using the given options and dataset options. - # - # dataset.filter{|o| o.price >= 100}.delete_sql #=> - # "DELETE FROM items WHERE (price >= 100)" - def delete_sql - return static_sql(opts[:sql]) if opts[:sql] - check_modification_allowed! - clause_sql(:delete) - end - - # Returns an EXISTS clause for the dataset as a LiteralString. - # - # DB.select(1).where(DB[:items].exists).sql - # #=> "SELECT 1 WHERE (EXISTS (SELECT * FROM items))" - def exists - LiteralString.new("EXISTS (#{select_sql})") - end - - # The first source (primary table) for this dataset. If the dataset doesn't - # have a table, raises an error. If the table is aliased, returns the aliased name. 
- def first_source_alias - source = @opts[:from] - if source.nil? || source.empty? - raise Error, 'No source specified for query' - end - case s = source.first - when SQL::AliasedExpression - s.aliaz - when Symbol - sch, table, aliaz = split_symbol(s) - aliaz ? aliaz.to_sym : s - else - s - end - end - alias first_source first_source_alias - - # The first source (primary table) for this dataset. If the dataset doesn't - # have a table, raises an error. If the table is aliased, returns the original - # table, not the alias - def first_source_table - source = @opts[:from] - if source.nil? || source.empty? - raise Error, 'No source specified for query' - end - case s = source.first - when SQL::AliasedExpression - s.expression - when Symbol - sch, table, aliaz = split_symbol(s) - aliaz ? (sch ? SQL::QualifiedIdentifier.new(sch, table) : table.to_sym) : s - else - s - end - end - # SQL fragment specifying an SQL function call def function_sql(f) args = f.args "#{f.f}#{args.empty? ? '()' : literal(args)}" end - # Inserts multiple values. If a block is given it is invoked for each - # item in the given array before inserting it. See #multi_insert as - # a possible faster version that inserts multiple records in one - # SQL statement. - def insert_multiple(array, &block) - if block - array.each {|i| insert(block[i])} - else - array.each {|i| insert(i)} - end - end - - # Formats an INSERT statement using the given values. 
The API is a little - # complex, and best explained by example: - # - # # Default values - # DB[:items].insert_sql #=> 'INSERT INTO items DEFAULT VALUES' - # DB[:items].insert_sql({}) #=> 'INSERT INTO items DEFAULT VALUES' - # # Values without columns - # DB[:items].insert_sql(1,2,3) #=> 'INSERT INTO items VALUES (1, 2, 3)' - # DB[:items].insert_sql([1,2,3]) #=> 'INSERT INTO items VALUES (1, 2, 3)' - # # Values with columns - # DB[:items].insert_sql([:a, :b], [1,2]) #=> 'INSERT INTO items (a, b) VALUES (1, 2)' - # DB[:items].insert_sql(:a => 1, :b => 2) #=> 'INSERT INTO items (a, b) VALUES (1, 2)' - # # Using a subselect - # DB[:items].insert_sql(DB[:old_items]) #=> 'INSERT INTO items SELECT * FROM old_items - # # Using a subselect with columns - # DB[:items].insert_sql([:a, :b], DB[:old_items]) #=> 'INSERT INTO items (a, b) SELECT * FROM old_items - def insert_sql(*values) - return static_sql(@opts[:sql]) if @opts[:sql] - - check_modification_allowed! - - columns = [] - - case values.size - when 0 - return insert_sql({}) - when 1 - case vals = values.at(0) - when Hash - vals = @opts[:defaults].merge(vals) if @opts[:defaults] - vals = vals.merge(@opts[:overrides]) if @opts[:overrides] - values = [] - vals.each do |k,v| - columns << k - values << v - end - when Dataset, Array, LiteralString - values = vals - else - if vals.respond_to?(:values) && (v = vals.values).is_a?(Hash) - return insert_sql(v) - end - end - when 2 - if (v0 = values.at(0)).is_a?(Array) && ((v1 = values.at(1)).is_a?(Array) || v1.is_a?(Dataset) || v1.is_a?(LiteralString)) - columns, values = v0, v1 - raise(Error, "Different number of values and columns given to insert_sql") if values.is_a?(Array) and columns.length != values.length - end - end - - columns = columns.map{|k| literal(String === k ? k.to_sym : k)} - clone(:columns=>columns, :values=>values)._insert_sql - end - # SQL fragment specifying a JOIN clause without ON or USING. 
def join_clause_sql(jc) table = jc.table @@ -289,64 +341,6 @@ def join_on_clause_sql(jc) def join_using_clause_sql(jc) "#{join_clause_sql(jc)} USING (#{column_list(jc.using)})" end - - # Returns a literal representation of a value to be used as part - # of an SQL expression. - # - # dataset.literal("abc'def\\") #=> "'abc''def\\\\'" - # dataset.literal(:items__id) #=> "items.id" - # dataset.literal([1, 2, 3]) => "(1, 2, 3)" - # dataset.literal(DB[:items]) => "(SELECT * FROM items)" - # dataset.literal(:x + 1 > :y) => "((x + 1) > y)" - # - # If an unsupported object is given, an exception is raised. - def literal(v) - case v - when String - return v if v.is_a?(LiteralString) - v.is_a?(SQL::Blob) ? literal_blob(v) : literal_string(v) - when Symbol - literal_symbol(v) - when Integer - literal_integer(v) - when Hash - literal_hash(v) - when SQL::Expression - literal_expression(v) - when Float - literal_float(v) - when BigDecimal - literal_big_decimal(v) - when NilClass - literal_nil - when TrueClass - literal_true - when FalseClass - literal_false - when Array - literal_array(v) - when Time - literal_time(v) - when DateTime - literal_datetime(v) - when Date - literal_date(v) - when Dataset - literal_dataset(v) - else - literal_other(v) - end - end - - # Returns an array of insert statements for inserting multiple records. - # This method is used by #multi_insert to format insert statements and - # expects a keys array and and an array of value arrays. - # - # This method should be overridden by descendants if the support - # inserting multiple records in a single SQL statement. 
- def multi_insert_sql(columns, values) - values.map{|r| insert_sql(columns, r)} - end # SQL fragment for NegativeBooleanConstants def negative_boolean_constant_sql(constant) @@ -422,47 +416,10 @@ def schema_and_table(table_name) end end - # Formats a SELECT statement - # - # dataset.select_sql # => "SELECT * FROM items" - def select_sql - return static_sql(@opts[:sql]) if @opts[:sql] - clause_sql(:select) - end - - # Same as select_sql, not aliased directly to make subclassing simpler. - def sql - select_sql - end - # SQL fragment for specifying subscripts (SQL arrays) def subscript_sql(s) "#{literal(s.f)}[#{expression_list(s.sub)}]" end - - # SQL query to truncate the table - def truncate_sql - if opts[:sql] - static_sql(opts[:sql]) - else - check_modification_allowed! - raise(InvalidOperation, "Can't truncate filtered datasets") if opts[:where] - _truncate_sql(source_list(opts[:from])) - end - end - - # Formats an UPDATE statement using the given values. - # - # dataset.update_sql(:price => 100, :category => 'software') #=> - # "UPDATE items SET price = 100, category = 'software'" - # - # Raises an error if the dataset is grouped or includes more - # than one table. - def update_sql(values = {}) - return static_sql(opts[:sql]) if opts[:sql] - check_modification_allowed! - clone(:values=>values)._update_sql - end # The SQL fragment for the given window's options. def window_sql(opts) @@ -488,15 +445,6 @@ def window_function_sql(function, window) "#{literal(function)} OVER #{literal(window)}" end - # Returns a copy of the dataset with the static SQL used. This is useful if you want - # to keep the same row_proc/graph, but change the SQL used to custom SQL. - # - # dataset.with_sql('SELECT * FROM foo') # SELECT * FROM foo - def with_sql(sql, *args) - sql = SQL::PlaceholderLiteralString.new(sql, args) unless args.empty? - clone(:sql=>sql) - end - protected # Formats in INSERT statement using the stored columns and values. 
@@ -654,6 +602,12 @@ def format_timestamp_usec(usec) def identifier_list(columns) columns.map{|i| quote_identifier(i)}.join(COMMA_SEPARATOR) end + + # Modify the identifier sent to the database based on the + # identifier_input_method. + def input_identifier(v) + (i = identifier_input_method) ? v.to_s.send(i) : v.to_s + end # SQL fragment specifying the table to insert INTO def insert_into_sql(sql)