Skip to content

Commit

Permalink
Make DataFrame.from_activerecord faster (#468)
Browse files Browse the repository at this point in the history
* Make Daru::DataFrame.from_activerecord faster

* Avoid DataFrame#add_row in each block

* Add a benchmark of from_activerecord

* Fix rubocop error
  • Loading branch information
nowlinuxing authored and v0dro committed Dec 4, 2018
1 parent be470ab commit 5e962f3
Show file tree
Hide file tree
Showing 2 changed files with 39 additions and 17 deletions.
34 changes: 34 additions & 0 deletions benchmarks/db_loading.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Benchmark: compares Daru::DataFrame.from_sql against
# Daru::DataFrame.from_activerecord when loading the same 100_000-row
# `accounts` table from a throwaway SQLite database file.
$LOAD_PATH.unshift File.expand_path('../lib', __dir__)

require 'benchmark'
require 'fileutils' # FileUtils is used below; do not rely on another gem loading it.
require 'daru'
require 'sqlite3'
require 'dbi'
require 'active_record'

db_name = 'daru_test.sqlite'
row_count = 100_000

# Start from a clean slate so a database left behind by an aborted run
# cannot make the CREATE TABLE below fail.
FileUtils.rm_f(db_name)

begin
  # Populate the fixture table, then release the raw SQLite handle before
  # ActiveRecord opens its own connection to the same file.
  db = SQLite3::Database.new(db_name)
  begin
    db.execute 'create table accounts(id integer, name varchar, age integer, primary key(id))'

    # String values use single quotes: in SQLite double quotes denote
    # identifiers and are only accepted as string literals through a legacy
    # quirk that can be disabled at build time.
    values = 1.upto(row_count).map { |i| "(#{i},'name_#{i}',#{rand(100)})" }.join(',')
    db.execute "insert into accounts values #{values}"
  ensure
    db.close
  end

  ActiveRecord::Base.establish_connection("sqlite3:#{db_name}")
  # Touch the connection up front; presumably so that connection setup is not
  # attributed to the first benchmark report.
  ActiveRecord::Base.connection

  # Minimal model mapped onto the `accounts` table created above.
  class Account < ActiveRecord::Base; end

  Benchmark.bm do |x|
    x.report('DataFrame.from_sql') do
      Daru::DataFrame.from_sql(ActiveRecord::Base.connection, 'SELECT * FROM accounts')
    end

    x.report('DataFrame.from_activerecord') do
      Daru::DataFrame.from_activerecord(Account.all)
    end
  end
ensure
  # Always remove the temporary database, even when a benchmark step raises.
  FileUtils.rm_f(db_name)
end
22 changes: 5 additions & 17 deletions lib/daru/io/io.rb
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ def try_string_to_number(s)
end
end

module IO # rubocop:disable Metrics/ModuleLength
module IO
class << self
# Functions for loading/writing Excel files.

Expand Down Expand Up @@ -134,23 +134,11 @@ def dataframe_write_sql ds, dbh, table
#
# @return A dataframe containing the data in the given relation
def from_activerecord(relation, *fields)
# NOTE(review): this span is a rendered diff hunk whose +/- markers were lost,
# so the pre-commit and post-commit bodies are interleaved. The grouping below
# is inferred from the "5 additions & 17 deletions" header and the commit
# message ("Avoid DataFrame#add_row in each block") -- confirm against the repo.
#
# Apparently removed: when no fields were given, every record in the relation
# was mapped to its symbol-keyed attributes hash and the frame was built
# from that list of hashes; otherwise the field names were symbolized.
if fields.empty?
records = relation.map do |record|
record.attributes.symbolize_keys
end
return Daru::DataFrame.new(records)
else
fields = fields.map(&:to_sym)
end
# Apparently added: with no explicit fields, fall back to the column names of
# the relation's class, then symbolize the field names in either case.
fields = relation.klass.column_names if fields.empty?
fields = fields.map(&:to_sym)

# Apparently removed: one empty named vector per field, then one add_row call
# per plucked record (Array() wraps a non-array record), then df.update.
vectors = fields.map { |name| [name, Daru::Vector.new([], name: name)] }.to_h

Daru::DataFrame.new(vectors, order: fields).tap do |df|
relation.pluck(*fields).each do |record|
df.add_row(Array(record))
end
df.update
end
# Apparently added: pluck the requested fields, transpose the plucked rows into
# per-field arrays, and build the frame in one call ordered by `fields`.
# NOTE(review): the removed code wrapped each plucked record in Array(),
# suggesting pluck may yield scalars when a single field is requested; transpose
# would reject such a flat array -- TODO confirm single-field behaviour.
result = relation.pluck(*fields).transpose
Daru::DataFrame.new(result, order: fields).tap(&:update)
end

# Loading data from plain text files
Expand Down

0 comments on commit 5e962f3

Please sign in to comment.