Skip to content
Browse files

added hashtable based grouping back in

  • Loading branch information...
1 parent 721ddae commit da0b1c0ccdb05e4d779e3fcb77020f734a1858b2 @bobbysmith007 bobbysmith007 committed May 19, 2011
Showing with 137 additions and 150 deletions.
  1. +1 −1 README.mediawiki
  2. +86 −96 examples/examples.lisp
  3. +25 −19 group-by.lisp
  4. +25 −34 tests/group-by.lisp
View
2 README.mediawiki
@@ -30,7 +30,7 @@ of grouped-list objects
''tests'': a list of tests to compare the keys with<br />
''grouping-implmentation'': What data structure should be used to perform the grouping<br />
-'':alist, :tree , :hash-table''<br />
+'':list, :hash-table''<br />
The implementation doesnt change the output, but it does change
the performance characteristics of the grouped-object (see:
grouped-list-speed-tester for help deciding which to use)
View
182 examples/examples.lisp
@@ -51,41 +51,41 @@
(defparameter +example-timeclock-objs+
(iter top (for i from 0 to 10)
- (iter (for rec in +example-timeclock-data+)
- (in top (collect (apply #'example-tcr rec))))))
+ (iter (for rec in +example-timeclock-data+)
+ (in top (collect (apply #'example-tcr rec))))))
(defun timeclock-report-rec-print (gl &optional (spaces ""))
(let ((further-groups (child-groupings gl)))
(if further-groups
- (iter
- (with hours = 0)
- (for group in further-groups)
- (format T "~?~A~%" spaces () (key-value group) )
- (incf hours (timeclock-report-rec-print
- group (concatenate 'string spaces "~2,1@T")))
- (finally
- (format T "~?Total: ~D~%" spaces () hours )
- (return hours)))
- (iter (for kid in (items-in-group gl))
- (sum (hours kid) into hours)
- (finally
- (format T "~?Total: ~D~%" spaces () hours )
- (return hours))))))
-
-(defun print-timeclock-report ()
+ (iter
+ (with hours = 0)
+ (for group in further-groups)
+ (format T "~?~A~%" spaces () (key-value group) )
+ (incf hours (timeclock-report-rec-print
+ group (concatenate 'string spaces "~2,1@T")))
+ (finally
+ (format T "~?Total: ~D~%" spaces () hours )
+ (return hours)))
+ (iter (for kid in (items-in-group gl))
+ (sum (hours kid) into hours)
+ (finally
+ (format T "~?Total: ~D~%" spaces () hours )
+ (return hours))))))
+
+(defun print-timeclock-report ()
(let ((by-person-project
- (make-grouped-list
- +example-timeclock-objs+
- :keys (list #'name #'proj)
- :tests (list #'string-equal #'eql)
- :grouping-implementation :hash-table))
- (by-project-person
- (make-grouped-list
- +example-timeclock-objs+
- :keys (list #'proj #'name)
- :tests (list #'eql #'string-equal)
- :grouping-implementation :tree)))
-
+ (make-grouped-list
+ +example-timeclock-objs+
+ :keys (list #'name #'proj)
+ :tests (list #'equalp #'eql)
+ :grouping-implementation :hash-table))
+ (by-project-person
+ (make-grouped-list
+ +example-timeclock-objs+
+ :keys (list #'proj #'name)
+ :tests (list #'eql #'equalp)
+ :grouping-implementation :list)))
+
(format T "Hours BY Project > Person~%-----------~%")
(timeclock-report-rec-print by-project-person)
(format T "~%~%Hours BY Person > Project~%-----------~%")
@@ -137,10 +137,11 @@ bob
Total: 165
|#
+
(defparameter +example-speedtest-timeclock-objs+
(iter top (for i from 0 to 100)
- (iter (for rec in +example-timeclock-data+)
- (in top (collect (apply #'example-tcr rec))))))
+ (iter (for rec in +example-timeclock-data+)
+ (in top (collect (apply #'example-tcr rec))))))
(defun speed-test-example ()
(format *trace-output* "~%build-gl-speed-test~%")
@@ -149,7 +150,7 @@ Total: 165
:list +example-speedtest-timeclock-objs+
:keys (list #'name #'proj)
:tests (list #'string-equal #'eql))
-
+
(format *trace-output* "~%~%build-gl-speed-test with-item-access
This shows how implementations differ based on workload~%")
(grouped-list-speed-tester
@@ -158,41 +159,37 @@ This shows how implementations differ based on workload~%")
:keys (list #'name #'proj)
:tests (list #'string-equal #'eql)
:actions (lambda (gl)
- (iter (for c from 0 to 1000 )
- (iter
- (for i in '("russ" "bob"))
- (items-in-group gl i)
- (iter (for j in `(:proj-a :proj-b :proj-c))
- (items-in-group gl i j)))))))
+ (iter (for c from 0 to 1000 )
+ (iter
+ (for i in '("russ" "bob"))
+ (items-in-group gl i)
+ (iter (for j in `(:proj-a :proj-b :proj-c))
+ (items-in-group gl i j)))))))
#|
build-gl-speed-test
Grouping Implentation Speed Tests
HASH-TABLE Implementation
Evaluation took:
- 0.519 seconds of real time
- 0.490000 seconds of total run time (0.420000 user, 0.070000 system)
- [ Run times consist of 0.270 seconds GC time, and 0.220 seconds non-GC time. ]
- 94.41% CPU
- 1,294,672,493 processor cycles
- 99,023,280 bytes consed
-
-TREE Implementation
-Evaluation took:
- 0.091 seconds of real time
- 0.070000 seconds of total run time (0.070000 user, 0.000000 system)
- 76.92% CPU
- 225,769,988 processor cycles
- 1,540,032 bytes consed
+ 0.181 seconds of real time
+ 0.160000 seconds of total run time (0.120000 user, 0.040000 system)
+ [ Run times consist of 0.080 seconds GC time, and 0.080 seconds non-GC time. ]
+ 88.40% CPU
+ 92 lambdas converted
+ 451,161,165 processor cycles
+ 4,939,600 bytes consed
-ALIST Implementation
+
+
+LIST Implementation
Evaluation took:
- 0.095 seconds of real time
- 0.100000 seconds of total run time (0.090000 user, 0.010000 system)
- 105.26% CPU
- 236,701,268 processor cycles
- 1,507,136 bytes consed
+ 0.107 seconds of real time
+ 0.100000 seconds of total run time (0.080000 user, 0.020000 system)
+ 93.46% CPU
+ 46 lambdas converted
+ 265,815,870 processor cycles
+ 2,749,600 bytes consed
@@ -202,30 +199,23 @@ Grouping Implentation Speed Tests
HASH-TABLE Implementation
Evaluation took:
- 0.668 seconds of real time
- 0.620000 seconds of total run time (0.600000 user, 0.020000 system)
- [ Run times consist of 0.260 seconds GC time, and 0.360 seconds non-GC time. ]
- 92.81% CPU
- 1,664,663,812 processor cycles
- 157,757,008 bytes consed
-
-TREE Implementation
-Evaluation took:
- 1.647 seconds of real time
- 1.590000 seconds of total run time (1.360000 user, 0.230000 system)
- [ Run times consist of 0.720 seconds GC time, and 0.870 seconds non-GC time. ]
- 96.54% CPU
- 4,107,706,057 processor cycles
- 439,151,536 bytes consed
+ 0.873 seconds of real time
+ 0.860000 seconds of total run time (0.760000 user, 0.100000 system)
+ [ Run times consist of 0.330 seconds GC time, and 0.530 seconds non-GC time. ]
+ 98.51% CPU
+ 2,175,013,267 processor cycles
+ 294,658,896 bytes consed
-ALIST Implementation
+
+
+LIST Implementation
Evaluation took:
- 0.548 seconds of real time
- 0.510000 seconds of total run time (0.480000 user, 0.030000 system)
- [ Run times consist of 0.230 seconds GC time, and 0.280 seconds non-GC time. ]
- 93.07% CPU
- 1,364,044,695 processor cycles
- 147,965,152 bytes consed
+ 1.031 seconds of real time
+ 0.990000 seconds of total run time (0.900000 user, 0.090000 system)
+ [ Run times consist of 0.460 seconds GC time, and 0.530 seconds non-GC time. ]
+ 96.02% CPU
+ 2,572,489,710 processor cycles
+ 293,604,720 bytes consed
|#
(defparameter +example-names+ #("russ" "alice" "bob" "charlie"))
@@ -235,18 +225,18 @@ Evaluation took:
"An example of building a grouped list up from individual items
rather than starting with a full list then grouping it"
(iter (for type in `(:hash-table :tree :alist))
- (iter
- (with gl = (make-grouped-list
- nil
- :keys (list #'name #'proj)
- :tests (list #'string-equal #'eql)
- :grouping-implementation type))
- (for i from 0 to 1000)
- (for tcr =
- (example-tcr
- (alexandria:random-elt +example-names+)
- (random 10)
- (alexandria:random-elt +example-projects+)))
- (add-item-to-grouping tcr gl)
- (finally (timeclock-report-rec-print gl)
- (format T "----------------------~%")))))
+ (iter
+ (with gl = (make-grouped-list
+ nil
+ :keys (list #'name #'proj)
+ :tests (list #'string-equal #'eql)
+ :grouping-implementation type))
+ (for i from 0 to 1000)
+ (for tcr =
+ (example-tcr
+ (alexandria:random-elt +example-names+)
+ (random 10)
+ (alexandria:random-elt +example-projects+)))
+ (add-item-to-grouping tcr gl)
+ (finally (timeclock-report-rec-print gl)
+ (format T "----------------------~%")))))
View
44 group-by.lisp
@@ -83,26 +83,30 @@ eg: (group-by '((a 1 2) (a 3 4) (b 5 6)))
(defclass grouped-list ()
((orig-list :accessor orig-list :initarg :orig-list :initform nil)
(grouping-implementation
- :accessor grouping-implementation :initarg :grouping-implementation :initform :alist
+ :accessor grouping-implementation :initarg :grouping-implementation :initform :list
:documentation
- "What data structure should be used to perform the grouping
- :alist, :tree , :hash-table")
+ "What data structure should be used to perform the grouping :list, :hash-table")
(keys :accessor keys :initarg :keys :initform nil
:documentation "A list of key functions we will use to group the list")
(tests :accessor tests :initarg :tests :initform nil
:documentation "A list of test functions we will use to test key equality
tree: defaults to #'equal
hash-table: this be a single hash-equality symbol (defaults to 'equal)")
- (child-groupings :accessor child-groupings :initarg :child-groupings :initform nil)
+ (%child-groupings :accessor %child-groupings :initarg :%child-groupings :initform nil)
(%items :accessor %items :initarg :%items :initform nil)
- (child-map :accessor child-map :initarg :child-map :initform nil)
(parent-grouping :accessor parent-grouping :initarg :parent :initform nil
:documentation "If this is a subgrouping of another grouped-list, what is the parent grouping we are apart of (mostly for testing)")
(key-value :accessor key-value :initarg :key-value :initform nil
:documentation "If this is a subgrouping of another grouped-list, what is the key this grouped-list represents in the parent grouping (mostly for testing)"))
(:documentation "This class represents a list that we have grouped by multiple key values
ala one of the group-by-repeatedly functions "))
+(defmethod child-groupings ((gl grouped-list))
+ (case (grouping-implementation gl)
+ (:hash-table (iter (for (k v) in-hashtable (%child-groupings gl))
+ (collect v)))
+ (T (%child-groupings gl))))
+
(defun make-grouped-list (inp &key tests keys (grouping-implementation :alist))
"Given a list of input, produce a grouped-list CLOS object that contains
the original list, configuration about the groupings and the result tree
@@ -126,13 +130,19 @@ of grouped-list objects
(defmethod initialize-instance :after ((o grouped-list) &key list &allow-other-keys)
(unless (listp (keys o)) (setf (keys o) (list (keys o))))
(unless (listp (tests o)) (setf (tests o) (list (tests o))))
+ (when (eql :hash-table (grouping-implementation o))
+ (setf (%child-groupings o)
+ (make-hash-table :test (or (first (tests o)) 'equal))))
+
(when list ;; only do this if we are not a child-grouped-list
(setf (orig-list o) list)
(iter (for x in list)
- (add-item-to-grouping x o))))
+ (add-item-to-grouping x o))))
(defun find-single-sub-category (gl key-value &key test)
- (find key-value (child-groupings gl) :key #'key-value :test test))
+ (case (grouping-implementation gl)
+ (:hash-table (gethash key-value (%child-groupings gl)))
+ (t (find key-value (%child-groupings gl) :key #'key-value :test test))))
(defmethod categorize-item (item (root grouped-list) &key &allow-other-keys)
(iter
@@ -152,7 +162,7 @@ of grouped-list objects
(defmethod add-item-to-grouping (item (gl grouped-list))
"puts a new item in the grouping of the grouped list (but not in the original list)"
- (categorize-item item gl :keys (keys gl) :tests (tests gl)))
+ (categorize-item item gl))
(defmethod %grouping-items ((gl grouped-list))
"Returns the items in a given group"
@@ -170,7 +180,10 @@ of grouped-list objects
:grouping-implementation (grouping-implementation gl)
:parent-grouping gl
:key-value key-value)))
- (push c (child-groupings gl))
+ (case (grouping-implementation gl)
+ (:hash-table
+ (setf (gethash key-value (%child-groupings gl)) c))
+ (t (push c (%child-groupings gl))))
c))
(defmethod items-in-group ((gl grouped-list) &rest key-values)
@@ -181,7 +194,7 @@ of grouped-list objects
(for key in key-values)
(for test = (or (first tests) #'equal))
(setf tests (rest tests))
- (setf subgroup (find-single-sub-category gl key :test test)))
+ (setf subgroup (find-single-sub-category subgroup key :test test)))
;; Get all the items for that subgrouping (for alists this is a list we just produced)
;; and that list will simply pass through
@@ -202,17 +215,10 @@ of grouped-list objects
:list list :keys keys :tests hash-tests
:grouping-implementation :hash-table)))
(when actions (funcall actions gl)))))
- (format *trace-output* "~%~%TREE Implementation~%" )
- (time
- (iter (for i from 1 to iterations)
- (let ((gl (make-instance 'grouped-list :list list :keys keys :tests tests
- :grouping-implementation :tree)))
- (when actions (funcall actions gl)))
- ))
- (format *trace-output* "~%~%ALIST Implementation~%" )
+ (format *trace-output* "~%~%LIST Implementation~%" )
(time
(iter (for i from 1 to iterations)
(let ((gl (make-instance 'grouped-list :list list :keys keys :tests tests
- :grouping-implementation :alist)))
+ :grouping-implementation :list)))
(when actions (funcall actions gl)))
)))
View
59 tests/group-by.lisp
@@ -52,8 +52,8 @@ the vector ALPHABET.
:tests (iter (for i from 1 to depth)
(collect
(case (mod i 2)
- (0 #'string=)
- (1 #'=))))))
+ (0 #'equalp)
+ (1 #'eql))))))
(defmethod print-object ((o grouped-list) s)
;; This is way slow so dont have this in live code and you might wish to undefine it
@@ -64,10 +64,12 @@ the vector ALPHABET.
(format s "num-data:~a " (length (items-in-group o)))))
(defun make-test-data-instance (test-data &rest other-keywords)
- (let ((args (append (list 'grouped-list)
- test-data
- other-keywords)))
- (apply #'make-instance args)))
+ (let* ((args (append (list 'grouped-list)
+ test-data
+ other-keywords))
+ (o (apply #'make-instance args)))
+ o
+ ))
(defparameter +test-timeclock-data+
`(("Russ" 1 "time on proj A")
@@ -127,23 +129,17 @@ the vector ALPHABET.
(define-test run-accuracy-tests
(let ((num-rows 1000) (depth 5))
- (labels ((assertions (g1 g2 g3)
+ (labels ((assertions (g1 g2)
;; all grouped lists contain the same number of children
(assert-equal
(length (items-in-group g1))
(length (items-in-group g2)))
- (assert-equal
- (length (items-in-group g2))
- (length (items-in-group g3)))
;; all grouped lists contain the same data
(assert-true (null (set-difference
- (set-difference
(items-in-group g1)
(items-in-group g2)
- :test #'equalp )
- (items-in-group g3)
- :test #'equalp)))
+ :test #'equalp )))
;; assert that all children should actually be in this group
(when (parent-grouping g1)
@@ -155,30 +151,26 @@ the vector ALPHABET.
(assert-true (funcall test (funcall key item) pk))))))))
;; A function to run the tests on each sub grouping tree
- (recursert (g1 g2 g3)
+ (recursert (g1 g2)
;;(break "recursert:~%~a~%~a~%~a" g1 g2 g3)
- (assertions g1 g2 g3)
+ (assertions g1 g2)
(let* ((k1 (child-groupings g1))
(k2 (child-groupings g2))
- (k3 (child-groupings g3))
(pred (when k1
(if (numberp (key-value (first k1)))
#'<
#'string<))))
(when k1
(setf k1 (sort k1 pred :key #'key-value))
- (setf k2 (sort k2 pred :key #'key-value))
- (setf k3 (sort k3 pred :key #'key-value)))
+ (setf k2 (sort k2 pred :key #'key-value)))
;(break "~A" (list k1 k2 k3))
(iter (for kg1 in k1)
(for kg2 in k2)
- (for kg3 in k3)
- (recursert kg1 kg2 kg3)))))
+ (recursert kg1 kg2)))))
(let* ((data (test-data num-rows depth))
- (lgl (make-test-data-instance data :grouping-implementation :alist))
- (tgl (make-test-data-instance data :grouping-implementation :hash-table))
- (hgl (make-test-data-instance data :grouping-implementation :tree)))
- (recursert lgl tgl hgl)))))
+ (lgl (make-test-data-instance data :grouping-implementation :list))
+ (hgl (make-test-data-instance data :grouping-implementation :hash-table)))
+ (recursert lgl hgl)))))
(defun %run-creation-speed-tests (&key (num-rows 1000) (depth 5) (times 10))
(macrolet ((time-to-log (&body body)
@@ -188,19 +180,18 @@ the vector ALPHABET.
(let ((test-data (iter (for i from 1 to times)
(collect (test-data num-rows depth)))))
(info "Grouping Implentation Speed Tests" )
- (info "~%~%HASH-TABLE Implementation~%" )
+ (info "~%HASH-TABLE Implementation~%" )
(time-to-log
(iter (for data in test-data)
(make-test-data-instance data :grouping-implementation :hash-table)))
- (info "~%~%TREE Implementation~%" )
- (time-to-log
- (iter (for data in test-data)
- (make-test-data-instance data :grouping-implementation :tree)))
- (info "~%~%ALIST Implementation~%" )
+
+ (info "~%LIST Implementation~%" )
(time-to-log
(iter (for data in test-data)
- (make-test-data-instance data :grouping-implementation :alist))))))
+ (make-test-data-instance data :grouping-implementation :list)))
+
+ )))
-;(define-test run-creation-speed-tests (%run-creation-speed-tests))
+(define-test run-creation-speed-tests (%run-creation-speed-tests))
-(run-tests)
+(run-tests )

0 comments on commit da0b1c0

Please sign in to comment.
Something went wrong with that request. Please try again.