From da0b1c0ccdb05e4d779e3fcb77020f734a1858b2 Mon Sep 17 00:00:00 2001 From: Russ Tyndall Date: Thu, 19 May 2011 17:32:19 -0400 Subject: [PATCH] added hashtable based grouping back in --- README.mediawiki | 2 +- examples/examples.lisp | 182 +++++++++++++++++++---------------------- group-by.lisp | 44 +++++----- tests/group-by.lisp | 59 ++++++------- 4 files changed, 137 insertions(+), 150 deletions(-) diff --git a/README.mediawiki b/README.mediawiki index fd3538c..d07f94f 100644 --- a/README.mediawiki +++ b/README.mediawiki @@ -30,7 +30,7 @@ of grouped-list objects ''tests'': a list of tests to compare the keys with
''grouping-implmentation'': What data structure should be used to perform the grouping
-'':alist, :tree , :hash-table''
+'':list, :hash-table''
The implementation doesnt change the output, but it does change the performance characteristics of the grouped-object (see: grouped-list-speed-tester for help deciding which to use) diff --git a/examples/examples.lisp b/examples/examples.lisp index 2a21fb9..0b93343 100644 --- a/examples/examples.lisp +++ b/examples/examples.lisp @@ -51,41 +51,41 @@ (defparameter +example-timeclock-objs+ (iter top (for i from 0 to 10) - (iter (for rec in +example-timeclock-data+) - (in top (collect (apply #'example-tcr rec)))))) + (iter (for rec in +example-timeclock-data+) + (in top (collect (apply #'example-tcr rec)))))) (defun timeclock-report-rec-print (gl &optional (spaces "")) (let ((further-groups (child-groupings gl))) (if further-groups - (iter - (with hours = 0) - (for group in further-groups) - (format T "~?~A~%" spaces () (key-value group) ) - (incf hours (timeclock-report-rec-print - group (concatenate 'string spaces "~2,1@T"))) - (finally - (format T "~?Total: ~D~%" spaces () hours ) - (return hours))) - (iter (for kid in (items-in-group gl)) - (sum (hours kid) into hours) - (finally - (format T "~?Total: ~D~%" spaces () hours ) - (return hours)))))) - -(defun print-timeclock-report () + (iter + (with hours = 0) + (for group in further-groups) + (format T "~?~A~%" spaces () (key-value group) ) + (incf hours (timeclock-report-rec-print + group (concatenate 'string spaces "~2,1@T"))) + (finally + (format T "~?Total: ~D~%" spaces () hours ) + (return hours))) + (iter (for kid in (items-in-group gl)) + (sum (hours kid) into hours) + (finally + (format T "~?Total: ~D~%" spaces () hours ) + (return hours)))))) + +(defun print-timeclock-report () (let ((by-person-project - (make-grouped-list - +example-timeclock-objs+ - :keys (list #'name #'proj) - :tests (list #'string-equal #'eql) - :grouping-implementation :hash-table)) - (by-project-person - (make-grouped-list - +example-timeclock-objs+ - :keys (list #'proj #'name) - :tests (list #'eql #'string-equal) - :grouping-implementation :tree))) - + (make-grouped-list + +example-timeclock-objs+ + :keys (list #'name #'proj) + :tests (list #'equalp #'eql) + :grouping-implementation :hash-table)) + (by-project-person + (make-grouped-list + +example-timeclock-objs+ + :keys (list #'proj #'name) + :tests (list #'eql #'equalp) + :grouping-implementation :list))) + (format T "Hours BY Project > Person~%-----------~%") (timeclock-report-rec-print by-project-person) (format T "~%~%Hours BY Person > Project~%-----------~%") @@ -137,10 +137,11 @@ bob Total: 165 |# + (defparameter +example-speedtest-timeclock-objs+ (iter top (for i from 0 to 100) - (iter (for rec in +example-timeclock-data+) - (in top (collect (apply #'example-tcr rec)))))) + (iter (for rec in +example-timeclock-data+) + (in top (collect (apply #'example-tcr rec)))))) (defun speed-test-example () (format *trace-output* "~%build-gl-speed-test~%") @@ -149,7 +150,7 @@ Total: 165 :list +example-speedtest-timeclock-objs+ :keys (list #'name #'proj) :tests (list #'string-equal #'eql)) - + (format *trace-output* "~%~%build-gl-speed-test with-item-access This shows how implementations differ based on workload~%") (grouped-list-speed-tester @@ -158,12 +159,12 @@ This shows how implementations differ based on workload~%") :keys (list #'name #'proj) :tests (list #'string-equal #'eql) :actions (lambda (gl) - (iter (for c from 0 to 1000 ) - (iter - (for i in '("russ" "bob")) - (items-in-group gl i) - (iter (for j in `(:proj-a :proj-b :proj-c)) - (items-in-group gl i j))))))) + (iter (for c from 0 to 1000 ) + (iter + (for i in '("russ" "bob")) + (items-in-group gl i) + (iter (for j in `(:proj-a :proj-b :proj-c)) + (items-in-group gl i j))))))) #| build-gl-speed-test @@ -171,28 +172,24 @@ Grouping Implentation Speed Tests HASH-TABLE Implementation Evaluation took: - 0.519 seconds of real time - 0.490000 seconds of total run time (0.420000 user, 0.070000 system) - [ Run times consist of 0.270 seconds GC time, and 0.220 seconds non-GC time. ] - 94.41% CPU - 1,294,672,493 processor cycles - 99,023,280 bytes consed - -TREE Implementation -Evaluation took: - 0.091 seconds of real time - 0.070000 seconds of total run time (0.070000 user, 0.000000 system) - 76.92% CPU - 225,769,988 processor cycles - 1,540,032 bytes consed + 0.181 seconds of real time + 0.160000 seconds of total run time (0.120000 user, 0.040000 system) + [ Run times consist of 0.080 seconds GC time, and 0.080 seconds non-GC time. ] + 88.40% CPU + 92 lambdas converted + 451,161,165 processor cycles + 4,939,600 bytes consed -ALIST Implementation + + +LIST Implementation Evaluation took: - 0.095 seconds of real time - 0.100000 seconds of total run time (0.090000 user, 0.010000 system) - 105.26% CPU - 236,701,268 processor cycles - 1,507,136 bytes consed + 0.107 seconds of real time + 0.100000 seconds of total run time (0.080000 user, 0.020000 system) + 93.46% CPU + 46 lambdas converted + 265,815,870 processor cycles + 2,749,600 bytes consed @@ -202,30 +199,23 @@ Grouping Implentation Speed Tests HASH-TABLE Implementation Evaluation took: - 0.668 seconds of real time - 0.620000 seconds of total run time (0.600000 user, 0.020000 system) - [ Run times consist of 0.260 seconds GC time, and 0.360 seconds non-GC time. ] - 92.81% CPU - 1,664,663,812 processor cycles - 157,757,008 bytes consed - -TREE Implementation -Evaluation took: - 1.647 seconds of real time - 1.590000 seconds of total run time (1.360000 user, 0.230000 system) - [ Run times consist of 0.720 seconds GC time, and 0.870 seconds non-GC time. ] - 96.54% CPU - 4,107,706,057 processor cycles - 439,151,536 bytes consed + 0.873 seconds of real time + 0.860000 seconds of total run time (0.760000 user, 0.100000 system) + [ Run times consist of 0.330 seconds GC time, and 0.530 seconds non-GC time. ] + 98.51% CPU + 2,175,013,267 processor cycles + 294,658,896 bytes consed -ALIST Implementation + + +LIST Implementation Evaluation took: - 0.548 seconds of real time - 0.510000 seconds of total run time (0.480000 user, 0.030000 system) - [ Run times consist of 0.230 seconds GC time, and 0.280 seconds non-GC time. ] - 93.07% CPU - 1,364,044,695 processor cycles - 147,965,152 bytes consed + 1.031 seconds of real time + 0.990000 seconds of total run time (0.900000 user, 0.090000 system) + [ Run times consist of 0.460 seconds GC time, and 0.530 seconds non-GC time. ] + 96.02% CPU + 2,572,489,710 processor cycles + 293,604,720 bytes consed |# (defparameter +example-names+ #("russ" "alice" "bob" "charlie")) @@ -235,18 +225,18 @@ Evaluation took: "An example of building a grouped list up from individual items rather than starting with a full list then grouping it" (iter (for type in `(:hash-table :tree :alist)) - (iter - (with gl = (make-grouped-list - nil - :keys (list #'name #'proj) - :tests (list #'string-equal #'eql) - :grouping-implementation type)) - (for i from 0 to 1000) - (for tcr = - (example-tcr - (alexandria:random-elt +example-names+) - (random 10) - (alexandria:random-elt +example-projects+))) - (add-item-to-grouping tcr gl) - (finally (timeclock-report-rec-print gl) - (format T "----------------------~%"))))) \ No newline at end of file + (iter + (with gl = (make-grouped-list + nil + :keys (list #'name #'proj) + :tests (list #'string-equal #'eql) + :grouping-implementation type)) + (for i from 0 to 1000) + (for tcr = + (example-tcr + (alexandria:random-elt +example-names+) + (random 10) + (alexandria:random-elt +example-projects+))) + (add-item-to-grouping tcr gl) + (finally (timeclock-report-rec-print gl) + (format T "----------------------~%"))))) \ No newline at end of file diff --git a/group-by.lisp b/group-by.lisp index 591c3ac..afd3e71 100644 --- a/group-by.lisp +++ b/group-by.lisp @@ -83,19 +83,17 @@ eg: (group-by '((a 1 2) (a 3 4) (b 5 6))) (defclass grouped-list () ((orig-list :accessor orig-list :initarg :orig-list :initform nil) (grouping-implementation - :accessor grouping-implementation :initarg :grouping-implementation :initform :alist + :accessor grouping-implementation :initarg :grouping-implementation :initform :list :documentation - "What data structure should be used to perform the grouping - :alist, :tree , :hash-table") + "What data structure should be used to perform the grouping :list, :hash-table") (keys :accessor keys :initarg :keys :initform nil :documentation "A list of key functions we will use to group the list") (tests :accessor tests :initarg :tests :initform nil :documentation "A list of test functions we will use to test key equality tree: defaults to #'equal hash-table: this be a single hash-equality symbol (defaults to 'equal)") - (child-groupings :accessor child-groupings :initarg :child-groupings :initform nil) + (%child-groupings :accessor %child-groupings :initarg :%child-groupings :initform nil) (%items :accessor %items :initarg :%items :initform nil) - (child-map :accessor child-map :initarg :child-map :initform nil) (parent-grouping :accessor parent-grouping :initarg :parent :initform nil :documentation "If this is a subgrouping of another grouped-list, what is the parent grouping we are apart of (mostly for testing)") (key-value :accessor key-value :initarg :key-value :initform nil @@ -103,6 +101,12 @@ eg: (group-by '((a 1 2) (a 3 4) (b 5 6))) (:documentation "This class represents a list that we have grouped by multiple key values ala one of the group-by-repeatedly functions ")) +(defmethod child-groupings ((gl grouped-list)) + (case (grouping-implementation gl) + (:hash-table (iter (for (k v) in-hashtable (%child-groupings gl)) + (collect v))) + (T (%child-groupings gl)))) + (defun make-grouped-list (inp &key tests keys (grouping-implementation :alist)) "Given a list of input, produce a grouped-list CLOS object that contains the original list, configuration about the groupings and the result tree @@ -126,13 +130,19 @@ of grouped-list objects (defmethod initialize-instance :after ((o grouped-list) &key list &allow-other-keys) (unless (listp (keys o)) (setf (keys o) (list (keys o)))) (unless (listp (tests o)) (setf (tests o) (list (tests o)))) + (when (eql :hash-table (grouping-implementation o)) + (setf (%child-groupings o) + (make-hash-table :test (or (first (tests o)) 'equal)))) + (when list ;; only do this if we are not a child-grouped-list (setf (orig-list o) list) (iter (for x in list) - (add-item-to-grouping x o)))) + (add-item-to-grouping x o)))) (defun find-single-sub-category (gl key-value &key test) - (find key-value (child-groupings gl) :key #'key-value :test test)) + (case (grouping-implementation gl) + (:hash-table (gethash key-value (%child-groupings gl))) + (t (find key-value (%child-groupings gl) :key #'key-value :test test)))) (defmethod categorize-item (item (root grouped-list) &key &allow-other-keys) (iter @@ -152,7 +162,7 @@ of grouped-list objects (defmethod add-item-to-grouping (item (gl grouped-list)) "puts a new item in the grouping of the grouped list (but not in the original list)" - (categorize-item item gl :keys (keys gl) :tests (tests gl))) + (categorize-item item gl)) (defmethod %grouping-items ((gl grouped-list)) "Returns the items in a given group" @@ -170,7 +180,10 @@ of grouped-list objects :grouping-implementation (grouping-implementation gl) :parent-grouping gl :key-value key-value))) - (push c (child-groupings gl)) + (case (grouping-implementation gl) + (:hash-table + (setf (gethash key-value (%child-groupings gl)) c)) + (t (push c (%child-groupings gl)))) c)) (defmethod items-in-group ((gl grouped-list) &rest key-values) @@ -181,7 +194,7 @@ of grouped-list objects (for key in key-values) (for test = (or (first tests) #'equal)) (setf tests (rest tests)) - (setf subgroup (find-single-sub-category gl key :test test))) + (setf subgroup (find-single-sub-category subgroup key :test test))) ;; Get all the items for that subgrouping (for alists this is a list we just produced) ;; and that list will simply pass through @@ -202,17 +215,10 @@ of grouped-list objects :list list :keys keys :tests hash-tests :grouping-implementation :hash-table))) (when actions (funcall actions gl))))) - (format *trace-output* "~%~%TREE Implementation~%" ) - (time - (iter (for i from 1 to iterations) - (let ((gl (make-instance 'grouped-list :list list :keys keys :tests tests - :grouping-implementation :tree))) - (when actions (funcall actions gl))) - )) - (format *trace-output* "~%~%ALIST Implementation~%" ) + (format *trace-output* "~%~%LIST Implementation~%" ) (time (iter (for i from 1 to iterations) (let ((gl (make-instance 'grouped-list :list list :keys keys :tests tests - :grouping-implementation :alist))) + :grouping-implementation :list))) (when actions (funcall actions gl))) ))) diff --git a/tests/group-by.lisp b/tests/group-by.lisp index f845074..58bb70f 100644 --- a/tests/group-by.lisp +++ b/tests/group-by.lisp @@ -52,8 +52,8 @@ the vector ALPHABET. :tests (iter (for i from 1 to depth) (collect (case (mod i 2) - (0 #'string=) - (1 #'=)))))) + (0 #'equalp) + (1 #'eql)))))) (defmethod print-object ((o grouped-list) s) ;; This is way slow so dont have this in live code and you might wish to undefine it @@ -64,10 +64,12 @@ the vector ALPHABET. (format s "num-data:~a " (length (items-in-group o))))) (defun make-test-data-instance (test-data &rest other-keywords) - (let ((args (append (list 'grouped-list) - test-data - other-keywords))) - (apply #'make-instance args))) + (let* ((args (append (list 'grouped-list) + test-data + other-keywords)) + (o (apply #'make-instance args))) + o + )) (defparameter +test-timeclock-data+ `(("Russ" 1 "time on proj A") @@ -127,23 +129,17 @@ the vector ALPHABET. (define-test run-accuracy-tests (let ((num-rows 1000) (depth 5)) - (labels ((assertions (g1 g2 g3) + (labels ((assertions (g1 g2) ;; all grouped lists contain the same number of children (assert-equal (length (items-in-group g1)) (length (items-in-group g2))) - (assert-equal - (length (items-in-group g2)) - (length (items-in-group g3))) ;; all grouped lists contain the same data (assert-true (null (set-difference - (set-difference (items-in-group g1) (items-in-group g2) - :test #'equalp ) - (items-in-group g3) - :test #'equalp))) + :test #'equalp ))) ;; assert that all children should actually be in this group (when (parent-grouping g1) @@ -155,30 +151,26 @@ the vector ALPHABET. (assert-true (funcall test (funcall key item) pk)))))))) ;; A function to run the tests on each sub grouping tree - (recursert (g1 g2 g3) + (recursert (g1 g2) ;;(break "recursert:~%~a~%~a~%~a" g1 g2 g3) - (assertions g1 g2 g3) + (assertions g1 g2) (let* ((k1 (child-groupings g1)) (k2 (child-groupings g2)) - (k3 (child-groupings g3)) (pred (when k1 (if (numberp (key-value (first k1))) #'< #'string<)))) (when k1 (setf k1 (sort k1 pred :key #'key-value)) - (setf k2 (sort k2 pred :key #'key-value)) - (setf k3 (sort k3 pred :key #'key-value))) + (setf k2 (sort k2 pred :key #'key-value))) ;(break "~A" (list k1 k2 k3)) (iter (for kg1 in k1) (for kg2 in k2) - (for kg3 in k3) - (recursert kg1 kg2 kg3))))) + (recursert kg1 kg2))))) (let* ((data (test-data num-rows depth)) - (lgl (make-test-data-instance data :grouping-implementation :alist)) - (tgl (make-test-data-instance data :grouping-implementation :hash-table)) - (hgl (make-test-data-instance data :grouping-implementation :tree))) - (recursert lgl tgl hgl))))) + (lgl (make-test-data-instance data :grouping-implementation :list)) + (hgl (make-test-data-instance data :grouping-implementation :hash-table))) + (recursert lgl hgl))))) (defun %run-creation-speed-tests (&key (num-rows 1000) (depth 5) (times 10)) (macrolet ((time-to-log (&body body) @@ -188,19 +180,18 @@ the vector ALPHABET. (let ((test-data (iter (for i from 1 to times) (collect (test-data num-rows depth))))) (info "Grouping Implentation Speed Tests" ) - (info "~%~%HASH-TABLE Implementation~%" ) + (info "~%HASH-TABLE Implementation~%" ) (time-to-log (iter (for data in test-data) (make-test-data-instance data :grouping-implementation :hash-table))) - (info "~%~%TREE Implementation~%" ) - (time-to-log - (iter (for data in test-data) - (make-test-data-instance data :grouping-implementation :tree))) - (info "~%~%ALIST Implementation~%" ) + + (info "~%LIST Implementation~%" ) (time-to-log (iter (for data in test-data) - (make-test-data-instance data :grouping-implementation :alist)))))) + (make-test-data-instance data :grouping-implementation :list))) + + ))) -;(define-test run-creation-speed-tests (%run-creation-speed-tests)) +(define-test run-creation-speed-tests (%run-creation-speed-tests)) -(run-tests) \ No newline at end of file +(run-tests ) \ No newline at end of file