Skip to content
This repository has been archived by the owner on Mar 13, 2023. It is now read-only.

Commit

Permalink
word wrap: abstract line wrapping into clim-agnostic utility
Browse files Browse the repository at this point in the history
Function accepts closure with width. We've also implemented algorithm which
implements minimum raggedness but it is too slow and not tested (hence
disabled).
  • Loading branch information
dkochmanski committed Feb 20, 2019
1 parent a921ba3 commit 86addca
Show file tree
Hide file tree
Showing 3 changed files with 422 additions and 93 deletions.
108 changes: 32 additions & 76 deletions Core/clim-basic/stream-output.lisp
Expand Up @@ -333,82 +333,38 @@
;; closing the text-output-record and have multiline records to
;; allow gimmics like a dynamic reflow). Also text-style doesn't
;; change until the end of this function. -- jd 2019-01-10
(labels ((string-fits-p (delta string-index)
(<= (stream-string-width stream string
:start start :end string-index
:text-style text-style)
delta))
(find-split (delta)
;; To prevent infinite recursion if there isn't room for even a
;; single character we start from (1+ start). -- jd 2019-01-08
(when (not (string-fits-p delta start))
(if (= right-margin (+ (or left-margin 0) delta))
(return-from find-split (1+ start))
(return-from find-split start)))
(if (text-style-fixed-width-p text-style medium)
(min end (+ start (floor delta (text-style-width text-style medium))))
(bisect (1+ start) end (curry #'string-fits-p delta))))
(find-split-by-word (delta)
(let ((space-indexes (line-break-opportunities string start end)))
(when (not (string-fits-p delta (elt space-indexes 0)))
(if (or (= right-margin (+ (or left-margin 0) delta))
(= (length space-indexes) 1))
;; word exceeds whole line length
(return-from find-split-by-word (find-split delta))
;; word exceeds part of the line length
(return-from find-split-by-word start)))
(let ((split (elt space-indexes
(bisect 0 (1- (length space-indexes))
(lambda (guess)
(string-fits-p delta (elt space-indexes guess)))))))
(if (= (1+ split) end)
end
;; trim leading spaces
(position #\space string
:start split
:end end
:test-not #'char=))))))
(ecase eol-action
(:wrap
(do ((split (find-split (- right-margin cx))
(find-split (- right-margin (or left-margin 0)))))
((= start end)
(return-from seos-write-string))
(unless (= start split)
(stream-write-output stream string nil start split))
(if (/= split end)
;; print a soft newline
(stream-write-char stream #\newline)
;; adjust the cursor
(progn
(setf (cursor-position (stream-text-cursor stream))
(values (+ (stream-effective-left-margin stream)
(stream-string-width stream string
:start start :end split
:text-style text-style))
(nth-value 1 (stream-cursor-position stream))))
(return-from seos-write-string)))
(setf start split)))
(:wrap*
(do ((split (find-split-by-word (- right-margin cx))
(find-split-by-word (- right-margin (or left-margin 0)))))
((= start end)
(return-from seos-write-string))
(unless (= start split)
(stream-write-output stream string nil start split))
(if (/= split end)
;; print a soft newline
(stream-write-char stream #\newline)
;; adjust the cursor
(progn
(setf (cursor-position (stream-text-cursor stream))
(values (+ (stream-effective-left-margin stream)
(stream-string-width stream string
:start start :end split
:text-style text-style))
(nth-value 1 (stream-cursor-position stream))))
(return-from seos-write-string)))
(setf start split)))))))))
(let* ((width (if (text-style-fixed-width-p text-style medium)
(text-style-width text-style medium)
(lambda (string start end)
(text-size medium string
:text-style text-style
:start start :end end))))
(splits (line-breaks string width
:initial-offset cx
:margin (- right-margin (or left-margin 0))
:break-strategy (ecase eol-action
(:wrap NIL)
(:wrap* T))
:start start :end end)))
(do ((start start (car split))
(split splits (rest split)))
((null split)
(stream-write-output stream string nil start end)
(setf (cursor-position (stream-text-cursor stream))
(values (+ (stream-effective-left-margin stream)
(stream-string-width stream string
:start start :end split
:text-style text-style))
(nth-value 1 (stream-cursor-position stream)))))
(ecase eol-action
(:wrap (stream-write-output stream string nil start (car split)))
(:wrap* (let ((pos (position #\space string
:from-end t :start start :end (car split)
:test-not #'char=)))
(when pos (incf pos))
(stream-write-output stream string nil start (or pos (car split))))))
;; print a soft newline
(stream-write-char stream #\newline)))))))

(defun seos-write-newline (stream)
(let* ((current-cy (nth-value 1 (stream-cursor-position stream)))
Expand Down
211 changes: 194 additions & 17 deletions Core/clim-basic/utils.lisp
Expand Up @@ -550,8 +550,8 @@ STREAM in the direction DIRECTION."

(defun bisect (start end predicate-fn &optional predicament-fn)
"Finds the rightmost index meeting the PREDICATE-FN between START and END. It
is assumed that START always meets the predicate while END may but doesn't have
to meet it. That means that function always return some index.
is assumed that START meets the predicate while END doesn't. These indexes are
*not* tested.
PREDICATE-FN INDEX
Should return NIL if index does not meet the predicate and something else
Expand All @@ -561,8 +561,6 @@ PREDICAMENT-FN INDEX-1 INDEX-2
Returns next index between its arguments for test. If there is nothing more to
test must return NIL. When not supplied default function looks always for an
index being halfway between INDEX-1 and INDEX-2."
(when (funcall predicate-fn end)
(return-from bisect end))
(unless predicament-fn
(setf predicament-fn (lambda (last-good last-bad)
(let ((predicament (floor (+ last-good last-bad) 2)))
Expand All @@ -579,19 +577,6 @@ index being halfway between INDEX-1 and INDEX-2."
(setf last-bad current-guess))
finally (return last-good)))

;; Implementing line breaking as defined in Unicode[1] is left as an excercise
;; for the reader. [1] https://unicode.org/reports/tr14/ -- jd 2019-01-08
(defun line-break-opportunities (string start end &optional (break-characters '(#\space)))
"Returns a sequence of string indexes where line can break."
(loop
with space-indexes = (make-array 1 :fill-pointer 0 :adjustable t :element-type 'fixnum)
for i from start below (1- end)
do (when (member (aref string i) break-characters :test #'char=)
(vector-push-extend i space-indexes))
finally
(vector-push-extend (1- end) space-indexes)
(return space-indexes)))

;;; Command name utilities that are useful elsewhere.

(defun command-name-from-symbol (symbol)
Expand Down Expand Up @@ -743,3 +728,195 @@ a flag CLOSED is T then beginning and end of the list are consecutive too."
((and *print-pretty* *print-readably*)
(simple-pprint-object ,sink ,self))
(t ,@body)))



(defun %line-breaks-1 (string width initial-offset margin start end)
(collect (break-line)
(macrolet ((split (step-form)
`(let* ((current-margin (- margin initial-offset))
(split start)
(initial-break ,step-form))
(maxf initial-break split)
(when (and (= initial-break split)
(<= initial-offset 0))
(setf initial-break (1+ split)))
(setf current-margin margin)
(do ((split initial-break
(max ,step-form (1+ split))))
((>= split end))
(if-let ((pos (position #\space string
:start (min split end)
:end end
:test-not #'char=)))
(break-line (setf split pos))
(return-from %line-breaks-1 (break-line)))))))
(etypecase width
(number (split (+ split (floor current-margin width))))
(function (split (bisect split (1+ end) ; we split *after* the string
(lambda (index)
(<= (funcall width string split index)
current-margin)))))))
(break-line)))

;;; Break greedily on the break opportunity. If a single word doesn't fit in the
;;; line and that line is shorter than an empty line then we break it right
;;; away. If the line is longer or equal to an empty one then we break this word
;;; by a character. If there are multiple #\space characters at the line
;;; beginning they are kept in the previous line.
(defun %line-breaks-2 (string width initial-offset margin start end opportunities
&aux (width-fn (etypecase width
(function width)
(number (lambda (string start end)
(declare (ignore string))
(* width (- end start)))))))
(labels ((skip-whitespace (string index)
(and index (position #\space string :start index :test-not #'char=)))
(breaks-rec (offset margin start end next-opportunity-index
&aux (current-margin (max (- margin offset) 1)))
(cond ((<= (funcall width-fn string start end) current-margin)
(return-from breaks-rec
nil))
((= next-opportunity-index (length opportunities))
(return-from breaks-rec
(%line-breaks-1 string width offset margin start end)))
((>= start (aref opportunities next-opportunity-index))
(return-from breaks-rec
(breaks-rec offset margin start end (1+ next-opportunity-index)))))
(loop
with best-break = nil
for i from next-opportunity-index below (length opportunities)
for opportunity = (aref opportunities i)
for current-width = (funcall width-fn string start opportunity)
do
(cond ((<= current-width current-margin)
(if-let ((pos (skip-whitespace string opportunity)))
(setf best-break pos)
(return-from breaks-rec nil)))
((null best-break)
(return-from breaks-rec
(if (> offset 0)
(list* start (breaks-rec 0 margin start end i))
(let* ((char-breaks (%line-breaks-1 string width
offset margin
start opportunity))
(new-start (alexandria:last-elt char-breaks)))
(append char-breaks (breaks-rec 0 margin new-start end i))))))
(T #1=(return-from breaks-rec
(list* best-break (breaks-rec 0 margin best-break end i)))))
finally #1#)))
(breaks-rec initial-offset margin start end 0)))

;; This is super-slow and barely tested. Function implements word wrap with a
;; minimum raggedness. I'm leaving the code for someone eager to optimize and
;; test it. -- jd 2018-12-26
#+ (or)
(defun %line-breaks-3 (string width initial-offset margin start end opportunities
&aux (width-fn (etypecase width
(function width)
(number (lambda (string start end)
(declare (ignore string))
(* width (- end start)))))))
(labels ((skip-whitespace (string index)
(and index (position #\space string :start index :test-not #'char=)))
(breaks-rec (offset margin start end next-opportunity-index
&aux (current-margin (max (- margin offset) 1)))
(cond ((<= (funcall width-fn string start end) current-margin)
(return-from breaks-rec
(values nil 0)))
((= next-opportunity-index (length opportunities))
(return-from breaks-rec
(values (%line-breaks-1 string width offset margin start end) 0)))
((>= start (aref opportunities next-opportunity-index))
(return-from breaks-rec
(breaks-rec offset margin start end (1+ next-opportunity-index)))))
(loop
with best-breaks = nil
with best-cost = nil
for i from next-opportunity-index below (length opportunities)
for opportunity = (aref opportunities i)
for current-width = (funcall width-fn string start opportunity)
do
(cond ((<= current-width current-margin)
(let ((current-cost (expt (- current-margin current-width) 2))
(current-break (if-let ((pos (skip-whitespace string opportunity)))
pos
(return-from breaks-rec nil))))
(multiple-value-bind (breaks remaining-cost)
(breaks-rec 0 margin current-break end (1+ i))
(when (or (null best-cost)
(< (+ current-cost remaining-cost) best-cost))
(setf best-breaks (list* current-break breaks)
best-cost (+ current-cost remaining-cost))))))
((null best-cost)
(return-from breaks-rec
(if (> offset 0)
(multiple-value-bind (breaks remaining-cost)
(breaks-rec 0 margin start end i)
(return-from breaks-rec
(values (list* start breaks)
(+ remaining-cost current-margin))))
(let* ((char-breaks (%line-breaks-1 string width
offset margin
start opportunity))
(new-start (alexandria:last-elt char-breaks)))
(multiple-value-bind (breaks remaining-cost)
(breaks-rec 0 margin new-start end i)
(values (append char-breaks breaks) remaining-cost))))))
(T #1=(return-from breaks-rec
(values best-breaks best-cost))))
finally #1#)))
(breaks-rec initial-offset margin start end 0)))

;; Implementing line breaking as defined in Unicode[1] is left as an excercise
;; for the reader. [1] https://unicode.org/reports/tr14/ -- jd 2019-01-08
(defun line-breaks (string width &key (break-strategy t) initial-offset margin (start 0) end)
"Function takes a string and returns a list of indexes where it should be split.
WIDTH is a function accepting STRING, START and END arguments which should
return string width for these boundaries. Alternatively for fixed font width it
is a number.
INITIAL-OFFSET is an initial position for the first line (may be negative). All
remaining lines will start from the line beginning. Default is line beginning.
MARGIN is a maximum width at which line should break. Defaults to 80
characters (width of a character m is taken as a reference value).
BREAK-STRATEGY may be:
- symbol T implementing a default line breaking by word strategy,
- symbol NIL implementing a line breaking by character strategy,
- function accepting index which should return T for break opportunity,
- list of characters which are break opportunities (i.e space),
- vector of string indexes which are break opportunities.
START/END denote string beginning and ending offset."
(unless start (setq start 0))
(unless end (setq end (length string)))
(unless margin (setq margin (* 80 (etypecase width
(function (funcall width "m" 0 1))
(number width)))))
(unless initial-offset (setq initial-offset 0))
(assert (< start end))
(when (null break-strategy)
(return-from line-breaks
(%line-breaks-1 string width initial-offset margin start end)))
(let ((opportunities (etypecase break-strategy
(function
(coerce (loop for i from start below end
when (funcall break-strategy i)
collect i)
'vector))
((eql t)
(coerce (loop for i from start below end
when (char= (char string i) #\space)
collect i)
'vector))
(list
(coerce (loop for i from start below end
when (member (char string i) break-strategy)
collect i)
'vector))
(vector
break-strategy))))
(%line-breaks-2 string width initial-offset margin start end opportunities)))

0 comments on commit 86addca

Please sign in to comment.