Skip to content
This repository

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse code

Removing files not present on branch acl60:

	examples/urian.cl
  • Loading branch information...
commit c0035e8d61cfca5b583a9a404efcc13b61c015a5 1 parent f77a8cd
authored June 01, 2001

Showing 1 changed file with 0 additions and 448 deletions. Show diff stats Hide diff stats

  1. 448  examples/urian.cl
448  examples/urian.cl
... ...
@@ -1,448 +0,0 @@
1  
-;; -*- mode: common-lisp; package: net.aserve.examples -*-
2  
-;;
3  
-;; urian.cl
4  
-;;
5  
-;; copyright (c) 1986-2000 Franz Inc, Berkeley, CA 
6  
-;;
7  
-;; This code is free software; you can redistribute it and/or
8  
-;; modify it under the terms of the version 2.1 of
9  
-;; the GNU Lesser General Public License as published by 
10  
-;; the Free Software Foundation; 
11  
-;;
12  
-;; This code is distributed in the hope that it will be useful,
13  
-;; but without any warranty; without even the implied warranty of
14  
-;; merchantability or fitness for a particular purpose.  See the GNU
15  
-;; Lesser General Public License for more details.
16  
-;;
17  
-;; Version 2.1 of the GNU Lesser General Public License is in the file 
18  
-;; license-lgpl.txt that was distributed with this file.
19  
-;; If it is not present, you can access it from
20  
-;; http://www.gnu.org/copyleft/lesser.txt (until superseded by a newer
21  
-;; version) or write to the Free Software Foundation, Inc., 59 Temple Place, 
22  
-;; Suite 330, Boston, MA  02111-1307  USA
23  
-;;
24  
-;;
25  
-;; $Id: urian.cl,v 1.2 2001/05/11 21:43:53 jkf Exp $
26  
-
27  
-;; Description:
28  
-;;   urian example
29  
-
30  
-
31  
-;;
32  
-;; Web page character analyzer.
33  
-;; This example retrieves a web page associated with a url, parses it with
34  
-;; parse-html, and then displays all texts found to have non-ascii characters.
35  
-;; Each character is a link.  Clicking on one of these links displays a
36  
-;; description of the linked character.
37  
-;;
38  
-;; Original Author: Charles A. Cox, Franz Inc., October 2000
39  
-;;
40  
-;; To use, compile/load this file into Allegro CL 6.0.  Then,
41  
-;; start allegroserve, eg, (net.aserve:start :port 8000) starts on port 8000.
42  
-;; The main published page for this example is "/urian".
43  
-
44  
-(defpackage :urian
45  
-  (:use :common-lisp :excl))
46  
-
47  
-(in-package :urian)
48  
-
49  
-(eval-when (compile load eval)
50  
-  (unless (featurep '(:version>= 6 0 :final 0))
51  
-    (error "This file not supported in Allegro CL releases earlier than 6.0")))
52  
-
53  
-(eval-when (compile load eval)
54  
-  (require :aserve)
55  
-  (handler-case (require :phtml)
56  
-    ; didn't find it, check to see if it's where it would be in 
57  
-    ; a non-user file layout
58  
-    (error (c)
59  
-      (declare (ignore c))
60  
-      (let (name)
61  
-	(if* (or (probe-file (setq name (concatenate 'string
62  
-					  (directory-namestring *load-truename*)
63  
-					  "../xmlutils/phtml.fasl")))
64  
-		 (probe-file (setq name (concatenate 'string
65  
-					  (directory-namestring *load-truename*)
66  
-					  "../../xmlutils/phtml.fasl"))))
67  
-		 
68  
-	   then (load name)
69  
-	   else (format t " not at ~s~%, tn is ~s~%" name
70  
-			*load-truename*)
71  
-		(error "can't locate phtml module"))))))
72  
-
73  
-(defpackage :urian
74  
-  (:use :net.html.generator :net.aserve :net.html.parser))
75  
-
76  
-(pushnew :x-sjis (ef-nicknames (find-external-format :shiftjis)))
77  
-(pushnew :iso-8859-1 (ef-nicknames (find-external-format :latin1)))
78  
-(pushnew :windows-1252 (ef-nicknames (find-external-format :1252)))
79  
-
80  
-(defparameter *blocks*
81  
-    '((#x0000 #x007f "Basic Latin")
82  
-      (#x0080 #x00ff "Latin-1 Supplement")
83  
-      (#x0100 #x017f "Latin Extended-A")
84  
-      (#x0180 #x024f "Latin Extended-B")
85  
-      (#x0250 #x02af "IPA Extensions")
86  
-      (#x02b0 #x02ff "Spacing Modifier Letters")
87  
-      (#x0300 #x036f "Combining Diacritical Marks")
88  
-      (#x0370 #x03ff "Greek")
89  
-      (#x0400 #x04ff "Cyrillic")
90  
-      (#x0530 #x058f "Armenian")
91  
-      (#x0590 #x05ff "Hebrew")
92  
-      (#x0600 #x06ff "Arabic")
93  
-      (#x0700 #x074f "Syriac  ")
94  
-      (#x0780 #x07bf "Thaana")
95  
-      (#x0900 #x097f "Devanagari")
96  
-      (#x0980 #x09ff "Bengali")
97  
-      (#x0a00 #x0a7f "Gurmukhi")
98  
-      (#x0a80 #x0aff "Gujarati")
99  
-      (#x0b00 #x0b7f "Oriya")
100  
-      (#x0b80 #x0bff "Tamil")
101  
-      (#x0c00 #x0c7f "Telugu")
102  
-      (#x0c80 #x0cff "Kannada")
103  
-      (#x0d00 #x0d7f "Malayalam")
104  
-      (#x0d80 #x0dff "Sinhala")
105  
-      (#x0e00 #x0e7f "Thai")
106  
-      (#x0e80 #x0eff "Lao")
107  
-      (#x0f00 #x0fff "Tibetan")
108  
-      (#x1000 #x109f "Myanmar ")
109  
-      (#x10a0 #x10ff "Georgian")
110  
-      (#x1100 #x11ff "Hangul Jamo")
111  
-      (#x1200 #x137f "Ethiopic")
112  
-      (#x13a0 #x13ff "Cherokee")
113  
-      (#x1400 #x167f "Unified Canadian Aboriginal Syllabics")
114  
-      (#x1680 #x169f "Ogham")
115  
-      (#x16a0 #x16ff "Runic")
116  
-      (#x1780 #x17ff "Khmer")
117  
-      (#x1800 #x18af "Mongolian")
118  
-      (#x1e00 #x1eff "Latin Extended Additional")
119  
-      (#x1f00 #x1fff "Greek Extended")
120  
-      (#x2000 #x206f "General Punctuation")
121  
-      (#x2070 #x209f "Superscripts and Subscripts")
122  
-      (#x20a0 #x20cf "Currency Symbols")
123  
-      (#x20d0 #x20ff "Combining Marks for Symbols")
124  
-      (#x2100 #x214f "Letterlike Symbols")
125  
-      (#x2150 #x218f "Number Forms")
126  
-      (#x2190 #x21ff "Arrows")
127  
-      (#x2200 #x22ff "Mathematical Operators")
128  
-      (#x2300 #x23ff "Miscellaneous Technical")
129  
-      (#x2400 #x243f "Control Pictures")
130  
-      (#x2440 #x245f "Optical Character Recognition")
131  
-      (#x2460 #x24ff "Enclosed Alphanumerics")
132  
-      (#x2500 #x257f "Box Drawing")
133  
-      (#x2580 #x259f "Block Elements")
134  
-      (#x25a0 #x25ff "Geometric Shapes")
135  
-      (#x2600 #x26ff "Miscellaneous Symbols")
136  
-      (#x2700 #x27bf "Dingbats")
137  
-      (#x2800 #x28ff "Braille Patterns")
138  
-      (#x2e80 #x2eff "CJK Radicals Supplement")
139  
-      (#x2f00 #x2fdf "Kangxi Radicals")
140  
-      (#x2ff0 #x2fff "Ideographic Description Characters")
141  
-      (#x3000 #x303f "CJK Symbols and Punctuation")
142  
-      (#x3040 #x309f "Hiragana")
143  
-      (#x30a0 #x30ff "Katakana")
144  
-      (#x3100 #x312f "Bopomofo")
145  
-      (#x3130 #x318f "Hangul Compatibility Jamo")
146  
-      (#x3190 #x319f "Kanbun")
147  
-      (#x31a0 #x31bf "Bopomofo Extended")
148  
-      (#x3200 #x32ff "Enclosed CJK Letters and Months")
149  
-      (#x3300 #x33ff "CJK Compatibility")
150  
-      (#x3400 #x4db5 "CJK Unified Ideographs Extension A")
151  
-      (#x4e00 #x9fff "CJK Unified Ideographs")
152  
-      (#xa000 #xa48f "Yi Syllables")
153  
-      (#xa490 #xa4cf "Yi Radicals")
154  
-      (#xac00 #xd7a3 "Hangul Syllables")
155  
-      (#xd800 #xdb7f "High Surrogates")
156  
-      (#xdb80 #xdbff "High Private Use Surrogates")
157  
-      (#xdc00 #xdfff "Low Surrogates")
158  
-      (#xe000 #xf8ff "Private Use")
159  
-      (#xf900 #xfaff "CJK Compatibility Ideographs")
160  
-      (#xfb00 #xfb4f "Alphabetic Presentation Forms")
161  
-      (#xfb50 #xfdff "Arabic Presentation Forms-A")
162  
-      (#xfe20 #xfe2f "Combining Half Marks")
163  
-      (#xfe30 #xfe4f "CJK Compatibility Forms")
164  
-      (#xfe50 #xfe6f "Small Form Variants")
165  
-      (#xfe70 #xfefe "Arabic Presentation Forms-B")
166  
-      (#xfeff #xfeff "Specials")
167  
-      (#xff00 #xffef "Halfwidth and Fullwidth Forms")
168  
-      (#xfff0 #xfffd "Specials")))
169  
-
170  
-(publish
171  
- :path "/urian"
172  
- :content-type "text/html; charset=utf-8"
173  
- :function
174  
- #'(lambda (req ent)
175  
-     (let* ((uri (cdr (assoc "uri" (request-query req) :test #'equal)))
176  
-	    (results nil))
177  
-       (when uri
178  
-	 (unless  (find #\: uri)
179  
-	   (setq uri (concatenate 'string "http://" uri)))
180  
-	 (setq results (chanal uri)))
181  
-       (with-http-response (req ent)
182  
-	 (with-http-body (req ent
183  
-			      :external-format :utf8-base)
184  
-	   (html
185  
-	    (:html
186  
-	     (:head (:title (:princ-safe
187  
-			     (format nil "String Analysis~@[ for `~a'~]"
188  
-				     uri))))
189  
-	     (:body
190  
-	      (if* (stringp results)
191  
-		 then (html (:p "AllegroServe got error:  "
192  
-				(:b (:princ-safe results))))
193  
-		 else (when results
194  
-			(when (first results)
195  
-			  (html
196  
-			   (:p (:princ-safe
197  
-				(format nil "Server set charset to `~s'."
198  
-					(car (first results))))
199  
-			       :br
200  
-			       (:princ-safe
201  
-				(format nil "Switched to External-Format `~s'."
202  
-					(ef-name (cdr (first results))))))))
203  
-			(when (second results)
204  
-			  (html
205  
-			   (:p (:princ-safe
206  
-				(format
207  
-				 nil
208  
-				 "A page meta tag specified charset as `~s'."
209  
-				 (car (second results))))
210  
-			       :br
211  
-			       (:princ-safe
212  
-				(format
213  
-				 nil "Switched to external-format: `~s'."
214  
-				 (ef-name (cdr (second results))))))))
215  
-			(html (:p "Scanned URL:  " ((:a :href uri
216  
-							target "_blank")
217  
-						    (:princ-safe uri))))
218  
-			(if* (cddr results)
219  
-			   then (html
220  
-				 (:p
221  
-				  "The following texts were found to contain "
222  
-				  "non-ASCII characters.  "
223  
-				  :br
224  
-				  "Click on a character for its description."))
225  
-				"Strings found on URL:  "
226  
-				(dolist (result (cddr results))
227  
-				  (html
228  
-				   :hr
229  
-				   (san-html result *html-stream*)))
230  
-			   else (html
231  
-				 (:p
232  
-				  "No texts containing non-ASCII characters "
233  
-				  "were found on the page.")))))
234  
-	      :hr
235  
-	      (macrolet ((item (title url)
236  
-			   ;; Assumes title and url are string literals
237  
-			   (let ((ref (format nil "/urian?uri=~a"
238  
-					      (uriencode-string url))))
239  
-			     `(html
240  
-			       (:ul (:li (:princ-safe ,title)
241  
-					 " ("
242  
-					 (:princ-safe ,url)
243  
-					 ")"
244  
-					 :br
245  
-					 ((:a href ,url
246  
-					      target "_blank")
247  
-					  "View Page (new browser window)")
248  
-					 :br
249  
-					 ((:a href ,ref) "Analyze")))))))
250  
-		(html
251  
-		 (:p
252  
-		  "Select a sample page:"
253  
-		  (item "UTF-8 Sampler"
254  
-			"http://www.columbia.edu/kermit/utf8.html")
255  
-		  (item "The \"anyone can be provincial!\" page"
256  
-			"http://www.trigeminal.com/samples/provincial.html")
257  
-		  (item "The Japan Netscape Netcenter Page"
258  
-			"http://home.netscape.com/ja")
259  
-		  (item "The Spain Yahoo! Page"
260  
-			"http://es.yahoo.com"))))
261  
-	      :br
262  
-	      ((:form :action "urian"
263  
-		      :method "get")
264  
-	       "Or Enter New URL to analyze:  "
265  
-	       ((:input :type "text" :name "uri" :size 50)))))))))))
266  
-
267  
-(defun san-html (string stream)
268  
-  (net.html.generator:html-stream
269  
-   stream
270  
-   (net.html.generator:html
271  
-    (:p "\""
272  
-	(dotimes (i (length string))
273  
-	  (net.html.generator:html
274  
-	   ((:a href
275  
-		(format nil "/chdescribe?char=~a"
276  
-			(net.aserve:uriencode-string
277  
-			 (format nil "u+~4,'0x:~s"
278  
-				 (char-code
279  
-				  (schar string i))
280  
-				 (schar string i)))))
281  
-	    (:princ (schar string i)))))
282  
-	"\""))))
283  
-
284  
-(defun chanal (uri
285  
-	       &aux (server-ef nil)
286  
-		    (lhtml nil)
287  
-		    (metatag-ef nil))
288  
-  (handler-case
289  
-      (multiple-value-bind (body response-code headers ruri)
290  
-	  (net.aserve.client:do-http-request uri :external-format :latin1-base)
291  
-	(declare (ignore response-code ruri))
292  
-	(setq server-ef (let ((content-type (cdr (assoc :content-type
293  
-							headers))))
294  
-			  (find-charset-from-content-type content-type)))
295  
-	(setq lhtml (net.html.parser:parse-html body))
296  
-	(setq metatag-ef (update-ef lhtml))
297  
-	(cons server-ef
298  
-	      (cons metatag-ef
299  
-		    (delete-duplicates
300  
-		     (chanal-body lhtml (or (cdr metatag-ef)
301  
-					    (cdr server-ef)
302  
-					    ;; www.yahoo.co.jp uses euc without
303  
-					    ;; specifying it.  Let's try using
304  
-					    ;; euc, then, as default.
305  
-					    (crlf-base-ef
306  
-					     (find-external-format :latin1))))
307  
-		     :test #'string=))))
308  
-    (error (c)
309  
-      (format nil "~a" c))))
310  
-
311  
-(defun chanal-body (body ef)
312  
-  (if* (stringp body)
313  
-     then (let ((s (octets-to-string
314  
-		    (string-to-octets body :external-format :latin1-base)
315  
-		    :external-format ef)))
316  
-	    (dotimes (i (length s))
317  
-	      (when (> (char-code (schar s i)) #x7f)
318  
-		;; non-ascii
319  
-		(return-from chanal-body (list s))))
320  
-	    nil)
321  
-   elseif (consp body)
322  
-     then ;; skip unparsed <script> and <style> forms
323  
-	  (if* (or (eq :script (car body))
324  
-		   (eq :style (car body))
325  
-		   (eq :comment (car body))
326  
-		   (and (listp (car body))
327  
-			(or (eq :script (caar body))
328  
-			    (eq :style (caar body)))))
329  
-	     then nil
330  
-	     else (nconc (chanal-body (car body) ef)
331  
-			 (chanal-body (cdr body) ef)))))
332  
-
333  
-(defun find-charset-from-content-type (content-type)
334  
-  (let ((charsetp (search "charset=" content-type
335  
-			  :test #'string-equal))
336  
-	(cs-name nil))
337  
-    (when charsetp
338  
-      (setq cs-name (subseq content-type
339  
-			    (1+ (position #\= content-type
340  
-					  :start charsetp))
341  
-			    (position #\; content-type
342  
-				      :start charsetp)))
343  
-      (cons cs-name
344  
-	    (crlf-base-ef
345  
-	     (find-external-format
346  
-	      (let ((*package* (find-package :keyword)))
347  
-		(read-from-string
348  
-		 (string-downcase cs-name)))))))))
349  
-
350  
-(defun update-ef (lhtml)
351  
-  (when (listp lhtml)
352  
-    (let ((html-body (car lhtml)))
353  
-      (when (eq :html (car html-body))
354  
-	(let ((html-component (second html-body)))
355  
-	  (when (eq :head (car html-component))
356  
-	    (dolist (x (cdr html-component))
357  
-	      (let ((charset-string (charset-metatag-p x)))
358  
-		(when charset-string
359  
-		  (return (find-charset-from-content-type
360  
-			   charset-string)))))))))))
361  
-
362  
-(defun charset-metatag-p (head-component)
363  
-  (when (listp head-component)
364  
-    (let ((arg-tag (car head-component)))
365  
-      (when (and (listp arg-tag)
366  
-		 (eq :meta (car arg-tag)))
367  
-	(when (equalp '(:http "http" :equiv "content-type" :content)
368  
-		      (subseq arg-tag 1 6))
369  
-	  (elt arg-tag 6))))))
370  
-
371  
-
372  
-
373  
-(defmacro cjk-p (code)
374  
-  `(or
375  
-    ;; CJK Ideographs
376  
-    (<= #x4e00 ,code #x9fff)
377  
-    ;; Hangul Syllables
378  
-    (<= #xac00 ,code #xd7a3)))
379  
-
380  
-(publish
381  
- :path "/chdescribe"
382  
- :content-type "text/html; charset=utf-8"
383  
- :function
384  
- #'(lambda (req ent)
385  
-     (let ((lookup
386  
-	    (assoc "char" (request-query req)
387  
-		   :test #'string=)))
388  
-       (when lookup
389  
-	 (setq lookup
390  
-	   (let ((*read-base* 16))
391  
-	     (read-from-string
392  
-	      (subseq (cdr lookup)
393  
-		      #.(length "u+")
394  
-		      #.(length "u+xxxx"))))))
395  
-       (with-http-response (req ent)
396  
-	 (with-http-body (req ent
397  
-			      :external-format :utf8-base)
398  
-	   (html
399  
-	    (:html
400  
-	     (:head (:title "Character Description"))
401  
-	     (:body
402  
-	      (:p
403  
-	       (:princ (format nil "Unicode value:  U+~4,'0x"
404  
-			       lookup)))
405  
-	      (:p
406  
-	       "Lisp Character Name:  "
407  
-	       ((:font :size "+3")
408  
-		(:prin1 (code-char lookup))))
409  
-	      (:p
410  
-	       "Browser Font Display:  "
411  
-	       ((:font :size "+3")
412  
-		(:princ (code-char lookup)))
413  
-	       :br
414  
-	       #.(format nil "~
415  
-Characters that appear as dots or empty boxes or question-marks likely look
416  
-that way because your browser is missing the needed font(s)."))
417  
-	      (unless (cjk-p lookup)
418  
-		(let ((uglyph (format nil "~
419  
-http://charts.unicode.org/Glyphs/~2,'0x/U~4,'0x.gif"
420  
-				      (ldb (byte 8 8) lookup)
421  
-				      lookup)))
422  
-		  (html ((:table border 0)
423  
-			 (:tr
424  
-			  (:td #.(format nil "~
425  
-Glyph GIF (from Unicode web site -- not all characters have gifs):")
426  
-			       :br
427  
-			       (:princ (format nil "[Loading from `~a'.]"
428  
-					       uglyph)))
429  
-			  (:td
430  
-			   ((:img :src uglyph
431  
-				  :alt (format nil "~s" (code-char lookup))
432  
-				  :border 2))))))))
433  
-	      (html
434  
-	       (:p "Character is in the "
435  
-		   (:b
436  
-		    (:princ
437  
-		     (dolist (b *blocks*)
438  
-		       (when (<= lookup (second b))
439  
-			 (return (third b))))))
440  
-		   " unicode block.")
441  
-	       (when (cjk-p lookup)
442  
-		 (html
443  
-		  (:p "More information may be available from Unicode site: "
444  
-		      (let ((upage (format nil "~
445  
-http://charts.unicode.org/unihan/unihan.acgi$0x~4,'0x"
446  
-					   lookup)))
447  
-			(html
448  
-			 ((:a href upage) (:princ-safe upage))))))))))))))))

0 notes on commit c0035e8

Please sign in to comment.
Something went wrong with that request. Please try again.