/
htmlscan.mli
18 lines (15 loc) · 983 Bytes
/
htmlscan.mli
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
(***********************************************************************)
(* *)
(* SpamOracle -- a Bayesian spam filter *)
(* *)
(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *)
(* *)
(* Copyright 2002 Institut National de Recherche en Informatique et *)
(* en Automatique. This file is distributed under the terms of the *)
(* GNU Public License version 2, http://www.gnu.org/licenses/gpl.txt *)
(* *)
(***********************************************************************)
(* $Id$ *)
(** Approximate HTML scanner.  Extracts words and certain parameters
    of certain tags (e.g. URLs) from HTML text. *)

(** [extract_text s] scans the string [s] as HTML text and returns the
    extracted text as a new string.

    NOTE(review): going by the module description above, the result
    presumably holds the words of the document together with selected
    tag parameters such as URLs.  The exact output format (separators,
    which tags and attributes are kept, handling of entities) is
    determined by the implementation and is not visible from this
    interface -- confirm there before relying on it. *)
val extract_text: string -> string