<?xml version="1.0" encoding="UTF-8"?>
<commit>
  <added type="array">
    <added>
      <filename>controller/spam.rb</filename>
    </added>
    <added>
      <filename>db/ham.txt</filename>
    </added>
    <added>
      <filename>db/spam.txt</filename>
    </added>
    <added>
      <filename>model/paste.rb</filename>
    </added>
    <added>
      <filename>vendor/bayes.rb</filename>
    </added>
    <added>
      <filename>view/spam/list_pending.haml</filename>
    </added>
  </added>
  <modified type="array">
    <modified>
      <diff>@@ -46,12 +46,13 @@ class PasteController &lt; Ramaze::Controller
 
     if request.post? and text and $rapaste[:syntaxes][syntax]
       paste = Paste.create(
-        :created =&gt; Time.now,
-        :digest  =&gt; Digest::SHA1.hexdigest(text),
-        :ip      =&gt; request.ip,
-        :private =&gt; private,
-        :syntax  =&gt; syntax,
-        :text    =&gt; text
+        :category =&gt; BAYES.classify(text)
+        :created  =&gt; Time.now,
+        :private  =&gt; private,
+        :syntax   =&gt; syntax,
+        :digest   =&gt; Digest::SHA1.hexdigest(text),
+        :text     =&gt; text,
+        :ip       =&gt; request.ip,
       )
 
       session[:pastes] ||= Set.new
@@ -70,6 +71,7 @@ class PasteController &lt; Ramaze::Controller
     redirect Rs(:fork =&gt; id, :digest =&gt; digest)
   end
 
+  # TODO: implement this using the session[:pastes]
   def delete(id, digest = nil)
     redirect_referrer unless request.post?
     paste = paste_for(id, digest)
@@ -78,13 +80,13 @@ class PasteController &lt; Ramaze::Controller
   def search
     return unless @needle = request['substring'] and not @needle.empty?
     needle = &quot;%#{@needle}%&quot;
-    @pastes = Paste.filter(:text.like(needle) &amp; ({:archive =&gt; true, :private =&gt; false} | {:ip =&gt; request.ip}))
+    @pastes = Paste.filter(:text.like(needle) &amp; ({:archive =&gt; true, :private =&gt; false, :category =&gt; 'ham'} | {:ip =&gt; request.ip}))
     @total = @pastes.count
     @pager = paginate(@pastes, :limit =&gt; $rapaste[:pager])
   end
 
   def paste_list
-    Paste.order(:id.desc).filter({:archive =&gt; true, :private =&gt; false} | {:ip =&gt; request.ip})
+    Paste.order(:id.desc).filter({:archive =&gt; true, :private =&gt; false, :category =&gt; 'ham'} | {:ip =&gt; request.ip})
   end
 
   # TODO: This could be improved.
@@ -97,7 +99,7 @@ class PasteController &lt; Ramaze::Controller
       return paste if paste.ip == request.ip
       return paste if paste.archive
       paste.archive = true
-      paste.save
+      paste.categorize!
       return paste
     end
 </diff>
      <filename>controller/paste.rb</filename>
    </modified>
    <modified>
      <diff>@@ -5,9 +5,12 @@ $rapaste = {
   :priority =&gt; %w[ ruby plain_text plaintext html css javascript java_script yaml diff ],
   :theme    =&gt; 'iplastic', # only used by uv
   :title    =&gt; 'RaPaste', # title of page
+  :admins   =&gt; { # hash of username and password for spamhunters
+    'manveru' =&gt; 'letmein'
+  }
 
   # You might want to edit start.rb directly.
   :ramaze =&gt; { :port =&gt; 7000, :host =&gt; '0.0.0.0' }
 }
 
-DB = Sequel.sqlite('rapaste.db', :logger =&gt; Ramaze::Log)
+DB = Sequel.sqlite(__DIR__/'db/rapaste.db') #, :logger =&gt; Ramaze::Log)</diff>
      <filename>env.rb</filename>
    </modified>
    <modified>
      <diff>@@ -5,10 +5,15 @@ require 'sequel'
 $LOAD_PATH.unshift(__DIR__)
 
 require 'env'
+
 require 'vendor/highlight'
-require 'model'
+require 'vendor/bayes'
+
+require 'model/paste'
+
 require 'controller/css'
 require 'controller/paste'
+require 'controller/spam'
 
 class Highlight
   def default_options
@@ -21,4 +26,19 @@ class Highlight
   $rapaste[:syntaxes] = mod.syntaxes($rapaste[:priority])
 end
 
+# The Bayes database contains information about the ham and spam rating of
+# certain words.
+# If you would like to reset it, just remove the db/bayes.marshal file.
+BAYES = Bayes.new(__DIR__/'db/bayes.marshal')
+
+# Initial seeding of the bayes filter, setting up categories and a couple of
+# common ratings.
+# The format of the files isn't that important, given that it should recognize
+# any text.
+# But you should separate words in some way (whitespace, commas, numbers...)
+if BAYES.categories.empty?
+  BAYES.train(:spam, File.read(__DIR__/'db/spam.txt'))
+  BAYES.train(:ham,  File.read(__DIR__/'db/ham.txt'))
+end
+
 Ramaze.start $rapaste[:ramaze]</diff>
      <filename>start.rb</filename>
    </modified>
    <modified>
      <diff>@@ -55,6 +55,8 @@ class Highlight
       args = options.values_at(:output, :syntax, :line_numbers, :style, :headers)
 
       Uv.parse(text, *args)
+    rescue =&gt; ex
+      &quot;&lt;pre&gt;#{Rack::Utils.escape(text)}&lt;/pre&gt;&quot;
     end
 
     def translate(options)
@@ -109,6 +111,8 @@ class Highlight
       options = translate(DEFAULT.merge(options))
       tokens = CodeRay.scan(text, options[:syntax])
       tokens.html(options)
+    rescue =&gt; ex
+      &quot;&lt;pre&gt;#{Rack::Utils.escape(text)}&lt;/pre&gt;&quot;
     end
 
     def translate(options)</diff>
      <filename>vendor/highlight.rb</filename>
    </modified>
  </modified>
  <removed type="array">
    <removed>
      <filename>model.rb</filename>
    </removed>
  </removed>
  <parents type="array">
    <parent>
      <id>f89ef53713fec4c6c0e1e2009dbaa55bbcef4387</id>
    </parent>
  </parents>
  <author>
    <name>Michael Fellinger</name>
    <email>m.fellinger@gmail.com</email>
  </author>
  <url>http://github.com/manveru/rapaste/commit/336fbcbf1e7054e4b40eee253f3472e92bf53ee8</url>
  <id>336fbcbf1e7054e4b40eee253f3472e92bf53ee8</id>
  <committed-date>2008-10-27T20:01:12-07:00</committed-date>
  <authored-date>2008-10-27T20:01:12-07:00</authored-date>
  <message>Adding bayesian filtering</message>
  <tree>9c25a63ca21a2a7b2d66df3ab8b718170b49fbbc</tree>
  <committer>
    <name>Michael Fellinger</name>
    <email>m.fellinger@gmail.com</email>
  </committer>
</commit>
