Skip to content

Commit

Permalink
Adding bayesian filtering
Browse files Browse the repository at this point in the history
  • Loading branch information
manveru committed Oct 28, 2008
1 parent f89ef53 commit 336fbcb
Show file tree
Hide file tree
Showing 11 changed files with 740 additions and 56 deletions.
20 changes: 11 additions & 9 deletions controller/paste.rb
Expand Up @@ -46,12 +46,13 @@ def save

if request.post? and text and $rapaste[:syntaxes][syntax]
paste = Paste.create(
:created => Time.now,
:digest => Digest::SHA1.hexdigest(text),
:ip => request.ip,
:private => private,
:syntax => syntax,
:text => text
:category => BAYES.classify(text)
:created => Time.now,
:private => private,
:syntax => syntax,
:digest => Digest::SHA1.hexdigest(text),
:text => text,
:ip => request.ip,
)

session[:pastes] ||= Set.new
Expand All @@ -70,6 +71,7 @@ def fork(id, digest = nil)
redirect Rs(:fork => id, :digest => digest)
end

# TODO: implement this using the session[:pastes]
def delete(id, digest = nil)
redirect_referrer unless request.post?
paste = paste_for(id, digest)
Expand All @@ -78,13 +80,13 @@ def delete(id, digest = nil)
# Substring search over paste text.
#
# Reads the 'substring' request parameter into @needle and does nothing when
# it is missing or empty. Otherwise sets:
#   @pastes - pastes whose text contains the needle AND that are either
#             archived, non-private and categorized as 'ham', or were stored
#             with the requester's own IP
#   @total  - number of matching pastes
#   @pager  - paginator over @pastes, page size taken from $rapaste[:pager]
def search
  @needle = request['substring']
  return unless @needle && !@needle.empty?

  # Wrap the needle in SQL LIKE wildcards so it matches anywhere in the text.
  pattern = "%#{@needle}%"

  # Spam-filtered public pastes, plus everything owned by the requester's IP.
  visible = {:archive => true, :private => false, :category => 'ham'} | {:ip => request.ip}

  @pastes = Paste.filter(:text.like(pattern) & visible)
  @total = @pastes.count
  @pager = paginate(@pastes, :limit => $rapaste[:pager])
end

# Dataset of pastes shown in listings, ordered by descending id.
#
# A paste is included when it is archived, non-private and categorized as
# 'ham', or when it was stored with the requester's own IP.
def paste_list
  visible = {:archive => true, :private => false, :category => 'ham'} | {:ip => request.ip}
  Paste.order(:id.desc).filter(visible)
end

# TODO: This could be improved.
Expand All @@ -97,7 +99,7 @@ def paste_for(id, digest = nil, redirect_on_failure = true)
return paste if paste.ip == request.ip
return paste if paste.archive
paste.archive = true
paste.save
paste.categorize!
return paste
end

Expand Down
56 changes: 56 additions & 0 deletions controller/spam.rb
@@ -0,0 +1,56 @@
# Moderation controller mapped at '/spam'.
#
# Lists pastes awaiting a spam/ham decision, lets a moderator mark them in
# bulk, and keeps a short per-session history so recent markings can be undone.
class SpamController < Ramaze::Controller
  map '/spam'
  helper :paginate, :formatting, :aspect
  engine :Haml
  layout '/layout'

  # The undo history is trimmed until it holds fewer than this many entries.
  UNDO_LIMIT = 10

  # Non-private pastes that have no approval yet and are either classified as
  # 'spam' or not classified at all.
  # Sets @pastes, @pager (page size from $rapaste[:pager]) and @count.
  def list_pending
    undecided = {:category => 'spam'} | {:category => nil}
    @pastes = Paste.filter({:private => false, :approved => nil} & undecided)
    @pager = paginate(@pastes, :limit => $rapaste[:pager])
    @count = @pastes.count
  end

  # Non-private pastes flagged as spammy.
  # Sets @pastes, @pager (page size from $rapaste[:pager]) and @count.
  def list_spammy
    @pastes = Paste.filter(:private => false, :spammy => true)
    @pager = paginate(@pastes, :limit => $rapaste[:pager])
    @count = @pastes.count
  end

  # Bulk-categorize pastes. Only acts on POST requests.
  #
  # Every request parameter is read as `paste id => category`. Entries whose
  # category is 'ham' or 'spam' and whose id resolves to a paste are marked via
  # Paste#ham! / Paste#spam! (defined on the model, not visible here).
  # The entries that were actually applied are pushed onto session[:undo].
  def mark
    return unless request.post?

    applied = {}
    request.params.each do |id, category|
      # Check the category first so unrelated params never hit the database.
      next unless category == 'ham' || category == 'spam'
      next unless paste = Paste[id]

      if category == 'ham'
        paste.ham!
      else
        paste.spam!
      end
      applied[id] = category
    end

    # Record only what was really changed: #undo resets every paste listed in
    # a history entry, so unrelated params must not end up in there.
    unless applied.empty?
      history = (session[:undo] ||= [])
      history << applied
      history.shift until history.size < UNDO_LIMIT # keep it reasonable
    end

    flash[:good] = "Categorized this page, #{A('undo?', :href => Rs(:undo))}"

    redirect_referrer
  end

  # Revert the most recent #mark of this session by clearing category and
  # approval on every paste it touched, then redirect back.
  def undo
    # The history may not exist yet if nothing was marked in this session.
    history = session[:undo] || []

    if last = history.pop
      ids = last.keys.map{|k| k.to_i }
      Paste.filter(:id => ids).each do |paste|
        paste.category = nil
        paste.approved = nil
        paste.save
      end

      flash[:good] = "Undo successful, want to #{A('undo further?', :href => Rs(:undo))}"
    else
      flash[:bad] = "Nothing to undo"
    end

    redirect_referrer
  end
end
172 changes: 172 additions & 0 deletions db/ham.txt
@@ -0,0 +1,172 @@
context
please
mode
main
special
which
either
html
default
shares
attr
conversion
else
given
over
match
also
closed
types
long
exit
case
check
going
root
without
left
list
even
display
lists
where
style
should
last
info
again
handle
false
path
elements
modify
error
filter
unit
pershareitemtype
author
substring
tierdetails
return
when
affiliateid
details
some
http
dataitem
expanded
class
public
perunititemtype
while
depth
would
file
later
show
body
misc
arguments
response
catch
block
join
math
document
only
pure
first
will
find
that
name
reload
normal
load
tier
user
view
michael
local
thanks
onclick
unitref
because
code
more
ajax
about
monetaryitemtype
affiliate
remove
them
then
somewhere
test
version
your
want
function
files
they
page
size
shown
start
request
support
affiliatesdtx
decimals
modules
sure
using
source
found
like
working
thing
element
actually
been
convert
ruby
searching
type
from
with
patch
null
hope
data
position
read
into
this
module
filename
controller
broker
already
open
exists
text
each
after
string
line
link
resolve
suite
xbrli
update
work
variables
current
raise
have
sharesitemtype
admin
instead
money
true
windows

0 comments on commit 336fbcb

Please sign in to comment.