-
Notifications
You must be signed in to change notification settings - Fork 19
/
SNLbase.class.php
134 lines (109 loc) · 3.71 KB
/
SNLbase.class.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
<?php
/**
* File: SNLbase.class.php
* Spam Number of Links (SNL) Base Class
* Copyright (C) 2006 Tom Homer - WebSiteMaster AT cogeco DOT com
* Licensed under the GNU General Public License
*/
// Abort when the requested script path contains this file's own name, i.e.
// the file was requested directly instead of being included by other code.
if (false !== stripos($_SERVER['PHP_SELF'], basename(__FILE__))) {
    exit('This file can not be used on its own!');
}
/**
* Checks number of links in post.
* based in large part on the works of Dirk Haun, Tom Willet (Spam-X) and Russ Jones (SLV)
*/
class SNLbase
{
    /** @var boolean Debug flag; not read anywhere inside this class. */
    private $_debug = false;

    /** @var boolean Verbose flag; not read anywhere inside this class. */
    private $_verbose = false;

    /**
     * Constructor
     *
     * Resets both flags to their declared defaults.
     */
    public function __construct()
    {
        $this->_debug = false;
        $this->_verbose = false;
    }

    /**
     * Check a post for an excessive number of links
     *
     * The post is reported as spam when the number of links counted by
     * prepareLinks() is greater than $_SPX_CONF['snl_num_links'].
     * Missing configuration entries are written back into the global
     * $_SPX_CONF array with their defaults ('snl_enabled' => false,
     * 'snl_num_links' => 5).
     *
     * @param  string  $post  post to check for spam
     * @return boolean        true = spam found, false = no spam
     * Note: Also returns 'false' when the post is empty or the check is
     *       disabled. Detections are reported through SPAMX_log().
     */
    public function CheckForSpam($post)
    {
        global $_SPX_CONF;

        // The check is opt-in: an unset switch counts as "disabled".
        if (!isset($_SPX_CONF['snl_enabled'])) {
            $_SPX_CONF['snl_enabled'] = false;
        }

        if (empty($post) || !$_SPX_CONF['snl_enabled']) {
            return false;
        }

        $links = $this->prepareLinks($post);
        if (empty($links)) {
            // no links at all (count of 0)
            return false;
        }

        if (!isset($_SPX_CONF['snl_num_links'])) {
            $_SPX_CONF['snl_num_links'] = 5;
        }

        if ($links > $_SPX_CONF['snl_num_links']) {
            SPAMX_log('SNL: spam detected, found ' . $links . ' links.');

            return true;
        }

        return false;
    }

    /**
     * Count links
     *
     * Counts the HTML links (<a href="..."> ... </a>) in a post whose target
     * does not start with this site's own URL ($_CONF['site_url']).
     *
     * @param  string  $comment  The post to check
     * @return int               Number of links pointing to other sites
     */
    public function getLinks($comment)
    {
        global $_CONF;

        // An empty needle would make stripos() report a match at offset 0 on
        // PHP 8 and thereby hide every link, so the own-site filter is only
        // applied when a site URL is actually configured.
        $siteUrl = isset($_CONF['site_url']) ? (string) $_CONF['site_url'] : '';

        // Must be an integer: the result is used in arithmetic by
        // prepareLinks(), and an empty string there raises a TypeError on
        // PHP 8.
        $count = 0;

        // The "s" modifier lets "." match newlines, so anchors whose link
        // text spans several lines are counted as well.
        $found = preg_match_all(
            "/<a[^>]*href=[\"']([^\"']*)[\"'][^>]*>(.*?)<\/a>/is",
            $comment,
            $matches
        );
        if (!$found) {
            // no match (0) or regex failure (false)
            return $count;
        }

        foreach ($matches[1] as $url) {
            if (($siteUrl !== '') && (stripos($url, $siteUrl) === 0)) {
                // skip links to our own site
                continue;
            }
            $count++;
        }

        return $count;
    }

    /**
     * Count all the links in a post
     *
     * Since we don't know if the post is in HTML or plain ASCII, we run it
     * through getLinks() twice: once for the <a> tags already present
     * (including converted BBcode [url=...] tags) and once more after the
     * HTML has been stripped and the remaining text has been passed through
     * COM_makeClickableLinks(). Links to our own site are not counted (see
     * getLinks()).
     *
     * @param  string  $comment  The post to check
     * @return int               Total number of links found by both passes
     */
    public function prepareLinks($comment)
    {
        // some spam posts have extra backslashes
        $comment = stripslashes($comment);

        // some spammers have yet to realize that we're not supporting BBcode
        // but since we want the URLs, convert it here ...
        $comment = preg_replace('/\[url=([^\]]*)\]/i', '<a href="\1">', $comment);
        // ... closing tag in any letter case, to match the case-insensitive
        // opening tag pattern above
        $comment = str_ireplace('[/url]', '</a>', $comment);

        // count all links from <a href="..."> tags
        $links = $this->getLinks($comment);

        // strip all HTML, then count all the plain text links
        // NOTE(review): presumably COM_makeClickableLinks() wraps bare URLs
        // in <a> tags - confirm against its definition
        $comment = COM_makeClickableLinks(GLText::stripTags($comment));
        $links += $this->getLinks($comment);

        return $links;
    }
}