-
Notifications
You must be signed in to change notification settings - Fork 13
/
categorizer.cpp
70 lines (55 loc) · 1.77 KB
/
categorizer.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
#include <string>
#include <unordered_map>
#include <boost/algorithm/string.hpp>
#include "categorizer.hpp"
#include "url.hpp"
namespace {

/**
 * Extracts the lower-cased file extension from the path of a URL.
 *
 * Only the final path segment (the text after the last '/') is inspected,
 * so a dot inside a directory name such as "/v1.2/readme" is not mistaken
 * for an extension separator.
 *
 * @param url  URL whose path is examined via Url::getPath().
 * @return     The characters following the last '.' of the final path
 *             segment, converted to lower case; an empty string when that
 *             segment contains no '.' or ends with one.
 */
std::string getExtension(spider::Url const& url) {
    using std::string;
    using boost::to_lower;
    string const& path = url.getPath();
    // Find whichever of '.' or '/' occurs last. Hitting a '/' first (scanning
    // from the end) means the final segment has no dot at all.
    string::size_type const marker = path.find_last_of("./");
    if (marker == string::npos || path[marker] == '/') {
        return string();
    }
    // marker + 1 is at most path.size(), so substr is always in range and
    // yields an empty string for a trailing dot.
    string extension = path.substr(marker + 1);
    to_lower(extension);
    return extension;
}

}
/**
 * Destructor. Performs no work beyond destroying the members; it is defined
 * out of line in this translation unit.
 */
spider::Categorizer::~Categorizer() = default;
/**
 * Registers a file extension together with its priority.
 *
 * @param priority   Value later reported by getPriority() for URLs carrying
 *                   this extension.
 * @param extension  Extension to register; it is lower-cased before being
 *                   stored. Registering the same extension again overwrites
 *                   the previously stored priority.
 */
void spider::Categorizer::supportExtension(int priority, std::string const& extension) {
    // Keys are kept in lower case; lookups use the lower-cased extension
    // produced by getExtension().
    m_extensions[boost::to_lower_copy(extension)] = priority;
}
/**
 * Tells whether the URL's extension has been registered.
 *
 * @param url  URL to classify.
 * @return     true when the extension reported by getExtension() for this
 *             URL is a key of m_extensions, false otherwise.
 */
bool spider::Categorizer::isDesired(Url const& url) const {
    std::string const key = getExtension(url);
    return m_extensions.count(key) != 0;
}
/**
 * Looks up the priority registered for the URL's extension.
 *
 * @param url  URL to classify.
 * @return     The priority stored for the extension reported by
 *             getExtension(), or 0 when that extension is not registered.
 */
int spider::Categorizer::getPriority(Url const& url) const {
    auto const match = m_extensions.find(getExtension(url));
    if (match != m_extensions.end()) {
        return match->second;
    }
    // Unregistered extensions fall back to priority 0.
    return 0;
}
/**
 * Creates a categorizer bound to a domain.
 *
 * @param domain  Copied into m_domain, which isDesired() compares
 *                case-insensitively against the end of each URL's host.
 */
spider::DomainCategorizer::DomainCategorizer(std::string const& domain)
: m_domain(domain) {
}
/**
 * Tells whether a URL belongs to the configured domain and carries a
 * registered extension.
 *
 * The host must equal m_domain or be a sub-domain of it, compared
 * case-insensitively. A plain suffix test is not enough: it would accept
 * "notexample.com" for the domain "example.com", so the match is also
 * required to start on a host-label boundary.
 *
 * @param url  URL to classify.
 * @return     true when the host matches the domain as described above and
 *             Categorizer::isDesired(url) is also true.
 */
bool spider::DomainCategorizer::isDesired(Url const& url) const {
    using std::string;
    using boost::iends_with;
    string const& host = url.getHost();
    if (!iends_with(host, m_domain)) {
        return false;
    }
    // From here on host.size() >= m_domain.size() because the suffix matched.
    string::size_type const hostLength = host.size();
    string::size_type const domainLength = m_domain.size();
    bool const onLabelBoundary =
        domainLength == 0                              // empty domain: no restriction
        || hostLength == domainLength                  // host is exactly the domain
        || m_domain[0] == '.'                          // domain given as ".example.com"
        || host[hostLength - domainLength - 1] == '.'; // genuine sub-domain
    return onLabelBoundary && Categorizer::isDesired(url);
}