Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Browse files

Reworked to work with the Drupal module.

  • Loading branch information...
commit 0f41f59f28369c5ee1934a2d5139c0d1765dad8c 1 parent a0043bb
Johannes Wehner authored
View
31 Tagger.php
@@ -39,7 +39,7 @@ private function __construct($configuration = array(), $file = 'conf.php') {
$wordlists = array('initwords', 'prefix_infix', 'stopwords');
foreach ($wordlists AS $wordlist) {
if (self::$$wordlist == NULL) {
- $path = realpath(__ROOT__ .'resources/'. $wordlist .'/'. $wordlist .'_'.
+ $path = realpath(__ROOT__ .'resources/'. $wordlist .'/'. $wordlist .'_'.
self::$configuration['language'] .'.txt');
self::$$wordlist = array_flip(file($path, FILE_IGNORE_NEW_LINES));
}
@@ -50,7 +50,7 @@ private function __construct($configuration = array(), $file = 'conf.php') {
public function getTaggerVersion() {
return TAGGER_VERSION;
}
-
+
public function getVocabularyIds() {
$sql = sprintf("SELECT vid FROM tagger_lookup GROUP BY vid");
$result = TaggerQueryManager::query($sql);
@@ -164,17 +164,32 @@ public function __clone() {
public function tagText($text, $options = array()) {
$default = self::$configuration;
+ $filtered_conf = array();
+
+ // These configuration options can be overridden when this function is called
+ $conf = array(
+ 'named_entity',
+ 'keyword',
+ 'return_marked_text',
+ 'linked_data',
+ );
+ foreach ($conf as $key) {
+ if (isset($options[$key])) {
+ $filtered_conf[$key] = $options[$key];
+ }
+ }
+ // let some $options override $configuration temporarily
+ self::setConfiguration(self::$configuration, $filtered_conf);
- // let $options array override $configuration temporarily
- self::setConfiguration(self::$configuration, $options);
+ $ner = Tagger::getConfiguration('named_entity', 'vocab_ids');
+ $keyword = Tagger::getConfiguration('keyword', 'vocab_ids');
+ if (empty($ner) && empty($keyword)) {
+ throw new ErrorException('Missing vocab definition in configuration.');
+ }
- //if (empty($options['ner_vocab_ids']) && empty($options['keyword_vocab_ids'])) {
- // throw new ErrorException('Missing vocab definition in configuration.');
- //}
$tagged_text = new TaggedText($text);
$tagged_text->process();
-
self::$configuration = $default;
return $tagged_text;
View
1  classes/Matcher.class.php
@@ -35,7 +35,6 @@ protected function term_query() {
unset($unmatched[mb_strtolower($row['name'])]);
TaggerLogManager::logDebug("Synonym:\n" . print_r($row, TRUE));
}
-
// Then we find the actual names of entities
if (!empty($synonyms)) {
View
6 classes/TaggedText.class.php
@@ -82,10 +82,9 @@ public function process() {
}
}
- // NER
+ // NER
// - if NER-vocabs are provided
if (count( Tagger::getConfiguration('named_entity', 'vocab_ids') ) > 0) {
-
// Do named entity recognition: find named entities.
$ner_matcher = new NamedEntityMatcher($this->partialTokens);
$ner_matcher->match();
@@ -129,6 +128,9 @@ public function getTags($options = array()) {
// let $options array override $configuration temporarily
Tagger::setConfiguration($default, $options);
+ if ( Tagger::getConfiguration('return_full_tag_object') ) {
+ return $this->tags;
+ }
function create_output($type, $full_tags) {
$vocab_ids = Tagger::getConfiguration($type, 'vocab_ids');
View
12 db/TaggerQueryHandler.class.php
@@ -5,9 +5,8 @@ class TaggerQueryHandler {
private static $link = NULL;
private static $instance = NULL;
private function __construct() {
- $tagger_instance = Tagger::getTagger();
- $db_settings = $tagger_instance->getConfiguration('db');
+ $db_settings = Tagger::getConfiguration('db');
try {
if($db_settings['type'] != 'sqlite') {
// Anything but SQLite
@@ -41,7 +40,7 @@ public function fetch($result, $type) {
case 'assoc':
return $result->fetch(PDO::FETCH_ASSOC);
break;
-
+
default:
return $result->fetch(PDO::FETCH_ASSOC);
break;
@@ -53,7 +52,7 @@ public function query($sql, $args) {
$c = __CLASS__;
self::$instance = new $c;
}
-
+
if (!empty($args)) {
foreach (array_keys($args) as $key) {
$value = $args[$key];
@@ -72,9 +71,10 @@ public function query($sql, $args) {
$stmt = self::$instance->link->prepare($sql);
$stmt->execute($args);
$result = $stmt;
+
}
else {
- $result = self::$instance->link->query($sql);
+ $result = self::$instance->link->query($sql);
}
if($result) {
@@ -113,7 +113,7 @@ public static function bufferedInsert($table, $fields, $values_array, $num) {
$error_info = $st->errorInfo();
throw new Exception('Insert failed: ' . $error_info[2]);
}
-
+
$insert_count++;
if ($insert_count == $num) {
$commit_bool = self::$instance->link->commit();
View
7 defaults.php
@@ -14,6 +14,7 @@
// Find URI to Wikipedia etc. for tags
$tagger_conf['linked_data'] = FALSE;
+ $tagger_conf['return_full_tag_object'] = FALSE;
$tagger_conf['named_entity']['public_fields'] = array(
'realName' => 'name',
@@ -93,17 +94,17 @@
);
- // A keyword must be related to at least 15 texts for it to be
+ // A keyword must be related to at least 15 texts for it to be
// processed
$tagger_conf['keyword']['minimum_number_of_texts'] = 15;
$tagger_conf['keyword']['property'] = 'diff_outer_doc_freq';
$tagger_conf['keyword']['enable_stemmer'] = FALSE;
$tagger_conf['keyword']['normalize'] = TRUE;
-
+
// For a text to be given a 100% score it must have the equivalent of
// one full keyword per 100 words
$tagger_conf['keyword']['max_score'] = 1;
-
+
// For a keyword to be listed it must have a score of at least 15%
$tagger_conf['keyword']['threshold'] = 15;
Please sign in to comment.
Something went wrong with that request. Please try again.