From 7025280188e74a974cce6679089ba4f984555ab3 Mon Sep 17 00:00:00 2001 From: Piotr Gabryjeluk Date: Sat, 18 Jul 2009 15:45:37 +0200 Subject: [PATCH] Reverted indexing, deleting and searching to pre-Lucene state (http://www.wikidot.org/bug:2). Added search:* pages to initial dump (http://www.wikidot.org/bug:16). Updated Makefile and made generate_keys.sh executable (http://www.wikidot.org/bug:9). Removed sidebar from account category on main site (http://www.wikidot.org/bug:10). --- Makefile | 5 +- bin/generate_keys.sh | 0 files/dump/db/4-main-site.sql | 4 +- files/dump/sites/www/search.all.page | 3 + files/dump/sites/www/search.site.page | 3 + php/actions/ManageSiteAction.php | 11 +- php/class/Wikidot/Search/Exception.php | 27 -- php/class/Wikidot/Search/Lucene.php | 454 ------------------------- php/modules/search/SearchAllModule.php | 168 ++++----- php/utils/Deleter.php | 8 - php/utils/Indexer.php | 15 +- tests/lucene_bootstrap.php | 30 -- tests/lucene_search.php | 56 --- 13 files changed, 87 insertions(+), 697 deletions(-) mode change 100644 => 100755 bin/generate_keys.sh create mode 100644 files/dump/sites/www/search.all.page create mode 100644 files/dump/sites/www/search.site.page delete mode 100644 php/class/Wikidot/Search/Exception.php delete mode 100644 php/class/Wikidot/Search/Lucene.php delete mode 100644 tests/lucene_bootstrap.php delete mode 100644 tests/lucene_search.php diff --git a/Makefile b/Makefile index a7ff0ab..cdcd511 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ -all: db config finish +all: db keys config finish prepare_db: bin/prepare_db.php | psql @@ -9,6 +9,9 @@ db: bin/generate_om.php bin/bootstrap_pages.php files/dump/sites/* +keys: + bin/generate_keys.sh + config: bin/configure.php diff --git a/bin/generate_keys.sh b/bin/generate_keys.sh old mode 100644 new mode 100755 diff --git a/files/dump/db/4-main-site.sql b/files/dump/db/4-main-site.sql index c012782..23467b1 100644 --- a/files/dump/db/4-main-site.sql +++ 
b/files/dump/db/4-main-site.sql @@ -56,10 +56,10 @@ INSERT INTO category VALUES (5, INSERT INTO category VALUES (6, 1, 'account', - true, 20, + false, 21, false, 'e:;c:;m:;d:;a:;r:;z:;o:', true, 1, NULL, - true, 'nav:top', 'nav:side', + false, 'nav:top', NULL, NULL, false, true, NULL, NULL, NULL, true, false, false, NULL );;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/files/dump/sites/www/search.all.page b/files/dump/sites/www/search.all.page new file mode 100644 index 0000000..f950736 --- /dev/null +++ b/files/dump/sites/www/search.all.page @@ -0,0 +1,3 @@ +Search All Sites + +[[module SearchAll]] diff --git a/files/dump/sites/www/search.site.page b/files/dump/sites/www/search.site.page new file mode 100644 index 0000000..31f55f9 --- /dev/null +++ b/files/dump/sites/www/search.site.page @@ -0,0 +1,3 @@ +Search + +[[module Search]] diff --git a/php/actions/ManageSiteAction.php b/php/actions/ManageSiteAction.php index e7af3e9..175ba61 100644 --- a/php/actions/ManageSiteAction.php +++ b/php/actions/ManageSiteAction.php @@ -818,12 +818,7 @@ public function savePrivateSettingsEvent($runData){ } } - $db->commit(); - - // update the search index - $lucene = new Wikidot_Search_Lucene(); - $lucene->queueReIndexSite($site->getSiteId()); - + $db->commit(); if (GlobalProperties::$UI_SLEEP) { sleep(1); } } @@ -910,10 +905,6 @@ public function deleteSiteEvent($runData){ $site->setCustomDomain(null); } $db->commit(); - - // update the search index - $lucene = new Wikidot_Search_Lucene(); - $lucene->queueReIndexSite($site->getSiteId()); } diff --git a/php/class/Wikidot/Search/Exception.php b/php/class/Wikidot/Search/Exception.php deleted file mode 100644 index beddf41..0000000 --- a/php/class/Wikidot/Search/Exception.php +++ /dev/null @@ -1,27 +0,0 @@ -indexFile = $indexFile; - } else { - $this->indexFile = GlobalProperties::$SEARCH_LUCENE_INDEX; - } - - if ($queueFile) { - $this->queueFile = $queueFile; - } else { - $this->queueFile = GlobalProperties::$SEARCH_LUCENE_QUEUE; - 
} - - if ($queueLockFile) { - $this->queueLockFile = $queueLockFile; - } else { - $this->queueLockFile = GlobalProperties::$SEARCH_LUCENE_LOCK; - } - } - - protected function loadIndex() { - $this->index = Zend_Search_Lucene::open($this->indexFile); - Zend_Search_Lucene_Analysis_Analyzer::setDefault(new Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8Num_CaseInsensitive()); - } - - public function createIndex() { - $this->index = Zend_Search_Lucene::create($this->indexFile); - Zend_Search_Lucene_Analysis_Analyzer::setDefault(new Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8Num_CaseInsensitive()); - } - - protected function resetQueue() { - file_put_contents($this->queueFile, ""); - } - - protected function getFtsEntryDetails($fts, $site = null) { - if ($fts) { - if (in_array($fts->getFtsId(), $this->processedFtsEntries)) { - return; - } - - $this->processedFtsEntries[] = $fts->getFtsId(); - - if (! $site) { - $site = DB_SitePeer::instance()->selectByPrimaryKey($fts->getSiteId()); - } - - if (! $site || $site->getDeleted() || ! $site->getVisible()) { - return; - } - - // add content, site_id, site_public, fts_id fields - $doc = "UNSTORED content 1.0 " . str_replace("\n", " ", $fts->getText()); - $doc .= "\nTEXT site_id 0.1 " . $fts->getSiteId(); - $doc .= "\nTEXT site_public 0.1 " . ($site->getPrivate() ? "false" : "true"); - $doc .= "\nTEXT title 7.0 " . $fts->getTitle(); - - if ($fts->getPageId()) { - - $doc .= "\nTEXT item_type 0.1 page"; - $doc .= "\nTEXT page_id 0.1 " . $fts->getPageId(); - - // TAGS - if ($page = DB_PagePeer::instance()->selectByPrimaryKey($fts->getPageId())) { - $tags = $page->getTagsAsArray(); - $doc .= "\nUNSTORED tags " . (4.0 * count($tags)) . " " . implode(" ", $tags); - } - - } elseif ($fts->getThreadId()) { - - $doc .= "\nTEXT item_type 0.1 thread"; - $doc .= "\nTEXT thread_id 0.1 " . 
$fts->getThreadId(); - - } else { - // NEITHER A PAGE NOR THREAD - return; - } - - return "$doc\n"; - } - } - - protected function queue($type, $id, $details = null) { - while (! $this->tryLockingQueue()) { - sleep(1); - } - - $fp = fopen($this->queueFile, "a"); - - if (! in_array($type, array("INDEX_FTS", "DELETE_PAGE", "DELETE_THREAD", "DELETE_SITE"))) { - $type = "UNKNOWN"; - } - $id = (int) $id; - - if ($type == "INDEX_FTS") { - if ($details) { - fwrite($fp, "$type $id\n"); - fwrite($fp, $details); - fwrite($fp, "\n"); - } - } else { - fwrite($fp, "$type $id\n"); - } - fclose($fp); - - $this->releaseQueueLock(); - } - - public function queueFtsEntry($fts_id, $fts_details = null) { - if (! $fts_details) { - $fts = DB_FtsEntryPeer::instance()->selectByPrimaryKey($fts_id); - $fts_details = $this->getFtsEntryDetails($fts); - } - $this->queue("INDEX_FTS", $fts_id, $fts_details); - } - - public function queueDeletePage($page_id) { - $this->queue("DELETE_PAGE", $page_id); - } - - public function queueDeleteThread($thread_id) { - $this->queue("DELETE_THREAD", $thread_id); - } - - protected function queueSite($site, $verbose = false, $fts_id_from = null, $fts_id_to = null) { - - if (is_numeric($site)) { - $site = DB_SitePeer::instance()->selectByPrimaryKey($site); - } - - if ($site) { - - $atOnce = $this->AT_ONCE; - $offset = 0; - - $c = new Criteria(); - $c->setLimit($atOnce, $offset); - - if ($fts_id_to) { - $c->add("fts_id", $fts_id_from, ">="); - $c->add("fts_id", $fts_id_to, "<"); - } - - if ($site == "ALL") { - $site = null; - } else { - $c->add("site_id", $site->getSiteId()); - } - - $pp = DB_FtsEntryPeer::instance(); - $entries = null; - - do { - unset($entries); // try to save SOME memory - - $entries = $pp->selectByCriteria($c); - - foreach ($entries as $fts) { - $this->queueFtsEntry($fts->getFtsId(), $this->getFtsEntryDetails($fts, $site)); - } - - $offset += $atOnce; - $c->setLimit($atOnce, $offset); - - if ($verbose) { - echo "."; - } - - } while 
(count($entries)); - } - } - - public function queueReIndexSite($site_id) { - $this->queue("DELETE_SITE", $site_id); - $this->queueSite($site_id); - } - - /* - * queries the index and returns the array of Fts entries - * @param $query Lucene query to search for - * @return array fts_id array - */ - public function rawQuery($query) { - $cache = Ozone::$memcache; - $key = "search.." . md5($query); - - if ($cache && $result = $cache->get($key)) { - return $result; - } - - $result = $this->executeWikidotSearch($query); - - if ($cache) { - $cache->set($key, $result, 0, $this->CACHE_FOR); - } - - return $result; - } - - /** - * high level Wikidot search function - * manages user permisisons, searches only in public sites + those user is a member of - * - * @param $phrase Lucene query to search for - * @param $user user that searches - * @param $itemType p - search only pages, f - only forums - * @param $sites sites to search within - * @param $onlyUserSites whether to search ONLY in user sites - * @return array fts_id array - */ - public function search($phrase, $user = null, $itemType = null, $sites = null, $onlyUserSites = false) { - - // user filter - - if ($onlyUserSites) { - $user_query = ""; - } else { - $user_query = "site_public:true"; - } - - if ($user) { - $c = new Criteria(); - $c->add("user_id", $user->getUserId()); - $c->setLimit(100, 0); - - $memberships = DB_MemberPeer::instance()->selectByCriteria($c); - if (count($memberships) < 100) { - foreach ($memberships as $m) { - $user_query .= " site_id:" . $m->getSiteId() . "^2"; - } - } - } - - if ($user_query == "") { - $user_query = "site_public:true"; - } - - // sites filter - - $sites_query = ""; - if (is_array($sites) && count($sites)) { - foreach ($sites as $site) { - if (! is_numeric($site)) { // not an ID - if (is_string($site)) { // maybe unix_name? 
- $c = new Criteria(); - $c->add("unix_name", $site); - $site = DB_SitePeer::instance()->selectOne($c); // make it an object - } - } - if (is_a($site, "DB_Site")) { // object? - $site = $site->getSiteId(); // get an id - } - if ($site !== null && is_numeric($site)) { // we have site id finally - $sites_query .= " site_id:$site"; - } - } - } - - // construct content_query - $phrase = trim($phrase); - if ($phrase == "") { - return array(); - } - if (! preg_match("/tags:/", $phrase) && ! preg_match("/title:/", $phrase) && ! preg_match("/content:/", $phrase)) { - - // give the exact match in title higher boost - if (! strstr($phrase, '"') && ! strstr($phrase, '^')) { - $title_phrase = "\"$phrase\"^5 $phrase"; - } else { - $title_phrase = $phrase; - } - - $content_query = "tags:($phrase) title:($title_phrase) content:($phrase)"; - } else { - $content_query = $phrase; - } - - $query = ""; - if ($itemType == "p") { - $query .= "+item_type:page "; - } - if ($itemType == "f") { - $query .= "+item_type:thread "; - } - if ($sites_query) { - $query .= "+($sites_query) "; - } - $query .= "+($user_query) +($content_query)"; - - return $this->rawQuery($query); - } - - public function indexAllSitesVerbose($fts_id_from = null, $fts_id_to = null) { - $this->loadIndex(); - $this->queueSite("ALL", true, $fts_id_from, $fts_id_to); - echo "\n"; - } - - protected function executeWikidotSearch($query) { - $results = array(); - if (GlobalProperties::$SEARCH_USE_JAVA) { - $cmd = "java -jar " . escapeshellcmd(WIKIDOT_ROOT . "/bin/wikidotIndexer.jar"); - $cmd .= " search " . escapeshellarg($this->indexFile); - $cmd .= " " . escapeshellarg($query); - $cmd .= " 2>&1"; - - exec($cmd, $results); - if (count($results)) { - // something other than int in the first line means we had an exception in java program - if (! 
is_numeric($results[0])) { - throw new Wikidot_Search_Exception(join("\n", $results)); - } - } - } else { - $this->loadIndex(); - foreach ($this->index->find($query) as $hit) { - $results[] = $hit->fts_id; - } - } - - return $results; - } - - protected function tryLockingQueue() { - $this->lock = fopen($this->queueLockFile, 'w'); - return flock($this->lock, LOCK_EX); - } - - protected function releaseQueueLock() { - fclose($this->lock); - } - - public function processQueue() { - if (GlobalProperties::$SEARCH_USE_JAVA) { - $cmd = "java -jar " . escapeshellcmd(WIKIDOT_ROOT . "/bin/wikidotIndexer.jar"); - $cmd .= " process " . escapeshellarg($this->indexFile); - $cmd .= " " . escapeshellarg($this->queueFile); - $cmd .= " " . escapeshellarg($this->queueLockFile); - $cmd .= " 2>&1"; - exec($cmd, $results); - if (count($results)) { - // something other than int in the first line means we had an exception in java program - if (! is_numeric($results[0])) { - throw new Wikidot_Search_Exception(join("\n", $results)); - } - } - } else { - - if (! $this->tryLockingQueue()) { - return; - } - - $this->loadIndex(); - - $cmds = file($this->queueFile); - $this->resetQueue(); - - $this->releaseQueueLock(); - - while (count($cmds)) { - - $cmd = array_shift($cmds); - $a = explode(" ", $cmd); - if ($a[0] == "DELETE_PAGE") { - $this->zlDeleteItems("page_id:" . $a[1]); - } elseif ($a[0] == "DELETE_THREAD") { - $this->zlDeleteItems("thread_id:" . $a[1]); - } elseif ($a[0] == "DELETE_SITE") { - $this->zlDeleteItems("site_id:" . $a[1]); - } elseif ($a[0] == "INDEX_FTS") { - - // delete it first - $this->zlDeleteItems("fts_id:" . 
$a[1]); - - // construct the document - $doc = new Zend_Search_Lucene_Document(); - - while (true) { - $line = array_shift($cmds); - if (trim($line) == "") { - break; - } - - $args = explode(" ", $line); - $type = array_shift($args); - $key = array_shift($args); - $boost = array_shift($args); - $val = implode(" ", $args); - - if ($type == "TEXT") { - $field = Zend_Search_Lucene_Field::text($key, $val); - } else { - $field = Zend_Search_Lucene_Field::unStored($key, $val); - } - - $field->boost = $boost; - $doc->addField($field); - } - $this->index->addDocument($doc); - } - } - $this->index->commit(); - } - } - - public function getCount() { - $this->loadIndex(); - return $this->index->count(); - } - - protected function zlDeleteItems($query) { - foreach ($this->index->find($query) as $hit) { - $this->index->delete($hit->id); - } - - $this->index->commit(); - } -} diff --git a/php/modules/search/SearchAllModule.php b/php/modules/search/SearchAllModule.php index 0e95f46..640ab87 100644 --- a/php/modules/search/SearchAllModule.php +++ b/php/modules/search/SearchAllModule.php @@ -24,61 +24,17 @@ */ class SearchAllModule extends SmartyModule { - - protected function normalizeWhiteSpace($query) { - return trim(preg_replace('/\s+/', ' ', $query)); - } - - protected function parseQuery($query) { - // add some space - $q = " $query "; - - // check for site:X,Y,Z strings - $sites = null; - $m = array(); - if (preg_match("/ site:([a-z0-9,-]+) /i", $q, $m)) { - $sites = explode(",", $m[1]); - $q = preg_replace("/ site:([a-z0-9,-]+) /i", "", $q); - } - - // we want "pure" query version now - // escaping \, !, (, ), :, ^, [, ], {, }, ~, *, ? 
- $q = preg_replace('/[&\|\?~,)("^!{}[]/', " ", $q); - $q = str_replace(']', " ", $q); - $q = preg_replace('/([a-z][a-z][a-z])\*/', '\1~', $q); - $q = str_replace('*', ' ', $q); - $q = str_replace("~", '*', $q); - $q = str_replace("tags:", "tags~", $q); - $q = str_replace("tag:", "tags~", $q); - $q = str_replace("title:", "title~", $q); - $q = str_replace("content:", "content~", $q); - $q = str_replace(":", " ", $q); - $q = str_replace("~", ":", $q); - - $q = $this->normalizeWhiteSpace($q); - - return array("sites" => $sites, "query" => $q); - } - - protected function simplifyForTs($query) { - $q = " $query "; - $q = preg_replace("/ site:[a-z0-9,-]+/i", " ", $q); - $q = $this->normalizeWhiteSpace($q); - $q = preg_replace("/[&\|:\?^~]/", ' ', $q); - $q = preg_replace("/((^)|([\s]+))\-/", '&!', $q); - $q = str_replace("-", " ", $q); - $q = trim($q); - $q = preg_replace('/ +/', '&', $q); - return $q; - } public function build($runData){ - - // parse parameters + $pl = $runData->getParameterList(); $query = trim($pl->getParameterValue("q")); $area = $pl->getParameterValue("a"); + if($area != 'p' && $area != 'f' && $area != 'pf'){ + $area = null; + } + if($query == ''){ return; } @@ -88,90 +44,108 @@ public function build($runData){ return; } + $site = $runData->getTemp("site"); + // pagination + $pageNumber = $pl->getParameterValue("p"); if($pageNumber == null || !is_numeric($pageNumber) || $pageNumber <1){ $pageNumber = 1; } $perPage = 10; - $limit = $perPage; - $offset = ($pageNumber - 1) * $perPage; - // parse query - $query_array = $this->parseQuery($query); - $ts_query = "'" . db_escape_string($this->simplifyForTs($query_array['query'])) . 
"'"; + $limit = $perPage*2+1; + $offset = ($pageNumber - 1)*$perPage; - // find - $lucene = new Wikidot_Search_Lucene(); - $lucene_hits = $lucene->search($query_array['query'], $runData->getUser(), $area, $query_array['sites']); - $result_count = count($lucene_hits); + $qe = $query; + $qe = preg_replace("/[!:\?]/",' ', $qe); + $qe = preg_replace("/[&\|!]+/", ' ', $qe); + $qe = preg_replace("/((^)|([\s]+))\-/", '&!', $qe); + $qe = str_replace("-", " ", $qe); + $qe = trim($qe); + $qe = preg_replace('/ +/', '&', $qe); + // prepare fts query - // limit - $lucene_hits = array_slice($lucene_hits, $offset, $limit); + // escaped query + $eq = "'".db_escape_string($qe)."'"; - // hedline options + // search pages $headlineOptions = "'MaxWords=200, MinWords=100'"; - // fetch items from database with highlight $db = Database::connection(); + $v = pg_version($db->getLink()); - if (!preg_match(';^8\.3;', $v['server'])) { + if(!preg_match(';^8\.3;', $v['server'])){ $db->query("SELECT set_curcfg('default')"); } else { $tsprefix = 'ts_'; // because in postgresql 8.3 functions are ts_rank and ts_header } - $res = array(); + $q = "SELECT *, fts_entry.unix_name AS fts_unix_name, {$tsprefix}headline(text, q, 'MaxWords=50, MinWords=30') AS headline_text, {$tsprefix}headline(title, q, $headlineOptions) AS headline_title FROM fts_entry, site, to_tsquery($eq) AS q " . 
+ "WHERE site.visible=TRUE AND site.private = FALSE AND site.deleted = FALSE"; + - foreach ($lucene_hits as $fts_id) { - - $q = "SELECT *, - fts_entry.unix_name AS fts_unix_name, - {$tsprefix}headline(text, q, 'MaxWords=50, MinWords=30') AS headline_text, - {$tsprefix}headline(title, q, $headlineOptions) AS headline_title - FROM fts_entry, site, to_tsquery($ts_query) AS q - WHERE fts_id = $fts_id AND fts_entry.site_id = site.site_id"; - - file_put_contents("/tmp/debug-query", "$q\n"); + if($area){ - $r = $db->query($q); - $res_one = $r->fetchAll(); - - if ($res_one && count($res_one)) { - $res[] = $res_one[0]; + switch($area){ + case 'f': + $q .= " AND thread_id IS NOT NULL "; + break; + case 'p': + $q .= " AND page_id IS NOT NULL "; + break; } + } + $q .= " AND " . + "vector @@ q " . + "AND fts_entry.site_id=site.site_id " . + "ORDER BY {$tsprefix}rank(vector, q) DESC LIMIT $limit OFFSET $offset"; - // pager data - $total_pages = ceil($result_count / $perPage); - $pagerData = array(); - $pagerData['current_page'] = $pageNumber; - $pagerData['known_pages'] = min(array($pageNumber + 2, $total_pages)); - $pagerData['total_pages'] = $total_pages; - - // construct URLs - for ($i = 0; $i < count($res); $i++) { - $o = $res[$i]; - $res[$i]['site'] = new DB_Site($res[$i]); - if($o['page_id'] !== null){ - $res[$i]['url'] = 'http://'.$res[$i]['site']->getDomain().'/'.$o['fts_unix_name']; + $r = $db->query($q); + $res = $r->fetchAll(); + + if($res){ + // fix urls + $counted = count($res); + + $pagerData = array(); + $pagerData['current_page'] = $pageNumber; + if($counted >$perPage*2){ + $knownPages=$pageNumber + 2; + $pagerData['known_pages'] = $knownPages; + } elseif($counted>$perPage){ + $knownPages=$pageNumber + 1; + $pagerData['total_pages'] = $knownPages; }else{ - $res[$i]['url'] = 'http://'.$res[$i]['site']->getDomain().'/forum/t-'.$o['thread_id'].'/'.$o['unix_name']; + $totalPages = $pageNumber; + $pagerData['total_pages'] = $totalPages; + } + + $res = 
array_slice($res, 0, $perPage); + for($i=0; $i<count($res); $i++){ + $o = $res[$i]; + $res[$i]['site'] = new DB_Site($res[$i]); + if($o['page_id'] !== null){ + $res[$i]['url'] = 'http://'.$res[$i]['site']->getDomain().'/'.$o['fts_unix_name']; + }else{ + $res[$i]['url'] = 'http://'.$res[$i]['site']->getDomain().'/forum/t-'.$o['thread_id'].'/'.$o['unix_name']; + } } + } - // feed the template $runData->contextAdd("pagerData", $pagerData); + $runData->contextAdd("results", $res); $runData->contextAdd("countResults", count($res)); - $runData->contextAdd("totalResults", $result_count); $runData->contextAdd("query", $query); $runData->contextAdd("encodedQuery", urldecode($query)); $runData->contextAdd("queryEncoded", urlencode($query)); $runData->contextAdd("area", $area); - //$runData->contextAdd("query_debug", $lucene_query); - $runData->contextAdd("domain", $runData->getTemp("site")->getDomain()); + $runData->contextAdd("query_debug", $qe); + $runData->contextAdd("domain", $site->getDomain()); } diff --git a/php/utils/Deleter.php b/php/utils/Deleter.php index 8732ad5..9626c98 100644 --- a/php/utils/Deleter.php +++ b/php/utils/Deleter.php @@ -102,10 +102,6 @@ public function deletePage($page, $site = null) { $outdater = new Outdater(); $outdater->pageEvent('delete', $page->getUnixName()); - // and from search index - $lucene = new Wikidot_Search(); - $lucene->queueDeletePage($page->getPageId()); - // outdate descs too foreach ($descs as $desc) { $outdater->outdatePageCache($desc); @@ -149,10 +145,6 @@ public function deleteSite($site) { // delete forum? no. will be autodeleted based on the database constrains. 
- // need to delete post revisions - $db = Database::connection(); - $q = "DELETE FROM forum_post_revision WHERE forum_post_id IN (SELECT post_id FROM forum_post WHERE site_id= {$site->getSiteId()}"; - $db->query($q); //delete the site itself $outdater = new Outdater(); diff --git a/php/utils/Indexer.php b/php/utils/Indexer.php index 9a99f52..a08ea25 100644 --- a/php/utils/Indexer.php +++ b/php/utils/Indexer.php @@ -74,19 +74,13 @@ public function indexPage($page){ if(!preg_match(';^8\.3;', $v['server'])){ $db->query("SELECT set_curcfg('default')"); } - $ie->setVector("(setweight( to_tsvector('$title'), 'A') || to_tsvector('".db_escape_string($text)."') || setweight( to_tsvector('$tagstring'), 'B'))", true); + $ie->setVector("(setweight( to_tsvector('$title'), 'C')||setweight( to_tsvector('$unixName'), 'C') || to_tsvector('".db_escape_string($text)."')||setweight( to_tsvector('$tagstring'), 'C'))", true); $ie->save(); - - $lucene = new Wikidot_Search_Lucene(); - $lucene->queueFtsEntry($ie->getFtsId()); } public function deindexPage($page){ $ie = DB_FtsEntryPeer::instance()->selectByPageId($page->getPageId()); - DB_FtsEntryPeer::instance()->deleteByPrimaryKey($ie->getFtsId()); - - $lucene = new Wikidot_Search_Lucene(); - $lucene->queueDeletePage($page->getPageId()); + DB_FtsEntryPeer::instance()->deleteByPrimaryKey($ie->getFtsId()); } public function indexThread($thread){ @@ -124,14 +118,11 @@ public function indexThread($thread){ $ie->setVector("setweight( to_tsvector('$title'), 'C') || setweight( to_tsvector('$description'), 'C') || to_tsvector('".db_escape_string($text)."')", true); $ie->save(); - - $lucene = new Wikidot_Search_Lucene(); - $lucene->queueFtsEntry($ie->getFtsId()); } public function deindexThread($thread){ $ie = DB_FtsEntryPeer::instance()->selectByThreadId($thread->getThreadId()); - DB_FtsEntryPeer::instance()->deleteByPrimaryKey($ie->getFtsId()); + DB_FtsEntryPeer::instance()->deleteByPrimaryKey($ie->getFtsId()); } } diff --git 
a/tests/lucene_bootstrap.php b/tests/lucene_bootstrap.php deleted file mode 100644 index a18ec44..0000000 --- a/tests/lucene_bootstrap.php +++ /dev/null @@ -1,30 +0,0 @@ -createIndex(); -$lucene->indexAllSitesVerbose(); diff --git a/tests/lucene_search.php b/tests/lucene_search.php deleted file mode 100644 index baa5457..0000000 --- a/tests/lucene_search.php +++ /dev/null @@ -1,56 +0,0 @@ -\n"; - echo " php lucene_search.php php -- force using the PHP Lucene implementation\n"; - echo " php lucene_search.php java -- force using the Java Lucene implementation\n"; - exit(); -} - -if (isset($argv[2]) && $argv[2] == 'java') { - GlobalProperties::$SEARCH_USE_JAVA = true; -} elseif (isset($argv[2]) && $argv[2] == 'php') { - GlobalProperties::$SEARCH_USE_JAVA = false; -} - -$lucene = new Wikidot_Search_Lucene(); -$hits = $lucene->rawQuery($argv[1]); - -$i = 0; -echo "indexed: " . $lucene->getCount() . "\n"; -echo "hits: " . count($hits) . "\n"; - -foreach ($hits as $hit) { - if (++$i == 10) { - return; - } - echo "\n"; - echo $hit; -} -echo "\n";