Skip to content

Commit

Permalink
Separated standard search mode and Japanese search mode
Browse files Browse the repository at this point in the history
  • Loading branch information
yama committed Mar 1, 2018
1 parent 97f1e80 commit 484e7cf
Show file tree
Hide file tree
Showing 3 changed files with 144 additions and 107 deletions.
8 changes: 7 additions & 1 deletion nucleus/libs/BLOG.php
Expand Up @@ -527,7 +527,13 @@ function search($query, $template, $amountMonths, $maxresults, $startpos) {
*/
function getSqlSearch($query, $amountMonths = 0, &$highlight, $mode = '')
{
$searchclass = new SEARCH($query);
global $DIR_LIBS;

if (stripos(getLanguageName(),'japanese')!==false) {
include_once("{$DIR_LIBS}SEARCH_JA.php");
$searchclass = new SEARCH_JA($query);
}
else $searchclass = new SEARCH($query);

$highlight = $searchclass->inclusive;

Expand Down
112 changes: 6 additions & 106 deletions nucleus/libs/SEARCH.php
Expand Up @@ -27,40 +27,17 @@ class SEARCH {
public $marked;
public $inclusive;
public $blogs;
private $is_jp_mode;
private $jp_marked

public function SEARCH($text) { $this->__construct($text); }
function __construct($text) {
global $blogid;

$this->is_jp_mode = (preg_match('/japanese/i', getLanguageName()) && function_exists('mb_convert_encoding'));
$text = preg_replace ("/[<,>,=,?,!,#,^,(,),[,\],:,;,\\\,%]/",'',$text);

if ($this->is_jp_mode) {
/* * * for jp * * * * * * * * * * */
$this->encoding = strtolower(preg_replace('|[^a-z0-9-_]|i', '', _CHARSET));
if ($this->encoding != 'utf-8') {
$text = mb_convert_encoding($text, "UTF-8", $this->encoding);
}
$text = str_replace ("\xE3\x80\x80",' ',$text);
$text = preg_replace ("/[<>=?!#^()[\]:;\\%]/",'',$text);

$this->ascii = '[\x00-\x7F]';
$this->two = '[\xC0-\xDF][\x80-\xBF]';
$this->three = '[\xE0-\xEF][\x80-\xBF][\x80-\xBF]';

$this->jp_marked = $this->boolean_mark_atoms_jp($text);
/* * * * * * * * * * * * * * * * */
} else {
$text = preg_replace ("/[<,>,=,?,!,#,^,(,),[,\],:,;,\\\,%]/",'',$text);
}

$this->querystring = $text;
if (!$this->is_jp_mode) {
$this->marked = $this->boolean_mark_atoms($text);
}
$this->inclusive = $this->boolean_inclusive_atoms($text);
$this->blogs = array();
$this->querystring = $text;
$this->marked = $this->boolean_mark_atoms($text);
$this->inclusive = $this->boolean_inclusive_atoms($text);
$this->blogs = array();

// get all public searchable blogs, no matter what, include the current blog allways.
$res = sql_query( parseQuery('SELECT bnumber FROM <%prefix%>blog WHERE bincludesearch=1') );
Expand Down Expand Up @@ -130,28 +107,17 @@ function boolean_inclusive_atoms($string) {
$result = str_replace('+', '', $result);

/* strip exlusive atoms */
if ($this->is_jp_mode)
$pattern = "#\-\(([A-Za-z0-9]|$this->two|$this->three){1,}([A-Za-z0-9\-\.\_\,]|$this->two|$this->three){0,}\)#";
else
$pattern = "#\-\([A-Za-z0-9]{1,}[A-Za-z0-9\-\.\_\,]{0,}\)#";
$pattern = "#\-\([A-Za-z0-9]{1,}[A-Za-z0-9\-\.\_\,]{0,}\)#";
$result = preg_replace($pattern, '', $result);

$result = str_replace('(', ' ', $result);
$result = str_replace(')', ' ', $result);
$result = str_replace(',', ' ', $result);
if ($this->is_jp_mode) {
if ($this->encoding != 'utf-8') {
$result = mb_convert_encoding($result, $this->encoding, "UTF-8");
}
}

return $result;
}

function boolean_sql_where($match) {
if ($this->is_jp_mode)
return $this->boolean_sql_where_jp($match);

$result = $this->marked;

$this->boolean_sql_where_cb1($match); // set the static $match
Expand All @@ -170,14 +136,6 @@ function boolean_sql_where($match) {

return $result;
}
protected function boolean_sql_where_jp($match) {
$result = $this->jp_marked; /* for jp */
$result = $this->boolean_sql_where_jp_short($result, $match); /* for jp */
if ($this->encoding != 'utf-8') {
$result = mb_convert_encoding($result, $this->encoding, 'UTF-8');
}
return $result;
}

function boolean_sql_where_cb1($matches) {
static $match;
Expand Down Expand Up @@ -263,62 +221,4 @@ protected function boolean_sql_select_short($string, $match) {

return $score;
}

/***********************************************
Make "WHERE" (jp)
***********************************************/

protected function boolean_mark_atoms_jp($string) {
$result = trim($string);
$result = preg_replace("/([[:space:]]{2,})/", ' ', $result);

/* convert normal boolean operators to shortened syntax */
$result = str_ireplace(' not ', ' -', $result);
$result = str_ireplace(' and ', ' ', $result);
$result = str_ireplace(' or ', ',', $result);

/* strip excessive whitespace */
$result = str_replace(', ', ',', $result);
$result = str_replace(' ,', ',', $result);
$result = str_replace('- ', '-', $result);
$result = str_replace('+', '', $result);
$result = str_replace(',', ' ,', $result);

return $result;
}

protected function boolean_sql_where_jp_short($string, $match) {
$match_a = explode(',', $match);
$key_a = explode(' ', $string);

$total = count($match_a);
for ($ith=0; $ith<$total; $ith++) {
$binKey = preg_match('/[a-zA-Z]/', $key_a[0]) ? '' : 'BINARY';
$temp_a[$ith] = "(i.$match_a[$ith] LIKE " . $binKey . " '%" . sql_real_escape_string($key_a[0]) . "%') ";
}
$like = '('.join(' or ',$temp_a).')';

for ($kn = 1; $kn < count($key_a); $kn++) {
$binKey = preg_match('/[a-zA-Z]/', $key_a[$kn]) ? '' : 'BINARY';
if (substr($key_a[$kn], 0, 1) == ",") {
for($ith = 0; $ith < count($match_a); $ith++) {
$temp_a[$ith] = " (i.$match_a[$ith] LIKE " . $binKey . " '%" . sql_real_escape_string(substr($key_a[$kn], 1)) . "%') ";
}
$like .=' OR ('. join(' or ', $temp_a).')';
}elseif(substr($key_a[$kn],0,1) != '-'){
for($ith=0;$ith<count($match_a);$ith++){
$temp_a[$ith] = " (i.$match_a[$ith] LIKE " . $binKey . " '%" . sql_real_escape_string($key_a[$kn]) . "%') ";
}
$like .=' AND ('. join(' or ', $temp_a).')';
}else{
for($ith=0;$ith<count($match_a);$ith++){
$temp_a[$ith] = " NOT(i.$match_a[$ith] LIKE " . $binKey . " '%" . sql_real_escape_string(substr($key_a[$kn], 1)) . "%') ";
}
$like .=' AND ('. join(' and ', $temp_a).')';
}
}

$like = '('.$like.')';
return $like;
}
}
131 changes: 131 additions & 0 deletions nucleus/libs/SEARCH_JA.php
@@ -0,0 +1,131 @@
<?php

class SEARCH_JA extends SEARCH {
public function SEARCH($text) { $this->__construct($text); }
public function SEARCH_JA($text) { $this->__construct($text); }
function __construct($text) {
global $blogid;

$this->encoding = strtolower(preg_replace('|[^a-z0-9-_]|i', '', _CHARSET));
if ($this->encoding != 'utf-8') {
$text = mb_convert_encoding($text, "UTF-8", $this->encoding);
}
$text = str_replace ("\xE3\x80\x80",' ',$text);
$text = preg_replace ("/[<>=?!#^()[\]:;\\%]/",'',$text);

$this->ascii = '[\x00-\x7F]';
$this->two = '[\xC0-\xDF][\x80-\xBF]';
$this->three = '[\xE0-\xEF][\x80-\xBF][\x80-\xBF]';

$this->marked = $this->boolean_mark_atoms($text);
/* * * * * * * * * * * * * * * * */

$this->querystring = $text;
$this->inclusive = $this->boolean_inclusive_atoms($text);
$this->blogs = array();

// get all public searchable blogs, no matter what, include the current blog allways.
$res = sql_query( parseQuery('SELECT bnumber FROM <%prefix%>blog WHERE bincludesearch=1') );
while ($obj = sql_fetch_object($res)) {
$this->blogs[] = intval($obj->bnumber);
}
}

function boolean_inclusive_atoms($string) {
$result = trim($string);
$result = preg_replace("#([[:space:]]{2,})#", ' ', $result);

# just added delimiters to regex and the 'i' for case-insensitive matching

/* convert normal boolean operators to shortened syntax */
$result = str_ireplace(' not ', ' -', $result);
$result = str_ireplace(' and ', ' ', $result);
$result = str_ireplace(' or ', ',', $result);

/* drop unnecessary spaces */
$result = str_replace(' ,', ',', $result);
$result = str_replace(', ', ',', $result);
$result = str_replace('- ', '-', $result);
$result = str_replace('+', '', $result);

/* strip exlusive atoms */
$pattern = "#\-\(([A-Za-z0-9]|$this->two|$this->three){1,}([A-Za-z0-9\-\.\_\,]|$this->two|$this->three){0,}\)#";
$result = preg_replace($pattern, '', $result);

$result = str_replace('(', ' ', $result);
$result = str_replace(')', ' ', $result);
$result = str_replace(',', ' ', $result);
if ($this->encoding != 'utf-8') {
$result = mb_convert_encoding($result, $this->encoding, "UTF-8");
}

return $result;
}

function boolean_sql_where($match) {
$result = $this->marked;
$result = $this->boolean_sql_where_short($result, $match);
if ($this->encoding != 'utf-8') {
$result = mb_convert_encoding($result, $this->encoding, 'UTF-8');
}
return $result;
}

/***********************************************
Make "WHERE"
***********************************************/

function boolean_mark_atoms($string) {
$result = trim($string);
$result = preg_replace("/([[:space:]]{2,})/", ' ', $result);

/* convert normal boolean operators to shortened syntax */
$result = str_ireplace(' not ', ' -', $result);
$result = str_ireplace(' and ', ' ', $result);
$result = str_ireplace(' or ', ',', $result);

/* strip excessive whitespace */
$result = str_replace(', ', ',', $result);
$result = str_replace(' ,', ',', $result);
$result = str_replace('- ', '-', $result);
$result = str_replace('+', '', $result);
$result = str_replace(',', ' ,', $result);

return $result;
}

function boolean_sql_where_short($string, $match) {
$match_a = explode(',', $match);
$key_a = explode(' ', $string);

$total = count($match_a);
for ($ith=0; $ith<$total; $ith++) {
$binKey = preg_match('/[a-zA-Z]/', $key_a[0]) ? '' : 'BINARY';
$temp_a[$ith] = "(i.$match_a[$ith] LIKE " . $binKey . " '%" . sql_real_escape_string($key_a[0]) . "%') ";
}
$like = '('.join(' or ',$temp_a).')';

for ($kn = 1; $kn < count($key_a); $kn++) {
$binKey = preg_match('/[a-zA-Z]/', $key_a[$kn]) ? '' : 'BINARY';
if (substr($key_a[$kn], 0, 1) == ",") {
for($ith = 0; $ith < count($match_a); $ith++) {
$temp_a[$ith] = " (i.$match_a[$ith] LIKE " . $binKey . " '%" . sql_real_escape_string(substr($key_a[$kn], 1)) . "%') ";
}
$like .=' OR ('. join(' or ', $temp_a).')';
}elseif(substr($key_a[$kn],0,1) != '-'){
for($ith=0;$ith<count($match_a);$ith++){
$temp_a[$ith] = " (i.$match_a[$ith] LIKE " . $binKey . " '%" . sql_real_escape_string($key_a[$kn]) . "%') ";
}
$like .=' AND ('. join(' or ', $temp_a).')';
}else{
for($ith=0;$ith<count($match_a);$ith++){
$temp_a[$ith] = " NOT(i.$match_a[$ith] LIKE " . $binKey . " '%" . sql_real_escape_string(substr($key_a[$kn], 1)) . "%') ";
}
$like .=' AND ('. join(' and ', $temp_a).')';
}
}

$like = '('.$like.')';
return $like;
}
}

0 comments on commit 484e7cf

Please sign in to comment.