Skip to content

Commit

Permalink
#146 improve search script
Browse files Browse the repository at this point in the history
  • Loading branch information
Sonatai committed Jan 25, 2024
1 parent 695382d commit 4d1b2e9
Showing 1 changed file with 85 additions and 127 deletions.
212 changes: 85 additions & 127 deletions src/components/Search/getSearchScoreV2.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ export const getSearchScoreV2TS = (
searchInput: string,
specifications: IMinimizedSummaryEntry[]
) => {
console.log('---------------------- TS ------------------------------');
const scores: IScore[] = [];

const searchTerm = normalizeString(searchInput);
Expand All @@ -26,14 +25,10 @@ export const getSearchScoreV2TS = (
let score = 0.0;
const featureScores: IFeatureScore[] = [];

// console.log('0 SCORE: ', score);
const classNameScore = searchScore(
score += searchScore(
searchTerm,
normalizedClassName.replace('.', ' ').replace('_', ' ')
);
// console.log('CLASS NAME SCORE: ', classNameScore);
score += classNameScore;

score += searchScore(searchTerm, normalizedTitle);
score += searchScore(searchTerm, normalizedNarrative);

Expand All @@ -56,192 +51,155 @@ export const getSearchScoreV2TS = (
score,
featureScores: sortedFeatureScores,
});

// console.log('FEATURE SCORES: ', featureScores);
});

const sortedScores = scores.sort(sortScore);
const chosen = getTop25(sortedScores);

const chosen = sortedScores.filter((score) => score.score > 0);

// console.log('SCORES: ', scores);
console.log('SORTED SCORE: ', sortedScores);

// console.log('CHOSEN: ', chosen);
console.log('-------------------------------------------------------');
return chosen;
};

/**
 * Drops every entry whose score is not strictly positive and returns at most
 * the first 25 of the remaining entries, in their incoming order.
 *
 * @param sortedScores - score entries; the order is preserved, so the caller
 *   decides what "top" means by sorting beforehand
 * @returns a new array with up to 25 positively scored entries
 */
const getTop25 = (sortedScores: IScore[]) => {
  const positiveOnly = sortedScores.filter(({ score }) => score > 0);
  return positiveOnly.slice(0, 25);
};

/**
 * Comparator that orders entries by `score`, highest first.
 *
 * @returns a positive number when `scoreB` outranks `scoreA`, a negative
 *   number when `scoreA` outranks `scoreB`, and 0 when both scores are equal
 */
const sortScore = (
  scoreA: IScore | IFeatureScore,
  scoreB: IScore | IFeatureScore
) => {
  const first = scoreA.score;
  const second = scoreB.score;
  return second - first;
};

/** Strips leading and trailing whitespace from `term` and lower-cases it. */
const normalizeString = (term: string) => {
  const trimmed = term.trim();
  return trimmed.toLowerCase();
};

const featureSearchScore = (searchTerm: string, sentence: string) => {
let score = 0;

// console.log('SEARCH TERM', searchTerm);
// console.log('SENTENCE', sentence);
// console.log('00 SEARCH SCORE', score);

if (searchTerm.length === 0) {
// console.log('01 SEARCH SCORE 0 - NOTHING HERE');
return 0.0;
}

if (sentence.length === 0) {
// console.log('02 SEARCH SCORE 0 - NOTHING HERE');
return 0.0;
}

let words = removeFillWords(sentence.split(' '));

// console.log('WORDS', words);

let searchWords = searchTerm.split(' ');
searchWords = searchWords.filter((word) => word.length > 0);
words = words.filter((word) => word.length > 0);
/**
 * Lower-cases `term`, trims surrounding whitespace and passes the result
 * through `removeFillWords`.
 */
const normalizeString = (term: string) =>
  removeFillWords(term.toLowerCase().trim());

/**
 * Scores how many of the given search words contain the search term.
 *
 * Only multi-word searches are scored: with one word or none the result is 0.
 * Otherwise every word that contains `searchTerm` as a substring adds 0.25.
 *
 * NOTE(review): the callers in this file pass the words obtained by splitting
 * `searchTerm` on spaces. In that case no single word can contain the whole
 * multi-word term, so the result is always 0 — confirm whether the words were
 * meant to be matched against the sentence instead.
 *
 * @param searchWords - the words to test
 * @param searchTerm - the substring looked for in each word
 * @returns the raw, un-normalised score (a multiple of 0.25)
 */
const getSearchWordsScore = (searchWords: string[], searchTerm: string) => {
  if (searchWords.length <= 1) {
    return 0;
  }

  let searchWordsScore = 0;
  for (const word of searchWords) {
    if (word.includes(searchTerm)) {
      searchWordsScore += 0.25;
    }
  }

  return searchWordsScore;
};

/**
 * Counts the words that contain `searchTerm` as a substring.
 *
 * @param words - the words to inspect
 * @param searchTerm - the substring looked for in each word
 * @returns the number of matching words
 */
const getWordsScore = (words: string[], searchTerm: string) =>
  words.filter((candidate) => candidate.includes(searchTerm)).length;

/**
 * Splits `sentence` on single space characters and discards the empty
 * pieces produced by leading, trailing or repeated spaces.
 */
const getWords = (sentence: string) => {
  const tokens: string[] = [];
  for (const piece of sentence.split(' ')) {
    if (piece.length > 0) {
      tokens.push(piece);
    }
  }
  return tokens;
};

/**
 * Scores how well `sentence` matches `searchTerm`.
 *
 * The result is the sum of two parts:
 *  - the `getSearchWordsScore` value divided by the number of search words,
 *    when that value is positive;
 *  - the `getWordsScore` value divided by the number of sentence words,
 *    when that value is non-zero.
 *
 * @param searchTerm - the term being searched for
 * @param sentence - the text being scored
 * @returns 0 when either argument is empty, otherwise the combined score
 */
const featureSearchScore = (searchTerm: string, sentence: string) => {
  if (searchTerm.length === 0 || sentence.length === 0) {
    return 0.0;
  }

  const sentenceWords = getWords(sentence);
  const termWords = getWords(searchTerm);

  const termWordsScore = getSearchWordsScore(termWords, searchTerm);
  const termPart = termWordsScore > 0 ? termWordsScore / termWords.length : 0;

  // A zero match count also covers an empty word list, so no division by 0.
  const matchCount = getWordsScore(sentenceWords, searchTerm);
  const sentencePart =
    matchCount === 0 ? 0 : matchCount / sentenceWords.length;

  return termPart + sentencePart;
};

/**
 * Scores how well `sentence` matches `searchTerm`.
 *
 * The result is the sum of three parts:
 *  - 1.0 when the sentence contains the whole search term;
 *  - the `getSearchWordsScore` value divided by the number of search words,
 *    when that value is positive;
 *  - the `getWordsScore` value divided by the number of sentence words,
 *    when that value is non-zero.
 *
 * @param searchTerm - the term being searched for
 * @param sentence - the text being scored
 * @returns 0 when either argument is empty, otherwise the combined score
 */
const searchScore = (searchTerm: string, sentence: string) => {
  if (searchTerm.length === 0 || sentence.length === 0) {
    return 0.0;
  }

  let score = 0;

  if (sentence.includes(searchTerm)) {
    score += 1.0;
  }

  const words = getWords(sentence);
  const searchWords = getWords(searchTerm);

  const searchWordsScore = getSearchWordsScore(searchWords, searchTerm);
  if (searchWordsScore > 0) {
    score += searchWordsScore / searchWords.length;
  }

  // A zero word score also covers an empty word list, so no division by 0.
  const wordScore = getWordsScore(words, searchTerm);
  if (wordScore > 0) {
    score += wordScore / words.length;
  }

  return score;
};

/**
 * Returns a copy of `words` without the common English fill words
 * ("the", "of", "and", ...). Matching is exact and case-sensitive, and the
 * order of the remaining words is preserved.
 *
 * @param words - the words to filter
 * @returns a new array holding only the words that are not fill words
 */
const removeFillWords = (words: string[]) => {
  // Noisy words to drop; each word is listed once.
  const noiseWords = new Set([
    'the',
    'a',
    'of',
    'and',
    'or',
    'in',
    'on',
    'at',
    'to',
    'with',
    'by',
    'as',
    'from',
    'is',
    'for',
    'an',
  ]);

  return words.filter((word) => !noiseWords.has(word));
};
/**
 * Strips punctuation and fill words from `sentence`.
 *
 * Every character matched by `regex` is replaced with a space, the text is
 * split on runs of whitespace (spaces, tabs, newlines), words listed in
 * `fillWords` are dropped, and the remaining words are joined with single
 * spaces.
 *
 * @param sentence - the text to clean
 * @returns the remaining words separated by single spaces; an empty string
 *   when nothing is left
 */
const removeFillWords = (sentence: string) =>
  sentence
    .replace(regex, ' ')
    .split(/\s+/)
    // Leading/trailing whitespace yields empty pieces; drop them too.
    .filter((word) => word.length > 0 && !fillWords.includes(word))
    .join(' ');

// Common English fill words that carry no meaning for the search.
// Each word is listed exactly once.
const fillWords = [
  'the',
  'a',
  'of',
  'and',
  'or',
  'in',
  'on',
  'at',
  'to',
  'with',
  'by',
  'as',
  'from',
  'is',
  'for',
  'an',
];

// Punctuation characters that are replaced with a space before scoring.
const replaceCharSet = ['.', '`', ',', '"', '?', '_'];

// Escape the characters that are special inside a character class so that
// extending replaceCharSet (e.g. with ']', '\\', '^' or '-') stays safe.
const escapedCharSet = replaceCharSet
  .map((char) => char.replace(/[\\\]^-]/g, '\\$&'))
  .join('');

// Matches every occurrence of any character listed in replaceCharSet.
const regex = new RegExp(`[${escapedCharSet}]`, 'gi');

0 comments on commit 4d1b2e9

Please sign in to comment.