diff --git a/Specialized Areas/Data Quality/Similarity Calculator/README.md b/Specialized Areas/Data Quality/Similarity Calculator/README.md new file mode 100644 index 0000000000..0ba195a349 --- /dev/null +++ b/Specialized Areas/Data Quality/Similarity Calculator/README.md @@ -0,0 +1,32 @@ +# Similarity Calculator for ServiceNow Incidents + +## Overview +This utility provides manual similarity scoring between ServiceNow incident records using text analysis, without requiring machine learning. It helps developers and admins find similar incidents by comparing descriptions and calculating similarity scores programmatically. + +## How It Works +1. Extracts keywords from each incident's short description and description (lowercased, with stopwords and words shorter than 3 characters removed) +2. Compares keyword overlap between incidents +3. Calculates a similarity score (0-100%) as the Jaccard index: shared keywords divided by all distinct keywords across both incidents +4. Finds and ranks similar incidents based on score + +## Features +- Compare incident descriptions using keyword matching +- Calculate similarity scores between incidents +- Find and rank similar incidents programmatically +- No ML or Predictive Intelligence required + +## Use Cases +- Manual clustering of incidents +- Identifying duplicate or related tickets +- Data quality analysis before ML model training +- Root cause analysis and incident triage + +## Setup Requirements +- ServiceNow instance with access to the `incident` table +- Script execution permissions (Background Script or Script Include) +- No external dependencies + +## Customization +- Adjust keyword extraction logic for your environment +- Change scoring algorithm to use TF-IDF, cosine similarity, etc. 
// ========================================
// Similarity Calculator for ServiceNow Incidents
// ========================================
// Purpose: Manually score similarity between incidents using text analysis
// No ML required
//
// How it works:
//   1. Reads the configured text fields of a base incident and of each
//      candidate incident.
//   2. Reduces each text to a set of lowercase keywords (stopwords and words
//      shorter than 3 characters are dropped).
//   3. Scores each candidate with the Jaccard index of the two keyword sets,
//      expressed as a percentage (0-100).
//   4. Logs the candidates ranked by score, highest first.
//
// Customization: filter candidates by assignment group, category, or other
// fields by adding further addQuery() calls to the candidate query below.
// ========================================

(function similarityCalculator() {
    // --- CONFIG ---
    var config = {
        table: 'incident',
        baseIncidentSysId: '89b325155370f610de0038e0a0490ec5', // Set to the sys_id of the incident to compare
        fields: ['short_description', 'description'],
        maxCandidates: 1000, // Maximum number of records to read and score (0 = no cap)
        maxResults: 50, // Maximum number of ranked results to report
        minSimilarity: 0 // Minimum similarity % to report
    };

    // Stopword lookup, built once instead of scanning an array for every word.
    var STOPWORDS = toSet(['the','and','a','an','to','of','in','for','on','with','at','by','from','is','it','this','that','as','are','was','were','be','has','have','had','but','or','not','can','will','do','does','did','if','so','then','than','too','very','just','also','into','out','up','down','over','under','again','more','less','most','least','such','no','yes','you','your','our','their','my','me','i']);

    /**
     * Helper: Build a membership lookup from a list of words.
     * The lookup has no prototype, so words such as "constructor" are not
     * mistaken for members through inherited Object.prototype properties.
     * @param {string[]} words - Words to register.
     * @returns {Object} Lookup whose own keys are the distinct words.
     */
    function toSet(words) {
        var set = Object.create(null);
        for (var i = 0; i < words.length; i++) {
            set[words[i]] = true;
        }
        return set;
    }

    /**
     * Helper: Extract keywords from text.
     * Lowercases the text, replaces every character other than a-z, 0-9 and
     * space with a space (so non-ASCII letters are dropped), splits on
     * whitespace, and keeps words longer than 2 characters that are not
     * stopwords. Duplicates are kept; calcSimilarity de-duplicates.
     * @param {string} text - Raw text; falsy input yields an empty list.
     * @returns {string[]} Keywords in order of appearance.
     */
    function extractKeywords(text) {
        if (!text) return [];
        var words = String(text).toLowerCase().replace(/[^a-z0-9 ]/g, ' ').split(/\s+/);
        var keywords = [];
        for (var i = 0; i < words.length; i++) {
            var word = words[i];
            if (word.length > 2 && !STOPWORDS[word]) {
                keywords.push(word);
            }
        }
        return keywords;
    }

    /**
     * Helper: Calculate similarity score.
     * Jaccard index of the two keyword sets as a percentage:
     * |A intersect B| / |A union B| * 100.
     * @param {string[]} keywordsA - Keywords of the first text.
     * @param {string[]} keywordsB - Keywords of the second text.
     * @returns {number} Score from 0 to 100; 0 when either list is empty.
     */
    function calcSimilarity(keywordsA, keywordsB) {
        if (!keywordsA.length || !keywordsB.length) return 0;
        var setA = toSet(keywordsA);
        var setB = toSet(keywordsB);
        var intersection = 0;
        var union = 0;
        var word;
        for (word in setA) {
            union++;
            if (setB[word]) intersection++;
        }
        for (word in setB) {
            // Words shared with A were already counted in the first pass.
            if (!setA[word]) union++;
        }
        return union ? (intersection / union * 100) : 0;
    }

    /**
     * Helper: Join the configured fields of a record into one string.
     * @param {GlideRecord} record - Record positioned on the row to read.
     * @returns {string} Field values separated by spaces (empty values join as '').
     */
    function readText(record) {
        return config.fields.map(function(f) { return record.getValue(f); }).join(' ');
    }

    // --- Get base incident ---
    var baseGr = new GlideRecord(config.table);
    if (!baseGr.get(config.baseIncidentSysId)) {
        gs.error('Base incident not found: ' + config.baseIncidentSysId);
        return;
    }
    var baseKeywords = extractKeywords(readText(baseGr));

    // --- Find candidate incidents ---
    var gr = new GlideRecord(config.table);
    gr.addQuery('active', true);
    gr.addQuery('sys_id', '!=', config.baseIncidentSysId);
    // Order the scan so that, when the cap below applies, the same (most
    // recently created) records are scored on every run.
    gr.orderByDesc('sys_created_on');
    // Cap how many records are scanned, NOT how many are reported: limiting the
    // query to maxResults would rank only an arbitrary subset of candidates.
    if (config.maxCandidates > 0) {
        gr.setLimit(config.maxCandidates);
    }
    gr.query();

    var results = [];
    while (gr.next()) {
        var score = calcSimilarity(baseKeywords, extractKeywords(readText(gr)));
        // Honour the configured reporting threshold.
        if (score < config.minSimilarity) continue;
        results.push({
            sys_id: gr.getUniqueValue(),
            number: gr.getValue('number'),
            short_description: gr.getValue('short_description'),
            similarity: score
        });
    }

    // --- Sort and print results ---
    results.sort(function(a, b) { return b.similarity - a.similarity; });
    // Keep only the best matches once every candidate has been scored.
    results = results.slice(0, config.maxResults);
    gs.info('=== Similarity Results ===');
    for (var i = 0; i < results.length; i++) {
        var r = results[i];
        gs.info((i+1) + '. ' + r.number + ' (' + r.similarity.toFixed(1) + '%) - ' + r.short_description);
    }
    if (results.length === 0) {
        gs.info('No similar incidents found above threshold.');
    }
})();