diff --git a/Specialized Areas/Predictive Intelligence/Training Data Preparer/README.md b/Specialized Areas/Predictive Intelligence/Training Data Preparer/README.md new file mode 100644 index 0000000000..030e21cec1 --- /dev/null +++ b/Specialized Areas/Predictive Intelligence/Training Data Preparer/README.md @@ -0,0 +1,42 @@ +# Training Data Quality Analyzer for ServiceNow Predictive Intelligence + +## Overview +This script analyzes the quality of incident data in ServiceNow to determine readiness for Predictive Intelligence (PI) model training. It provides detailed statistics and quality metrics to help ServiceNow developers and admins identify and address data issues before starting ML training jobs. + +## Purpose +- Assess completeness and quality of key fields in incident records +- Identify common data issues that could impact PI model performance +- Provide actionable insights for improving training data + +## Features +- Checks completeness of important fields (e.g., short_description, description, category, subcategory, close_notes, assignment_group) +- Analyzes text quality for description and close notes +- Evaluates category diversity and resolution times +- Calculates an overall data quality score +- Outputs results to the ServiceNow system logs + +## Setup Requirements +1. **ServiceNow Instance** with Predictive Intelligence plugin enabled +2. **Script Execution Permissions**: Run as a background script or Script Include with access to the `incident` table +3. **No external dependencies**: Uses only standard ServiceNow APIs (GlideRecord, GlideAggregate, GlideDateTime) +4. **Sufficient Data Volume**: At least 50 resolved/closed incidents recommended for meaningful analysis + +## How It Works +1. **Field Existence Check**: Dynamically verifies that each key field exists on the incident table or its parent tables +2. **Statistics Gathering**: Collects counts for total, resolved, and recent incidents +3. 
// ---------------------------------------------------------------------------
// Preserved patch text (NOT part of the script).
// The source span replaced by this block also carries the tail of the
// README.md hunk and the patch header for this file. That text is reproduced
// below unchanged, one patch line per comment line, so nothing is lost.
//
//   **Completeness Analysis**: Calculates the percentage of records with each key field filled
//   +4. **Text Quality Analysis**: Measures average length and quality of description and close notes
//   +5. **Category Distribution**: Reports on the spread and diversity of incident categories
//   +6. **Resolution Time Analysis**: Evaluates how quickly incidents are resolved
//   +7. **Quality Scoring**: Combines all metrics into a single overall score
//   +8. **Log Output**: Prints all results and warnings to the ServiceNow logs for review
//   +
//   +## Customization
//   +- Adjust the `keyFields` array in the config section to match your organization's data requirements
//   +- Modify thresholds for text length, resolution time, and completeness as needed
//   +- Increase `sampleSize` for more detailed analysis if you have a large dataset
//   +
//   +## Security & Best Practices
//   +- Do not run in production without review
//   +- Ensure no sensitive data is exposed in logs
//   +- Validate script results in a sub-production environment before using for model training
//   diff --git a/Specialized Areas/Predictive Intelligence/Training Data Preparer/analyze_incident_data_training_quality.js b/Specialized Areas/Predictive Intelligence/Training Data Preparer/analyze_incident_data_training_quality.js
//   new file mode 100644
//   index 0000000000..c34d6440d2
//   --- /dev/null
//   +++ b/Specialized Areas/Predictive Intelligence/Training Data Preparer/analyze_incident_data_training_quality.js
//   @@ -0,0 +1,614 @@
// ---------------------------------------------------------------------------

// ========================================
// PI Training Data Quality Analyzer
// ========================================
// Purpose: Analyze incident data quality for Predictive Intelligence training
// Use Case: Identify data quality issues before training ML models
// No Training Required: Analyzes existing data without ML
// ========================================

/**
 * Runs the full analysis and writes the report through gs.info / gs.warn.
 *
 * The script only reads data: it issues COUNT aggregates plus two sampled
 * record scans (each capped at config.sampleSize) against config.table and
 * returns nothing.
 *
 * Helper functions are declared after the main flow and rely on function
 * hoisting; `config` and `resolvedStates` are assigned before any helper runs.
 */
(function analyzeTrainingDataQuality() {
    // ============================================
    // CONFIGURATION
    // ============================================
    var config = {
        table: 'incident',

        // Fields to analyze for completeness
        keyFields: [
            'short_description',
            'description',
            'category',
            'subcategory',
            'close_notes',
            'assignment_group'
        ],

        // Quality thresholds. Every limit used by a check or quoted in a log
        // message lives here so that conditions and messages cannot drift apart.
        thresholds: {
            minDescriptionLength: 20,   // Characters
            minCloseNotesLength: 50,    // Characters
            minResolutionTime: 5,       // Minutes
            maxAge: 365,                // Days - only analyze recent data
            targetCompleteness: 80,     // Percent of fields filled
            lowCompleteness: 50,        // Percent - below this a field is flagged as low
            minGoodTextPct: 70,         // Percent of sampled text that must meet the length limit
            maxTooQuickPct: 30,         // Percent of too-quick resolutions tolerated
            minResolved: 50,            // Resolved/closed incidents needed for training
            recommendedResolved: 100,   // Resolved/closed incidents recommended
            minCategories: 5            // Distinct category values expected
        },

        // States to analyze
        states: {
            resolved: 6,
            closed: 7
        },

        sampleSize: 500 // Max records to analyze in detail
    };

    // Comma-separated state list shared by every "resolved or closed" filter.
    var resolvedStates = [config.states.resolved, config.states.closed].join(',');

    gs.info('========================================');
    gs.info('PI Training Data Quality Analysis');
    gs.info('========================================');
    gs.info('Table: ' + config.table);
    gs.info('Sample Size: Up to ' + config.sampleSize + ' records');
    gs.info('');

    // ============================================
    // STEP 1: Overall Data Statistics
    // ============================================
    gs.info('=== STEP 1: Overall Statistics ===');
    gs.info('');

    var stats = getOverallStats();

    gs.info('Total Incidents:');
    gs.info(' All States: ' + stats.total);
    gs.info(' Resolved/Closed: ' + stats.resolved);
    gs.info(' Last 90 Days: ' + stats.recent90);
    gs.info(' Last 365 Days: ' + stats.recent365);
    gs.info('');

    if (stats.resolved < config.thresholds.minResolved) {
        gs.warn('⚠️ Low number of resolved incidents - need at least ' +
            config.thresholds.minResolved + ' for training');
        gs.info('Current: ' + stats.resolved);
    } else {
        gs.info('✅ Sufficient resolved incidents for training');
    }

    // ============================================
    // STEP 2: Field Completeness Analysis
    // ============================================
    gs.info('');
    gs.info('=== STEP 2: Field Completeness Analysis ===');
    gs.info('Analyzing resolved/closed incidents from last ' + config.thresholds.maxAge + ' days');
    gs.info('');

    var completeness = analyzeFieldCompleteness();

    gs.info('Field Completeness Scores:');
    gs.info('');

    for (var fieldKey in completeness) {
        if (!Object.prototype.hasOwnProperty.call(completeness, fieldKey)) {
            continue;
        }
        var entry = completeness[fieldKey];
        var pct = entry.percentage;
        var icon = '❌';
        if (pct >= config.thresholds.targetCompleteness) {
            icon = '✅';
        } else if (pct >= config.thresholds.lowCompleteness) {
            icon = '⚠️';
        }

        gs.info(icon + ' ' + fieldKey + ': ' + pct.toFixed(1) + '%');
        gs.info(' Filled: ' + entry.filled + ' / ' + entry.total);

        if (pct < config.thresholds.lowCompleteness) {
            gs.info(' ⚠️ LOW - This field may not be useful for training');
        }
        gs.info('');
    }

    // ============================================
    // STEP 3: Text Quality Analysis
    // ============================================
    gs.info('');
    gs.info('=== STEP 3: Text Quality Analysis ===');
    gs.info('Analyzing text field content quality');
    gs.info('');

    var textQuality = analyzeTextQuality();

    gs.info('Description Quality:');
    logTextQuality(textQuality.description, config.thresholds.minDescriptionLength);

    gs.info('Close Notes Quality:');
    logTextQuality(textQuality.closeNotes, config.thresholds.minCloseNotesLength);

    if (textQuality.description.goodQualityPct < config.thresholds.minGoodTextPct) {
        gs.warn('⚠️ Many incidents have short/poor descriptions');
        gs.info(' Consider filtering for better quality data');
    }

    if (textQuality.closeNotes.goodQualityPct < config.thresholds.minGoodTextPct) {
        gs.warn('⚠️ Many incidents have short/poor close notes');
        gs.info(' This will impact solution recommendation quality');
    }

    // ============================================
    // STEP 4: Category Distribution
    // ============================================
    gs.info('');
    gs.info('=== STEP 4: Category Distribution ===');
    gs.info('Analyzing incident category spread');
    gs.info('');

    var categoryDist = analyzeCategoryDistribution();

    gs.info('Top 10 Categories:');
    var shown = Math.min(10, categoryDist.length);
    for (var i = 0; i < shown; i++) {
        var cat = categoryDist[i];
        gs.info(' ' + (i + 1) + '. ' + (cat.category || '(empty)') + ': ' + cat.count + ' incidents');
    }
    gs.info('');

    if (categoryDist.length < config.thresholds.minCategories) {
        gs.warn('⚠️ Low category diversity - model may not generalize well');
    } else {
        gs.info('✅ Good category diversity for training');
    }

    // ============================================
    // STEP 5: Resolution Time Analysis
    // ============================================
    gs.info('');
    gs.info('=== STEP 5: Resolution Time Analysis ===');
    gs.info('');

    var timeAnalysis = analyzeResolutionTimes();

    gs.info('Resolution Times:');
    gs.info(' Average: ' + timeAnalysis.avgMinutes.toFixed(0) + ' minutes');
    gs.info(' Median: ' + timeAnalysis.medianMinutes.toFixed(0) + ' minutes');
    gs.info(' Too Quick (<' + config.thresholds.minResolutionTime + ' min): ' +
        timeAnalysis.tooQuick + ' (' + timeAnalysis.tooQuickPct.toFixed(1) + '%)');
    gs.info('');

    if (timeAnalysis.tooQuickPct > config.thresholds.maxTooQuickPct) {
        gs.warn('⚠️ Many incidents resolved very quickly');
        gs.info(' These may be duplicates or low-quality data');
        gs.info(' Consider filtering: resolved_at > opened_at + ' +
            config.thresholds.minResolutionTime + ' minutes');
    }

    // ============================================
    // STEP 6: Overall Quality Score
    // ============================================
    gs.info('');
    gs.info('=== STEP 6: Overall Data Quality Score ===');
    gs.info('');

    var overallScore = calculateOverallScore(completeness, textQuality, timeAnalysis);

    var scoreIcon = overallScore >= 80 ? '✅' : overallScore >= 60 ? '⚠️' : '❌';
    gs.info(scoreIcon + ' Overall Quality Score: ' + overallScore.toFixed(0) + '/100');
    gs.info('');

    if (overallScore >= 80) {
        gs.info('✅ EXCELLENT - Data is ready for high-quality training');
    } else if (overallScore >= 60) {
        gs.info('⚠️ FAIR - Data can be used but consider improvements');
    } else {
        gs.info('❌ POOR - Significant data quality issues exist');
    }

    // ============================================
    // STEP 7: Recommendations
    // ============================================
    gs.info('');
    gs.info('=== STEP 7: Recommendations ===');
    gs.info('');

    var recommendations = generateRecommendations(
        stats, completeness, textQuality, timeAnalysis, categoryDist);

    if (recommendations.length === 0) {
        gs.info('✅ No recommendations - none of the checks above found an issue');
    } else {
        for (var r = 0; r < recommendations.length; r++) {
            gs.info(' ' + (r + 1) + '. ' + recommendations[r]);
        }
    }

    gs.info('');
    gs.info('========================================');
    gs.info('Analysis Complete');
    gs.info('========================================');

    // ============================================
    // HELPER FUNCTIONS
    // ============================================

    /**
     * Restricts a query to resolved/closed records, optionally limited to
     * records created within the last `days` days.
     *
     * @param {GlideRecord|GlideAggregate} query - Query to add conditions to.
     * @param {number} [days] - Maximum record age in days; omitted = no age limit.
     * @returns {GlideRecord|GlideAggregate} The same query, for chaining.
     */
    function applyResolvedFilter(query, days) {
        query.addQuery('state', 'IN', resolvedStates);
        if (typeof days === 'number') {
            query.addQuery('sys_created_on', '>=', gs.daysAgoStart(days));
        }
        return query;
    }

    /**
     * Runs a COUNT aggregate on an already-filtered GlideAggregate.
     *
     * @param {GlideAggregate} aggregate - Aggregate with its conditions set.
     * @returns {number} Matching row count; 0 when no row or no numeric value comes back.
     */
    function readCount(aggregate) {
        aggregate.addAggregate('COUNT');
        aggregate.query();
        if (!aggregate.next()) {
            return 0;
        }
        return parseInt(aggregate.getAggregate('COUNT'), 10) || 0;
    }

    /**
     * Checks whether a field is usable on a table.
     *
     * @param {string} tableName - Table to inspect.
     * @param {string} fieldName - Column name to look for.
     * @returns {boolean} True when the table is valid and reports the field as valid.
     */
    function fieldExists(tableName, fieldName) {
        var probe = new GlideRecord(tableName);
        return probe.isValid() && probe.isValidField(fieldName);
    }

    /**
     * Logs the three summary lines for one analyzed text field.
     *
     * @param {Object} quality - One entry of analyzeTextQuality()'s result.
     * @param {number} minLength - Length limit quoted in the "Too Short" line.
     */
    function logTextQuality(quality, minLength) {
        gs.info(' Average Length: ' + quality.avgLength.toFixed(0) + ' characters');
        gs.info(' Too Short (<' + minLength + ' chars): ' + quality.tooShort + ' (' +
            quality.tooShortPct.toFixed(1) + '%)');
        gs.info(' Good Quality: ' + quality.goodQuality + ' (' +
            quality.goodQualityPct.toFixed(1) + '%)');
        gs.info('');
    }

    /**
     * Collects the headline record counts.
     *
     * @returns {{total: number, resolved: number, recent90: number, recent365: number}}
     *   `total` counts every record; the other three count resolved/closed
     *   records (all, created in the last 90 days, created in the last 365 days).
     */
    function getOverallStats() {
        return {
            total: readCount(new GlideAggregate(config.table)),
            resolved: readCount(applyResolvedFilter(new GlideAggregate(config.table))),
            recent90: readCount(applyResolvedFilter(new GlideAggregate(config.table), 90)),
            recent365: readCount(applyResolvedFilter(new GlideAggregate(config.table), 365))
        };
    }

    /**
     * Measures how often each configured key field is filled on resolved/closed
     * records no older than config.thresholds.maxAge days.
     *
     * Fields that do not exist on the table are reported with gs.warn and left
     * out of the result.
     *
     * @returns {Object} Map of field name to {total, filled, percentage}.
     */
    function analyzeFieldCompleteness() {
        var results = {};
        var maxAge = config.thresholds.maxAge;
        var total = readCount(applyResolvedFilter(new GlideAggregate(config.table), maxAge));

        for (var f = 0; f < config.keyFields.length; f++) {
            var fieldName = config.keyFields[f];
            if (!fieldExists(config.table, fieldName)) {
                gs.warn('Field does not exist: ' + fieldName + ' - skipping completeness analysis for this field');
                continue;
            }

            var filledQuery = applyResolvedFilter(new GlideAggregate(config.table), maxAge);
            filledQuery.addQuery(fieldName, '!=', '');
            filledQuery.addNotNullQuery(fieldName);
            var filled = readCount(filledQuery);

            results[fieldName] = {
                total: total,
                filled: filled,
                percentage: total > 0 ? (filled / total * 100) : 0
            };
        }
        return results;
    }

    /**
     * Samples up to config.sampleSize resolved/closed records and measures the
     * length of their description and close notes.
     *
     * Empty values are ignored, so every percentage is relative to the number
     * of non-empty values seen for that field.
     *
     * @returns {{description: Object, closeNotes: Object}} Each entry holds
     *   avgLength, tooShort, tooShortPct, goodQuality and goodQualityPct.
     */
    function analyzeTextQuality() {
        var tallies = {
            description: newTally(config.thresholds.minDescriptionLength),
            closeNotes: newTally(config.thresholds.minCloseNotesLength)
        };

        var sample = applyResolvedFilter(new GlideRecord(config.table), config.thresholds.maxAge);
        sample.setLimit(config.sampleSize);
        sample.query();

        while (sample.next()) {
            addToTally(tallies.description, sample.getValue('description'));
            addToTally(tallies.closeNotes, sample.getValue('close_notes'));
        }

        return {
            description: summarizeTally(tallies.description),
            closeNotes: summarizeTally(tallies.closeNotes)
        };

        // Creates an empty accumulator for one text field.
        function newTally(minLength) {
            return { minLength: minLength, totalLength: 0, count: 0, tooShort: 0, goodQuality: 0 };
        }

        // Adds one field value to an accumulator; empty values are skipped.
        function addToTally(tally, value) {
            var text = value || '';
            if (!text) {
                return;
            }
            tally.count++;
            tally.totalLength += text.length;
            if (text.length < tally.minLength) {
                tally.tooShort++;
            } else {
                tally.goodQuality++;
            }
        }

        // Converts an accumulator into the reported averages and percentages.
        function summarizeTally(tally) {
            var hasData = tally.count > 0;
            return {
                avgLength: hasData ? tally.totalLength / tally.count : 0,
                tooShort: tally.tooShort,
                tooShortPct: hasData ? (tally.tooShort / tally.count * 100) : 0,
                goodQuality: tally.goodQuality,
                goodQualityPct: hasData ? (tally.goodQuality / tally.count * 100) : 0
            };
        }
    }

    /**
     * Counts resolved/closed records per category value.
     *
     * @returns {Array<{category: string, count: number}>} One entry per
     *   category value, sorted by count, largest first.
     */
    function analyzeCategoryDistribution() {
        var grouped = applyResolvedFilter(new GlideAggregate(config.table), config.thresholds.maxAge);
        grouped.groupBy('category');
        grouped.addAggregate('COUNT');
        grouped.query();

        var categories = [];
        while (grouped.next()) {
            categories.push({
                category: grouped.getValue('category'),
                count: parseInt(grouped.getAggregate('COUNT'), 10) || 0
            });
        }

        // Sort descending
        categories.sort(function (a, b) { return b.count - a.count; });

        return categories;
    }

    /**
     * Samples up to config.sampleSize resolved/closed records that have both
     * opened_at and resolved_at, and summarizes resolved_at - opened_at.
     *
     * Non-positive durations are excluded from every figure.
     *
     * @returns {{avgMinutes: number, medianMinutes: number, tooQuick: number,
     *   tooQuickPct: number, sampleSize: number}}
     */
    function analyzeResolutionTimes() {
        var sample = applyResolvedFilter(new GlideRecord(config.table), config.thresholds.maxAge);
        sample.addNotNullQuery('opened_at');
        sample.addNotNullQuery('resolved_at');
        sample.setLimit(config.sampleSize);
        sample.query();

        var times = [];
        var tooQuick = 0;
        var sum = 0;

        while (sample.next()) {
            var opened = new GlideDateTime(sample.getValue('opened_at'));
            var resolved = new GlideDateTime(sample.getValue('resolved_at'));

            // getNumericValue() is used as milliseconds here, as in the original.
            var minutes = GlideDateTime.subtract(opened, resolved).getNumericValue() / 1000 / 60;
            if (minutes <= 0) {
                continue;
            }

            times.push(minutes);
            sum += minutes;
            if (minutes < config.thresholds.minResolutionTime) {
                tooQuick++;
            }
        }

        times.sort(function (a, b) { return a - b; });

        var avgMinutes = 0;
        var medianMinutes = 0;
        if (times.length > 0) {
            avgMinutes = sum / times.length;

            // Even-sized samples have no single middle element: average the two
            // central values instead of reporting the upper one.
            var mid = Math.floor(times.length / 2);
            medianMinutes = times.length % 2 === 1
                ? times[mid]
                : (times[mid - 1] + times[mid]) / 2;
        }

        return {
            avgMinutes: avgMinutes,
            medianMinutes: medianMinutes,
            tooQuick: tooQuick,
            tooQuickPct: times.length > 0 ? (tooQuick / times.length * 100) : 0,
            sampleSize: times.length
        };
    }

    /**
     * Combines the metrics into a single 0-100 score:
     * 40 points field completeness, 40 points text quality, 20 points for the
     * share of resolutions that were not too quick.
     *
     * @param {Object} completeness - Result of analyzeFieldCompleteness().
     * @param {Object} textQuality - Result of analyzeTextQuality().
     * @param {Object} timeAnalysis - Result of analyzeResolutionTimes().
     * @returns {number} Weighted score between 0 and 100.
     */
    function calculateOverallScore(completeness, textQuality, timeAnalysis) {
        var weights = {
            completeness: 40,
            textQuality: 40,
            timeQuality: 20
        };

        // Completeness score (average of all fields)
        var compTotal = 0;
        var compCount = 0;
        for (var name in completeness) {
            if (Object.prototype.hasOwnProperty.call(completeness, name)) {
                compTotal += completeness[name].percentage;
                compCount++;
            }
        }
        var compScore = compCount > 0 ? (compTotal / compCount) : 0;

        // Text quality score (average of description and close notes)
        var textScore = (textQuality.description.goodQualityPct +
            textQuality.closeNotes.goodQualityPct) / 2;

        // Time quality score (inverse of too-quick percentage). With no usable
        // durations there is nothing to reward, so the component scores 0
        // rather than a full 100, matching how the other components treat
        // missing data.
        var timeScore = timeAnalysis.sampleSize > 0 ? (100 - timeAnalysis.tooQuickPct) : 0;

        return (compScore / 100) * weights.completeness +
            (textScore / 100) * weights.textQuality +
            (timeScore / 100) * weights.timeQuality;
    }

    /**
     * Turns the analysis results into a list of suggested actions.
     *
     * @param {Object} stats - Result of getOverallStats().
     * @param {Object} completeness - Result of analyzeFieldCompleteness().
     * @param {Object} textQuality - Result of analyzeTextQuality().
     * @param {Object} timeAnalysis - Result of analyzeResolutionTimes().
     * @param {Array} [categoryDist] - Result of analyzeCategoryDistribution();
     *   when omitted the category diversity check is skipped.
     * @returns {string[]} Recommendations, empty when no check fails.
     */
    function generateRecommendations(stats, completeness, textQuality, timeAnalysis, categoryDist) {
        var limits = config.thresholds;
        var recs = [];

        // Check volume
        if (stats.resolved < limits.recommendedResolved) {
            recs.push('Increase training data volume - aim for ' +
                limits.recommendedResolved + '+ resolved incidents');
        }

        // Check field completeness
        for (var name in completeness) {
            if (Object.prototype.hasOwnProperty.call(completeness, name) &&
                    completeness[name].percentage < limits.lowCompleteness) {
                recs.push('Improve ' + name + ' completeness (currently ' +
                    completeness[name].percentage.toFixed(0) + '%)');
            }
        }

        // Check text quality
        if (textQuality.description.goodQualityPct < limits.minGoodTextPct) {
            recs.push('Encourage more detailed incident descriptions (' +
                limits.minDescriptionLength + '+ characters)');
        }

        if (textQuality.closeNotes.goodQualityPct < limits.minGoodTextPct) {
            recs.push('Improve close notes quality - require detailed resolution steps (' +
                limits.minCloseNotesLength + '+ characters)');
        }

        // Check resolution times
        if (timeAnalysis.tooQuickPct > limits.maxTooQuickPct) {
            recs.push('Filter out quick resolutions (<' + limits.minResolutionTime +
                ' min) - may be duplicates or invalid data');
            recs.push('Add filter: resolved_at > opened_at + ' + limits.minResolutionTime + ' minutes');
        }

        // Check category diversity against the number of category values
        // actually observed, not against the incident count.
        if (categoryDist && categoryDist.length < limits.minCategories) {
            recs.push('Collect more diverse incident data across different categories');
        }

        return recs;
    }

})();