From 7fdfb3df4743747a4e71e9673cf7bcf6694f35ca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Diamond?= <32074058+Andre-Diamond@users.noreply.github.com> Date: Tue, 28 Jan 2025 10:40:09 +0200 Subject: [PATCH] refactor: Add sanitizeObject function to clean summary data before processing --- .../batchUpdateMeetingSummariesArray.js | 27 +++++++++++++++- .../batchUpdateMeetingSummariesById.js | 31 +++++++++++++++++-- 2 files changed, 55 insertions(+), 3 deletions(-) diff --git a/netlify/functions/batchUpdateMeetingSummariesArray.js b/netlify/functions/batchUpdateMeetingSummariesArray.js index 89d7c44..348fc09 100644 --- a/netlify/functions/batchUpdateMeetingSummariesArray.js +++ b/netlify/functions/batchUpdateMeetingSummariesArray.js @@ -5,6 +5,28 @@ import { Octokit } from "@octokit/rest"; const BATCH_SIZE = 100; const MAX_CONCURRENT_REQUESTS = 10; +function sanitizeObject(item) { + if (typeof item === 'string') { + // Replace every character that is not an ASCII letter, digit, whitespace, or one of . , : ; ! ? " ' ( ) - with '-' (this also replaces printable ASCII such as / @ # _ &) + return item.replace(/[^a-zA-Z0-9.,:;!?"'()\-\s]/g, '-'); + } + + if (Array.isArray(item)) { + return item.map(element => sanitizeObject(element)); + } + + if (item && typeof item === 'object') { + const newObj = {}; + for (const key in item) { + newObj[key] = sanitizeObject(item[key]); + } + return newObj; + } + + // For numbers, booleans, null, etc., return as is + return item; +} + async function fetchMeetingSummaries(lastProcessedTimestamp, batchNumber) { const { data: summaries, error } = await supabase .from('meetingsummaries') @@ -31,7 +53,10 @@ function groupSummariesByYear(summaries, allSummaries) { allSummaries[year] = []; } - allSummaries[year].push(summaryText); + // Sanitize the summary object + const sanitizedSummary = sanitizeObject(summaryText); + + allSummaries[year].push(sanitizedSummary); }); } diff --git a/netlify/functions/batchUpdateMeetingSummariesById.js b/netlify/functions/batchUpdateMeetingSummariesById.js index 705f446..74997ed 100644 --- 
a/netlify/functions/batchUpdateMeetingSummariesById.js +++ b/netlify/functions/batchUpdateMeetingSummariesById.js @@ -5,6 +5,29 @@ import { Octokit } from "@octokit/rest"; const BATCH_SIZE = 100; const MAX_CONCURRENT_REQUESTS = 10; +function sanitizeObject(item) { + if (typeof item === 'string') { + // Replace every character that is not an ASCII letter, digit, whitespace, or one of . , : ; ! ? " ' ( ) - with '-' (this also replaces printable ASCII such as / @ # _ &) + return item.replace(/[^a-zA-Z0-9.,:;!?"'()\-\s]/g, '-'); + } + + if (Array.isArray(item)) { + return item.map(element => sanitizeObject(element)); + } + + if (item && typeof item === 'object') { + const newObj = {}; + for (const key in item) { + newObj[key] = sanitizeObject(item[key]); + } + return newObj; + } + + // for numbers, booleans, null, etc just return item. + return item; +} + + async function fetchMeetingSummaries(lastProcessedTimestamp, batchNumber) { const { data: summaries, error } = await supabase .from('meetingsummaries') @@ -26,18 +49,22 @@ function groupSummariesByMeetingId(summaries, allSummaries) { const { meeting_id, summary: summaryText } = summary; const year = new Date(summaryText.meetingInfo.date).getFullYear(); + // Make sure the buckets exist if (!allSummaries[year]) { allSummaries[year] = {}; } - if (!allSummaries[year][meeting_id]) { allSummaries[year][meeting_id] = []; } - allSummaries[year][meeting_id].push(summaryText); + // Sanitize the entire summary object (assuming summaryText could be an object) + const sanitizedSummary = sanitizeObject(summaryText); + + allSummaries[year][meeting_id].push(sanitizedSummary); }); } + async function commitSummariesToGitHub(allSummaries) { const octokit = new Octokit({ auth: process.env.GITHUB_TOKEN });