From 2a3051f2da3e2579c7a2e663d49e5f5b344d8c6d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Diamond?= <32074058+Andre-Diamond@users.noreply.github.com> Date: Sat, 27 Apr 2024 08:41:29 +0200 Subject: [PATCH] Refactor meeting summaries retrieval and commit process --- .../batchUpdateMeetingSummariesById.js | 126 ++++++++++-------- 1 file changed, 67 insertions(+), 59 deletions(-) diff --git a/netlify/functions/batchUpdateMeetingSummariesById.js b/netlify/functions/batchUpdateMeetingSummariesById.js index bb3fc78..b138587 100644 --- a/netlify/functions/batchUpdateMeetingSummariesById.js +++ b/netlify/functions/batchUpdateMeetingSummariesById.js @@ -2,76 +2,84 @@ import { supabase } from '../../lib/supabaseClient'; import { Octokit } from "@octokit/rest"; -const BATCH_SIZE = 200; // Increase the batch size -const MAX_CONCURRENT_REQUESTS = 5; // Adjust the number of concurrent requests +const BATCH_SIZE = 200; +const MAX_CONCURRENT_REQUESTS = 5; -export const handler = async (event, context) => { - try { - let allSummaries = {}; - let lastProcessedTimestamp = null; - let hasMoreSummaries = true; - - while (hasMoreSummaries) { - const { data: summaries, error } = await supabase - .from('meetingsummaries') - .select('created_at, meeting_id, summary') - .order('created_at', { ascending: true }) - .limit(BATCH_SIZE * MAX_CONCURRENT_REQUESTS) - .gt('created_at', lastProcessedTimestamp || '1970-01-01'); +async function fetchMeetingSummaries(lastProcessedTimestamp) { + const { data: summaries, error } = await supabase + .from('meetingsummaries') + .select('created_at, meeting_id, summary') + .order('created_at', { ascending: true }) + .limit(BATCH_SIZE * MAX_CONCURRENT_REQUESTS) + .gt('created_at', lastProcessedTimestamp || '1970-01-01'); - if (error) { - console.error('Error retrieving meeting summaries:', error); - return { - statusCode: 500, - body: JSON.stringify({ error: 'Failed to retrieve meeting summaries' }), - }; - } - - if (summaries.length === 0) { - hasMoreSummaries = false; - break; - } + if (error) { + throw new Error('Failed to retrieve meeting summaries'); + } - // Group summaries by meeting_id - summaries.forEach(summary => { - const { meeting_id, summary: summaryText } = summary; - if (!allSummaries[meeting_id]) { - allSummaries[meeting_id] = []; - } - allSummaries[meeting_id].push(summaryText); - }); + return summaries; +} - lastProcessedTimestamp = summaries[summaries.length - 1].created_at; +function groupSummariesByMeetingId(summaries, allSummaries) { + summaries.forEach(summary => { + const { meeting_id, summary: summaryText } = summary; + if (!allSummaries[meeting_id]) { + allSummaries[meeting_id] = []; } + allSummaries[meeting_id].push(summaryText); + }); +} - // Commit all summaries to GitHub in a single file - const octokit = new Octokit({ - auth: process.env.GITHUB_TOKEN, - }); - - // Get the current SHA of the file - let currentSHA = null; - try { - const { data: currentFile } = await octokit.repos.getContent({ - owner: "SingularityNET-Archive", - repo: "SingularityNET-Archive", - path: "Data/Meeting-Summaries/meeting-summaries-by-id.json", - }); - currentSHA = currentFile.sha; - } catch (error) { - if (error.status !== 404) { - throw error; - } - } +async function commitSummariesToGitHub(allSummaries) { + const octokit = new Octokit({ auth: process.env.GITHUB_TOKEN }); - const { data } = await octokit.repos.createOrUpdateFileContents({ + let currentSHA = null; + try { + const { data: currentFile } = await octokit.repos.getContent({ owner: "SingularityNET-Archive", repo: "SingularityNET-Archive", path: "Data/Meeting-Summaries/meeting-summaries-by-id.json", - message: "Update meeting summaries", - content: Buffer.from(JSON.stringify(allSummaries, null, 2)).toString('base64'), - sha: currentSHA, }); + currentSHA = currentFile.sha; + } catch (error) { + if (error.status !== 404) { + throw error; + } + } + + await octokit.repos.createOrUpdateFileContents({ + owner: "SingularityNET-Archive", + repo: "SingularityNET-Archive", + path: "Data/Meeting-Summaries/meeting-summaries-by-id.json", + message: "Update meeting summaries", + content: Buffer.from(JSON.stringify(allSummaries, null, 2)).toString('base64'), + sha: currentSHA, + }); +} + +async function processAndCommitSummaries() { + const allSummaries = {}; + let lastProcessedTimestamp = null; + let hasMoreSummaries = true; + + while (hasMoreSummaries) { + const summaries = await fetchMeetingSummaries(lastProcessedTimestamp); + + if (summaries.length === 0) { + hasMoreSummaries = false; + break; + } + + groupSummariesByMeetingId(summaries, allSummaries); + lastProcessedTimestamp = summaries[summaries.length - 1].created_at; + } + + await commitSummariesToGitHub(allSummaries); +} + +export const handler = async (event, context) => { + try { + await processAndCommitSummaries(); return { statusCode: 200,