From 2aa1cf3e36c40157eb8de48b635f67656dd5980d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Diamond?= <32074058+Andre-Diamond@users.noreply.github.com> Date: Wed, 1 May 2024 06:23:36 +0200 Subject: [PATCH] Refactor meeting summaries retrieval and commit process --- .../batchUpdateMeetingSummariesArray.js | 143 ++++++++++++------ .../singleCallUpdateMeetingSummariesArray.js | 66 ++++++++ 2 files changed, 161 insertions(+), 48 deletions(-) create mode 100644 netlify/functions/singleCallUpdateMeetingSummariesArray.js diff --git a/netlify/functions/batchUpdateMeetingSummariesArray.js b/netlify/functions/batchUpdateMeetingSummariesArray.js index 21070f3..77a7d9f 100644 --- a/netlify/functions/batchUpdateMeetingSummariesArray.js +++ b/netlify/functions/batchUpdateMeetingSummariesArray.js @@ -2,65 +2,112 @@ import { supabase } from '../../lib/supabaseClient'; import { Octokit } from "@octokit/rest"; -export const handler = async (event, context) => { - try { - // Retrieve all summaries - const { data: summaries, error } = await supabase - .from('meetingsummaries') - .select('meeting_id, created_at, summary') - .eq('confirmed', true) - .order('created_at', { ascending: true }); - - if (error) { - console.error('Error retrieving meeting summaries:', error); - return { statusCode: 500, body: JSON.stringify({ error: 'Failed to retrieve meeting summaries' }), }; +const BATCH_SIZE = 100; +const MAX_CONCURRENT_REQUESTS = 10; + +async function fetchMeetingSummaries(lastProcessedTimestamp, batchNumber) { + const { data: summaries, error } = await supabase + .from('meetingsummaries') + .select('created_at, meeting_id, summary') + .eq('confirmed', true) + .order('created_at', { ascending: true }) + .limit(BATCH_SIZE) + .gt('created_at', lastProcessedTimestamp || '1970-01-01') + .range(batchNumber * BATCH_SIZE, (batchNumber + 1) * BATCH_SIZE - 1); + + if (error) { + throw new Error('Failed to retrieve meeting summaries'); + } + + return summaries; +} + +function groupSummariesByYear(summaries, allSummaries) { + summaries.forEach(summary => { + const { summary: summaryText } = summary; + const year = new Date(summaryText.meetingInfo.date).getFullYear(); + + if (!allSummaries[year]) { + allSummaries[year] = []; } - // Group summaries by year - const summariesByYear = {}; - summaries.forEach(summary => { - const year = new Date(summary.summary.meetingInfo.date).getFullYear(); - if (!summariesByYear[year]) { - summariesByYear[year] = []; - } - summariesByYear[year].push(summary.summary); - }); + allSummaries[year].push(summaryText); + }); +} - // Commit summaries to GitHub in separate year folders - const octokit = new Octokit({ auth: process.env.GITHUB_TOKEN, }); - - for (const year in summariesByYear) { - const yearSummaries = summariesByYear[year]; - const path = `Data/Meeting-Summaries/${year}/meeting-summaries-array.json`; - - // Get the current SHA of the file - let currentSHA = null; - try { - const { data: currentFile } = await octokit.repos.getContent({ - owner: "SingularityNET-Archive", - repo: "SingularityNET-Archive", - path, - }); - currentSHA = currentFile.sha; - } catch (error) { - if (error.status !== 404) { - throw error; - } - } +async function commitSummariesToGitHub(allSummaries) { + const octokit = new Octokit({ auth: process.env.GITHUB_TOKEN }); - await octokit.repos.createOrUpdateFileContents({ + for (const year in allSummaries) { + const yearSummaries = allSummaries[year]; + const path = `Data/Meeting-Summaries/${year}/meeting-summaries-array.json`; + + let currentSHA = null; + try { + const { data: currentFile } = await octokit.repos.getContent({ owner: "SingularityNET-Archive", repo: "SingularityNET-Archive", path, - message: `Update meeting summaries for ${year}`, - content: Buffer.from(JSON.stringify(yearSummaries, null, 2)).toString('base64'), - sha: currentSHA, }); + currentSHA = currentFile.sha; + } catch (error) { + if (error.status !== 404) { + throw error; + } + } + + await octokit.repos.createOrUpdateFileContents({ + owner: "SingularityNET-Archive", + repo: "SingularityNET-Archive", + path, + message: `Update meeting summaries for ${year}`, + content: Buffer.from(JSON.stringify(yearSummaries, null, 2)).toString('base64'), + sha: currentSHA, + }); + } +} + +async function processAndCommitSummaries() { + const allSummaries = {}; + let lastProcessedTimestamp = null; + let hasMoreSummaries = true; + let batchNumber = 0; + + while (hasMoreSummaries) { + const fetchPromises = []; + for (let i = 0; i < MAX_CONCURRENT_REQUESTS; i++) { + fetchPromises.push(fetchMeetingSummaries(lastProcessedTimestamp, batchNumber)); + batchNumber++; + } + + const summariesBatches = await Promise.all(fetchPromises); + const flattenedSummaries = summariesBatches.flat(); + + if (flattenedSummaries.length === 0) { + hasMoreSummaries = false; + break; } - return { statusCode: 200, body: JSON.stringify({ message: 'Meeting summaries updated successfully' }), }; + groupSummariesByYear(flattenedSummaries, allSummaries); + + lastProcessedTimestamp = flattenedSummaries[flattenedSummaries.length - 1].created_at; + } + + await commitSummariesToGitHub(allSummaries); +} + +export const handler = async (event, context) => { + try { + await processAndCommitSummaries(); + return { + statusCode: 200, + body: JSON.stringify({ message: 'Meeting summaries updated successfully' }), + }; } catch (error) { console.error('Error in updateGitHubRepo function:', error); - return { statusCode: 500, body: JSON.stringify({ error: 'Failed to update meeting summaries' }), }; + return { + statusCode: 500, + body: JSON.stringify({ error: 'Failed to update meeting summaries' }), + }; } }; \ No newline at end of file diff --git a/netlify/functions/singleCallUpdateMeetingSummariesArray.js b/netlify/functions/singleCallUpdateMeetingSummariesArray.js new file mode 100644 index 0000000..cbd2eef --- /dev/null +++ b/netlify/functions/singleCallUpdateMeetingSummariesArray.js @@ -0,0 +1,66 @@ +// netlify/functions/singleCallUpdateMeetingSummariesArray.js +import { supabase } from '../../lib/supabaseClient'; +import { Octokit } from "@octokit/rest"; + +export const handler = async (event, context) => { + try { + // Retrieve all summaries + const { data: summaries, error } = await supabase + .from('meetingsummaries') + .select('meeting_id, created_at, summary') + .eq('confirmed', true) + .order('created_at', { ascending: true }); + + if (error) { + console.error('Error retrieving meeting summaries:', error); + return { statusCode: 500, body: JSON.stringify({ error: 'Failed to retrieve meeting summaries' }), }; + } + + // Group summaries by year + const summariesByYear = {}; + summaries.forEach(summary => { + const year = new Date(summary.summary.meetingInfo.date).getFullYear(); + if (!summariesByYear[year]) { + summariesByYear[year] = []; + } + summariesByYear[year].push(summary.summary); + }); + + // Commit summaries to GitHub in separate year folders + const octokit = new Octokit({ auth: process.env.GITHUB_TOKEN, }); + + for (const year in summariesByYear) { + const yearSummaries = summariesByYear[year]; + const path = `Data/Meeting-Summaries/${year}/meeting-summaries-array.json`; + + // Get the current SHA of the file + let currentSHA = null; + try { + const { data: currentFile } = await octokit.repos.getContent({ + owner: "SingularityNET-Archive", + repo: "SingularityNET-Archive", + path, + }); + currentSHA = currentFile.sha; + } catch (error) { + if (error.status !== 404) { + throw error; + } + } + + await octokit.repos.createOrUpdateFileContents({ + owner: "SingularityNET-Archive", + repo: "SingularityNET-Archive", + path, + message: `Update meeting summaries for ${year}`, + content: Buffer.from(JSON.stringify(yearSummaries, null, 2)).toString('base64'), + sha: currentSHA, + }); + } + + return { statusCode: 200, body: JSON.stringify({ message: 'Meeting summaries updated successfully' }), }; + } catch (error) { + console.error('Error in updateGitHubRepo function:', error); + return { statusCode: 500, body: JSON.stringify({ error: 'Failed to update meeting summaries' }), }; + } +}; \ No newline at end of file