From c6f05d955350d3aa7d8505eb40062615bfe5b3df Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Diamond?= <32074058+Andre-Diamond@users.noreply.github.com> Date: Thu, 25 Apr 2024 08:17:57 +0200 Subject: [PATCH] Update meeting summaries retrieval and commit process --- .../workflows/commit-meeting-summaries.yml | 29 ++++--- ...js => batchUpdateMeetingSummariesArray.js} | 18 ++-- .../batchUpdateMeetingSummariesById.js | 87 +++++++++++++++++++ netlify/functions/commitToGitHub.js | 31 ------- netlify/functions/getMeetingSummaries.js | 29 ------- 5 files changed, 115 insertions(+), 79 deletions(-) rename netlify/functions/{updateGitHubRepo.js => batchUpdateMeetingSummariesArray.js} (79%) create mode 100644 netlify/functions/batchUpdateMeetingSummariesById.js delete mode 100644 netlify/functions/commitToGitHub.js delete mode 100644 netlify/functions/getMeetingSummaries.js diff --git a/.github/workflows/commit-meeting-summaries.yml b/.github/workflows/commit-meeting-summaries.yml index 6e42b4e..02b7aec 100644 --- a/.github/workflows/commit-meeting-summaries.yml +++ b/.github/workflows/commit-meeting-summaries.yml @@ -1,27 +1,36 @@ # .github/workflows/commit-meeting-summaries.yml name: Commit Meeting Summaries - on: workflow_dispatch: schedule: - - cron: '0 0 * * *' # Run daily at midnight - + - cron: '0 0 * * *' # Run daily at midnight jobs: commit-meeting-summaries: runs-on: ubuntu-latest - steps: - - name: Update GitHub Repository + - name: Update Meeting Summaries Array env: NETLIFY_BASE_URL: ${{ secrets.NETLIFY_BASE_URL }} run: | - response=$(curl -s -X POST -H "Content-Type: application/json" "${NETLIFY_BASE_URL}/.netlify/functions/updateGitHubRepo") - echo "Response from updateGitHubRepo: $response" - + response=$(curl -s -X POST -H "Content-Type: application/json" "${NETLIFY_BASE_URL}/.netlify/functions/batchUpdateMeetingSummariesArray") + echo "Response from batchUpdateMeetingSummariesArray: $response" # Check if the response indicates success if echo "$response" | grep -qE '\{"message":\s*"Meeting summaries updated successfully"\}'; then - echo "Meeting summaries updated successfully" + echo "Meeting summaries array updated successfully" else - echo "Error updating meeting summaries" + echo "Error updating meeting summaries array" + exit 1 + fi + - name: Update Meeting Summaries by ID + env: + NETLIFY_BASE_URL: ${{ secrets.NETLIFY_BASE_URL }} + run: | + response=$(curl -s -X POST -H "Content-Type: application/json" "${NETLIFY_BASE_URL}/.netlify/functions/batchUpdateMeetingSummariesById") + echo "Response from batchUpdateMeetingSummariesById: $response" + # Check if the response indicates success + if echo "$response" | grep -qE '\{"message":\s*"Meeting summaries updated successfully"\}'; then + echo "Meeting summaries by ID updated successfully" + else + echo "Error updating meeting summaries by ID" exit 1 fi \ No newline at end of file diff --git a/netlify/functions/updateGitHubRepo.js b/netlify/functions/batchUpdateMeetingSummariesArray.js similarity index 79% rename from netlify/functions/updateGitHubRepo.js rename to netlify/functions/batchUpdateMeetingSummariesArray.js index a7d485d..b773c31 100644 --- a/netlify/functions/updateGitHubRepo.js +++ b/netlify/functions/batchUpdateMeetingSummariesArray.js @@ -1,4 +1,4 @@ -// netlify/functions/updateGitHubRepo.js +// netlify/functions/batchUpdateMeetingSummariesArray.js import { supabase } from '../../lib/supabaseClient'; import { Octokit } from "@octokit/rest"; @@ -7,19 +7,19 @@ const BATCH_SIZE = 100; export const handler = async (event, context) => { try { let allSummaries = []; - let lastProcessedId = null; + let lastProcessedTimestamp = null; let hasMoreSummaries = true; while (hasMoreSummaries) { // Retrieve the next batch of summaries let { data: summaries, error } = await supabase .from('meetingsummaries') - .select('meeting_id, summary') - .order('meeting_id', { ascending: true }) + .select('meeting_id, created_at, summary') + .order('created_at', { ascending: true }) .limit(BATCH_SIZE); - if (lastProcessedId) { - summaries = summaries.filter(summary => summary.meeting_id > lastProcessedId); + if (lastProcessedTimestamp) { + summaries = summaries.filter(summary => summary.created_at > lastProcessedTimestamp); } if (error) { @@ -37,7 +37,7 @@ export const handler = async (event, context) => { // Accumulate the summaries allSummaries = allSummaries.concat(summaries.map(summary => summary.summary)); - lastProcessedId = summaries[summaries.length - 1].meeting_id; + lastProcessedTimestamp = summaries[summaries.length - 1].created_at; } // Commit all summaries to GitHub in a single file @@ -51,7 +51,7 @@ export const handler = async (event, context) => { const { data: currentFile } = await octokit.repos.getContent({ owner: "SingularityNET-Archive", repo: "SingularityNET-Archive", - path: "Data/meeting-summaries.json", + path: "Data/Meeting-Summaries/meeting-summaries-array.json", }); currentSHA = currentFile.sha; } catch (error) { @@ -63,7 +63,7 @@ export const handler = async (event, context) => { const { data } = await octokit.repos.createOrUpdateFileContents({ owner: "SingularityNET-Archive", repo: "SingularityNET-Archive", - path: "Data/meeting-summaries.json", + path: "Data/Meeting-Summaries/meeting-summaries-array.json", message: "Update meeting summaries", content: Buffer.from(JSON.stringify(allSummaries, null, 2)).toString('base64'), sha: currentSHA, diff --git a/netlify/functions/batchUpdateMeetingSummariesById.js b/netlify/functions/batchUpdateMeetingSummariesById.js new file mode 100644 index 0000000..bb3fc78 --- /dev/null +++ b/netlify/functions/batchUpdateMeetingSummariesById.js @@ -0,0 +1,87 @@ +// netlify/functions/batchUpdateMeetingSummariesById.js +import { supabase } from '../../lib/supabaseClient'; +import { Octokit } from "@octokit/rest"; + +const BATCH_SIZE = 200; // Increase the batch size +const MAX_CONCURRENT_REQUESTS = 5; // Adjust the number of concurrent requests + +export const handler = async (event, context) => { + try { + let allSummaries = {}; + let lastProcessedTimestamp = null; + let hasMoreSummaries = true; + + while (hasMoreSummaries) { + const { data: summaries, error } = await supabase + .from('meetingsummaries') + .select('created_at, meeting_id, summary') + .order('created_at', { ascending: true }) + .limit(BATCH_SIZE * MAX_CONCURRENT_REQUESTS) + .gt('created_at', lastProcessedTimestamp || '1970-01-01'); + + if (error) { + console.error('Error retrieving meeting summaries:', error); + return { + statusCode: 500, + body: JSON.stringify({ error: 'Failed to retrieve meeting summaries' }), + }; + } + + if (summaries.length === 0) { + hasMoreSummaries = false; + break; + } + + // Group summaries by meeting_id + summaries.forEach(summary => { + const { meeting_id, summary: summaryText } = summary; + if (!allSummaries[meeting_id]) { + allSummaries[meeting_id] = []; + } + allSummaries[meeting_id].push(summaryText); + }); + + lastProcessedTimestamp = summaries[summaries.length - 1].created_at; + } + + // Commit all summaries to GitHub in a single file + const octokit = new Octokit({ + auth: process.env.GITHUB_TOKEN, + }); + + // Get the current SHA of the file + let currentSHA = null; + try { + const { data: currentFile } = await octokit.repos.getContent({ + owner: "SingularityNET-Archive", + repo: "SingularityNET-Archive", + path: "Data/Meeting-Summaries/meeting-summaries-by-id.json", + }); + currentSHA = currentFile.sha; + } catch (error) { + if (error.status !== 404) { + throw error; + } + } + + const { data } = await octokit.repos.createOrUpdateFileContents({ + owner: "SingularityNET-Archive", + repo: "SingularityNET-Archive", + path: "Data/Meeting-Summaries/meeting-summaries-by-id.json", + message: "Update meeting summaries", + content: Buffer.from(JSON.stringify(allSummaries, null, 2)).toString('base64'), + sha: currentSHA, + }); + + return { + statusCode: 200, + body: JSON.stringify({ message: 'Meeting summaries updated successfully' }), + }; + } catch (error) { + console.error('Error in updateGitHubRepo function:', error); + return { + statusCode: 500, + body: JSON.stringify({ error: 'Failed to update meeting summaries' }), + }; + } +}; \ No newline at end of file diff --git a/netlify/functions/commitToGitHub.js b/netlify/functions/commitToGitHub.js deleted file mode 100644 index 710be0a..0000000 --- a/netlify/functions/commitToGitHub.js +++ /dev/null @@ -1,31 +0,0 @@ -// netlify/functions/commitToGitHub.js -import { Octokit } from "@octokit/rest"; - -export const handler = async (event, context) => { - try { - const { owner, repo, filePath, content, commitMessage } = JSON.parse(event.body); - - const octokit = new Octokit({ - auth: process.env.GITHUB_TOKEN, - }); - - const { data } = await octokit.repos.createOrUpdateFileContents({ - owner, - repo, - path: filePath, - message: commitMessage, - content: Buffer.from(JSON.stringify(content, null, 2)).toString('base64'), - }); - - return { - statusCode: 200, - body: JSON.stringify({ message: 'Data committed to GitHub successfully' }), - }; - } catch (error) { - console.error('Error in commitToGitHub function:', error); - return { - statusCode: 500, - body: JSON.stringify({ error: 'Failed to commit data to GitHub' }), - }; - } -}; \ No newline at end of file diff --git a/netlify/functions/getMeetingSummaries.js b/netlify/functions/getMeetingSummaries.js deleted file mode 100644 index 5028f1f..0000000 --- a/netlify/functions/getMeetingSummaries.js +++ /dev/null @@ -1,29 +0,0 @@ -// netlify/functions/getMeetingSummaries.js -import { supabase } from '../../lib/supabaseClient'; - -export const handler = async (event, context) => { - try { - const { data, error } = await supabase - .from('meetingsummaries') - .select('summary'); - - if (error) { - console.error('Error retrieving meeting summaries:', error); - return { - statusCode: 500, - body: JSON.stringify({ error: 'Failed to retrieve meeting summaries' }), - }; - } - - return { - statusCode: 200, - body: JSON.stringify(data), - }; - } catch (error) { - console.error('Error in getMeetingSummaries function:', error); - return { - statusCode: 500, - body: JSON.stringify({ error: 'Internal server error' }), - }; - } -}; \ No newline at end of file