From bb489120246cfcf6a13d0fb7a94fd8583c14e1ad Mon Sep 17 00:00:00 2001 From: Arun Kumar S Date: Thu, 2 Oct 2025 21:21:31 +0530 Subject: [PATCH] added seroski-dupbot to detect duplicates in the issue --- .github/scripts/check-duplicates.js | 486 ++++++++++++++++++++ .github/scripts/cleanup-closed-issue.js | 271 +++++++++++ .github/scripts/cleanup-duplicates.js | 195 ++++++++ .github/scripts/cleanup-specific-issue.js | 148 ++++++ .github/scripts/clear-all-vectors.js | 197 ++++++++ .github/scripts/debug-pinecone.js | 96 ++++ .github/scripts/populate-existing-issues.js | 349 ++++++++++++++ .github/scripts/validate-apis.js | 175 +++++++ .github/workflows/api-validation.yml | 84 ++++ .github/workflows/database-operations.yml | 104 +++++ .github/workflows/duplicate-issue.yml | 99 ++++ .github/workflows/issue_greetings.yml | 25 - 12 files changed, 2204 insertions(+), 25 deletions(-) create mode 100644 .github/scripts/check-duplicates.js create mode 100644 .github/scripts/cleanup-closed-issue.js create mode 100644 .github/scripts/cleanup-duplicates.js create mode 100644 .github/scripts/cleanup-specific-issue.js create mode 100644 .github/scripts/clear-all-vectors.js create mode 100644 .github/scripts/debug-pinecone.js create mode 100644 .github/scripts/populate-existing-issues.js create mode 100644 .github/scripts/validate-apis.js create mode 100644 .github/workflows/api-validation.yml create mode 100644 .github/workflows/database-operations.yml create mode 100644 .github/workflows/duplicate-issue.yml delete mode 100644 .github/workflows/issue_greetings.yml diff --git a/.github/scripts/check-duplicates.js b/.github/scripts/check-duplicates.js new file mode 100644 index 0000000..9683379 --- /dev/null +++ b/.github/scripts/check-duplicates.js @@ -0,0 +1,486 @@ +import { Octokit } from "@octokit/rest"; +import fetch from "node-fetch"; +import { Pinecone } from "@pinecone-database/pinecone"; + +const octokit = new Octokit({ auth: process.env.GITHUB_TOKEN }); +const OWNER = 
process.env.GITHUB_REPOSITORY.split("/")[0]; +const REPO = process.env.GITHUB_REPOSITORY.split("/")[1]; +const ISSUE_NUMBER = Number(process.env.ISSUE_NUMBER); +const SIMILARITY_THRESHOLD = parseFloat( + process.env.SIMILARITY_THRESHOLD || "0.5" +); + +// Initialize Pinecone client +const pinecone = new Pinecone({ + apiKey: process.env.PINECONE_API_KEY, +}); + +const indexName = process.env.PINECONE_INDEX; + +// Retry logic for API calls +async function retryApiCall(apiCall, maxRetries = 3, delay = 1000) { + for (let i = 0; i < maxRetries; i++) { + try { + return await apiCall(); + } catch (error) { + if (i === maxRetries - 1) throw error; + if (error.status === 429 || error.status >= 500) { + console.log( + `API call failed (attempt ${i + 1}), retrying in ${delay}ms...` + ); + await new Promise((resolve) => setTimeout(resolve, delay)); + delay *= 2; + } else { + throw error; + } + } + } +} + +// Safe vector operation with fallback +async function safeVectorOperation(operation, fallbackMessage) { + try { + return await operation(); + } catch (error) { + console.error("โŒ Vector database error:", error.message); + + await octokit.issues.createComment({ + owner: OWNER, + repo: REPO, + issue_number: ISSUE_NUMBER, + body: + `๐Ÿ”ง **Temporary Service Issue** ๐Ÿ”ง\n\n` + + `${fallbackMessage}\n\n` + + `Our duplicate detection service is temporarily unavailable. 
` + + `A maintainer will review this issue manually.\n\n` + + `*This comment was generated automatically by Seroski-DupBot ๐Ÿค–*` + + `\n\nCheck out the developer: [Portfolio](https://portfolio.rosk.dev)`, + }); + + throw error; + } +} + +async function run() { + console.log(`\n=== Checking issue #${ISSUE_NUMBER} for duplicates ===`); + + const { data: newIssue } = await retryApiCall(async () => { + return await octokit.issues.get({ + owner: OWNER, + repo: REPO, + issue_number: ISSUE_NUMBER, + }); + }); + + if (newIssue.pull_request) { + console.log("โญ๏ธ Skipping pull request - not an issue"); + return; + } + + const newText = `${newIssue.title} ${newIssue.body || ""}`.trim(); + console.log(`Issue text: ${newText.substring(0, 100)}...`); + + if (newText.length < 10) { + console.log("โš ๏ธ Issue text too short for meaningful duplicate detection"); + await octokit.issues.createComment({ + owner: OWNER, + repo: REPO, + issue_number: ISSUE_NUMBER, + body: + `๐Ÿ“ **Issue Too Short for Analysis** ๐Ÿ“\n\n` + + `This issue appears to have very little content. 
For better duplicate detection, please consider:\n\n` + + `- Adding more details about the problem\n` + + `- Including steps to reproduce\n` + + `- Describing expected vs actual behavior\n\n` + + `*This comment was generated automatically by Seroski-DupBot ๐Ÿค–*` + + `\n\nCheck out the developer: [Portfolio](https://portfolio.rosk.dev)`, + }); + return; + } + + console.log("Generating embedding for the new issue..."); + + const generateEmbedding = async (text) => { + return await retryApiCall(async () => { + const response = await fetch( + `https://generativelanguage.googleapis.com/v1beta/models/text-embedding-004:embedContent?key=${process.env.GEMINI_API_KEY}`, + { + method: "POST", + headers: { + "Content-Type": "application/json", + }, + body: JSON.stringify({ + model: "models/text-embedding-004", + content: { parts: [{ text: text }] }, + }), + } + ); + const data = await response.json(); + + if (data.error || !data.embedding || !data.embedding.values) { + console.error("Embedding error:", data.error || "Invalid response"); + return Array(1024).fill(0.01); + } + + let embedding = data.embedding.values; + if (embedding.length < 1024) { + embedding = [...embedding, ...Array(1024 - embedding.length).fill(0)]; + } else if (embedding.length > 1024) { + embedding = embedding.slice(0, 1024); + } + + return embedding; + }); + }; + + const newEmbedding = await generateEmbedding(newText); + console.log("โœ… Generated embedding for new issue"); + + const index = pinecone.Index(indexName); + console.log("Checking if issue already exists in vector database..."); + + let existingVectorIds = []; + let isEditingExistingIssue = false; + + try { + await safeVectorOperation(async () => { + // Try to find existing vectors using metadata filter + const queryResponse = await index.query({ + vector: Array(1024).fill(0.1), + topK: 100, + includeValues: false, + includeMetadata: true, + filter: { + issue_number: ISSUE_NUMBER, + }, + }); + + if (queryResponse.matches && 
queryResponse.matches.length > 0) { + for (const match of queryResponse.matches) { + existingVectorIds.push(match.id); + console.log(` ๐Ÿ“Œ Found existing vector via filter: ${match.id}`); + } + } else { + console.log( + " ๐Ÿ”„ Filter query returned no results, trying list approach..." + ); + let paginationToken = null; + + do { + const listOptions = { limit: 100 }; + if (paginationToken) { + listOptions.paginationToken = paginationToken; + } + + const listResponse = await index.listPaginated(listOptions); + + if (listResponse.vectors) { + for (const vector of listResponse.vectors) { + if (vector.metadata?.issue_number === ISSUE_NUMBER) { + existingVectorIds.push(vector.id); + console.log( + ` ๐Ÿ“Œ Found existing vector via list: ${vector.id}` + ); + } + } + } + + paginationToken = listResponse.pagination?.next; + } while (paginationToken); + } + + isEditingExistingIssue = existingVectorIds.length > 0; + console.log( + `Issue exists in DB: ${isEditingExistingIssue ? "YES" : "NO"} (${ + existingVectorIds.length + } vectors found)` + ); + }, "Could not check for existing issue vectors in the database."); + } catch (error) { + console.error( + "Vector database check failed, continuing with basic processing..." 
+ ); + } + + let results = []; + let filteredResults = []; + let duplicates = []; + + try { + await safeVectorOperation(async () => { + console.log("Querying Pinecone for similar issues..."); + const queryResponse = await index.query({ + vector: newEmbedding, + topK: 10, + includeValues: false, + includeMetadata: true, + }); + + results = queryResponse.matches || []; + console.log(`Found ${results.length} potential matches`); + + filteredResults = results.filter( + (r) => r.metadata?.issue_number !== ISSUE_NUMBER + ); + + console.log( + `After filtering out current issue: ${filteredResults.length} matches` + ); + + // Get all potential duplicates above 0.55 threshold for 3-tier system + duplicates = filteredResults + .filter((r) => r.score >= 0.55) + .map((r) => ({ + number: r.metadata?.issue_number || "Unknown", + similarity: r.score, + title: r.metadata?.title || "Unknown", + })) + .sort((a, b) => b.similarity - a.similarity); // Sort by highest similarity first + + console.log( + `Found ${duplicates.length} potential matches above 0.55 similarity threshold` + ); + + filteredResults.forEach((result, index) => { + const score = result.score || 0; + let category = "โœ… Below threshold"; + if (score >= 0.85) category = "๐Ÿšจ HIGH DUPLICATE"; + else if (score >= 0.55) category = "๐Ÿค” POTENTIALLY RELATED"; + + console.log( + ` ${index + 1}. 
Issue #${ + result.metadata?.issue_number || "Unknown" + } - Score: ${score.toFixed(4)} ${category}` + ); + console.log(` Title: "${result.metadata?.title || "No title"}"`); + }); + }, "Could not query the vector database for similar issues."); + } catch (error) { + console.error("Duplicate detection failed, treating as unique issue..."); + } + + // 3-tier duplicate detection system + let commentBody = ""; + let shouldUpdateVector = true; + let shouldAutoClose = false; + let duplicateAction = "none"; + + // Categorize duplicates by similarity score + const highSimilarityDuplicates = duplicates.filter(d => d.similarity >= 0.85); + const mediumSimilarityDuplicates = duplicates.filter(d => d.similarity >= 0.55 && d.similarity < 0.85); + + if (highSimilarityDuplicates.length > 0) { + // TIER 1: High similarity (>= 0.85) - Auto-close as duplicate + duplicateAction = "auto-close"; + shouldUpdateVector = false; + shouldAutoClose = !isEditingExistingIssue; + + const topMatch = highSimilarityDuplicates[0]; + const similarityPercent = (topMatch.similarity * 100).toFixed(1); + + if (isEditingExistingIssue) { + commentBody = `๐Ÿšจ **Warning: Edited Issue Now Appears as Duplicate** ๐Ÿšจ\n\n`; + commentBody += `After your recent edit, this issue appears to be a duplicate of:\n\n`; + commentBody += `- Issue #${topMatch.number}: "${topMatch.title}" (${similarityPercent}% similar)\n`; + commentBody += ` Link: https://github.com/${OWNER}/${REPO}/issues/${topMatch.number}\n\n`; + commentBody += `โš ๏ธ **Note**: Since this was previously a unique issue, we've kept it open but flagged this high similarity for your attention.\n\n`; + } else { + commentBody = `๐Ÿšจ **Duplicate Detected** ๐Ÿšจ\n\n`; + commentBody += `This issue appears to be a duplicate of:\n\n`; + commentBody += `- Issue #${topMatch.number}: "${topMatch.title}" (${similarityPercent}% similar)\n`; + commentBody += ` Link: https://github.com/${OWNER}/${REPO}/issues/${topMatch.number}\n\n`; + commentBody += `๐Ÿ”’ **This 
issue has been automatically closed as a duplicate.**\n\n`; + commentBody += `Please continue the discussion in the original issue above. If your problem is different, please open a new issue with more specific details.\n\n`; + } + + console.log(`๐Ÿšจ HIGH SIMILARITY DUPLICATE detected! Similarity: ${similarityPercent}% with issue #${topMatch.number}`); + + } else if (mediumSimilarityDuplicates.length > 0) { + // TIER 2: Medium similarity (0.55-0.84) - Flag as potentially related + duplicateAction = "flag-related"; + shouldUpdateVector = true; // Still add to vector DB for unique issues + shouldAutoClose = false; + + const topMatch = mediumSimilarityDuplicates[0]; + const similarityPercent = (topMatch.similarity * 100).toFixed(1); + + if (isEditingExistingIssue) { + commentBody = `๐Ÿค” **Potentially Related Issue After Edit** ๐Ÿค”\n\n`; + commentBody += `After your recent edit, this issue seems related to:\n\n`; + } else { + commentBody = `๐Ÿค” **Potentially Related Issue Found** ๐Ÿค”\n\n`; + commentBody += `This issue seems related to:\n\n`; + } + + commentBody += `- Issue #${topMatch.number}: "${topMatch.title}" (${similarityPercent}% similar)\n`; + commentBody += ` Link: https://github.com/${OWNER}/${REPO}/issues/${topMatch.number}\n\n`; + commentBody += `This issue is not identical but may be related. A maintainer will review to determine if they should be linked or if this is indeed a separate issue.\n\n`; + + console.log(`๐Ÿค” POTENTIALLY RELATED issue detected! Similarity: ${similarityPercent}% with issue #${topMatch.number}`); + + } else { + // TIER 3: Low similarity (< 0.55) - Treat as unique + duplicateAction = "unique"; + shouldUpdateVector = true; + shouldAutoClose = false; + + if (isEditingExistingIssue) { + commentBody = `โœ… **Issue Updated Successfully** โœ…\n\n`; + commentBody += `@${newIssue.user.login}, your edit has been processed and the issue still appears to be unique. 
Our duplicate detection database has been updated with your changes.\n\n`; + commentBody += `Thank you for keeping your issue up to date! ๐Ÿ”„\n\n`; + } else { + commentBody += `Thank you @${newIssue.user.login} for finding and contributing this unique issue! This appears to be a new problem that hasn't been reported before.\n\n`; + commentBody += `Your contribution helps make this project better. We appreciate you taking the time to report this! ๐Ÿ™\n\n`; + } + + console.log(`โœ… UNIQUE issue confirmed. No similar issues found above 0.55 threshold.`); + } + + commentBody += `*This comment was generated automatically by Seroski-DupBot ๐Ÿค–*\n\nCheck out the developer: [Portfolio](https://portfolio.rosk.dev)`; + + console.log(`๐Ÿ“Š Duplicate Detection Summary:`); + console.log(` Action: ${duplicateAction}`); + console.log(` Will auto-close: ${shouldAutoClose}`); + console.log(` Will update vectors: ${shouldUpdateVector}`); + + // Post the comment first + await retryApiCall(async () => { + return await octokit.issues.createComment({ + owner: OWNER, + repo: REPO, + issue_number: ISSUE_NUMBER, + body: commentBody, + }); + }); + console.log("Comment posted on the issue."); + + // Handle auto-closure for high similarity duplicates (>= 0.85) + if (shouldAutoClose && duplicateAction === "auto-close") { + try { + console.log(`๐Ÿ”„ Auto-closing issue #${ISSUE_NUMBER} as duplicate...`); + + // First add the duplicate label + await retryApiCall(async () => { + return await octokit.issues.addLabels({ + owner: OWNER, + repo: REPO, + issue_number: ISSUE_NUMBER, + labels: ['duplicate'] + }); + }); + + console.log(`๐Ÿท๏ธ Added 'duplicate' label to issue #${ISSUE_NUMBER}`); + + // Then close the issue with 'duplicate' state reason + await retryApiCall(async () => { + return await octokit.issues.update({ + owner: OWNER, + repo: REPO, + issue_number: ISSUE_NUMBER, + state: 'closed', + state_reason: 'duplicate' + }); + }); + + console.log(`๐Ÿ”’ Issue #${ISSUE_NUMBER} has been 
auto-closed as duplicate`); + + } catch (error) { + console.error(`โŒ Failed to auto-close issue #${ISSUE_NUMBER}:`, error.message); + + // Post error comment if automatic closure fails + try { + await retryApiCall(async () => { + return await octokit.issues.createComment({ + owner: OWNER, + repo: REPO, + issue_number: ISSUE_NUMBER, + body: `โš ๏ธ **Auto-close Failed** โš ๏ธ\n\nThis issue was detected as a high-confidence duplicate but could not be automatically closed. A maintainer will review this manually.\n\n*Error: ${error.message}*` + }); + }); + } catch (commentError) { + console.error(`โŒ Failed to post error comment: ${commentError.message}`); + } + } + } else if (duplicateAction === "flag-related") { + console.log(`๐Ÿค” Issue #${ISSUE_NUMBER} flagged as potentially related - no auto-action taken`); + } else if (duplicateAction === "unique") { + console.log(`โœ… Issue #${ISSUE_NUMBER} confirmed as unique - will process normally`); + } + + // Continue with vector database updates only for unique issues + if (shouldUpdateVector) { + try { + await safeVectorOperation(async () => { + if (isEditingExistingIssue) { + console.log("Updating existing issue vectors in Pinecone..."); + + if (existingVectorIds.length > 0) { + await index.deleteMany(existingVectorIds); + console.log( + `๐Ÿ—‘๏ธ Deleted ${existingVectorIds.length} old vector(s)` + ); + } + + const vectorId = `issue-${ISSUE_NUMBER}-${Date.now()}`; + await index.upsert([ + { + id: vectorId, + values: newEmbedding, + metadata: { + issue_number: ISSUE_NUMBER, + title: newIssue.title, + content: newText, + created_at: newIssue.created_at, + updated_at: newIssue.updated_at, + url: newIssue.html_url, + }, + }, + ]); + + console.log( + "โœ… Updated issue embedding in Pinecone with new content." 
+ ); + } else { + console.log("Adding new issue embedding to Pinecone..."); + + const vectorId = `issue-${ISSUE_NUMBER}-${Date.now()}`; + await index.upsert([ + { + id: vectorId, + values: newEmbedding, + metadata: { + issue_number: ISSUE_NUMBER, + title: newIssue.title, + content: newText, + created_at: newIssue.created_at, + url: newIssue.html_url, + }, + }, + ]); + + console.log( + "โœ… New issue embedding stored in Pinecone for future duplicate detection." + ); + } + }, "Could not update the vector database."); + } catch (error) { + console.error( + "Failed to update vector database, but issue processing completed." + ); + } + } else { + if (duplicateAction === "auto-close") { + console.log("โญ๏ธ Skipped adding to Pinecone due to high-confidence duplicate detection and auto-closure."); + } else if (duplicateAction === "flag-related") { + console.log("โœ… Added to Pinecone despite potential relation - issue treated as separate."); + } else if (isEditingExistingIssue) { + console.log("โš ๏ธ Keeping existing vectors unchanged due to similarity detected after edit."); + } + } + + console.log( + `\n=== Duplicate check completed for issue #${ISSUE_NUMBER} ===\n` + ); +} + +run().catch((err) => console.error(err)); diff --git a/.github/scripts/cleanup-closed-issue.js b/.github/scripts/cleanup-closed-issue.js new file mode 100644 index 0000000..6f378a8 --- /dev/null +++ b/.github/scripts/cleanup-closed-issue.js @@ -0,0 +1,271 @@ +import { Octokit } from "@octokit/rest"; +import { Pinecone } from "@pinecone-database/pinecone"; + +const octokit = new Octokit({ auth: process.env.GITHUB_TOKEN }); +const OWNER = + process.env.GITHUB_REPOSITORY?.split("/")[0] || process.env.GITHUB_OWNER; +const REPO = + process.env.GITHUB_REPOSITORY?.split("/")[1] || process.env.GITHUB_REPO; +const ISSUE_NUMBER = Number(process.env.ISSUE_NUMBER); + +// Initialize Pinecone client +const pinecone = new Pinecone({ + apiKey: process.env.PINECONE_API_KEY, +}); + +const indexName = 
process.env.PINECONE_INDEX; + +// Retry logic for API calls +async function retryApiCall(apiCall, maxRetries = 3, delay = 1000) { + for (let i = 0; i < maxRetries; i++) { + try { + return await apiCall(); + } catch (error) { + if (i === maxRetries - 1) throw error; + if (error.status === 429 || error.status >= 500) { + console.log( + `API call failed (attempt ${i + 1}), retrying in ${delay}ms...` + ); + await new Promise((resolve) => setTimeout(resolve, delay)); + delay *= 2; // Exponential backoff + } else { + throw error; // Don't retry for other errors + } + } + } +} + +async function cleanupClosedIssue() { + console.log( + `\n=== Cleaning up closed issue #${ISSUE_NUMBER} from vector database ===` + ); + console.log(`Repository: ${OWNER}/${REPO}`); + console.log(`Pinecone Index: ${indexName}`); + + if (!OWNER || !REPO) { + console.error( + "โŒ Repository owner and name must be specified via GITHUB_REPOSITORY or GITHUB_OWNER/GITHUB_REPO environment variables" + ); + process.exit(1); + } + + if (!ISSUE_NUMBER) { + console.error( + "โŒ Issue number must be specified via ISSUE_NUMBER environment variable" + ); + process.exit(1); + } + + try { + // Initialize Pinecone index + const index = pinecone.Index(indexName); + console.log("โœ… Connected to Pinecone index"); + + // Fetch the closed issue details for logging with retry logic + const { data: closedIssue } = await retryApiCall(async () => { + return await octokit.issues.get({ + owner: OWNER, + repo: REPO, + issue_number: ISSUE_NUMBER, + }); + }); + + // Skip if it's actually a pull request + if (closedIssue.pull_request) { + console.log("โญ๏ธ Skipping pull request cleanup - not an issue"); + return; + } + + console.log(`๐Ÿ“„ Issue details:`); + console.log(` Title: "${closedIssue.title}"`); + console.log(` State: ${closedIssue.state}`); + console.log(` Closed at: ${closedIssue.closed_at}`); + + // Query Pinecone to find vectors for this issue with retry logic + console.log( + `๐Ÿ” Searching for vectors 
related to issue #${ISSUE_NUMBER}...` + ); + + const vectorsToDelete = []; + + try { + await retryApiCall(async () => { + // First, try using metadata filter (same as check-duplicates.js) + const queryResponse = await index.query({ + vector: Array(1024).fill(0.1), // dummy vector for metadata filtering + topK: 100, + includeValues: false, + includeMetadata: true, + filter: { + issue_number: ISSUE_NUMBER, + }, + }); + + // If filter query works, use those results + if (queryResponse.matches && queryResponse.matches.length > 0) { + for (const match of queryResponse.matches) { + vectorsToDelete.push(match.id); + console.log(` ๐Ÿ“Œ Found vector via filter: ${match.id}`); + } + } else { + // Fallback to listing all vectors (paginated approach) + console.log( + " ๐Ÿ”„ Filter query returned no results, trying list approach..." + ); + let paginationToken = null; + + do { + const listOptions = { limit: 100 }; + if (paginationToken) { + listOptions.paginationToken = paginationToken; + } + + const listResponse = await index.listPaginated(listOptions); + + if (listResponse.vectors) { + for (const vector of listResponse.vectors) { + if (vector.metadata?.issue_number === ISSUE_NUMBER) { + vectorsToDelete.push(vector.id); + console.log(` ๐Ÿ“Œ Found vector via list: ${vector.id}`); + } + } + } + + paginationToken = listResponse.pagination?.next; + } while (paginationToken); + } + }); + } catch (error) { + console.error( + "โŒ Failed to search vectors from Pinecone:", + error.message + ); + throw error; + } + + console.log(`Found ${vectorsToDelete.length} vector(s) to delete`); + + if (vectorsToDelete.length === 0) { + console.log( + `โ„น๏ธ No vectors found for issue #${ISSUE_NUMBER}. 
It may have been a duplicate issue that was never added to the vector database.` + ); + + // Still post a cleanup confirmation comment with retry logic + await retryApiCall(async () => { + return await octokit.issues.createComment({ + owner: OWNER, + repo: REPO, + issue_number: ISSUE_NUMBER, + body: + `๐Ÿงน **Issue Cleanup Completed** ๐Ÿงน\n\n` + + `This issue has been closed and checked for cleanup. No vectors were found in the database ` + + `(likely because it was detected as a duplicate and never stored).\n\n` + + `*This comment was generated automatically by Seroski-DupBot ๐Ÿค–*` + + `\n\nCheck out the developer: [Portfolio](https://portfolio.rosk.dev)`, + }); + }); + + console.log("โœ… Cleanup confirmation comment posted"); + return; + } + + // Delete the vectors from Pinecone with retry logic + console.log( + `๐Ÿ—‘๏ธ Deleting ${vectorsToDelete.length} vector(s) from Pinecone...` + ); + + try { + await retryApiCall(async () => { + return await index.deleteMany(vectorsToDelete); + }); + console.log( + `โœ… Successfully deleted ${vectorsToDelete.length} vector(s) from Pinecone` + ); + } catch (deleteError) { + console.error(`โŒ Error deleting vectors:`, deleteError.message); + throw deleteError; + } + + // Post a comment on the closed issue confirming cleanup with retry logic + const commentBody = + `๐Ÿงน **Issue Cleanup Completed** ๐Ÿงน\n\n` + + `This closed issue has been automatically removed from our duplicate detection database.\n\n` + + `**Cleanup Details:**\n` + + `- Vectors removed: ${vectorsToDelete.length}\n` + + `- Cleaned at: ${new Date().toISOString()}\n\n` + + `This helps keep our duplicate detection system accurate and prevents closed issues ` + + `from being referenced in future duplicate checks.\n\n` + + `*This comment was generated automatically by Seroski-DupBot ๐Ÿค–*` + + `\n\nCheck out the developer: [Portfolio](https://portfolio.rosk.dev)`; + + await retryApiCall(async () => { + return await octokit.issues.createComment({ + owner: OWNER, 
+ repo: REPO, + issue_number: ISSUE_NUMBER, + body: commentBody, + }); + }); + + console.log("โœ… Cleanup confirmation comment posted on the issue"); + + console.log(`\n=== Cleanup Summary ===`); + console.log(`๐Ÿ“Š Issue #${ISSUE_NUMBER}: "${closedIssue.title}"`); + console.log(`๐Ÿ—‘๏ธ Vectors deleted: ${vectorsToDelete.length}`); + console.log(`โœ… Database cleanup completed successfully`); + console.log(`๐Ÿ’ฌ Confirmation comment posted`); + } catch (error) { + console.error("โŒ Error during cleanup:", error); + + // Try to post an error comment if possible with retry logic + try { + await retryApiCall(async () => { + return await octokit.issues.createComment({ + owner: OWNER, + repo: REPO, + issue_number: ISSUE_NUMBER, + body: + `โš ๏ธ **Issue Cleanup Failed** โš ๏ธ\n\n` + + `There was an error while trying to clean up this closed issue from our duplicate detection database.\n\n` + + `**Error:** ${error.message}\n\n` + + `A maintainer may need to manually review the vector database cleanup.\n\n` + + `*This comment was generated automatically by Seroski-DupBot ๐Ÿค–*` + + `\n\nCheck out the developer: [Portfolio](https://portfolio.rosk.dev)`, + }); + }); + } catch (commentError) { + console.error("โŒ Failed to post error comment:", commentError.message); + } + + process.exit(1); + } +} + +// Handle command line arguments +const args = process.argv.slice(2); +if (args.includes("--help") || args.includes("-h")) { + console.log(` +๐Ÿ“– Usage: node scripts/cleanup-closed-issue.js + +๐Ÿ”ง Required Environment Variables: + - GITHUB_TOKEN: GitHub personal access token + - GITHUB_REPOSITORY: Repository in format "owner/repo" (or use GITHUB_OWNER + GITHUB_REPO) + - ISSUE_NUMBER: Issue number to clean up + - PINECONE_API_KEY: Pinecone API key + - PINECONE_INDEX: Pinecone index name + +๐Ÿ“ This script will: + 1. Find all vectors in Pinecone related to the specified issue number + 2. Delete those vectors from the Pinecone index + 3. 
Post a confirmation comment on the closed issue + +โš ๏ธ Note: This script is typically called automatically by GitHub Actions when issues are closed. + `); + process.exit(0); +} + +// Run the cleanup script +cleanupClosedIssue().catch((error) => { + console.error("๐Ÿ’ฅ Cleanup script failed:", error); + process.exit(1); +}); diff --git a/.github/scripts/cleanup-duplicates.js b/.github/scripts/cleanup-duplicates.js new file mode 100644 index 0000000..8fc085b --- /dev/null +++ b/.github/scripts/cleanup-duplicates.js @@ -0,0 +1,195 @@ +import { Pinecone } from "@pinecone-database/pinecone"; +import dotenv from "dotenv"; + +// Load environment variables +dotenv.config(); + +const pinecone = new Pinecone({ + apiKey: process.env.PINECONE_API_KEY, +}); + +const indexName = process.env.PINECONE_INDEX; + +// Add delay to respect API rate limits +function delay(ms) { + return new Promise(resolve => setTimeout(resolve, ms)); +} + +async function cleanupDuplicates() { + console.log(`\n=== Cleaning up duplicate vectors in Pinecone ===`); + console.log(`Pinecone Index: ${indexName}`); + + try { + const index = pinecone.Index(indexName); + console.log("โœ… Connected to Pinecone index"); + + // Get all vectors + console.log("๐Ÿ“ฅ Fetching all vectors..."); + const allVectors = await index.query({ + vector: Array(1024).fill(0.1), + topK: 1000, // Should be enough for all vectors + includeMetadata: true, + includeValues: false + }); + + if (!allVectors.matches || allVectors.matches.length === 0) { + console.log("โ„น๏ธ No vectors found in the index."); + return; + } + + console.log(`๐Ÿ“Š Found ${allVectors.matches.length} total vectors`); + + // Group vectors by issue number + const vectorsByIssue = new Map(); + + for (const vector of allVectors.matches) { + const issueNumber = vector.metadata?.issue_number; + if (issueNumber) { + if (!vectorsByIssue.has(issueNumber)) { + vectorsByIssue.set(issueNumber, []); + } + vectorsByIssue.get(issueNumber).push(vector); + } + } + + 
console.log(`๐Ÿ” Found vectors for ${vectorsByIssue.size} different issues`); + + // Find duplicates and decide which to keep + const vectorsToDelete = []; + const vectorsToKeep = []; + + for (const [issueNumber, vectors] of vectorsByIssue) { + console.log(`\n๐Ÿ“‹ Issue #${issueNumber}: ${vectors.length} vector(s)`); + + if (vectors.length === 1) { + console.log(` โœ… No duplicates for issue #${issueNumber}`); + vectorsToKeep.push(vectors[0]); + } else { + console.log(` ๐Ÿ” Found ${vectors.length} vectors, selecting which to keep...`); + + // Sort vectors: prefer non-timestamped IDs (clean format) + vectors.sort((a, b) => { + const aHasTimestamp = /-\d{13}/.test(a.id); + const bHasTimestamp = /-\d{13}/.test(b.id); + + if (!aHasTimestamp && bHasTimestamp) return -1; // a comes first (keep a) + if (aHasTimestamp && !bHasTimestamp) return 1; // b comes first (keep b) + return a.id.localeCompare(b.id); // alphabetical if both same type + }); + + const toKeep = vectors[0]; + const toDelete = vectors.slice(1); + + console.log(` โœ… Keeping: ${toKeep.id}`); + vectorsToKeep.push(toKeep); + + toDelete.forEach(v => { + console.log(` ๐Ÿ—‘๏ธ Deleting: ${v.id}`); + vectorsToDelete.push(v.id); + }); + } + } + + console.log(`\n๐Ÿ“Š Summary:`); + console.log(` โœ… Vectors to keep: ${vectorsToKeep.length}`); + console.log(` ๐Ÿ—‘๏ธ Vectors to delete: ${vectorsToDelete.length}`); + + if (vectorsToDelete.length === 0) { + console.log("๐ŸŽ‰ No cleanup needed! 
All vectors are unique."); + return; + } + + // Confirm before deletion + console.log(`\nโš ๏ธ About to delete ${vectorsToDelete.length} duplicate vectors.`); + console.log("๐Ÿ” Vectors to delete:"); + vectorsToDelete.forEach(id => console.log(` - ${id}`)); + + // Delete in batches + console.log("\n๐Ÿงน Starting cleanup..."); + const batchSize = 100; // Pinecone delete limit + let deleted = 0; + + for (let i = 0; i < vectorsToDelete.length; i += batchSize) { + const batch = vectorsToDelete.slice(i, i + batchSize); + + try { + await index.deleteMany(batch); + deleted += batch.length; + console.log(` ๐Ÿ—‘๏ธ Deleted batch: ${batch.length} vectors (total: ${deleted}/${vectorsToDelete.length})`); + + // Add delay between batches + await delay(1000); + } catch (error) { + console.error(` โŒ Failed to delete batch:`, error.message); + console.error(` Batch IDs: ${batch.join(', ')}`); + } + } + + console.log(`\n๐ŸŽ‰ Cleanup completed!`); + console.log(`โœ… Deleted: ${deleted}/${vectorsToDelete.length} duplicate vectors`); + console.log(`๐Ÿ“Š Remaining vectors: ${vectorsToKeep.length} (one per issue)`); + + // Verify cleanup + console.log("\n๐Ÿ” Verifying cleanup..."); + await delay(2000); // Wait for Pinecone to sync + + const finalStats = await index.describeIndexStats(); + const finalCount = finalStats.totalRecordCount || 0; + console.log(`๐Ÿ“Š Final vector count: ${finalCount}`); + + if (finalCount === vectorsToKeep.length) { + console.log("โœ… Cleanup verification successful!"); + } else { + console.log(`โš ๏ธ Expected ${vectorsToKeep.length} vectors, but found ${finalCount}`); + } + + } catch (error) { + console.error("โŒ Error during cleanup:", error); + process.exit(1); + } +} + +// Handle command line arguments +const args = process.argv.slice(2); +if (args.includes('--help') || args.includes('-h')) { + console.log(` +๐Ÿ“– Usage: node scripts/cleanup-duplicates.js + +๐Ÿ”ง Required Environment Variables: + - PINECONE_API_KEY: Pinecone API key + - 
PINECONE_INDEX: Pinecone index name + +๐Ÿ“ This script will: + 1. Find all vectors in your Pinecone index + 2. Group them by issue number + 3. Identify and remove duplicate vectors + 4. Keep only one vector per issue (preferring clean IDs) + +โš ๏ธ WARNING: This will permanently delete duplicate vectors! + `); + process.exit(0); +} + +// Confirmation prompt for safety +if (!args.includes('--force')) { + console.log(` +โš ๏ธ WARNING: This script will delete duplicate vectors from your Pinecone index! + +๐Ÿ“‹ What it will do: + โ€ข Find all vectors with the same issue_number + โ€ข Keep the vector with the cleanest ID format (without timestamp) + โ€ข Delete all other duplicate vectors + +๐Ÿšจ This action cannot be undone! + +To proceed, run: node scripts/cleanup-duplicates.js --force +To see help: node scripts/cleanup-duplicates.js --help + `); + process.exit(0); +} + +// Run the cleanup +cleanupDuplicates().catch(error => { + console.error("๐Ÿ’ฅ Script failed:", error); + process.exit(1); +}); \ No newline at end of file diff --git a/.github/scripts/cleanup-specific-issue.js b/.github/scripts/cleanup-specific-issue.js new file mode 100644 index 0000000..44b1ef3 --- /dev/null +++ b/.github/scripts/cleanup-specific-issue.js @@ -0,0 +1,148 @@ +import { Pinecone } from "@pinecone-database/pinecone"; +import dotenv from "dotenv"; + +// Load environment variables +dotenv.config(); + +const pinecone = new Pinecone({ + apiKey: process.env.PINECONE_API_KEY, +}); + +const indexName = process.env.PINECONE_INDEX; +const ISSUE_TO_DELETE = process.env.ISSUE_NUMBER || process.argv[2]; + +async function deleteIssueVectors() { + console.log(`\n=== Deleting vectors for Issue #${ISSUE_TO_DELETE} ===`); + console.log(`Pinecone Index: ${indexName}`); + + if (!ISSUE_TO_DELETE) { + console.error("โŒ Please provide an issue number:"); + console.error(" Usage: ISSUE_NUMBER=6 node scripts/cleanup-specific-issue.js"); + console.error(" Or: node scripts/cleanup-specific-issue.js 6"); + 
process.exit(1); + } + + try { + const index = pinecone.Index(indexName); + console.log("โœ… Connected to Pinecone index"); + + // Find all vectors for this issue + console.log(`๐Ÿ” Searching for vectors related to issue #${ISSUE_TO_DELETE}...`); + + const vectorsToDelete = []; + + try { + // First, try using metadata filter + const queryResponse = await index.query({ + vector: Array(1024).fill(0.1), // dummy vector for metadata filtering + topK: 100, + includeValues: false, + includeMetadata: true, + filter: { + issue_number: parseInt(ISSUE_TO_DELETE) + } + }); + + if (queryResponse.matches && queryResponse.matches.length > 0) { + for (const match of queryResponse.matches) { + vectorsToDelete.push(match.id); + console.log(` ๐Ÿ“Œ Found vector via filter: ${match.id}`); + console.log(` Metadata:`, JSON.stringify(match.metadata, null, 2)); + } + } else { + console.log(" ๐Ÿ”„ Filter query returned no results, trying list approach..."); + + // Fallback: List all vectors and filter + let paginationToken = null; + + do { + const listOptions = { limit: 100 }; + if (paginationToken) { + listOptions.paginationToken = paginationToken; + } + + const listResponse = await index.listPaginated(listOptions); + + if (listResponse.vectors) { + for (const vector of listResponse.vectors) { + if (vector.metadata?.issue_number === parseInt(ISSUE_TO_DELETE)) { + vectorsToDelete.push(vector.id); + console.log(` ๐Ÿ“Œ Found vector via list: ${vector.id}`); + console.log(` Metadata:`, JSON.stringify(vector.metadata, null, 2)); + } + } + } + + paginationToken = listResponse.pagination?.next; + } while (paginationToken); + } + } catch (searchError) { + console.error("โŒ Error searching for vectors:", searchError.message); + throw searchError; + } + + console.log(`\nFound ${vectorsToDelete.length} vector(s) to delete for Issue #${ISSUE_TO_DELETE}`); + + if (vectorsToDelete.length === 0) { + console.log(`โ„น๏ธ No vectors found for Issue #${ISSUE_TO_DELETE}. 
Nothing to delete.`); + return; + } + + // Show what we're about to delete + console.log(`\n๐Ÿ—‘๏ธ About to delete the following vectors:`); + vectorsToDelete.forEach((id, index) => { + console.log(` ${index + 1}. ${id}`); + }); + + // Confirm deletion + console.log(`\nโš ๏ธ This action cannot be undone!`); + + // Delete the vectors + console.log(`\n๐Ÿ—‘๏ธ Deleting ${vectorsToDelete.length} vector(s)...`); + + try { + await index.deleteMany(vectorsToDelete); + console.log(`โœ… Successfully deleted ${vectorsToDelete.length} vector(s) for Issue #${ISSUE_TO_DELETE}`); + } catch (deleteError) { + console.error(`โŒ Error deleting vectors:`, deleteError.message); + throw deleteError; + } + + console.log(`\n=== Cleanup Summary ===`); + console.log(`๐Ÿ“Š Issue #${ISSUE_TO_DELETE} vectors deleted: ${vectorsToDelete.length}`); + console.log(`โœ… Database cleanup completed successfully`); + console.log(`\n๐ŸŽฏ You can now edit Issue #${ISSUE_TO_DELETE} to test the update functionality!`); + + } catch (error) { + console.error("โŒ Error during cleanup:", error); + process.exit(1); + } +} + +// Handle command line arguments +const args = process.argv.slice(2); +if (args.includes('--help') || args.includes('-h')) { + console.log(` +๐Ÿ“– Usage: + ISSUE_NUMBER=6 node scripts/cleanup-specific-issue.js + node scripts/cleanup-specific-issue.js 6 + +๐Ÿ”ง Required Environment Variables: + - PINECONE_API_KEY: Pinecone API key + - PINECONE_INDEX: Pinecone index name + +๐Ÿ“ This script will: + 1. Find all vectors in Pinecone related to the specified issue number + 2. Delete those vectors from the Pinecone index + 3. Show a summary of what was deleted + +โš ๏ธ Note: This action cannot be undone! Use carefully. 
+ `); + process.exit(0); +} + +// Run the cleanup script +deleteIssueVectors().catch(error => { + console.error("๐Ÿ’ฅ Cleanup script failed:", error); + process.exit(1); +}); \ No newline at end of file diff --git a/.github/scripts/clear-all-vectors.js b/.github/scripts/clear-all-vectors.js new file mode 100644 index 0000000..52d2db4 --- /dev/null +++ b/.github/scripts/clear-all-vectors.js @@ -0,0 +1,197 @@ +import { Pinecone } from "@pinecone-database/pinecone"; +import dotenv from "dotenv"; + +// Load environment variables +dotenv.config(); + +const pinecone = new Pinecone({ + apiKey: process.env.PINECONE_API_KEY, +}); + +const indexName = process.env.PINECONE_INDEX; + +// Add delay to respect API rate limits +function delay(ms) { + return new Promise(resolve => setTimeout(resolve, ms)); +} + +async function clearAllVectors() { + console.log(`\n๐Ÿšจ === CLEARING ALL VECTORS FROM PINECONE INDEX ===`); + console.log(`Pinecone Index: ${indexName}`); + console.log(`โš ๏ธ WARNING: This will delete ALL vectors permanently!`); + + try { + const index = pinecone.Index(indexName); + console.log("โœ… Connected to Pinecone index"); + + // Get current stats + console.log("๐Ÿ“Š Getting current index statistics..."); + const initialStats = await index.describeIndexStats(); + const totalVectors = initialStats.totalRecordCount || 0; + + console.log(`๐Ÿ“‹ Current state:`); + console.log(` - Total vectors: ${totalVectors}`); + console.log(` - Index dimension: ${initialStats.dimension}`); + console.log(` - Index fullness: ${initialStats.indexFullness}`); + + if (totalVectors === 0) { + console.log("โ„น๏ธ Index is already empty. 
Nothing to clear."); + return; + } + + // Final confirmation in logs + console.log(`\n๐Ÿšจ PROCEEDING TO DELETE ALL ${totalVectors} VECTORS`); + console.log("โš ๏ธ This action cannot be undone!"); + + // Method 1: Try to delete all vectors by namespace (fastest) + try { + console.log("\n๐Ÿงน Attempting to clear entire namespace..."); + await index.deleteAll(); + console.log("โœ… Successfully cleared entire namespace"); + + // Wait for operation to complete + await delay(5000); + + } catch (deleteAllError) { + console.log("โš ๏ธ deleteAll() failed, trying alternative method..."); + console.error("Error:", deleteAllError.message); + + // Method 2: Get all vectors and delete them in batches + console.log("๐Ÿ” Fetching all vectors for batch deletion..."); + + const allVectors = await index.query({ + vector: Array(1024).fill(0.1), + topK: 10000, // Max limit + includeMetadata: false, + includeValues: false + }); + + if (allVectors.matches && allVectors.matches.length > 0) { + console.log(`๐Ÿ“‹ Found ${allVectors.matches.length} vectors to delete`); + + // Delete in batches + const batchSize = 1000; + let deleted = 0; + + for (let i = 0; i < allVectors.matches.length; i += batchSize) { + const batch = allVectors.matches.slice(i, i + batchSize); + const batchIds = batch.map(v => v.id); + + try { + await index.deleteMany(batchIds); + deleted += batch.length; + console.log(` ๐Ÿ—‘๏ธ Deleted batch: ${batch.length} vectors (total: ${deleted}/${allVectors.matches.length})`); + + await delay(1000); + } catch (batchError) { + console.error(` โŒ Failed to delete batch:`, batchError.message); + } + } + + console.log(`โœ… Batch deletion completed: ${deleted}/${allVectors.matches.length} vectors`); + } + } + + // Verify the clearing + console.log("\n๐Ÿ” Verifying index is cleared..."); + await delay(3000); // Wait for Pinecone to sync + + const finalStats = await index.describeIndexStats(); + const remainingVectors = finalStats.totalRecordCount || 0; + + console.log(`\n๐Ÿ“Š 
Final Results:`); + console.log(` - Initial vectors: ${totalVectors}`); + console.log(` - Remaining vectors: ${remainingVectors}`); + console.log(` - Vectors cleared: ${totalVectors - remainingVectors}`); + + if (remainingVectors === 0) { + console.log("๐ŸŽ‰ SUCCESS: All vectors have been cleared from the index!"); + console.log("๐Ÿ’ก You can now repopulate with fresh data using the populate script."); + } else { + console.log(`โš ๏ธ WARNING: ${remainingVectors} vectors still remain in the index.`); + console.log("This might be due to Pinecone sync delays. Check again in a few minutes."); + } + + } catch (error) { + console.error("โŒ Error during clearing:", error); + process.exit(1); + } +} + +// Handle command line arguments +const args = process.argv.slice(2); + +if (args.includes('--help') || args.includes('-h')) { + console.log(` +๐Ÿ“– Usage: node scripts/clear-all-vectors.js --force + +๐Ÿ”ง Required Environment Variables: + - PINECONE_API_KEY: Pinecone API key + - PINECONE_INDEX: Pinecone index name + +๐Ÿ“ This script will: + 1. Connect to your Pinecone index + 2. Delete ALL vectors in the index + 3. Verify the clearing operation + +๐Ÿšจ WARNING: This will permanently delete ALL data in your Pinecone index! +โš ๏ธ This action cannot be undone! + +๐Ÿ›ก๏ธ Safety: Requires --force flag to run + `); + process.exit(0); +} + +// Safety check - require --force flag +if (!args.includes('--force')) { + console.log(` +๐Ÿšจ DANGER: This script will delete ALL vectors from your Pinecone index! + +๐Ÿ“‹ What it will do: + โ€ข Connect to index: ${indexName} + โ€ข Delete every single vector in the database + โ€ข Clear all issue embeddings and similarity data + +๐Ÿšจ THIS ACTION CANNOT BE UNDONE! 
+ +๐Ÿ›ก๏ธ For safety, this script requires the --force flag: + node scripts/clear-all-vectors.js --force + +๐Ÿ’ก Alternative: Use the cleanup script to remove only duplicates: + node scripts/cleanup-duplicates.js --force + +๐Ÿ“– For help: node scripts/clear-all-vectors.js --help + `); + process.exit(0); +} + +// Final confirmation before destruction +console.log(` +โš ๏ธ FINAL WARNING โš ๏ธ + +You are about to DELETE ALL VECTORS from Pinecone index: ${indexName} + +This will: +- Remove all issue embeddings +- Destroy all similarity data +- Require repopulation from scratch + +Proceeding in 3 seconds... +`); + +// 3 second countdown +setTimeout(() => { + console.log("3..."); + setTimeout(() => { + console.log("2..."); + setTimeout(() => { + console.log("1..."); + setTimeout(() => { + clearAllVectors().catch(error => { + console.error("๐Ÿ’ฅ Script failed:", error); + process.exit(1); + }); + }, 1000); + }, 1000); + }, 1000); +}, 1000); \ No newline at end of file diff --git a/.github/scripts/debug-pinecone.js b/.github/scripts/debug-pinecone.js new file mode 100644 index 0000000..9d13067 --- /dev/null +++ b/.github/scripts/debug-pinecone.js @@ -0,0 +1,96 @@ +import { Pinecone } from "@pinecone-database/pinecone"; +import dotenv from "dotenv"; + +// Load environment variables +dotenv.config(); + +const pinecone = new Pinecone({ + apiKey: process.env.PINECONE_API_KEY, +}); + +const indexName = process.env.PINECONE_INDEX; + +async function debugPinecone() { + console.log("=== Pinecone Debug Information ==="); + console.log(`Index: ${indexName}`); + + try { + const index = pinecone.Index(indexName); + + // Get index stats + console.log("\n1. Index Statistics:"); + const stats = await index.describeIndexStats(); + console.log("Full stats object:", JSON.stringify(stats, null, 2)); + + // Try to query some vectors + console.log("\n2. 
Sample Query (first 10 vectors):"); + try { + const queryResult = await index.query({ + vector: Array(1024).fill(0.1), + topK: 10, + includeMetadata: true, + includeValues: false + }); + + console.log(`Found ${queryResult.matches?.length || 0} vectors`); + if (queryResult.matches && queryResult.matches.length > 0) { + queryResult.matches.forEach((match, i) => { + console.log(` ${i + 1}. ID: ${match.id}, Score: ${match.score}`); + if (match.metadata) { + console.log(` Metadata:`, match.metadata); + } + }); + } + } catch (queryError) { + console.error("Query failed:", queryError.message); + } + + // Try specific fetch for known IDs + console.log("\n3. Testing specific ID fetch:"); + const testIds = ['issue-1', 'issue-3', 'issue-4', 'issue-5', 'issue-6', 'issue-7', 'issue-8']; + + try { + const fetchResult = await index.fetch(testIds); + console.log(`Fetch result keys: ${Object.keys(fetchResult.vectors || {}).join(', ')}`); + + if (fetchResult.vectors) { + Object.entries(fetchResult.vectors).forEach(([id, vector]) => { + console.log(` Found: ${id}`); + if (vector.metadata) { + console.log(` Issue #: ${vector.metadata.issue_number}`); + console.log(` Title: ${vector.metadata.title?.substring(0, 50)}...`); + } + }); + } + } catch (fetchError) { + console.error("Fetch failed:", fetchError.message); + } + + // Try with different ID patterns (in case they have timestamps) + console.log("\n4. 
Checking for timestamped IDs:"); + try { + const allQuery = await index.query({ + vector: Array(1024).fill(0.1), + topK: 100, + includeMetadata: true, + includeValues: false + }); + + if (allQuery.matches && allQuery.matches.length > 0) { + console.log("All vector IDs found:"); + allQuery.matches.forEach(match => { + console.log(` - ${match.id} (issue #${match.metadata?.issue_number || 'unknown'})`); + }); + } else { + console.log("No vectors found in query"); + } + } catch (allQueryError) { + console.error("All query failed:", allQueryError.message); + } + + } catch (error) { + console.error("Debug failed:", error); + } +} + +debugPinecone().catch(console.error); \ No newline at end of file diff --git a/.github/scripts/populate-existing-issues.js b/.github/scripts/populate-existing-issues.js new file mode 100644 index 0000000..281a028 --- /dev/null +++ b/.github/scripts/populate-existing-issues.js @@ -0,0 +1,349 @@ +import { Octokit } from "@octokit/rest"; +import fetch from "node-fetch"; +import { Pinecone } from "@pinecone-database/pinecone"; +import dotenv from "dotenv"; + +// Load environment variables +dotenv.config(); + +const octokit = new Octokit({ auth: process.env.GITHUB_TOKEN }); +const OWNER = process.env.GITHUB_REPOSITORY?.split("/")[0] || process.env.GITHUB_OWNER; +const REPO = process.env.GITHUB_REPOSITORY?.split("/")[1] || process.env.GITHUB_REPO; + +// Initialize Pinecone client +const pinecone = new Pinecone({ + apiKey: process.env.PINECONE_API_KEY, +}); + +const indexName = process.env.PINECONE_INDEX; + +// Gemini embedding function +async function generateEmbedding(text) { + try { + const response = await fetch( + `https://generativelanguage.googleapis.com/v1beta/models/text-embedding-004:embedContent?key=${process.env.GEMINI_API_KEY}`, + { + method: "POST", + headers: { + "Content-Type": "application/json", + }, + body: JSON.stringify({ + model: "models/text-embedding-004", + content: { parts: [{ text: text }] } + }), + } + ); + + const data = 
await response.json(); + + if (data.error) { + console.error("Gemini API Error:", data.error); + return Array(1024).fill(0.01); + } + + if (!data.embedding || !data.embedding.values) { + console.error("Invalid embedding response:", data); + return Array(1024).fill(0.01); + } + + // Pad or truncate to match Pinecone index dimension (1024) + let embedding = data.embedding.values; + if (embedding.length < 1024) { + embedding = [...embedding, ...Array(1024 - embedding.length).fill(0)]; + } else if (embedding.length > 1024) { + embedding = embedding.slice(0, 1024); + } + + return embedding; + } catch (error) { + console.error("Error generating embedding:", error); + return Array(1024).fill(0.01); + } +} + +// Add delay to respect API rate limits +function delay(ms) { + return new Promise(resolve => setTimeout(resolve, ms)); +} + +async function populateExistingIssues() { + console.log(`\n=== Populating Pinecone with existing open issues ===`); + console.log(`Repository: ${OWNER}/${REPO}`); + console.log(`Pinecone Index: ${indexName}`); + + if (!OWNER || !REPO) { + console.error("โŒ Repository owner and name must be specified via GITHUB_REPOSITORY or GITHUB_OWNER/GITHUB_REPO environment variables"); + process.exit(1); + } + + try { + // Initialize Pinecone index + const index = pinecone.Index(indexName); + console.log("โœ… Connected to Pinecone index"); + + // Fetch all open issues from the repository + console.log("๐Ÿ“ฅ Fetching open issues from GitHub..."); + + let allIssues = []; + let page = 1; + const perPage = 100; + + while (true) { + const { data: issues } = await octokit.issues.listForRepo({ + owner: OWNER, + repo: REPO, + state: 'open', + per_page: perPage, + page: page, + }); + + if (issues.length === 0) break; + + // Filter out pull requests (they show up in issues API) + const actualIssues = issues.filter(issue => !issue.pull_request); + allIssues = allIssues.concat(actualIssues); + + console.log(` ๐Ÿ“„ Fetched page ${page} - ${actualIssues.length} 
issues`); + page++; + + // Add delay to respect GitHub API rate limits + await delay(1000); + } + + console.log(`โœ… Total open issues found: ${allIssues.length}`); + + if (allIssues.length === 0) { + console.log("โ„น๏ธ No open issues found. Nothing to populate."); + return; + } + + // Check if issues already exist in Pinecone to avoid duplicates + console.log("๐Ÿ” Checking for existing issues in Pinecone..."); + + const existingIssueNumbers = new Set(); + + try { + // Get index statistics first + const stats = await index.describeIndexStats(); + const totalVectors = stats.totalRecordCount || 0; + console.log(` ๐Ÿ“Š Index contains ${totalVectors} total vectors`); + + if (totalVectors === 0) { + console.log(" โ„น๏ธ Index is empty, all issues will be processed"); + } else { + // Use multiple approaches to check for existing vectors + console.log(" ๐Ÿ” Checking for existing issue vectors..."); + + // Method 1: Try to query with a sample vector to get some existing vectors + try { + console.log(" ๐Ÿ” Sampling existing vectors..."); + const sampleQuery = await index.query({ + vector: Array(1024).fill(0.1), + topK: Math.min(100, totalVectors), + includeMetadata: true + }); + + if (sampleQuery.matches && sampleQuery.matches.length > 0) { + console.log(` ๐Ÿ“‹ Found ${sampleQuery.matches.length} sample vectors`); + for (const match of sampleQuery.matches) { + if (match.metadata?.issue_number) { + existingIssueNumbers.add(match.metadata.issue_number); + console.log(` โœ“ Found existing issue #${match.metadata.issue_number}`); + } + } + } + } catch (sampleError) { + console.log(" โš ๏ธ Sample query failed, trying direct fetch approach"); + } + + // Method 2: Try to fetch vectors by their expected IDs + console.log(" ๐Ÿ” Checking by direct ID lookup..."); + for (let i = 0; i < allIssues.length; i += 10) { + const batch = allIssues.slice(i, i + 10); + + // Try to fetch vectors by their expected IDs + const vectorIds = batch.map(issue => `issue-${issue.number}`); + + try 
{ + const fetchResult = await index.fetch(vectorIds); + + if (fetchResult.vectors) { + Object.keys(fetchResult.vectors).forEach(vectorId => { + const match = vectorId.match(/issue-(\d+)/); + if (match) { + const issueNum = parseInt(match[1]); + if (!existingIssueNumbers.has(issueNum)) { + existingIssueNumbers.add(issueNum); + console.log(` โœ“ Found existing issue #${issueNum} by ID`); + } + } + }); + } + } catch (fetchError) { + // If fetch fails, try metadata filter queries for this batch + console.log(` โš ๏ธ Fetch failed for batch, trying metadata queries...`); + for (const issue of batch) { + try { + const queryResult = await index.query({ + vector: Array(1024).fill(0.1), + filter: { issue_number: { $eq: issue.number } }, + topK: 1, + includeMetadata: true + }); + + if (queryResult.matches && queryResult.matches.length > 0) { + if (!existingIssueNumbers.has(issue.number)) { + existingIssueNumbers.add(issue.number); + console.log(` โœ“ Found existing issue #${issue.number} by query`); + } + } + } catch (queryError) { + // Silently continue - assume issue doesn't exist + } + } + } + + // Small delay between batches + await delay(300); + } + } + } catch (error) { + console.log(" โš ๏ธ Error checking existing issues:", error.message); + console.log(" ๐Ÿ”„ Will process all issues to be safe"); + } + + console.log(`Found ${existingIssueNumbers.size} existing issues in Pinecone`); + + // Filter out issues that already exist in Pinecone + const newIssues = allIssues.filter(issue => !existingIssueNumbers.has(issue.number)); + const skippedCount = allIssues.length - newIssues.length; + + console.log(`๐Ÿ“ ${newIssues.length} new issues to process`); + console.log(`โญ๏ธ ${skippedCount} issues skipped (already exist in Pinecone)`); + + if (skippedCount > 0) { + console.log(` Skipped issues: ${Array.from(existingIssueNumbers).sort((a, b) => a - b).join(', ')}`); + } + + if (newIssues.length === 0) { + console.log("โœ… All open issues are already in Pinecone. 
Nothing to add."); + return; + } + + // Process issues in batches to avoid overwhelming the APIs + const batchSize = 10; + let processed = 0; + let successful = 0; + let failed = 0; + + for (let i = 0; i < newIssues.length; i += batchSize) { + const batch = newIssues.slice(i, i + batchSize); + console.log(`\n๐Ÿ“ฆ Processing batch ${Math.floor(i / batchSize) + 1}/${Math.ceil(newIssues.length / batchSize)}`); + + const vectors = []; + + for (const issue of batch) { + try { + console.log(` ๐Ÿ”„ Processing issue #${issue.number}: "${issue.title.substring(0, 50)}..."`); + + // Combine title and body for embedding + const issueText = `${issue.title} ${issue.body || ""}`; + + // Generate embedding + const embedding = await generateEmbedding(issueText); + + // Prepare vector for Pinecone - use consistent ID format + const vectorId = `issue-${issue.number}`; + vectors.push({ + id: vectorId, + values: embedding, + metadata: { + issue_number: issue.number, + title: issue.title, + content: issueText, + created_at: issue.created_at, + updated_at: issue.updated_at, + url: issue.html_url, + state: issue.state, + labels: issue.labels?.map(label => label.name).join(', ') || '', + author: issue.user?.login || 'unknown' + } + }); + + processed++; + console.log(` โœ… Issue #${issue.number} prepared`); + + // Add delay between API calls to respect rate limits + await delay(500); + + } catch (error) { + console.error(` โŒ Failed to process issue #${issue.number}:`, error.message); + failed++; + } + } + + // Upsert batch to Pinecone + if (vectors.length > 0) { + try { + console.log(` ๐Ÿ”„ Upserting ${vectors.length} vectors to Pinecone...`); + await index.upsert(vectors); + successful += vectors.length; + console.log(` โœ… Batch upserted to Pinecone: ${vectors.length} vectors`); + } catch (error) { + console.error(` โŒ Failed to upsert batch to Pinecone:`, error.message); + // Log which specific issues failed + console.error(` Failed issues: ${vectors.map(v => 
v.metadata.issue_number).join(', ')}`); + failed += vectors.length; + } + } + + // Add delay between batches + await delay(2000); + } + + console.log(`\n=== Population Summary ===`); + console.log(`๐Ÿ“Š Total issues processed: ${processed}`); + console.log(`โœ… Successfully added to Pinecone: ${successful}`); + console.log(`โŒ Failed: ${failed}`); + console.log(`๐Ÿ“ˆ Success rate: ${((successful / processed) * 100).toFixed(1)}%`); + + if (successful > 0) { + console.log(`\n๐ŸŽ‰ Successfully populated Pinecone with ${successful} issue embeddings!`); + console.log(`๐Ÿค– Your duplicate detection bot is now ready to work with existing issues.`); + } + + } catch (error) { + console.error("โŒ Error during population:", error); + process.exit(1); + } +} + +// Handle command line arguments +const args = process.argv.slice(2); +if (args.includes('--help') || args.includes('-h')) { + console.log(` +๐Ÿ“– Usage: node scripts/populate-existing-issues.js + +๐Ÿ”ง Required Environment Variables: + - GITHUB_TOKEN: GitHub personal access token + - GITHUB_REPOSITORY: Repository in format "owner/repo" (or use GITHUB_OWNER + GITHUB_REPO) + - PINECONE_API_KEY: Pinecone API key + - PINECONE_INDEX: Pinecone index name + - GEMINI_API_KEY: Google Gemini API key + +๐Ÿ“ This script will: + 1. Fetch all open issues from your GitHub repository + 2. Generate embeddings using Google Gemini + 3. Store them in your Pinecone vector database + 4. Skip issues that already exist in Pinecone + +โš ๏ธ Note: This script respects API rate limits and processes issues in batches. 
+ `); + process.exit(0); +} + +// Run the population script +populateExistingIssues().catch(error => { + console.error("๐Ÿ’ฅ Script failed:", error); + process.exit(1); +}); \ No newline at end of file diff --git a/.github/scripts/validate-apis.js b/.github/scripts/validate-apis.js new file mode 100644 index 0000000..0dc1b5d --- /dev/null +++ b/.github/scripts/validate-apis.js @@ -0,0 +1,175 @@ +import { Pinecone } from "@pinecone-database/pinecone"; +import { Octokit } from "@octokit/rest"; +import fetch from "node-fetch"; +import dotenv from "dotenv"; + +// Load environment variables for local development +dotenv.config(); + +// Validation functions +async function validatePinecone() { + try { + if (!process.env.PINECONE_API_KEY) { + throw new Error("PINECONE_API_KEY not found in environment variables"); + } + if (!process.env.PINECONE_INDEX) { + throw new Error("PINECONE_INDEX not found in environment variables"); + } + + const pinecone = new Pinecone({ apiKey: process.env.PINECONE_API_KEY }); + const index = pinecone.Index(process.env.PINECONE_INDEX); + + const stats = await index.describeIndexStats(); + + console.log('โœ… Pinecone connection successful'); + console.log(`๐Ÿ“Š Index: ${process.env.PINECONE_INDEX}`); + console.log(`๐Ÿ“ˆ Total vectors: ${stats.totalRecordCount || 0}`); + console.log(`๐Ÿ“ Dimension: ${stats.dimension}`); + + return { success: true, stats }; + } catch (error) { + console.error('โŒ Pinecone validation failed:', error.message); + return { success: false, error: error.message }; + } +} + +async function validateGitHub() { + try { + if (!process.env.GITHUB_TOKEN) { + throw new Error("GITHUB_TOKEN not found in environment variables"); + } + + const octokit = new Octokit({ auth: process.env.GITHUB_TOKEN }); + + // Test with current repository or fallback + const owner = process.env.GITHUB_REPOSITORY?.split("/")[0] || process.env.GITHUB_OWNER || "seroski-ai"; + const repo = process.env.GITHUB_REPOSITORY?.split("/")[1] || 
process.env.GITHUB_REPO || "seroski-dupbot"; + + const result = await octokit.repos.get({ owner, repo }); + + console.log('โœ… GitHub connection successful'); + console.log(`๐Ÿ“‹ Repository: ${result.data.full_name}`); + console.log(`๐Ÿ”“ Access: ${result.data.permissions?.admin ? 'Admin' : result.data.permissions?.push ? 'Write' : 'Read'}`); + + return { success: true, repo: result.data }; + } catch (error) { + console.error('โŒ GitHub validation failed:', error.message); + return { success: false, error: error.message }; + } +} + +async function validateGemini() { + try { + if (!process.env.GEMINI_API_KEY) { + throw new Error("GEMINI_API_KEY not found in environment variables"); + } + + const response = await fetch( + `https://generativelanguage.googleapis.com/v1beta/models/text-embedding-004:embedContent?key=${process.env.GEMINI_API_KEY}`, + { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ + model: "models/text-embedding-004", + content: { parts: [{ text: "connection test" }] } + }), + } + ); + + if (!response.ok) { + const errorData = await response.text(); + throw new Error(`HTTP ${response.status}: ${response.statusText} - ${errorData}`); + } + + const data = await response.json(); + + if (data.error) { + throw new Error(data.error.message || 'Unknown Gemini API error'); + } + + console.log('โœ… Gemini API connection successful'); + console.log('๐Ÿง  Model: text-embedding-004'); + console.log(`๐Ÿ“Š Embedding dimension: ${data.embedding?.values?.length || 'unknown'}`); + + return { success: true, embedding: data.embedding }; + } catch (error) { + console.error('โŒ Gemini validation failed:', error.message); + return { success: false, error: error.message }; + } +} + +async function validateAllConnections() { + console.log('๐Ÿ” === API Connection Validation ===\n'); + + const results = { + pinecone: await validatePinecone(), + github: await validateGitHub(), + gemini: await validateGemini() + }; + + 
console.log('\n๐Ÿ“‹ === Validation Summary ==='); + + const successful = Object.values(results).filter(r => r.success).length; + const total = Object.keys(results).length; + + console.log(`โœ… Successful: ${successful}/${total}`); + console.log(`โŒ Failed: ${total - successful}/${total}`); + + if (successful === total) { + console.log('\n๐ŸŽ‰ All API connections are working correctly!'); + process.exit(0); + } else { + console.log('\nโš ๏ธ Some API connections failed. Check the errors above.'); + process.exit(1); + } +} + +// Handle command line arguments +const args = process.argv.slice(2); +const service = args[0]; + +if (args.includes('--help') || args.includes('-h')) { + console.log(` +๐Ÿ“– Usage: node scripts/validate-apis.js [service] + +๐Ÿ”ง Available Services: + pinecone - Test Pinecone vector database connection + github - Test GitHub API connection + gemini - Test Google Gemini API connection + all - Test all connections (default) + +๐Ÿ”ง Required Environment Variables: + - PINECONE_API_KEY: Pinecone API key + - PINECONE_INDEX: Pinecone index name + - GITHUB_TOKEN: GitHub personal access token + - GEMINI_API_KEY: Google Gemini API key + - GITHUB_REPOSITORY: Repository in format "owner/repo" (optional) + +๐Ÿ“ Examples: + node scripts/validate-apis.js # Test all connections + node scripts/validate-apis.js pinecone # Test only Pinecone + node scripts/validate-apis.js gemini # Test only Gemini + `); + process.exit(0); +} + +// Run specific service or all +switch (service) { + case 'pinecone': + validatePinecone().then(result => { + process.exit(result.success ? 0 : 1); + }); + break; + case 'github': + validateGitHub().then(result => { + process.exit(result.success ? 0 : 1); + }); + break; + case 'gemini': + validateGemini().then(result => { + process.exit(result.success ? 
0 : 1); + }); + break; + default: + validateAllConnections(); +} \ No newline at end of file diff --git a/.github/workflows/api-validation.yml b/.github/workflows/api-validation.yml new file mode 100644 index 0000000..04ba934 --- /dev/null +++ b/.github/workflows/api-validation.yml @@ -0,0 +1,84 @@ +name: API Validation + +on: + workflow_dispatch: + inputs: + validation_scope: + description: 'Which APIs to validate' + required: true + default: 'all-apis' + type: choice + options: + - 'all-apis' + - 'pinecone-only' + - 'github-only' + - 'gemini-only' + +permissions: + issues: read + contents: read + +jobs: + validate-apis: + runs-on: ubuntu-latest + + steps: + - name: Checkout repository + uses: actions/checkout@v3 + + - name: Setup Node.js + uses: actions/setup-node@v3 + with: + node-version: 20 + + - name: Install dependencies + run: npm install + + - name: Validate All APIs + if: github.event.inputs.validation_scope == 'all-apis' + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }} + GITHUB_REPOSITORY: ${{ github.repository }} + PINECONE_API_KEY: ${{ secrets.PINECONE_API_KEY }} + PINECONE_INDEX: ${{ secrets.PINECONE_INDEX }} + run: | + echo "๐Ÿ” Validating all API connections..." + node .github/scripts/validate-apis.js + + - name: Validate Pinecone Only + if: github.event.inputs.validation_scope == 'pinecone-only' + env: + PINECONE_API_KEY: ${{ secrets.PINECONE_API_KEY }} + PINECONE_INDEX: ${{ secrets.PINECONE_INDEX }} + run: | + echo "๐Ÿ” Validating Pinecone database connection..." + node .github/scripts/validate-apis.js pinecone + + - name: Validate GitHub Only + if: github.event.inputs.validation_scope == 'github-only' + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + GITHUB_REPOSITORY: ${{ github.repository }} + run: | + echo "๐Ÿ” Validating GitHub API connection..." 
+ node .github/scripts/validate-apis.js github + + - name: Validate Gemini Only + if: github.event.inputs.validation_scope == 'gemini-only' + env: + GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }} + run: | + echo "๐Ÿ” Validating Gemini API connection..." + node .github/scripts/validate-apis.js gemini + + - name: Validation Summary + if: always() + run: | + echo "### ๐Ÿ” API Validation Summary" >> $GITHUB_STEP_SUMMARY + echo "- **Validation Scope:** ${{ github.event.inputs.validation_scope }}" >> $GITHUB_STEP_SUMMARY + echo "- **Repository:** ${{ github.repository }}" >> $GITHUB_STEP_SUMMARY + echo "- **Triggered by:** @${{ github.actor }}" >> $GITHUB_STEP_SUMMARY + echo "- **Timestamp:** $(date -u)" >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + echo "โœ… Run this before database operations to ensure API connectivity." >> $GITHUB_STEP_SUMMARY \ No newline at end of file diff --git a/.github/workflows/database-operations.yml b/.github/workflows/database-operations.yml new file mode 100644 index 0000000..901c0fc --- /dev/null +++ b/.github/workflows/database-operations.yml @@ -0,0 +1,104 @@ +name: Database Operations + +on: + workflow_dispatch: + inputs: + action: + description: 'Database operation to perform' + required: true + default: 'populate-issues' + type: choice + options: + - 'populate-issues' + - 'cleanup-duplicates' + - 'debug-database' + - 'clear-all-vectors' + force: + description: 'Force action (required for destructive operations)' + required: false + default: false + type: boolean + +permissions: + issues: read + contents: read + +jobs: + database-operation: + runs-on: ubuntu-latest + + steps: + - name: Checkout repository + uses: actions/checkout@v3 + + - name: Setup Node.js + uses: actions/setup-node@v3 + with: + node-version: 20 + + - name: Install dependencies + run: npm install + + - name: Populate Issues to Database + if: github.event.inputs.action == 'populate-issues' + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + 
GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }} + GITHUB_REPOSITORY: ${{ github.repository }} + PINECONE_API_KEY: ${{ secrets.PINECONE_API_KEY }} + PINECONE_INDEX: ${{ secrets.PINECONE_INDEX }} + run: | + echo "๐Ÿš€ Populating existing issues to database..." + echo "This will skip issues that already exist in the database." + node .github/scripts/populate-existing-issues.js + + - name: Cleanup Duplicate Vectors + if: github.event.inputs.action == 'cleanup-duplicates' + env: + PINECONE_API_KEY: ${{ secrets.PINECONE_API_KEY }} + PINECONE_INDEX: ${{ secrets.PINECONE_INDEX }} + run: | + echo "๐Ÿงน Cleaning up duplicate vectors..." + if [ "${{ github.event.inputs.force }}" = "true" ]; then + node .github/scripts/cleanup-duplicates.js --force + else + echo "โŒ Cleanup requires force flag to be enabled for safety!" + echo "Please re-run the workflow with 'Force action' checked." + exit 1 + fi + + - name: Debug Database + if: github.event.inputs.action == 'debug-database' + env: + PINECONE_API_KEY: ${{ secrets.PINECONE_API_KEY }} + PINECONE_INDEX: ${{ secrets.PINECONE_INDEX }} + run: | + echo "๐Ÿ” Running database diagnostics..." + node .github/scripts/debug-pinecone.js + + - name: Clear All Vectors + if: github.event.inputs.action == 'clear-all-vectors' + env: + PINECONE_API_KEY: ${{ secrets.PINECONE_API_KEY }} + PINECONE_INDEX: ${{ secrets.PINECONE_INDEX }} + run: | + echo "๐Ÿšจ DANGER: Clearing all vectors from database..." + if [ "${{ github.event.inputs.force }}" = "true" ]; then + echo "โš ๏ธ This will delete ALL vectors in the database!" + node .github/scripts/clear-all-vectors.js --force + else + echo "โŒ Clear all requires force flag to be enabled for safety!" + echo "Please re-run the workflow with 'Force action' checked." 
+ exit 1 + fi + + - name: Operation Summary + if: always() + run: | + echo "### ๐Ÿ—„๏ธ Database Operation Summary" >> $GITHUB_STEP_SUMMARY + echo "- **Operation:** ${{ github.event.inputs.action }}" >> $GITHUB_STEP_SUMMARY + echo "- **Force Flag:** ${{ github.event.inputs.force }}" >> $GITHUB_STEP_SUMMARY + echo "- **Repository:** ${{ github.repository }}" >> $GITHUB_STEP_SUMMARY + echo "- **Timestamp:** $(date -u)" >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + echo "๐Ÿ”— Use 'API Validation' workflow to test connections before database operations." >> $GITHUB_STEP_SUMMARY \ No newline at end of file diff --git a/.github/workflows/duplicate-issue.yml b/.github/workflows/duplicate-issue.yml new file mode 100644 index 0000000..a157bef --- /dev/null +++ b/.github/workflows/duplicate-issue.yml @@ -0,0 +1,99 @@ +name: Duplicate Issue Management + +on: + issues: + types: [opened, edited, closed, reopened] + workflow_dispatch: + inputs: + issue_number: + description: 'Issue number to manually check for duplicates' + required: true + type: number + +permissions: + issues: write + +jobs: + check-duplicates: + if: github.event.action == 'opened' || github.event.action == 'edited' || github.event.action == 'reopened' || github.event_name == 'workflow_dispatch' + runs-on: ubuntu-latest + env: + SIMILARITY_THRESHOLD: 0.7 + + steps: + - name: Checkout repository + uses: actions/checkout@v3 + + - name: Setup Node.js + uses: actions/setup-node@v3 + with: + node-version: 20 + + - name: Install dependencies + run: npm install + + - name: Set issue number + id: issue-number + run: | + if [ "${{ github.event_name }}" == "workflow_dispatch" ]; then + echo "ISSUE_NUMBER=${{ github.event.inputs.issue_number }}" >> $GITHUB_ENV + echo "Manual check for issue #${{ github.event.inputs.issue_number }}" + else + echo "ISSUE_NUMBER=${{ github.event.issue.number }}" >> $GITHUB_ENV + echo "Automatic check for issue #${{ github.event.issue.number }}" + fi + + - name: Run duplicate 
check
+        id: dup-check
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
+          GITHUB_REPOSITORY: ${{ github.repository }}
+          PINECONE_API_KEY: ${{ secrets.PINECONE_API_KEY }}
+          PINECONE_INDEX: ${{ secrets.PINECONE_INDEX }}
+        run: |
+          echo "๐Ÿ” Checking issue #$ISSUE_NUMBER for duplicates..."
+          echo "๐Ÿ“Š Similarity threshold: $SIMILARITY_THRESHOLD"
+          node .github/scripts/check-duplicates.js
+        continue-on-error: true
+
+      - name: Handle check failure
+        if: steps.dup-check.outcome == 'failure'
+        run: |
+          echo "โš ๏ธ Duplicate check failed, but continuing workflow (possible API limits or temporary issues)"
+          echo "Issue #$ISSUE_NUMBER will be processed normally"
+
+  cleanup-closed-issue:
+    if: github.event.action == 'closed'
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v3
+
+      - name: Setup Node.js
+        uses: actions/setup-node@v3
+        with:
+          node-version: 20
+
+      - name: Install dependencies
+        run: npm install
+
+      - name: Remove closed issue from vector database
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          ISSUE_NUMBER: ${{ github.event.issue.number }}
+          GITHUB_REPOSITORY: ${{ github.repository }}
+          PINECONE_API_KEY: ${{ secrets.PINECONE_API_KEY }}
+          PINECONE_INDEX: ${{ secrets.PINECONE_INDEX }}
+        run: |
+          echo "๐Ÿงน Removing closed issue #$ISSUE_NUMBER from database..."
+          node .github/scripts/cleanup-closed-issue.js
+        id: cleanup-db
+        continue-on-error: true
+
+      - name: Handle cleanup failure
+        if: steps.cleanup-db.outcome == 'failure'
+        run: |
+          echo "โš ๏ธ Cleanup of closed issue failed, but this is non-critical"
+          echo "Issue #$ISSUE_NUMBER may remain in the vector database — clean it up later using the database management workflow"
diff --git a/.github/workflows/issue_greetings.yml b/.github/workflows/issue_greetings.yml
deleted file mode 100644
index 0f5e3f4..0000000
--- a/.github/workflows/issue_greetings.yml
+++ /dev/null
@@ -1,25 +0,0 @@
-name: Issue Greetings
-
-on:
-  issues:
-    types: [assigned]
-
-jobs:
-  greeting:
-    runs-on: ubuntu-latest
-    permissions:
-      issues: write
-    steps:
-      - name: Add greeting comment
-        uses: actions/github-script@v7
-        with:
-          script: |
-            await github.rest.issues.createComment({
-              owner: context.repo.owner,
-              repo: context.repo.repo,
-              issue_number: context.issue.number,
-              body: `๐ŸŽ‰ Thank you @${context.payload.assignee?.login || context.payload.issue.user.login} for picking up this issue!
-              Please make sure to follow our contributing guidelines here ๐Ÿ‘‰ [Contributing Guidelines](https://github.com/DevSyncx/DevSync/blob/main/CONTRIBUTING.md)
-              Happy coding !!
-              and Don't forget to give this repo a ๐ŸŒŸ`
-            })