diff --git a/.coderabbit.yaml b/.coderabbit.yaml index 7386ad5..b570e38 100644 --- a/.coderabbit.yaml +++ b/.coderabbit.yaml @@ -16,4 +16,4 @@ reviews: # Optional: By default, draft pull requests are not reviewed. # Set to true if you want Coderabbit to review drafts as well. - drafts: false \ No newline at end of file + drafts: false diff --git a/.prettierrc b/.prettierrc new file mode 100644 index 0000000..ea7af28 --- /dev/null +++ b/.prettierrc @@ -0,0 +1,10 @@ +{ + "printWidth": 120, + "trailingComma": "none", + "singleQuote": true, + "semi": true, + "useTabs": false, + "tabWidth": 2, + "arrowParens": "always", + "jsxSingleQuote": true +} diff --git a/.vscode/launch.json b/.vscode/launch.json index e597682..0b8d61e 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -1,24 +1,22 @@ { - // Use IntelliSense to learn about possible attributes. - // Hover to view descriptions of existing attributes. - // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 - "version": "0.2.0", - "configurations": [ - { - "type": "node", - "request": "launch", - "name": "Debug redisDedupeListener", - "skipFiles": [ - "/**" - ], - "program": "${workspaceFolder}/src/redisDedupeListener.ts", - "runtimeExecutable": "npx", - "runtimeArgs": ["tsx"], - "console": "integratedTerminal", - "internalConsoleOptions": "openOnSessionStart", - "env": { - "NODE_OPTIONS": "--enable-source-maps" - } - } - ] -} \ No newline at end of file + // Use IntelliSense to learn about possible attributes. + // Hover to view descriptions of existing attributes. + // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 + "version": "0.2.0", + "configurations": [ + { + "type": "node", + "request": "launch", + "name": "Debug redisDedupeListener", + "skipFiles": ["/**"], + "program": "${workspaceFolder}/src/redisDedupeListener.ts", + "runtimeExecutable": "npx", + "runtimeArgs": ["tsx"], + "console": "integratedTerminal", + "internalConsoleOptions": "openOnSessionStart", + "env": { + "NODE_OPTIONS": "--enable-source-maps" + } + } + ] +} diff --git a/README.md b/README.md index bb92d03..700d872 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,3 @@ # classifier + AI engine for classifying market sentiment, topics, and trustworthiness from raw data. diff --git a/data/sample_posts.json b/data/sample_posts.json index e9025f3..955bf89 100644 --- a/data/sample_posts.json +++ b/data/sample_posts.json @@ -32,4 +32,4 @@ "url": "https://x.com/1003", "date": "2025-09-03T18:20:00.000Z" } -] \ No newline at end of file +] diff --git a/package-lock.json b/package-lock.json index f4f872e..6c37d2b 100644 --- a/package-lock.json +++ b/package-lock.json @@ -14,6 +14,7 @@ }, "devDependencies": { "@types/node": "^24.3.1", + "prettier": "^3.6.2", "tsx": "^4.20.5", "typescript": "^5.9.2" }, @@ -676,6 +677,21 @@ } } }, + "node_modules/prettier": { + "version": "3.6.2", + "resolved": "https://registry.npmjs.org/prettier/-/prettier-3.6.2.tgz", + "integrity": "sha512-I7AIg5boAr5R0FFtJ6rCfD+LFsWHp81dolrFD8S79U9tb8Az2nGrJncnMSnys+bpQJfRUzqs9hnA81OAA3hCuQ==", + "dev": true, + "bin": { + "prettier": "bin/prettier.cjs" + }, + "engines": { + "node": ">=14" + }, + "funding": { + "url": "https://github.com/prettier/prettier?sponsor=1" + } + }, "node_modules/redis": { "version": "5.8.2", "resolved": "https://registry.npmjs.org/redis/-/redis-5.8.2.tgz", diff --git a/package.json b/package.json index 422cf9c..e6f729b 100644 --- a/package.json +++ b/package.json @@ -14,7 +14,8 @@ "start": "node dist/run-classifier.js", "seed": "tsx src/seed.ts", "seed:clear": "tsx src/seed.ts clear", - "seed:verify": "tsx src/seed.ts verify" + "seed:verify": "tsx src/seed.ts verify", + "format": "npx prettier --write ." }, "dependencies": { "@prisma/client": "^6.15.0", @@ -26,6 +27,7 @@ }, "devDependencies": { "@types/node": "^24.3.1", + "prettier": "^3.6.2", "tsx": "^4.20.5", "typescript": "^5.9.2" } diff --git a/run-classifier.ts b/run-classifier.ts index 0807747..48a7d47 100644 --- a/run-classifier.ts +++ b/run-classifier.ts @@ -9,62 +9,62 @@ import { generateTitlesForPosts } from './src/generateTitle.js'; const command = process.argv[2]; async function main() { - console.log("๐Ÿค– SentioPulse Classifier Tool"); - console.log("==============================\n"); + console.log('๐Ÿค– SentioPulse Classifier Tool'); + console.log('==============================\n'); - switch (command) { - case 'complete': - case 'all': - console.log("Running complete analysis (categorization + sentiment + title generation)..."); - await runCompleteAnalysisDemo(); - break; + switch (command) { + case 'complete': + case 'all': + console.log('Running complete analysis (categorization + sentiment + title generation)...'); + await runCompleteAnalysisDemo(); + break; - case 'categorize': - case 'category': - console.log("Running categorization only..."); - await runCategorization(); - break; + case 'categorize': + case 'category': + console.log('Running categorization only...'); + await runCategorization(); + break; - case 'sentiment': { - console.log("Running sentiment analysis only..."); - const samplePosts = [ - "Bitcoin is going to skyrocket after the halving event next month! The fundamentals are incredibly strong and institutional adoption is accelerating. This could be the start of the next major bull run.", - "Ethereum might drop below $1000 soon due to the current risky market conditions. The macro environment is deteriorating and there's too much leverage in the system right now.", - "The market seems calm today with no major moves in either direction. Bitcoin is trading sideways and most altcoins are following suit. It's a good time to accumulate quality projects." - ]; - const sentimentResults = await analyzeMultiplePosts(samplePosts); - console.log("Sentiment Results:", JSON.stringify(sentimentResults, null, 2)); - break; - } + case 'sentiment': { + console.log('Running sentiment analysis only...'); + const samplePosts = [ + 'Bitcoin is going to skyrocket after the halving event next month! The fundamentals are incredibly strong and institutional adoption is accelerating. This could be the start of the next major bull run.', + "Ethereum might drop below $1000 soon due to the current risky market conditions. The macro environment is deteriorating and there's too much leverage in the system right now.", + "The market seems calm today with no major moves in either direction. Bitcoin is trading sideways and most altcoins are following suit. It's a good time to accumulate quality projects." + ]; + const sentimentResults = await analyzeMultiplePosts(samplePosts); + console.log('Sentiment Results:', JSON.stringify(sentimentResults, null, 2)); + break; + } - case 'title': - case 'titles': { - console.log("Running title generation only..."); - const titlePosts = [ - "Benchmarking tiny on-device ML models for edge inference โ€” latency down 40% with the new quantization pipeline.", - "Q2 fintech update: payments startup doubled TPV and improved take rate; unit economics are trending positive.", - "Reading a new whitepaper on Web3 compliance and institutional custody โ€” regulatory clarity is the next catalyst for adoption." - ]; - const titleResults = await generateTitlesForPosts(titlePosts); - console.log("Title Results:", JSON.stringify(titleResults, null, 2)); - break; - } + case 'title': + case 'titles': { + console.log('Running title generation only...'); + const titlePosts = [ + 'Benchmarking tiny on-device ML models for edge inference โ€” latency down 40% with the new quantization pipeline.', + 'Q2 fintech update: payments startup doubled TPV and improved take rate; unit economics are trending positive.', + 'Reading a new whitepaper on Web3 compliance and institutional custody โ€” regulatory clarity is the next catalyst for adoption.' + ]; + const titleResults = await generateTitlesForPosts(titlePosts); + console.log('Title Results:', JSON.stringify(titleResults, null, 2)); + break; + } - case 'help': - case '--help': - case '-h': - showHelp(); - break; + case 'help': + case '--help': + case '-h': + showHelp(); + break; - default: - console.log("No command specified. Running complete analysis by default..."); - await runCompleteAnalysisDemo(); - break; - } + default: + console.log('No command specified. Running complete analysis by default...'); + await runCompleteAnalysisDemo(); + break; + } } function showHelp() { - console.log(` + console.log(` Usage: npm run classify [command] Commands: @@ -86,6 +86,6 @@ If no command is provided, complete analysis will run by default. // Handle errors gracefully main().catch((error) => { - console.error("โŒ Error running classifier:", error); - process.exit(1); + console.error('โŒ Error running classifier:', error); + process.exit(1); }); diff --git a/src/analyzeSentiment.ts b/src/analyzeSentiment.ts index 80f1992..c2b9c44 100644 --- a/src/analyzeSentiment.ts +++ b/src/analyzeSentiment.ts @@ -4,21 +4,21 @@ import { callOpenAIWithValidation } from './openaiValidationUtil.js'; import { z } from 'zod'; import { generateTitleForPost } from './generateTitle.js'; -// Sentiment analysis result type +// Sentiment analysis result type export type SentimentResult = { - post: string; - sentiment: "BULLISH" | "BEARISH" | "NEUTRAL"; + post: string; + sentiment: 'BULLISH' | 'BEARISH' | 'NEUTRAL'; }; // Zod schema for sentiment validation const SentimentSchema = z.object({ - sentiment: z.enum(["BULLISH", "BEARISH", "NEUTRAL"]) + sentiment: z.enum(['BULLISH', 'BEARISH', 'NEUTRAL']) }); // Analyze multiple posts at once export async function analyzeMultiplePosts(posts: string[]): Promise { - const results: SentimentResult[] = []; - const systemPrompt = `You are a sentiment analysis system for crypto-related posts. + const results: SentimentResult[] = []; + const systemPrompt = `You are a sentiment analysis system for crypto-related posts. Classify the sentiment of posts into one of: BULLISH, BEARISH, NEUTRAL. Important: Always return the sentiment in uppercase letters exactly like this: BULLISH, BEARISH, NEUTRAL, @@ -29,52 +29,52 @@ Return only valid JSON in this format: "sentiment": "BULLISH" }`; - for (const post of posts) { - try { - const validated = await callOpenAIWithValidation({ - client: openai, - systemPrompt, - userPrompt: post, - schema: SentimentSchema, - retryCount: 3 - }); + for (const post of posts) { + try { + const validated = await callOpenAIWithValidation({ + client: openai, + systemPrompt, + userPrompt: post, + schema: SentimentSchema, + retryCount: 3 + }); - if (validated) { - results.push({ post, sentiment: validated.sentiment }); - } - } catch (e) { - console.error("Error analyzing post:", post, e); - continue; - } + if (validated) { + results.push({ post, sentiment: validated.sentiment }); + } + } catch (e) { + console.error('Error analyzing post:', post, e); + continue; } - return results; + } + return results; } async function runExample() { - // Example runner - const posts = [ - "Bitcoin is going to skyrocket after the halving event next month! The fundamentals are incredibly strong and institutional adoption is accelerating. This could be the start of the next major bull run that takes us to new all-time highs.", - "Ethereum might drop below $1000 soon due to the current risky market conditions. The macro environment is deteriorating and there's too much leverage in the system. I'm expecting a significant correction in the coming weeks.", - "The market seems calm today with no major moves in either direction. Bitcoin is trading sideways and most altcoins are following suit. It's a good time to accumulate quality projects at these levels." - ]; + // Example runner + const posts = [ + 'Bitcoin is going to skyrocket after the halving event next month! The fundamentals are incredibly strong and institutional adoption is accelerating. This could be the start of the next major bull run that takes us to new all-time highs.', + "Ethereum might drop below $1000 soon due to the current risky market conditions. The macro environment is deteriorating and there's too much leverage in the system. I'm expecting a significant correction in the coming weeks.", + "The market seems calm today with no major moves in either direction. Bitcoin is trading sideways and most altcoins are following suit. It's a good time to accumulate quality projects at these levels." + ]; - const results = await analyzeMultiplePosts(posts); - const resultsByPost = new Map(results.map(r => [r.post, r.sentiment])); - for (const post of posts) { - const sentiment = resultsByPost.get(post); - const title = await generateTitleForPost(post); - console.log(`Post: ${post}`); - if (title) { - console.log(`Title: ${title}`); - } - if (sentiment) { - console.log(`Sentiment: ${sentiment}`); - } - console.log('โ”€'.repeat(40)); + const results = await analyzeMultiplePosts(posts); + const resultsByPost = new Map(results.map((r) => [r.post, r.sentiment])); + for (const post of posts) { + const sentiment = resultsByPost.get(post); + const title = await generateTitleForPost(post); + console.log(`Post: ${post}`); + if (title) { + console.log(`Title: ${title}`); } + if (sentiment) { + console.log(`Sentiment: ${sentiment}`); + } + console.log('โ”€'.repeat(40)); + } } // Run example if this file is executed directly (not imported) if (require.main === module) { - runExample(); -} \ No newline at end of file + runExample(); +} diff --git a/src/classifyWithOpenAI.ts b/src/classifyWithOpenAI.ts index ea3e05c..035b641 100644 --- a/src/classifyWithOpenAI.ts +++ b/src/classifyWithOpenAI.ts @@ -1,6 +1,6 @@ // No top-level execution. Export the runner and allow explicit CLI invocation with --run. import type OpenAI from 'openai'; -import type { ChatCompletionMessageParam } from "openai/resources"; +import type { ChatCompletionMessageParam } from 'openai/resources'; import openai from './openaiClient.js'; import { callOpenAIWithValidation } from './openaiValidationUtil.js'; import { z } from 'zod'; @@ -9,23 +9,25 @@ import { CATEGORIES } from './constants.js'; // Zod schema for categorization validation const CategorizationSchema = z.object({ - categories: z.array(z.string()) - .min(1, "At least one category is required") - .max(2, "At most 2 categories are allowed") - .refine((arr) => arr.every((c) => CATEGORIES.includes(c)), { - message: "All categories must be from the allowed list", - }), - subcategories: z.array(z.string().min(1, "Subcategories cannot be empty strings")) - .min(1, "At least one subcategory is required"), + categories: z + .array(z.string()) + .min(1, 'At least one category is required') + .max(2, 'At most 2 categories are allowed') + .refine((arr) => arr.every((c) => CATEGORIES.includes(c)), { + message: 'All categories must be from the allowed list' + }), + subcategories: z + .array(z.string().min(1, 'Subcategories cannot be empty strings')) + .min(1, 'At least one subcategory is required') }); export type Categorization = z.infer; export async function categorizePost(post: string, clientOverride?: OpenAI) { - const usedClient: OpenAI = clientOverride ?? openai; - const systemPrompt = `You are a post categorization system. + const usedClient: OpenAI = clientOverride ?? openai; + const systemPrompt = `You are a post categorization system. -Main categories: ${CATEGORIES.join(", ")}. +Main categories: ${CATEGORIES.join(', ')}. Instructions: 1. Choose up to 2 main categories that best fit the post. @@ -38,28 +40,29 @@ Instructions: Be strict: return only raw JSON with exactly that shape; no code fences or prose.`; - return await callOpenAIWithValidation({ - client: usedClient, - systemPrompt, - userPrompt: post, - schema: CategorizationSchema, - retryCount: 3 - }); + return await callOpenAIWithValidation({ + client: usedClient, + systemPrompt, + userPrompt: post, + schema: CategorizationSchema, + retryCount: 3 + }); } // Named function to run categorization export async function runCategorization() { - const post = "How to maximize yield farming returns safely in DeFi protocols while managing risk exposure. The key is to diversify across multiple platforms and always do thorough research on the smart contracts. Never put all your funds into a single protocol, and always keep some reserves for unexpected market movements."; - const title = await generateTitleForPost(post); - const result = await categorizePost(post); - console.log("Post:", post); - if (title) { - console.log("Title:", title); - } - console.log("Categorization Result:", JSON.stringify(result, null, 2)); + const post = + 'How to maximize yield farming returns safely in DeFi protocols while managing risk exposure. The key is to diversify across multiple platforms and always do thorough research on the smart contracts. Never put all your funds into a single protocol, and always keep some reserves for unexpected market movements.'; + const title = await generateTitleForPost(post); + const result = await categorizePost(post); + console.log('Post:', post); + if (title) { + console.log('Title:', title); + } + console.log('Categorization Result:', JSON.stringify(result, null, 2)); } // Run categorization if this file is executed directly (not imported) if (require.main === module) { - runCategorization(); -} \ No newline at end of file + runCategorization(); +} diff --git a/src/completePostAnalysis.ts b/src/completePostAnalysis.ts index 12025b1..b9b7061 100644 --- a/src/completePostAnalysis.ts +++ b/src/completePostAnalysis.ts @@ -6,144 +6,138 @@ import { generateTitleForPost, type TitleResult } from './generateTitle.js'; // Combined result type for complete post analysis export type PostAnalysisResult = { - post: string; - title: string; - categorization: Categorization; - sentiment: "BULLISH" | "BEARISH" | "NEUTRAL"; - errors?: string[]; + post: string; + title: string; + categorization: Categorization; + sentiment: 'BULLISH' | 'BEARISH' | 'NEUTRAL'; + errors?: string[]; }; // Process a single post with all analysis -export async function analyzeCompletePost( - post: string, - clientOverride?: OpenAI -): Promise { - const usedClient: OpenAI = clientOverride ?? openai; - // Initialize with empty/default values - const result: PostAnalysisResult = { - post, - title: '', - categorization: { categories: [], subcategories: [] }, - sentiment: "NEUTRAL", - errors: [] - }; - - // Generate title - try { - const title = await generateTitleForPost(post, usedClient); - if (title === null) { - throw new Error(`Title generation failed for post: ${post}`); - } - result.title = title; - } catch (e) { - result.errors?.push(`Title generation failed: ${e}`); - console.error("Title generation error:", e); +export async function analyzeCompletePost(post: string, clientOverride?: OpenAI): Promise { + const usedClient: OpenAI = clientOverride ?? openai; + // Initialize with empty/default values + const result: PostAnalysisResult = { + post, + title: '', + categorization: { categories: [], subcategories: [] }, + sentiment: 'NEUTRAL', + errors: [] + }; + + // Generate title + try { + const title = await generateTitleForPost(post, usedClient); + if (title === null) { + throw new Error(`Title generation failed for post: ${post}`); } - - // Categorize post - try { - const categorization = await categorizePost(post, usedClient); - if (categorization) { - result.categorization = categorization; - } - } catch (e) { - result.errors?.push(`Categorization failed: ${e}`); - console.error("Categorization error:", e); + result.title = title; + } catch (e) { + result.errors?.push(`Title generation failed: ${e}`); + console.error('Title generation error:', e); + } + + // Categorize post + try { + const categorization = await categorizePost(post, usedClient); + if (categorization) { + result.categorization = categorization; } - - // Analyze sentiment - try { - const sentimentResults = await analyzeMultiplePosts([post]); - if (sentimentResults.length > 0) { - result.sentiment = sentimentResults[0].sentiment; - } - } catch (e) { - result.errors?.push(`Sentiment analysis failed: ${e}`); - console.error("Sentiment analysis error:", e); + } catch (e) { + result.errors?.push(`Categorization failed: ${e}`); + console.error('Categorization error:', e); + } + + // Analyze sentiment + try { + const sentimentResults = await analyzeMultiplePosts([post]); + if (sentimentResults.length > 0) { + result.sentiment = sentimentResults[0].sentiment; } - - if (result.errors && result.errors.length > 0) { - throw new Error(result.errors.join('; ')); - } - return result; + } catch (e) { + result.errors?.push(`Sentiment analysis failed: ${e}`); + console.error('Sentiment analysis error:', e); + } + + if (result.errors && result.errors.length > 0) { + throw new Error(result.errors.join('; ')); + } + return result; } // Process multiple posts with all analysis export async function analyzeMultipleCompletePosts( - posts: string[], - clientOverride?: OpenAI + posts: string[], + clientOverride?: OpenAI ): Promise { - const results: PostAnalysisResult[] = []; - - for (const post of posts) { - try { - const analysis = await analyzeCompletePost(post, clientOverride); - results.push(analysis); - } catch (e) { - console.error("Error analyzing post:", post, e); - results.push({ - post, - title: '', - categorization: { categories: [], subcategories: [] }, - sentiment: "NEUTRAL", - errors: [`Complete analysis failed: ${e}`] - }); - } + const results: PostAnalysisResult[] = []; + + for (const post of posts) { + try { + const analysis = await analyzeCompletePost(post, clientOverride); + results.push(analysis); + } catch (e) { + console.error('Error analyzing post:', post, e); + results.push({ + post, + title: '', + categorization: { categories: [], subcategories: [] }, + sentiment: 'NEUTRAL', + errors: [`Complete analysis failed: ${e}`] + }); } + } - return results; + return results; } // Utility function to extract post content from JSON data export function extractPostContents(jsonData: any[]): string[] { - return jsonData - .filter(item => item && item.content) - .map(item => item.content); + return jsonData.filter((item) => item && item.content).map((item) => item.content); } // Demo function to run complete analysis on sample data export async function runCompleteAnalysisDemo() { - try { - // Import sample data - const fs = await import('fs'); - const path = await import('path'); - const sampleDataPath = path.resolve('./data/sample_posts.json'); - const sampleDataContent = fs.readFileSync(sampleDataPath, 'utf-8'); - const sampleData = JSON.parse(sampleDataContent); + try { + // Import sample data + const fs = await import('fs'); + const path = await import('path'); + const sampleDataPath = path.resolve('./data/sample_posts.json'); + const sampleDataContent = fs.readFileSync(sampleDataPath, 'utf-8'); + const sampleData = JSON.parse(sampleDataContent); - const posts = extractPostContents(sampleData); + const posts = extractPostContents(sampleData); - console.log(`\n๐Ÿš€ Running complete analysis on ${posts.length} posts...\n`); + console.log(`\n๐Ÿš€ Running complete analysis on ${posts.length} posts...\n`); - const results = await analyzeMultipleCompletePosts(posts); + const results = await analyzeMultipleCompletePosts(posts); - results.forEach((result, index) => { - console.log(`\n๐Ÿ“ Post ${index + 1}:`); - console.log(`Content: "${result.post.substring(0, 100)}${result.post.length > 100 ? '...' : ''}"`); + results.forEach((result, index) => { + console.log(`\n๐Ÿ“ Post ${index + 1}:`); + console.log(`Content: "${result.post.substring(0, 100)}${result.post.length > 100 ? '...' : ''}"`); - if (result.title) { - console.log(`๐Ÿ’ก Title: "${result.title}"`); - } + if (result.title) { + console.log(`๐Ÿ’ก Title: "${result.title}"`); + } - if (result.categorization) { - console.log(`๐Ÿ“‚ Categories: ${result.categorization.categories.join(', ')}`); - console.log(`๐Ÿ“‹ Subcategories: ${result.categorization.subcategories.join(', ')}`); - } + if (result.categorization) { + console.log(`๐Ÿ“‚ Categories: ${result.categorization.categories.join(', ')}`); + console.log(`๐Ÿ“‹ Subcategories: ${result.categorization.subcategories.join(', ')}`); + } - if (result.sentiment) { - console.log(`๐Ÿ“ˆ Sentiment: ${result.sentiment}`); - } + if (result.sentiment) { + console.log(`๐Ÿ“ˆ Sentiment: ${result.sentiment}`); + } - if (result.errors && result.errors.length > 0) { - console.log(`โš ๏ธ Errors: ${result.errors.join('; ')}`); - } + if (result.errors && result.errors.length > 0) { + console.log(`โš ๏ธ Errors: ${result.errors.join('; ')}`); + } - console.log('โ”€'.repeat(50)); - }); + console.log('โ”€'.repeat(50)); + }); - console.log(`\nโœ… Analysis completed for ${results.length} posts`); - - } catch (error) { - console.error("Demo failed:", error); - } + console.log(`\nโœ… Analysis completed for ${results.length} posts`); + } catch (error) { + console.error('Demo failed:', error); + } } diff --git a/src/constants.ts b/src/constants.ts index 3dbd283..d480ff5 100644 --- a/src/constants.ts +++ b/src/constants.ts @@ -1,22 +1,22 @@ // src/constants.ts export const CATEGORIES = [ - "Cryptocurrency", - "Blockchain Technology", - "Decentralized Finance (DeFi)", - "Non-Fungible Tokens (NFTs)", - "Trading & Investing", - "Staking & Yield Farming", - "Smart Contracts", - "Security & Privacy", - "Regulation & Compliance", - "Web3 & dApps", - "Artificial Intelligence (AI)", - "Machine Learning (ML)", - "Fintech", - "Infrastructure & Scaling", - "Market Analysis", - "Economics", - "Development & Engineering", - "Research & Innovation" + 'Cryptocurrency', + 'Blockchain Technology', + 'Decentralized Finance (DeFi)', + 'Non-Fungible Tokens (NFTs)', + 'Trading & Investing', + 'Staking & Yield Farming', + 'Smart Contracts', + 'Security & Privacy', + 'Regulation & Compliance', + 'Web3 & dApps', + 'Artificial Intelligence (AI)', + 'Machine Learning (ML)', + 'Fintech', + 'Infrastructure & Scaling', + 'Market Analysis', + 'Economics', + 'Development & Engineering', + 'Research & Innovation' ]; diff --git a/src/embeddingTest.ts b/src/embeddingTest.ts index 7cb0a90..771725c 100644 --- a/src/embeddingTest.ts +++ b/src/embeddingTest.ts @@ -1,8 +1,7 @@ -import openai from "./openaiClient.js"; - +import openai from './openaiClient.js'; export async function getEmbeddingWithRetry(input: string, opts?: { model?: string; maxRetries?: number }) { - const model = opts?.model ?? "text-embedding-3-small"; + const model = opts?.model ?? 'text-embedding-3-small'; const maxRetries = opts?.maxRetries ?? 3; const baseDelay = 500; // ms @@ -15,17 +14,17 @@ export async function getEmbeddingWithRetry(input: string, opts?: { model?: stri const type = err?.error && err.error.type; // If user has exhausted quota, provide guidance and fallback to mock - if (type === "insufficient_quota" || (status === 429 && err?.error?.type === "insufficient_quota")) { - console.error("OpenAI quota exhausted:", err?.error?.message || err); - console.error("Action: check your OpenAI billing & quota or set a different API key in OPENAI_API_KEY."); + if (type === 'insufficient_quota' || (status === 429 && err?.error?.type === 'insufficient_quota')) { + console.error('OpenAI quota exhausted:', err?.error?.message || err); + console.error('Action: check your OpenAI billing & quota or set a different API key in OPENAI_API_KEY.'); throw err; } // Retry on transient rate limits (HTTP 429) - if (status === 429 || type === "rate_limit" || err?.code === "rate_limit") { + if (status === 429 || type === 'rate_limit' || err?.code === 'rate_limit') { console.warn(`OpenAI rate-limited (attempt ${attempt}/${maxRetries}). Retrying...`); if (attempt === maxRetries) { - console.error("Max retries reached for OpenAI. Falling back to mock embedding."); + console.error('Max retries reached for OpenAI. Falling back to mock embedding.'); throw err; } const delay = baseDelay * 2 ** (attempt - 1); @@ -37,5 +36,5 @@ export async function getEmbeddingWithRetry(input: string, opts?: { model?: stri throw err; } } - throw new Error("Failed to get embedding after retries"); + throw new Error('Failed to get embedding after retries'); } diff --git a/src/generateTitle.ts b/src/generateTitle.ts index 9546b9a..b21aee3 100644 --- a/src/generateTitle.ts +++ b/src/generateTitle.ts @@ -5,34 +5,34 @@ import { z } from 'zod'; // Title generation result type export type TitleResult = { - post: string; - title: string; + post: string; + title: string; }; // Zod schema for title validation const TitleSchema = z.object({ - title: z.string().min(5, "Title must be at least 5 characters").max(100, "Title must not exceed 100 characters") + title: z.string().min(5, 'Title must be at least 5 characters').max(100, 'Title must not exceed 100 characters') }); // Sentiment summaries result type export type SentimentSummaries = { - bullishSummary?: string; - bearishSummary?: string; - neutralSummary?: string; + bullishSummary?: string; + bearishSummary?: string; + neutralSummary?: string; }; // Zod schema for sentiment summaries validation const SentimentSummariesSchema = z.object({ - bullishSummary: z.string().min(10).max(500).optional(), - bearishSummary: z.string().min(10).max(500).optional(), - neutralSummary: z.string().min(10).max(500).optional(), + bullishSummary: z.string().min(10).max(500).optional(), + bearishSummary: z.string().min(10).max(500).optional(), + neutralSummary: z.string().min(10).max(500).optional() }); // Generate title for a single post export async function generateTitleForPost(post: string, clientOverride?: OpenAI): Promise { - const usedClient: OpenAI = clientOverride ?? openai; + const usedClient: OpenAI = clientOverride ?? openai; - const systemPrompt = `You are a title generation system for social media posts, particularly finance and tech-related content. + const systemPrompt = `You are a title generation system for social media posts, particularly finance and tech-related content. Instructions: 1. Generate a concise, engaging title that captures the essence of the post @@ -46,30 +46,33 @@ Instructions: Be strict: return only raw JSON with exactly that shape; no code fences or prose.`; - try { - const validated = await callOpenAIWithValidation({ - client: usedClient, - systemPrompt, - userPrompt: post, - schema: TitleSchema, - retryCount: 3 - }); - - if (!validated?.title) { - throw new Error(`Title generation failed for post: ${post}`); - } - return validated.title; - } catch (e) { - console.error("Error generating title for post:", post, e); - throw e; + try { + const validated = await callOpenAIWithValidation({ + client: usedClient, + systemPrompt, + userPrompt: post, + schema: TitleSchema, + retryCount: 3 + }); + + if (!validated?.title) { + throw new Error(`Title generation failed for post: ${post}`); } + return validated.title; + } catch (e) { + console.error('Error generating title for post:', post, e); + throw e; + } } // Generate sentiment-based summaries for a group of posts -export async function generateSentimentSummariesForGroup(posts: any[], clientOverride?: OpenAI): Promise { - const usedClient: OpenAI = clientOverride ?? openai; +export async function generateSentimentSummariesForGroup( + posts: any[], + clientOverride?: OpenAI +): Promise { + const usedClient: OpenAI = clientOverride ?? openai; - const systemPrompt = `You are a sentiment analysis and summary generation system for social media posts, particularly finance and tech-related content. + const systemPrompt = `You are a sentiment analysis and summary generation system for social media posts, particularly finance and tech-related content. Instructions: 1. Analyze the provided posts and their sentiments (BULLISH, BEARISH, NEUTRAL). @@ -86,51 +89,51 @@ Instructions: Be strict: return only raw JSON with exactly that shape; no code fences or prose.`; - const postsJson = JSON.stringify(posts); + const postsJson = JSON.stringify(posts); - try { - const validated = await callOpenAIWithValidation({ - client: usedClient, - systemPrompt, - userPrompt: postsJson, - schema: SentimentSummariesSchema, - retryCount: 3, - maxTokens: 800 - }); - - if (!validated?.bullishSummary && !validated?.bearishSummary && !validated?.neutralSummary) { - throw new Error(`Sentiment summaries generation failed for posts: no summaries returned.`); - } - // Only return summaries for sentiments present in the posts - return { - ...(validated.bullishSummary ? { bullishSummary: validated.bullishSummary } : {}), - ...(validated.bearishSummary ? { bearishSummary: validated.bearishSummary } : {}), - ...(validated.neutralSummary ? { neutralSummary: validated.neutralSummary } : {}) - }; - } catch (e) { - console.error("Error generating sentiment summaries.", { - postCount: Array.isArray(posts) ? posts.length : undefined, - error: e instanceof Error ? e.message : String(e) - }); - throw e; + try { + const validated = await callOpenAIWithValidation({ + client: usedClient, + systemPrompt, + userPrompt: postsJson, + schema: SentimentSummariesSchema, + retryCount: 3, + maxTokens: 800 + }); + + if (!validated?.bullishSummary && !validated?.bearishSummary && !validated?.neutralSummary) { + throw new Error(`Sentiment summaries generation failed for posts: no summaries returned.`); } + // Only return summaries for sentiments present in the posts + return { + ...(validated.bullishSummary ? { bullishSummary: validated.bullishSummary } : {}), + ...(validated.bearishSummary ? { bearishSummary: validated.bearishSummary } : {}), + ...(validated.neutralSummary ? { neutralSummary: validated.neutralSummary } : {}) + }; + } catch (e) { + console.error('Error generating sentiment summaries.', { + postCount: Array.isArray(posts) ? posts.length : undefined, + error: e instanceof Error ? e.message : String(e) + }); + throw e; + } } // Generate titles for multiple posts export async function generateTitlesForPosts(posts: string[]): Promise { - const results: TitleResult[] = []; - - for (const post of posts) { - try { - const title = await generateTitleForPost(post); - if (title) { - results.push({ post, title }); - } - } catch (e) { - console.error("Error generating title for post:", post, e); - continue; - } + const results: TitleResult[] = []; + + for (const post of posts) { + try { + const title = await generateTitleForPost(post); + if (title) { + results.push({ post, title }); + } + } catch (e) { + console.error('Error generating title for post:', post, e); + continue; } + } - return results; + return results; } diff --git a/src/lib/utils.ts b/src/lib/utils.ts index 277e03b..db4b346 100644 --- a/src/lib/utils.ts +++ b/src/lib/utils.ts @@ -1,11 +1,7 @@ // Small shared utilities -export const sleep = (ms: number): Promise => - new Promise((resolve) => setTimeout(resolve, ms)); +export const sleep = (ms: number): Promise => new Promise((resolve) => setTimeout(resolve, ms)); // Exponential backoff with small jitter to avoid thundering herd -export const jitteredBackoff = ( - baseMs: number, - attempt: number, - jitterMs: number = 100 -): number => baseMs * 2 ** (attempt - 1) + Math.floor(Math.random() * jitterMs); +export const jitteredBackoff = (baseMs: number, attempt: number, jitterMs: number = 100): number => + baseMs * 2 ** (attempt - 1) + Math.floor(Math.random() * jitterMs); diff --git a/src/openaiClient.ts b/src/openaiClient.ts index f2cee26..e081050 100644 --- a/src/openaiClient.ts +++ b/src/openaiClient.ts @@ -1,15 +1,15 @@ // src/openaiClient.ts -import OpenAI from "openai"; -import dotenv from "dotenv"; +import OpenAI from 'openai'; +import dotenv from 'dotenv'; dotenv.config(); // Fail fast if the API key is missing or empty. const rawKey = process.env.OPENAI_API_KEY; -if (!rawKey || rawKey.trim() === "") { - throw new Error("OPENAI_API_KEY is required. Please set it in your environment."); +if (!rawKey || rawKey.trim() === '') { + throw new Error('OPENAI_API_KEY is required. Please set it in your environment.'); } const openai = new OpenAI({ - apiKey: rawKey, + apiKey: rawKey }); export default openai; diff --git a/src/openaiValidationUtil.ts b/src/openaiValidationUtil.ts index 112fa9a..d3147a5 100644 --- a/src/openaiValidationUtil.ts +++ b/src/openaiValidationUtil.ts @@ -1,5 +1,5 @@ import type OpenAI from 'openai'; -import type { ChatCompletionMessageParam } from "openai/resources"; +import type { ChatCompletionMessageParam } from 'openai/resources'; import { ZodSchema } from 'zod'; import { sleep, jitteredBackoff } from './lib/utils.js'; @@ -8,66 +8,67 @@ import { sleep, jitteredBackoff } from './lib/utils.js'; * Returns the validated result or null if all attempts fail. */ export async function callOpenAIWithValidation(params: { - client: OpenAI, - systemPrompt: string, - userPrompt: string, - schema: ZodSchema, - retryCount?: number, - maxTokens?: number + client: OpenAI; + systemPrompt: string; + userPrompt: string; + schema: ZodSchema; + retryCount?: number; + maxTokens?: number; }): Promise { - let lastError: unknown = null; - const baseDelayMs = 500; - const maxAttempts = params.retryCount ?? 3; + let lastError: unknown = null; + const baseDelayMs = 500; + const maxAttempts = params.retryCount ?? 3; - for (let attempt = 1; attempt <= maxAttempts; attempt++) { - const messages: ChatCompletionMessageParam[] = [ - { role: "system", content: params.systemPrompt }, - { role: "user", content: params.userPrompt } - ]; + for (let attempt = 1; attempt <= maxAttempts; attempt++) { + const messages: ChatCompletionMessageParam[] = [ + { role: 'system', content: params.systemPrompt }, + { role: 'user', content: params.userPrompt } + ]; - // Add feedback about previous failed attempt - if (attempt > 1 && lastError) { - const errorMessage = lastError instanceof Error ? lastError.message : String(lastError); - messages.push({ - role: "user", - content: `Your previous response was invalid: "${errorMessage}". Please provide only valid JSON with the exact format specified.` - }); - } + // Add feedback about previous failed attempt + if (attempt > 1 && lastError) { + const errorMessage = lastError instanceof Error ? lastError.message : String(lastError); + messages.push({ + role: 'user', + content: `Your previous response was invalid: "${errorMessage}". Please provide only valid JSON with the exact format specified.` + }); + } - const chatParams = { - model: "gpt-4o-mini", - messages, - response_format: { type: "json_object" } as const, - max_tokens: params.maxTokens ?? 200 - }; - try { - const response = await params.client.chat.completions.create(chatParams); - let raw: string | undefined; - if ('choices' in response && Array.isArray(response.choices)) { - const content = response.choices[0]?.message?.content; - raw = typeof content === 'string' ? content : undefined; - } - if (!raw) throw new Error('No content returned from OpenAI'); - const parsed = JSON.parse(raw); - return params.schema.parse(parsed); - } catch (err) { - lastError = err; - const anyErr = err as any; - const status = anyErr?.status || anyErr?.code || anyErr?.error?.code; - const type = anyErr?.error?.type || anyErr?.type; - const isRateLimit = status === 429 || type === 'rate_limit'; - const isServerError = typeof status === 'number' && status >= 500; - const isQuota = type === 'insufficient_quota'; - if (isQuota) break; - if ((isRateLimit || isServerError) && attempt < maxAttempts) { - const delay = jitteredBackoff(baseDelayMs, attempt); - console.warn(`OpenAI transient error (attempt ${attempt}/${maxAttempts}). Retrying in ${delay}ms...`); - await sleep(delay); - continue; - } - break; - } + const chatParams = { + model: 'gpt-4o-mini', + messages, + response_format: { type: 'json_object' } as const, + max_tokens: params.maxTokens ?? 200 + }; + try { + const response = await params.client.chat.completions.create(chatParams); + let raw: string | undefined; + if ('choices' in response && Array.isArray(response.choices)) { + const content = response.choices[0]?.message?.content; + raw = typeof content === 'string' ? content : undefined; + } + if (!raw) throw new Error('No content returned from OpenAI'); + const parsed = JSON.parse(raw); + return params.schema.parse(parsed); + } catch (err) { + lastError = err; + const anyErr = err as any; + const status = anyErr?.status || anyErr?.code || anyErr?.error?.code; + const type = anyErr?.error?.type || anyErr?.type; + const isRateLimit = status === 429 || type === 'rate_limit'; + const isServerError = typeof status === 'number' && status >= 500; + const isQuota = type === 'insufficient_quota'; + if (isQuota) break; + if ((isRateLimit || isServerError) && attempt < maxAttempts) { + const delay = jitteredBackoff(baseDelayMs, attempt); + console.warn(`OpenAI transient error (attempt ${attempt}/${maxAttempts}). Retrying in ${delay}ms...`); + await sleep(delay); + continue; + } + break; } - throw new Error(`OpenAI validation failed after ${maxAttempts} attempts. Last error: ${lastError instanceof Error ? lastError.message : String(lastError)}`); + } + throw new Error( + `OpenAI validation failed after ${maxAttempts} attempts. Last error: ${lastError instanceof Error ? lastError.message : String(lastError)}` + ); } - diff --git a/src/postGroup.ts b/src/postGroup.ts index 77dd3b3..7ecbb28 100644 --- a/src/postGroup.ts +++ b/src/postGroup.ts @@ -1,112 +1,108 @@ - import cron from 'node-cron'; import { generateTitleForPost, generateSentimentSummariesForGroup, type SentimentSummaries } from './generateTitle'; import { initRedis, getRedisClient } from './redisClient'; export type Post = { - id: string; - content: string; - sentiment: "BULLISH" | "BEARISH" | "NEUTRAL"; - source: "TWITTER" | "REDDIT" | "YOUTUBE" | "TELEGRAM" | "FARCASTER"; - categories: string[]; - subcategories: string[]; - link?: string; - createdAt: string; - updatedAt: string; + id: string; + content: string; + sentiment: 'BULLISH' | 'BEARISH' | 'NEUTRAL'; + source: 'TWITTER' | 'REDDIT' | 'YOUTUBE' | 'TELEGRAM' | 'FARCASTER'; + categories: string[]; + subcategories: string[]; + link?: string; + createdAt: string; + updatedAt: string; }; export type PostGroup = { - id: string; - posts: Post[]; - title?: string; - bullishSummary?: string; - bearishSummary?: string; - neutralSummary?: string; + id: string; + posts: Post[]; + title?: string; + bullishSummary?: string; + bearishSummary?: string; + neutralSummary?: string; }; // Generate a title for a PostGroup by aggregating its posts' content export async function generateTitleForPostGroup(postGroup: PostGroup): Promise { - const combinedContent = postGroup.posts.map(post => post.content).join('\n\n'); - return await generateTitleForPost(combinedContent); + const combinedContent = postGroup.posts.map((post) => post.content).join('\n\n'); + return await generateTitleForPost(combinedContent); } // Generate sentiment summaries for a PostGroup based on its posts export async function generateSentimentSummariesForPostGroup(postGroup: PostGroup): Promise { - return await generateSentimentSummariesForGroup(postGroup.posts); + return await generateSentimentSummariesForGroup(postGroup.posts); } - // Fetch all PostGroups from Redis (expects a key 'PostGroup' with a JSON array, or adapt as needed) export async function fetchPostGroupsFromRedis(): Promise { - await initRedis(); - const redis = getRedisClient(); - // Adjust the key if you use a different one - const data = await redis.get('post-groups'); - if (!data) return []; - try { - return JSON.parse(data); - } catch (e) { - console.error('Failed to parse post-groups data from Redis:', e); - return []; - } + await initRedis(); + const redis = getRedisClient(); + // Adjust the key if you use a different one + const data = await redis.get('post-groups'); + if (!data) return []; + try { + return JSON.parse(data); + } catch (e) { + console.error('Failed to parse post-groups data from Redis:', e); + return []; + } } - - // Reusable function to generate and log the title and sentiment summaries for all PostGroups from Redis export async function logTitlesForAllPostGroups(context: 'CRON' | 'MANUAL' = 'MANUAL') { - const postGroups = await fetchPostGroupsFromRedis(); - if (!postGroups.length) { - console.log('No PostGroups found in Redis.'); - return; - } - const postGroupsWithOrderedKeys = []; - for (const group of postGroups) { - try { - // Always generate and update title - const title = await generateTitleForPostGroup(group); - // Always generate and update sentiment summaries - const summaries = await generateSentimentSummariesForPostGroup(group); + const postGroups = await fetchPostGroupsFromRedis(); + if (!postGroups.length) { + console.log('No PostGroups found in Redis.'); + return; + } + const postGroupsWithOrderedKeys = []; + for (const group of postGroups) { + try { + // Always generate and update title + const title = await generateTitleForPostGroup(group); + // Always generate and update sentiment summaries + const summaries = await generateSentimentSummariesForPostGroup(group); - if (context === 'CRON') { - console.log(`[CRON] Generated Title for PostGroup (id: ${group.id}) at ${new Date().toISOString()}:`, title); - } else { - console.log(`Title for PostGroup (id: ${group.id}):`, title); - } + if (context === 'CRON') { + console.log(`[CRON] Generated Title for PostGroup (id: ${group.id}) at ${new Date().toISOString()}:`, title); + } else { + console.log(`Title for PostGroup (id: ${group.id}):`, title); + } - postGroupsWithOrderedKeys.push({ - id: group.id, - title, - bullishSummary: summaries.bullishSummary, - bearishSummary: summaries.bearishSummary, - neutralSummary: summaries.neutralSummary, - posts: group.posts - }); - } catch (e) { - if (context === 'CRON') { - console.error(`[CRON] Error generating title/summaries for PostGroup (id: ${group.id}):`, e); - } else { - console.error(`Error generating title/summaries for PostGroup (id: ${group.id}):`, e); - } - } - } - // Always save updated PostGroups with titles and summaries back to Redis - await initRedis(); - const redis = getRedisClient(); - await redis.set('post-groups', JSON.stringify(postGroupsWithOrderedKeys)); - if (context === 'CRON') { - console.log('[CRON] Updated post-groups with titles and sentiment summaries saved to Redis.'); - } else { - console.log('Updated post-groups with titles and sentiment summaries saved to Redis.'); + postGroupsWithOrderedKeys.push({ + id: group.id, + title, + bullishSummary: summaries.bullishSummary, + bearishSummary: summaries.bearishSummary, + neutralSummary: summaries.neutralSummary, + posts: group.posts + }); + } catch (e) { + if (context === 'CRON') { + console.error(`[CRON] Error generating title/summaries for PostGroup (id: ${group.id}):`, e); + } else { + console.error(`Error generating title/summaries for PostGroup (id: ${group.id}):`, e); + } } + } + // Always save updated PostGroups with titles and summaries back to Redis + await initRedis(); + const redis = getRedisClient(); + await redis.set('post-groups', JSON.stringify(postGroupsWithOrderedKeys)); + if (context === 'CRON') { + console.log('[CRON] Updated post-groups with titles and sentiment summaries saved to Redis.'); + } else { + console.log('Updated post-groups with titles and sentiment summaries saved to Redis.'); + } } // Schedule a cron job to generate and log the title for all PostGroups every 6 hours cron.schedule('0 */6 * * *', async () => { - await logTitlesForAllPostGroups('CRON'); + await logTitlesForAllPostGroups('CRON'); }); // If this file is run directly, generate and log the title for all PostGroups if (require.main === module) { - logTitlesForAllPostGroups('MANUAL'); -} \ No newline at end of file + logTitlesForAllPostGroups('MANUAL'); +} diff --git a/src/redisCheck.ts b/src/redisCheck.ts index 67cb9f6..99dbbab 100644 --- a/src/redisCheck.ts +++ b/src/redisCheck.ts @@ -1,12 +1,12 @@ -import { initRedis } from "./redisClient.js"; +import { initRedis } from './redisClient.js'; async function checkRedisSeed() { const client = await initRedis(); try { - const posts = JSON.parse((await client.get("posts")) || "[]"); + const posts = JSON.parse((await client.get('posts')) || '[]'); console.log(JSON.stringify(posts)); } catch (err) { - console.error("Error checking Redis seed:", err); + console.error('Error checking Redis seed:', err); } finally { await client.disconnect(); } diff --git a/src/redisClient.ts b/src/redisClient.ts index 14bc671..ce4905f 100644 --- a/src/redisClient.ts +++ b/src/redisClient.ts @@ -1,21 +1,21 @@ -import dotenv from "dotenv"; -import { createClient, RedisClientType } from "redis"; +import dotenv from 'dotenv'; +import { createClient, RedisClientType } from 'redis'; dotenv.config(); // Prefer IPv4 loopback by default to avoid ::1/IPv6 resolution issues on some systems -const REDIS_URL = (process.env.REDIS_URL || "redis://127.0.0.1:6379").replace("localhost", "127.0.0.1"); +const REDIS_URL = (process.env.REDIS_URL || 'redis://127.0.0.1:6379').replace('localhost', '127.0.0.1'); function safeRedisUrlForLog(url: string) { try { const u = new URL(url); // show protocol, host and port only; hide auth/userinfo - const host = u.hostname || ""; - const port = u.port ? `:${u.port}` : ""; + const host = u.hostname || ''; + const port = u.port ? `:${u.port}` : ''; return `${u.protocol}//${host}${port}`; } catch (e) { // fallback: remove everything between // and @ if present - return url.replace(/\/\/.*@/, "//"); + return url.replace(/\/\/.*@/, '//'); } } @@ -39,8 +39,8 @@ export async function initRedis(): Promise { connecting = (async (): Promise => { const newClient = createClient({ url: REDIS_URL }); - newClient.on("error", (err: unknown) => { - console.error("Redis Client Error:", err); + newClient.on('error', (err: unknown) => { + console.error('Redis Client Error:', err); }); const maxRetries = 5; @@ -63,10 +63,10 @@ export async function initRedis(): Promise { // ignore errors from disconnect // eslint-disable-next-line @typescript-eslint/no-explicit-any if ((newClient as any).isOpen) await newClient.disconnect(); - } catch (e) { } + } catch (e) {} client = null; connecting = null; - console.error("All Redis connection attempts failed."); + console.error('All Redis connection attempts failed.'); throw err; } const delay = baseDelayMs * 2 ** (attempt - 1); @@ -76,7 +76,7 @@ export async function initRedis(): Promise { // unreachable, but satisfy TypeScript connecting = null; - throw new Error("Redis connect failed"); + throw new Error('Redis connect failed'); })(); return connecting; @@ -84,7 +84,7 @@ export async function initRedis(): Promise { export function getRedisClient(): RedisClientType { if (!client || !client.isOpen) { - throw new Error("Redis not initialized or client is closed. Call initRedis() first."); + throw new Error('Redis not initialized or client is closed. Call initRedis() first.'); } return client; diff --git a/src/redisDedupeListener.ts b/src/redisDedupeListener.ts index 4d9ca5f..743c670 100644 --- a/src/redisDedupeListener.ts +++ b/src/redisDedupeListener.ts @@ -1,99 +1,100 @@ -import { initRedis } from "./redisClient.js"; -import { getEmbeddingWithRetry } from "./embeddingTest.js"; +import { initRedis } from './redisClient.js'; +import { getEmbeddingWithRetry } from './embeddingTest.js'; function cosineSimilarity(a: number[], b: number[]): number { - if (!Array.isArray(a) || !Array.isArray(b) || a.length !== b.length) return 0; - let dot = 0, na = 0, nb = 0; - for (let i = 0; i < a.length; i++) { - const ai = a[i] ?? 0; - const bi = b[i] ?? 0; - dot += ai * bi; - na += ai * ai; - nb += bi * bi; - } - if (na === 0 || nb === 0) return 0; - return dot / (Math.sqrt(na) * Math.sqrt(nb)); + if (!Array.isArray(a) || !Array.isArray(b) || a.length !== b.length) return 0; + let dot = 0, + na = 0, + nb = 0; + for (let i = 0; i < a.length; i++) { + const ai = a[i] ?? 0; + const bi = b[i] ?? 0; + dot += ai * bi; + na += ai * ai; + nb += bi * bi; + } + if (na === 0 || nb === 0) return 0; + return dot / (Math.sqrt(na) * Math.sqrt(nb)); } - async function main() { - const redisClient = await initRedis(); - const redisSubscriber = redisClient.duplicate(); - await redisSubscriber.connect(); + const redisClient = await initRedis(); + const redisSubscriber = redisClient.duplicate(); + await redisSubscriber.connect(); - // First, let's verify keyspace notifications are enabled - const config = await redisClient.configGet('notify-keyspace-events'); + // First, let's verify keyspace notifications are enabled + const config = await redisClient.configGet('notify-keyspace-events'); - { - const current = config['notify-keyspace-events'] ?? ''; - const required = ['K', 'E', 'A']; - const merged = Array.from(new Set([...current, ...required])).join(''); - if (merged !== current) { - console.log("Merging keyspace notification flags:", merged); - await redisClient.configSet('notify-keyspace-events', merged); - console.log("Keyspace notifications updated."); - } + { + const current = config['notify-keyspace-events'] ?? ''; + const required = ['K', 'E', 'A']; + const merged = Array.from(new Set([...current, ...required])).join(''); + if (merged !== current) { + console.log('Merging keyspace notification flags:', merged); + await redisClient.configSet('notify-keyspace-events', merged); + console.log('Keyspace notifications updated.'); } + } - - // Also try keyspace pattern (different format) - await redisSubscriber.pSubscribe("__keyspace@0__:posts", async (message, pattern) => { - - const lockKey = "dedupe:lock:posts"; - const gotLock = await redisClient.set(lockKey, "1", { NX: true, PX: 10000 }); - if (!gotLock) return; + // Also try keyspace pattern (different format) + await redisSubscriber.pSubscribe('__keyspace@0__:posts', async (message, pattern) => { + const lockKey = 'dedupe:lock:posts'; + const gotLock = await redisClient.set(lockKey, '1', { NX: true, PX: 10000 }); + if (!gotLock) return; + try { + if (message === 'set') { + console.log('Posts key was set via keyspace, processing update...'); + const postsRaw = await redisClient.get('posts'); + if (!postsRaw) return; + let posts: unknown; try { - if (message === "set") { - console.log("Posts key was set via keyspace, processing update..."); - const postsRaw = await redisClient.get("posts"); - if (!postsRaw) return; - let posts: unknown; - try { - posts = JSON.parse(postsRaw); - } catch (e) { - console.error("Failed to parse posts JSON:", e); - return; - } - if (!Array.isArray(posts)) { - console.warn("Expected posts array; got:", typeof posts); - return; - } - // ...existing deduplication logic... - const latestPost = posts[posts.length - 1]; // Assume last is new - const latestCacheKey = `emb:${latestPost.id}`; - let latestEmbedding = await redisClient.get(latestCacheKey).then((s: any) => s ? JSON.parse(s) : null); - if (!latestEmbedding) { - latestEmbedding = await getEmbeddingWithRetry(latestPost.content); - await redisClient.set(latestCacheKey, JSON.stringify(latestEmbedding), { EX: 86400 }); - } - console.log(latestEmbedding); + posts = JSON.parse(postsRaw); + } catch (e) { + console.error('Failed to parse posts JSON:', e); + return; + } + if (!Array.isArray(posts)) { + console.warn('Expected posts array; got:', typeof posts); + return; + } + // ...existing deduplication logic... + const latestPost = posts[posts.length - 1]; // Assume last is new + const latestCacheKey = `emb:${latestPost.id}`; + let latestEmbedding = await redisClient.get(latestCacheKey).then((s: any) => (s ? JSON.parse(s) : null)); + if (!latestEmbedding) { + latestEmbedding = await getEmbeddingWithRetry(latestPost.content); + await redisClient.set(latestCacheKey, JSON.stringify(latestEmbedding), { EX: 86400 }); + } + console.log(latestEmbedding); - for (let i = 0; i < posts.length - 1; i++) { - const otherPost = posts[i]; - const otherCacheKey = `emb:${otherPost.id}`; - let otherEmbedding = await redisClient.get(otherCacheKey).then((s: any) => s ? JSON.parse(s) : null); - if (!otherEmbedding) { - otherEmbedding = await getEmbeddingWithRetry(otherPost.content); - await redisClient.set(otherCacheKey, JSON.stringify(otherEmbedding), { EX: 86400 }); - } - const similarity = cosineSimilarity(latestEmbedding, otherEmbedding); - if (similarity > 0.95) { - console.log(`Duplicate detected: ${latestPost.id} is similar to post ${otherPost.id}`); - // Remove the duplicate post (latestPost) from the array - const updatedPosts = posts.filter((p: any) => p.id !== latestPost.id); - await redisClient.set("posts", JSON.stringify(updatedPosts)); - console.log(`Removed duplicate post ${latestPost.id} from Redis.`); - break; - } - console.log("similarity:", similarity.toFixed(4)); - } - } - } finally { - try { await redisClient.del(lockKey); } catch { } + for (let i = 0; i < posts.length - 1; i++) { + const otherPost = posts[i]; + const otherCacheKey = `emb:${otherPost.id}`; + let otherEmbedding = await redisClient.get(otherCacheKey).then((s: any) => (s ? JSON.parse(s) : null)); + if (!otherEmbedding) { + otherEmbedding = await getEmbeddingWithRetry(otherPost.content); + await redisClient.set(otherCacheKey, JSON.stringify(otherEmbedding), { EX: 86400 }); + } + const similarity = cosineSimilarity(latestEmbedding, otherEmbedding); + if (similarity > 0.95) { + console.log(`Duplicate detected: ${latestPost.id} is similar to post ${otherPost.id}`); + // Remove the duplicate post (latestPost) from the array + const updatedPosts = posts.filter((p: any) => p.id !== latestPost.id); + await redisClient.set('posts', JSON.stringify(updatedPosts)); + console.log(`Removed duplicate post ${latestPost.id} from Redis.`); + break; + } + console.log('similarity:', similarity.toFixed(4)); } - }); + } + } finally { + try { + await redisClient.del(lockKey); + } catch {} + } + }); - console.log("Listening for changes to posts array and deduping using embeddings..."); + console.log('Listening for changes to posts array and deduping using embeddings...'); } -main().catch(console.error); \ No newline at end of file +main().catch(console.error); diff --git a/src/redisSeed.ts b/src/redisSeed.ts index 12c3367..4f66fd5 100644 --- a/src/redisSeed.ts +++ b/src/redisSeed.ts @@ -1,6 +1,6 @@ -import { initRedis } from "./redisClient.js"; -import * as fs from "fs"; -import * as path from "path"; +import { initRedis } from './redisClient.js'; +import * as fs from 'fs'; +import * as path from 'path'; type InputPost = { id: string; @@ -14,21 +14,21 @@ async function seedRedis() { const redisClient = await initRedis(); // Try to read user-provided input from data/sample_posts.json (project root) - const dataPath = path.resolve(process.cwd(), "data", "sample_posts.json"); + const dataPath = path.resolve(process.cwd(), 'data', 'sample_posts.json'); let inputPosts: InputPost[] = []; if (fs.existsSync(dataPath)) { try { - const raw = fs.readFileSync(dataPath, "utf8"); + const raw = fs.readFileSync(dataPath, 'utf8'); inputPosts = JSON.parse(raw); console.log(`Loaded ${inputPosts.length} posts from data/sample_posts.json`); } catch (err) { - console.error("Failed to parse data/sample_posts.json, falling back to bundled sample:", err); + console.error('Failed to parse data/sample_posts.json, falling back to bundled sample:', err); } } try { - await redisClient.set("posts", JSON.stringify(inputPosts)); + await redisClient.set('posts', JSON.stringify(inputPosts)); console.log(`Redis seeding done! Seeded ${inputPosts.length} posts.`); } finally { // Always attempt to close the client. If disconnect fails, surface the error @@ -36,7 +36,7 @@ async function seedRedis() { try { await redisClient.quit(); } catch (err) { - console.error("Failed to disconnect Redis client:", err); + console.error('Failed to disconnect Redis client:', err); // rethrow so callers see the original failure if needed throw err; } diff --git a/src/redisTest.ts b/src/redisTest.ts index 9153e86..204f18e 100644 --- a/src/redisTest.ts +++ b/src/redisTest.ts @@ -1,10 +1,10 @@ -import { initRedis, getRedisClient } from "./redisClient.js"; +import { initRedis, getRedisClient } from './redisClient.js'; async function testRedis() { const client = await initRedis(); const pong = await client.ping(); - console.log("Ping Response:", pong); // Output: "PONG" means connection success + console.log('Ping Response:', pong); // Output: "PONG" means connection success await client.disconnect(); } diff --git a/src/seed.ts b/src/seed.ts index f262e39..f6ffb8e 100644 --- a/src/seed.ts +++ b/src/seed.ts @@ -4,7 +4,7 @@ import { seedDatabase, clearDatabase, verifySeeding } from './seedDatabase'; /** * Command-line script to seed the Upstash Redis database with mock data. - * + * * Usage: * npx tsx src/seed.ts # Seed the database * npx tsx src/seed.ts clear # Clear all PostGroup data @@ -13,48 +13,47 @@ import { seedDatabase, clearDatabase, verifySeeding } from './seedDatabase'; */ async function main() { - const args = process.argv.slice(2); - const command = args[0]?.toLowerCase(); - - try { - switch (command) { - case 'clear': - await clearDatabase(); - break; - - case 'verify': - await verifySeeding(); - break; - - case '--help': - case '-h': - case 'help': - showHelp(); - break; - - case undefined: - // Default action: seed the database - await seedDatabase(); - await verifySeeding(); // Verify after seeding - break; - - default: - console.error(`โŒ Unknown command: ${command}`); - showHelp(); - process.exit(1); - } - - console.log('โœจ Operation completed successfully!'); - process.exit(0); - - } catch (error) { - console.error('๐Ÿ’ฅ Operation failed:', error); + const args = process.argv.slice(2); + const command = args[0]?.toLowerCase(); + + try { + switch (command) { + case 'clear': + await clearDatabase(); + break; + + case 'verify': + await verifySeeding(); + break; + + case '--help': + case '-h': + case 'help': + showHelp(); + break; + + case undefined: + // Default action: seed the database + await seedDatabase(); + await verifySeeding(); // Verify after seeding + break; + + default: + console.error(`โŒ Unknown command: ${command}`); + showHelp(); process.exit(1); } + + console.log('โœจ Operation completed successfully!'); + process.exit(0); + } catch (error) { + console.error('๐Ÿ’ฅ Operation failed:', error); + process.exit(1); + } } function showHelp() { - console.log(` + console.log(` ๐ŸŒฑ Database Seeding Script Usage: @@ -81,5 +80,5 @@ Environment Requirements: // Run the script if (require.main === module) { - main(); -} \ No newline at end of file + main(); +} diff --git a/src/seedData.ts b/src/seedData.ts index 6d36e3e..c836481 100644 --- a/src/seedData.ts +++ b/src/seedData.ts @@ -1,60 +1,42 @@ import { PostGroup } from './postGroup'; export const seedData: PostGroup[] = [ - { - "id": "group1", - "posts": [ - { - "id": "post1", - "content": "Empery Digital\n@EMPD_BTC\nยท\n11m\nBitcoin Firsts that changed everything:\n- $4B Pizza\n- A nation bets on BTC\n- Wall Street embraces it\n- The Trillion-Dollar Club\nFrom a pizza order to reshaping global finance.\n#Bitcoin #BTC #Blockchain #EmperyDigital", - "sentiment": "BULLISH", - "source": "TWITTER", - "categories": [ - "Cryptocurrency", - "Market Analysis" - ], - "subcategories": [ - "Bitcoin", - "Milestones", - "Adoption" - ], - "createdAt": "2025-09-16T12:00:00.000Z", - "updatedAt": "2025-09-16T12:00:00.000Z" - }, - { - "id": "post2", - "content": "Empery Digital\n@EMPD_BTC\nยท\n11m\nSome notable events in Bitcoin's history include:\n- The purchase of pizza with Bitcoin\n- A country adopting BTC\n- Increased interest from Wall Street\n- Joining the Trillion-Dollar Club\nThese milestones reflect Bitcoin's evolving role in finance.\n#Bitcoin #BTC #Blockchain #EmperyDigital", - "sentiment": "NEUTRAL", - "source": "TWITTER", - "categories": [ - "Cryptocurrency", - "Market Analysis" - ], - "subcategories": [ - "Bitcoin", - "Milestones", - "Adoption" - ], - "createdAt": "2025-09-16T12:00:00.000Z", - "updatedAt": "2025-09-16T12:00:00.000Z" - }, - { - "id": "post3", - "content": "Empery Digital\n@EMPD_BTC\nยท\n11m\nRecent events in Bitcoin's history have raised concerns:\n- The infamous $4B pizza purchase\n- A nation risking its economy on BTC\n- Wall Street's speculative involvement\n- Entering the Trillion-Dollar Club amid volatility\nFrom a simple transaction to ongoing financial uncertainty.\n#Bitcoin #BTC #Blockchain #EmperyDigital", - "sentiment": "BEARISH", - "source": "TWITTER", - "categories": [ - "Cryptocurrency", - "Market Analysis" - ], - "subcategories": [ - "Bitcoin", - "Milestones", - "Risks" - ], - "createdAt": "2025-09-16T12:00:00.000Z", - "updatedAt": "2025-09-16T12:00:00.000Z" - } - ] - } -]; \ No newline at end of file + { + id: 'group1', + posts: [ + { + id: 'post1', + content: + 'Empery Digital\n@EMPD_BTC\nยท\n11m\nBitcoin Firsts that changed everything:\n- $4B Pizza\n- A nation bets on BTC\n- Wall Street embraces it\n- The Trillion-Dollar Club\nFrom a pizza order to reshaping global finance.\n#Bitcoin #BTC #Blockchain #EmperyDigital', + sentiment: 'BULLISH', + source: 'TWITTER', + categories: ['Cryptocurrency', 'Market Analysis'], + subcategories: ['Bitcoin', 'Milestones', 'Adoption'], + createdAt: '2025-09-16T12:00:00.000Z', + updatedAt: '2025-09-16T12:00:00.000Z' + }, + { + id: 'post2', + content: + "Empery Digital\n@EMPD_BTC\nยท\n11m\nSome notable events in Bitcoin's history include:\n- The purchase of pizza with Bitcoin\n- A country adopting BTC\n- Increased interest from Wall Street\n- Joining the Trillion-Dollar Club\nThese milestones reflect Bitcoin's evolving role in finance.\n#Bitcoin #BTC #Blockchain #EmperyDigital", + sentiment: 'NEUTRAL', + source: 'TWITTER', + categories: ['Cryptocurrency', 'Market Analysis'], + subcategories: ['Bitcoin', 'Milestones', 'Adoption'], + createdAt: '2025-09-16T12:00:00.000Z', + updatedAt: '2025-09-16T12:00:00.000Z' + }, + { + id: 'post3', + content: + "Empery Digital\n@EMPD_BTC\nยท\n11m\nRecent events in Bitcoin's history have raised concerns:\n- The infamous $4B pizza purchase\n- A nation risking its economy on BTC\n- Wall Street's speculative involvement\n- Entering the Trillion-Dollar Club amid volatility\nFrom a simple transaction to ongoing financial uncertainty.\n#Bitcoin #BTC #Blockchain #EmperyDigital", + sentiment: 'BEARISH', + source: 'TWITTER', + categories: ['Cryptocurrency', 'Market Analysis'], + subcategories: ['Bitcoin', 'Milestones', 'Risks'], + createdAt: '2025-09-16T12:00:00.000Z', + updatedAt: '2025-09-16T12:00:00.000Z' + } + ] + } +]; diff --git a/src/seedDatabase.ts b/src/seedDatabase.ts index 7e5cc7f..28538e5 100644 --- a/src/seedDatabase.ts +++ b/src/seedDatabase.ts @@ -7,35 +7,34 @@ import { PostGroup } from './postGroup'; * This function will clear existing PostGroup data and populate it with the seed data. */ export async function seedDatabase(): Promise { - try { - console.log('๐ŸŒฑ Starting database seeding...'); - - // Initialize Redis connection - await initRedis(); - const redis = getRedisClient(); - - console.log('โœ… Connected to Redis'); - - // Store the seed data as post-groups data in Redis - // Using the new key structure as required - await redis.set('post-groups', JSON.stringify(seedData)); - - console.log(`โœ… Successfully seeded ${seedData.length} post group(s) to Redis`); - console.log('๐Ÿ“Š Seed data summary:'); - - seedData.forEach((group: PostGroup, index: number) => { - console.log(` Group ${index + 1}: ${group.id} (${group.posts.length} posts)`); - group.posts.forEach((post, postIndex) => { - console.log(` Post ${postIndex + 1}: ${post.id} - ${post.sentiment} (${post.source})`); - }); - }); - - console.log('๐ŸŽ‰ Database seeding completed successfully!'); - - } catch (error) { - console.error('โŒ Error seeding database:', error); - throw error; - } + try { + console.log('๐ŸŒฑ Starting database seeding...'); + + // Initialize Redis connection + await initRedis(); + const redis = getRedisClient(); + + console.log('โœ… Connected to Redis'); + + // Store the seed data as post-groups data in Redis + // Using the new key structure as required + await redis.set('post-groups', JSON.stringify(seedData)); + + console.log(`โœ… Successfully seeded ${seedData.length} post group(s) to Redis`); + console.log('๐Ÿ“Š Seed data summary:'); + + seedData.forEach((group: PostGroup, index: number) => { + console.log(` Group ${index + 1}: ${group.id} (${group.posts.length} posts)`); + group.posts.forEach((post, postIndex) => { + console.log(` Post ${postIndex + 1}: ${post.id} - ${post.sentiment} (${post.source})`); + }); + }); + + console.log('๐ŸŽ‰ Database seeding completed successfully!'); + } catch (error) { + console.error('โŒ Error seeding database:', error); + throw error; + } } /** @@ -43,48 +42,46 @@ export async function seedDatabase(): Promise { * Use with caution - this will delete all existing post groups! */ export async function clearDatabase(): Promise { - try { - console.log('๐Ÿงน Clearing PostGroup data from database...'); - - await initRedis(); - const redis = getRedisClient(); + try { + console.log('๐Ÿงน Clearing PostGroup data from database...'); - await redis.del('post-groups'); + await initRedis(); + const redis = getRedisClient(); - console.log('โœ… Database cleared successfully'); + await redis.del('post-groups'); - } catch (error) { - console.error('โŒ Error clearing database:', error); - throw error; - } + console.log('โœ… Database cleared successfully'); + } catch (error) { + console.error('โŒ Error clearing database:', error); + throw error; + } } /** * Verifies that the seed data was properly stored by retrieving and logging it. */ export async function verifySeeding(): Promise { - try { - console.log('๐Ÿ” Verifying seeded data...'); - - await initRedis(); - const redis = getRedisClient(); + try { + console.log('๐Ÿ” Verifying seeded data...'); - const data = await redis.get('post-groups'); + await initRedis(); + const redis = getRedisClient(); - if (!data) { - console.log('โŒ No data found in database'); - return; - } + const data = await redis.get('post-groups'); - const postGroups: PostGroup[] = JSON.parse(data); + if (!data) { + console.log('โŒ No data found in database'); + return; + } - console.log(`โœ… Found ${postGroups.length} post group(s) in database:`); - postGroups.forEach((group, index) => { - console.log(` Group ${index + 1}: ${group.id} (${group.posts.length} posts)`); - }); + const postGroups: PostGroup[] = JSON.parse(data); - } catch (error) { - console.error('โŒ Error verifying seeded data:', error); - throw error; - } -} \ No newline at end of file + console.log(`โœ… Found ${postGroups.length} post group(s) in database:`); + postGroups.forEach((group, index) => { + console.log(` Group ${index + 1}: ${group.id} (${group.posts.length} posts)`); + }); + } catch (error) { + console.error('โŒ Error verifying seeded data:', error); + throw error; + } +}