diff --git a/developer-guides/llm-sdks-and-frameworks/anthropic.mdx b/developer-guides/llm-sdks-and-frameworks/anthropic.mdx new file mode 100644 index 0000000..7ff259f --- /dev/null +++ b/developer-guides/llm-sdks-and-frameworks/anthropic.mdx @@ -0,0 +1,162 @@ +--- +title: 'Anthropic' +description: 'Use ScrapeGraphAI with Claude for web scraping + AI workflows' +--- + +> Integrate ScrapeGraphAI with Claude to build AI applications powered by web data. + +## Setup + +```bash +npm install scrapegraph-js @anthropic-ai/sdk zod zod-to-json-schema +``` + +Create a `.env` file: + +```bash +SGAI_APIKEY=your_scrapegraph_key +ANTHROPIC_API_KEY=your_anthropic_key +``` + + +Node does not read `.env` on its own. On Node 20.6 or later, run your script with `node --env-file=.env`; on older versions, install `dotenv` and add `import 'dotenv/config'` to your code. + + +## Scrape + Summarize + +This example demonstrates a simple workflow: scrape a website and summarize the content using Claude. + +```typescript +import { smartScraper } from 'scrapegraph-js'; +import Anthropic from '@anthropic-ai/sdk'; + +const apiKey = process.env.SGAI_APIKEY; +const anthropic = new Anthropic({ apiKey: process.env.ANTHROPIC_API_KEY }); + +const scrapeResult = await smartScraper( + apiKey, + 'https://scrapegraphai.com', + 'Extract all content from this page' +); + +console.log('Scraped content length:', JSON.stringify(scrapeResult.result).length); + +const message = await anthropic.messages.create({ + model: 'claude-haiku-4-5', + max_tokens: 1024, + messages: [ + { role: 'user', content: `Summarize in 100 words: ${JSON.stringify(scrapeResult.result)}` } + ] +}); + +console.log('Response:', message); +``` + +## Tool Use + +This example shows how to use Claude's tool use feature to let the model decide when to scrape websites based on user requests.
+ +```typescript +import { smartScraper } from 'scrapegraph-js'; +import Anthropic from '@anthropic-ai/sdk'; +import { z } from 'zod'; +import { zodToJsonSchema } from 'zod-to-json-schema'; + +const apiKey = process.env.SGAI_APIKEY; +const anthropic = new Anthropic({ + apiKey: process.env.ANTHROPIC_API_KEY +}); + +const ScrapeArgsSchema = z.object({ /* tool arguments: advertised to Claude as JSON Schema and reused below to validate the tool call */ + url: z.string() +}); + +console.log("Sending user message to Claude and requesting tool use if necessary..."); +const response = await anthropic.messages.create({ + model: 'claude-haiku-4-5', + max_tokens: 1024, + tools: [{ + name: 'scrape_website', + description: 'Scrape and extract structured data from a website URL', + input_schema: zodToJsonSchema(ScrapeArgsSchema) as Anthropic.Tool.InputSchema /* no name argument: a named schema is emitted as a $ref into definitions, without the top-level type: 'object' that input_schema needs */ + }], + messages: [{ + role: 'user', + content: 'What is ScrapeGraphAI? Check scrapegraphai.com' + }] +}); + +const toolUse = response.content.find(block => block.type === 'tool_use'); /* first tool_use block, if Claude chose to call the tool */ + +if (toolUse && toolUse.type === 'tool_use') { + const input = ScrapeArgsSchema.parse(toolUse.input); /* validate the model-supplied arguments instead of asserting their type; throws if url is missing or not a string */ + console.log(`Calling tool: ${toolUse.name} | URL: ${input.url}`); + + const result = await smartScraper( + apiKey, + input.url, + 'Extract all content from this page' + ); + + console.log(`Scraped content preview: ${JSON.stringify(result.result)?.substring(0, 300)}...`); + // Continue with the conversation or process the scraped content as needed +} +``` + +## Structured Extraction + +This example demonstrates how to use Claude to extract structured data from scraped website content.
+ +```typescript +import { smartScraper } from 'scrapegraph-js'; +import Anthropic from '@anthropic-ai/sdk'; +import { z } from 'zod'; + +const apiKey = process.env.SGAI_APIKEY; +const anthropic = new Anthropic({ apiKey: process.env.ANTHROPIC_API_KEY }); + +const CompanyInfoSchema = z.object({ /* shape the parsed reply is checked against; only name is non-optional */ + name: z.string(), + industry: z.string().optional(), + description: z.string().optional() +}); + +const scrapeResult = await smartScraper( + apiKey, + 'https://stripe.com', + 'Extract all content from this page' +); + +const prompt = `Extract company information from this website content. + +Output ONLY valid JSON in this exact format (no markdown, no explanation): + +{ + "name": "Company Name", + "industry": "Industry", + "description": "One sentence description" +} + +Website content: +${JSON.stringify(scrapeResult.result)}`; + +const message = await anthropic.messages.create({ + model: 'claude-haiku-4-5', + max_tokens: 1024, + messages: [ + { role: 'user', content: prompt }, + { role: 'assistant', content: '{' } /* assistant turn seeded with an opening brace */ + ] +}); + +const textBlock = message.content.find(block => block.type === 'text'); /* first text block of the reply, if any */ + +if (textBlock && textBlock.type === 'text') { + const jsonText = '{' + textBlock.text; /* re-attach the '{' from the seeded assistant turn; presumably the reply text continues after it - confirm against the Messages API docs */ + const companyInfo = CompanyInfoSchema.parse(JSON.parse(jsonText)); /* JSON.parse throws if the text is not valid JSON; the parsed value is then checked against CompanyInfoSchema */ + + console.log(companyInfo); +} +``` + +For more examples, check the [Claude documentation](https://docs.anthropic.com/claude/docs). diff --git a/developer-guides/llm-sdks-and-frameworks/gemini.mdx b/developer-guides/llm-sdks-and-frameworks/gemini.mdx new file mode 100644 index 0000000..55a2311 --- /dev/null +++ b/developer-guides/llm-sdks-and-frameworks/gemini.mdx @@ -0,0 +1,131 @@ +--- +title: 'Gemini' +description: 'Use ScrapeGraphAI with Google Gemini AI for web scraping + AI workflows' +--- + +> Integrate ScrapeGraphAI with Google's Gemini for AI applications powered by web data.
+ +## Setup + +```bash +npm install scrapegraph-js @google/genai +``` + +Create a `.env` file: + +```bash +SGAI_APIKEY=your_scrapegraph_key +GEMINI_API_KEY=your_gemini_key +``` + + +Node does not read `.env` on its own. On Node 20.6 or later, run your script with `node --env-file=.env`; on older versions, install `dotenv` and add `import 'dotenv/config'` to your code. + + +## Scrape + Summarize + +This example demonstrates a simple workflow: scrape a website and summarize the content using Gemini. + +```typescript +import { smartScraper } from 'scrapegraph-js'; +import { GoogleGenAI } from '@google/genai'; + +const apiKey = process.env.SGAI_APIKEY; +const ai = new GoogleGenAI({ apiKey: process.env.GEMINI_API_KEY }); + +const scrapeResult = await smartScraper( + apiKey, + 'https://scrapegraphai.com', + 'Extract all content from this page' +); + +console.log('Scraped content length:', JSON.stringify(scrapeResult.result).length); + +const response = await ai.models.generateContent({ + model: 'gemini-2.5-flash', + contents: `Summarize: ${JSON.stringify(scrapeResult.result)}`, +}); + +console.log('Summary:', response.text); +``` + +## Content Analysis + +This example shows how to analyze website content using Gemini's multi-turn conversation capabilities.
+ +```typescript +import { smartScraper } from 'scrapegraph-js'; +import { GoogleGenAI } from '@google/genai'; + +const apiKey = process.env.SGAI_APIKEY; +const ai = new GoogleGenAI({ apiKey: process.env.GEMINI_API_KEY }); + +const scrapeResult = await smartScraper( + apiKey, + 'https://news.ycombinator.com/', + 'Extract all content from this page' +); + +console.log('Scraped content length:', JSON.stringify(scrapeResult.result).length); + +const chat = ai.chats.create({ /* one chat object, used for both sendMessage calls below */ + model: 'gemini-2.5-flash' +}); + +// First turn: send the scraped content and ask for the top 3 stories on Hacker News +const result1 = await chat.sendMessage({ + message: `Based on this website content from Hacker News, what are the top 3 stories right now?\n\n${JSON.stringify(scrapeResult.result)}` +}); +console.log('Top 3 Stories:', result1.text); + +// Second turn on the same chat: the scraped content is not resent; the question refers back to "the same content" +const result2 = await chat.sendMessage({ + message: `Now, what are the 4th and 5th top stories on Hacker News from the same content?` +}); +console.log('4th and 5th Stories:', result2.text); +``` + +## Structured Extraction + +This example demonstrates how to extract structured data using Gemini's JSON mode from scraped website content.
+ +```typescript +import { smartScraper } from 'scrapegraph-js'; +import { GoogleGenAI, Type } from '@google/genai'; + +const apiKey = process.env.SGAI_APIKEY; +const ai = new GoogleGenAI({ apiKey: process.env.GEMINI_API_KEY }); + +const scrapeResult = await smartScraper( + apiKey, + 'https://stripe.com', + 'Extract all content from this page' +); + +console.log('Scraped content length:', JSON.stringify(scrapeResult.result).length); + +const response = await ai.models.generateContent({ + model: 'gemini-2.5-flash', + contents: `Extract company information: ${JSON.stringify(scrapeResult.result)}`, + config: { + responseMimeType: 'application/json', /* request a JSON reply rather than free text */ + responseSchema: { /* fields the JSON reply is asked to contain */ + type: Type.OBJECT, + properties: { + name: { type: Type.STRING }, + industry: { type: Type.STRING }, + description: { type: Type.STRING }, + products: { + type: Type.ARRAY, + items: { type: Type.STRING } + } + }, + propertyOrdering: ['name', 'industry', 'description', 'products'] /* field order requested for the reply */ + } + } +}); + +console.log('Extracted company info:', response.text); /* raw response text; it is not parsed into an object here */ +``` + +For more examples, check the [Gemini documentation](https://ai.google.dev/docs). diff --git a/docs.json b/docs.json index bf42925..ff87599 100644 --- a/docs.json +++ b/docs.json @@ -99,6 +99,13 @@ "integrations/x402" ] }, + { + "group": "LLM SDKs & Frameworks", + "pages": [ + "developer-guides/llm-sdks-and-frameworks/gemini", + "developer-guides/llm-sdks-and-frameworks/anthropic" + ] + }, { "group": "Contribute", "pages": [