From 45b2317ab7b3721cec59812c57ec5ca4a8721cd2 Mon Sep 17 00:00:00 2001 From: VinciGit00 Date: Mon, 29 Apr 2024 11:16:48 +0200 Subject: [PATCH 1/7] add json examples --- .gitignore | 1 - examples/gemini/inputs/example.json | 182 ++++++++++++++++++ .../local_models/Docker/inputs/example.json | 182 ++++++++++++++++++ .../local_models/Ollama/inputs/example.json | 182 ++++++++++++++++++ examples/mixed_models/inputs/example.json | 182 ++++++++++++++++++ examples/openai/inputs/example.json | 182 ++++++++++++++++++ tests/graphs/inputs/example.json | 182 ++++++++++++++++++ 7 files changed, 1092 insertions(+), 1 deletion(-) create mode 100644 examples/gemini/inputs/example.json create mode 100644 examples/local_models/Docker/inputs/example.json create mode 100644 examples/local_models/Ollama/inputs/example.json create mode 100644 examples/mixed_models/inputs/example.json create mode 100644 examples/openai/inputs/example.json create mode 100644 tests/graphs/inputs/example.json diff --git a/.gitignore b/.gitignore index 72f4db8a..16240e6d 100644 --- a/.gitignore +++ b/.gitignore @@ -29,7 +29,6 @@ venv/ *.google-cookie examples/graph_examples/ScrapeGraphAI_generated_graph examples/**/*.csv -examples/**/*.json main.py poetry.lock diff --git a/examples/gemini/inputs/example.json b/examples/gemini/inputs/example.json new file mode 100644 index 00000000..2263184c --- /dev/null +++ b/examples/gemini/inputs/example.json @@ -0,0 +1,182 @@ +{ + "kind":"youtube#searchListResponse", + "etag":"q4ibjmYp1KA3RqMF4jFLl6PBwOg", + "nextPageToken":"CAUQAA", + "regionCode":"NL", + "pageInfo":{ + "totalResults":1000000, + "resultsPerPage":5 + }, + "items":[ + { + "kind":"youtube#searchResult", + "etag":"QCsHBifbaernVCbLv8Cu6rAeaDQ", + "id":{ + "kind":"youtube#video", + "videoId":"TvWDY4Mm5GM" + }, + "snippet":{ + "publishedAt":"2023-07-24T14:15:01Z", + "channelId":"UCwozCpFp9g9x0wAzuFh0hwQ", + "title":"3 Football Clubs Kylian Mbappe Should Avoid Signing ✍️❌⚽️ #football #mbappe #shorts", + "description":"", + "thumbnails":{ + "default":{ + "url":"https://i.ytimg.com/vi/TvWDY4Mm5GM/default.jpg", + "width":120, + "height":90 + }, + "medium":{ + "url":"https://i.ytimg.com/vi/TvWDY4Mm5GM/mqdefault.jpg", + "width":320, + "height":180 + }, + "high":{ + "url":"https://i.ytimg.com/vi/TvWDY4Mm5GM/hqdefault.jpg", + "width":480, + "height":360 + } + }, + "channelTitle":"FC Motivate", + "liveBroadcastContent":"none", + "publishTime":"2023-07-24T14:15:01Z" + } + }, + { + "kind":"youtube#searchResult", + "etag":"0NG5QHdtIQM_V-DBJDEf-jK_Y9k", + "id":{ + "kind":"youtube#video", + "videoId":"aZM_42CcNZ4" + }, + "snippet":{ + "publishedAt":"2023-07-24T16:09:27Z", + "channelId":"UCM5gMM_HqfKHYIEJ3lstMUA", + "title":"Which Football Club Could Cristiano Ronaldo Afford To Buy? 
💰", + "description":"Sign up to Sorare and get a FREE card: https://sorare.pxf.io/NellisShorts Give Soraredata a go for FREE: ...", + "thumbnails":{ + "default":{ + "url":"https://i.ytimg.com/vi/aZM_42CcNZ4/default.jpg", + "width":120, + "height":90 + }, + "medium":{ + "url":"https://i.ytimg.com/vi/aZM_42CcNZ4/mqdefault.jpg", + "width":320, + "height":180 + }, + "high":{ + "url":"https://i.ytimg.com/vi/aZM_42CcNZ4/hqdefault.jpg", + "width":480, + "height":360 + } + }, + "channelTitle":"John Nellis", + "liveBroadcastContent":"none", + "publishTime":"2023-07-24T16:09:27Z" + } + }, + { + "kind":"youtube#searchResult", + "etag":"WbBz4oh9I5VaYj91LjeJvffrBVY", + "id":{ + "kind":"youtube#video", + "videoId":"wkP3XS3aNAY" + }, + "snippet":{ + "publishedAt":"2023-07-24T16:00:50Z", + "channelId":"UC4EP1dxFDPup_aFLt0ElsDw", + "title":"PAULO DYBALA vs THE WORLD'S LONGEST FREEKICK WALL", + "description":"Can Paulo Dybala curl a football around the World's longest free kick wall? We met up with the World Cup winner and put him to ...", + "thumbnails":{ + "default":{ + "url":"https://i.ytimg.com/vi/wkP3XS3aNAY/default.jpg", + "width":120, + "height":90 + }, + "medium":{ + "url":"https://i.ytimg.com/vi/wkP3XS3aNAY/mqdefault.jpg", + "width":320, + "height":180 + }, + "high":{ + "url":"https://i.ytimg.com/vi/wkP3XS3aNAY/hqdefault.jpg", + "width":480, + "height":360 + } + }, + "channelTitle":"Shoot for Love", + "liveBroadcastContent":"none", + "publishTime":"2023-07-24T16:00:50Z" + } + }, + { + "kind":"youtube#searchResult", + "etag":"juxv_FhT_l4qrR05S1QTrb4CGh8", + "id":{ + "kind":"youtube#video", + "videoId":"rJkDZ0WvfT8" + }, + "snippet":{ + "publishedAt":"2023-07-24T10:00:39Z", + "channelId":"UCO8qj5u80Ga7N_tP3BZWWhQ", + "title":"TOP 10 DEFENDERS 2023", + "description":"SoccerKingz https://soccerkingz.nl Use code: 'ILOVEHOF' to get 10% off. TOP 10 DEFENDERS 2023 Follow us! 
• Instagram ...", + "thumbnails":{ + "default":{ + "url":"https://i.ytimg.com/vi/rJkDZ0WvfT8/default.jpg", + "width":120, + "height":90 + }, + "medium":{ + "url":"https://i.ytimg.com/vi/rJkDZ0WvfT8/mqdefault.jpg", + "width":320, + "height":180 + }, + "high":{ + "url":"https://i.ytimg.com/vi/rJkDZ0WvfT8/hqdefault.jpg", + "width":480, + "height":360 + } + }, + "channelTitle":"Home of Football", + "liveBroadcastContent":"none", + "publishTime":"2023-07-24T10:00:39Z" + } + }, + { + "kind":"youtube#searchResult", + "etag":"wtuknXTmI1txoULeH3aWaOuXOow", + "id":{ + "kind":"youtube#video", + "videoId":"XH0rtu4U6SE" + }, + "snippet":{ + "publishedAt":"2023-07-21T16:30:05Z", + "channelId":"UCwozCpFp9g9x0wAzuFh0hwQ", + "title":"3 Things You Didn't Know About Erling Haaland ⚽️🇳🇴 #football #haaland #shorts", + "description":"", + "thumbnails":{ + "default":{ + "url":"https://i.ytimg.com/vi/XH0rtu4U6SE/default.jpg", + "width":120, + "height":90 + }, + "medium":{ + "url":"https://i.ytimg.com/vi/XH0rtu4U6SE/mqdefault.jpg", + "width":320, + "height":180 + }, + "high":{ + "url":"https://i.ytimg.com/vi/XH0rtu4U6SE/hqdefault.jpg", + "width":480, + "height":360 + } + }, + "channelTitle":"FC Motivate", + "liveBroadcastContent":"none", + "publishTime":"2023-07-21T16:30:05Z" + } + } + ] +} \ No newline at end of file diff --git a/examples/local_models/Docker/inputs/example.json b/examples/local_models/Docker/inputs/example.json new file mode 100644 index 00000000..2263184c --- /dev/null +++ b/examples/local_models/Docker/inputs/example.json @@ -0,0 +1,182 @@ +{ + "kind":"youtube#searchListResponse", + "etag":"q4ibjmYp1KA3RqMF4jFLl6PBwOg", + "nextPageToken":"CAUQAA", + "regionCode":"NL", + "pageInfo":{ + "totalResults":1000000, + "resultsPerPage":5 + }, + "items":[ + { + "kind":"youtube#searchResult", + "etag":"QCsHBifbaernVCbLv8Cu6rAeaDQ", + "id":{ + "kind":"youtube#video", + "videoId":"TvWDY4Mm5GM" + }, + "snippet":{ + "publishedAt":"2023-07-24T14:15:01Z", + "channelId":"UCwozCpFp9g9x0wAzuFh0hwQ", + "title":"3 Football Clubs Kylian Mbappe Should Avoid Signing ✍️❌⚽️ #football #mbappe #shorts", + "description":"", + "thumbnails":{ + "default":{ + "url":"https://i.ytimg.com/vi/TvWDY4Mm5GM/default.jpg", + "width":120, + "height":90 + }, + "medium":{ + "url":"https://i.ytimg.com/vi/TvWDY4Mm5GM/mqdefault.jpg", + "width":320, + "height":180 + }, + "high":{ + "url":"https://i.ytimg.com/vi/TvWDY4Mm5GM/hqdefault.jpg", + "width":480, + "height":360 + } + }, + "channelTitle":"FC Motivate", + "liveBroadcastContent":"none", + "publishTime":"2023-07-24T14:15:01Z" + } + }, + { + "kind":"youtube#searchResult", + "etag":"0NG5QHdtIQM_V-DBJDEf-jK_Y9k", + "id":{ + "kind":"youtube#video", + "videoId":"aZM_42CcNZ4" + }, + "snippet":{ + "publishedAt":"2023-07-24T16:09:27Z", + "channelId":"UCM5gMM_HqfKHYIEJ3lstMUA", + "title":"Which Football Club Could Cristiano Ronaldo Afford To Buy? 
💰", + "description":"Sign up to Sorare and get a FREE card: https://sorare.pxf.io/NellisShorts Give Soraredata a go for FREE: ...", + "thumbnails":{ + "default":{ + "url":"https://i.ytimg.com/vi/aZM_42CcNZ4/default.jpg", + "width":120, + "height":90 + }, + "medium":{ + "url":"https://i.ytimg.com/vi/aZM_42CcNZ4/mqdefault.jpg", + "width":320, + "height":180 + }, + "high":{ + "url":"https://i.ytimg.com/vi/aZM_42CcNZ4/hqdefault.jpg", + "width":480, + "height":360 + } + }, + "channelTitle":"John Nellis", + "liveBroadcastContent":"none", + "publishTime":"2023-07-24T16:09:27Z" + } + }, + { + "kind":"youtube#searchResult", + "etag":"WbBz4oh9I5VaYj91LjeJvffrBVY", + "id":{ + "kind":"youtube#video", + "videoId":"wkP3XS3aNAY" + }, + "snippet":{ + "publishedAt":"2023-07-24T16:00:50Z", + "channelId":"UC4EP1dxFDPup_aFLt0ElsDw", + "title":"PAULO DYBALA vs THE WORLD'S LONGEST FREEKICK WALL", + "description":"Can Paulo Dybala curl a football around the World's longest free kick wall? We met up with the World Cup winner and put him to ...", + "thumbnails":{ + "default":{ + "url":"https://i.ytimg.com/vi/wkP3XS3aNAY/default.jpg", + "width":120, + "height":90 + }, + "medium":{ + "url":"https://i.ytimg.com/vi/wkP3XS3aNAY/mqdefault.jpg", + "width":320, + "height":180 + }, + "high":{ + "url":"https://i.ytimg.com/vi/wkP3XS3aNAY/hqdefault.jpg", + "width":480, + "height":360 + } + }, + "channelTitle":"Shoot for Love", + "liveBroadcastContent":"none", + "publishTime":"2023-07-24T16:00:50Z" + } + }, + { + "kind":"youtube#searchResult", + "etag":"juxv_FhT_l4qrR05S1QTrb4CGh8", + "id":{ + "kind":"youtube#video", + "videoId":"rJkDZ0WvfT8" + }, + "snippet":{ + "publishedAt":"2023-07-24T10:00:39Z", + "channelId":"UCO8qj5u80Ga7N_tP3BZWWhQ", + "title":"TOP 10 DEFENDERS 2023", + "description":"SoccerKingz https://soccerkingz.nl Use code: 'ILOVEHOF' to get 10% off. TOP 10 DEFENDERS 2023 Follow us! 
• Instagram ...", + "thumbnails":{ + "default":{ + "url":"https://i.ytimg.com/vi/rJkDZ0WvfT8/default.jpg", + "width":120, + "height":90 + }, + "medium":{ + "url":"https://i.ytimg.com/vi/rJkDZ0WvfT8/mqdefault.jpg", + "width":320, + "height":180 + }, + "high":{ + "url":"https://i.ytimg.com/vi/rJkDZ0WvfT8/hqdefault.jpg", + "width":480, + "height":360 + } + }, + "channelTitle":"Home of Football", + "liveBroadcastContent":"none", + "publishTime":"2023-07-24T10:00:39Z" + } + }, + { + "kind":"youtube#searchResult", + "etag":"wtuknXTmI1txoULeH3aWaOuXOow", + "id":{ + "kind":"youtube#video", + "videoId":"XH0rtu4U6SE" + }, + "snippet":{ + "publishedAt":"2023-07-21T16:30:05Z", + "channelId":"UCwozCpFp9g9x0wAzuFh0hwQ", + "title":"3 Things You Didn't Know About Erling Haaland ⚽️🇳🇴 #football #haaland #shorts", + "description":"", + "thumbnails":{ + "default":{ + "url":"https://i.ytimg.com/vi/XH0rtu4U6SE/default.jpg", + "width":120, + "height":90 + }, + "medium":{ + "url":"https://i.ytimg.com/vi/XH0rtu4U6SE/mqdefault.jpg", + "width":320, + "height":180 + }, + "high":{ + "url":"https://i.ytimg.com/vi/XH0rtu4U6SE/hqdefault.jpg", + "width":480, + "height":360 + } + }, + "channelTitle":"FC Motivate", + "liveBroadcastContent":"none", + "publishTime":"2023-07-21T16:30:05Z" + } + } + ] +} \ No newline at end of file diff --git a/examples/local_models/Ollama/inputs/example.json b/examples/local_models/Ollama/inputs/example.json new file mode 100644 index 00000000..2263184c --- /dev/null +++ b/examples/local_models/Ollama/inputs/example.json @@ -0,0 +1,182 @@ +{ + "kind":"youtube#searchListResponse", + "etag":"q4ibjmYp1KA3RqMF4jFLl6PBwOg", + "nextPageToken":"CAUQAA", + "regionCode":"NL", + "pageInfo":{ + "totalResults":1000000, + "resultsPerPage":5 + }, + "items":[ + { + "kind":"youtube#searchResult", + "etag":"QCsHBifbaernVCbLv8Cu6rAeaDQ", + "id":{ + "kind":"youtube#video", + "videoId":"TvWDY4Mm5GM" + }, + "snippet":{ + "publishedAt":"2023-07-24T14:15:01Z", + "channelId":"UCwozCpFp9g9x0wAzuFh0hwQ", + "title":"3 Football Clubs Kylian Mbappe Should Avoid Signing ✍️❌⚽️ #football #mbappe #shorts", + "description":"", + "thumbnails":{ + "default":{ + "url":"https://i.ytimg.com/vi/TvWDY4Mm5GM/default.jpg", + "width":120, + "height":90 + }, + "medium":{ + "url":"https://i.ytimg.com/vi/TvWDY4Mm5GM/mqdefault.jpg", + "width":320, + "height":180 + }, + "high":{ + "url":"https://i.ytimg.com/vi/TvWDY4Mm5GM/hqdefault.jpg", + "width":480, + "height":360 + } + }, + "channelTitle":"FC Motivate", + "liveBroadcastContent":"none", + "publishTime":"2023-07-24T14:15:01Z" + } + }, + { + "kind":"youtube#searchResult", + "etag":"0NG5QHdtIQM_V-DBJDEf-jK_Y9k", + "id":{ + "kind":"youtube#video", + "videoId":"aZM_42CcNZ4" + }, + "snippet":{ + "publishedAt":"2023-07-24T16:09:27Z", + "channelId":"UCM5gMM_HqfKHYIEJ3lstMUA", + "title":"Which Football Club Could Cristiano Ronaldo Afford To Buy? 
💰", + "description":"Sign up to Sorare and get a FREE card: https://sorare.pxf.io/NellisShorts Give Soraredata a go for FREE: ...", + "thumbnails":{ + "default":{ + "url":"https://i.ytimg.com/vi/aZM_42CcNZ4/default.jpg", + "width":120, + "height":90 + }, + "medium":{ + "url":"https://i.ytimg.com/vi/aZM_42CcNZ4/mqdefault.jpg", + "width":320, + "height":180 + }, + "high":{ + "url":"https://i.ytimg.com/vi/aZM_42CcNZ4/hqdefault.jpg", + "width":480, + "height":360 + } + }, + "channelTitle":"John Nellis", + "liveBroadcastContent":"none", + "publishTime":"2023-07-24T16:09:27Z" + } + }, + { + "kind":"youtube#searchResult", + "etag":"WbBz4oh9I5VaYj91LjeJvffrBVY", + "id":{ + "kind":"youtube#video", + "videoId":"wkP3XS3aNAY" + }, + "snippet":{ + "publishedAt":"2023-07-24T16:00:50Z", + "channelId":"UC4EP1dxFDPup_aFLt0ElsDw", + "title":"PAULO DYBALA vs THE WORLD'S LONGEST FREEKICK WALL", + "description":"Can Paulo Dybala curl a football around the World's longest free kick wall? We met up with the World Cup winner and put him to ...", + "thumbnails":{ + "default":{ + "url":"https://i.ytimg.com/vi/wkP3XS3aNAY/default.jpg", + "width":120, + "height":90 + }, + "medium":{ + "url":"https://i.ytimg.com/vi/wkP3XS3aNAY/mqdefault.jpg", + "width":320, + "height":180 + }, + "high":{ + "url":"https://i.ytimg.com/vi/wkP3XS3aNAY/hqdefault.jpg", + "width":480, + "height":360 + } + }, + "channelTitle":"Shoot for Love", + "liveBroadcastContent":"none", + "publishTime":"2023-07-24T16:00:50Z" + } + }, + { + "kind":"youtube#searchResult", + "etag":"juxv_FhT_l4qrR05S1QTrb4CGh8", + "id":{ + "kind":"youtube#video", + "videoId":"rJkDZ0WvfT8" + }, + "snippet":{ + "publishedAt":"2023-07-24T10:00:39Z", + "channelId":"UCO8qj5u80Ga7N_tP3BZWWhQ", + "title":"TOP 10 DEFENDERS 2023", + "description":"SoccerKingz https://soccerkingz.nl Use code: 'ILOVEHOF' to get 10% off. TOP 10 DEFENDERS 2023 Follow us! 
• Instagram ...", + "thumbnails":{ + "default":{ + "url":"https://i.ytimg.com/vi/rJkDZ0WvfT8/default.jpg", + "width":120, + "height":90 + }, + "medium":{ + "url":"https://i.ytimg.com/vi/rJkDZ0WvfT8/mqdefault.jpg", + "width":320, + "height":180 + }, + "high":{ + "url":"https://i.ytimg.com/vi/rJkDZ0WvfT8/hqdefault.jpg", + "width":480, + "height":360 + } + }, + "channelTitle":"Home of Football", + "liveBroadcastContent":"none", + "publishTime":"2023-07-24T10:00:39Z" + } + }, + { + "kind":"youtube#searchResult", + "etag":"wtuknXTmI1txoULeH3aWaOuXOow", + "id":{ + "kind":"youtube#video", + "videoId":"XH0rtu4U6SE" + }, + "snippet":{ + "publishedAt":"2023-07-21T16:30:05Z", + "channelId":"UCwozCpFp9g9x0wAzuFh0hwQ", + "title":"3 Things You Didn't Know About Erling Haaland ⚽️🇳🇴 #football #haaland #shorts", + "description":"", + "thumbnails":{ + "default":{ + "url":"https://i.ytimg.com/vi/XH0rtu4U6SE/default.jpg", + "width":120, + "height":90 + }, + "medium":{ + "url":"https://i.ytimg.com/vi/XH0rtu4U6SE/mqdefault.jpg", + "width":320, + "height":180 + }, + "high":{ + "url":"https://i.ytimg.com/vi/XH0rtu4U6SE/hqdefault.jpg", + "width":480, + "height":360 + } + }, + "channelTitle":"FC Motivate", + "liveBroadcastContent":"none", + "publishTime":"2023-07-21T16:30:05Z" + } + } + ] +} \ No newline at end of file diff --git a/examples/mixed_models/inputs/example.json b/examples/mixed_models/inputs/example.json new file mode 100644 index 00000000..2263184c --- /dev/null +++ b/examples/mixed_models/inputs/example.json @@ -0,0 +1,182 @@ +{ + "kind":"youtube#searchListResponse", + "etag":"q4ibjmYp1KA3RqMF4jFLl6PBwOg", + "nextPageToken":"CAUQAA", + "regionCode":"NL", + "pageInfo":{ + "totalResults":1000000, + "resultsPerPage":5 + }, + "items":[ + { + "kind":"youtube#searchResult", + "etag":"QCsHBifbaernVCbLv8Cu6rAeaDQ", + "id":{ + "kind":"youtube#video", + "videoId":"TvWDY4Mm5GM" + }, + "snippet":{ + "publishedAt":"2023-07-24T14:15:01Z", + "channelId":"UCwozCpFp9g9x0wAzuFh0hwQ", + "title":"3 Football Clubs Kylian Mbappe Should Avoid Signing ✍️❌⚽️ #football #mbappe #shorts", + "description":"", + "thumbnails":{ + "default":{ + "url":"https://i.ytimg.com/vi/TvWDY4Mm5GM/default.jpg", + "width":120, + "height":90 + }, + "medium":{ + "url":"https://i.ytimg.com/vi/TvWDY4Mm5GM/mqdefault.jpg", + "width":320, + "height":180 + }, + "high":{ + "url":"https://i.ytimg.com/vi/TvWDY4Mm5GM/hqdefault.jpg", + "width":480, + "height":360 + } + }, + "channelTitle":"FC Motivate", + "liveBroadcastContent":"none", + "publishTime":"2023-07-24T14:15:01Z" + } + }, + { + "kind":"youtube#searchResult", + "etag":"0NG5QHdtIQM_V-DBJDEf-jK_Y9k", + "id":{ + "kind":"youtube#video", + "videoId":"aZM_42CcNZ4" + }, + "snippet":{ + "publishedAt":"2023-07-24T16:09:27Z", + "channelId":"UCM5gMM_HqfKHYIEJ3lstMUA", + "title":"Which Football Club Could Cristiano Ronaldo Afford To Buy? 
💰", + "description":"Sign up to Sorare and get a FREE card: https://sorare.pxf.io/NellisShorts Give Soraredata a go for FREE: ...", + "thumbnails":{ + "default":{ + "url":"https://i.ytimg.com/vi/aZM_42CcNZ4/default.jpg", + "width":120, + "height":90 + }, + "medium":{ + "url":"https://i.ytimg.com/vi/aZM_42CcNZ4/mqdefault.jpg", + "width":320, + "height":180 + }, + "high":{ + "url":"https://i.ytimg.com/vi/aZM_42CcNZ4/hqdefault.jpg", + "width":480, + "height":360 + } + }, + "channelTitle":"John Nellis", + "liveBroadcastContent":"none", + "publishTime":"2023-07-24T16:09:27Z" + } + }, + { + "kind":"youtube#searchResult", + "etag":"WbBz4oh9I5VaYj91LjeJvffrBVY", + "id":{ + "kind":"youtube#video", + "videoId":"wkP3XS3aNAY" + }, + "snippet":{ + "publishedAt":"2023-07-24T16:00:50Z", + "channelId":"UC4EP1dxFDPup_aFLt0ElsDw", + "title":"PAULO DYBALA vs THE WORLD'S LONGEST FREEKICK WALL", + "description":"Can Paulo Dybala curl a football around the World's longest free kick wall? We met up with the World Cup winner and put him to ...", + "thumbnails":{ + "default":{ + "url":"https://i.ytimg.com/vi/wkP3XS3aNAY/default.jpg", + "width":120, + "height":90 + }, + "medium":{ + "url":"https://i.ytimg.com/vi/wkP3XS3aNAY/mqdefault.jpg", + "width":320, + "height":180 + }, + "high":{ + "url":"https://i.ytimg.com/vi/wkP3XS3aNAY/hqdefault.jpg", + "width":480, + "height":360 + } + }, + "channelTitle":"Shoot for Love", + "liveBroadcastContent":"none", + "publishTime":"2023-07-24T16:00:50Z" + } + }, + { + "kind":"youtube#searchResult", + "etag":"juxv_FhT_l4qrR05S1QTrb4CGh8", + "id":{ + "kind":"youtube#video", + "videoId":"rJkDZ0WvfT8" + }, + "snippet":{ + "publishedAt":"2023-07-24T10:00:39Z", + "channelId":"UCO8qj5u80Ga7N_tP3BZWWhQ", + "title":"TOP 10 DEFENDERS 2023", + "description":"SoccerKingz https://soccerkingz.nl Use code: 'ILOVEHOF' to get 10% off. TOP 10 DEFENDERS 2023 Follow us! 
• Instagram ...", + "thumbnails":{ + "default":{ + "url":"https://i.ytimg.com/vi/rJkDZ0WvfT8/default.jpg", + "width":120, + "height":90 + }, + "medium":{ + "url":"https://i.ytimg.com/vi/rJkDZ0WvfT8/mqdefault.jpg", + "width":320, + "height":180 + }, + "high":{ + "url":"https://i.ytimg.com/vi/rJkDZ0WvfT8/hqdefault.jpg", + "width":480, + "height":360 + } + }, + "channelTitle":"Home of Football", + "liveBroadcastContent":"none", + "publishTime":"2023-07-24T10:00:39Z" + } + }, + { + "kind":"youtube#searchResult", + "etag":"wtuknXTmI1txoULeH3aWaOuXOow", + "id":{ + "kind":"youtube#video", + "videoId":"XH0rtu4U6SE" + }, + "snippet":{ + "publishedAt":"2023-07-21T16:30:05Z", + "channelId":"UCwozCpFp9g9x0wAzuFh0hwQ", + "title":"3 Things You Didn't Know About Erling Haaland ⚽️🇳🇴 #football #haaland #shorts", + "description":"", + "thumbnails":{ + "default":{ + "url":"https://i.ytimg.com/vi/XH0rtu4U6SE/default.jpg", + "width":120, + "height":90 + }, + "medium":{ + "url":"https://i.ytimg.com/vi/XH0rtu4U6SE/mqdefault.jpg", + "width":320, + "height":180 + }, + "high":{ + "url":"https://i.ytimg.com/vi/XH0rtu4U6SE/hqdefault.jpg", + "width":480, + "height":360 + } + }, + "channelTitle":"FC Motivate", + "liveBroadcastContent":"none", + "publishTime":"2023-07-21T16:30:05Z" + } + } + ] +} \ No newline at end of file diff --git a/examples/openai/inputs/example.json b/examples/openai/inputs/example.json new file mode 100644 index 00000000..2263184c --- /dev/null +++ b/examples/openai/inputs/example.json @@ -0,0 +1,182 @@ +{ + "kind":"youtube#searchListResponse", + "etag":"q4ibjmYp1KA3RqMF4jFLl6PBwOg", + "nextPageToken":"CAUQAA", + "regionCode":"NL", + "pageInfo":{ + "totalResults":1000000, + "resultsPerPage":5 + }, + "items":[ + { + "kind":"youtube#searchResult", + "etag":"QCsHBifbaernVCbLv8Cu6rAeaDQ", + "id":{ + "kind":"youtube#video", + "videoId":"TvWDY4Mm5GM" + }, + "snippet":{ + "publishedAt":"2023-07-24T14:15:01Z", + "channelId":"UCwozCpFp9g9x0wAzuFh0hwQ", + "title":"3 Football Clubs Kylian Mbappe Should Avoid Signing ✍️❌⚽️ #football #mbappe #shorts", + "description":"", + "thumbnails":{ + "default":{ + "url":"https://i.ytimg.com/vi/TvWDY4Mm5GM/default.jpg", + "width":120, + "height":90 + }, + "medium":{ + "url":"https://i.ytimg.com/vi/TvWDY4Mm5GM/mqdefault.jpg", + "width":320, + "height":180 + }, + "high":{ + "url":"https://i.ytimg.com/vi/TvWDY4Mm5GM/hqdefault.jpg", + "width":480, + "height":360 + } + }, + "channelTitle":"FC Motivate", + "liveBroadcastContent":"none", + "publishTime":"2023-07-24T14:15:01Z" + } + }, + { + "kind":"youtube#searchResult", + "etag":"0NG5QHdtIQM_V-DBJDEf-jK_Y9k", + "id":{ + "kind":"youtube#video", + "videoId":"aZM_42CcNZ4" + }, + "snippet":{ + "publishedAt":"2023-07-24T16:09:27Z", + "channelId":"UCM5gMM_HqfKHYIEJ3lstMUA", + "title":"Which Football Club Could Cristiano Ronaldo Afford To Buy? 
💰", + "description":"Sign up to Sorare and get a FREE card: https://sorare.pxf.io/NellisShorts Give Soraredata a go for FREE: ...", + "thumbnails":{ + "default":{ + "url":"https://i.ytimg.com/vi/aZM_42CcNZ4/default.jpg", + "width":120, + "height":90 + }, + "medium":{ + "url":"https://i.ytimg.com/vi/aZM_42CcNZ4/mqdefault.jpg", + "width":320, + "height":180 + }, + "high":{ + "url":"https://i.ytimg.com/vi/aZM_42CcNZ4/hqdefault.jpg", + "width":480, + "height":360 + } + }, + "channelTitle":"John Nellis", + "liveBroadcastContent":"none", + "publishTime":"2023-07-24T16:09:27Z" + } + }, + { + "kind":"youtube#searchResult", + "etag":"WbBz4oh9I5VaYj91LjeJvffrBVY", + "id":{ + "kind":"youtube#video", + "videoId":"wkP3XS3aNAY" + }, + "snippet":{ + "publishedAt":"2023-07-24T16:00:50Z", + "channelId":"UC4EP1dxFDPup_aFLt0ElsDw", + "title":"PAULO DYBALA vs THE WORLD'S LONGEST FREEKICK WALL", + "description":"Can Paulo Dybala curl a football around the World's longest free kick wall? We met up with the World Cup winner and put him to ...", + "thumbnails":{ + "default":{ + "url":"https://i.ytimg.com/vi/wkP3XS3aNAY/default.jpg", + "width":120, + "height":90 + }, + "medium":{ + "url":"https://i.ytimg.com/vi/wkP3XS3aNAY/mqdefault.jpg", + "width":320, + "height":180 + }, + "high":{ + "url":"https://i.ytimg.com/vi/wkP3XS3aNAY/hqdefault.jpg", + "width":480, + "height":360 + } + }, + "channelTitle":"Shoot for Love", + "liveBroadcastContent":"none", + "publishTime":"2023-07-24T16:00:50Z" + } + }, + { + "kind":"youtube#searchResult", + "etag":"juxv_FhT_l4qrR05S1QTrb4CGh8", + "id":{ + "kind":"youtube#video", + "videoId":"rJkDZ0WvfT8" + }, + "snippet":{ + "publishedAt":"2023-07-24T10:00:39Z", + "channelId":"UCO8qj5u80Ga7N_tP3BZWWhQ", + "title":"TOP 10 DEFENDERS 2023", + "description":"SoccerKingz https://soccerkingz.nl Use code: 'ILOVEHOF' to get 10% off. TOP 10 DEFENDERS 2023 Follow us! 
• Instagram ...", + "thumbnails":{ + "default":{ + "url":"https://i.ytimg.com/vi/rJkDZ0WvfT8/default.jpg", + "width":120, + "height":90 + }, + "medium":{ + "url":"https://i.ytimg.com/vi/rJkDZ0WvfT8/mqdefault.jpg", + "width":320, + "height":180 + }, + "high":{ + "url":"https://i.ytimg.com/vi/rJkDZ0WvfT8/hqdefault.jpg", + "width":480, + "height":360 + } + }, + "channelTitle":"Home of Football", + "liveBroadcastContent":"none", + "publishTime":"2023-07-24T10:00:39Z" + } + }, + { + "kind":"youtube#searchResult", + "etag":"wtuknXTmI1txoULeH3aWaOuXOow", + "id":{ + "kind":"youtube#video", + "videoId":"XH0rtu4U6SE" + }, + "snippet":{ + "publishedAt":"2023-07-21T16:30:05Z", + "channelId":"UCwozCpFp9g9x0wAzuFh0hwQ", + "title":"3 Things You Didn't Know About Erling Haaland ⚽️🇳🇴 #football #haaland #shorts", + "description":"", + "thumbnails":{ + "default":{ + "url":"https://i.ytimg.com/vi/XH0rtu4U6SE/default.jpg", + "width":120, + "height":90 + }, + "medium":{ + "url":"https://i.ytimg.com/vi/XH0rtu4U6SE/mqdefault.jpg", + "width":320, + "height":180 + }, + "high":{ + "url":"https://i.ytimg.com/vi/XH0rtu4U6SE/hqdefault.jpg", + "width":480, + "height":360 + } + }, + "channelTitle":"FC Motivate", + "liveBroadcastContent":"none", + "publishTime":"2023-07-21T16:30:05Z" + } + } + ] +} \ No newline at end of file diff --git a/tests/graphs/inputs/example.json b/tests/graphs/inputs/example.json new file mode 100644 index 00000000..2263184c --- /dev/null +++ b/tests/graphs/inputs/example.json @@ -0,0 +1,182 @@ +{ + "kind":"youtube#searchListResponse", + "etag":"q4ibjmYp1KA3RqMF4jFLl6PBwOg", + "nextPageToken":"CAUQAA", + "regionCode":"NL", + "pageInfo":{ + "totalResults":1000000, + "resultsPerPage":5 + }, + "items":[ + { + "kind":"youtube#searchResult", + "etag":"QCsHBifbaernVCbLv8Cu6rAeaDQ", + "id":{ + "kind":"youtube#video", + "videoId":"TvWDY4Mm5GM" + }, + "snippet":{ + "publishedAt":"2023-07-24T14:15:01Z", + "channelId":"UCwozCpFp9g9x0wAzuFh0hwQ", + "title":"3 Football Clubs Kylian Mbappe Should Avoid Signing ✍️❌⚽️ #football #mbappe #shorts", + "description":"", + "thumbnails":{ + "default":{ + "url":"https://i.ytimg.com/vi/TvWDY4Mm5GM/default.jpg", + "width":120, + "height":90 + }, + "medium":{ + "url":"https://i.ytimg.com/vi/TvWDY4Mm5GM/mqdefault.jpg", + "width":320, + "height":180 + }, + "high":{ + "url":"https://i.ytimg.com/vi/TvWDY4Mm5GM/hqdefault.jpg", + "width":480, + "height":360 + } + }, + "channelTitle":"FC Motivate", + "liveBroadcastContent":"none", + "publishTime":"2023-07-24T14:15:01Z" + } + }, + { + "kind":"youtube#searchResult", + "etag":"0NG5QHdtIQM_V-DBJDEf-jK_Y9k", + "id":{ + "kind":"youtube#video", + "videoId":"aZM_42CcNZ4" + }, + "snippet":{ + "publishedAt":"2023-07-24T16:09:27Z", + "channelId":"UCM5gMM_HqfKHYIEJ3lstMUA", + "title":"Which Football Club Could Cristiano Ronaldo Afford To Buy? 
💰", + "description":"Sign up to Sorare and get a FREE card: https://sorare.pxf.io/NellisShorts Give Soraredata a go for FREE: ...", + "thumbnails":{ + "default":{ + "url":"https://i.ytimg.com/vi/aZM_42CcNZ4/default.jpg", + "width":120, + "height":90 + }, + "medium":{ + "url":"https://i.ytimg.com/vi/aZM_42CcNZ4/mqdefault.jpg", + "width":320, + "height":180 + }, + "high":{ + "url":"https://i.ytimg.com/vi/aZM_42CcNZ4/hqdefault.jpg", + "width":480, + "height":360 + } + }, + "channelTitle":"John Nellis", + "liveBroadcastContent":"none", + "publishTime":"2023-07-24T16:09:27Z" + } + }, + { + "kind":"youtube#searchResult", + "etag":"WbBz4oh9I5VaYj91LjeJvffrBVY", + "id":{ + "kind":"youtube#video", + "videoId":"wkP3XS3aNAY" + }, + "snippet":{ + "publishedAt":"2023-07-24T16:00:50Z", + "channelId":"UC4EP1dxFDPup_aFLt0ElsDw", + "title":"PAULO DYBALA vs THE WORLD'S LONGEST FREEKICK WALL", + "description":"Can Paulo Dybala curl a football around the World's longest free kick wall? We met up with the World Cup winner and put him to ...", + "thumbnails":{ + "default":{ + "url":"https://i.ytimg.com/vi/wkP3XS3aNAY/default.jpg", + "width":120, + "height":90 + }, + "medium":{ + "url":"https://i.ytimg.com/vi/wkP3XS3aNAY/mqdefault.jpg", + "width":320, + "height":180 + }, + "high":{ + "url":"https://i.ytimg.com/vi/wkP3XS3aNAY/hqdefault.jpg", + "width":480, + "height":360 + } + }, + "channelTitle":"Shoot for Love", + "liveBroadcastContent":"none", + "publishTime":"2023-07-24T16:00:50Z" + } + }, + { + "kind":"youtube#searchResult", + "etag":"juxv_FhT_l4qrR05S1QTrb4CGh8", + "id":{ + "kind":"youtube#video", + "videoId":"rJkDZ0WvfT8" + }, + "snippet":{ + "publishedAt":"2023-07-24T10:00:39Z", + "channelId":"UCO8qj5u80Ga7N_tP3BZWWhQ", + "title":"TOP 10 DEFENDERS 2023", + "description":"SoccerKingz https://soccerkingz.nl Use code: 'ILOVEHOF' to get 10% off. TOP 10 DEFENDERS 2023 Follow us! 
• Instagram ...", + "thumbnails":{ + "default":{ + "url":"https://i.ytimg.com/vi/rJkDZ0WvfT8/default.jpg", + "width":120, + "height":90 + }, + "medium":{ + "url":"https://i.ytimg.com/vi/rJkDZ0WvfT8/mqdefault.jpg", + "width":320, + "height":180 + }, + "high":{ + "url":"https://i.ytimg.com/vi/rJkDZ0WvfT8/hqdefault.jpg", + "width":480, + "height":360 + } + }, + "channelTitle":"Home of Football", + "liveBroadcastContent":"none", + "publishTime":"2023-07-24T10:00:39Z" + } + }, + { + "kind":"youtube#searchResult", + "etag":"wtuknXTmI1txoULeH3aWaOuXOow", + "id":{ + "kind":"youtube#video", + "videoId":"XH0rtu4U6SE" + }, + "snippet":{ + "publishedAt":"2023-07-21T16:30:05Z", + "channelId":"UCwozCpFp9g9x0wAzuFh0hwQ", + "title":"3 Things You Didn't Know About Erling Haaland ⚽️🇳🇴 #football #haaland #shorts", + "description":"", + "thumbnails":{ + "default":{ + "url":"https://i.ytimg.com/vi/XH0rtu4U6SE/default.jpg", + "width":120, + "height":90 + }, + "medium":{ + "url":"https://i.ytimg.com/vi/XH0rtu4U6SE/mqdefault.jpg", + "width":320, + "height":180 + }, + "high":{ + "url":"https://i.ytimg.com/vi/XH0rtu4U6SE/hqdefault.jpg", + "width":480, + "height":360 + } + }, + "channelTitle":"FC Motivate", + "liveBroadcastContent":"none", + "publishTime":"2023-07-21T16:30:05Z" + } + } + ] +} \ No newline at end of file From 674e64222e41cfcbae01bf54ec82e4d8efdb469f Mon Sep 17 00:00:00 2001 From: VinciGit00 Date: Mon, 29 Apr 2024 15:55:21 +0200 Subject: [PATCH 2/7] add first new graphs --- scrapegraphai/graphs/__init__.py | 2 + scrapegraphai/graphs/json_scraper_graph.py | 77 ++++++++++++++++++++++ scrapegraphai/graphs/xml_scraper_graph.py | 77 ++++++++++++++++++++++ 3 files changed, 156 insertions(+) create mode 100644 scrapegraphai/graphs/json_scraper_graph.py create mode 100644 scrapegraphai/graphs/xml_scraper_graph.py diff --git a/scrapegraphai/graphs/__init__.py b/scrapegraphai/graphs/__init__.py index a8ee6ac5..b7fbcef7 100644 --- a/scrapegraphai/graphs/__init__.py +++ b/scrapegraphai/graphs/__init__.py @@ -6,3 +6,5 @@ from .speech_graph import SpeechGraph from .search_graph import SearchGraph from .script_creator_graph import ScriptCreatorGraph +from .xml_scraper_graph import XmlScraperGraph +from .json_scraper_graph import JsonScraperGraph diff --git a/scrapegraphai/graphs/json_scraper_graph.py b/scrapegraphai/graphs/json_scraper_graph.py new file mode 100644 index 00000000..7f24da6d --- /dev/null +++ b/scrapegraphai/graphs/json_scraper_graph.py @@ -0,0 +1,77 @@ +""" +Module for creating the smart scraper +""" +from .base_graph import BaseGraph +from ..nodes import ( + FetchNode, + ParseNode, + RAGNode, + GenerateAnswerNode +) +from .abstract_graph import AbstractGraph + + +class JsonScraperGraph(AbstractGraph): + """ + SmartScraper is a comprehensive web scraping tool that automates the process of extracting + information from web pages using a natural language model to interpret and answer prompts. + """ + + def __init__(self, prompt: str, source: str, config: dict): + """ + Initializes the JsonScraperGraph with a prompt, source, and configuration. + """ + super().__init__(prompt, config, source) + + self.input_key = "url" if source.startswith("http") else "local_dir" + + def _create_graph(self): + """ + Creates the graph of nodes representing the workflow for web scraping. 
+ """ + fetch_node = FetchNode( + input="url | local_dir", + output=["doc"], + ) + parse_node = ParseNode( + input="doc", + output=["parsed_doc"], + node_config={"chunk_size": self.model_token} + ) + rag_node = RAGNode( + input="user_prompt & (parsed_doc | doc)", + output=["relevant_chunks"], + node_config={ + "llm": self.llm_model, + "embedder_model": self.embedder_model + } + ) + generate_answer_node = GenerateAnswerNode( + input="user_prompt & (relevant_chunks | parsed_doc | doc)", + output=["answer"], + node_config={"llm": self.llm_model}, + ) + + return BaseGraph( + nodes=[ + fetch_node, + parse_node, + rag_node, + generate_answer_node, + ], + edges=[ + (fetch_node, parse_node), + (parse_node, rag_node), + (rag_node, generate_answer_node) + ], + entry_point=fetch_node + ) + + def run(self) -> str: + """ + Executes the web scraping process and returns the answer to the prompt. + """ + inputs = {"user_prompt": self.prompt, self.input_key: self.source} + self.final_state, self.execution_info = self.graph.execute(inputs) + + return self.final_state.get("answer", "No answer found.") diff --git a/scrapegraphai/graphs/xml_scraper_graph.py b/scrapegraphai/graphs/xml_scraper_graph.py new file mode 100644 index 00000000..5b1e3282 --- /dev/null +++ b/scrapegraphai/graphs/xml_scraper_graph.py @@ -0,0 +1,77 @@ +""" +Module for creating the smart scraper +""" +from .base_graph import BaseGraph +from ..nodes import ( + FetchNode, + ParseNode, + RAGNode, + GenerateAnswerNode +) +from .abstract_graph import AbstractGraph + + +class XmlScraperGraph(AbstractGraph): + """ + SmartScraper is a comprehensive web scraping tool that automates the process of extracting + information from web pages using a natural language model to interpret and answer prompts. + """ + + def __init__(self, prompt: str, source: str, config: dict): + """ + Initializes the XmlScraperGraph with a prompt, source, and configuration. + """ + super().__init__(prompt, config, source) + + self.input_key = "url" if source.startswith("http") else "local_dir" + + def _create_graph(self): + """ + Creates the graph of nodes representing the workflow for web scraping. + """ + fetch_node = FetchNode( + input="url | local_dir", + output=["doc"], + ) + parse_node = ParseNode( + input="doc", + output=["parsed_doc"], + node_config={"chunk_size": self.model_token} + ) + rag_node = RAGNode( + input="user_prompt & (parsed_doc | doc)", + output=["relevant_chunks"], + node_config={ + "llm": self.llm_model, + "embedder_model": self.embedder_model + } + ) + generate_answer_node = GenerateAnswerNode( + input="user_prompt & (relevant_chunks | parsed_doc | doc)", + output=["answer"], + node_config={"llm": self.llm_model}, + ) + + return BaseGraph( + nodes=[ + fetch_node, + parse_node, + rag_node, + generate_answer_node, + ], + edges=[ + (fetch_node, parse_node), + (parse_node, rag_node), + (rag_node, generate_answer_node) + ], + entry_point=fetch_node + ) + + def run(self) -> str: + """ + Executes the web scraping process and returns the answer to the prompt. 
+ """ + inputs = {"user_prompt": self.prompt, self.input_key: self.source} + self.final_state, self.execution_info = self.graph.execute(inputs) + + return self.final_state.get("answer", "No answer found.") From 3eacc6fbc372b5b26c46afa5c2b0fac4db1368ab Mon Sep 17 00:00:00 2001 From: VinciGit00 Date: Mon, 29 Apr 2024 16:07:11 +0200 Subject: [PATCH 3/7] add paths --- scrapegraphai/graphs/json_scraper_graph.py | 2 +- scrapegraphai/graphs/xml_scraper_graph.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/scrapegraphai/graphs/json_scraper_graph.py b/scrapegraphai/graphs/json_scraper_graph.py index 7f24da6d..456f5e56 100644 --- a/scrapegraphai/graphs/json_scraper_graph.py +++ b/scrapegraphai/graphs/json_scraper_graph.py @@ -30,7 +30,7 @@ def _create_graph(self): Creates the graph of nodes representing the workflow for web scraping. """ fetch_node = FetchNode( - input="url | local_dir", + input="json | json_dir", output=["doc"], ) parse_node = ParseNode( diff --git a/scrapegraphai/graphs/xml_scraper_graph.py b/scrapegraphai/graphs/xml_scraper_graph.py index 5b1e3282..07e407ce 100644 --- a/scrapegraphai/graphs/xml_scraper_graph.py +++ b/scrapegraphai/graphs/xml_scraper_graph.py @@ -30,7 +30,7 @@ def _create_graph(self): Creates the graph of nodes representing the workflow for web scraping. """ fetch_node = FetchNode( - input="url | local_dir", + input="xml | xml_dir", output=["doc"], ) parse_node = ParseNode( From deb920a33ec08a212c1ad81d0d100dc2d9f0bf85 Mon Sep 17 00:00:00 2001 From: VinciGit00 Date: Mon, 29 Apr 2024 16:11:57 +0200 Subject: [PATCH 4/7] fixing json and example --- examples/openai/result.json | 1 + scrapegraphai/graphs/json_scraper_graph.py | 2 +- scrapegraphai/graphs/xml_scraper_graph.py | 2 +- scrapegraphai/nodes/fetch_node.py | 7 +++++-- 4 files changed, 8 insertions(+), 4 deletions(-) create mode 100644 examples/openai/result.json diff --git a/examples/openai/result.json b/examples/openai/result.json new file mode 100644 index 00000000..b5e82a18 --- /dev/null +++ b/examples/openai/result.json @@ -0,0 +1 @@ +{"books": [{"title": "XML Developer's Guide", "author": "Unknown", "genre": "Unknown"}]} \ No newline at end of file diff --git a/scrapegraphai/graphs/json_scraper_graph.py b/scrapegraphai/graphs/json_scraper_graph.py index 456f5e56..d6287d2b 100644 --- a/scrapegraphai/graphs/json_scraper_graph.py +++ b/scrapegraphai/graphs/json_scraper_graph.py @@ -30,7 +30,7 @@ def _create_graph(self): Creates the graph of nodes representing the workflow for web scraping. """ fetch_node = FetchNode( - input="json | json_dir", + input="json_dir", output=["doc"], ) parse_node = ParseNode( diff --git a/scrapegraphai/graphs/xml_scraper_graph.py b/scrapegraphai/graphs/xml_scraper_graph.py index 07e407ce..1661113d 100644 --- a/scrapegraphai/graphs/xml_scraper_graph.py +++ b/scrapegraphai/graphs/xml_scraper_graph.py @@ -30,7 +30,7 @@ def _create_graph(self): Creates the graph of nodes representing the workflow for web scraping. 
""" fetch_node = FetchNode( - input="xml | xml_dir", + input="xml_dir", output=["doc"], ) parse_node = ParseNode( diff --git a/scrapegraphai/nodes/fetch_node.py b/scrapegraphai/nodes/fetch_node.py index 76d80929..9b6cb52f 100644 --- a/scrapegraphai/nodes/fetch_node.py +++ b/scrapegraphai/nodes/fetch_node.py @@ -70,9 +70,12 @@ def execute(self, state): input_data = [state[key] for key in input_keys] source = input_data[0] - + if self.input == "json_dir" or self.input == "xml_dir": + compressed_document = [Document(page_content=source, metadata={ + "source": "local_dir" + })] # if it is a local directory - if not source.startswith("http"): + elif not source.startswith("http"): compressed_document = [Document(page_content=remover(source), metadata={ "source": "local_dir" })] From f8917320b33fa5256706b32994cae3043493d421 Mon Sep 17 00:00:00 2001 From: VinciGit00 Date: Mon, 29 Apr 2024 16:15:24 +0200 Subject: [PATCH 5/7] add xml_example --- examples/openai/result.json | 1 - examples/openai/scrape_xml_openai.py | 6 +++--- 2 files changed, 3 insertions(+), 4 deletions(-) delete mode 100644 examples/openai/result.json diff --git a/examples/openai/result.json b/examples/openai/result.json deleted file mode 100644 index b5e82a18..00000000 --- a/examples/openai/result.json +++ /dev/null @@ -1 +0,0 @@ -{"books": [{"title": "XML Developer's Guide", "author": "Unknown", "genre": "Unknown"}]} \ No newline at end of file diff --git a/examples/openai/scrape_xml_openai.py b/examples/openai/scrape_xml_openai.py index 854c5422..402277f3 100644 --- a/examples/openai/scrape_xml_openai.py +++ b/examples/openai/scrape_xml_openai.py @@ -4,7 +4,7 @@ import os from dotenv import load_dotenv -from scrapegraphai.graphs import SmartScraperGraph +from scrapegraphai.graphs import XmlScraperGraph from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info load_dotenv() @@ -33,10 +33,10 @@ } # ************************************************ -# Create the SmartScraperGraph instance and run it +# Create the XmlScraperGraph instance and run it # ************************************************ -smart_scraper_graph = SmartScraperGraph( +smart_scraper_graph = XmlScraperGraph( prompt="List me all the authors, title and genres of the books", source=text, # Pass the content of the file, not the file object config=graph_config From da2c82a2a2d87c14744cff321b43d8750e7e77c7 Mon Sep 17 00:00:00 2001 From: VinciGit00 Date: Tue, 30 Apr 2024 10:52:22 +0200 Subject: [PATCH 6/7] add json and xml scraper --- examples/gemini/json_scraper_gemini.py | 57 +++++++++++++++++ examples/gemini/script_generator_gemini.py | 2 +- .../xml_scraper_openai.py} | 4 +- .../Docker/json_scraper_docker.py | 61 ++++++++++++++++++ .../local_models/Docker/xml_scraper_docker.py | 61 ++++++++++++++++++ .../Ollama/json_scraper_ollama.py | 63 +++++++++++++++++++ .../local_models/Ollama/xml_scraper_ollama.py | 63 +++++++++++++++++++ examples/openai/json_scraper_openai.py | 57 +++++++++++++++++ examples/openai/xml_scraper_openai.py | 57 +++++++++++++++++ scrapegraphai/graphs/json_scraper_graph.py | 2 +- scrapegraphai/graphs/xml_scraper_graph.py | 2 +- 11 files changed, 424 insertions(+), 5 deletions(-) create mode 100644 examples/gemini/json_scraper_gemini.py rename examples/{openai/scrape_xml_openai.py => gemini/xml_scraper_openai.py} (93%) create mode 100644 examples/local_models/Docker/json_scraper_docker.py create mode 100644 examples/local_models/Docker/xml_scraper_docker.py create mode 100644 
examples/local_models/Ollama/json_scraper_ollama.py create mode 100644 examples/local_models/Ollama/xml_scraper_ollama.py create mode 100644 examples/openai/json_scraper_openai.py create mode 100644 examples/openai/xml_scraper_openai.py diff --git a/examples/gemini/json_scraper_gemini.py b/examples/gemini/json_scraper_gemini.py new file mode 100644 index 00000000..52592dd5 --- /dev/null +++ b/examples/gemini/json_scraper_gemini.py @@ -0,0 +1,57 @@ +""" +Basic example of scraping pipeline using SmartScraper from JSON documents +""" + +import os +from dotenv import load_dotenv +from scrapegraphai.graphs import JsonScraperGraph +from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info +load_dotenv() + +# ************************************************ +# Read the JSON file +# ************************************************ + +FILE_NAME = "inputs/example.json" +curr_dir = os.path.dirname(os.path.realpath(__file__)) +file_path = os.path.join(curr_dir, FILE_NAME) + +with open(file_path, 'r', encoding="utf-8") as file: + text = file.read() + +# ************************************************ +# Define the configuration for the graph +# ************************************************ + +gemini_key = os.getenv("GOOGLE_APIKEY") + +graph_config = { + "llm": { + "api_key": gemini_key, + "model": "gemini-pro", + }, +} + +# ************************************************ +# Create the JsonScraperGraph instance and run it +# ************************************************ + +smart_scraper_graph = JsonScraperGraph( + prompt="List me all the authors, title and genres of the books", + source=text, # Pass the content of the file, not the file object + config=graph_config +) + +result = smart_scraper_graph.run() +print(result) + +# ************************************************ +# Get graph execution info +# ************************************************ + +graph_exec_info = smart_scraper_graph.get_execution_info() +print(prettify_exec_info(graph_exec_info)) + +# Save to json or csv +convert_to_csv(result, "result") +convert_to_json(result, "result") diff --git a/examples/gemini/script_generator_gemini.py b/examples/gemini/script_generator_gemini.py index c07acc37..21459f6c 100644 --- a/examples/gemini/script_generator_gemini.py +++ b/examples/gemini/script_generator_gemini.py @@ -19,7 +19,7 @@ graph_config = { "llm": { "api_key": gemini_key, - "model": "gpt-3.5-turbo", + "model": "gemini-pro", }, "library": "beautifoulsoup" } diff --git a/examples/openai/scrape_xml_openai.py b/examples/gemini/xml_scraper_openai.py similarity index 93% rename from examples/openai/scrape_xml_openai.py rename to examples/gemini/xml_scraper_openai.py index 402277f3..fde8246e 100644 --- a/examples/openai/scrape_xml_openai.py +++ b/examples/gemini/xml_scraper_openai.py @@ -1,5 +1,5 @@ """ -Basic example of scraping pipeline using SmartScraper from XML documents +Basic example of scraping pipeline using XmlScraperGraph from XML documents """ import os @@ -28,7 +28,7 @@ graph_config = { "llm": { "api_key": openai_key, - "model": "gpt-3.5-turbo", + "model": "gemini-pro", }, } diff --git a/examples/local_models/Docker/json_scraper_docker.py b/examples/local_models/Docker/json_scraper_docker.py new file mode 100644 index 00000000..f94cfa4d --- /dev/null +++ b/examples/local_models/Docker/json_scraper_docker.py @@ -0,0 +1,61 @@ +""" +Basic example of scraping pipeline using JsonScraperGraph from JSON documents +""" + +import os +from dotenv import load_dotenv +from scrapegraphai.graphs import 
JsonScraperGraph +from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info +load_dotenv() + +# ************************************************ +# Read the JSON file +# ************************************************ + +FILE_NAME = "inputs/example.json" +curr_dir = os.path.dirname(os.path.realpath(__file__)) +file_path = os.path.join(curr_dir, FILE_NAME) + +with open(file_path, 'r', encoding="utf-8") as file: + text = file.read() + +# ************************************************ +# Define the configuration for the graph +# ************************************************ + +graph_config = { + "llm": { + "model": "ollama/mistral", + "temperature": 0, + "format": "json", # Ollama needs the format to be specified explicitly + # "model_tokens": 2000, # set context length arbitrarily + }, + "embeddings": { + "model": "ollama/nomic-embed-text", + "temperature": 0, + } +} + +# ************************************************ +# Create the JsonScraperGraph instance and run it +# ************************************************ + +smart_scraper_graph = JsonScraperGraph( + prompt="List me all the authors, title and genres of the books", + source=text, # Pass the content of the file, not the file object + config=graph_config +) + +result = smart_scraper_graph.run() +print(result) + +# ************************************************ +# Get graph execution info +# ************************************************ + +graph_exec_info = smart_scraper_graph.get_execution_info() +print(prettify_exec_info(graph_exec_info)) + +# Save to json or csv +convert_to_csv(result, "result") +convert_to_json(result, "result") diff --git a/examples/local_models/Docker/xml_scraper_docker.py b/examples/local_models/Docker/xml_scraper_docker.py new file mode 100644 index 00000000..c52a7d0d --- /dev/null +++ b/examples/local_models/Docker/xml_scraper_docker.py @@ -0,0 +1,61 @@ +""" +Basic example of scraping pipeline using XmlScraperGraph from XML documents +""" + +import os +from dotenv import load_dotenv +from scrapegraphai.graphs import XmlScraperGraph +from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info +load_dotenv() + +# ************************************************ +# Read the XML file +# ************************************************ + +FILE_NAME = "inputs/books.xml" +curr_dir = os.path.dirname(os.path.realpath(__file__)) +file_path = os.path.join(curr_dir, FILE_NAME) + +with open(file_path, 'r', encoding="utf-8") as file: + text = file.read() + +# ************************************************ +# Define the configuration for the graph +# ************************************************ + +graph_config = { + "llm": { + "model": "ollama/mistral", + "temperature": 0, + "format": "json", # Ollama needs the format to be specified explicitly + # "model_tokens": 2000, # set context length arbitrarily + }, + "embeddings": { + "model": "ollama/nomic-embed-text", + "temperature": 0, + } +} + +# ************************************************ +# Create the XmlScraperGraph instance and run it +# ************************************************ + +smart_scraper_graph = XmlScraperGraph( + prompt="List me all the authors, title and genres of the books", + source=text, # Pass the content of the file, not the file object + config=graph_config +) + +result = smart_scraper_graph.run() +print(result) + +# ************************************************ +# Get graph execution info +# ************************************************ + +graph_exec_info = 
smart_scraper_graph.get_execution_info() +print(prettify_exec_info(graph_exec_info)) + +# Save to json or csv +convert_to_csv(result, "result") +convert_to_json(result, "result") diff --git a/examples/local_models/Ollama/json_scraper_ollama.py b/examples/local_models/Ollama/json_scraper_ollama.py new file mode 100644 index 00000000..42d38753 --- /dev/null +++ b/examples/local_models/Ollama/json_scraper_ollama.py @@ -0,0 +1,63 @@ +""" +Basic example of scraping pipeline using JsonScraperGraph from JSON documents +""" + +import os +from dotenv import load_dotenv +from scrapegraphai.graphs import JsonScraperGraph +from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info +load_dotenv() + +# ************************************************ +# Read the JSON file +# ************************************************ + +FILE_NAME = "inputs/example.json" +curr_dir = os.path.dirname(os.path.realpath(__file__)) +file_path = os.path.join(curr_dir, FILE_NAME) + +with open(file_path, 'r', encoding="utf-8") as file: + text = file.read() + +# ************************************************ +# Define the configuration for the graph +# ************************************************ + +graph_config = { + "llm": { + "model": "ollama/mistral", + "temperature": 0, + "format": "json", # Ollama needs the format to be specified explicitly + # "model_tokens": 2000, # set context length arbitrarily + "base_url": "http://localhost:11434", + }, + "embeddings": { + "model": "ollama/nomic-embed-text", + "temperature": 0, + "base_url": "http://localhost:11434", + } +} + +# ************************************************ +# Create the XmlScraperGraph instance and run it +# ************************************************ + +smart_scraper_graph = JsonScraperGraph( + prompt="List me all the authors, title and genres of the books", + source=text, # Pass the content of the file, not the file object + config=graph_config +) + +result = smart_scraper_graph.run() +print(result) + +# ************************************************ +# Get graph execution info +# ************************************************ + +graph_exec_info = smart_scraper_graph.get_execution_info() +print(prettify_exec_info(graph_exec_info)) + +# Save to json or csv +convert_to_csv(result, "result") +convert_to_json(result, "result") diff --git a/examples/local_models/Ollama/xml_scraper_ollama.py b/examples/local_models/Ollama/xml_scraper_ollama.py new file mode 100644 index 00000000..46f2564d --- /dev/null +++ b/examples/local_models/Ollama/xml_scraper_ollama.py @@ -0,0 +1,63 @@ +""" +Basic example of scraping pipeline using XmlScraperGraph from XML documents +""" + +import os +from dotenv import load_dotenv +from scrapegraphai.graphs import XmlScraperGraph +from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info +load_dotenv() + +# ************************************************ +# Read the XML file +# ************************************************ + +FILE_NAME = "inputs/books.xml" +curr_dir = os.path.dirname(os.path.realpath(__file__)) +file_path = os.path.join(curr_dir, FILE_NAME) + +with open(file_path, 'r', encoding="utf-8") as file: + text = file.read() + +# ************************************************ +# Define the configuration for the graph +# ************************************************ + +graph_config = { + "llm": { + "model": "ollama/mistral", + "temperature": 0, + "format": "json", # Ollama needs the format to be specified explicitly + # "model_tokens": 2000, # set context 
length arbitrarily + "base_url": "http://localhost:11434", + }, + "embeddings": { + "model": "ollama/nomic-embed-text", + "temperature": 0, + "base_url": "http://localhost:11434", + } +} + +# ************************************************ +# Create the XmlScraperGraph instance and run it +# ************************************************ + +smart_scraper_graph = XmlScraperGraph( + prompt="List me all the authors, title and genres of the books", + source=text, # Pass the content of the file, not the file object + config=graph_config +) + +result = smart_scraper_graph.run() +print(result) + +# ************************************************ +# Get graph execution info +# ************************************************ + +graph_exec_info = smart_scraper_graph.get_execution_info() +print(prettify_exec_info(graph_exec_info)) + +# Save to json or csv +convert_to_csv(result, "result") +convert_to_json(result, "result") diff --git a/examples/openai/json_scraper_openai.py b/examples/openai/json_scraper_openai.py new file mode 100644 index 00000000..b52f9359 --- /dev/null +++ b/examples/openai/json_scraper_openai.py @@ -0,0 +1,57 @@ +""" +Basic example of scraping pipeline using JsonScraperGraph from JSON documents +""" + +import os +from dotenv import load_dotenv +from scrapegraphai.graphs import JsonScraperGraph +from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info +load_dotenv() + +# ************************************************ +# Read the JSON file +# ************************************************ + +FILE_NAME = "inputs/example.json" +curr_dir = os.path.dirname(os.path.realpath(__file__)) +file_path = os.path.join(curr_dir, FILE_NAME) + +with open(file_path, 'r', encoding="utf-8") as file: + text = file.read() + +# ************************************************ +# Define the configuration for the graph +# ************************************************ + +openai_key = os.getenv("OPENAI_APIKEY") + +graph_config = { + "llm": { + "api_key": openai_key, + "model": "gpt-3.5-turbo", + }, +} + +# ************************************************ +# Create the XmlScraperGraph instance and run it +# ************************************************ + +smart_scraper_graph = JsonScraperGraph( + prompt="List me all the authors, title and genres of the books", + source=text, # Pass the content of the file, not the file object + config=graph_config +) + +result = smart_scraper_graph.run() +print(result) + +# ************************************************ +# Get graph execution info +# ************************************************ + +graph_exec_info = smart_scraper_graph.get_execution_info() +print(prettify_exec_info(graph_exec_info)) + +# Save to json or csv +convert_to_csv(result, "result") +convert_to_json(result, "result") diff --git a/examples/openai/xml_scraper_openai.py b/examples/openai/xml_scraper_openai.py new file mode 100644 index 00000000..7057de2b --- /dev/null +++ b/examples/openai/xml_scraper_openai.py @@ -0,0 +1,57 @@ +""" +Basic example of scraping pipeline using XmlScraperGraph from XML documents +""" + +import os +from dotenv import load_dotenv +from scrapegraphai.graphs import XmlScraperGraph +from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info +load_dotenv() + +# ************************************************ +# Read the XML file +# ************************************************ + +FILE_NAME = "inputs/books.xml" +curr_dir = os.path.dirname(os.path.realpath(__file__)) +file_path = os.path.join(curr_dir, 
FILE_NAME) + +with open(file_path, 'r', encoding="utf-8") as file: + text = file.read() + +# ************************************************ +# Define the configuration for the graph +# ************************************************ + +gemini_key = os.getenv("GOOGLE_APIKEY") + +graph_config = { + "llm": { + "api_key": gemini_key, + "model": "gpt-3.5-turbo", + }, +} + +# ************************************************ +# Create the XmlScraperGraph instance and run it +# ************************************************ + +smart_scraper_graph = XmlScraperGraph( + prompt="List me all the authors, title and genres of the books", + source=text, # Pass the content of the file, not the file object + config=graph_config +) + +result = smart_scraper_graph.run() +print(result) + +# ************************************************ +# Get graph execution info +# ************************************************ + +graph_exec_info = smart_scraper_graph.get_execution_info() +print(prettify_exec_info(graph_exec_info)) + +# Save to json or csv +convert_to_csv(result, "result") +convert_to_json(result, "result") diff --git a/scrapegraphai/graphs/json_scraper_graph.py b/scrapegraphai/graphs/json_scraper_graph.py index d6287d2b..b6a11179 100644 --- a/scrapegraphai/graphs/json_scraper_graph.py +++ b/scrapegraphai/graphs/json_scraper_graph.py @@ -23,7 +23,7 @@ def __init__(self, prompt: str, source: str, config: dict): """ super().__init__(prompt, config, source) - self.input_key = "url" if source.startswith("http") else "local_dir" + self.input_key = "json" if source.endswith("json") else "json_dir" def _create_graph(self): """ diff --git a/scrapegraphai/graphs/xml_scraper_graph.py b/scrapegraphai/graphs/xml_scraper_graph.py index 1661113d..f8a697bd 100644 --- a/scrapegraphai/graphs/xml_scraper_graph.py +++ b/scrapegraphai/graphs/xml_scraper_graph.py @@ -23,7 +23,7 @@ def __init__(self, prompt: str, source: str, config: dict): """ super().__init__(prompt, config, source) - self.input_key = "url" if source.startswith("http") else "local_dir" + self.input_key = "xml" if source.endswith("xml") else "xml_dir" def _create_graph(self): """ From 8fba7e5490f916b325588443bba3fff5c0733c17 Mon Sep 17 00:00:00 2001 From: "EURAC\\marperini" Date: Tue, 30 Apr 2024 14:28:30 +0200 Subject: [PATCH 7/7] feat(refactor): changed variable names --- examples/gemini/json_scraper_gemini.py | 12 ++++++------ examples/gemini/xml_scraper_openai.py | 12 ++++++------ examples/local_models/Docker/json_scraper_docker.py | 12 ++++++------ examples/local_models/Docker/xml_scraper_docker.py | 12 ++++++------ examples/local_models/Ollama/json_scraper_ollama.py | 12 ++++++------ examples/local_models/Ollama/xml_scraper_ollama.py | 12 ++++++------ examples/local_models/result.json | 1 + examples/openai/json_scraper_openai.py | 12 ++++++------ examples/openai/result.json | 1 + examples/openai/xml_scraper_openai.py | 12 ++++++------ scrapegraphai/graphs/__init__.py | 4 ++-- scrapegraphai/graphs/json_scraper_graph.py | 2 +- scrapegraphai/graphs/xml_scraper_graph.py | 2 +- 13 files changed, 54 insertions(+), 52 deletions(-) create mode 100644 examples/local_models/result.json create mode 100644 examples/openai/result.json diff --git a/examples/gemini/json_scraper_gemini.py b/examples/gemini/json_scraper_gemini.py index 52592dd5..b038657c 100644 --- a/examples/gemini/json_scraper_gemini.py +++ b/examples/gemini/json_scraper_gemini.py @@ -1,10 +1,10 @@ """ -Basic example of scraping pipeline using SmartScraper from JSON documents +Basic example of 
scraping pipeline using JSONScraperGraph from JSON documents """ import os from dotenv import load_dotenv -from scrapegraphai.graphs import JsonScraperGraph +from scrapegraphai.graphs import JSONScraperGraph from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info load_dotenv() @@ -33,23 +33,23 @@ } # ************************************************ -# Create the JsonScraperGraph instance and run it +# Create the JSONScraperGraph instance and run it # ************************************************ -smart_scraper_graph = JsonScraperGraph( +json_scraper_graph = JSONScraperGraph( prompt="List me all the authors, title and genres of the books", source=text, # Pass the content of the file, not the file object config=graph_config ) -result = smart_scraper_graph.run() +result = json_scraper_graph.run() print(result) # ************************************************ # Get graph execution info # ************************************************ -graph_exec_info = smart_scraper_graph.get_execution_info() +graph_exec_info = json_scraper_graph.get_execution_info() print(prettify_exec_info(graph_exec_info)) # Save to json or csv diff --git a/examples/gemini/xml_scraper_openai.py b/examples/gemini/xml_scraper_openai.py index fde8246e..e82458ed 100644 --- a/examples/gemini/xml_scraper_openai.py +++ b/examples/gemini/xml_scraper_openai.py @@ -1,10 +1,10 @@ """ -Basic example of scraping pipeline using XmlScraperGraph from XML documents +Basic example of scraping pipeline using XMLScraperGraph from XML documents """ import os from dotenv import load_dotenv -from scrapegraphai.graphs import XmlScraperGraph +from scrapegraphai.graphs import XMLScraperGraph from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info load_dotenv() @@ -33,23 +33,23 @@ } # ************************************************ -# Create the XmlScraperGraph instance and run it +# Create the XMLScraperGraph instance and run it # ************************************************ -smart_scraper_graph = XmlScraperGraph( +xml_scraper_graph = XMLScraperGraph( prompt="List me all the authors, title and genres of the books", source=text, # Pass the content of the file, not the file object config=graph_config ) -result = smart_scraper_graph.run() +result = xml_scraper_graph.run() print(result) # ************************************************ # Get graph execution info # ************************************************ -graph_exec_info = smart_scraper_graph.get_execution_info() +graph_exec_info = xml_scraper_graph.get_execution_info() print(prettify_exec_info(graph_exec_info)) # Save to json or csv diff --git a/examples/local_models/Docker/json_scraper_docker.py b/examples/local_models/Docker/json_scraper_docker.py index f94cfa4d..758de09e 100644 --- a/examples/local_models/Docker/json_scraper_docker.py +++ b/examples/local_models/Docker/json_scraper_docker.py @@ -1,10 +1,10 @@ """ -Basic example of scraping pipeline using JsonScraperGraph from JSON documents +Basic example of scraping pipeline using JSONScraperGraph from JSON documents """ import os from dotenv import load_dotenv -from scrapegraphai.graphs import JsonScraperGraph +from scrapegraphai.graphs import JSONScraperGraph from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info load_dotenv() @@ -37,23 +37,23 @@ } # ************************************************ -# Create the JsonScraperGraph instance and run it +# Create the JSONScraperGraph instance and run it # 
************************************************ -smart_scraper_graph = JsonScraperGraph( +json_scraper_graph = JSONScraperGraph( prompt="List me all the authors, title and genres of the books", source=text, # Pass the content of the file, not the file object config=graph_config ) -result = smart_scraper_graph.run() +result = json_scraper_graph.run() print(result) # ************************************************ # Get graph execution info # ************************************************ -graph_exec_info = smart_scraper_graph.get_execution_info() +graph_exec_info = json_scraper_graph.get_execution_info() print(prettify_exec_info(graph_exec_info)) # Save to json or csv diff --git a/examples/local_models/Docker/xml_scraper_docker.py b/examples/local_models/Docker/xml_scraper_docker.py index c52a7d0d..6a8c86cc 100644 --- a/examples/local_models/Docker/xml_scraper_docker.py +++ b/examples/local_models/Docker/xml_scraper_docker.py @@ -1,10 +1,10 @@ """ -Basic example of scraping pipeline using XmlScraperGraph from XML documents +Basic example of scraping pipeline using XMLScraperGraph from XML documents """ import os from dotenv import load_dotenv -from scrapegraphai.graphs import XmlScraperGraph +from scrapegraphai.graphs import XMLScraperGraph from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info load_dotenv() @@ -37,23 +37,23 @@ } # ************************************************ -# Create the XmlScraperGraph instance and run it +# Create the XMLScraperGraph instance and run it # ************************************************ -smart_scraper_graph = XmlScraperGraph( +xml_scraper_graph = XMLScraperGraph( prompt="List me all the authors, title and genres of the books", source=text, # Pass the content of the file, not the file object config=graph_config ) -result = smart_scraper_graph.run() +result = xml_scraper_graph.run() print(result) # ************************************************ # Get graph execution info # ************************************************ -graph_exec_info = smart_scraper_graph.get_execution_info() +graph_exec_info = xml_scraper_graph.get_execution_info() print(prettify_exec_info(graph_exec_info)) # Save to json or csv diff --git a/examples/local_models/Ollama/json_scraper_ollama.py b/examples/local_models/Ollama/json_scraper_ollama.py index 42d38753..90c4a151 100644 --- a/examples/local_models/Ollama/json_scraper_ollama.py +++ b/examples/local_models/Ollama/json_scraper_ollama.py @@ -1,10 +1,10 @@ """ -Basic example of scraping pipeline using JsonScraperGraph from JSON documents +Basic example of scraping pipeline using JSONScraperGraph from JSON documents """ import os from dotenv import load_dotenv -from scrapegraphai.graphs import JsonScraperGraph +from scrapegraphai.graphs import JSONScraperGraph from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info load_dotenv() @@ -39,23 +39,23 @@ } # ************************************************ -# Create the XmlScraperGraph instance and run it +# Create the JSONScraperGraph instance and run it # ************************************************ -smart_scraper_graph = JsonScraperGraph( +json_scraper_graph = JSONScraperGraph( prompt="List me all the authors, title and genres of the books", source=text, # Pass the content of the file, not the file object config=graph_config ) -result = smart_scraper_graph.run() +result = json_scraper_graph.run() print(result) # ************************************************ # Get graph execution info # 
************************************************ -graph_exec_info = smart_scraper_graph.get_execution_info() +graph_exec_info = json_scraper_graph.get_execution_info() print(prettify_exec_info(graph_exec_info)) # Save to json or csv diff --git a/examples/local_models/Ollama/xml_scraper_ollama.py b/examples/local_models/Ollama/xml_scraper_ollama.py index 46f2564d..4c149a2b 100644 --- a/examples/local_models/Ollama/xml_scraper_ollama.py +++ b/examples/local_models/Ollama/xml_scraper_ollama.py @@ -1,10 +1,10 @@ """ -Basic example of scraping pipeline using XmlScraperGraph from XML documents +Basic example of scraping pipeline using XMLScraperGraph from XML documents """ import os from dotenv import load_dotenv -from scrapegraphai.graphs import XmlScraperGraph +from scrapegraphai.graphs import XMLScraperGraph from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info load_dotenv() @@ -39,23 +39,23 @@ } # ************************************************ -# Create the XmlScraperGraph instance and run it +# Create the XMLScraperGraph instance and run it # ************************************************ -smart_scraper_graph = XmlScraperGraph( +xml_scraper_graph = XMLScraperGraph( prompt="List me all the authors, title and genres of the books", source=text, # Pass the content of the file, not the file object config=graph_config ) -result = smart_scraper_graph.run() +result = xml_scraper_graph.run() print(result) # ************************************************ # Get graph execution info # ************************************************ -graph_exec_info = smart_scraper_graph.get_execution_info() +graph_exec_info = xml_scraper_graph.get_execution_info() print(prettify_exec_info(graph_exec_info)) # Save to json or csv diff --git a/examples/local_models/result.json b/examples/local_models/result.json new file mode 100644 index 00000000..8a4e7057 --- /dev/null +++ b/examples/local_models/result.json @@ -0,0 +1 @@ +{"projects": [{"title": "Rotary Pendulum RL", "description": "Open Source project aimed at controlling a real life rotary pendulum using RL algorithms"}, {"title": "DQN Implementation from scratch", "description": "Developed a Deep Q-Network algorithm to train a simple and double pendulum"}, {"title": "Multi Agents HAED", "description": "University project which focuses on simulating a multi-agent system to perform environment mapping. Agents, equipped with sensors, explore and record their surroundings, considering uncertainties in their readings."}, {"title": "Wireless ESC for Modular Drones", "description": "Modular drone architecture proposal and proof of concept. 
The project received maximum grade."}]} \ No newline at end of file diff --git a/examples/openai/json_scraper_openai.py b/examples/openai/json_scraper_openai.py index b52f9359..5e271006 100644 --- a/examples/openai/json_scraper_openai.py +++ b/examples/openai/json_scraper_openai.py @@ -1,10 +1,10 @@ """ -Basic example of scraping pipeline using JsonScraperGraph from JSON documents +Basic example of scraping pipeline using JSONScraperGraph from JSON documents """ import os from dotenv import load_dotenv -from scrapegraphai.graphs import JsonScraperGraph +from scrapegraphai.graphs import JSONScraperGraph from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info load_dotenv() @@ -33,23 +33,23 @@ } # ************************************************ -# Create the XmlScraperGraph instance and run it +# Create the JSONScraperGraph instance and run it # ************************************************ -smart_scraper_graph = JsonScraperGraph( +json_scraper_graph = JSONScraperGraph( prompt="List me all the authors, title and genres of the books", source=text, # Pass the content of the file, not the file object config=graph_config ) -result = smart_scraper_graph.run() +result = json_scraper_graph.run() print(result) # ************************************************ # Get graph execution info # ************************************************ -graph_exec_info = smart_scraper_graph.get_execution_info() +graph_exec_info = json_scraper_graph.get_execution_info() print(prettify_exec_info(graph_exec_info)) # Save to json or csv diff --git a/examples/openai/result.json b/examples/openai/result.json new file mode 100644 index 00000000..8867c8d6 --- /dev/null +++ b/examples/openai/result.json @@ -0,0 +1 @@ +{"top_5_eyeliner_products_for_gift": [{"product_name": "Tarte Double Take Eyeliner", "type": "Liquid, Gel", "price": "$26", "link": "https://www.sephora.com/product/double-take-eyeliner-P421701"}, {"product_name": "AppleDoll Velvet Liner", "type": "Liquid", "price": "$22", "link": "https://www.appledoll.com/products/velvet-liner"}, {"product_name": "Rare Beauty Perfect Strokes Gel Eyeliner", "type": "Gel", "price": "$19", "link": "https://www.sephora.com/product/perfect-strokes-gel-eyeliner-P468000"}, {"product_name": "Laura Mercier Caviar Tightline Eyeliner", "type": "Gel", "price": "$29", "link": "https://www.sephora.com/product/caviar-tightline-eyeliner-P448800"}, {"product_name": "Ilia Clean Line Liquid Eyeliner", "type": "Liquid", "price": "$28", "link": "https://www.amazon.com/ILIA-Clean-Line-Liquid-Eyeliner/dp/B08Z7JZQZP"}, {"brand": "Tom Ford", "product_name": "Eye Defining Pen", "price": "$62", "type": "Liquid", "colors": 1, "retailer": "Nordstrom"}, {"brand": "Fenty Beauty", "product_name": "Flyliner", "price": "$24", "type": "Liquid", "colors": 2, "retailer": "Sephora"}, {"brand": "Lanc\u00f4me", "product_name": "Le Crayon Kh\u00f4l Smoky Eyeliner", "price": "$28", "type": "Kohl", "colors": 2, "retailer": "Macy's"}, {"brand": "Jillian Dempsey", "product_name": "Kh\u00f4l Eyeliner", "price": "$20", "type": "Kohl", "colors": 6, "retailer": "Amazon"}, {"brand": "R\u00f3en", "product_name": "Eyeline Define Eyeliner Pencil", "price": "$26", "type": "Kohl", "colors": 4, "retailer": "Credo Beauty"}]} \ No newline at end of file diff --git a/examples/openai/xml_scraper_openai.py b/examples/openai/xml_scraper_openai.py index 7057de2b..32b79981 100644 --- a/examples/openai/xml_scraper_openai.py +++ b/examples/openai/xml_scraper_openai.py @@ -1,10 +1,10 @@ """ -Basic example of 
scraping pipeline using XmlScraperGraph from XML documents +Basic example of scraping pipeline using XMLScraperGraph from XML documents """ import os from dotenv import load_dotenv -from scrapegraphai.graphs import XmlScraperGraph +from scrapegraphai.graphs import XMLScraperGraph from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info load_dotenv() @@ -33,23 +33,23 @@ } # ************************************************ -# Create the XmlScraperGraph instance and run it +# Create the XMLScraperGraph instance and run it # ************************************************ -smart_scraper_graph = XmlScraperGraph( +xml_scraper_graph = XMLScraperGraph( prompt="List me all the authors, title and genres of the books", source=text, # Pass the content of the file, not the file object config=graph_config ) -result = smart_scraper_graph.run() +result = xml_scraper_graph.run() print(result) # ************************************************ # Get graph execution info # ************************************************ -graph_exec_info = smart_scraper_graph.get_execution_info() +graph_exec_info = xml_scraper_graph.get_execution_info() print(prettify_exec_info(graph_exec_info)) # Save to json or csv diff --git a/scrapegraphai/graphs/__init__.py b/scrapegraphai/graphs/__init__.py index b7fbcef7..d943a4dc 100644 --- a/scrapegraphai/graphs/__init__.py +++ b/scrapegraphai/graphs/__init__.py @@ -6,5 +6,5 @@ from .speech_graph import SpeechGraph from .search_graph import SearchGraph from .script_creator_graph import ScriptCreatorGraph -from .xml_scraper_graph import XmlScraperGraph -from .json_scraper_graph import JsonScraperGraph +from .xml_scraper_graph import XMLScraperGraph +from .json_scraper_graph import JSONScraperGraph diff --git a/scrapegraphai/graphs/json_scraper_graph.py b/scrapegraphai/graphs/json_scraper_graph.py index b6a11179..02092544 100644 --- a/scrapegraphai/graphs/json_scraper_graph.py +++ b/scrapegraphai/graphs/json_scraper_graph.py @@ -11,7 +11,7 @@ from .abstract_graph import AbstractGraph -class JsonScraperGraph(AbstractGraph): +class JSONScraperGraph(AbstractGraph): """ SmartScraper is a comprehensive web scraping tool that automates the process of extracting information from web pages using a natural language model to interpret and answer prompts. diff --git a/scrapegraphai/graphs/xml_scraper_graph.py b/scrapegraphai/graphs/xml_scraper_graph.py index f8a697bd..0dad83e3 100644 --- a/scrapegraphai/graphs/xml_scraper_graph.py +++ b/scrapegraphai/graphs/xml_scraper_graph.py @@ -11,7 +11,7 @@ from .abstract_graph import AbstractGraph -class XmlScraperGraph(AbstractGraph): +class XMLScraperGraph(AbstractGraph): """ SmartScraper is a comprehensive web scraping tool that automates the process of extracting information from web pages using a natural language model to interpret and answer prompts.
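
After this series, the JSON and XML pipelines are exposed as JSONScraperGraph and XMLScraperGraph, and the example scripts name their instances json_scraper_graph / xml_scraper_graph to match. A minimal end-to-end sketch of the JSON pipeline, condensed from examples/openai/json_scraper_openai.py (the model name, OPENAI_APIKEY variable, and helper functions are taken from that example; this is an illustrative sketch, not a drop-in replacement for the full script):

import os
from dotenv import load_dotenv
from scrapegraphai.graphs import JSONScraperGraph
from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info

load_dotenv()

# Read the local JSON document to scrape (resolved against the working directory here,
# whereas the example script resolves it relative to its own location)
with open("inputs/example.json", "r", encoding="utf-8") as file:
    text = file.read()

# LLM configuration mirroring the OpenAI example in this patch
graph_config = {
    "llm": {
        "api_key": os.getenv("OPENAI_APIKEY"),
        "model": "gpt-3.5-turbo",
    },
}

json_scraper_graph = JSONScraperGraph(
    prompt="List me all the authors, title and genres of the books",
    source=text,  # pass the content of the file, not the file object
    config=graph_config,
)

result = json_scraper_graph.run()
print(result)

# Execution statistics and persisted output, as in the example scripts
print(prettify_exec_info(json_scraper_graph.get_execution_info()))
convert_to_csv(result, "result")
convert_to_json(result, "result")

The XML pipeline is symmetric: swap in XMLScraperGraph and an XML source such as inputs/books.xml.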
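
Besides the renames, the behavioral change in json_scraper_graph.py and xml_scraper_graph.py is how the fetch input key is chosen: instead of branching on an http-prefixed URL, the constructors now branch on the suffix of source. A condensed view of the new selection logic (taken directly from the hunks above; reading the *_dir keys as directory inputs is an assumption about the fetch node, not something this patch spells out):

# json_scraper_graph.py
self.input_key = "json" if source.endswith("json") else "json_dir"

# xml_scraper_graph.py
self.input_key = "xml" if source.endswith("xml") else "xml_dir"

So a source ending in "json"/"xml" is treated as a single document, while any other value presumably points the graph at a directory of documents.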