ScrapeGraphAI · PeriniM · Apr 30, 2024 · Apr 29, 2024 · Apr 29, 2024 · Apr 29, 2024
diff --git a/.gitignore b/.gitignore
@@ -29,7 +29,6 @@ venv/
 *.google-cookie
 examples/graph_examples/ScrapeGraphAI_generated_graph
 examples/**/*.csv
-examples/**/*.json
 main.py
 poetry.lock
 

diff --git a/examples/gemini/inputs/example.json b/examples/gemini/inputs/example.json
@@ -0,0 +1,182 @@
+{
+   "kind":"youtube#searchListResponse",
+   "etag":"q4ibjmYp1KA3RqMF4jFLl6PBwOg",
+   "nextPageToken":"CAUQAA",
+   "regionCode":"NL",
+   "pageInfo":{
+      "totalResults":1000000,
+      "resultsPerPage":5
+   },
+   "items":[
+      {
+         "kind":"youtube#searchResult",
+         "etag":"QCsHBifbaernVCbLv8Cu6rAeaDQ",
+         "id":{
+            "kind":"youtube#video",
+            "videoId":"TvWDY4Mm5GM"
+         },
+         "snippet":{
+            "publishedAt":"2023-07-24T14:15:01Z",
+            "channelId":"UCwozCpFp9g9x0wAzuFh0hwQ",
+            "title":"3 Football Clubs Kylian Mbappe Should Avoid Signing ✍️❌⚽️ #football #mbappe #shorts",
+            "description":"",
+            "thumbnails":{
+               "default":{
+                  "url":"https://i.ytimg.com/vi/TvWDY4Mm5GM/default.jpg",
+                  "width":120,
+                  "height":90
+               },
+               "medium":{
+                  "url":"https://i.ytimg.com/vi/TvWDY4Mm5GM/mqdefault.jpg",
+                  "width":320,
+                  "height":180
+               },
+               "high":{
+                  "url":"https://i.ytimg.com/vi/TvWDY4Mm5GM/hqdefault.jpg",
+                  "width":480,
+                  "height":360
+               }
+            },
+            "channelTitle":"FC Motivate",
+            "liveBroadcastContent":"none",
+            "publishTime":"2023-07-24T14:15:01Z"
+         }
+      },
+      {
+         "kind":"youtube#searchResult",
+         "etag":"0NG5QHdtIQM_V-DBJDEf-jK_Y9k",
+         "id":{
+            "kind":"youtube#video",
+            "videoId":"aZM_42CcNZ4"
+         },
+         "snippet":{
+            "publishedAt":"2023-07-24T16:09:27Z",
+            "channelId":"UCM5gMM_HqfKHYIEJ3lstMUA",
+            "title":"Which Football Club Could Cristiano Ronaldo Afford To Buy? 💰",
+            "description":"Sign up to Sorare and get a FREE card: https://sorare.pxf.io/NellisShorts Give Soraredata a go for FREE: ...",
+            "thumbnails":{
+               "default":{
+                  "url":"https://i.ytimg.com/vi/aZM_42CcNZ4/default.jpg",
+                  "width":120,
+                  "height":90
+               },
+               "medium":{
+                  "url":"https://i.ytimg.com/vi/aZM_42CcNZ4/mqdefault.jpg",
+                  "width":320,
+                  "height":180
+               },
+               "high":{
+                  "url":"https://i.ytimg.com/vi/aZM_42CcNZ4/hqdefault.jpg",
+                  "width":480,
+                  "height":360
+               }
+            },
+            "channelTitle":"John Nellis",
+            "liveBroadcastContent":"none",
+            "publishTime":"2023-07-24T16:09:27Z"
+         }
+      },
+      {
+         "kind":"youtube#searchResult",
+         "etag":"WbBz4oh9I5VaYj91LjeJvffrBVY",
+         "id":{
+            "kind":"youtube#video",
+            "videoId":"wkP3XS3aNAY"
+         },
+         "snippet":{
+            "publishedAt":"2023-07-24T16:00:50Z",
+            "channelId":"UC4EP1dxFDPup_aFLt0ElsDw",
+            "title":"PAULO DYBALA vs THE WORLD'S LONGEST FREEKICK WALL",
+            "description":"Can Paulo Dybala curl a football around the World's longest free kick wall? We met up with the World Cup winner and put him to ...",
+            "thumbnails":{
+               "default":{
+                  "url":"https://i.ytimg.com/vi/wkP3XS3aNAY/default.jpg",
+                  "width":120,
+                  "height":90
+               },
+               "medium":{
+                  "url":"https://i.ytimg.com/vi/wkP3XS3aNAY/mqdefault.jpg",
+                  "width":320,
+                  "height":180
+               },
+               "high":{
+                  "url":"https://i.ytimg.com/vi/wkP3XS3aNAY/hqdefault.jpg",
+                  "width":480,
+                  "height":360
+               }
+            },
+            "channelTitle":"Shoot for Love",
+            "liveBroadcastContent":"none",
+            "publishTime":"2023-07-24T16:00:50Z"
+         }
+      },
+      {
+         "kind":"youtube#searchResult",
+         "etag":"juxv_FhT_l4qrR05S1QTrb4CGh8",
+         "id":{
+            "kind":"youtube#video",
+            "videoId":"rJkDZ0WvfT8"
+         },
+         "snippet":{
+            "publishedAt":"2023-07-24T10:00:39Z",
+            "channelId":"UCO8qj5u80Ga7N_tP3BZWWhQ",
+            "title":"TOP 10 DEFENDERS 2023",
+            "description":"SoccerKingz https://soccerkingz.nl Use code: 'ILOVEHOF' to get 10% off. TOP 10 DEFENDERS 2023 Follow us! • Instagram ...",
+            "thumbnails":{
+               "default":{
+                  "url":"https://i.ytimg.com/vi/rJkDZ0WvfT8/default.jpg",
+                  "width":120,
+                  "height":90
+               },
+               "medium":{
+                  "url":"https://i.ytimg.com/vi/rJkDZ0WvfT8/mqdefault.jpg",
+                  "width":320,
+                  "height":180
+               },
+               "high":{
+                  "url":"https://i.ytimg.com/vi/rJkDZ0WvfT8/hqdefault.jpg",
+                  "width":480,
+                  "height":360
+               }
+            },
+            "channelTitle":"Home of Football",
+            "liveBroadcastContent":"none",
+            "publishTime":"2023-07-24T10:00:39Z"
+         }
+      },
+      {
+         "kind":"youtube#searchResult",
+         "etag":"wtuknXTmI1txoULeH3aWaOuXOow",
+         "id":{
+            "kind":"youtube#video",
+            "videoId":"XH0rtu4U6SE"
+         },
+         "snippet":{
+            "publishedAt":"2023-07-21T16:30:05Z",
+            "channelId":"UCwozCpFp9g9x0wAzuFh0hwQ",
+            "title":"3 Things You Didn't Know About Erling Haaland ⚽️🇳🇴 #football #haaland #shorts",
+            "description":"",
+            "thumbnails":{
+               "default":{
+                  "url":"https://i.ytimg.com/vi/XH0rtu4U6SE/default.jpg",
+                  "width":120,
+                  "height":90
+               },
+               "medium":{
+                  "url":"https://i.ytimg.com/vi/XH0rtu4U6SE/mqdefault.jpg",
+                  "width":320,
+                  "height":180
+               },
+               "high":{
+                  "url":"https://i.ytimg.com/vi/XH0rtu4U6SE/hqdefault.jpg",
+                  "width":480,
+                  "height":360
+               }
+            },
+            "channelTitle":"FC Motivate",
+            "liveBroadcastContent":"none",
+            "publishTime":"2023-07-21T16:30:05Z"
+         }
+      }
+   ]
+}
diff --git a/examples/gemini/json_scraper_gemini.py b/examples/gemini/json_scraper_gemini.py
@@ -0,0 +1,63 @@
+"""
+Basic example of scraping pipeline using JSONScraperGraph from JSON documents.
+
+Reads inputs/example.json (located next to this script), runs a
+JSONScraperGraph over its content with the Gemini model, prints the result
+and the execution info, and passes the result to the CSV/JSON converters.
+"""
+
+import os
+from dotenv import load_dotenv
+from scrapegraphai.graphs import JSONScraperGraph
+from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info
+load_dotenv()
+
+# ************************************************
+# Read the JSON file
+# ************************************************
+
+FILE_NAME = "inputs/example.json"
+curr_dir = os.path.dirname(os.path.realpath(__file__))
+file_path = os.path.join(curr_dir, FILE_NAME)
+
+with open(file_path, 'r', encoding="utf-8") as file:
+    text = file.read()
+
+# ************************************************
+# Define the configuration for the graph
+# ************************************************
+
+# GOOGLE_APIKEY is expected in the environment (load_dotenv() is called above)
+gemini_key = os.getenv("GOOGLE_APIKEY")
+
+graph_config = {
+    "llm": {
+        "api_key": gemini_key,
+        "model": "gemini-pro",
+    },
+}
+
+# ************************************************
+# Create the JSONScraperGraph instance and run it
+# ************************************************
+
+# The prompt must match the input: example.json holds YouTube search results, not books
+json_scraper_graph = JSONScraperGraph(
+    prompt="List me all the video titles, channel names and publication dates",
+    source=text,  # Pass the content of the file, not the file object
+    config=graph_config
+)
+
+result = json_scraper_graph.run()
+print(result)
+
+# ************************************************
+# Get graph execution info
+# ************************************************
+
+graph_exec_info = json_scraper_graph.get_execution_info()
+print(prettify_exec_info(graph_exec_info))
+
+# Save to json or csv
+convert_to_csv(result, "result")
+convert_to_json(result, "result")
diff --git a/examples/gemini/script_generator_gemini.py b/examples/gemini/script_generator_gemini.py
@@ -19,7 +19,7 @@
 graph_config = {
     "llm": {
         "api_key": gemini_key,
-        "model": "gpt-3.5-turbo",
+        "model": "gemini-pro",
     },
-    "library": "beautifoulsoup"
+    "library": "beautifulsoup"
 }

diff --git a/examples/openai/scrape_xml_openai.py → examples/gemini/xml_scraper_openai.py b/examples/openai/scrape_xml_openai.py → examples/gemini/xml_scraper_openai.py
@@ -1,10 +1,10 @@
 """
-Basic example of scraping pipeline using SmartScraper from XML documents
+Basic example of scraping pipeline using XMLScraperGraph from XML documents
 """
 
 import os
 from dotenv import load_dotenv
-from scrapegraphai.graphs import SmartScraperGraph
+from scrapegraphai.graphs import XMLScraperGraph
 from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info
 load_dotenv()
 
@@ -28,28 +28,28 @@
 graph_config = {
     "llm": {
         "api_key": openai_key,
-        "model": "gpt-3.5-turbo",
+        "model": "gemini-pro",
     },
 }
 
 # ************************************************
-# Create the SmartScraperGraph instance and run it
+# Create the XMLScraperGraph instance and run it
 # ************************************************
 
-smart_scraper_graph = SmartScraperGraph(
+xml_scraper_graph = XMLScraperGraph(
     prompt="List me all the authors, title and genres of the books",
     source=text,  # Pass the content of the file, not the file object
     config=graph_config
 )
 
-result = smart_scraper_graph.run()
+result = xml_scraper_graph.run()
 print(result)
 
 # ************************************************
 # Get graph execution info
 # ************************************************
 
-graph_exec_info = smart_scraper_graph.get_execution_info()
+graph_exec_info = xml_scraper_graph.get_execution_info()
 print(prettify_exec_info(graph_exec_info))
 
 # Save to json or csv