Update scraper.rb

BfB-Schenefeld · Apr 21, 2024 · 8a27caf · 8a27caf
1 parent 6bda819
commit 8a27caf
Showing 1 changed file with 30 additions and 22 deletions.
diff --git a/scraper.rb b/scraper.rb
@@ -27,38 +27,46 @@
 require 'open-uri'
 
 def scrape_details(url)
-  full_url = "https://www.sitzungsdienst-schenefeld.de/bi/#{url}"
-  document = Nokogiri::HTML(open(full_url))
+  begin
+    full_url = "https://www.sitzungsdienst-schenefeld.de/bi/#{url}"
+    document = Nokogiri::HTML(open(full_url))
 
-  document.css('tbody tr').each do |row|
-    top_link = row.css('td.tonr a').first
-    top_id = top_link['href'][/TOLFDNR=(\d+)/, 1]
-    top_description = row.css('td.tobetreff div a').text.strip
+    document.css('tbody tr').each do |row|
+      top_link = row.css('td.tonr a').first
+      top_id = top_link['href'][/TOLFDNR=(\d+)/, 1]
+      top_description = row.css('td.tobetreff div a').text.strip
 
-    top_url = "https://www.sitzungsdienst-schenefeld.de/bi/to020_r.asp?TOLFDNR=#{top_id}"
+      top_url = "https://www.sitzungsdienst-schenefeld.de/bi/to020_r.asp?TOLFDNR=#{top_id}"
 
-    vo_link = row.css('td.tovonr a').first
-    vo_id = vo_link ? vo_link['href'][/VOLFDNR=(\d+)/, 1] : nil
-    vo_url = vo_link ? "https://www.sitzungsdienst-schenefeld.de/bi/vo020_r.asp?VOLFDNR=#{vo_id}" : "-"
+      vo_link = row.css('td.tovonr a').first
+      vo_id = vo_link ? vo_link['href'][/VOLFDNR=(\d+)/, 1] : nil
+      vo_url = vo_link ? "https://www.sitzungsdienst-schenefeld.de/bi/vo020_r.asp?VOLFDNR=#{vo_id}" : "-"
 
-    puts "  Tagesordnungspunkt: #{top_link.text.strip} #{top_description}, URL: #{top_url}, Beschlussvorlage: #{vo_url}"
+      puts "  Tagesordnungspunkt: #{top_link.text.strip} #{top_description}, URL: #{top_url}, Beschlussvorlage: #{vo_url}"
+    end
+  rescue => e
+    puts "Failed to scrape details: #{e.message}"
   end
 end
 
 def scrape_calendar_data(year, month)
-  url = "https://www.sitzungsdienst-schenefeld.de/bi/si010_r.asp?MM=#{month}&YY=#{year}"
-  document = Nokogiri::HTML(open(url))
+  begin
+    url = "https://www.sitzungsdienst-schenefeld.de/bi/si010_r.asp?MM=#{month}&YY=#{year}"
+    document = Nokogiri::HTML(open(url))
 
-  # Datum direkt aus dem Link extrahieren
-  document.css('a[href*="si010_r.asp?DD="]').each do |link|
-    day = link['href'][/DD=(\d+)/, 1]
-    month = link['href'][/MM=(\d+)/, 1]
-    year = link['href'][/YY=(\d+)/, 1]
-    formatted_date = "#{day}.#{month}.#{year}"
-    puts "Datum: #{formatted_date}, URL: #{link['href']}"
-    scrape_details(link['href'])
+    document.css('a[href*="si010_r.asp?DD="]').each do |link|
+      day = link['href'][/DD=(\d+)/, 1]
+      month = link['href'][/MM=(\d+)/, 1]
+      year = link['href'][/YY=(\d+)/, 1]
+      formatted_date = "#{day}.#{month}.#{year}"
+      puts "Datum: #{formatted_date}, URL: #{link['href']}"
+      scrape_details(link['href'])
+    end
+  rescue => e
+    puts "Failed to scrape calendar data: #{e.message}"
   end
 end
 
-# Beispiel: Daten für März 2024 scrapen
+# Example: Data scraping for March 2024
 scrape_calendar_data(2024, 3)
+