Skip to content

Commit

Permalink
Update scraper.rb
Browse files (browse the repository at this point in the history)
  • Loading branch information
BfB-Schenefeld committed Apr 21, 2024
1 parent 3c3fda8 commit cd9dd73
Showing 1 changed file with 7 additions and 4 deletions.
11 changes: 7 additions & 4 deletions scraper.rb
Original file line number | Diff line number | Diff line change
Expand Up @@ -25,14 +25,15 @@
# called "data".
require 'nokogiri'
require 'open-uri'
require 'date'

def scrape_details(url)
document = Nokogiri::HTML(open(url))

document.css('tbody tr').each do |row|
top_link = row.css('td.tonr a').first
top_id = top_link['href'][/TOLFDNR=(\d+)/, 1]
top_description = row.css('td.tobetreff a').text.strip # Aktualisierter Selektor für den Betreff
top_description = row.css('td.tobetreff div a').text.strip

top_url = "https://www.sitzungsdienst-schenefeld.de/bi/to020_r.asp?TOLFDNR=#{top_id}"

Expand All @@ -54,9 +55,10 @@ def scrape_calendar_data(year, month)
full_url = link ? "https://www.sitzungsdienst-schenefeld.de/bi/#{link}" : nil

if date_raw && full_url
date_parts = date_raw.match(/([A-Za-z]+)\s+(\d+)/)
if date_parts
formatted_date = "#{date_parts[1]}, #{date_parts[2].rjust(2, '0')}.#{month.to_s.rjust(2, '0')}.#{year}"
date_match = date_raw.match(/([A-Za-z]+)(\d+)/)
if date_match
date_object = Date.new(year, month, date_match[2].to_i)
formatted_date = date_object.strftime("%a, %d.%m.%Y")
puts "Datum: #{formatted_date}, URL: #{full_url}"
scrape_details(full_url)
else
Expand All @@ -69,3 +71,4 @@ def scrape_calendar_data(year, month)
# Beispiel: Daten für April 2024 scrapen
scrape_calendar_data(2024, 4)


0 comments on commit cd9dd73

Please sign in to comment.