Skip to content

Commit

Permalink
Update scraper.rb
Browse files Browse the repository at this point in the history
  • Loading branch information
BfB-Schenefeld committed Apr 21, 2024
1 parent 6bda819 commit 8a27caf
Showing 1 changed file with 30 additions and 22 deletions.
52 changes: 30 additions & 22 deletions scraper.rb
Original file line number Diff line number Diff line change
Expand Up @@ -27,38 +27,46 @@
require 'open-uri'

# Fetches one page below the site's /bi/ directory and prints a line for every
# agenda item ("Tagesordnungspunkt") found in its table rows.
#
# @param url [String] path relative to https://www.sitzungsdienst-schenefeld.de/bi/
# @return [void] results are written to stdout; any StandardError is reported
#   on stdout instead of being raised
def scrape_details(url)
  full_url = "https://www.sitzungsdienst-schenefeld.de/bi/#{url}"
  # URI#open (from open-uri) instead of Kernel#open: open-uri stopped patching
  # Kernel#open in Ruby 3.0. The block form closes the handle after parsing.
  document = URI.parse(full_url).open { |io| Nokogiri::HTML(io) }

  document.css('tbody tr').each do |row|
    top_link = row.css('td.tonr a').first
    # A row without an agenda-item link would raise on nil and, through the
    # method-level rescue, discard every remaining row - skip it instead.
    next unless top_link

    top_id = top_link['href'].to_s[/TOLFDNR=(\d+)/, 1]
    top_description = row.css('td.tobetreff div a').text.strip
    top_url = "https://www.sitzungsdienst-schenefeld.de/bi/to020_r.asp?TOLFDNR=#{top_id}"

    # The proposal ("Beschlussvorlage") link is optional; "-" marks its absence
    # or a link whose href carries no VOLFDNR id.
    vo_link = row.css('td.tovonr a').first
    vo_id = vo_link && vo_link['href'].to_s[/VOLFDNR=(\d+)/, 1]
    vo_url = vo_id ? "https://www.sitzungsdienst-schenefeld.de/bi/vo020_r.asp?VOLFDNR=#{vo_id}" : "-"

    puts " Tagesordnungspunkt: #{top_link.text.strip} #{top_description}, URL: #{top_url}, Beschlussvorlage: #{vo_url}"
  end
rescue StandardError => e
  puts "Failed to scrape details: #{e.message}"
end

# Fetches the calendar page for the given month and year, prints the date and
# href of every link pointing at "si010_r.asp?DD=", and passes each href to
# scrape_details.
#
# @param year [Integer, String] value placed in the YY query parameter
# @param month [Integer, String] value placed in the MM query parameter
# @return [void] results are written to stdout; any StandardError is reported
#   on stdout instead of being raised
def scrape_calendar_data(year, month)
  url = "https://www.sitzungsdienst-schenefeld.de/bi/si010_r.asp?MM=#{month}&YY=#{year}"
  # URI#open (from open-uri) instead of Kernel#open: open-uri stopped patching
  # Kernel#open in Ruby 3.0. The block form closes the handle after parsing.
  document = URI.parse(url).open { |io| Nokogiri::HTML(io) }

  # Take the date straight from each link's query string. Separate local names
  # keep the loop from overwriting the method's own year/month arguments.
  document.css('a[href*="si010_r.asp?DD="]').each do |link|
    href = link['href']
    link_day = href[/DD=(\d+)/, 1]
    link_month = href[/MM=(\d+)/, 1]
    link_year = href[/YY=(\d+)/, 1]

    puts "Datum: #{link_day}.#{link_month}.#{link_year}, URL: #{href}"
    scrape_details(href)
  end
rescue StandardError => e
  puts "Failed to scrape calendar data: #{e.message}"
end

# Example: scrape the calendar data for March 2024 (year first, then month).
scrape_calendar_data(2024, 3)

0 comments on commit 8a27caf

Please sign in to comment.