Skip to content

Commit

Permalink
Update scraper.rb
Browse files Browse the repository at this point in the history
  • Loading branch information
BfB-Schenefeld committed Apr 21, 2024
1 parent e1aee8f commit add643e
Showing 1 changed file with 18 additions and 15 deletions.
33 changes: 18 additions & 15 deletions scraper.rb
Original file line number Diff line number Diff line change
Expand Up @@ -26,25 +26,28 @@
require 'open-uri'
require 'nokogiri'

# Loads a single event page and prints the date text found on it.
#
# Prints a notice instead of raising when the date element is not present.
#
# @param event_url [String] URL of the event page to fetch
# @return [nil]
def scrape_event_details(event_url)
  puts "Accessing event page: #{event_url}"
  document = Nokogiri::HTML(open(event_url))

  # TODO: 'specific_selector_for_date' is a placeholder; replace it with the
  # CSS selector that matches the date element in the event page's real HTML.
  date_node = document.at_css('specific_selector_for_date')

  # at_css returns nil when nothing matches; calling .text on nil would raise
  # NoMethodError, so report the missing element and stop here instead.
  unless date_node
    puts "No date element found on event page: #{event_url}"
    return
  end

  puts "Date found on event page: #{date_node.text.strip}"
end

def scrape_calendar_data(year, month)
url = "https://www.sitzungsdienst-schenefeld.de/bi/si010_r.asp?MM=#{month}&YY=#{year}"
puts "Attempting to access URL: #{url}"
begin
document = Nokogiri::HTML(open(url)) # Use open directly as per Ruby 2.0.0
puts "Page loaded successfully."
links_found = document.css('a[href*="si010_r.asp?DD="]')
puts "Number of matching links found: #{links_found.count}"

if links_found.empty?
puts "No links matching the criteria were found."
else
links_found.each do |link|
day = link['href'][/DD=(\d+)/, 1]
month = link['href'][/MM=(\d+)/, 1]
year = link['href'][/YY=(\d+)/, 1]
formatted_date = "#{day}.#{month}.#{year}"
puts "Datum: #{formatted_date}, URL: #{link['href']}"
end
document = Nokogiri::HTML(open(url))
puts "Calendar page loaded successfully."

event_links = document.css('a[href*="to010_r.asp?SILFDNR="]').map { |link| "https://www.sitzungsdienst-schenefeld.de/bi/#{link['href']}" }
puts "Number of event links found: #{event_links.count}"

event_links.each do |link|
scrape_event_details(link)
end
rescue StandardError => e
puts "Error during calendar data scrape: #{e.message}"
Expand Down

0 comments on commit add643e

Please sign in to comment.