Skip to content

Commit

Permalink
Update scraper.rb
Browse files Browse the repository at this point in the history
  • Loading branch information
BfB-Schenefeld committed Apr 21, 2024
1 parent 7f31814 commit 519d4d8
Showing 1 changed file with 11 additions and 4 deletions.
15 changes: 11 additions & 4 deletions scraper.rb
Original file line number Diff line number Diff line change
Expand Up @@ -28,20 +28,27 @@

# Returns the stripped text of the first node under +row+ that matches the CSS
# +selector+, or nil when the row has no such node (for example a row that has
# fewer <td> cells than expected).
def calendar_cell_text(row, selector)
  node = row.at_css(selector)
  node ? node.text.strip : nil
end

# Left-pads a number at the end of a line in +date_raw+ with zeros to at least
# two digits ("Di 2" -> "Di 02"). Only the first such number is touched; text
# without one is returned unchanged.
def pad_trailing_day(date_raw)
  date_raw.sub(/\d+$/) { |day| day.rjust(2, '0') }
end

# Fetches the calendar page for the given month and prints one line per table
# row that has a date, time, meeting and location cell.
#
# @param year [Integer, String] value interpolated into the YY query parameter
# @param month [Integer, String] value interpolated into the MM query parameter
# @return [Object] whatever +document.css('tr').each+ returns
def scrape_calendar_data(year, month)
  url = "https://www.sitzungsdienst-schenefeld.de/bi/si010_r.asp?MM=#{month}&YY=#{year}"
  # The page is fetched exactly once. Kernel#open is kept as-is (an earlier
  # revision was annotated "Use 'open' directly").
  # NOTE(review): this presumably relies on open-uri being required earlier in
  # the file - confirm before switching to URI.open.
  document = Nokogiri::HTML(open(url))

  document.css('tr').each do |row|
    date_raw = calendar_cell_text(row, 'td:nth-child(1)')
    time     = calendar_cell_text(row, 'td:nth-child(2)')
    meeting  = calendar_cell_text(row, 'td:nth-child(3)')
    location = calendar_cell_text(row, 'td:nth-child(4)')
    # Rows missing any of the four cells are skipped.
    next unless date_raw && time && meeting && location

    # The link is optional: a row without an anchor in the meeting cell is
    # still printed, with an empty URL.
    anchor = row.at_css('td:nth-child(3) a')
    link = anchor ? anchor['href'] : nil
    link_full_url = link ? "https://www.sitzungsdienst-schenefeld.de/bi/#{link}" : nil

    date = pad_trailing_day(date_raw)
    puts "Date: #{date}, Time: #{time}, Meeting: #{meeting}, Location: #{location}, URL: #{link_full_url}"
  end
end

# Example run: scrape the calendar for April 2024 (arguments are year, month).
# This is a top-level call, so it executes as soon as the script is loaded.
scrape_calendar_data(2024, 4)


0 comments on commit 519d4d8

Please sign in to comment.