Skip to content

Commit

Permalink
Update scraper.rb
Browse files Browse the repository at this point in the history
  • Loading branch information
BfB-Schenefeld committed Apr 22, 2024
1 parent 59fea50 commit 1a8a2b6
Showing 1 changed file with 12 additions and 5 deletions.
17 changes: 12 additions & 5 deletions scraper.rb
Original file line number Diff line number Diff line change
Expand Up @@ -29,17 +29,23 @@

# German weekday abbreviations indexed by Date#wday (0 = Sunday).
GERMAN_WEEKDAY_ABBREVIATIONS = %w[So Mo Di Mi Do Fr Sa].freeze

# Formats a calendar day as "<weekday>., DD.MM.YYYY" using the German weekday
# abbreviation, e.g. "Di., 05.03.2024".
#
# The weekday is derived from the date itself rather than parsed from +dow+:
# Ruby's "%a" directive only knows English day names, so a German label such
# as "Di" can neither be parsed by Date.strptime nor produced by Date#strftime.
#
# @param dow [String] weekday label; accepted for interface compatibility but
#   not used, because the weekday is computed from the date
# @param dom [String, Integer] day of the month (zero-padding optional)
# @param month [String, Integer] month number, 1-12 (zero-padding optional)
# @param year [String, Integer] four-digit year
# @return [String] the formatted date, or 'Invalid date' when the parts are not
#   numeric or do not form a real calendar date
def extract_and_format_date(dow, dom, month, year)
  # Base 10 is explicit so zero-padded values like "08" are not read as octal.
  year_number, month_number, day_number = [year, month, dom].map do |part|
    Integer(part.to_s.strip, 10, exception: false)
  end
  return 'Invalid date' unless year_number && month_number && day_number
  # Date accepts negative month/day values as "count from the end"; a scraped
  # calendar cell should never mean that, so reject them up front.
  return 'Invalid date' unless month_number.positive? && day_number.positive?
  return 'Invalid date' unless Date.valid_date?(year_number, month_number, day_number)

  date = Date.new(year_number, month_number, day_number)
  "#{GERMAN_WEEKDAY_ABBREVIATIONS[date.wday]}., #{date.strftime('%d.%m.%Y')}"
end

# Methode zum Scrapen der Kalenderdaten (Ebene 1)
def scrape_calendar_data(year, month)
url = "https://www.sitzungsdienst-schenefeld.de/bi/si010_r.asp?MM=#{month}&YY=#{year}"
puts "Zugriff auf Kalenderseite: #{url}"
document = Nokogiri::HTML(URI.open(url))
document = Nokogiri::HTML(open(url))

# Extraktion der Sitzungsdaten aus der Kalendertabelle
document.css('tr:not(.emptyRow)').each do |row|
Expand All @@ -59,3 +65,4 @@ def scrape_calendar_data(year, month)
# Script entry point: scrape the calendar page for month 3 of year 2024.
scrape_calendar_data('2024', '3')



0 comments on commit 1a8a2b6

Please sign in to comment.