Skip to content

Commit

Permalink
Update scraper.rb
Browse files Browse the repository at this point in the history
  • Loading branch information
BfB-Schenefeld committed Apr 22, 2024
1 parent 1a8a2b6 commit f226e57
Showing 1 changed file with 36 additions and 13 deletions.
49 changes: 36 additions & 13 deletions scraper.rb
Original file line number | Diff line number | Diff line change
Expand Up @@ -29,12 +29,26 @@

# Methode zur Extraktion und Formatierung des Datums
def extract_and_format_date(dow, dom, month, year)
# Erstellen eines Datumsstrings im deutschen Format "Tag, DD.MM.YYYY"
formatted_date = "#{dow}, #{dom.rjust(2, '0')}.#{month.rjust(2, '0')}.#{year}"
# Führende Nullen sicherstellen
dom = dom.to_s.rjust(2, '0')
month = month.to_s.rjust(2, '0')

# Wochentag-Kürzel basierend auf dem Wochentag-String umwandeln
dow_translation = {
'Mo' => 'Mon',
'Di' => 'Tue',
'Mi' => 'Wed',
'Do' => 'Thu',
'Fr' => 'Fri',
'Sa' => 'Sat',
'So' => 'Sun'
}
dow_en = dow_translation[dow]

# Datumsobjekt erstellen und formatieren
date_str = "#{dow_en}, #{dom} #{Date::MONTHNAMES[month.to_i]} #{year}"
begin
# Parsen des Datums im deutschen Format und Konvertieren in ein Datum-Objekt
date = Date.strptime(formatted_date, "%a, %d.%m.%Y")
# Rückgabe des formatierten Datums
date = Date.parse(date_str)
date.strftime("%a., %d.%m.%Y") # e.g. "Tue., 05.03.2024" (%a always yields the English abbreviation, not "Di.")
rescue ArgumentError
'Invalid date'
Expand All @@ -49,15 +63,23 @@ def scrape_calendar_data(year, month)

# Extraktion der Sitzungsdaten aus der Kalendertabelle
document.css('tr:not(.emptyRow)').each do |row|
dow = row.at_css('.dow').text
dom = row.at_css('.dom').text.rjust(2, '0')
time = row.at_css('.time div').text
title = row.at_css('.textCol a').text
url = "https://www.sitzungsdienst-schenefeld.de/bi/#{row.at_css('.textCol a')['href']}"
room = row.at_css('.raum div').text
formatted_date = extract_and_format_date(dow, dom, month, year)
dow_element = row.at_css('.dow')
dom_element = row.at_css('.dom')
time_element = row.at_css('.time div')
title_element = row.at_css('.textCol a')
room_element = row.at_css('.raum div')

puts "Datum: #{formatted_date}, Zeit: #{time}, Titel: #{title}, URL: #{url}, Raum: #{room}"
if dow_element && dom_element && time_element && title_element && room_element
dow = dow_element.text
dom = dom_element.text
time = time_element.text
title = title_element.text
url = "https://www.sitzungsdienst-schenefeld.de/bi/#{title_element['href']}"
room = room_element.text
formatted_date = extract_and_format_date(dow, dom, month, year)

puts "Datum: #{formatted_date}, Zeit: #{time}, Titel: #{title}, URL: #{url}, Raum: #{room}"
end
end
end

Expand All @@ -66,3 +88,4 @@ def scrape_calendar_data(year, month)




0 comments on commit f226e57

Please sign in to comment.