Skip to content

Commit

Permalink
Update scraper.rb
Browse files Browse the repository at this point in the history
  • Loading branch information
BfB-Schenefeld committed May 13, 2024
1 parent 7d0e358 commit 1460644
Showing 1 changed file with 15 additions and 2 deletions.
17 changes: 15 additions & 2 deletions scraper.rb
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,13 @@ def extract_and_format_date(dow, dom, month, year)
end
end

def scrape_vorlagen_details(vorlagen_url)
# Builds the file name for a PDF belonging to an agenda item.
#
# The result has the shape
#   "<event_date>.<event_type_abbr>.TOP<top_number>.<file_index><suffix>.pdf"
# where <suffix> is ".V" when pdf_type equals 'Vorlage' and ".S" for every
# other value.
#
# @param pdf_url         accepted for the caller's convenience; not used when
#                        composing the name
# @param event_date      interpolated as the first name segment
# @param event_type_abbr interpolated as the second name segment
# @param top_number      interpolated directly after the literal "TOP"
# @param file_index      interpolated directly before the type suffix
# @param pdf_type        'Vorlage' selects the ".V" suffix, anything else ".S"
# @return [String] the composed PDF file name
def generate_pdf_name(pdf_url, event_date, event_type_abbr, top_number, file_index, pdf_type)
  type_marker =
    if pdf_type == 'Vorlage'
      '.V'
    else
      '.S'
    end

  event_prefix = "#{event_date}.#{event_type_abbr}"
  "#{event_prefix}.TOP#{top_number}.#{file_index}#{type_marker}.pdf"
end

def scrape_vorlagen_details(vorlagen_url, event_date, event_type_abbr, top_number)
puts "Zugriff auf Vorlagenseite: #{vorlagen_url}"
begin
if valid_url?(vorlagen_url)
Expand All @@ -53,11 +59,18 @@ def scrape_vorlagen_details(vorlagen_url)
sammel_pdf_url = document.xpath("//a[contains(@data-simpletooltip-text, 'Vorlage-Sammeldokument')]").first ? "https://www.sitzungsdienst-schenefeld.de/bi/#{document.xpath("//a[contains(@data-simpletooltip-text, 'Vorlage-Sammeldokument')]").first['href']}" : ''
puts "Vorlagen-Sammel-PDF-URL: #{sammel_pdf_url}"

file_index = 1
vorlagen_pdf_name = generate_pdf_name(vorlagen_pdf_url, event_date, event_type_abbr, top_number, file_index, 'Vorlage')
file_index += 1
sammel_pdf_name = generate_pdf_name(sammel_pdf_url, event_date, event_type_abbr, top_number, file_index, 'Sammel')

{
'vorlagenbezeichnung' => vorlagenbezeichnung,
'vorlagenprotokolltext' => vorlagenprotokolltext,
'vorlagen_pdf_url' => vorlagen_pdf_url,
'sammel_pdf_url' => sammel_pdf_url
'vorlagen_pdf_name' => vorlagen_pdf_name,
'sammel_pdf_url' => sammel_pdf_url,
'sammel_pdf_name' => sammel_pdf_name
}
else
puts "Ungültige Vorlagen-URL: #{vorlagen_url}"
Expand Down

0 comments on commit 1460644

Please sign in to comment.