import scraperwiki
import urllib
from bs4 import BeautifulSoup


# Playlist page of the weekly "Radio Renner" show on radio station Bremen4.
URL = 'http://www.radiobremen.de/bremenvier/musik/playlists/radiorenner104.html'


def _fetch_soup(url):
    """Download *url* and return the parsed BeautifulSoup document."""
    # webseite oeffnen / einlesen
    fh = urllib.urlopen(url)
    try:
        html = fh.read()
    finally:
        fh.close()  # fix: the original never closed the HTTP handle
    # fix: name the parser explicitly; bare BeautifulSoup(html) picks
    # whichever parser happens to be installed (nondeterministic output).
    return BeautifulSoup(html, 'html.parser')


def scrape_playlist(url=URL):
    """Scrape one week's "Radio Renner" playlist table and store each track.

    Each table row becomes a dict with keys ``Datum``, ``Interpret``,
    ``Titel`` and a synthetic unique ``ID`` ("<Interpret> - <Titel>"),
    saved through ``scraperwiki.sqlite.save``.  Prints a German
    diagnostic and returns early when the expected page structure
    (table, rows, <h2> date headline) is missing.

    :param url: playlist page to scrape; defaults to the current
        Bremen4 playlist URL (backward-compatible addition).
    """
    # fix: fetch on call, not at import time as the original did.
    soup = _fetch_soup(url)

    tab = soup.select('.top44_table')
    if not tab:
        print("Tabelle nicht gefunden")
        return
    tr = tab[0].find_all('tr')
    if not tr:
        print("Tabellenzeilen nicht gefunden")
        return

    headlines = soup.find_all('h2')
    if not headlines:
        print("Datum nicht gefunden")
        return
    # The first <h2> reads "Radio Renner mit Tim Renner vom <Datum>";
    # strip the fixed prefix to keep only the date.
    datum = headlines[0].text.replace('Radio Renner mit Tim Renner vom ', '')

    # tr[0] is the header row — skip it.
    for row in tr[1:]:
        entry = {'Datum': datum}

        # fix: the original used row.find_next(...), which searches forward
        # in *document* order and could grab a cell from the NEXT row when
        # the current row had none; find() stays inside this row.
        feld1 = row.find('td', 'top44_table_zelle')
        # fix: the original crashed with AttributeError when no <td>
        # matched; skip such rows instead.
        if feld1 is None:
            continue

        feld2 = feld1.find_next_sibling()
        if not feld2:
            continue
        interpret = feld2.text
        entry['Interpret'] = interpret

        feld3 = feld2.find_next_sibling()
        if not feld3:
            print("Titel nicht gefunden")
            continue
        titel = feld3.text
        entry['Titel'] = titel
        entry['ID'] = interpret + " - " + titel

        scraperwiki.sqlite.save(unique_keys=['ID'],
                                data=entry,
                                table_name="Playlist Radio Renner Bremen4")


## MAIN ##
if __name__ == '__main__':
    # fix: guard the entry point so importing the module no longer
    # triggers a network fetch and database writes.
    scrape_playlist()