import scraperwiki
import urllib
from bs4 import BeautifulSoup


# Playlist page of the weekly "Radio Renner" show on radio station Bremen4.
URL = 'http://www.radiobremen.de/bremenvier/musik/playlists/radiorenner104.html'


def _fetch_soup(url):
    """Download *url* and return the parsed BeautifulSoup document."""
    # webseite oeffnen / einlesen
    fh = urllib.urlopen(url)
    try:
        html = fh.read()
    finally:
        fh.close()  # fix: the original never closed the HTTP handle
    # fix: name the parser explicitly; bare BeautifulSoup(html) picks
    # whichever parser happens to be installed (nondeterministic output).
    return BeautifulSoup(html, 'html.parser')


def scrape_playlist(url=URL):
    """Scrape one week's "Radio Renner" playlist table and store each track.

    Each table row becomes a dict with keys ``Datum``, ``Interpret``,
    ``Titel`` and a synthetic unique ``ID`` ("<Interpret> - <Titel>"),
    saved through ``scraperwiki.sqlite.save``.  Prints a German
    diagnostic and returns early when the expected page structure
    (table, rows, <h2> date headline) is missing.

    :param url: playlist page to scrape; defaults to the current
        Bremen4 playlist URL (backward-compatible addition).
    """
    # fix: fetch on call, not at import time as the original did.
    soup = _fetch_soup(url)

    tab = soup.select('.top44_table')
    if not tab:
        print("Tabelle nicht gefunden")
        return
    tr = tab[0].find_all('tr')
    if not tr:
        print("Tabellenzeilen nicht gefunden")
        return

    headlines = soup.find_all('h2')
    if not headlines:
        print("Datum nicht gefunden")
        return
    # The first <h2> reads "Radio Renner mit Tim Renner vom <Datum>";
    # strip the fixed prefix to keep only the date.
    datum = headlines[0].text.replace('Radio Renner mit Tim Renner vom ', '')

    # tr[0] is the header row — skip it.
    for row in tr[1:]:
        entry = {'Datum': datum}

        # fix: the original used row.find_next(...), which searches forward
        # in *document* order and could grab a cell from the NEXT row when
        # the current row had none; find() stays inside this row.
        feld1 = row.find('td', 'top44_table_zelle')
        # fix: the original crashed with AttributeError when no <td>
        # matched; skip such rows instead.
        if feld1 is None:
            continue

        feld2 = feld1.find_next_sibling()
        if not feld2:
            continue
        interpret = feld2.text
        entry['Interpret'] = interpret

        feld3 = feld2.find_next_sibling()
        if not feld3:
            print("Titel nicht gefunden")
            continue
        titel = feld3.text
        entry['Titel'] = titel
        entry['ID'] = interpret + " - " + titel

        scraperwiki.sqlite.save(unique_keys=['ID'],
                                data=entry,
                                table_name="Playlist Radio Renner Bremen4")


## MAIN ##
if __name__ == '__main__':
    # fix: guard the entry point so importing the module no longer
    # triggers a network fetch and database writes.
    scrape_playlist()