
Fork of code from ScraperWiki at https://classic.scraperwiki.com/scra…

SuzanaK committed May 11, 2014
0 parents commit 7fea3c4956670671263e4a8f01b37093e5a0a10b
Showing with 61 additions and 0 deletions.
  1. +2 −0 .gitignore
  2. +1 −0 README.textile
  3. +58 −0 scraper.py
@@ -0,0 +1,2 @@
# Ignore output of scraper
data.sqlite
@@ -0,0 +1 @@
This scraper collects the weekly playlist of the radio show "Radio Renner" of the radio station Bremen4.
@@ -0,0 +1,58 @@
import scraperwiki
import urllib
from bs4 import BeautifulSoup


url = 'http://www.radiobremen.de/bremenvier/musik/playlists/radiorenner104.html'
# open the web page
fh = urllib.urlopen(url)
# read the page contents
html = fh.read()
soup = BeautifulSoup(html)


def scrape_playlist():

    # the playlist is rendered as a table with the class "top44_table"
    tab = soup.select('.top44_table')
    if tab:
        tr = tab[0].find_all('tr')
    else:
        print "Table not found"
        return

    if not tr:
        print "Table rows not found"
        return

    # the first <h2> headline carries the broadcast date
    headlines = soup.find_all('h2')
    if headlines:
        datum = headlines[0].text
        datum = datum.replace('Radio Renner mit Tim Renner vom ', '')
    else:
        print "Date not found"
        return

    # skip the header row, then read artist and title from each row
    for row in tr[1:]:

        entry = {}
        entry['Datum'] = datum
        feld1 = row.find_next('td', 'top44_table_zelle')
        feld2 = feld1.find_next_sibling()
        if feld2:
            interpret = feld2.text
            entry['Interpret'] = interpret
            feld3 = feld2.find_next_sibling()
            if feld3:
                titel = feld3.text
                entry['Titel'] = titel
                entry['ID'] = interpret + " - " + titel

                scraperwiki.sqlite.save(unique_keys=['ID'], data=entry, table_name="Playlist Radio Renner Bremen4")
            else:
                print "Title not found"


## MAIN ##
scrape_playlist()
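
The rows end up in data.sqlite, the file ignored by the .gitignore above. A minimal sketch (not part of the commit) for inspecting the saved playlist locally with Python's sqlite3 module, assuming scraperwiki.sqlite.save writes to data.sqlite and names the table after its table_name argument:

import sqlite3

# open the scraper output; the file name and table name are assumptions
# taken from the .gitignore and the save() call above
conn = sqlite3.connect('data.sqlite')
rows = conn.execute('SELECT Datum, Interpret, Titel FROM "Playlist Radio Renner Bremen4"')
for datum, interpret, titel in rows:
    # one line per playlist entry: date, artist, title
    print(datum + ': ' + interpret + ' - ' + titel)
conn.close()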

