-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Fork of code from ScraperWiki at https://classic.scraperwiki.com/scra…
- Loading branch information
0 parents
commit 8db031b
Showing
3 changed files
with
49 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
# Ignore output of scraper | ||
data.sqlite |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
This scraper collects the daily song playlist of the German world music radio station Funkhaus Europa. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
import scraperwiki | ||
|
||
from bs4 import BeautifulSoup | ||
|
||
import urllib | ||
|
||
# Landing page of the Funkhaus Europa "World Wide Music" daily playlists;
# get_playlist() below downloads and scrapes this page.
url = "http://www.funkhauseuropa.de/world_wide_music/playlists/index.phtml"
def get_playlist():
    """Scrape the daily Funkhaus Europa song playlists into SQLite.

    Downloads the playlist page at the module-level ``url``, extracts one
    record per played song (date, time, interpret, title, length, show
    name) and saves all records to the ``playlist`` table, upserting on
    the synthetic ``id`` column.

    Raises whatever ``urllib`` raises on network failure, and IndexError /
    KeyError if the page no longer contains the expected markup.
    """
    # Fetch the page. Close the handle explicitly — the original leaked it.
    # (urllib.urlopen: this file targets classic ScraperWiki / Python 2.)
    fh = urllib.urlopen(url)
    try:
        html = fh.read()
    finally:
        fh.close()
    # Name the parser explicitly so parsing is deterministic regardless of
    # which optional parsers (lxml etc.) happen to be installed.
    soup = BeautifulSoup(html, "html.parser")

    # Boilerplate words of each table's summary attribute ("Die in der
    # Sendung gespielten Titel ..."); stripping them leaves the show name.
    # Renamed from `filter`, which shadowed the builtin.
    boilerplate = "Die in der Sendung gespielten Titel".split()
    # The date of the playlists, taken from a hidden input on the page.
    date = soup.select("#wsFhePlaylists")[0].find('input')['value']

    shows_table = soup.select(".wsSendeplanFhe")
    summary = []
    for table in shows_table:
        # Show name = the table's summary attribute minus the boilerplate.
        show_name = ' '.join(k for k in table['summary'].split()
                             if k not in boilerplate)
        shows_tr = table.select(".wsOdd") + table.select(".wsEven")

        # One dictionary per song row: time, interpret, title, length, ...
        for tr in shows_tr:
            tds = tr.findAll(name='td')
            if len(tds) < 4:
                # Filler/header rows lack the four song columns; skip them
                # instead of crashing with IndexError.
                continue
            song = {}
            song['date'] = date
            song['time'] = tds[0].text
            # Synthetic unique key; NOTE(review): collides if two shows on
            # the same day share a time slot — confirm against the page.
            song['id'] = date + '_' + song['time']
            song['interpret'] = tds[1].text
            song['title'] = tds[2].text
            if len(tds[3].text.split()) < 2:
                # Fewer than two tokens means no usable duration text.
                song['length'] = "Unknown"
            else:
                song['length'] = tds[3].text
            song['show'] = show_name
            summary.append(song)

    # Upsert on `id` so re-running the scraper updates instead of duplicating.
    scraperwiki.sqlite.save(unique_keys=['id'], data=summary,
                            table_name="playlist")
if __name__ == "__main__":
    # Run the scrape only when executed as a script (as ScraperWiki does),
    # not as a side effect of importing this module.
    get_playlist()