Permalink
Browse files

initial commit

  • Loading branch information...
0 parents commit 558b25cc0d6de6a69d900a0265386904be08158e @JoeGermuska committed Nov 28, 2012
Showing with 247,295 additions and 0 deletions.
  1. +1 −0 .gitignore
  2. +30 −0 20120306.txt
  3. 0 birdconf/__init__.py
  4. 0 birdconf/apps/__init__.py
  5. 0 birdconf/apps/playlists/__init__.py
  6. +11 −0 birdconf/apps/playlists/admin.py
  7. +1 −0 birdconf/apps/playlists/fixtures/initial_data.json
  8. 0 birdconf/apps/playlists/management/__init__.py
  9. 0 birdconf/apps/playlists/management/commands/__init__.py
  10. +24 −0 birdconf/apps/playlists/management/commands/update_from_forms.py
  11. +124 −0 birdconf/apps/playlists/models.py
  12. +145 −0 birdconf/apps/playlists/scraper.py
  13. +16 −0 birdconf/apps/playlists/tests.py
  14. +67 −0 birdconf/apps/playlists/views.py
  15. +31 −0 birdconf/assets/birdconf.css
  16. +1,088 −0 birdconf/assets/bootstrap/css/bootstrap-responsive.css
  17. +9 −0 birdconf/assets/bootstrap/css/bootstrap-responsive.min.css
  18. +5,893 −0 birdconf/assets/bootstrap/css/bootstrap.css
  19. +9 −0 birdconf/assets/bootstrap/css/bootstrap.min.css
  20. BIN birdconf/assets/bootstrap/img/glyphicons-halflings-white.png
  21. BIN birdconf/assets/bootstrap/img/glyphicons-halflings.png
  22. +2,025 −0 birdconf/assets/bootstrap/js/bootstrap.js
  23. +6 −0 birdconf/assets/bootstrap/js/bootstrap.min.js
  24. +1 −0 birdconf/assets/maintenance.html
  25. +69 −0 birdconf/assets/reset.css
  26. 0 birdconf/configs/__init__.py
  27. 0 birdconf/configs/common/__init__.py
  28. +13 −0 birdconf/configs/common/common.wsgi
  29. +31 −0 birdconf/configs/common/manage.py
  30. +135 −0 birdconf/configs/common/settings.py
  31. +17 −0 birdconf/configs/common/urls.py
  32. 0 birdconf/configs/production/__init__.py
  33. +42 −0 birdconf/configs/production/apache
  34. +38 −0 birdconf/configs/production/apache_maintenance
  35. +34 −0 birdconf/configs/production/logging.conf
  36. +31 −0 birdconf/configs/production/manage.py
  37. +36 −0 birdconf/configs/production/production.wsgi
  38. +37 −0 birdconf/configs/production/settings.py
  39. 0 birdconf/configs/staging/__init__.py
  40. +26 −0 birdconf/configs/staging/apache
  41. +27 −0 birdconf/configs/staging/apache_maintenance
  42. +34 −0 birdconf/configs/staging/logging.conf
  43. +31 −0 birdconf/configs/staging/manage.py
  44. +39 −0 birdconf/configs/staging/settings.py
  45. +36 −0 birdconf/configs/staging/staging.wsgi
  46. +1 −0 birdconf/gzip/assets/temp
  47. 0 birdconf/lib/__init__.py
  48. +5 −0 birdconf/templates/404.html
  49. +5 −0 birdconf/templates/500.html
  50. +14 −0 birdconf/templates/base.html
  51. +14 −0 birdconf/templates/show.html
  52. +41 −0 birdconf/templates/shows.html
  53. +39 −0 birdconf/templates/track.html
  54. +1,644 −0 data/forms2scrape/20110906.html
  55. +1,959 −0 data/forms2scrape/20110913.html
  56. +1,824 −0 data/forms2scrape/20110920.html
  57. +1,779 −0 data/forms2scrape/20110927.html
  58. +1,869 −0 data/forms2scrape/20111004.html
  59. +1,869 −0 data/forms2scrape/20111011.html
  60. +1,959 −0 data/forms2scrape/20111025.html
  61. +1,914 −0 data/forms2scrape/20111101.html
  62. +1,869 −0 data/forms2scrape/20111108.html
  63. +1,779 −0 data/forms2scrape/20111115.html
  64. +1,466 −0 data/forms2scrape/20111122.html
  65. +1,243 −0 data/forms2scrape/20111129.html
  66. +1,783 −0 data/forms2scrape/20111206.html
  67. +1,918 −0 data/forms2scrape/20111213.html
  68. +1,738 −0 data/forms2scrape/20111220.html
  69. +1,335 −0 data/forms2scrape/20120103.html
  70. +1,966 −0 data/forms2scrape/20120110.html
  71. +2,013 −0 data/forms2scrape/20120117.html
  72. +1,383 −0 data/forms2scrape/20120124.html
  73. +2,060 −0 data/forms2scrape/20120131.html
  74. +2,017 −0 data/forms2scrape/20120207.html
  75. +2,017 −0 data/forms2scrape/20120214.html
  76. +2,152 −0 data/forms2scrape/20120221.html
  77. +1,837 −0 data/forms2scrape/20120228.html
  78. +1,792 −0 data/forms2scrape/20120306.html
  79. +1,522 −0 data/forms2scrape/20120313.html
  80. +1,748 −0 data/forms2scrape/20120320.html
  81. +1,748 −0 data/forms2scrape/20120328.html
  82. +1,930 −0 data/forms2scrape/20120404.html
  83. +1,975 −0 data/forms2scrape/20120410.html
  84. +1,708 −0 data/forms2scrape/20120417.html
  85. +1,843 −0 data/forms2scrape/20120424.html
  86. +1,709 −0 data/forms2scrape/20120501.html
  87. +1,934 −0 data/forms2scrape/20120511.html
  88. +1,846 −0 data/forms2scrape/20120515.html
  89. +2,026 −0 data/forms2scrape/20120522.html
  90. +2,026 −0 data/forms2scrape/20120529.html
  91. +2,161 −0 data/forms2scrape/20120605.html
  92. +1,938 −0 data/forms2scrape/20120612.html
  93. +1,851 −0 data/forms2scrape/20120619.html
  94. +1,899 −0 data/forms2scrape/20120703.html
  95. +1,989 −0 data/forms2scrape/20120710.html
  96. +1,854 −0 data/forms2scrape/20120717.html
  97. +1,989 −0 data/forms2scrape/20120724.html
  98. +1,449 −0 data/forms2scrape/20120731.html
  99. +1,674 −0 data/forms2scrape/20120807.html
  100. +1,584 −0 data/forms2scrape/20120814.html
  101. +1,585 −0 data/forms2scrape/20120828.html
  102. +1,315 −0 data/forms2scrape/20120911.html
  103. +1,675 −0 data/forms2scrape/20120918.html
  104. +1,990 −0 data/forms2scrape/20120925.html
  105. +2,037 −0 data/forms2scrape/20121002.html
  106. +1,592 −0 data/forms2scrape/20121009.html
  107. +1,458 −0 data/forms2scrape/20121016.html
  108. +1,999 −0 data/forms2scrape/20121023.html
  109. +1,641 −0 data/forms2scrape/20121030.html
  110. +1,821 −0 data/forms2scrape/20121106.html
  111. +1,687 −0 data/forms2scrape/20121120.html
  112. +1,553 −0 data/forms2scrape/20121127.html
  113. +173 −0 data/playlists/audio_urls.txt
  114. +34,846 −0 data/playlists/complete.bak
  115. +2 −0 data/playlists/complete.csv
  116. +54,548 −0 data/playlists/complete.json
  117. +40,161 −0 data/playlists/complete.json.bak
  118. +1 −0 data/playlists/complete2.json
  119. +595 −0 data/playlists/scrape_log.txt
  120. +24 −0 data/playlists/txt/20080429.txt
  121. +21 −0 data/playlists/txt/20080506.txt
  122. +19 −0 data/playlists/txt/20080513.txt
  123. +18 −0 data/playlists/txt/20080520.txt
  124. +20 −0 data/playlists/txt/20080527.txt
  125. +20 −0 data/playlists/txt/20080603.txt
  126. +21 −0 data/playlists/txt/20080610.txt
  127. +21 −0 data/playlists/txt/20080620.txt
  128. +16 −0 data/playlists/txt/20080624.txt
  129. +19 −0 data/playlists/txt/20080701.txt
  130. +20 −0 data/playlists/txt/20080708.txt
  131. +17 −0 data/playlists/txt/20080715.txt
  132. +19 −0 data/playlists/txt/20080722.txt
  133. +19 −0 data/playlists/txt/20080729.txt
  134. +23 −0 data/playlists/txt/20080805.txt
  135. +15 −0 data/playlists/txt/20080812.txt
  136. +22 −0 data/playlists/txt/20080819.txt
  137. +17 −0 data/playlists/txt/20080826.txt
  138. +16 −0 data/playlists/txt/20080902.txt
  139. +7 −0 data/psql/finish_init.sql
  140. +340 −0 fabfile.py
  141. +29 −0 gzip_assets.py
  142. +2 −0 manage
  143. +7 −0 requirements.txt
  144. 0 s3exclude
@@ -0,0 +1 @@
+*.pyc
@@ -0,0 +1,30 @@
+Africando: “Damagasi” – African Salsa (Earthworks)
+Celina González: “Santa Bárbara (¡Que Viva Changó!)” – Cuba: I Am Time, v. 3: Cuban Invocations (Blue Jackel)
+Fernando Lavoy y Los Soneros: “Cojelo Suave” – Son Cubano NYC: Cuban Roots, New York Spices 1972-1982 (Astralwerks)
+La Fantástica 2001: “Las Nenas” – ¡Arrollando! y con sabor! (Antila Productions)
+Willie Bobo: “Boogaloo in Room 802” – Spanish Grease / Uno Dos Tres 1-2-3 (Verve)
+Los Ecos: “Aquí en la Fiesta (I Don't Want to Spoil the Party)” – Cumbia Beat, Vol. 1 (Vampisoul)
+Grupo Fantasma: “(Naci De La) Rumba y Guaguanco” – Sonidos Gold (High Wire Music)
+Manuel Alvares y sus Dangers: “Esclavo Moderno” – Palenque Palenque: Champeta Criolla & Afro Roots in Colombia 1975-1991 (Soundway)
+Mongo Santamaría: “Chano Pozo” – Skin To Skin: The Mongo Santamaria Anthology (1958-1995) (Rhino)
+Tito Puente: “Batuka” – Para los Rumberos (Fania)
+Grupo Folklorico y Experimental Nuevayorquino: “Carmen La Ronca” – Concepts in Unity (Salsoul)
+Cal Tjader: “Mi Guaguanco” – Cal Tjader's Latin Concert (Original Jazz Classics)
+Pedro Justiz "Peruchin": “Changüí Oriental” – Maestros del Pianos... Cuban Style (RMM)
+Lucila Campos: “Toro Mata” – Afro-Peruvian Classics: The Soul of Black Peru (Luaka Bop)
+Marc Ribot y los Cubanos Postizos: “Como Se Goza en el Barrio” – The Prosthetic Cubans (Atlantic)
+Los Silvertones: “Carmen” – Panama! 3: Calypso Panameño, Guajira Jazz & Cumbia Típica on the Isthmus 1960–75 (Soundway)
+Maximo Rodriguez Y Sus Estrellas Panameñas: “Mambologica” – Panama! Latin, Calypso & Funk On The Isthmus 1965-75 (Soundway)
+Lecuona Cuban Boys: “Panamá” – Congas and Rumbas from the Unforgettable Band (Yoyo Music)
+Septeto Nacional De Ignacio Piñeiro: “El Son Hay Que Llevarlo En El Corazón” – El Son de Cuba (Milan Latino)
+Familia RMM: “El Son de Celia y Oscar” – Combinación Perfecta (RMM)
+Roberto y Su Nuevo Montuno: “Montuno Dulzon” – El Yerbero del Barrio (International)
+Willie Colón, Héctor Lavoe & Ruben Blades: “MC2 (Theme Realidades)” – The Good, the Bad, and the Ugly (Fania)
+Junior Mance: “Tin Tin Deo” – The Junior Mance Touch (Polydor)
+Dizzy Gillespie and His Orchestra: “Manteca” – Latin Jazz: La Combinación Perfecta (Smithsonian Folkways)
+Joe Bataan: “Subway Joe” – Latin Funk Brother (Vampisoul)
+Clodomiro Montes Y El Super Combo Curro: “Puerto Rico Zumbando” – Cartagena! Curro Fuentes & The Big Band Cumbia and Descarga Sound of Colombia 1962 - 72 (Soundway )
+David Martial: “Jerk Vide” – Creole Love Calls: Rythmes Latins Des Antilles (Isma'a)
+Wganda Kenya: “Elyoyo” – Colombia! The Golden Age of Discos Fuentes - The Powerhouse of Colombian Music 1960-76 (Soundway)
+Orchestra Baobab: “Jiin Ma Jiin Ma” – Specialist in All Styles (Nonesuch)
+Chucho Valdés: “Caravan” – Briyumba Palo Congo (Blue Note)
No changes.
No changes.
No changes.
@@ -0,0 +1,11 @@
+from django.contrib import admin
+from playlists.models import Show, Track
+
+class TrackAdmin(admin.ModelAdmin):
+ list_filter = ('artist','label')
+
+class ShowAdmin(admin.ModelAdmin):
+ list_filter = ('air_date',)
+
+admin.site.register(Track, TrackAdmin)
+admin.site.register(Show, ShowAdmin)

Large diffs are not rendered by default.

Oops, something went wrong.
@@ -0,0 +1,24 @@
+"""
+"""
+from django.core.management.base import BaseCommand, CommandError, handle_default_options
+from optparse import make_option
+
+from playlists import scraper,models
+
+class Command(BaseCommand):
+ args=''
+ help=''
+ option_list= BaseCommand.option_list + ()
+
+ def get_version(self):
+ return "0.1"
+
+ def handle(self, *args, **options):
+ x = scraper.update_json_from_forms()
+ scraper.dump_json_playlists(x)
+
+ show = x[-1]
+ models.show_from_json(*show)
+ scraper.print_show(show[1])
+
+
@@ -0,0 +1,124 @@
+from django.db import models
+
+from datetime import date
+from urlparse import urljoin
+
+class Track(models.Model):
+ """An individual recording, which might be played on more than one occasion. Not currently
+ worrying about the same recording appearing on multiple albums."""
+ artist = models.TextField(null=True)
+ title = models.TextField(null=True)
+ album = models.TextField(null=True)
+ label = models.TextField(null=True)
+
+ class Meta:
+ ordering = []
+ verbose_name, verbose_name_plural = "Track", "Tracks"
+
+ def __unicode__(self):
+ return u"'%s' by %s" % (self.title,self.artist)
+
+ @models.permalink
+ def get_absolute_url(self):
+ return ('Track', [self.id])
+
+class Show(models.Model):
+ """(Show description)"""
+ air_date = models.DateField(blank=False, null=True, auto_now_add=False)
+ tracks = models.ManyToManyField(Track, through='ShowTrack')
+
+ class Meta:
+ ordering = []
+ verbose_name, verbose_name_plural = "Show", "Shows"
+
+ def __unicode__(self):
+ try:
+ return self.air_date.strftime('%Y-%m-%d')
+ except AttributeError:
+ return self.air_date
+
+ @models.permalink
+ def get_absolute_url(self):
+ return ('show', [self.air_date.strftime('%Y-%m-%d')])
+
+class ShowTrack(models.Model):
+ """An occasion on which a track was played on a specific show."""
+ show = models.ForeignKey(Show)
+ track = models.ForeignKey(Track)
+ played_at = models.DateTimeField(blank=True, null=True, auto_now_add=False)
+ display_order = models.IntegerField()
+
+ class Meta:
+ ordering = ['display_order']
+ verbose_name, verbose_name_plural = "ShowTrack", "ShowTracks"
+
+ def __unicode__(self):
+ return u"ShowTrack"
+
+ @models.permalink
+ def get_absolute_url(self):
+ return ('ShowTrack', [self.id])
+
+class AudioFile(models.Model):
+ """(AudioFile description)"""
+ url = models.URLField(blank=False, verify_exists=True,unique=True)
+ show = models.ForeignKey(Show,blank=True)
+
+ class Meta:
+ ordering = ['url']
+ verbose_name, verbose_name_plural = "AudioFile", "AudioFiles"
+
+ def __unicode__(self):
+ return self.url
+
+ @models.permalink
+ def get_absolute_url(self):
+ return ('AudioFile', [self.id])
+
+
+def show_from_json(air_date,tracks):
+ s = Show(air_date=air_date)
+ s.save()
+ for i,d in enumerate(tracks):
+ match_dict = {
+ }
+ t,created = Track.objects.get_or_create(artist=d.get('artist'),title=d.get('track'),album=d.get('album'),label=d.get('label'))
+ try:
+ st = ShowTrack(show=s,track=t,played_at="%(date_str)s %(time_str)s" % d,display_order=i)
+ except KeyError:
+ st = ShowTrack(show=s,track=t,display_order=i)
+ st.save()
+ return s
+
+def track_from_json(d):
+ """
+ {u'album': u'Closer',
+ u'artist': u'Paul Bley Trio',
+ u'date_str': u'2008-09-09',
+ u'label': u'ESP Disk',
+ u'time_str': u'05:00:00',
+ u'track': u'Cartoon',
+ u'unix_time': 1220954400.0}
+ """
+ t = Track(show)
+ t.artist=d['artist']
+ t.title=d['track']
+ t.album=d['album']
+ t.label=d['label']
+ t.played_at="%(date_str)s %(time_str)s" % d
+ return t
+
+def add_audio_files():
+ BASE = "http://blog.germuska.com/wnur/"
+ with open("data/playlists/audio_urls.txt") as f:
+ for line in f:
+ line = line.strip()
+ dot,date,filename = line.split('/')
+ air_date = '-'.join([date[:4],date[4:6],date[6:]])
+ try:
+ show = models.Show.objects.get(air_date=air_date)
+ af = models.AudioFile(url=urljoin(BASE,line), show=show)
+ af.save()
+ except Exception, e:
+ print e
+
@@ -0,0 +1,145 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+from lxml.html import parse as html_parse
+from dateutil.parser import parse as date_parse
+from lxml import etree
+from lxml.cssselect import CSSSelector
+from urllib2 import urlopen
+from urlparse import urljoin
+from time import mktime
+import codecs
+from csvkit import UnicodeCSVWriter
+from django.conf import settings
+import os, os.path
+import json
+from django.conf import settings
+import re
+from playlists import models
+
+# Splits a form input name of the shape "<field>_<number>" into its field
+# name and its numeric suffix.
+PATTERN = re.compile(r'^(.+?)_(\d+?)$')
+
+# JSON file that holds all playlists; read by load_json_playlists() and
+# overwritten by dump_json_playlists().
+JSON_PATH = os.path.join(settings.SITE_ROOT,"../data/playlists/complete.json")
+
+# Search-results page fetched by show_links().
+DJ_SEARCH_URL = "http://www.wnur.org/playlist/search/?pickdj=Joe+G"
+
+# Keys kept for every track dict built by parse_playlist_form(); a key with
+# no value in the form is stored as None.
+KEEPER_KEYS = [u'album',
+ u'artist',
+ u'track',
+ u'unix_time',
+ u'label',
+ u'date_str',
+ u'time_str']
+
+def load_json_playlists():
+ return json.load(open(JSON_PATH))
+
+def dump_json_playlists(playlists):
+ with open(JSON_PATH,"w") as f:
+ json.dump(playlists,f,indent=2)
+ print "json dumped"
+
+
+def update_json_from_forms():
+ shows = load_json_playlists()
+ air_dates = set(s[0] for s in shows)
+ for dir,subdirs,files in os.walk(os.path.join(settings.SITE_ROOT,'../data/forms2scrape')):
+ for fn in files:
+ datestr,ext = fn.split('.')
+ if date_parse(datestr).strftime('%Y-%m-%d') in air_dates:
+ print "found show, skipping %s" % (fn)
+ else:
+ date_str,tracks = parse_playlist_form(open(os.path.join(dir,fn)))
+ shows.append((date_str,tracks))
+ print "added %s" % date_str
+
+ return shows
+
+def print_show(show):
+ if len(show) == 2:
+ show = show[-1]
+ # else assume it's jus the tracks
+ tmpl = u'%(artist)s: “%(track)s” – %(album)s (%(label)s)'
+ for track in show:
+ rendered = tmpl % track
+ print rendered.encode('utf-8')
+
+def add_playlist_from_form(path):
+ if not os.path.isabs(path):
+ path = os.path.join(settings.SITE_ROOT,path)
+ show = parse_playlist_form(open(path))
+ shows = load_json_playlists()
+ shows.append(show)
+ dump_json_playlists(shows)
+ print_show(show)
+
+def parse_playlist_form(fp):
+ show_date = None
+ doc = html_parse(fp)
+ inputs = {}
+ for input in doc.getroot().body.xpath('//input[@type="text"]'):
+ if input.attrib['name'] == 'date':
+ show_date = date_parse(input.attrib['value'])
+ elif PATTERN.match(input.attrib['name']):
+ field,num = PATTERN.match(input.attrib['name']).groups()
+ if field == 'timestamp':
+ try:
+ time = date_parse(input.attrib['value'])
+ real_date = date_parse(show_date.strftime('%Y-%m-%d') + ' ' + time.strftime('%H:%M:%S'))
+ d = inputs.setdefault(int(num),{})
+ d['unix_time'] = mktime(real_date.timetuple())
+ d['date_str'] = real_date.strftime('%Y-%m-%d')
+ d['time_str'] = real_date.strftime('%H:%M:%S')
+ except: pass # 'auto' probably
+ else:
+ try:
+ value = input.attrib['value']
+ inputs.setdefault(int(num),{})[field] = value
+ except KeyError: pass
+
+ show = []
+ for k in sorted(inputs):
+ d = dict((key,inputs[k].get(key,None)) for key in KEEPER_KEYS)
+ if filter(None,d.values()):
+ show.append(d)
+ return (show[0]['date_str'],show)
+
+def show_links():
+ a = CSSSelector('a').path
+ doc = html_parse(urlopen(DJ_SEARCH_URL))
+ for link in doc.getroot().xpath(a):
+ href = link.attrib.get('href')
+ if href and href.startswith('../archive/?date='):
+ yield urljoin(DJ_SEARCH_URL,href)
+
+def parse_text_files():
+ BASE = 'data/playlists/txt'
+ shows = []
+ for fn in os.listdir(BASE):
+ datepart = fn.split('.')[0]
+ date = u'-'.join([datepart[:4],datepart[4:6],datepart[6:]])
+ tracks = parse_txt_file(os.path.join(BASE,fn))
+ shows.append((unicode(date),tracks))
+ return shows
+
+# Line formats accepted by parse_txt_file(), tried in this order.
+# 1: artist, quoted track, en dash, optional album, parenthesised label.
+TRACK_PATTERN = re.compile(r'^(?P<artist>.+): “(?P<track>.+)” – (?P<album>.+)?\s*(?:\((?P<label>.+)\))$')
+# 2: artist and quoted track only.
+TRACK_PATTERN2 = re.compile(r'^(?P<artist>.+):\s+“(?P<track>.+)”\s*$')
+# 3: artist, quoted track (closing quote optional), parenthesised label.
+TRACK_PATTERN3 = re.compile(r'^(?P<artist>.+): “(?P<track>.+)”?\s*(?:\((?P<label>.+)\))$')
+# 4: artist, quoted track, en dash, optional album; no label group.
+TRACK_PATTERN4 = re.compile(r'^(?P<artist>.+): “(?P<track>.+)” – (?P<album>.+)?$')
+def parse_txt_file(path):
+ tracks = []
+ with open(path) as f:
+ for line in f:
+ line = line.strip()
+ try:
+ if TRACK_PATTERN.match(line):
+ match = TRACK_PATTERN.match(line)
+ elif TRACK_PATTERN2.match(line):
+ match = TRACK_PATTERN2.match(line)
+ elif TRACK_PATTERN3.match(line):
+ match = TRACK_PATTERN3.match(line)
+ else: match = TRACK_PATTERN4.match(line)
+ tracks.append(match.groupdict())
+ except AttributeError: # only five lines in Pieter's show so handled manually after the fact for now...
+ print "Weird line in %s!\n%s" % (path,line)
+ tracks.append(line)
+ return tracks
@@ -0,0 +1,16 @@
+"""
+This file demonstrates writing tests using the unittest module. These will pass
+when you run "manage.py test".
+
+Replace this with more appropriate tests for your application.
+"""
+
+from django.test import TestCase
+
+
+class SimpleTest(TestCase):
+ def test_basic_addition(self):
+ """
+ Tests that 1 + 1 always equals 2.
+ """
+ self.assertEqual(1 + 1, 2)
Oops, something went wrong.

0 comments on commit 558b25c

Please sign in to comment.