public
Description: Podcast Get is a short program for fetching new podcasts and plunking them on your disk
Homepage: http://jadn.com/~bob/
Clone URL: git://github.com/bherrmann7/podcast-get.git
podcast-get / src / pget.groovy
100644 139 lines (116 sloc) 5.203 kb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
/*
How to get and use this script
0. Get Groovy from http://groovy.codehaus.org/
1. Download this script as pget.groovy (click the 'raw' link on this github page.) --------------------^^^
2. Create a directory to save the podcast in (for example, mkdir /tmp/podcasts )
3. Edit pget.groovy and change the 'sites' list to the URLs of the podcast feeds you like
4. Run like this, "groovy pget.groovy <downloadLocation>" (ie. groovy pget.groovy /tmp/podcasts)
Enjoy.
 
I use this script to download new mp3's into a directory. I then copy the files onto my mp3 player and
listen to them during my commute. When I need a recharge of new mp3s, I re-run the script to get more podcasts.
 
This script;
- numbers the files uniquely (so there aren't any naming collisions)
- preserves the download order so I hear my most important podcasts first.
- keeps track of what is downloaded (so you don't get the same podcasts over and over.)
- only downloads a max of 6 podcasts from each source (so when first using this script you don't get 100 podcasts)
- You can stop the script and edit and re-run it (tweaker friendly). Rerunning is safe because it doesn't update the history until the end and it skips already downloaded files.
 
*/
// groovy 1.6beta2 has a bug. uncomment this line to use it.
// def args = [ "/tmp/podcasts" ]

// Exactly one argument is expected: the existing directory that receives the downloads.
if (args.size() != 1 ){
   println "usage: pget downloadDirectory"
   System.exit(1)
}
def downloadLocation = args[0]
if ( !new File(downloadLocation).isDirectory() ){
    println "Error: The argument (${downloadLocation}) is not a directory."
    System.exit(-1);
}
// From here on downloadLocation is a String.format pattern taking (sequence number, file name).
// Literal '%' characters in the directory name are doubled so the formatter
// does not mistake them for conversion specifiers.
downloadLocation = downloadLocation.replace('%', '%%') + File.separator + "%04d-%s"
 
// Entries already downloaded by earlier runs, persisted as a Groovy list literal
// in ~/.pgetDownloadHistory.
def downloadHistory = []
def downloadHistoryFile = new File(System.properties['user.home']+File.separator+'.pgetDownloadHistory');
if ( downloadHistoryFile.exists() ){
   // NOTE(review): evaluate() runs the file's content as Groovy code, so the history
   // file must only ever be written by this script.
   // An empty file evaluates to null; fall back to an empty history instead of
   // failing on the size() call below.
   downloadHistory = evaluate(downloadHistoryFile.text) ?: []
} else {
   println "Warning: No download history found, creating a new one."
}

println "running, cached: " + downloadHistory.size()
 
// Podcast feeds to poll. The list is walked top to bottom, so enclosures of
// earlier feeds are queued (and numbered) before those of later feeds.
sites = [
        "http://feeds2.feedburner.com/WebdevradioPodcastHome",
        "http://feeds2.feedburner.com/PhandroidPodcast",
        "http://feeds2.feedburner.com/ThisAintYourDadsJava",
        "http://www.cringely.com/feed/podcast/",
        "http://feedproxy.google.com/androidguyscom",
        "http://hansamann.podspot.de/rss",
        "http://www.pbs.org/cringely/pulpit/rss/podcast.rss.xml",
        "http://feeds.feedburner.com/javaposse",
        "http://feeds.feedburner.com/rubyonrailspodcast",
        "http://blog.stackoverflow.com/index.php?feed=podcast",
        "http://leoville.tv/podcasts/ww.xml",
        "http://www.nofluffjuststuff.com/s/podcast/itunes.xml",
        "http://media.ajaxian.com/",
        "http://agiletoolkit.libsyn.com/rss",
        "http://feeds.feedburner.com/railsenvy-podcast",
        "http://www.discovery.com/radio/xml/sciencechannel.xml",
        "http://feeds.feedburner.com/gigavox/channel/itconversations",
        "http://www.scienceandsociety.net/podcasts/index.xml",
        "http://leoville.tv/podcasts/floss.xml",
]

// Enclosure url -> file name, filled in while the feeds are scanned.
enclosures = [:]

// Lookup table built from the history pairs: first element -> second element.
histmap = [:]
downloadHistory.each { pair ->
    histmap.put(pair[0], pair[1])
}
 
// Scan every feed and queue its leading enclosures (in document order) that are
// not yet in the history. At most `max` enclosures are examined per feed.
sites.each {site ->
    println "site: ${site}"

    // A dead or malformed feed must not abort the whole run: report it and
    // carry on with the next one.
    def xml
    try {
        xml = new groovy.util.XmlSlurper().parse(site)
    } catch (Exception e) {
        println " skipping, could not read feed: ${e}"
        return // inside each{} this continues with the next site
    }

    int max = 6;

    def xmlenclosures = xml.depthFirst().findAll { it.name().equals("enclosure") }
    println " has ${xmlenclosures.size()} enclosures"
    xmlenclosures.each {enclosure ->
        def url = enclosure.@url.toString()
        // The file name is whatever follows the last '/' of the enclosure url.
        def filename = url.substring(url.lastIndexOf('/') + 1);

        if (max-- > 0) {
            if (histmap[filename]) {
                println "already have $filename"
                // prevent downloading past the first entry we already have
                max = 0;
            } else {
                println "Will get: " + filename + " " + url
                enclosures[url] = filename
            }

        }
    }
}
 
println "\nstarting downloads... will get " + enclosures.size() + " files"

// Number of enclosures that could not be downloaded in this run.
errors = 0;

// Fetch every queued enclosure into the download directory. Files are named
// "<sequence>-<original name>", the sequence being the current history size.
enclosures.each {url, filename ->
    println "downloading: " + filename + " " + url
    def ondiskname = String.format(downloadLocation, downloadHistory.size(), filename)

    def file = new File(ondiskname)
    file.parentFile.mkdir()

    if (file.exists()) {
        println "**** ALREADY DOWNLOADED: ${url}"
    } else {
        // Download into a ".part" file and rename it once complete, so an interrupted
        // or failed transfer is never mistaken for a finished download on a later run.
        def partial = new File(ondiskname + ".part")
        try {
            // withInputStream / withOutputStream close both streams even on failure.
            new URL(url).withInputStream { input ->
                partial.withOutputStream { out -> out << input }
            }
            if (!partial.renameTo(file)) {
                throw new IOException("could not rename " + partial + " to " + file)
            }
        } catch (IOException e) {
            println "**** FAILED: ${url} (${e})"
            partial.delete()
            errors++
            return // skip the history entry; continue with the next enclosure
        }

        // Tagging is best effort: a missing id3v2 binary must not abort the run.
        try {
            [ "/usr/bin/id3v2", "-t", '_' + file.name, ondiskname ].execute()
        } catch (IOException e) {
            println "note: could not run /usr/bin/id3v2, leaving tags untouched"
        }
    }
    println "ok have downloadHistory[$filename]=$ondiskname"

    downloadHistory << [filename, file.name]
}
 
// Save the history (old and new items) as a Groovy list literal that the next
// run reads back with evaluate().

// Backslashes and single quotes are escaped so that unusual file names cannot
// break the single-quoted string literals written below.
def quote = { value -> value.toString().replace('\\', '\\\\').replace("'", "\\'") }

downloadHistoryFile.withPrintWriter {pw ->
    pw.println("def history = [ ")
    downloadHistory.each {
        pw.println " [ '${quote(it[0])}', '${quote(it[1])}' ],"
    }
    pw.println("]")
}

println "all done, cache:" + downloadHistory.size() + ", added:" + enclosures.size() + ", errors:" + errors