### 0) Pull Podcast data from iTunes with Applescript/Coffeescript  
In some cases podcasts don't have the 'media type' of "Podcast," but we'll ignore those for now.

```coffeescript
-- Export one pipe-delimited text row per track of iTunes' "library playlist 1",
-- preceded by a header row naming the columns.
-- Field values are concatenated as-is: nothing here escapes a "|" or a line
-- break that occurs inside a value (e.g. in a long description).
-- NOTE(review): the header below has no mediaKind column, although the data
-- loaded later in this notebook shows one -- presumably that file came from a
-- newer version of this script; confirm.
tell application "iTunes"
	-- Header row; the column order must match the order of the fields appended below.
	set trackData to "name|duration|album|artist|composer|genre|playedCount|datePlayed|dateAdded|unplayed|year|description\n"
	repeat with aTrack in tracks of library playlist 1
		-- td accumulates the current track's row; starting from "" keeps every
		-- concatenation string-on-the-left.
		set td to ""
		set td to td & name of aTrack & "|"
		set td to td & duration of aTrack & "|"
		set td to td & album of aTrack & "|"
		set td to td & artist of aTrack & "|"
		set td to td & composer of aTrack & "|"
		set td to td & genre of aTrack & "|"
		set td to td & played count of aTrack & "|"
		set td to td & played date of aTrack & "|"
		set td to td & date added of aTrack & "|"
		set td to td & unplayed of aTrack & "|"
		set td to td & year of aTrack & "|"
		-- Last field of the row: terminated by a newline instead of "|".
		set td to td & long description of aTrack & "\n"
		
		-- Append the finished row to the overall output text.
		set trackData to trackData & td
	end repeat
	-- NOTE(review): presumably osascript prints the value of this final statement,
	-- which is what the shell redirect captures -- confirm.
	copy trackData to stdout
end tell
```  
  
This is run from bash with:
```bash
    # Run the AppleScript file and redirect what it prints into a text file.
    osascript pullTrackData_upd101517.scpt > podcastdatapull_upd102117.txt
```

### 1) Load the raw track data

In [1]:
import pandas as pd
# Load the pipe-delimited export into a DataFrame.
# Rows whose field count does not match the header are skipped silently.
# `on_bad_lines='skip'` replaces `error_bad_lines=False, warn_bad_lines=False`,
# which were deprecated in pandas 1.3 and removed in pandas 2.0.
raw_data = pd.read_table(
    'data/datapull_upd102617.txt',
    sep='|',
    on_bad_lines='skip',
    dtype={'year': str},  # read `year` as text instead of letting pandas infer a dtype
)
print(raw_data.shape)
raw_data.head()

(10475, 13)


Unnamed: 0,name,mediaKind,duration,album,artist,composer,genre,playedCount,datePlayed,dateAdded,unplayed,year,description
0,Riots,«constant ****kMdS»,119.50700378418,Slumdog Millionaire (Music from the Motion Pic...,A. R. Rahman,A. R. Rahman,Soundtrack,0.0,missing value,"Saturday, March 7, 2009 at 20:27:41",True,2008,missing value
1,Mausam & Escape,«constant ****kMdS»,232.839996337891,Slumdog Millionaire (Music from the Motion Pic...,A. R. Rahman,A. R. Rahman,Soundtrack,0.0,missing value,"Saturday, March 7, 2009 at 20:27:41",True,2008,missing value
2,O... Saya,«constant ****kMdS»,213.826995849609,Slumdog Millionaire (Music from the Motion Pic...,A. R. Rahman & M.I.A.,A. R. Rahman,Soundtrack,0.0,missing value,"Saturday, March 7, 2009 at 20:27:41",True,2008,missing value
3,Millionaire,«constant ****kMdS»,164.427001953125,Slumdog Millionaire (Music from the Motion Pic...,A. R. Rahman & Madhumitha,A. R. Rahman,Soundtrack,0.0,missing value,"Saturday, March 7, 2009 at 20:27:41",True,2008,missing value
4,Latika's Theme,«constant ****kMdS»,189.373001098633,Slumdog Millionaire (Music from the Motion Pic...,A. R. Rahman & Suzanne,A. R. Rahman,Soundtrack,0.0,missing value,"Saturday, March 7, 2009 at 20:27:41",True,2008,missing value


### 2) Preliminary processing
- remove tracks with no description
- remove tracks that have no played date
- remove tracks that are flagged as unplayed

In [2]:
# Keep only rows that have a description, are not flagged as unplayed, and have
# a played date. Empty fields appear in the export as the literal text
# 'missing value'.
has_description = raw_data['description'] != 'missing value'
was_played = raw_data['unplayed'] == False  # elementwise comparison, not `is False`
has_play_date = raw_data['datePlayed'] != 'missing value'

# Build a new, independent frame without the columns that are not analysed.
podcast_data = (
    raw_data[has_description & was_played & has_play_date]
    .drop(columns=['mediaKind', 'composer'])
    .copy()
)

# Parse the date strings with the public `pd.to_datetime` (not the private
# `pd.core.tools.datetimes` path). Whole-column assignment is used instead of
# `.loc[:, col] = ...` because, since pandas 2.0, the `.loc` form writes into
# the existing object-dtype column, so the later `.dt` accessor would fail.
podcast_data['datePlayed'] = pd.to_datetime(podcast_data['datePlayed'])
podcast_data['dateAdded'] = pd.to_datetime(podcast_data['dateAdded'])
print(podcast_data.shape)
podcast_data.head()

(6568, 11)


Unnamed: 0,name,duration,album,artist,genre,playedCount,datePlayed,dateAdded,unplayed,year,description
10,A.D.K.O.B. - Helium,253.0,KEXP Song of the Day,A.D.K.O.B.,Podcast,1.0,2017-09-28 09:29:47,2017-09-28 06:43:37,False,2017,A.D.K.O.B. - Helium - a 2017 single on Wonderl...
11,A.D.K.O.B. - Lung Capacity,172.0,KEXP Song of the Day,A.D.K.O.B.,Podcast,1.0,2017-08-13 22:47:02,2017-01-30 06:15:32,False,0,A.D.K.O.B. - Lung Capacity - from the 2015 A D...
12,Aan - All You Have to Say,214.0,KEXP Song of the Day,Aan,Podcast,1.0,2017-01-20 11:03:50,2017-01-21 11:16:12,False,2016,Aan - All You Have to Say - from the 2016 albu...
16,Ablebody - Backseat Heart,297.0,KEXP Song of the Day,Ablebody,Podcast,1.0,2017-01-03 13:11:12,2017-01-30 06:15:32,False,0,Ablebody - Backseat Heart - from the 2016 albu...
33,Acapulco Lips,1269.0,KEXP Live Performances Podcast,Acapulco Lips,Podcast,1.0,2017-01-01 08:03:15,2017-01-30 06:14:26,False,0,"Maria-Elena Juarez, Christopher Garland, and D..."


### 3) Initial generated fields
These will help with categorizations in data exploration

In [3]:
# 3.1) What was the listen day?
# Add an abbreviated weekday name ('%a') column for each timestamp column.
for ts_col in ('dateAdded', 'datePlayed'):
    podcast_data[ts_col + '_DoW'] = podcast_data[ts_col].dt.strftime('%a')

In [4]:
# 3.2) What part of the day was it consumed?
def bin_f(x):
    """Return a short label for the part of the day in which ``x`` falls.

    Parameters
    ----------
    x : datetime-like
        Object exposing an ``hour`` attribute, e.g. ``pd.Timestamp`` or
        ``datetime.datetime``.

    Returns
    -------
    str or None
        ``"em"`` (early morning) for hours 0-5, ``"m"`` (morning) for 6-11,
        ``"a"`` (afternoon) for 12-17, ``"e"`` (evening) for 18-23.
        ``None`` when ``x`` is missing (``NaT``/``None``/NaN), so a missing
        timestamp is never assigned to a real part of the day.
    """
    if pd.isna(x):
        return None
    # Read the hour directly from the timestamp; the former
    # `pd.datetime.time(x).hour` relied on the `pd.datetime` alias, which was
    # deprecated in pandas 1.0 and removed in pandas 2.0.
    h = x.hour
    if h < 6:
        return "em" #early morning
    elif h < 12:
        return "m" # morning
    elif h < 18:
        return "a" # afternoon
    else:
        return "e" # evening
    
# Label each timestamp column with its part-of-day code from bin_f.
for ts_col in ('dateAdded', 'datePlayed'):
    podcast_data[ts_col + '_Part'] = podcast_data[ts_col].apply(bin_f)

In [5]:
# Preview the first rows, now including the *_DoW and *_Part columns added above.
podcast_data.head()

Unnamed: 0,name,duration,album,artist,genre,playedCount,datePlayed,dateAdded,unplayed,year,description,dateAdded_DoW,datePlayed_DoW,dateAdded_Part,datePlayed_Part
10,A.D.K.O.B. - Helium,253.0,KEXP Song of the Day,A.D.K.O.B.,Podcast,1.0,2017-09-28 09:29:47,2017-09-28 06:43:37,False,2017,A.D.K.O.B. - Helium - a 2017 single on Wonderl...,Thu,Thu,m,m
11,A.D.K.O.B. - Lung Capacity,172.0,KEXP Song of the Day,A.D.K.O.B.,Podcast,1.0,2017-08-13 22:47:02,2017-01-30 06:15:32,False,0,A.D.K.O.B. - Lung Capacity - from the 2015 A D...,Mon,Sun,m,e
12,Aan - All You Have to Say,214.0,KEXP Song of the Day,Aan,Podcast,1.0,2017-01-20 11:03:50,2017-01-21 11:16:12,False,2016,Aan - All You Have to Say - from the 2016 albu...,Sat,Fri,m,m
16,Ablebody - Backseat Heart,297.0,KEXP Song of the Day,Ablebody,Podcast,1.0,2017-01-03 13:11:12,2017-01-30 06:15:32,False,0,Ablebody - Backseat Heart - from the 2016 albu...,Mon,Tue,m,a
33,Acapulco Lips,1269.0,KEXP Live Performances Podcast,Acapulco Lips,Podcast,1.0,2017-01-01 08:03:15,2017-01-30 06:14:26,False,0,"Maria-Elena Juarez, Christopher Garland, and D...",Mon,Sun,m,m
