# Case Study - The Current - Part 2

* The Current is an alternative radio station
* We will pull information about the playlist.

# Step 0 - Insert current progress

Copy over all the relevant code from part 1 of the lab.

http://www.thecurrent.org/playlist/2014-01-01/01

In [1]:
# Import modules here
import requests
import re
from bs4 import BeautifulSoup
from composablesoup import find, find_all, get_text, has_attr
from composable.sequence import slice, head
from composable.strict import map, filter
from composable.string import replace, split
from composable import from_toolz as tlz
from composable import pipeable
from composablesoup.soup import find_parent, parents, children, find_previous_sibling, find_previous_siblings, find_next_sibling, find_next_siblings, find_previous_sibling
from composable.sequence import to_list
from composable.string import strip


In [2]:
# Functions from 5_1_1
@pipeable
def get_period(soup):
    """Return the second space-separated token of the open hour header.

    Looks up the ``span`` whose class is ``"hour-header open"``, takes its
    text, strips surrounding whitespace, removes newlines, splits on single
    spaces and returns the item at index 1 (``'am'`` in the example output
    shown later in this notebook).
    """
    header_span = soup >> find('span', attrs={'class': "hour-header open"})
    header_text = header_span >> get_text >> strip >> replace('\n', '')
    return header_text >> split(' ') >> tlz.get(1)

def get_dj(soup):
    """Return the text of the ``h5`` tag with class ``currentDj``, minus the 'DJ: ' label."""
    dj_heading = soup >> find('h5', attrs={'class': "currentDj"})
    return dj_heading >> get_text >> replace('DJ: ', '')

@pipeable
def get_day(soup):
    """Return the part of the date-picker link text that precedes the first comma.

    Looks up the ``a`` tag with class ``start-picker``, strips its text,
    splits it on commas and returns the first piece (``'Wednesday'`` in the
    example output shown later in this notebook).
    """
    picker_text = soup >> find('a', attrs={'class': "start-picker"}) >> get_text >> strip
    return picker_text >> split(',') >> tlz.get(0)

@pipeable
def get_title(soup):
    """Return the text of every ``h5`` tag whose class is ``title``."""
    title_tags = soup >> find_all('h5', attrs={'class': 'title'})
    return title_tags >> map(get_text)
                    
@pipeable
def get_artist(soup):
    """Return the text of every ``h5`` tag whose class is ``artist``."""
    artist_tags = soup >> find_all('h5', attrs={'class': 'artist'})
    return artist_tags >> map(get_text)


In [3]:
# Read in the page here
s = requests.Session()
# Without a timeout, requests waits indefinitely if the server never answers.
r = s.get('http://www.thecurrent.org/playlist/2014-01-01/01', timeout=30)
# Stop on a 4xx/5xx response instead of parsing an error page as a playlist.
r.raise_for_status()
current = BeautifulSoup(r.content, "html.parser")

# Pull off the song start time

1. Inspect the element
    1. This one is tricky
    2. The time tag does not have a class, but
    3. The surrounding div does have a class
2. Identify the html tag and class
3. Use `find_all` to make a list of all relevant tags
4. Pull off an example case
5. Write a function that extracts the start time.
6. Write a single pipe to extract the start time.
7. Confirm you have the right number of times.
8. Package your code in a function called `get_start_time`

In [4]:

def get_start_time(soup):
    """Return the stripped text of the ``time`` tag inside each song-time div.

    The ``time`` tags are reached through their surrounding ``div`` with
    class ``two columns songTime``.
    """
    time_cells = soup >> find_all('div', attrs={'class': 'two columns songTime'})
    time_tags = time_cells >> map(find('time'))
    return time_tags >> map(get_text) >> map(strip)

# The same extraction as get_start_time, written as a single pipe and run
# directly on the downloaded page so the result is displayed by the cell.
(
    current
    >> find_all('div', attrs={'class': 'two columns songTime'})
    >> map(find('time')) >> map(get_text) >> map(strip)
)

['1:59',
 '1:54',
 '1:51',
 '1:46',
 '1:44',
 '1:38',
 '1:34',
 '1:31',
 '1:27',
 '1:23',
 '1:19',
 '1:13',
 '1:09',
 '1:05',
 '1:03',
 '1:01']

# Pull the address of the album art image

Follow a similar process to pull off the web address of the album cover image. 


In [5]:
@pipeable
def get_src_or_data_src(tag):
    """Return an image tag's address from ``src``, falling back to ``data-src``.

    Args:
        tag: A tag supporting ``.get(name)`` and ``tag[name]`` attribute
            access (a BeautifulSoup ``img`` tag in this notebook).

    Returns:
        The ``src`` attribute when it is present and non-empty, otherwise
        the ``data-src`` attribute.

    Raises:
        KeyError: If ``src`` is missing or empty and the tag has no
            ``data-src`` attribute either.
    """
    # Indexing tag["src"] raises KeyError when the attribute is absent, which
    # would prevent the fallback from ever running; .get() returns None
    # instead, so a missing or empty src falls through to data-src.
    return tag.get("src") or tag["data-src"]

# Try the album-art extraction directly on the downloaded page so the
# resulting addresses are displayed by the cell.
(
    current
    >> find_all('img', attrs={'class': 'album-art lazyload'})
    >> map(get_src_or_data_src)
)

['https://albumart.publicradio.org/mb/e2/e2749c25-c2b6-493e-a2bb-10898152bd2d_5158.jpg',
 'https://albumart.publicradio.org/mb/5e/5e5c8b95-d04c-432f-8cd2-c1c8d99e6e5a_3556.jpg',
 'https://albumart.publicradio.org/mb/48/48445b64-d965-369a-af3c-8193de389fd8_3ff4.jpg',
 'https://albumart.publicradio.org/mb/e9/e999c049-c65b-4c5e-ad12-5596998679c7_92f9.jpg',
 'https://albumart.publicradio.org/mb/d6/d62320e2-20c4-4589-aa76-2f8ac28447dd_e03b.jpg',
 'https://albumart.publicradio.org/mb/02/028b8602-3bde-495a-a7da-15594fc4f786_351a.jpg',
 'https://albumart.publicradio.org/mb/c9/c92f73ee-527f-42ed-a556-fd615941e214_78f0.jpg',
 'https://albumart.publicradio.org/mb/24/24084807-5d23-423e-b1f3-5e9fd874e240_6ccd.jpg',
 'https://albumart.publicradio.org/mb/c2/c20be759-d767-4a7c-96c5-7a870ebc3a30_7f7d.jpg',
 'https://albumart.publicradio.org/mb/37/37f48931-e5e6-488f-a531-ad2db311158d_7446.jpg',
 'https://albumart.publicradio.org/mb/1a/1aa41b19-5a72-341b-bd91-4cf61d1dab6b_8e05.jpg']

In [6]:
def get_album_art(soup):
    """Return the image address of every ``img`` tag with class ``album-art lazyload``."""
    art_tags = soup >> find_all('img', attrs={'class': 'album-art lazyload'})
    return art_tags >> map(get_src_or_data_src)

# Putting it all together

* Make a function for each of the previous steps
* Make an overall function
    * input is a soup
    * output is a list of lists

**Hint:** You should use `zip` to put all the information together.

In [7]:
def get_info(soup):
    """Combine the per-song and page-level fields of a playlist page.

    Args:
        soup: A soup object created from BeautifulSoup.

    Returns:
        A ``zip`` iterator of 7-tuples in the order
        (start time, period, day, dj, title, artist, album art address).
    """
    # Per-song columns: one entry for each matching tag found on the page.
    titles = get_title(soup)
    artists = get_artist(soup)
    start_times = get_start_time(soup)
    art_addresses = get_album_art(soup)

    def per_song(value):
        # Page-level values occur once, so repeat them once per title to
        # give zip a column of matching length.
        return [value] * len(titles)

    periods = per_song(get_period(soup))
    djs = per_song(get_dj(soup))
    days = per_song(get_day(soup))

    # NOTE(review): zip stops at its shortest input. The example outputs in
    # this notebook show 16 start times but only 11 album-art addresses, so
    # rows may be dropped and the album art may not line up with its song.
    # Confirm against the page structure.
    return zip(start_times, periods, days, djs, titles, artists, art_addresses)



# zip objects are lazy, so this cell displays only the iterator object, not the rows.
get_info(current)


<zip at 0x7ffd655a3ac0>

In [8]:
# Printing out the information: list() consumes the zip iterator so the rows are displayed
list(get_info(current))

[('1:59',
  'am',
  'Wednesday',
  'Jade',
  'Holy Roller',
  'Thao and The Get Down Stay Down',
  'https://albumart.publicradio.org/mb/e2/e2749c25-c2b6-493e-a2bb-10898152bd2d_5158.jpg'),
 ('1:54',
  'am',
  'Wednesday',
  'Jade',
  'Kingdom of Rust',
  'Doves',
  'https://albumart.publicradio.org/mb/5e/5e5c8b95-d04c-432f-8cd2-c1c8d99e6e5a_3556.jpg'),
 ('1:51',
  'am',
  'Wednesday',
  'Jade',
  'Black Dog',
  'Frankie Lee',
  'https://albumart.publicradio.org/mb/48/48445b64-d965-369a-af3c-8193de389fd8_3ff4.jpg'),
 ('1:46',
  'am',
  'Wednesday',
  'Jade',
  'Turn It Around',
  'Lucius',
  'https://albumart.publicradio.org/mb/e9/e999c049-c65b-4c5e-ad12-5596998679c7_92f9.jpg'),
 ('1:44',
  'am',
  'Wednesday',
  'Jade',
  'Flavor of the Month',
  'The Posies',
  'https://albumart.publicradio.org/mb/d6/d62320e2-20c4-4589-aa76-2f8ac28447dd_e03b.jpg'),
 ('1:38',
  'am',
  'Wednesday',
  'Jade',
  'Potential Wife',
  'Strange Names',
  'https://albumart.publicradio.org/mb/02/028b8602-3bde-4