# Case Study - The Current

* The Current is an alternative radio station
* We will pull information about the playlist.

# Step 0 - Inspect the following page

* Song title
* Artist
* Play time
* Day, date, period (am/pm)

http://www.thecurrent.org/playlist/2014-01-01/01

In [33]:
from composablesoup import find, find_all, get_text, has_attr
from composable.sequence import slice, head
from composable.strict import map, filter
from composable.string import replace
from composable import from_toolz as tlz
import requests
from bs4 import BeautifulSoup
from composable.string import split
from composable import pipeable
import re
from composablesoup.soup import find_parent, parents, children, find_previous_sibling, find_previous_siblings
from composablesoup.soup import find_next_sibling, find_next_siblings, find_previous_sibling

In [2]:
# Fetch one hour of The Current's playlist and parse the HTML into a soup.
s = requests.Session() # Start a session
# The timeout keeps this cell from hanging forever on a stalled connection.
r = s.get('https://www.thecurrent.org/playlist/2020-10-14/01', timeout=30) # Get a static page
# Fail here on an HTTP error (4xx/5xx) instead of parsing an error page,
# which would only surface later as confusing failures in the `find` pipes.
r.raise_for_status()
the_current_radio = BeautifulSoup(r.content, "html.parser")

# Step 1 - Pull off the period of the day (am/pm)

Pull out the "am"/"pm"

1. Inspect the element
2. Identify the html tag and class
3. Search the soup
    1. There should be one item returned
4. Use soup/string methods to pull out the info

In [3]:
# Pipe-friendly wrapper around the `.strip()` method of its argument.
strip = pipeable(lambda text: text.strip())

# Exploratory run: locate the hour header, clean up its text, split it on
# single spaces and keep the item at index 1 (displayed below as 'am').
(
    the_current_radio
    >> find('span', attrs={'class': 'hour-header'})
    >> get_text
    >> strip
    >> split(' ')
    >> tlz.get(1)
)

'am'

In [4]:
@pipeable
def get_time_period(soup):
    """Return the period of the day taken from the page's hour header.

    Looks up the ``span`` with class ``hour-header``, strips its text,
    splits it on single spaces and returns the item at index 1
    (``'am'`` for the page scraped in this notebook).
    """
    hour_header = soup >> find('span', attrs={'class': 'hour-header'})
    pieces = hour_header >> get_text >> strip >> split(' ')
    return pieces >> tlz.get(1)

In [79]:
# Check the packaged pipe against the soup; the result should match the
# exploratory pipe above.
the_current_radio >> get_time_period

'am'

# Step 2 - Pull off DJ

Use a similar process to pull off the DJ.


In [6]:
# Exploratory run: text of the `h5` tag with class `currentDj`.
# The "DJ: " prefix is part of the returned text (see the output below).
(
    the_current_radio
    >> find('h5', attrs={'class': 'currentDj'})
    >> get_text
)

'DJ: Jade'

In [7]:
@pipeable
def get_dj(soup):
    """Return the text of the ``h5`` tag with class ``currentDj``.

    The text is returned as-is, including its ``"DJ: "`` prefix
    (``'DJ: Jade'`` for the page scraped in this notebook).
    """
    dj_tag = soup >> find('h5', attrs={'class': 'currentDj'})
    return dj_tag >> get_text

In [80]:
# Check the packaged pipe against the soup.
the_current_radio >> get_dj

'DJ: Jade'

# Step 3 - Pull out the day of the week

* Pull out the day of the week

In [9]:
# Exploratory run: the date picker link's text is split on commas and the
# first piece is kept (displayed below as 'Wednesday').
(
    the_current_radio
    >> find('a', attrs={'class': 'start-picker'})
    >> get_text
    >> split(',')
    >> tlz.get(0)
)

'Wednesday'

In [10]:
@pipeable
def get_day(soup):
    """Return the day of the week shown in the page's date picker.

    Reads the text of the ``a`` tag with class ``start-picker``, splits it
    on commas and returns the first piece (``'Wednesday'`` for the page
    scraped in this notebook).
    """
    picker_text = soup >> find('a', attrs={'class': 'start-picker'}) >> get_text
    return picker_text >> split(',') >> tlz.get(0)

In [81]:
# Check the packaged pipe against the soup.
the_current_radio >> get_day

'Wednesday'

# Title of each song

1. Inspect the element
2. Identify the html tag and class
3. Use `find_all` to make a list of all relevant tags
4. Pull off an example case
5. Write a function to pull out the title
6. Write a single pipe to convert the original soup into a list of titles. 
7. Verify you have the right number of titles.
8. Package the pipe in a function named `get_title`

In [12]:
# Exploratory run: text of every `h5` tag with class `title`, one per song.
(
    the_current_radio
    >> find_all('h5', attrs={'class': 'title'})
    >> map(get_text)
)

['The Heart Is A Muscle',
 'In My Place',
 'Help Me',
 'AUATC',
 'I Remember You',
 'March March',
 'May',
 'Rooftop Dancing',
 'Having a Party (Live)',
 'Bad Decisions',
 'Figure It Out',
 'Thickfreakness',
 'Letting Me Down',
 'Gone Daddy Gone',
 'Be an Astronaut',
 'Guess Again']

In [13]:
@pipeable
def get_title(soup):
    """Return the song titles on the page, in page order.

    Collects every ``h5`` tag with class ``title`` and maps each one to
    its text.
    """
    title_tags = soup >> find_all('h5', attrs={'class': 'title'})
    return title_tags >> map(get_text)

In [82]:
# Check the packaged pipe against the soup; expect the same titles as above.
the_current_radio >> get_title

['The Heart Is A Muscle',
 'In My Place',
 'Help Me',
 'AUATC',
 'I Remember You',
 'March March',
 'May',
 'Rooftop Dancing',
 'Having a Party (Live)',
 'Bad Decisions',
 'Figure It Out',
 'Thickfreakness',
 'Letting Me Down',
 'Gone Daddy Gone',
 'Be an Astronaut',
 'Guess Again']

# Pull off the name of the artist

1. Inspect the element
2. Identify the html tag and class
3. Use `find_all` to make a list of all relevant tags
4. Pull off an example case
5. Write a function to pull out the artist
6. Write a single pipe to convert the original soup into a list of artists. 
7. Verify you have the right number of artists.
8. Package the pipe in a function named `get_artist`


In [15]:
# Exploratory run: text of every `h5` tag with class `artist`, one per song.
(
    the_current_radio
    >> find_all('h5', attrs={'class': 'artist'})
    >> map(get_text)
)

['Gang of Youths',
 'Coldplay',
 'Low Cut Connie',
 'Bon Iver',
 'Ramones',
 'The Chicks',
 'Lupin',
 'Sylvan Esso',
 'Sam Cooke',
 'The Strokes',
 'Blu DeTiger',
 'The Black Keys',
 'Margo Price',
 'Violent Femmes',
 'Declan McKenna',
 'Jeff Tweedy']

In [16]:
@pipeable
def get_artist(soup):
    """Return the artist names on the page, in page order.

    Collects every ``h5`` tag with class ``artist`` and maps each one to
    its text.
    """
    artist_tags = soup >> find_all('h5', attrs={'class': 'artist'})
    return artist_tags >> map(get_text)

In [83]:
# Check the packaged pipe against the soup; expect the same artists as above.
the_current_radio >> get_artist

['Gang of Youths',
 'Coldplay',
 'Low Cut Connie',
 'Bon Iver',
 'Ramones',
 'The Chicks',
 'Lupin',
 'Sylvan Esso',
 'Sam Cooke',
 'The Strokes',
 'Blu DeTiger',
 'The Black Keys',
 'Margo Price',
 'Violent Femmes',
 'Declan McKenna',
 'Jeff Tweedy']

Bringing over the necessary functions from 5_1_2

In [31]:
# Pattern for the per-song anchors, whose href looks like ``#song<digits>``.
# Written as a raw string: ``\d`` inside a plain literal is an invalid escape
# sequence that newer Python versions warn about. The compiled pattern is
# unchanged.
# NOTE(review): the name `time` would shadow the stdlib `time` module if it
# were imported in this notebook; the name is kept so other cells that may
# reference it keep working.
time = re.compile(r'#song\d+')


@pipeable
def get_start_time(soup):
    """Return the start time of every song on the page, in page order.

    For each ``a`` tag whose ``href`` matches ``#song<digits>``, takes the
    item at index 1 of its children, reads that item's text, splits it on
    single spaces and keeps the piece at index 2 (e.g. ``'1:57'``).
    """
    song_links = soup >> find_all('a', attrs={'href': time})
    time_nodes = song_links >> map(children) >> map(tlz.get(1))
    return time_nodes >> map(get_text) >> map(split(' ')) >> map(tlz.get(2))

In [84]:
# Check the packaged pipe against the soup; one start time per song.
the_current_radio >> get_start_time

['1:57',
 '1:53',
 '1:50',
 '1:47',
 '1:45',
 '1:41',
 '1:37',
 '1:33',
 '1:28',
 '1:24',
 '1:21',
 '1:16',
 '1:13',
 '1:09',
 '1:05',
 '1:01']

In [26]:
@pipeable
def get_src(src):
    """Return the image location stored on an ``img`` tag.

    Uses the ``src`` attribute unless it is the empty string, in which
    case the ``data-src`` attribute is returned instead.
    """
    direct = src >> tlz.get('src')
    if direct == '':
        return src >> tlz.get('data-src')
    return direct


@pipeable
def get_album_art(soup):
    """Return the album-art location of every song on the page.

    Collects every ``img`` tag with class ``album-art`` and maps each one
    through ``get_src``.
    """
    art_tags = soup >> find_all('img', attrs={'class': 'album-art'})
    return art_tags >> map(get_src)

In [85]:
# Check the packaged pipe against the soup; one image location per song.
the_current_radio >> get_album_art

['https://albumart.publicradio.org/mb/1e/1ea97022-27ba-4bf5-8ab5-4ae76836e1c1_03df.jpg',
 'https://albumart.publicradio.org/mb/12/120c786d-a3b2-3c19-b4ff-2b7b3b4435bf_6f33.jpg',
 '/assets/album-cover-default-32217dc68a771f3a44aa2b7a640cf91133b61bd1f2ae68c9ddb00055e9a8ac1d.png',
 '/assets/album-cover-default-32217dc68a771f3a44aa2b7a640cf91133b61bd1f2ae68c9ddb00055e9a8ac1d.png',
 'https://albumart.publicradio.org/mb/bc/bc3b5116-300a-3696-919d-bca466a3f143_e813.jpg',
 'https://albumart.publicradio.org/mb/fe/feeac938-9cb1-4e36-9b98-4ec2ad9a65c3_1a08.jpg',
 'https://albumart.publicradio.org/mb/16/1608212a-558a-4d20-bc01-3fc8b82e6058_6f10.jpg',
 '/assets/album-cover-default-32217dc68a771f3a44aa2b7a640cf91133b61bd1f2ae68c9ddb00055e9a8ac1d.png',
 '/assets/album-cover-default-32217dc68a771f3a44aa2b7a640cf91133b61bd1f2ae68c9ddb00055e9a8ac1d.png',
 '/assets/album-cover-default-32217dc68a771f3a44aa2b7a640cf91133b61bd1f2ae68c9ddb00055e9a8ac1d.png',
 'https://albumart.publicradio.org/mb/9c/9c8f9285-

# Putting it all together

* Make a function for each of the previous steps
* Make an overall function
    * input is a soup
    * output is a list of lists

**Hint:** You should use `zip` to put all the information together.

In [77]:
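# Scratch work, kept for reference: earlier attempts at combining the
# page-level header info with the per-song lists.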
#list1 = list(zip([time_period], [dj_name], [day_of_week]))
#list2 = list(zip(song_title, artist_name, start_time, album_art))
#list(zip(list1,list2))

#list(data) for data in (zip(titles, artists, times, arts))
#get_title, get_artist, get_day, get_dj, get_time_period
#get_header = pipeable(lambda soup: list(zip([soup >> get_time_period],
#                                            [soup >> get_dj],
#                                            [soup >> get_day]
#                                           )))
#header_info = pipeable(lambda soup: list(soup.insert(0, get_header)))

In [86]:
@pipeable
def get_data(soup):
    """Combine every extractor into one result for a playlist page.

    Returns a 2-tuple ``(header, songs)``:

    * ``header`` is a one-element list holding the tuple
      ``(time period, DJ, day of week)``.
    * ``songs`` is a list of ``(title, artist, start time, album art)``
      tuples built with ``zip``, so it stops at the shortest of the four
      lists.
    """
    header = [(soup >> get_time_period, soup >> get_dj, soup >> get_day)]
    songs = list(zip(
        soup >> get_title,
        soup >> get_artist,
        soup >> get_start_time,
        soup >> get_album_art,
    ))
    return header, songs


# Run the combined extractor on the scraped page.
the_current_radio >> get_data

([('am', 'DJ: Jade', 'Wednesday')],
 [('The Heart Is A Muscle',
   'Gang of Youths',
   '1:57',
   'https://albumart.publicradio.org/mb/1e/1ea97022-27ba-4bf5-8ab5-4ae76836e1c1_03df.jpg'),
  ('In My Place',
   'Coldplay',
   '1:53',
   'https://albumart.publicradio.org/mb/12/120c786d-a3b2-3c19-b4ff-2b7b3b4435bf_6f33.jpg'),
  ('Help Me',
   'Low Cut Connie',
   '1:50',
   '/assets/album-cover-default-32217dc68a771f3a44aa2b7a640cf91133b61bd1f2ae68c9ddb00055e9a8ac1d.png'),
  ('AUATC',
   'Bon Iver',
   '1:47',
   '/assets/album-cover-default-32217dc68a771f3a44aa2b7a640cf91133b61bd1f2ae68c9ddb00055e9a8ac1d.png'),
  ('I Remember You',
   'Ramones',
   '1:45',
   'https://albumart.publicradio.org/mb/bc/bc3b5116-300a-3696-919d-bca466a3f143_e813.jpg'),
  ('March March',
   'The Chicks',
   '1:41',
   'https://albumart.publicradio.org/mb/fe/feeac938-9cb1-4e36-9b98-4ec2ad9a65c3_1a08.jpg'),
  ('May',
   'Lupin',
   '1:37',
   'https://albumart.publicradio.org/mb/16/1608212a-558a-4d20-bc01-3fc8b82e