In [None]:
from datetime import date
from datetime import timedelta
from urllib.parse import urljoin

from bs4 import BeautifulSoup
import pandas as pd
import requests

In [199]:
class Playlist:
    """Scrape one broadcast page into broadcast details and a track list.

    Constructing an instance downloads ``URL`` and parses it straight away.

    Attributes:
        broadcast_URL: The URL passed to the constructor.
        broadcast_key: ``hash(URL)``; written to the "Broadcast Key" column
            of both DataFrames.
        content: The page parsed with BeautifulSoup's "html.parser".
        broadcast_details: One-row DataFrame built by
            ``get_broadcast_details()``.
        playlist: DataFrame with one row per music segment, built by
            ``get_playlist()``.

    NOTE: Python salts ``str`` hashes per interpreter process unless
    PYTHONHASHSEED is fixed, so ``broadcast_key`` values are only comparable
    within a single session.
    """

    # Seconds to wait on the server so a stalled connection cannot hang forever.
    REQUEST_TIMEOUT = 30

    # What a lookup raises when an optional element is absent: ``find``
    # returned None (AttributeError on ``.find``/``.text``, TypeError on
    # ``None[...]``) or the tag lacks the attribute (KeyError).
    _MISSING_ELEMENT_ERRORS = (AttributeError, TypeError, KeyError)

    # Weekday name -> ``date.weekday()`` number.
    _WEEK_DAYS = {
        "Monday": 0,
        "Tuesday": 1,
        "Wednesday": 2,
        "Thursday": 3,
        "Friday": 4,
        "Saturday": 5,
        "Sunday": 6,
    }
    _LAST_PREFIX = "Last "
    _DATE_FORMAT = '%a %d %b %Y'

    # Fixed column order so an empty playlist still has the expected schema.
    _PLAYLIST_COLUMNS = [
        "Artist Image URL",
        "Artist Name",
        "Track Name",
        "Record Label",
        "Track Number",
    ]

    def __init__(self, URL):
        """Download and parse the broadcast page at ``URL``."""
        self.broadcast_URL = URL
        self.broadcast_key = hash(URL)
        response = requests.get(URL, timeout=self.REQUEST_TIMEOUT)
        self.content = BeautifulSoup(response.content, "html.parser")
        self.broadcast_details = self.get_broadcast_details()
        self.playlist = self.get_playlist()

    def day_clean(self, date_string, today=None):
        """Turn a relative day label into a formatted calendar date.

        Recognised labels are "Today", "Yesterday", a weekday name
        ("Monday" ... "Sunday") and "Last <weekday>". Surrounding whitespace
        is ignored when matching.

        Args:
            date_string: The label scraped from the page.
            today: Reference date; defaults to ``date.today()``.

        Returns:
            The resolved date formatted as ``'%a %d %b %Y'``, or
            ``date_string`` unchanged when the label is not recognised.
        """
        if today is None:
            today = date.today()
        # Scraped text may carry padding; non-strings fall through untouched.
        label = date_string.strip() if isinstance(date_string, str) else date_string
        prefix = self._LAST_PREFIX

        if label == "Today":
            offset = 0
        elif label == "Yesterday":
            offset = 1
        elif label in self._WEEK_DAYS:
            # Most recent such weekday, counting today itself (offset 0..6).
            offset = (today.weekday() - self._WEEK_DAYS[label]) % 7
        elif (isinstance(label, str) and label.startswith(prefix)
              and label[len(prefix):] in self._WEEK_DAYS):
            # "Last <weekday>" is one full week earlier than the plain name.
            # Slice the prefix off: str.strip() removes a *character set*,
            # not a prefix, and only worked here by coincidence.
            weekday = self._WEEK_DAYS[label[len(prefix):]]
            offset = 7 + (today.weekday() - weekday) % 7
        else:
            return date_string

        show_date = today - timedelta(days=offset)
        return show_date.strftime(self._DATE_FORMAT)

    def get_broadcast_details(self):
        """Collect the page-level broadcast metadata.

        The show title, banner and a synopsis are required and raise if the
        expected markup is missing. Image URL, service day and service time
        are optional and become ``None`` when absent.

        Returns:
            A one-row ``pandas.DataFrame`` with the columns "Show Title",
            "Broadcast Banner", "Broadcast Long Description",
            "Service Image URL", "Service Day", "Service Time" and
            "Broadcast Key".
        """
        page = self.content
        missing = self._MISSING_ELEMENT_ERRORS

        show_title = page.find("div", class_="br-masthead__title").find("a").text
        broadcast_banner = page.find("h1", class_="no-margin").text

        # Prefer the longest synopsis; fall back to the short one.
        try:
            long_description = page.find(
                "div", class_="text--prose longest-synopsis").find("p").text
        except missing:
            long_description = page.find(
                "div", class_="synopsis-toggle__short").find("p").text

        # ``broadcast`` may be None; the lookups below then resolve to None.
        broadcast = page.find(
            "div",
            class_="broadcast-event programme programme--grid highlight-box--grid")

        try:
            service_img_url = broadcast.find(
                "div", class_="programme__img").find("img")['src']
        except missing:
            service_img_url = None

        try:
            raw_day = broadcast.find(
                "span",
                class_="broadcast-event__date text-base timezone--date").text
            service_day = self.day_clean(raw_day)
        except missing:
            service_day = None

        try:
            service_time = broadcast.find("span", class_="timezone--time").text
        except missing:
            service_time = None

        broadcast_details = {
            "Show Title": show_title,
            "Broadcast Banner": broadcast_banner,
            "Broadcast Long Description": long_description,
            "Service Image URL": service_img_url,
            "Service Day": service_day,
            "Service Time": service_time,
            "Broadcast Key": self.broadcast_key,
        }
        return pd.DataFrame([broadcast_details])

    @staticmethod
    def _abbr_text(track_segment, title):
        """Return the cleaned text of ``<abbr title=title>``, or None if absent."""
        tag = track_segment.find("abbr", title=title)
        if tag is None:
            return None
        return tag.text.strip().strip(".")

    def get_playlist(self):
        """Collect every music segment on the page.

        Artist and track name are required for each segment. Artist image,
        record label and track number are optional and become ``None``.

        Returns:
            A ``pandas.DataFrame`` with one row per segment and the columns
            "Artist Image URL", "Artist Name", "Track Name", "Record Label",
            "Track Number" and "Broadcast Key". The columns are present even
            when the page has no music segments.
        """
        rows = []
        for track in self.content.find_all("div", class_="segment segment--music"):
            try:
                artist_image_url = track.find(
                    "div", class_="segment__artist-image").find(
                        "img", class_="image lazyload")['data-src']
            except self._MISSING_ELEMENT_ERRORS:
                # A segment without artwork should not abort the whole playlist.
                artist_image_url = None

            track_segment = track.find("div", class_="segment__track")
            rows.append({
                "Artist Image URL": artist_image_url,
                "Artist Name": track_segment.find("span", class_="artist").text,
                "Track Name": track_segment.find(
                    "p", class_="no-margin").find("span").text,
                "Record Label": self._abbr_text(track_segment, "Record Label"),
                "Track Number": self._abbr_text(track_segment, "Track Number"),
            })

        playlist_df = pd.DataFrame(rows, columns=self._PLAYLIST_COLUMNS)
        playlist_df['Broadcast Key'] = self.broadcast_key
        return playlist_df

In [193]:
class Show:
    """Scrape a show's episode-guide pages into DataFrames.

    Constructing an instance downloads ``URL``, reads the show name from it
    and then follows the "next" pagination links to list every broadcast.

    Attributes:
        show_URL: The URL passed to the constructor.
        show_key: ``hash(URL)``; written to the "Show Key" columns.
        show_name: Link text inside the ``br-masthead__title`` div.
        broadcasts: DataFrame with one row per listed broadcast, built by
            ``get_broadcasts()``.
        show_details: One-row DataFrame with "Show URL", "Show Key" and
            "Show Name".

    NOTE: Python salts ``str`` hashes per interpreter process unless
    PYTHONHASHSEED is fixed, so the key columns are only comparable within a
    single session.
    """

    # Seconds to wait on the server so a stalled connection cannot hang forever.
    REQUEST_TIMEOUT = 30

    # What a lookup raises when an optional element is absent: ``find``
    # returned None (AttributeError on ``.find``, TypeError on ``None[...]``)
    # or the tag lacks the attribute (KeyError).
    _MISSING_ELEMENT_ERRORS = (AttributeError, TypeError, KeyError)

    # Fixed column order so an empty listing still has the expected schema.
    _BROADCAST_COLUMNS = [
        "Broadcast Image URL",
        "Broadcast URL",
        "Broadcast Title",
        "Broadcast Short Description",
        "Is Broadcast Upcoming",
        "Broadcast Key",
    ]

    _EPISODE_CLASS = ("js-guideitem highlight-box--list block-link "
                      "block-link--steal br-keyline br-blocklink-page "
                      "br-page-linkhover-onbg015--hover episode-guide__episode")

    def __init__(self, URL):
        """Download the show page at ``URL`` and scrape all of its broadcasts."""
        self.show_URL = URL
        self.show_key = hash(URL)
        first_page = self._fetch(URL)
        self.show_name = first_page.find(
            "div", class_="br-masthead__title").find("a").text
        # Hand the parsed page over so it is not downloaded a second time.
        self.broadcasts = self.get_broadcasts(first_page)
        self.show_details = pd.DataFrame([{"Show URL": self.show_URL,
                                           "Show Key": self.show_key,
                                           "Show Name": self.show_name}])

    def _fetch(self, url):
        """Download ``url`` and return it parsed with "html.parser"."""
        response = requests.get(url, timeout=self.REQUEST_TIMEOUT)
        return BeautifulSoup(response.content, "html.parser")

    @staticmethod
    def _parse_broadcast(broadcast):
        """Turn one episode-guide entry into a row dict."""
        body = broadcast.find("div", class_="programme__body")
        broadcast_url = body.find(
            "a", class_="br-blocklink__link block-link__target")['href']
        synopsis = body.find(
            "p", class_="programme__synopsis text--subtle centi").find("span").text
        # An entry counts as upcoming when it carries a broadcast-event div.
        upcoming = broadcast.find(
            "div", class_="broadcast-event programme") is not None
        return {
            "Broadcast Image URL": broadcast.find(
                "img", class_="image lazyload")['data-src'],
            "Broadcast URL": broadcast_url,
            "Broadcast Title": body.find(
                "span", class_="programme__title gamma").text,
            "Broadcast Short Description": synopsis.strip('.'),
            "Is Broadcast Upcoming": upcoming,
            "Broadcast Key": hash(broadcast_url),
        }

    def get_broadcasts(self, first_page=None):
        """List every broadcast across all pages of the episode guide.

        Starts at ``show_URL`` and keeps following the ``rel="next"`` link in
        the ``pagination__next`` item until there is none.

        Args:
            first_page: Optional already-parsed document for ``show_URL``;
                when omitted the page is downloaded.

        Returns:
            A ``pandas.DataFrame`` with one row per broadcast and the columns
            "Broadcast Image URL", "Broadcast URL", "Broadcast Title",
            "Broadcast Short Description", "Is Broadcast Upcoming",
            "Broadcast Key" and "Show Key". The columns are present even when
            no broadcasts are listed.
        """
        rows = []
        page_url = self.show_URL
        content = first_page if first_page is not None else self._fetch(page_url)
        visited = {page_url}

        while True:
            container = content.find("div", class_="br-box-page programmes-page")
            for broadcast in container.find_all("div", class_=self._EPISODE_CLASS):
                rows.append(self._parse_broadcast(broadcast))

            try:
                next_href = content.find(
                    "li", class_="pagination__next").find("a", rel="next")['href']
            except self._MISSING_ELEMENT_ERRORS:
                break  # no "next" link: this was the last page

            # Resolve against the current page so query-only, relative and
            # absolute hrefs are all handled correctly.
            page_url = urljoin(page_url, next_href)
            if page_url in visited:
                break  # a self-referencing "next" link would loop forever
            visited.add(page_url)
            content = self._fetch(page_url)

        broadcasts_df = pd.DataFrame(rows, columns=self._BROADCAST_COLUMNS)
        broadcasts_df['Show Key'] = self.show_key
        return broadcasts_df