In [137]:
from datetime import date
from datetime import timedelta
from urllib.parse import urljoin

import pandas as pd
import requests
from bs4 import BeautifulSoup

class Playlist:
    """Scrape a single programme (episode) page into show details and a track list.

    Creating an instance downloads ``URL`` and parses it immediately.

    Attributes:
        URL: the page address that was requested.
        content: the parsed ``BeautifulSoup`` tree of the page.
        show_details: dict of descriptive fields, see
            ``cleaned_descriptive_content``.
        show_playlist: ``pandas.DataFrame`` with one row per music segment,
            see ``cleaned_playlist_content``.
    """

    # Raised when an optional element is absent: ``find`` returned None
    # (AttributeError on attribute access, TypeError on subscripting) or the
    # tag does not carry the requested attribute (KeyError).
    _MISSING = (AttributeError, TypeError, KeyError)

    # Weekday name -> ``date.weekday()`` number.
    _WEEK_DAYS = {"Monday": 0,
                  "Tuesday": 1,
                  "Wednesday": 2,
                  "Thursday": 3,
                  "Friday": 4,
                  "Saturday": 5,
                  "Sunday": 6}

    # Track columns, in output order; fixed so an empty playlist still has them.
    _PLAYLIST_COLUMNS = ["Artist Image",
                         "Artist Name",
                         "Track Name",
                         "Record Label",
                         "Track Number"]

    def __init__(self, URL, timeout=30):
        """Download and parse ``URL``.

        Args:
            URL: address of the episode page.
            timeout: seconds to wait for the server before ``requests``
                raises, so a stalled connection cannot hang the notebook.
        """
        self.URL = URL
        self.content = BeautifulSoup(requests.get(URL, timeout=timeout).content, "html.parser")
        self.show_details = self.cleaned_descriptive_content()
        self.show_playlist = self.cleaned_playlist_content()

    @classmethod
    def _optional(cls, getter):
        """Return ``getter()``, or None when the element it looks up is absent."""
        try:
            return getter()
        except cls._MISSING:
            return None

    def day_clean(self, date_string, today=None):
        """Turn a relative day label into a formatted calendar date.

        Args:
            date_string: one of a weekday name ("Monday"), "Last <weekday>",
                "Today" or "Yesterday". Anything else is returned unchanged.
            today: reference date; defaults to ``date.today()``.

        Returns:
            The date formatted with ``'%a %d %b %Y'``, or ``date_string``
            itself when it is not a recognised label.
        """
        if today is None:
            today = date.today()
        last_prefix = "Last "
        if date_string in self._WEEK_DAYS:
            # Most recent occurrence of that weekday (0 days back if it is today).
            offset = (today.weekday() - self._WEEK_DAYS[date_string]) % 7
        elif (isinstance(date_string, str)
              and date_string.startswith(last_prefix)
              and date_string[len(last_prefix):] in self._WEEK_DAYS):
            # Slice the prefix off: str.strip would remove a *set of characters*
            # from both ends rather than the literal "Last " prefix.
            day_name = date_string[len(last_prefix):]
            offset = 7 + (today.weekday() - self._WEEK_DAYS[day_name]) % 7
        elif date_string == "Today":
            offset = 0
        elif date_string == "Yesterday":
            offset = 1
        else:
            return date_string
        show_date = today - timedelta(days=offset)
        return show_date.strftime('%a %d %b %Y')

    def cleaned_descriptive_content(self):
        """Extract the descriptive fields of the show from the parsed page.

        The title, banner and description are required: a missing element
        raises ``AttributeError``. The broadcast image, day and time are
        optional and become None when absent.

        Returns:
            dict with keys "Show Title", "Show Banner", "Show Description",
            "Service Image", "Service Day", "Service Time" and "Show URL".
        """
        page = self.content
        show_title = page.find("div", class_ ="br-masthead__title").find("a").text
        show_banner = page.find("h1", class_ = "no-margin").text
        try:
            show_description = page.find("div", class_ = "text--prose longest-synopsis").find("p").text
        except self._MISSING:
            # No long synopsis on the page: fall back to the short one.
            show_description = page.find("div", class_ = "synopsis-toggle__short").find("p").text

        broadcast = page.find("div", class_="broadcast-event programme programme--grid highlight-box--grid")
        service_img = self._optional(
            lambda: broadcast.find("div", class_="programme__img").find("img")['src'])
        service_day = self._optional(
            lambda: self.day_clean(
                broadcast.find("span", class_ = "broadcast-event__date text-base timezone--date").text))
        service_time = self._optional(
            lambda: broadcast.find("span", class_ = "timezone--time").text)

        return {"Show Title": show_title,
                "Show Banner": show_banner,
                "Show Description": show_description,
                "Service Image": service_img,
                "Service Day": service_day,
                "Service Time": service_time,
                "Show URL": self.URL}

    def cleaned_playlist_content(self):
        """Extract every music segment of the page as one table row.

        Artist image, artist name and track name are required for each
        segment; record label and track number are None when not listed.

        Returns:
            ``pandas.DataFrame`` with the track columns plus "Show URL".
            An episode without music segments yields an empty frame that
            still carries all columns.
        """
        cleaned_music = []
        for track in self.content.find_all("div", class_="segment segment--music"):
            artist_image_segment = track.find("div", class_="segment__artist-image")
            artist_image = artist_image_segment.find("img", class_="image lazyload")['data-src']
            track_segment = track.find("div", class_="segment__track")
            artist_name = track_segment.find("span", class_="artist").text
            track_name = track_segment.find("p", class_="no-margin").find("span").text
            # Bind track_segment as a default so each lambda sees its own segment.
            record_label = self._optional(
                lambda seg=track_segment: seg.find("abbr", title="Record Label").text.strip().strip("."))
            track_number = self._optional(
                lambda seg=track_segment: seg.find("abbr", title="Track Number").text.strip().strip("."))
            cleaned_music.append({"Artist Image": artist_image,
                                  "Artist Name": artist_name,
                                  "Track Name": track_name,
                                  "Record Label": record_label,
                                  "Track Number": track_number})
        track_listings = pd.DataFrame(cleaned_music, columns=self._PLAYLIST_COLUMNS)
        track_listings['Show URL'] = self.URL
        return track_listings

In [132]:
class Show:
    """Crawl a programme's paginated episode guide into a table of episodes.

    Creating an instance downloads the guide at ``URL`` and follows its
    "next" pagination links until there are none left.

    Attributes:
        URL: address of the first guide page.
        show_title_name: programme title read from the page masthead.
        show_urls: ``pandas.DataFrame`` with one row per listed episode,
            see ``get_show_urls``.
    """

    # Exact class string of one episode entry in the guide listing.
    _EPISODE_CLASS = "js-guideitem highlight-box--list block-link block-link--steal br-keyline br-blocklink-page br-page-linkhover-onbg015--hover episode-guide__episode"

    # Columns of the result, in output order; fixed so an empty guide still has them.
    _COLUMNS = ["Show Image",
                "Show URL",
                "Show Name",
                "Short Description",
                "Upcoming",
                "Show Master Title Name",
                "Show Master URL"]

    # Raised when the pagination link is absent: ``find`` returned None
    # (AttributeError / TypeError) or the anchor has no href (KeyError).
    _MISSING = (AttributeError, TypeError, KeyError)

    def __init__(self, URL, timeout=30):
        """Download the guide and collect every episode it lists.

        Args:
            URL: address of the first page of the episode guide.
            timeout: seconds to wait for each page before ``requests``
                raises, so a stalled connection cannot hang the notebook.
        """
        self.URL = URL
        self._timeout = timeout
        content = self._fetch_page(URL)
        self.show_title_name = content.find("div", class_ = "br-masthead__title").find("a").text
        # Hand the already-parsed first page over so it is not downloaded twice.
        self.show_urls = self.get_show_urls(first_page=content)

    def _fetch_page(self, url):
        """Download ``url`` and return its parsed ``BeautifulSoup`` tree."""
        return BeautifulSoup(requests.get(url, timeout=self._timeout).content, "html.parser")

    def _parse_episode(self, show):
        """Convert one episode entry of the guide listing into a row dict."""
        show_image = show.find("img", class_ = "image lazyload")['data-src']
        body = show.find("div", class_ = "programme__body")
        url = body.find("a", class_ = "br-blocklink__link block-link__target")['href']
        title = body.find("span", class_ = "programme__title gamma").text
        short_description = body.find("p", class_ = "programme__synopsis text--subtle centi").find("span").text.strip('.')
        # Entries that carry a broadcast-event block are flagged as upcoming.
        upcoming = show.find("div", class_ = "broadcast-event programme") is not None
        return {"Show Image": show_image,
                "Show URL": url,
                "Show Name": title,
                "Short Description": short_description,
                "Upcoming": upcoming,
                "Show Master Title Name": self.show_title_name,
                "Show Master URL": self.URL}

    def get_show_urls(self, first_page=None):
        """Walk every guide page and return all listed episodes.

        Args:
            first_page: optional, already-parsed tree of ``self.URL``; when
                omitted the first page is downloaded like every other page.

        Returns:
            ``pandas.DataFrame`` with the columns in ``_COLUMNS``; empty (but
            with those columns) when the guide lists no episodes.
        """
        cleaned_shows = []
        page_URL = self.URL
        content = first_page
        visited = set()
        # Stop if pagination ever points back to a page already processed.
        while page_URL not in visited:
            visited.add(page_URL)
            if content is None:
                content = self._fetch_page(page_URL)
            listing = content.find("div", class_ = "br-box-page programmes-page")
            for show in listing.find_all("div", class_ = self._EPISODE_CLASS):
                cleaned_shows.append(self._parse_episode(show))
            try:
                next_href = content.find("li", class_ = "pagination__next").find("a", rel= "next")['href']
            except self._MISSING:
                # No "next" link: this was the last page.
                break
            # Resolve against the current page instead of concatenating, so a
            # guide URL that already has a query string (or an absolute /
            # path-style href) still produces a valid address.
            page_URL = urljoin(page_URL, next_href)
            content = None

        return pd.DataFrame(cleaned_shows, columns=self._COLUMNS)

In [138]:
# Address of the first page of the episode guide to crawl.
INDIE_SHOW_GUIDE_URL = "https://www.bbc.co.uk/programmes/m00008zy/episodes/guide"

# Crawling happens inside the constructor; the resulting episode table is
# shown as this cell's output.
indie_show = Show(INDIE_SHOW_GUIDE_URL)
indie_show.show_urls

Unnamed: 0,Show Image,Show URL,Show Name,Short Description,Upcoming,Show Master Title Name,Show Master URL
0,https://ichef.bbci.co.uk/images/ic/320x180/p0c...,https://www.bbc.co.uk/programmes/m001gv4h,08/01/2023,The 1 for indie and alternative,True,Radio 1's Indie Show with Jack Saunders,https://www.bbc.co.uk/programmes/m00008zy/epis...
1,https://ichef.bbci.co.uk/images/ic/320x180/p0c...,https://www.bbc.co.uk/programmes/m001gkpl,01/01/2023,The 1 for indie and alternative,True,Radio 1's Indie Show with Jack Saunders,https://www.bbc.co.uk/programmes/m00008zy/epis...
2,https://ichef.bbci.co.uk/images/ic/320x180/p0c...,https://www.bbc.co.uk/programmes/m001g0lr,18/12/2022,The 1 for indie and alternative,True,Radio 1's Indie Show with Jack Saunders,https://www.bbc.co.uk/programmes/m00008zy/epis...
3,https://ichef.bbci.co.uk/images/ic/320x180/p0c...,https://www.bbc.co.uk/programmes/m001fv5f,11/12/2022,The 1 for indie and alternative,True,Radio 1's Indie Show with Jack Saunders,https://www.bbc.co.uk/programmes/m00008zy/epis...
4,https://ichef.bbci.co.uk/images/ic/320x180/p0c...,https://www.bbc.co.uk/programmes/m001fl2g,The best in indie and alternative,Two indie fans go head to head in a game of Ho...,False,Radio 1's Indie Show with Jack Saunders,https://www.bbc.co.uk/programmes/m00008zy/epis...
...,...,...,...,...,...,...,...
531,https://ichef.bbci.co.uk/images/ic/320x180/p0c...,https://www.bbc.co.uk/programmes/m000v9k8,22/04/2021,The 1 for Indie and Alternative,False,Radio 1's Indie Show with Jack Saunders,https://www.bbc.co.uk/programmes/m00008zy/epis...
532,https://ichef.bbci.co.uk/images/ic/320x180/p0c...,https://www.bbc.co.uk/programmes/m001bwwg,18/09/2022,The 1 for indie and alternative,False,Radio 1's Indie Show with Jack Saunders,https://www.bbc.co.uk/programmes/m00008zy/epis...
533,https://ichef.bbci.co.uk/images/ic/320x180/p0c...,https://www.bbc.co.uk/programmes/m00160p0,14/04/2022,The 1 for Indie and Alternative,False,Radio 1's Indie Show with Jack Saunders,https://www.bbc.co.uk/programmes/m00008zy/epis...
534,https://ichef.bbci.co.uk/images/ic/320x180/p0c...,https://www.bbc.co.uk/programmes/m000szbn,11/03/2021,The 1 for Indie and Alternative,False,Radio 1's Indie Show with Jack Saunders,https://www.bbc.co.uk/programmes/m00008zy/epis...


In [139]:
# Scrape the playlist and details of every episode that is not flagged as
# upcoming, then assemble the results into two tables.

# NOTE(review): positional offset into the non-upcoming episodes; presumably
# resumes after an earlier partial run - confirm before a full re-run.
START_POSITION = 515

aired_urls = (
    indie_show.show_urls
    .loc[~indie_show.show_urls["Upcoming"].astype(bool), "Show URL"]
    .iloc[START_POSITION:]
)

# Collect per-episode results in plain lists and build each frame once:
# DataFrame.append was removed in pandas 2.0 and re-copied the whole frame
# on every iteration.
playlist_frames = []
detail_records = []
for i, url in enumerate(aired_urls):
    data = Playlist(url)
    print(i)
    playlist_frames.append(data.show_playlist)
    detail_records.append(data.show_details)

# pd.concat raises on an empty list, so fall back to an empty frame.
shows_playlists = (
    pd.concat(playlist_frames, ignore_index=True) if playlist_frames else pd.DataFrame()
)
# Columns follow the key order of the show_details dicts.
shows_details = pd.DataFrame(detail_records)

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16


In [140]:
# Spot-check: show the last row of the collected show details.
shows_details.iloc[-1]

Service Day                                              None
Service Image                                            None
Service Time                                             None
Show Banner                                        06/03/2019
Show Description             The 1 for indie and alternative.
Show Title            Radio 1's Indie Show with Jack Saunders
Show URL            https://www.bbc.co.uk/programmes/m0002x3t
Name: 16, dtype: object

In [141]:
# Display the combined track listings collected from the scraped episodes.
shows_playlists

Unnamed: 0,Artist Image,Artist Name,Track Name,Record Label,Track Number,Show URL
0,https://ichef.bbci.co.uk/images/ic/96x96/p01c9...,Biffy Clyro,Living Is A Problem Because Everything Dies,,,https://www.bbc.co.uk/programmes/m0000bk5
1,https://ichef.bbci.co.uk/images/ic/96x96/p01c9...,Biffy Clyro,That Golden Rule,,,https://www.bbc.co.uk/programmes/m0000bk5
2,https://ichef.bbci.co.uk/images/ic/96x96/p01c9...,Gengahr,Atlas Please,Transgressive Records,1,https://www.bbc.co.uk/programmes/m0000bk5
3,https://ichef.bbci.co.uk/images/ic/96x96/p01c9...,cleopatrick,Hometown,Cleopatrick,1,https://www.bbc.co.uk/programmes/m0000bk5
4,https://ichef.bbci.co.uk/images/ic/96x96/p01c9...,Bodega,Name Escape,,,https://www.bbc.co.uk/programmes/m0000bk5
...,...,...,...,...,...,...
158,https://ichef.bbci.co.uk/images/ic/96x96/p01c9...,Jungle,"Heavy, California",XL,2,https://www.bbc.co.uk/programmes/m00009fq
159,https://ichef.bbci.co.uk/images/ic/96x96/p01c9...,HMLTD,Proxy Love,,,https://www.bbc.co.uk/programmes/m00009fq
160,https://ichef.bbci.co.uk/images/ic/96x96/p01c9...,Bloxx,Second Opinion,,,https://www.bbc.co.uk/programmes/m00009fq
161,https://ichef.bbci.co.uk/images/ic/96x96/p01c9...,Blossoms,How Long Will This Last?,,,https://www.bbc.co.uk/programmes/m00009fq
