In [4]:
from bs4 import BeautifulSoup
import pandas as pd
import requests
from datetime import date
from datetime import timedelta

class Playlist:
    """Scrape show details and the track listing from a programme web page.

    The page at ``URL`` is downloaded and parsed once, when the instance is
    created; the ``cleaned_*`` methods then read from the parsed document.

    Attributes:
        URL: The address the page was fetched from.
        content: The parsed page (a ``BeautifulSoup`` document).
    """

    # Weekday names mapped to the numbering used by ``date.weekday()``.
    _WEEK_DAYS = {
        "Monday": 0,
        "Tuesday": 1,
        "Wednesday": 2,
        "Thursday": 3,
        "Friday": 4,
        "Saturday": 5,
        "Sunday": 6,
    }
    _LAST_PREFIX = "Last "
    _DATE_FORMAT = '%a %d %b %Y'

    def __init__(self, URL, timeout=30):
        """Download ``URL`` and parse it with the ``html.parser`` backend.

        Args:
            URL: Address of the page to scrape.
            timeout: Seconds to wait for the server before giving up.
                Without a timeout ``requests.get`` can block indefinitely.

        Raises:
            requests.exceptions.RequestException: If the download fails or
                times out.
        """
        self.URL = URL
        response = requests.get(URL, timeout=timeout)
        self.content = BeautifulSoup(response.content, "html.parser")

    def day_clean(self, date_string):
        """Turn a relative day label into a formatted calendar date.

        Recognised labels are ``"Today"``, ``"Yesterday"``, a weekday name
        (the most recent such day, today included) and ``"Last <weekday>"``
        (one further week back). They are resolved against ``date.today()``.

        Args:
            date_string: The label to convert.

        Returns:
            The resolved date formatted as ``'%a %d %b %Y'``, or
            ``date_string`` unchanged when it is not a recognised label.
        """
        if not isinstance(date_string, str):
            return date_string

        today = date.today()
        week_days = self._WEEK_DAYS
        prefix = self._LAST_PREFIX

        if date_string == "Today":
            offset = 0
        elif date_string == "Yesterday":
            offset = 1
        elif date_string in week_days:
            offset = (today.weekday() - week_days[date_string]) % 7
        elif date_string.startswith(prefix) and date_string[len(prefix):] in week_days:
            # Slice the prefix off explicitly: str.strip('Last ') would strip a
            # *set of characters* from both ends, not the leading word.
            day_name = date_string[len(prefix):]
            offset = 7 + (today.weekday() - week_days[day_name]) % 7
        else:
            return date_string

        show_date = today - timedelta(days=offset)
        return show_date.strftime(self._DATE_FORMAT)

    def cleaned_descriptive_content(self):
        """Collect the show-level details from the parsed page.

        Returns:
            A dict with the keys ``"Show Title"``, ``"Show Banner"``,
            ``"Show Description"``, ``"Service Image"``, ``"Service Day"``
            (passed through ``day_clean``) and ``"Service Time"``.

        Raises:
            AttributeError, TypeError: If an expected element is missing from
                the page, because ``find`` then returns ``None``.
        """
        page = self.content
        show_title = page.find("div", class_="br-masthead__title").find("a").text
        show_banner = page.find("h1", class_="no-margin").text
        show_description = page.find("div", class_="text--prose longest-synopsis").find("p").text

        broadcast = page.find("div", class_="broadcast-event programme programme--grid highlight-box--grid")
        service_img = broadcast.find("div", class_="programme__img").find("img")['src']
        raw_day = broadcast.find("span", class_="broadcast-event__date text-base timezone--date").text
        service_time = broadcast.find("span", class_="timezone--time").text

        return {
            "Show Title": show_title,
            "Show Banner": show_banner,
            "Show Description": show_description,
            "Service Image": service_img,
            "Service Day": self.day_clean(raw_day),
            "Service Time": service_time,
        }

    @staticmethod
    def _abbr_text(track_segment, title):
        """Return the tidied text of the ``<abbr title=...>`` tag, or ``None`` if absent."""
        tag = track_segment.find("abbr", title=title)
        if tag is None:
            return None
        # Drop surrounding whitespace first, then any leading/trailing full stops.
        return tag.text.strip().strip(".")

    def cleaned_playlist_content(self):
        """Build a table of the music segments found on the parsed page.

        Returns:
            A ``pandas.DataFrame`` with one row per ``segment segment--music``
            element and the columns ``"Artist Image"``, ``"Artist Name"``,
            ``"Track Name"``, ``"Record Label"`` and ``"Track Number"``. The
            last two are ``None`` when the page does not give them.

        Raises:
            AttributeError, TypeError: If a segment lacks the artist image,
                artist name or track name elements.
        """
        cleaned_music = []
        for track in self.content.find_all("div", class_="segment segment--music"):
            artist_image_segment = track.find("div", class_="segment__artist-image")
            track_segment = track.find("div", class_="segment__track")
            cleaned_music.append({
                "Artist Image": artist_image_segment.find("img", class_="image lazyload")['data-src'],
                "Artist Name": track_segment.find("span", class_="artist").text,
                "Track Name": track_segment.find("p", class_="no-margin").find("span").text,
                "Record Label": self._abbr_text(track_segment, "Record Label"),
                "Track Number": self._abbr_text(track_segment, "Track Number"),
            })

        return pd.DataFrame(cleaned_music)

In [5]:
# Create a Playlist for one programme page; the constructor downloads and parses it.
test = Playlist(URL = "https://www.bbc.co.uk/programmes/m001fb0m")
# Display the URL stored on the instance.
test.URL

'https://www.bbc.co.uk/programmes/m001fb0m'

In [6]:
# Collect the show-level details (title, banner, description, broadcast info) as a dict.
test.cleaned_descriptive_content()

{'Show Title': "Radio 1's Indie Show with Jack Saunders",
 'Show Banner': 'Inhaler',
 'Show Description': "Inhaler join Jack to play a game of How Well Do You Know Your Fans, plus Jack's got your Indie News headlines with a brand new gig correspondent.",
 'Service Image': 'https://programmes.files.bbci.co.uk/programmes-frontend/images/logos/svg/bbc_radio_one/service-b5c93f6c5d.svg',
 'Service Day': 'Sun 27 Nov 2022',
 'Service Time': '21:00'}

In [7]:
# Build the DataFrame of tracks from the page's music segments.
test.cleaned_playlist_content()

Unnamed: 0,Artist Image,Artist Name,Track Name,Record Label,Track Number
0,https://ichef.bbci.co.uk/images/ic/96x96/p01c9...,Enter Shikari,Bull,,
1,https://ichef.bbci.co.uk/images/ic/96x96/p01c9...,Black Honey,Heavy,Foxfive Records,
2,https://ichef.bbci.co.uk/images/ic/96x96/p01c9...,Jamie T,Between The Rocks,Polydor,
3,https://ichef.bbci.co.uk/images/ic/96x96/p01c9...,Low Hummer,Panic Calls,,
4,https://ichef.bbci.co.uk/images/ic/96x96/p01c9...,Metronomy,It's Good To Be Back,,
5,https://ichef.bbci.co.uk/images/ic/96x96/p01c9...,CHVRCHES,The Mother We Share,Virgin Records,
6,https://ichef.bbci.co.uk/images/ic/96x96/p01c9...,Gorillaz,Baby Queen,,
7,https://ichef.bbci.co.uk/images/ic/96x96/p01c9...,Lizzy McAlpine,orange show speedway,Harbour Artists & Music,
8,https://ichef.bbci.co.uk/images/ic/96x96/p01c9...,The Snuts,Hallelujah Moment,Parlophone,
9,https://ichef.bbci.co.uk/images/ic/96x96/p01c9...,You Me at Six,Mixed Emotions (I Didn't Know How To Tell You ...,AWAL Recordings Ltd,
