In [1]:
import bs4
import requests
import pandas as pd
import random
import os
import datetime as dt
import time

In [2]:
def get_billboard_hot100(save_to_csv: bool = False, fp: str = None, timeout: float = 30.0):
    """Scrape the Billboard Hot 100 page and return songs and artists as a DataFrame.

    Parameters
    ----------
    save_to_csv : bool
        When True, also write the chart to ``fp`` as a ``;``-separated CSV,
        overwriting any existing file.
    fp : str, optional
        Target path for the CSV. Only used when ``save_to_csv`` is True; if it
        is missing a message is printed and nothing is written.
    timeout : float
        Seconds to wait for the server. Without a timeout ``requests`` may
        block indefinitely on a stalled connection.

    Returns
    -------
    pandas.DataFrame
        Columns ``song`` and ``artist``, one row per parsed chart entry, in
        page order.

    Raises
    ------
    requests.exceptions.HTTPError
        If the server answers with an error status.
    requests.exceptions.Timeout
        If the server does not answer within ``timeout`` seconds.
    """
    response = requests.get(
        url="https://www.billboard.com/charts/hot-100/index.php",
        timeout=timeout,
    )
    response.raise_for_status()
    soup = bs4.BeautifulSoup(response.text, "html.parser")
    li_items = soup.select("li.lrv-u-width-100p")

    songs = []
    artists = []
    # Only every second matched <li> (even positions) is parsed.
    for item in li_items[::2]:
        # Split on real line breaks instead of substituting a placeholder
        # character, so titles containing "_" stay intact.
        lines = (line.strip() for line in item.get_text().split("\n"))
        # NOTE(review): pieces of length <= 1 are treated as layout noise, so a
        # one-character title would be skipped - confirm against the page markup.
        fields = [line for line in lines if len(line) > 1][:2]
        if len(fields) < 2:
            # Incomplete entry: skip it so both columns stay aligned.
            continue
        songs.append(fields[0])
        artists.append(fields[1])

    chart = pd.DataFrame({"song": songs, "artist": artists})

    if save_to_csv:
        if not fp:
            print("No filepath given.")
        elif chart.empty:
            # Do not replace a previously saved chart with an empty file.
            print("No chart entries found; file not written.")
        else:
            try:
                chart.to_csv(fp, index=False, mode="w+", sep=";")
            except PermissionError:
                print(f"Permission denied: could not write {fp}.")
    return chart

In [3]:
def find_song_or_artists(user_entry: str = None, fp: str = None):
    """Search the saved chart for songs or artists matching ``user_entry``.

    Matching is case-insensitive. A row counts as a direct match when the
    query equals the whole field and as a partial match when the query is
    contained in it.

    Parameters
    ----------
    user_entry : str, optional
        Text to look for. ``None`` or a blank string yields no hits.
    fp : str, optional
        Path to the ``;``-separated chart CSV with ``song`` and ``artist``
        columns.

    Returns
    -------
    list of tuple or None
        Tuples ``(direct, kind, index)`` where ``direct`` is 1 for a direct
        match and 0 for a partial one, ``kind`` is ``"s"`` (song) or ``"a"``
        (artist) and ``index`` is the zero-based row position. Song hits come
        before artist hits. ``None`` is returned (after printing a message)
        when ``fp`` is missing or the file does not exist.
    """
    if not fp:
        print("no filepath given.")
        return
    try:
        # Read every field as plain text: by default pandas converts values
        # such as "None", "NA" or "null" to NaN, which would break .lower().
        music_df = pd.read_csv(fp, sep=";", dtype=str, keep_default_na=False)
    except FileNotFoundError:
        print("file not found")
        return

    # A blank query is a substring of every field and would match all rows.
    if user_entry is None or not user_entry.strip():
        return []
    needle = user_entry.lower()

    hit_lst = []    # entries are tuples (direct match: 0 or 1, "s"ong or "a"rtist, index in df)
    for kind, column in (("s", "song"), ("a", "artist")):
        for index, value in enumerate(music_df[column].tolist()):
            haystack = value.lower()
            if needle == haystack:
                hit_lst.append((1, kind, index))
            elif needle in haystack:
                hit_lst.append((0, kind, index))
    return hit_lst

In [4]:
 def print_result(result_tup:[tuple] = None, fp:[str]=None):
    """Print the song, artist and chart place for one search hit.

    ``result_tup`` is a hit tuple whose third element is the zero-based row
    position in the chart CSV at ``fp``. A message is printed and nothing
    else happens when ``fp`` is missing or the file cannot be found.
    """
    if not fp:
        print("no filepath given.")
        return
    try:
        chart = pd.read_csv(fp, sep=";")
    except FileNotFoundError:
        print("file not found")
        return

    row_position = result_tup[2]
    row = chart.iloc[row_position]
    artist_name = row["artist"]
    song_title = row["song"]
    # Places are one-based, row positions are zero-based.
    print(f"{song_title} by {artist_name} on place {row_position+1}")

In [5]:
def return_random(this_rank: int = None, fp: str = None):
    """Print a randomly chosen chart entry other than the one at ``this_rank``.

    Parameters
    ----------
    this_rank : int, optional
        Zero-based row position of the entry to exclude. ``0`` (the first
        row) is a valid value; only ``None`` counts as "not given".
    fp : str, optional
        Path to the ``;``-separated chart CSV with ``song`` and ``artist``
        columns.

    Returns
    -------
    None
        The recommendation, or a message explaining why none could be made,
        is printed.
    """
    if not fp:
        print("no filepath given.")
        return
    # Compare against None explicitly: a truthiness test would reject row 0.
    if this_rank is None:
        print("no rank given")
        return
    try:
        # Keep every field as text so titles like "None" are not turned into NaN.
        music_df = pd.read_csv(fp, sep=";", dtype=str, keep_default_na=False)
    except FileNotFoundError:
        print("file not found")
        return

    # Draw from the rows actually present instead of assuming exactly 100,
    # and exclude the current entry up front so no redraw loop is needed.
    candidates = [index for index in range(len(music_df)) if index != this_rank]
    if not candidates:
        print("no other entry available")
        return
    recom_index = random.choice(candidates)
    row = music_df.iloc[recom_index]
    reco_artist = row["artist"]
    reco_song = row["song"]
    print(f"Another entry from Billboard Hot 100 would be {reco_artist} with {reco_song} currently on place {recom_index +1}.")

In [6]:
def user_search(fp: str = "../data/billboard_hot100.csv"):
    """Run an interactive search loop over the saved chart.

    The user is asked for a search term until ``end_now`` is entered. For
    each term the matching entry is printed together with a random
    recommendation. When several entries match, they are listed and the user
    picks one by its list number.

    Parameters
    ----------
    fp : str
        Path to the chart CSV passed on to the lookup and print helpers.
    """
    while True:
        user = input("What are you looking for? ")
        if user == "end_now":
            break
        results = find_song_or_artists(user, fp)
        if results is None:
            # The lookup returns None when it cannot read the chart file;
            # no further search can succeed, so leave the loop.
            return
        if len(results) == 0:
            print("No match found")
            continue
        if len(results) == 1:
            # A lone hit is shown whether it is a direct or a partial match.
            chosen = results[0]
            print("Direct Match:" if chosen[0] == 1 else "Partial Match:")
        else:
            print("Partial Matches:")
            for number, hit in enumerate(results, start=1):
                print(f" + {number}: ", end="")
                print_result(hit, fp=fp)
            chosen = None
            while chosen is None:
                answer = input(f"Please specify your entry (1 to {len(results)}): ").strip()
                # Accept only list numbers 1..len(results); 0 would silently
                # select the last hit and len(results)+1 would be out of range.
                if answer.isdecimal() and 1 <= int(answer) <= len(results):
                    chosen = results[int(answer) - 1]
                else:
                    print("please check entry.")
            print(" - - -\nMatch:")
        print_result(chosen, fp=fp)
        return_random(chosen[2], fp=fp)
        print("\n ************** new request *************\n")

In [7]:
def check_last_update_hot100(fp: str = None, today: dt.date = None):
    """Report whether the saved chart file predates the latest weekly refresh.

    The chart is assumed to refresh every Tuesday. The file counts as up to
    date when it was last modified on or after the most recent Tuesday
    (today included, if today is a Tuesday).

    Parameters
    ----------
    fp : str, optional
        Path to the saved chart file.
    today : datetime.date, optional
        Reference date; defaults to the current local date. Mainly useful
        for checking the logic against a fixed day.

    Returns
    -------
    bool
        ``False`` when the file is up to date (a message with the next
        expected refresh is printed). ``True`` when an update is needed:
        no path given, file missing, or file older than the most recent
        Tuesday.
    """
    if not fp:
        print("no filepath given.")
        return True

    if today is None:
        today = dt.date.today()
    elif isinstance(today, dt.datetime):
        # date and datetime objects cannot be compared with each other.
        today = today.date()

    # date.weekday() is locale-independent: Monday == 0, Tuesday == 1.
    # The modulo makes a Monday map to the Tuesday six days earlier.
    days_since_tuesday = (today.weekday() - 1) % 7
    current_tuesday = today - dt.timedelta(days=days_since_tuesday)

    try:
        modified_on = dt.date.fromtimestamp(os.path.getmtime(fp))
    except FileNotFoundError:
        # Nothing saved yet, so there is nothing that could be up to date.
        return True

    if current_tuesday <= modified_on:
        # The next refresh follows the refresh day, not the file's own date.
        next_update = current_tuesday + dt.timedelta(days=7)
        print(f"File is up to date (next update expected for {next_update.strftime('%d.%m.%Y')}).")
        return False
    return True

In [8]:
def update_billboard_hot100(fp: str = "../data/billboard_hot100.csv"):
    """Refresh the saved chart file, asking first when it is already current.

    Parameters
    ----------
    fp : str
        Path of the chart CSV to check and (re)write.

    The chart is downloaded straight away when the freshness check does not
    report the file as up to date, or when the file does not exist yet. When
    the file is reported as up to date the user is asked for confirmation;
    ``y``/``yes`` in any letter case triggers the download.
    """
    try:
        status = check_last_update_hot100(fp=fp)
    except FileNotFoundError:
        # No saved chart yet: treat it as outdated.
        status = None

    # Only an explicit False signals an up-to-date file; any other value
    # (including None) is treated as "update needed".
    if status is False:
        answer = input("Update anyway (Y/n): ").strip().lower()
        if answer not in ("y", "yes"):
            return
    get_billboard_hot100(save_to_csv=True, fp=fp)

In [10]:
# Refresh the saved chart CSV; asks for confirmation when the file is reported as up to date.
update_billboard_hot100()


File is up to date (next update expected for 22.02.2022).
Update anyway (Y/n): n


In [11]:
# Start the interactive search loop; entering "end_now" at the prompt exits.
user_search()

What are you looking for? Stay
Direct Match:
Stay by The Kid LAROI & Justin Bieber on place 5
Another entry from Billboard Hot 100 would be Jordan Davis Featuring Luke Bryan with Buy Dirt currently on place 35.

 ************** new request *************

What are you looking for? Bad guy
No match found
What are you looking for? end_now
