In [1]:
import bs4
import requests
import pandas as pd
import random
import os
import datetime as dt
import time

In [2]:
def get_billboard_hot100(save_to_csv: bool = False, fp: str = None):
    """Scrape the Billboard Hot 100 page and return songs and artists as a DataFrame.

    Parameters
    ----------
    save_to_csv : bool
        When True, also write the chart to ``fp`` as a ``;``-separated CSV.
    fp : str
        Target path of the CSV; only used when ``save_to_csv`` is True.

    Returns
    -------
    pandas.DataFrame
        Columns ``song`` and ``artist``, one row per scraped chart entry in
        page order.

    Raises
    ------
    requests.HTTPError
        Propagated from ``response.raise_for_status()``.
    IndexError
        If a list item does not contain at least two usable text fields.
    """
    response = requests.get(
        url="https://www.billboard.com/charts/hot-100/index.php",
        timeout=30,  # without a timeout a stalled connection would block forever
    )
    response.raise_for_status()
    soup = bs4.BeautifulSoup(response.text, "html.parser")
    li_items = soup.select("li.lrv-u-width-100p")

    songs = []
    artists = []
    # Only every second item (even index) is evaluated; the odd ones are skipped.
    for position, item in enumerate(li_items[::2]):
        # Split on the real line breaks. Substituting "_" for "\n" first would
        # cut titles that contain an underscore themselves.
        # Fragments of length 0 or 1 are discarded.
        fields = [part for part in item.get_text().split("\n") if len(part) > 1]
        if len(fields) < 2:
            raise IndexError(
                f"could not extract song and artist from chart entry {position + 1}"
            )
        # First usable field is taken as the song, the second as the artist.
        songs.append(fields[0])
        artists.append(fields[1])

    chart = pd.DataFrame({"song": songs, "artist": artists})
    if save_to_csv:
        if not fp:
            print("No filepath given.")
        elif chart.empty:
            # Do not replace a previously saved chart with an empty file.
            print("No chart entries scraped; file left untouched.")
        else:
            try:
                chart.to_csv(fp, index=False, mode="w+", sep=";")
            except OSError as err:
                # Covers PermissionError (e.g. file locked / not writable) as
                # well as a missing target directory.
                print(f"Could not write {fp}: {err}")
    return chart

In [3]:
def find_song_or_artists(user_entry: str = None, fp: str = None):
    """Search the saved chart CSV for a song or artist, ignoring case.

    Parameters
    ----------
    user_entry : str
        Search term.
    fp : str
        Path to the ``;``-separated CSV with ``song`` and ``artist`` columns.

    Returns
    -------
    list of tuple or None
        One tuple ``(direct, kind, index)`` per hit, where ``direct`` is 1 for
        an exact match and 0 for a substring match, ``kind`` is ``"s"`` (song)
        or ``"a"`` (artist) and ``index`` is the 0-based row position. All
        song hits come first, followed by all artist hits.
        ``None`` (after printing a message) when no path or search term is
        given or the file does not exist.
    """
    if not fp:
        print("no filepath given.")
        return
    if user_entry is None:
        print("no search term given.")
        return
    try:
        # Read every cell as a plain string: with pandas' default NA handling,
        # titles such as "NA" or "null" become float NaN and break .lower().
        music_df = pd.read_csv(fp, sep=";", dtype=str, keep_default_na=False)
    except FileNotFoundError:
        print("file not found")
        return

    needle = user_entry.lower()  # lower-case the search term once, not per row
    hit_lst = []
    for kind, column in (("s", "song"), ("a", "artist")):
        for index, value in enumerate(music_df[column]):
            haystack = value.lower()
            if needle == haystack:
                hit_lst.append((1, kind, index))
            elif needle in haystack:
                hit_lst.append((0, kind, index))
    return hit_lst

In [4]:
def print_result(result_tup: tuple = None, fp: str = None):
    """Print song, artist and chart place for one search hit.

    Parameters
    ----------
    result_tup : tuple
        Hit tuple whose third element (``result_tup[2]``) is the 0-based row
        position in the CSV; the printed place is that position plus one.
    fp : str
        Path to the ``;``-separated CSV with ``song`` and ``artist`` columns.

    Returns
    -------
    None
        Output goes to stdout. A short message is printed instead when the
        path or the hit is missing or the file does not exist.
    """
    if not fp:
        print("no filepath given.")
        return
    if result_tup is None:
        print("no result given.")
        return
    try:
        # Keep cells as literal strings so titles like "NA" are not shown as "nan".
        music_df = pd.read_csv(fp, sep=";", dtype=str, keep_default_na=False)
    except FileNotFoundError:
        print("file not found")
        return

    row = music_df.iloc[result_tup[2]]  # look the row up once for both fields
    print(f"{row['song']} by {row['artist']} on place {result_tup[2]+1}")

In [5]:
def return_random(this_rank: int = None, fp: str = None):
    """Print a randomly chosen chart entry other than the given one.

    Parameters
    ----------
    this_rank : int
        0-based row position of the entry to exclude (it is compared against
        the row index used for the lookup, so 0 refers to the first row).
    fp : str
        Path to the ``;``-separated CSV with ``song`` and ``artist`` columns.

    Returns
    -------
    None
        The recommendation is printed to stdout. A short message is printed
        instead when an argument is missing, the file does not exist or there
        is no other row to recommend.
    """
    if not fp:
        print("no filepath given.")
        return
    # Compare against None explicitly: position 0 (the first row) is falsy
    # but perfectly valid.
    if this_rank is None:
        print("no rank given")
        return
    try:
        music_df = pd.read_csv(fp, sep=";", dtype=str, keep_default_na=False)
    except FileNotFoundError:
        print("file not found")
        return

    # Draw from the rows that actually exist rather than a fixed 0..99 range.
    candidates = [index for index in range(len(music_df)) if index != this_rank]
    if not candidates:
        print("no other entry available")
        return
    recom_index = random.choice(candidates)
    row = music_df.iloc[recom_index]
    print(f"Another entry from Billboard Hot 100 would be {row['artist']} with {row['song']} currently on place {recom_index +1}.")

In [6]:
def user_search():
    """Interactive search loop over the saved Billboard Hot 100 CSV.

    Repeatedly asks for a search term until the user types ``end_now``.
    For each term the matching entries are looked up; a single hit is shown
    directly, several hits are listed and the user picks one by number.
    The chosen entry is printed together with a random other chart entry.
    """
    csv_fp = "../data/billboard_hot100.csv"
    while True:
        user = input("What are you looking for? ")
        if user == "end_now":
            break
        results = find_song_or_artists(user, csv_fp)
        if results is None:
            # The lookup could not read the chart file (it has already printed
            # why); asking again would only repeat the same failure.
            break
        if len(results) == 0:
            print("No match found")
            continue
        if len(results) == 1:
            # A single hit is shown whether it is exact or only partial.
            print("Direct Match:" if results[0][0] == 1 else "Partial Match:")
            chosen = results[0]
        else:
            print("Partial Matches:")
            for number, hit in enumerate(results, start=1):
                print(f" + {number}: ", end="")
                print_result(hit, fp=csv_fp)
            chosen = None
            while chosen is None:
                specify = input(f"Please specify your entry (1 to {len(results)}): ")
                # isdecimal() only accepts characters int() can convert; the
                # range check keeps 0 and len(results)+1 from slipping through.
                if specify.isdecimal() and 1 <= int(specify) <= len(results):
                    chosen = results[int(specify) - 1]
                else:
                    print("please check entry.")
            print(" - - -\nMatch:")
        print_result(chosen, fp=csv_fp)
        return_random(chosen[2], fp=csv_fp)
        print("\n ************** new request *************\n")

In [7]:
def check_last_update_hot100(fp: str = None):
    """Tell whether the chart file is older than the most recent Tuesday.

    Parameters
    ----------
    fp : str
        Path of the saved chart file whose modification date is checked.

    Returns
    -------
    bool or None
        ``False`` when the file was modified on or after the most recent
        Tuesday (today counts when today is a Tuesday); a message with the
        following Tuesday is printed in that case.
        ``True`` when the file is older than that or does not exist.
        ``None`` (after printing a message) when no path is given.
    """
    if not fp:
        print("no filepath given.")
        return
    today = dt.date.today()
    # date.weekday(): Monday is 0, so Tuesday is 1. The modulo yields 0 on a
    # Tuesday and 1..6 on the other days, so every weekday is covered.
    days_since_tuesday = (today.weekday() - 1) % 7
    current_tuesday = today - dt.timedelta(days=days_since_tuesday)
    try:
        modification_date = dt.date.fromtimestamp(os.path.getmtime(fp))
    except OSError:
        # No readable file yet, so there is nothing that could be up to date.
        print("file not found")
        return True
    if current_tuesday <= modification_date:
        next_update = current_tuesday + dt.timedelta(7)
        print(f"File is up to date (next update expected for {next_update.strftime('%d.%m.%Y')}).")
        return False
    return True

In [8]:
def update_billboard_hot100():
    """Refresh the saved chart CSV, asking first when it is reported up to date.

    ``check_last_update_hot100`` signals an up-to-date file by returning
    exactly ``False``; only then is the user asked for confirmation. Any other
    result leads straight to a new download.
    """
    csv_fp = "../data/billboard_hot100.csv"
    # Test for `is False` rather than falsiness, so that a result of None is
    # not mistaken for "up to date".
    if check_last_update_hot100(fp=csv_fp) is False:
        # Accept "y" in either case; anything else keeps the current file.
        if input("Update anyway (Y/n): ").strip().lower() != "y":
            return
    get_billboard_hot100(save_to_csv=True, fp=csv_fp)

In [9]:
# Refresh the saved chart CSV; asks for confirmation when the file is reported up to date.
update_billboard_hot100()


File is up to date (next update expected for 22.02.2022).
Update anyway (Y/n): n


In [10]:
# Start the interactive search loop; type "end_now" at the prompt to leave it.
user_search()

What are you looking for? STay
Direct Match:
Stay by The Kid LAROI & Justin Bieber on place 5
Another entry from Billboard Hot 100 would be Lil Nas X with Thats What I Want currently on place 12.

 ************** new request *************

What are you looking for? stay
Direct Match:
Stay by The Kid LAROI & Justin Bieber on place 5
Another entry from Billboard Hot 100 would be Stephanie Beatriz with Waiting On A Miracle currently on place 53.

 ************** new request *************

What are you looking for? sta
Partial Matches:
 + 1: Stay by The Kid LAROI & Justin Bieber on place 5
 + 2: You Should Probably Leave by Chris Stapleton on place 30
 + 3: Megan's Piano by Megan Thee Stallion on place 99
Please specify your entry (1 to 3): eminm
please check entry.
Please specify your entry (1 to 3): 1
 - - -
Match:
Stay by The Kid LAROI & Justin Bieber on place 5
Another entry from Billboard Hot 100 would be Taylor Swift with All Too Well (Taylor's Version) currently on place 84.

 *****

# OLD STUFF, probably trash

In [11]:
### Steps to get to the get_billboard_hot100 function ###

In [12]:
# response = requests.get(url="https://www.billboard.com/charts/hot-100/index.php")
# response.raise_for_status()
# website = response.text                             # read out the html code as text
# soup = bs4.BeautifulSoup(website, "html.parser")    # make soup

In [13]:
# li_items = soup.find_all("li", {"class": "lrv-u-width-100p"})
# li_items = soup.select(".lrv-u-width-100p")
# li_items = soup.select("li.lrv-u-width-100p")
# print(len(li_items))

There are twice as many items as expected; on exploring, it turned out that the odd-indexed list entries are just a repetition of the numbers.<br>
**→** when looping, use only the even-indexed list entries.

Explore one (even) entry (replacing the excessive *\n* with underscores):

In [14]:
# x = li_items[6].get_text().replace("\n","_")
# x

The first non-`_` entry is the song name, the second the artist's name.<br>
To create a df, make two lists for songs / artists and loop through the even-indexed entries.

In [15]:
# songs = []
# artists = []
# for _ in range(len(li_items)):
#     if _ % 2 == 0:
#         this_scraped = li_items[_].get_text().replace("\n","_")
#         this_entry = []
#         for c in this_scraped.split("_"):
#             if c != "" and len(c) > 1:
#                 this_entry.append(c)
#                 if len(this_entry) == 2:
#                     break
#         songs.append(this_entry[0])
#         artists.append(this_entry[1])

In [16]:
# working for 99 entries:
# ranks = []
# songs = []
# interprets = []
# for _ in range(len(li_items)):
#     if _ % 2 == 0:
#         a = li_items[_].get_text()
#         songs.append(a.strip("\n").split("\n")[0])
#         interprets.append(a.strip("\n").split("\n")[3])
#         ranks.append(int(_/2+1))

In [17]:
# top100 = pd.DataFrame({"song":songs,
#                        "artist":artists
#                       })
# top100.head()

In [18]:
# top100.to_csv("../data/song_artists.csv", index=False)

In [19]:
# def search_csv(user_entry:[str]=None, fp:[str]=None, return_list:[bool] = False):
#     if user_entry and len(user_entry) >= 3:
#         search_results = []
#         df = pd.read_csv(fp, sep=";")
#         art_found = df[df["artist"].str.lower().str.contains(user_entry).lower()]
#         # song_found = df[df["song"].contains(user_entry)]
#         if len(art_found) > 0 or len(song_found) > 0:
#             if len(art_found) > 0:
#                 print("Found in artists:")
#                 print(art_found)
#                 print("\n - - - - - - -\n")
#             if len(song_found) > 0:
#                 print("Found in songs:")
#                 print(song_found)
#         else:
#             return "no match"
#     elif user_entry and len(user_entry) < 3:
#         print("search item should be at least 3 characters long")
#         if return_list:
#             return None
#     if return_list:
#         return None

# search_csv("Ade", "../data/song_artists.csv")

In [20]:
# def print_result_lst(results:[list] = None, fp:[str]=None):
#     """call function with direct=find_song_or_artists[0], results=find_song_or_artists[1], fp=your_filepath"""
#     if not fp:
#         print("no filepath given.")
#         return 
#     try:
#         music_df = pd.read_csv(fp, sep=";")
#     except FileNotFoundError:
#         print( "file not found")
#         return
#     else:
#         if results:
#             music_df = pd.read_csv(fp, sep=";")
#             for hit in results:
#                 if hit[0] == 1:              # hit[0] == 1, direct match
#                     if hit[1] == "a":
#                         this_artist = music_df.iloc[hit[2]]["artist"]
#                         this_song = music_df.iloc[hit[2]]["song"]
#                         print(f"Matching artist: {this_artist} with {this_song} on place {hit[2]+1}.")
#                     if hit[1] == "s":
#                         this_artist = music_df.iloc[hit[2]]["artist"]
#                         this_song = music_df.iloc[hit[2]]["song"]
#                         print(f"Matching song: {this_song} by {this_artist} on place {hit[2]+1}.")
#                     print("----------")
#                     recom_index = random.randint(0,99)
#                     while recom_index == hit[2]:
#                         recom_index = random.randint(0,99)
#                     reco_artist = music_df.iloc[recom_index]["artist"]
#                     reco_song = music_df.iloc[recom_index]["song"]
#                     print(f"Another entry from Billboard Hot 100 would be {reco_artist} with {reco_song} currently on place {recom_index +1}.")
#                 else:          # hit[0] == False (0), no direct match
#                     if hit[1] == "a":
#                         this_artist = music_df.iloc[hit[2]]["artist"]
#                         this_song = music_df.iloc[hit[2]]["song"]
#                         print(f"Partial match in artists: {this_artist} with {this_song} on place {hit[2]+1}.")
#                     if hit[1] == "s":
#                         this_artist = music_df.iloc[hit[2]]["artist"]
#                         this_song = music_df.iloc[hit[2]]["song"]
#                         print(f"Partial match in songs: {this_song} by {this_artist} on place {hit[2]+1}.")
#                     print("----------")
#         else:
#             print("no match.")