In [1]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
from random import choice

# from difflib import SequenceMatcher  # Wasn't necessary because get_close_matches was easier. But pretty cool anyhow.
from difflib import get_close_matches

**User experience:**

- What happens if the user inputs a song that doesn't exist?
- What do we do with songs that have the same name, but a different artist?
- How do we deal with typos?

**Architecture:**

- Do we build the interaction with the user in the same notebook as the web-scraping?
- Where do we store the scraped songs?

**Scheduling / Automation:**

- Should we scrape billboard / wikipedia every time a user sends a request?

**Testing:**

- Does it work when you test it with a real user (a colleague)?
- Chances are that more issues will appear, and that not all of them will be solved during this session. But what's important is that the issues have been identified.

In [2]:
# Address of the Billboard Hot 100 chart page that the notebook downloads.
url = "https://www.billboard.com/charts/hot-100/"

In [3]:
# Download the chart page.
# - timeout: requests waits indefinitely by default, so a stalled connection
#   would hang the cell forever; give up after 10 seconds instead.
# - raise_for_status(): turn an HTTP error (4xx/5xx) into an exception here,
#   rather than letting an error page be parsed as if it were the chart.
response = requests.get(url, timeout=10)
response.raise_for_status()

In [4]:
# Display the HTTP status code of the download so the reader can see whether
# the request succeeded (the recorded run shows 200).
response.status_code

200

In [5]:
# Parse the raw response bytes with Python's built-in HTML parser so the page
# can be queried with CSS selectors.
soup = BeautifulSoup(markup=response.content, features="html.parser")

In [6]:
# Number of chart entries = number of artist labels found on the page.
# NOTE: the class name really is spelled "a-no-trucate" in this selector; it
# matched in the recorded run, so do not "correct" it without checking the page.
artist_label_tags = soup.select("span.c-label.a-no-trucate")
length_of_list = len(artist_label_tags)

In [7]:
# Run each CSS query once. The original re-ran both selectors on every loop
# iteration, i.e. two full-document searches per chart entry.
artist_tags = soup.select("span.c-label.a-no-trucate")
title_tags = soup.select(".o-chart-results-list__item > h3")

# Pair entries by position. Indexing (rather than zip) keeps the original
# contract: exactly `length_of_list` items per list, and an IndexError if the
# page yields fewer tags than expected instead of silently truncating.
artists = [artist_tags[i].get_text(strip=True) for i in range(length_of_list)]
songs = [title_tags[i].get_text(strip=True) for i in range(length_of_list)]

In [8]:
# Assemble the scraped lists into the chart table used by every later cell:
# one row per chart entry, with the columns "song" and "artist".
chart_columns = {"song": songs, "artist": artists}
df = pd.DataFrame(chart_columns)
df

Unnamed: 0,song,artist
0,Anti-Hero,Taylor Swift
1,Rich Flex,Drake & 21 Savage
2,Unholy,Sam Smith & Kim Petras
3,Bad Habit,Steve Lacy
4,As It Was,Harry Styles
...,...,...
95,The Great War,Taylor Swift
96,Gotta Move On,Diddy & Bryson Tiller
97,Forget Me,Lewis Capaldi
98,"Going, Going, Gone",Luke Combs


### User experience part

In [9]:
# Exact-match recommender: if the typed title is on the chart, suggest a
# random different chart entry; otherwise ask the user to try again.
song_req = input("Write a song you like: ")

is_requested = df["song"] == song_req
if is_requested.any():
    # Pick a whole ROW rather than a title. The original picked a title and
    # then looked the artist up by the first row carrying that title, which
    # credits the wrong artist when two chart entries share a name.
    # Every row with the requested title is excluded, not only the first one.
    other_rows = df[~is_requested]
    picked = other_rows.iloc[choice(range(len(other_rows)))]
    new_rec = picked["song"]
    arti = picked["artist"]
    print(f"You might also like '{new_rec}' by {arti}")
else:
    print("lol try again")
    

Write a song you like: f
lol try again


### Now where case doesn't matter

In [10]:
# Case-insensitive recommender: same as the exact-match version, but the typed
# title and the chart titles are compared in lower case.
song_req = input("Write a song you like: ")

# Lower-case the title column once with the vectorised string accessor
# instead of re-running a Python lambda over the column for every lookup.
titles_lower = df["song"].str.lower()
is_requested = titles_lower == song_req.lower()

if is_requested.any():
    # Pick a whole row so the artist always belongs to the recommended song,
    # even when several chart entries share a title.
    other_rows = df[~is_requested]
    picked = other_rows.iloc[choice(range(len(other_rows)))]
    new_rec = picked["song"]
    arti = picked["artist"]
    print(f"You might also like '{new_rec}' by {arti}")
else:
    print("Good you're not a basic bitch. Listen to 'The Watcher' by Dr. Dre to feel happy.")
    
    

Write a song you like: f
Good you're not a basic bitch. Listen to 'The Watcher' by Dr. Dre to feel happy.


In [11]:
# Count the chart entries credited exactly to "Joji".
# Comparing and summing gives 0 when the artist is absent, whereas
# value_counts()["Joji"] raises KeyError on any week Joji is not on the chart.
int((df["artist"] == "Joji").sum())

1

### Now where we account for multiple songs with same name

In [12]:
def which_artist(name, songs_df=None):
    """Ask the user which artist they mean for a title shared by several songs.

    Each artist with a song called ``name`` is offered in chart order; the
    first one the user confirms with "y" (any case) is returned.

    Parameters
    ----------
    name : str
        Song title to disambiguate. Matching is case-insensitive, because the
        callers in this notebook pass lower-cased user input while the chart
        keeps the original capitalisation.
    songs_df : pandas.DataFrame, optional
        Table with "song" and "artist" columns. Defaults to the notebook-level
        ``df`` when omitted.

    Returns
    -------
    str or None
        The confirmed artist, or None when the user declines every candidate
        or no song with that title exists (an apology is printed in both cases).
    """
    if songs_df is None:
        songs_df = df  # fall back to the chart table built earlier in the notebook

    same_title = songs_df["song"].str.lower() == name.lower()
    for art in songs_df.loc[same_title, "artist"]:
        ans = input(f"Do you mean {name} by {art}? write y for yes, press any other key for no: ")
        if ans.lower() == "y":
            return art

    # Reached only when nobody was confirmed. Reporting after the loop avoids
    # the original "is this the last artist?" comparison by value, which fired
    # too early when the same artist appeared more than once, and never fired
    # at all when there were no candidates.
    print("Sorry, we didn't find the song you were looking for.")
    return None

In [13]:
# Recommender that copes with several chart entries sharing one title:
# the user is asked which artist they meant before a suggestion is made.
song_req = input("Write a song you like: ").lower()

titles_lower = df["song"].str.lower()
is_requested = titles_lower == song_req

if is_requested.any():
    liked_rows = is_requested
    if is_requested.sum() > 1:
        print("We found more than one song matching this name")
        # Hand which_artist the title as it is spelled on the chart, not the
        # lower-cased input: a lower-cased title fails an exact-case lookup.
        chart_title = df.loc[is_requested, "song"].iloc[0]
        chosen_artist = which_artist(chart_title)
        # The original discarded the answer and never recommended anything
        # in this branch. Use it to pin down the exact entry the user likes.
        if chosen_artist is None:
            liked_rows = None  # user declined every candidate; nothing to recommend
        else:
            liked_rows = is_requested & (df["artist"] == chosen_artist)

    if liked_rows is not None:
        # Pick a whole row so the artist always belongs to the recommended song.
        other_rows = df[~liked_rows]
        picked = other_rows.iloc[choice(range(len(other_rows)))]
        new_rec = picked["song"]
        arti = picked["artist"]
        print(f"You might also like '{new_rec}' by {arti}")
else:
    print("Good you're not a basic bitch. Listen to 'The Watcher' by Dr. Dre to feel happy.")
    
    

Write a song you like: f
Good you're not a basic bitch. Listen to 'The Watcher' by Dr. Dre to feel happy.


### With typos

In [21]:
def str_similar(string, songs_df=None):
    """Return the chart title that most closely resembles ``string``.

    The comparison is case-insensitive: both the query and the chart titles are
    lower-cased before fuzzy matching. The callers in this notebook pass
    lower-cased input, and a case-sensitive comparison scores an all-caps
    title as almost entirely different from its lower-case spelling.

    Parameters
    ----------
    string : str
        Possibly misspelled song title typed by the user.
    songs_df : pandas.DataFrame, optional
        Table with a "song" column. Defaults to the notebook-level ``df``
        when omitted.

    Returns
    -------
    str or bool
        The best-matching title exactly as it is spelled in the table, or
        False when nothing is similar enough (difflib's default cutoff).
    """
    if songs_df is None:
        songs_df = df  # fall back to the chart table built earlier in the notebook

    # Map lower-cased title -> original spelling; the first spelling wins when
    # two titles differ only by case.
    original_by_lower = {}
    for title in songs_df["song"].unique():
        original_by_lower.setdefault(title.lower(), title)

    match = get_close_matches(string.lower(), list(original_by_lower), 1)
    if match:
        return original_by_lower[match[0]]
    return False

In [20]:
# Typo-tolerant recommender: the typed title is first snapped to the closest
# chart title, then handled like the duplicate-aware version.
song_req = input("Write a song you like: ").lower()

# Fuzzy-match once and keep the result (the original ran the search twice).
closest_title = str_similar(song_req)

if closest_title is not False:
    song_req = closest_title.lower()
    is_requested = df["song"].str.lower() == song_req

    liked_rows = is_requested
    if is_requested.sum() > 1:
        print("We found more than one song matching this name")
        # Pass the title as it is spelled on the chart, not the lower-cased
        # text: a lower-cased title fails an exact-case lookup.
        chosen_artist = which_artist(closest_title)
        # The original discarded the answer and never recommended anything
        # in this branch. Use it to pin down the exact entry the user likes.
        if chosen_artist is None:
            liked_rows = None  # user declined every candidate; nothing to recommend
        else:
            liked_rows = is_requested & (df["artist"] == chosen_artist)

    if liked_rows is not None:
        # Pick a whole row so the artist always belongs to the recommended song.
        other_rows = df[~liked_rows]
        picked = other_rows.iloc[choice(range(len(other_rows)))]
        new_rec = picked["song"]
        arti = picked["artist"]
        print(f"You might also like '{new_rec}' by {arti}")
else:
    print("Good you're not a basic bitch. Listen to 'The Watcher' by Dr. Dre to be happy.")
    
    

Write a song you like: cheese
Good you're not a basic bitch. Listen to 'The Watcher' by Dr. Dre to be happy.


In [227]:
# Demo: look up a badly misspelled title among the distinct chart titles and
# show the single best fuzzy match.
get_close_matches("unspoaple", df["song"].unique().tolist(), n=1)

['Unstoppable']