In [9]:
# start coding here!

import pandas as pd
import zipfile

zf = zipfile.ZipFile('archive.zip')
songs = pd.read_csv(zf.open('spotify_millsongdata.csv'))

songs.head()

Unnamed: 0,artist,song,link,text
0,ABBA,Ahe's My Kind Of Girl,/a/abba/ahes+my+kind+of+girl_20598417.html,"Look at her face, it's a wonderful face \r\nA..."
1,ABBA,"Andante, Andante",/a/abba/andante+andante_20002708.html,"Take it easy with me, please \r\nTouch me gen..."
2,ABBA,As Good As New,/a/abba/as+good+as+new_20003033.html,I'll never know why I had to go \r\nWhy I had...
3,ABBA,Bang,/a/abba/bang_20598415.html,Making somebody happy is a question of give an...
4,ABBA,Bang-A-Boomerang,/a/abba/bang+a+boomerang_20002668.html,Making somebody happy is a question of give an...


In [10]:
import re

def clean_title(song):
    return re.sub("[^a-zA-Z0-9]", "", song)

In [11]:
songs["clean_title"] = songs["song"].apply(clean_title) 

songs.head()

Unnamed: 0,artist,song,link,text,clean_title
0,ABBA,Ahe's My Kind Of Girl,/a/abba/ahes+my+kind+of+girl_20598417.html,"Look at her face, it's a wonderful face \r\nA...",AhesMyKindOfGirl
1,ABBA,"Andante, Andante",/a/abba/andante+andante_20002708.html,"Take it easy with me, please \r\nTouch me gen...",AndanteAndante
2,ABBA,As Good As New,/a/abba/as+good+as+new_20003033.html,I'll never know why I had to go \r\nWhy I had...,AsGoodAsNew
3,ABBA,Bang,/a/abba/bang_20598415.html,Making somebody happy is a question of give an...,Bang
4,ABBA,Bang-A-Boomerang,/a/abba/bang+a+boomerang_20002668.html,Making somebody happy is a question of give an...,BangABoomerang


In [12]:
from sklearn.feature_extraction.text import TfidfVectorizer

vectorizer = TfidfVectorizer(ngram_range=(1,2))

tfidf = vectorizer.fit_transform(songs["clean_title"])

In [13]:
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

def search(song):
    song = clean_title(song)
    query_vec = vectorizer.transform([song])
    similarity = cosine_similarity(query_vec, tfidf).flatten()
    indices = np.argpartition(similarity, -5) [-5:]
    results = songs.iloc[indices][::-1]
    return results



In [14]:
import ipywidgets as widgets
from IPython.display import display

song_input = widgets.Text(
    value="Carry_on_Wayward_Son",
    description="Song title:",
    disabled=False
)

song_list = widgets.Output()

def on_type(data):
    with song_list:
        song_list.clear_output()
        song = data["new"]
        if len(song) > 0:
            display(search(song))

song_input.observe(on_type, names="value")

display(song_input, song_list)


Text(value='Carry_on_Wayward_Son', description='Song title:')

Output()