In [1]:
import requests
import  json
import pandas as pd

In [None]:
# Define the API endpoint
url = "https://api.deezer.com/chart"

# Send a GET request to the API
response = requests.get(url)

# Check if the request was successful
if response.status_code == 200:
    # Extract the data in JSON format
    data = response.json()
    
    # Extract song data from the 'tracks' key
    tracks = data['tracks']['data']
    
    # Prepare the data for CSV
    spotify_millsongdata = []
    for track in tracks:
        song_data.append({
            "artist": track['artist']['name'],
            "song": track['song'],
            "link": track['link'],
            "text": track['text']
        })
    
    # Create a DataFrame from the list
    df = pd.DataFrame(spotify_millsongdata)
    
    # Save the DataFrame to a CSV file
    df.to_csv("spotify_millsongdata.csv", index=False)

In [5]:
df = pd.read_csv("spotify_millsongdata.csv")

In [6]:
df.head(5)

Unnamed: 0,artist,song,link,text
0,ABBA,Ahe's My Kind Of Girl,/a/abba/ahes+my+kind+of+girl_20598417.html,"Look at her face, it's a wonderful face \r\nA..."
1,ABBA,"Andante, Andante",/a/abba/andante+andante_20002708.html,"Take it easy with me, please \r\nTouch me gen..."
2,ABBA,As Good As New,/a/abba/as+good+as+new_20003033.html,I'll never know why I had to go \r\nWhy I had...
3,ABBA,Bang,/a/abba/bang_20598415.html,Making somebody happy is a question of give an...
4,ABBA,Bang-A-Boomerang,/a/abba/bang+a+boomerang_20002668.html,Making somebody happy is a question of give an...


In [7]:
df.tail(5)

Unnamed: 0,artist,song,link,text
57645,Ziggy Marley,Good Old Days,/z/ziggy+marley/good+old+days_10198588.html,Irie days come on play \r\nLet the angels fly...
57646,Ziggy Marley,Hand To Mouth,/z/ziggy+marley/hand+to+mouth_20531167.html,Power to the workers \r\nMore power \r\nPowe...
57647,Zwan,Come With Me,/z/zwan/come+with+me_20148981.html,all you need \r\nis something i'll believe \...
57648,Zwan,Desire,/z/zwan/desire_20148986.html,northern star \r\nam i frightened \r\nwhere ...
57649,Zwan,Heartsong,/z/zwan/heartsong_20148991.html,come in \r\nmake yourself at home \r\ni'm a ...


In [8]:
df.shape

(57650, 4)

In [9]:
df.isnull().sum()

artist    0
song      0
link      0
text      0
dtype: int64

In [10]:
df =df.sample(5000).drop('link', axis=1).reset_index(drop=True)

In [11]:
df.head(10)

Unnamed: 0,artist,song,text
0,Yello,Downtown Samba,This one's called \r\nNow for you \r\nDownto...
1,Perry Como,Forever And Ever,"[Chorus] \r\nForever and ever, \r\nMy heart ..."
2,Guided By Voices,Sad If I Lost It,Strikes me as an animal \r\nAnd now gone kiss...
3,Eagles,Twenty-One,Twenty-one and strong as I can be \r\nI know ...
4,Quarterflash,Right Kind Of Love,"Baby, your love it surrounds me but never give..."
5,Michael W. Smith,A Little Stronger Everyday,I was throwin' away what I needed most \r\nIt...
6,George Jones,Family Bible,[C] There's A Fam'ly [F] Bible on the [C] tabl...
7,Smiths,I Want The One I Can't Have,On the day that your mentality \r\nDecides to...
8,Nick Cave,Nick The Stripper,Nick the stripper \r\nHideous to the eye \r\...
9,Tragically Hip,She Didn't Know,"Born a queen up on her side, and she felt it t..."


In [12]:
df['text'][0]

"This one's called  \r\nNow for you  \r\nDowntown samba  \r\n  \r\nCanal Street's where we dance  \r\nWe do the rhumba, samba  \r\nThe city's hot, we're almost naked  \r\nIt's a scandal  \r\nNight's still young  \r\nWe do the samba with the congo warrior  \r\n  \r\nDowntown samba  \r\nDowntown samba  \r\n  \r\nHey you over there  \r\nYou shouldn't dance alone  \r\nWatch me come on closer  \r\nCause I am the one  \r\nTo dance with you  \r\nAll night long  \r\nI'm dancing like a mambo  \r\nGo go go for sambo  \r\nDancing, dancing, dancing  \r\nTango  \r\nOver there, mister  \r\nCome see, you'll see  \r\nSo dance the tango, tango\r\n\r\n"

In [13]:
# df = df.sample(5000)

In [14]:
df.shape

(5000, 3)

Text Cleaning/ Text Preprocessing

In [15]:
df['text'] = df['text'].str.lower().replace(r'^\w\s', ' ').replace(r'\n', ' ', regex = True)

In [16]:
import nltk
from nltk.stem.porter import PorterStemmer
stemmer = PorterStemmer()

def tokenization(txt):
    tokens = nltk.word_tokenize(txt)
    stemming = [stemmer.stem(w) for w in tokens]
    return " ".join(stemming)

In [None]:
df['text'] = df['text'].apply(lambda x: tokenization(x))

In [19]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [20]:
tfidvector = TfidfVectorizer(analyzer='word',stop_words='english')
matrix = tfidvector.fit_transform(df['text'])
similarity = cosine_similarity(matrix)

In [21]:
similarity[0]

array([1.        , 0.0037128 , 0.00148866, ..., 0.00363061, 0.00563215,
       0.00813924])

In [22]:
df[df['song'] == 'Crying Over You']

Unnamed: 0,artist,song,text


In [23]:
def recommendation(song_df):
    idx = df[df['song'] == song_df].index[0]
    distances = sorted(list(enumerate(similarity[idx])),reverse=True,key=lambda x:x[1])
    
    songs = []
    for m_id in distances[1:21]:
        songs.append(df.iloc[m_id[0]].song)
        
    return songs

In [None]:
recommendation('Crying Over You')

In [None]:
import pickle
pickle.dump(similarity,open('similarity.pkl','wb'))
pickle.dump(df,open('df.pkl','wb'))