In [1]:
import pandas as pd

In [2]:
# Read the song/lyrics CSV into the working DataFrame.
df = pd.read_csv(filepath_or_buffer="spotify_millsongdata.csv")

In [3]:
# Preview the first rows (head() shows five rows by default).
df.head()

Unnamed: 0,artist,song,link,text
0,ABBA,Ahe's My Kind Of Girl,/a/abba/ahes+my+kind+of+girl_20598417.html,"Look at her face, it's a wonderful face \r\nA..."
1,ABBA,"Andante, Andante",/a/abba/andante+andante_20002708.html,"Take it easy with me, please \r\nTouch me gen..."
2,ABBA,As Good As New,/a/abba/as+good+as+new_20003033.html,I'll never know why I had to go \r\nWhy I had...
3,ABBA,Bang,/a/abba/bang_20598415.html,Making somebody happy is a question of give an...
4,ABBA,Bang-A-Boomerang,/a/abba/bang+a+boomerang_20002668.html,Making somebody happy is a question of give an...


In [4]:
# Preview the last rows (tail() shows five rows by default).
df.tail()

Unnamed: 0,artist,song,link,text
57645,Ziggy Marley,Good Old Days,/z/ziggy+marley/good+old+days_10198588.html,Irie days come on play \r\nLet the angels fly...
57646,Ziggy Marley,Hand To Mouth,/z/ziggy+marley/hand+to+mouth_20531167.html,Power to the workers \r\nMore power \r\nPowe...
57647,Zwan,Come With Me,/z/zwan/come+with+me_20148981.html,all you need \r\nis something i'll believe \...
57648,Zwan,Desire,/z/zwan/desire_20148986.html,northern star \r\nam i frightened \r\nwhere ...
57649,Zwan,Heartsong,/z/zwan/heartsong_20148991.html,come in \r\nmake yourself at home \r\ni'm a ...


In [5]:
# Dimensions of the loaded frame as (rows, columns).
df.shape

(57650, 4)

In [6]:
# Count missing values per column (isna is the canonical name for isnull).
df.isna().sum()

artist    0
song      0
link      0
text      0
dtype: int64

In [7]:
# Work on a 5,000-song subset without the `link` column, renumbering rows from 0.
# The sample is seeded so that Restart & Run All always yields the same subset;
# every downstream result (similarity matrix, lookups, pickles) depends on which
# rows are drawn. 42 matches the seed already used for the SVD step.
df = (
    df.sample(n=5000, random_state=42)
    .drop(columns='link')
    .reset_index(drop=True)
)

In [8]:
# Show the first ten rows of the sampled frame.
df.iloc[:10]

Unnamed: 0,artist,song,text
0,Hillsong,Oh You Bring,Oh you bring hope to the hopeless \r\nAnd lig...
1,Avril Lavigne,Alice,Trippin' out \r\nSpinnin' around \r\nI'm und...
2,Red Hot Chili Peppers,Funny Face,It must have been your funny face \r\nIt must...
3,Culture Club,I Just Wanna Be Loved,Take a picture of tonight and \r\nKeep it by ...
4,Omd,Walking On Air,The mood I'm in \r\nI can't explain \r\nIs b...
5,Backstreet Boys,Best That I Can,Some say that love isn't fair \r\nBut they do...
6,Boney M.,Still I Am Sad,See the stars come joining down from the sky ...
7,You Am I,...And Vandalism,I know this guy \r\nIf I'm the mayonnaise he'...
8,Squeeze,Striking Matches,Striking matches and I'm smoking cigarettes \...
9,Barbra Streisand,I'm The Greatest Star,I got 36 expressions! \r\nSweet as pie to tou...


In [9]:
# Inspect the raw lyrics of the row labelled 0 in a single indexing step.
df.loc[0, 'text']

"Oh you bring hope to the hopeless  \r\nAnd light to those in the darkness  \r\nAnd death to life, now I'm alive  \r\n  \r\nOh you give peace to the restless  \r\nAnd joy to homes that are broken  \r\nI see you now, in you I'm found  \r\n  \r\nAnd you open the door for me  \r\nAnd you lay down your life to set me free  \r\nAll that I am will serve you lord  \r\n  \r\nOh you fill those who are empty  \r\nAnd rescue those in the valley  \r\nAnd through it all you calm my soul  \r\n  \r\nOh now you find me in my weakness  \r\nAnd heal the wounds of my heartache  \r\nAnd worship for you in spirit and truth  \r\n  \r\nAnd you open the door for me  \r\nAnd you lay down your life to set me free  \r\nAll that I am will serve you lord  \r\n  \r\nAnd you open my eyes to see  \r\nAll the wondering all of christ in me  \r\nJesus you're everything I need  \r\n  \r\n(Worship)  \r\n  \r\nAll honor  \r\nAll glory  \r\nAll praise to you (repeat)  \r\n  \r\nAnd you open the door for me  \r\nAnd you lay 

In [10]:
# Dimensions of the working frame as (rows, columns).
df.shape

(5000, 3)

In [11]:
# Normalise the lyrics: lowercase, replace punctuation with spaces, then replace
# newlines with spaces.
# The original first step was `Series.replace(r'^\w\s', ' ')` without regex=True,
# which compares whole cell values against that literal string and therefore
# changed nothing. `.str.replace(..., regex=True)` performs the substring
# substitution that was intended.
# NOTE(review): the pattern is assumed to have been meant as `[^\w\s]`
# (anything that is neither a word character nor whitespace) - confirm.
df['text'] = (
    df['text']
    .str.lower()
    .str.replace(r'[^\w\s]', ' ', regex=True)
    .str.replace(r'\n', ' ', regex=True)
)

In [17]:
from nltk.stem.porter import PorterStemmer
from nltk.tokenize import RegexpTokenizer

# Tokens are maximal runs of word characters, so punctuation and whitespace
# never appear in the output.
tokenizer = RegexpTokenizer(r'\w+')

# `stemmer` was referenced by tokenization() but never defined in the notebook,
# so this cell raised NameError on a fresh kernel.
# NOTE(review): PorterStemmer is assumed to be the stemmer originally used - confirm.
stemmer = PorterStemmer()


def tokenization(txt):
    """Tokenise `txt`, stem each token, and re-join the stems with single spaces.

    Parameters
    ----------
    txt : str
        Text to process.

    Returns
    -------
    str
        The stemmed tokens separated by one space; an empty string when
        `txt` contains no word characters.
    """
    return " ".join(stemmer.stem(token) for token in tokenizer.tokenize(txt))



In [18]:
# Replace each lyric with its tokenised, stemmed form.
df['text'] = df['text'].apply(tokenization)

In [26]:
from sklearn.decomposition import TruncatedSVD
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Step 1: word-level TF-IDF with English stop words removed; the vocabulary is
# capped at 3000 terms (tune max_features as needed).
tfidvector = TfidfVectorizer(analyzer='word', stop_words='english', max_features=3000)
matrix = tfidvector.fit_transform(df['text'])

# Step 2: project the TF-IDF matrix onto 100 components (tune n_components as
# needed); random_state is fixed so the randomized solver is repeatable.
svd = TruncatedSVD(n_components=100, algorithm='randomized', n_iter=7, random_state=42)
reduced_matrix = svd.fit_transform(matrix)

# Step 3: pairwise cosine similarity between all rows of the reduced matrix.
similarity = cosine_similarity(reduced_matrix)


In [27]:
# Similarity scores of the first song against every song in the frame.
similarity[0, :]

array([ 1.        ,  0.06369799,  0.17072355, ..., -0.00897891,
        0.25393836,  0.17909241], shape=(5000,))

In [28]:
# Rows whose title is exactly 'Crying Over You'.
df.loc[df['song'].eq('Crying Over You')]

Unnamed: 0,artist,song,text


In [31]:
def recommendation(song_df, top_n=10):
    """Return the titles of the songs most similar to `song_df`.

    Reads the module-level `df` (column `song`) and `similarity`, where
    `similarity[i][j]` is the score between the songs at positions i and j.

    Parameters
    ----------
    song_df : str
        Exact title to look up in `df['song']`. When several rows share the
        title, the first one is used.
    top_n : int, default 10
        Maximum number of titles to return.

    Returns
    -------
    list of str
        Up to `top_n` titles ordered from most to least similar. The query
        row itself is never included. An empty list (after printing a
        message) when the title is not present.
    """
    # Locate the song by position: `similarity` rows are positional and the
    # titles are read back with `df.iloc`, so this stays correct even if the
    # frame's index is not 0..n-1.
    matches = (df['song'] == song_df).to_numpy().nonzero()[0]
    if len(matches) == 0:
        print(f"Song '{song_df}' not found in the dataset.")
        return []

    idx = matches[0]

    # Drop the query row explicitly instead of assuming it sorts first: another
    # song with a tied (or, through rounding, higher) score could otherwise
    # push the query itself into the results.
    ranked = sorted(
        ((pos, score) for pos, score in enumerate(similarity[idx]) if pos != idx),
        key=lambda pair: pair[1],
        reverse=True,
    )
    return [df.iloc[pos]['song'] for pos, _ in ranked[:max(top_n, 0)]]


In [32]:
# Ask for recommendations by exact title; an unknown title prints a notice and yields [].
recommendation(song_df='Crying Over You')

Song 'Crying Over You' not found in the dataset.


[]

In [33]:
import pickle
# Persist the similarity matrix and the processed frame.
# Context managers make sure each file is flushed and closed even if pickling
# raises; the original left both handles open.
# Only unpickle these files from a trusted location: pickle.load can execute
# arbitrary code.
with open('similarity.pkl', 'wb') as similarity_file:
    pickle.dump(similarity, similarity_file)
with open('df.pkl', 'wb') as df_file:
    pickle.dump(df, df_file)