In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.manifold import TSNE

In [2]:
tracks = pd.read_csv("playlist_features.csv")
tracks.head(3)

Unnamed: 0.1,Unnamed: 0,Name,Album,Artist,Release_Date,Length,Popularity,Acousticness,Danceability,Energy,Instrumentness,Liveness,Loudness,Speechness,Tempo,Time_Signature
0,0,"What Jhumka ? (From ""Rocky Aur Rani Kii Prem K...","What Jhumka ? (From ""Rocky Aur Rani Kii Prem K...",Various Artists,2023-07-12,213611,64,0.235,0.821,0.823,0.0,0.207,-5.455,0.1,107.953,4
1,1,"Zinda Banda (From ""Jawan"")","Zinda Banda (From ""Jawan"")",Anirudh Ravichander,2023-07-31,264000,72,0.00785,0.613,0.881,3e-06,0.155,-5.974,0.249,149.978,4
2,2,Mahiye Jinna Sohna,Mahiye Jinna Sohna,Darshan Raval,2023-06-22,181250,86,0.736,0.567,0.54,3.2e-05,0.164,-5.754,0.0406,92.027,4


In [3]:
tracks.isnull().sum()

Unnamed: 0        0
Name              0
Album             0
Artist            0
Release_Date      0
Length            0
Popularity        0
Acousticness      0
Danceability      0
Energy            0
Instrumentness    0
Liveness          0
Loudness          0
Speechness        0
Tempo             0
Time_Signature    0
dtype: int64

In [29]:
import re

# Function to remove bracketed data from a string
def remove_brackets(text):
    return re.sub(r'\([^)]*\)', '', text)

# Apply the function to the song_link column
tracks['Name'] = tracks['Name'].apply(remove_brackets)


In [31]:
tracks["Name"] = tracks["Name"].str.lower()

In [32]:
song_vectorizer = CountVectorizer()
song_vectorizer.fit(tracks["Name"])

# Sorting by Popularity
tracks = tracks.sort_values(by = ["Popularity"], ascending = False)
tracks.head()

Unnamed: 0.1,Unnamed: 0,Name,Album,Artist,Release_Date,Length,Popularity,Acousticness,Danceability,Energy,Instrumentness,Liveness,Loudness,Speechness,Tempo,Time_Signature,similarity_factor
2,2,mahiyejinnasohna,Mahiye Jinna Sohna,Darshan Raval,2023-06-22,181250,86,0.736,0.567,0.54,3.2e-05,0.164,-5.754,0.0406,92.027,4,1.0
19,19,maanmerijaan,Champagne Talk,King,2022-10-12,194653,85,0.354,0.698,0.505,0.0,0.0995,-8.242,0.0356,95.868,4,1.0
6,6,malangsajna,Malang Sajna,Sachet Tandon,2022-12-19,161041,84,0.232,0.739,0.716,0.0,0.33,-4.964,0.037,113.943,4,1.0
15,15,obedardeya,"O Bedardeya (From ""Tu Jhoothi Main Makkaar"")",Pritam,2023-03-04,313051,83,0.794,0.582,0.418,0.0,0.109,-8.701,0.0288,112.909,4,1.0
21,21,terepyaarmein,"Tere Pyaar Mein (From ""Tu Jhoothi Main Makkaar"")",Pritam,2023-02-01,266094,82,0.111,0.679,0.748,0.0,0.183,-5.487,0.054,118.945,4,1.0


In [33]:
def get_similarities(song_name, data):

    text_array1 = song_vectorizer.transform(data[data["Name"] == song_name]["Artist"]).toarray()
    num_array1 = data[data["Name"] == song_name].select_dtypes(include = np.number).to_numpy()

    sim = []
    for idx, row in data.iterrows():
        name = row["Name"]

        text_array2 = song_vectorizer.transform(data[data["Name"] == name]["Artist"]).toarray()
        num_array2 = data[data["Name"] == name].select_dtypes(include = np.number).to_numpy()

        text_sim = cosine_similarity(text_array1, text_array2)[0][0]
        num_sim = cosine_similarity(num_array1,num_array2)[0][0]
        sim.append(text_sim + num_sim)

    return sim

In [34]:
def recommend_songs(song_name, data = tracks):

    if tracks[tracks["Name"] == song_name].shape[0] == 0:
        print ("This song is either not so poplular have entered an invalid name not contained in this playlist")
        
        for song in data.sample(n=7)["Name"].values:
            print(song)

        return
    
    data["similarity_factor"] = get_similarities(song_name, data)
    data.sort_values(by = ["similarity_factor", "Popularity"],
                     ascending = [False,False], inplace = True)
    
    display(data[["Name", "Artist","Album", "Popularity"]][1:11])

In [35]:
songname = input("Please enter the song name: ")

Please enter the song name: kesariya


In [40]:
recommend_songs (songname)

Unnamed: 0,Name,Artist
36,ranjha,Various Artists
40,raataanlambiyan,Tanishk Bagchi
22,desperado,Raghav
47,tuaakedekhle,King
15,obedardeya,Pritam
21,terepyaarmein,Pritam


In [None]:
print(recommend_songs)