<a href="https://colab.research.google.com/github/SiddharthChavan23/Music_Recommendation/blob/main/Model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install --upgrade scikit-learn


In [None]:
import numpy as np
import pandas as pd
from sklearn.neighbors import NearestNeighbors
from scipy.sparse import csr_matrix
from sklearn.preprocessing import MinMaxScaler,OneHotEncoder
from sklearn.model_selection import GridSearchCV

# NOTE(review): this scaler is created but never fitted or applied in the cells
# visible here — confirm whether feature scaling was intended before the kNN fit.
scaler = MinMaxScaler()

In [None]:
# Load the Spotify tracks dataset; the first CSV column is used as the row index.
# Assumes Google Drive is already mounted at /content/drive — no mount cell is
# visible in this notebook, TODO confirm.
df = pd.read_csv('/content/drive/MyDrive/Spotify Dataset/dataset.csv',index_col=0)


**Drop Duplicate Songs**

In [None]:
# Count every row that shares its (artists, track_name) pair with another row;
# keep=False flags all members of a duplicate group, not only the repeats.
total_duplicates = int(
    df.duplicated(subset=['artists', 'track_name'], keep=False).sum()
)
print("Total number of duplicate songs based on artists and track names:", total_duplicates)

Total number of duplicate songs based on artists and track names: 49157


In [None]:
# Keep the first row of each (artists, track_name) pair and renumber the rows
# 0..n-1, so that index labels coincide with row positions from here on.
df = df[~df.duplicated(subset=['artists', 'track_name'], keep='first')].reset_index(drop=True)
print("Shape of DataFrame after dropping duplicates:", df.shape)

# Show collaborating artists separated by ',' instead of ';'.
df = df.assign(artists=df['artists'].str.replace(';', ','))

Shape of DataFrame after dropping duplicates: (81344, 20)


In [None]:
# Preview the first rows of the de-duplicated frame (head() shows 5 by default).
df.head()

Unnamed: 0,track_id,artists,album_name,track_name,popularity,duration_ms,explicit,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,time_signature,track_genre
0,5SuOikwiRyPMVoIQDJUgSV,Gen Hoshino,Comedy,Comedy,73,230666,False,0.676,0.461,1,-6.746,0,0.143,0.0322,1e-06,0.358,0.715,87.917,4,acoustic
1,4qPNDBW1i3p13qLCt0Ki3A,Ben Woodward,Ghost (Acoustic),Ghost - Acoustic,55,149610,False,0.42,0.166,1,-17.235,1,0.0763,0.924,6e-06,0.101,0.267,77.489,4,acoustic
2,1iJBSr7s7jYXzM8EGcbK5b,"Ingrid Michaelson,ZAYN",To Begin Again,To Begin Again,57,210826,False,0.438,0.359,0,-9.734,1,0.0557,0.21,0.0,0.117,0.12,76.332,4,acoustic
3,6lfxq3CG4xtTiEg7opyCyx,Kina Grannis,Crazy Rich Asians (Original Motion Picture Sou...,Can't Help Falling In Love,71,201933,False,0.266,0.0596,0,-18.515,1,0.0363,0.905,7.1e-05,0.132,0.143,181.74,3,acoustic
4,5vjLSffimiIP26QG5WcN2K,Chord Overstreet,Hold On,Hold On,82,198853,False,0.618,0.443,2,-9.681,1,0.0526,0.469,0.0,0.0829,0.167,119.949,4,acoustic


In [None]:
# Build the model input: drop the descriptive columns and key the remaining
# feature columns by track_id.
# NOTE(review): the columns keep their raw scales (the MinMaxScaler created
# earlier in this notebook is not applied here), so wide-range columns such as
# tempo or popularity weigh more in distance computations — confirm intended.
spotify_df = (
    df
    .drop(columns=['artists', 'album_name', 'track_name', 'duration_ms', 'explicit', 'track_genre'])
    .set_index('track_id')
)

In [None]:
# Candidate hyper-parameters for the neighbour search.
# 'minkowski' with the default p=2 is the Euclidean distance, so those two
# metric entries describe the same configuration.
param_grid = dict(
    n_neighbors=[5, 10, 15, 20],
    algorithm=['ball_tree', 'kd_tree', 'brute'],
    metric=['minkowski', 'euclidean', 'manhattan'],
)

In [None]:
# Attempt a cross-validated grid search over the neighbour-search parameters.
# NOTE(review): this is not expected to work as written. NearestNeighbors is an
# unsupervised estimator without a predict method, while the
# 'neg_mean_squared_error' scorer needs predictions and target values, and no y
# is passed to fit — so any scores (and the reported "best" parameters) are
# presumably not meaningful. The following cell hard-codes its own parameters
# rather than using best_params.
knn = NearestNeighbors()
grid_search = GridSearchCV(knn, param_grid, cv=5, scoring='neg_mean_squared_error')
# Same feature matrix that the following cell rebuilds before fitting.
mat_songs = csr_matrix(spotify_df.values)
grid_search.fit(mat_songs)
best_params = grid_search.best_params_
print("Best parameters:", best_params)


In [None]:
# Fit the neighbour index on the track feature table (one row per track_id).
# scikit-learn overrides `algorithm` with brute force when fitted on sparse
# input, so the features are passed as a dense array to actually get the
# requested ball tree. Row order matches spotify_df.
knn = NearestNeighbors(algorithm='ball_tree', n_neighbors=5, metric="minkowski")
mat_songs = spotify_df.to_numpy()
knn.fit(mat_songs)

In [None]:
def recommend(idx, model, number_of_recommendations=5):
    """Print the tracks whose features are closest to a given track.

    Relies on the notebook-level frames ``spotify_df`` (feature rows indexed by
    ``track_id``) and ``df`` (track metadata in the same row order).

    Parameters
    ----------
    idx : str
        ``track_id`` of the query song; must be present in ``spotify_df.index``.
    model : object
        Fitted neighbours model exposing ``kneighbors(X, n_neighbors=...)`` and
        trained on the rows of ``spotify_df``.
    number_of_recommendations : int, default 5
        How many songs to print, not counting the query song itself.

    Raises
    ------
    ValueError
        If ``number_of_recommendations`` is smaller than 1.
    KeyError
        If ``idx`` is not a known ``track_id``.
    """
    if number_of_recommendations < 1:
        raise ValueError('number_of_recommendations must be at least 1')

    # Always build a 2-D (1, n_features) query, even if the label were duplicated.
    query = spotify_df.loc[[idx]].to_numpy()[:1]
    print('Searching for recommendations...')

    # The query song is its own nearest neighbour, so ask for one extra hit
    # (capped at the number of available rows) and discard the query below.
    n_query = min(number_of_recommendations + 1, len(spotify_df))
    _, indices = model.kneighbors(query, n_neighbors=n_query)

    # kneighbors returns row positions in the fitted matrix, hence iloc.
    neighbours = df.iloc[indices[0]]
    neighbours = neighbours[neighbours['track_id'] != idx]
    print(neighbours[['track_name', 'artists']].head(number_of_recommendations))

In [None]:
# Look up the song by exact title and list every matching artist/track pair.
name = input('Enter song title: ')
print('Search results: ')
print(df.loc[df['track_name'] == name, ['artists', 'track_name']].dropna())

# The user picks one of the listed rows by its index label.
ind = int(input('Enter the index value of the required song: '))
idx = df.at[ind, 'track_id']
song = df.at[ind, 'track_name']
artists = df.at[ind, 'artists']

print('Song selected is ', song, 'by', artists)

nor = int(input('Enter number of recommendations: '))

recommend(idx, knn, nor)

Enter song title: Hello
Search results: 
                       artists track_name
9357                     Adele      Hello
21284            Lionel Richie      Hello
24883               Izzy Vadim      Hello
26138  Consoul Trainin;Zaheera      Hello
29401         MR TOUT LE MONDE      Hello
43143                    µ-Ziq      Hello
44148   Swattrex;The Two Locks      Hello
48959            Berry Goodman      Hello
66136            The Baseballs      Hello
Enter the index value of the required song: 9357
Song selected is  Hello by Adele
Enter number of recommendations: 5
Searching for recommendations...
                       track_name                             artists
9390            Chasing Pavements                               Adele
65908                    Warriors                     Imagine Dragons
43679  Rasiya (From "Brahmastra")  Pritam;Shreya Ghoshal;TUSHAR JOSHI
2384                        Black                           Pearl Jam


In [None]:
import pickle

# Persist the fitted model; the context manager guarantees the file handle is
# flushed and closed even if pickling raises.
with open('knn_model.pkl', 'wb') as model_file:
    pickle.dump(knn, model_file)

In [None]:
# Export the feature table; index=True writes the track_id index as the first CSV column.
spotify_df.to_csv('data_with_index.csv', index=True)

In [None]:
# Export the de-duplicated track frame; index=True also writes the row index as the first CSV column.
df.to_csv('df_good.csv', index=True)