#Import Modules

In [1]:
import numpy as np
import pandas as pd
import difflib    # to get the closest match if the user enters a misspelled title
from sklearn.feature_extraction.text import TfidfVectorizer # used to convert text/strings (title, director) into numeric values
from sklearn.metrics.pairwise import cosine_similarity # to measure the similarity between rows using a similarity score


#Loading the dataset

In [2]:
# Read the track catalogue; the path is resolved relative to the working directory.
music_data = pd.read_csv('music1.csv')

# Quick preview: the first 5 rows, followed by the (rows, columns) tuple.
print(music_data.head(5), music_data.shape, sep='\n')

   index                track_id                 artists  \
0      0  5SuOikwiRyPMVoIQDJUgSV             Gen Hoshino   
1      1  4qPNDBW1i3p13qLCt0Ki3A            Ben Woodward   
2      2  1iJBSr7s7jYXzM8EGcbK5b  Ingrid Michaelson;ZAYN   
3      3  6lfxq3CG4xtTiEg7opyCyx            Kina Grannis   
4      4  5vjLSffimiIP26QG5WcN2K        Chord Overstreet   

                                          album_name  \
0                                             Comedy   
1                                   Ghost (Acoustic)   
2                                     To Begin Again   
3  Crazy Rich Asians (Original Motion Picture Sou...   
4                                            Hold On   

                   track_name  popularity  duration_ms  explicit  \
0                      Comedy          73       230666     False   
1            Ghost - Acoustic          55       149610     False   
2              To Begin Again          57       210826     False   
3  Can't Help Falling In Love 

#Data Pre-Processing

In [3]:
# Candidate columns for recommendation (track_name is handled separately,
# when the user's query is matched against the titles).
selected_features = ['track_id','artists','album_name','popularity','duration_ms','danceability','energy','mode','track_genre','liveness']
print(selected_features)

# Only these text columns are actually combined and vectorized below.
text_features = ['track_id', 'artists', 'album_name', 'track_genre']

# Replace nulls with an empty string in the text columns only, and make sure
# every value is a str so the '+' concatenation below cannot fail.
# The numeric columns are deliberately left untouched: writing '' into them
# would mix strings with numbers inside a single column.
for feature in text_features:
  music_data[feature] = music_data[feature].fillna('').astype(str)

# Build one space-separated text document per track from the 4 text columns.
# NOTE(review): track_id looks like a per-row identifier; if it is unique it
# adds a token that can never match another row and only lowers every score.
# Consider dropping it from the combined text - confirm before changing.
combined_features = music_data['track_id']+' '+music_data['artists']+' '+music_data['album_name']+' '+music_data['track_genre']
print(combined_features)

# Convert the combined text into TF-IDF feature vectors (one row per track).
vectorizer = TfidfVectorizer()
feature_vectors = vectorizer.fit_transform(combined_features)
print(feature_vectors)


['track_id', 'artists', 'album_name', 'popularity', 'duration_ms', 'danceability', 'energy', 'mode', 'track_genre', 'liveness']
0        5SuOikwiRyPMVoIQDJUgSV Gen Hoshino Comedy acou...
1        4qPNDBW1i3p13qLCt0Ki3A Ben Woodward Ghost (Aco...
2        1iJBSr7s7jYXzM8EGcbK5b Ingrid Michaelson;ZAYN ...
3        6lfxq3CG4xtTiEg7opyCyx Kina Grannis Crazy Rich...
4        5vjLSffimiIP26QG5WcN2K Chord Overstreet Hold O...
                               ...                        
9996     3UFZkGoRoqZD2gp0ayZlM7 Canção & Louvor Profeti...
9997     0VYLCqCYrLo2aZ1mcdVDjH Planet Hemp Usuário brazil
9998     5BtFkPust4wifo9KOThNGI Cidade Negra;Lulu Santo...
9999     3TAxLyCtWcLqPBVE1rtj6k Legião Urbana O Descobr...
10000    2hETkH7cOfqmz3LqZDHZf5 Cesária Evora Miss Perf...
Length: 10001, dtype: object
  (0, 9467)	0.1827100723394914
  (0, 11109)	0.5285692759555124
  (0, 13262)	0.45157838217602303
  (0, 12715)	0.45157838217602303
  (0, 6938)	0.5285692759555124
  (1, 12746)	0.4289901516701319
  

#Finding the similarity scores using cosine similarity 

In [4]:
# Pairwise cosine similarity between the TF-IDF rows: entry [i, j] scores how
# closely the combined text of row i matches that of row j (a row compared
# with itself scores 1).
similarity = cosine_similarity(feature_vectors)

# Show the matrix, then its (rows, columns) shape on the next line.
print(similarity, similarity.shape, sep='\n')

[[1.         0.069337   0.0280771  ... 0.         0.         0.        ]
 [0.069337   1.         0.05831662 ... 0.         0.         0.        ]
 [0.0280771  0.05831662 1.         ... 0.         0.         0.        ]
 ...
 [0.         0.         0.         ... 1.         0.02173009 0.        ]
 [0.         0.         0.         ... 0.02173009 1.         0.        ]
 [0.         0.         0.         ... 0.         0.         1.        ]]
(10001, 10001)


##Music Recommendation System

In [7]:
music_name = input(' Enter your favourite music name : ')
#Enter your favourite music name : Can't Help Falling In Love

# How many suggestions to print. The original loop stopped after 29 rows
# (its counter check was `i<30`), so that count is kept here.
max_suggestions = 29

# All track names in the dataset. Rows with a missing name are skipped
# because difflib can only compare strings.
list_of_all_titles = music_data['track_name'].dropna().astype(str).tolist()

# Closest titles to what the user typed; this list is empty when nothing
# in the dataset is similar enough to the input.
find_close_match = difflib.get_close_matches(music_name, list_of_all_titles)

if find_close_match:
  close_match = find_close_match[0]

  # Row position of the first track with that title. The similarity matrix is
  # addressed by row position, so the position is computed directly instead of
  # trusting a CSV column to hold it.
  index_of_the_music = np.flatnonzero((music_data['track_name'] == close_match).to_numpy())[0]

  # (row position, similarity score) for every track, compared to the match
  similarity_score = list(enumerate(similarity[index_of_the_music]))

  # Highest score first; the matched track itself comes out on top.
  sorted_similar_musics = sorted(similarity_score, key = lambda x:x[1], reverse = True)

  print('\n Music suggested for you : \n')

  # Only the rows that are actually printed are looked up, by position.
  for i, (index, _score) in enumerate(sorted_similar_musics[:max_suggestions], start=1):
    row = music_data.iloc[index]
    print(i, '.',row['track_name'],'-',row['artists'],'-',row['album_name'])
else:
  print("No music found ,try again")


 Enter your favourite music name : love me like you do

 Music suggested for you : 

1 . Love Me Like You Do - Boyce Avenue - Cover Sessions, Vol. 4
2 . Can’t Help Falling in Love - Boyce Avenue - Cover Sessions, Vol. 6
3 . In Case You Didn’t Know - Boyce Avenue - Cover Sessions, Vol. 4
4 . Beautiful Soul - Boyce Avenue - Cover Sessions, Vol. 4
5 . Thinking out Loud - Boyce Avenue - Cover Sessions, Vol. 3
6 . Someone You Loved - Boyce Avenue - Cover Sessions, Vol. 6
7 . A Thousand Years - Boyce Avenue - Cover Sessions, Vol. 3
8 . Hey There Delilah - Boyce Avenue - Cover Sessions, Vol. 6
9 . Chasing Cars - Boyce Avenue - Cover Sessions, Vol. 5
10 . Hallelujah - Boyce Avenue - Cover Sessions, Vol. 6
11 . Just the Way You Are - Boyce Avenue - Cover Sessions, Vol. 2
12 . Rise - Boyce Avenue - Cover Sessions, Vol. 4
13 . I'll Be There For You (Friends Theme) - Boyce Avenue - Cover Sessions, Vol. 3
14 . Someone Like You - Boyce Avenue - Cover Sessions, Vol. 2
15 . Scared to Be Lonely - Boyce