Importing the dependencies

In [35]:
import numpy as np
import pandas as pd
import difflib
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

Data Collection and Pre-Processing

In [36]:
# loading the data from the csv file to apandas dataframe
music_data = pd.read_csv('/content/music.csv')

In [37]:
# printing the first 5 rows of the dataframe
music_data.head()

Unnamed: 0,age,gender,genre,title,singer,vote,index
0,20,1,HipHop,fight the power,A,6.0,0
1,23,1,HipHop,passing by me,A,6.3,1
2,25,1,HipHop,grindin,A,9.2,2
3,26,1,Jazz,at last,C,6.4,3
4,29,1,Jazz,take five,C,9.3,4


In [38]:
# number of rows and columns in the data frame

music_data.shape

(18, 7)

In [39]:
# selecting the relevant features for recommendation

selected_features = ['genre','vote','singer']
print(selected_features)

['genre', 'vote', 'singer']


In [40]:
# replacing the null valuess with null string

for feature in selected_features:
  music_data[feature]=music_data[feature].fillna('')

In [41]:
# combining all the 5 selected features

combined_features = music_data['genre']+' '+music_data['singer']

In [42]:
print(combined_features)

0        HipHop A
1        HipHop A
2        HipHop A
3          Jazz C
4          Jazz C
5          Jazz C
6     Classical B
7     Classical B
8     Classical B
9         Dance D
10        Dance D
11        Dance D
12     Acoustic E
13     Acoustic E
14     Acoustic E
15    Classical F
16    Classical F
17    Classical F
dtype: object


In [43]:
# converting the text data to feature vectors

vectorizer = TfidfVectorizer()

In [44]:
feature_vectors = vectorizer.fit_transform(combined_features)

In [45]:
print(feature_vectors)

  (0, 3)	1.0
  (1, 3)	1.0
  (2, 3)	1.0
  (3, 4)	1.0
  (4, 4)	1.0
  (5, 4)	1.0
  (6, 1)	1.0
  (7, 1)	1.0
  (8, 1)	1.0
  (9, 2)	1.0
  (10, 2)	1.0
  (11, 2)	1.0
  (12, 0)	1.0
  (13, 0)	1.0
  (14, 0)	1.0
  (15, 1)	1.0
  (16, 1)	1.0
  (17, 1)	1.0


Cosine Similarity

In [46]:
# getting the similarity scores using cosine similarity

similarity = cosine_similarity(feature_vectors)

In [47]:
print(similarity)

[[1. 1. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [1. 1. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [1. 1. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 1. 1. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 1. 1. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 1. 1. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 1. 1. 1. 0. 0. 0. 0. 0. 0. 1. 1. 1.]
 [0. 0. 0. 0. 0. 0. 1. 1. 1. 0. 0. 0. 0. 0. 0. 1. 1. 1.]
 [0. 0. 0. 0. 0. 0. 1. 1. 1. 0. 0. 0. 0. 0. 0. 1. 1. 1.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 1. 1. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 1. 1. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 1. 1. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 1. 1. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 1. 1. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 1. 1. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 1. 1. 1. 0. 0. 0. 0. 0. 0. 1. 1. 1.]
 [0. 0. 0. 0. 0. 0. 1. 1. 1. 0. 0. 0. 0. 0. 0. 1. 1. 1.]
 [0. 0. 0. 0. 0. 0. 1. 1. 1. 0.

In [48]:
print(similarity.shape)

(18, 18)


Getting the movie name from the user

In [49]:
# getting the music name from the user

music_name = input(' Enter your favourite music name : ')

 Enter your favourite music name : mozart


In [51]:
# creating a list with all the music names given in the dataset

list_of_all_titles = music_data['title'].tolist()
print(list_of_all_titles)

['fight the power', 'passing by me', 'grindin', 'at last', 'take five', 'strange fruit', 'mozart', 'beethoven', 'puccini', 'no lie', 'blinding lights', 'just dance', 'i got you', 'XO', 'lucky', 'vivaldi', 'bizet', 'ravel']


In [52]:
# finding the close match for the music name given by the user

find_close_match = difflib.get_close_matches(music_name, list_of_all_titles)
print(find_close_match)

['mozart']


In [53]:
close_match = find_close_match[0]
print(close_match)

mozart


In [54]:
# finding the index of the music with title

index_of_the_music = music_data[music_data.title == close_match]['index'].values[0]
print(index_of_the_music)

6


In [55]:
# getting a list of similar music

similarity_score = list(enumerate(similarity[index_of_the_music]))
print(similarity_score)

[(0, 0.0), (1, 0.0), (2, 0.0), (3, 0.0), (4, 0.0), (5, 0.0), (6, 1.0), (7, 1.0), (8, 1.0), (9, 0.0), (10, 0.0), (11, 0.0), (12, 0.0), (13, 0.0), (14, 0.0), (15, 1.0), (16, 1.0), (17, 1.0)]


In [56]:
len(similarity_score)

18

In [65]:
# sorting the music based on their similarity score

sorted_similar_music = sorted(similarity_score, key = lambda x:x[1], reverse = True) 
print(sorted_similar_music)

[(6, 1.0), (7, 1.0), (8, 1.0), (15, 1.0), (16, 1.0), (17, 1.0), (0, 0.0), (1, 0.0), (2, 0.0), (3, 0.0), (4, 0.0), (5, 0.0), (9, 0.0), (10, 0.0), (11, 0.0), (12, 0.0), (13, 0.0), (14, 0.0)]


In [66]:
# print the name of similar music based on the index

print('Movies suggested for you : \n')

i = 1

for music in sorted_similar_music:
  index = music[0]
  title_from_index = music_data[music_data.index==index]['title'].values[0]
  if (i<5):
    print(i, '.',title_from_index)
    i+=1

Movies suggested for you : 

1 . mozart
2 . beethoven
3 . puccini
4 . vivaldi


In [73]:
music_name = input(' Enter your favourite music name : ')

list_of_all_titles = music_data['title'].tolist()

find_close_match = difflib.get_close_matches(music_name, list_of_all_titles)

close_match = find_close_match[0]

index_of_the_music = music_data[music_data.title == close_match]['index'].values[0]

similarity_score = list(enumerate(similarity[index_of_the_music]))

sorted_similar_music = sorted(similarity_score, key = lambda x:x[1], reverse = True) 

print('Music suggested for you : \n')

i = 1

for music in sorted_similar_music:
  index = music[0]
  title_from_index = music_data[music_data.index==index]['title'].values[0]
  if (i<5):
    print(i, '.',title_from_index)
    i+=1

 Enter your favourite music name : mozart
Music suggested for you : 

1 . mozart
2 . beethoven
3 . puccini
4 . vivaldi
