# Data Science Intern @LGMVIP July, 2022

## Beginner Level Task - 03

## Title : Music Recommendation

In [24]:
# Importing the Libraries

import random
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer

#### Dataset Link: https://www.kaggle.com/datasets/kanishkbansalajm/recommendationsystemspotify

In [25]:
# Loading the Dataset
# 'spotify.csv' is a relative path, so the file must sit in the notebook's
# working directory (it comes from the Kaggle dataset linked above).

df = pd.read_csv('spotify.csv')

# Preview the first rows to check that the file parsed as expected
df.head()

Unnamed: 0,valence,year,acousticness,artists,danceability,duration_ms,energy,explicit,id,instrumentalness,key,liveness,loudness,mode,name,popularity,release_date,speechiness,tempo
0,0.0594,1921,0.982,"['Sergei Rachmaninoff', 'James Levine', 'Berli...",0.279,831667,0.211,0,4BJqT0PrAfrxzMOxytFOIz,0.878,10,0.665,-20.096,1,"Piano Concerto No. 3 in D Minor, Op. 30: III. ...",4,1921,0.0366,80.954
1,0.963,1921,0.732,['Dennis Day'],0.819,180533,0.341,0,7xPhfUan2yNtyFG0cUWkt8,0.0,7,0.16,-12.441,1,Clancy Lowered the Boom,5,1921,0.415,60.936
2,0.0394,1921,0.961,['KHP Kridhamardawa Karaton Ngayogyakarta Hadi...,0.328,500062,0.166,0,1o6I8BglA6ylDMrIELygv1,0.913,3,0.101,-14.85,1,Gati Bali,5,1921,0.0339,110.339
3,0.165,1921,0.967,['Frank Parker'],0.275,210000,0.309,0,3ftBPsC5vPBKxYSee08FDH,2.8e-05,5,0.381,-9.316,1,Danny Boy,3,1921,0.0354,100.109
4,0.253,1921,0.957,['Phil Regan'],0.418,166693,0.193,0,4d6HGyGT8e121BsdKmw9v6,2e-06,3,0.229,-10.096,1,When Irish Eyes Are Smiling,2,1921,0.038,101.665


In [26]:
# Finding the shape of the Dataset as a (rows, columns) tuple

df.shape

(170653, 19)

In [27]:
# Getting all the columns of the Dataset
# (used below to pick the audio-feature columns and the song-name column)

df.columns

Index(['valence', 'year', 'acousticness', 'artists', 'danceability',
       'duration_ms', 'energy', 'explicit', 'id', 'instrumentalness', 'key',
       'liveness', 'loudness', 'mode', 'name', 'popularity', 'release_date',
       'speechiness', 'tempo'],
      dtype='object')

In [28]:
# Getting the Overall Information of the Dataset
# (per-column non-null counts and dtypes)

df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 170653 entries, 0 to 170652
Data columns (total 19 columns):
 #   Column            Non-Null Count   Dtype  
---  ------            --------------   -----  
 0   valence           170653 non-null  float64
 1   year              170653 non-null  int64  
 2   acousticness      170653 non-null  float64
 3   artists           170653 non-null  object 
 4   danceability      170653 non-null  float64
 5   duration_ms       170653 non-null  int64  
 6   energy            170653 non-null  float64
 7   explicit          170653 non-null  int64  
 8   id                170653 non-null  object 
 9   instrumentalness  170653 non-null  float64
 10  key               170653 non-null  int64  
 11  liveness          170653 non-null  float64
 12  loudness          170653 non-null  float64
 13  mode              170653 non-null  int64  
 14  name              170653 non-null  object 
 15  popularity        170653 non-null  int64  
 16  release_date      17

In [29]:
# Counting the missing values in each column of the Dataset

df.isna().sum()

valence             0
year                0
acousticness        0
artists             0
danceability        0
duration_ms         0
energy              0
explicit            0
id                  0
instrumentalness    0
key                 0
liveness            0
loudness            0
mode                0
name                0
popularity          0
release_date        0
speechiness         0
tempo               0
dtype: int64

In [30]:
# Selecting the Columns for Training the Model
# .copy() gives train_data its own storage: later cells assign columns to
# train_data, and doing that on a plain selection from df makes pandas emit
# SettingWithCopyWarning.

train_data = df[['valence','acousticness','danceability','energy','explicit',
                 'instrumentalness','liveness','loudness','speechiness','tempo']].copy()

train_data.head()

Unnamed: 0,valence,acousticness,danceability,energy,explicit,instrumentalness,liveness,loudness,speechiness,tempo
0,0.0594,0.982,0.279,0.211,0,0.878,0.665,-20.096,0.0366,80.954
1,0.963,0.732,0.819,0.341,0,0.0,0.16,-12.441,0.415,60.936
2,0.0394,0.961,0.328,0.166,0,0.913,0.101,-14.85,0.0339,110.339
3,0.165,0.967,0.275,0.309,0,2.8e-05,0.381,-9.316,0.0354,100.109
4,0.253,0.957,0.418,0.193,0,2e-06,0.229,-10.096,0.038,101.665


In [31]:
# Selecting the Target Column
# Only the song names are kept here; later cells use them to look a song up
# by name and to print the recommended titles.

target = df[['name']]

target.head()

Unnamed: 0,name
0,"Piano Concerto No. 3 in D Minor, Op. 30: III. ..."
1,Clancy Lowered the Boom
2,Gati Bali
3,Danny Boy
4,When Irish Eyes Are Smiling


In [None]:
# Normalizing the values of the Training Columns
# MinMaxScaler rescales every column independently to the [0, 1] range, so one
# fit_transform over the whole frame yields the same values as scaling the
# columns one at a time. It also leaves `normalize` fitted on all the columns
# instead of only the last one visited by a loop.
# Building a fresh DataFrame (rather than assigning column by column) avoids
# writing into a selection taken from df.

normalize = MinMaxScaler()

train_data = pd.DataFrame(normalize.fit_transform(train_data),
                          columns=train_data.columns,
                          index=train_data.index)

In [33]:
# Previewing the training columns after normalization
train_data.head()

Unnamed: 0,valence,acousticness,danceability,energy,explicit,instrumentalness,liveness,loudness,speechiness,tempo
0,0.0594,0.985944,0.282389,0.211,0.0,0.878,0.665,0.624916,0.037732,0.33245
1,0.963,0.73494,0.828947,0.341,0.0,0.0,0.16,0.744797,0.427835,0.250243
2,0.0394,0.964859,0.331984,0.166,0.0,0.913,0.101,0.707071,0.034948,0.453125
3,0.165,0.970884,0.27834,0.309,0.0,2.8e-05,0.381,0.793736,0.036495,0.411113
4,0.253,0.960843,0.423077,0.193,0.0,2e-06,0.229,0.781521,0.039175,0.417503


In [None]:
# Applying K-Means Clustering Algorithm to Classify the songs into n-categories
# Here I have taken 10 clusters as there are 10 features to be used to train
# NOTE: the cluster count is a heuristic choice; it is not derived from the data.

# Leaving out any existing 'cluster' column keeps this cell re-runnable: labels
# from a previous run are never fed back in as an extra feature.
feature_columns = [col for col in train_data.columns if col != 'cluster']

# A fixed random_state makes the cluster labels reproducible between runs.
# n_init is set explicitly because its default differs between scikit-learn versions.
cluster = KMeans(n_clusters=10, random_state=42, n_init=10)

# assign() returns a new frame, so nothing is written into a selection of df
train_data = train_data.assign(cluster=cluster.fit_predict(train_data[feature_columns]))

In [35]:
# Previewing the training columns together with the assigned cluster label
train_data.head()

Unnamed: 0,valence,acousticness,danceability,energy,explicit,instrumentalness,liveness,loudness,speechiness,tempo,cluster
0,0.0594,0.985944,0.282389,0.211,0.0,0.878,0.665,0.624916,0.037732,0.33245,4
1,0.963,0.73494,0.828947,0.341,0.0,0.0,0.16,0.744797,0.427835,0.250243,9
2,0.0394,0.964859,0.331984,0.166,0.0,0.913,0.101,0.707071,0.034948,0.453125,4
3,0.165,0.970884,0.27834,0.309,0.0,2.8e-05,0.381,0.793736,0.036495,0.411113,1
4,0.253,0.960843,0.423077,0.193,0.0,2e-06,0.229,0.781521,0.039175,0.417503,1


In [42]:
# Building a list of the song names in lower case, in dataset order

target_music = list(map(str.lower, target['name']))

In [49]:
# Declaring recommend() function to return the indexes of similar songs

def recommend(song, n):
    """Return the indexes of up to ``n`` songs that share a cluster with ``song``.

    Parameters
    ----------
    song : str
        Song name to look up. It is matched exactly against the entries of
        ``target_music`` (which are lower-cased), so pass it lower-cased.
        When several songs share the name, the first occurrence is used.
    n : int
        Number of recommendations wanted. Values below 1 give an empty list.

    Returns
    -------
    list of int or str
        A flat list of row positions of other songs in the same cluster:
        a random sample of ``n`` of them when the cluster holds at least ``n``
        other songs, otherwise all of them. If ``song`` is not found, the
        message string ``'No such Song is present in the List'`` is returned
        instead.
    """
    # Only a failed lookup means "unknown song"; other errors (for example a
    # missing 'cluster' column) are left to surface instead of being mislabelled.
    try:
        loc_music = target_music.index(song)
    except ValueError:
        return 'No such Song is present in the List'

    cluster_labels = train_data['cluster'].tolist()
    loc_cluster = cluster_labels[loc_music]

    # Every other song carrying the same cluster label is a candidate
    music_recommend = [i for i, label in enumerate(cluster_labels)
                       if label == loc_cluster and i != loc_music]

    # random.sample rejects negative sizes, so handle "nothing requested" here
    if n <= 0:
        return []

    if n <= len(music_recommend):
        return random.sample(music_recommend, n)

    # Fewer candidates than requested: return them all as a flat list of indexes
    return music_recommend

In [61]:
# Displaying the n Recommended Songs

song = input("Enter the Name of the Song: ")
n = int(input("Enter the no. of recommendations to show: "))

m = recommend(song.lower(),n)

if(type(m) == list):
    print('\nThe Recommended Songs are: \n')
    for i,ele in enumerate(m):
        print(f"{i+1}. {target.name[ele]}")
else:
    print(m)

Enter the Name of the Song: In My Blood
Enter the no. of recommendations to show: 5

The Recommended Songs are: 

1. Desert Skies
2. Look Away
3. Destiny (feat. Jordan Hill & Billy Porter)
4. The Seashores Of Old Mexico
5. Shelf In The Room
