In [None]:
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt 
import seaborn as sns

In [None]:
# Load the two CSV inputs from the working directory: the song table
# ('data.csv') and the per-genre table ('data_by_genres.csv').
data, data_genre = (
    pd.read_csv(csv_name) for csv_name in ('data.csv', 'data_by_genres.csv')
)

In [None]:
# Preview the table loaded from 'data.csv'.
data

In [None]:
# Number of missing values in each column of the song table.
data.isna().sum()

In [None]:
# Preview the table loaded from 'data_by_genres.csv'.
data_genre

In [None]:
# Number of missing values in each column of the genre table.
data_genre.isna().sum()

# Clustering the Genres

In [None]:
# Columns used as the numeric feature vector of a song or genre throughout
# the notebook (scaling, clustering and similarity all index with this list).
features = [
    'acousticness',
    'danceability',
    'energy',
    'instrumentalness',
    'liveness',
    'loudness',
    'speechiness',
    'valence',
    'tempo',
    'duration_ms',
    'key',
]


In [None]:
# Keep only the feature columns of the genre table; this is the input to
# scaling and clustering below.
numeric_genre_data = data_genre.loc[:, features]
numeric_genre_data

In [None]:
# Standardise every genre feature column (StandardScaler: zero mean, unit
# variance) so the columns are on a comparable scale for clustering.
from sklearn.preprocessing import StandardScaler

genre_scaler = StandardScaler()
genre_scaler.fit(numeric_genre_data)
scaled_genre_data = genre_scaler.transform(numeric_genre_data)
scaled_genre_data

In [None]:
# Group the genres into 10 clusters with K-Means on the standardised features.
from sklearn.cluster import KMeans

# K-Means starts from randomly chosen centroids; pinning random_state makes the
# cluster assignments identical on every Restart & Run All.
genre_cluster = KMeans(n_clusters = 10, random_state = 42)
genre_cluster.fit(scaled_genre_data)

In [None]:
# Cluster id assigned to each genre row by the fitted K-Means model.
genre_cluster_labels = genre_cluster.labels_
# Store the ids as a 'clusters' column on the genre table (this mutates data_genre in place).
data_genre['clusters'] = genre_cluster_labels
# Display the genre table with the new column.
data_genre

In [None]:
# Project the standardised genre features down to 2-D with t-SNE; the result is
# only used for plotting the clusters.
from sklearn.manifold import TSNE

# t-SNE is stochastic; a fixed random_state keeps the layout reproducible
# between runs.
genre_reducer = TSNE(n_components = 2, random_state = 42)
reduced_genre_data = genre_reducer.fit_transform(scaled_genre_data)

# Collect the 2-D coordinates together with each genre's name and cluster id.
# The frame is built on data_genre's index so the two column assignments below,
# which align on index, are guaranteed to line up row for row.
new_genre_dataset = pd.DataFrame(columns = ['x-axis', 'y-axis'], data = reduced_genre_data, index = data_genre.index)
new_genre_dataset['genres'] = data_genre['genres']
new_genre_dataset['clusters'] = data_genre['clusters']

In [None]:
# Preview the 2-D coordinates with their genre names and cluster ids.
new_genre_dataset

In [None]:
# Scatter plot of the 2-D genre projection, one point per genre, coloured by cluster.
import plotly.express as px

# Cluster ids are labels, not quantities: casting them to strings makes plotly
# assign one discrete colour per cluster instead of a continuous colour scale.
genre_plot_data = new_genre_dataset.assign(clusters = new_genre_dataset['clusters'].astype(str))
genre_cluster_graph = px.scatter(
    genre_plot_data,
    x = 'x-axis',
    y = 'y-axis',
    color = 'clusters',
    hover_data = ['x-axis', 'y-axis', 'genres'],
    title = 'Genre clusters (2-D projection)',
)
genre_cluster_graph.show()

# Clustering of songs with names

In [None]:
# Keep only the feature columns of the song table; this is the input to
# scaling and clustering below.
numeric_song_data = data.loc[:, features]
numeric_song_data

In [None]:
# Standardise every song feature column (StandardScaler: zero mean, unit
# variance) so the columns are on a comparable scale for clustering.
from sklearn.preprocessing import StandardScaler

song_scaler = StandardScaler()
song_scaler.fit(numeric_song_data)
scaled_song_data = song_scaler.transform(numeric_song_data)

In [None]:
# Group the songs into 20 clusters with K-Means on the standardised features.
from sklearn.cluster import KMeans

# K-Means starts from randomly chosen centroids; pinning random_state makes the
# cluster assignments identical on every Restart & Run All.
song_cluster = KMeans(n_clusters = 20, random_state = 42)
song_cluster.fit(scaled_song_data)

In [None]:
# Cluster id assigned to each song row by the fitted K-Means model.
song_cluster_labels = song_cluster.labels_

# Store the ids as a 'clusters' column on the song table (this mutates data in place).
data['clusters'] = song_cluster_labels


In [None]:
# Project the standardised song features down to 2-D for plotting.
# PCA (a linear projection) is used here rather than t-SNE; the notebook's
# working assumption is that the song features are linearly related.

from sklearn.decomposition import PCA

# With the default svd_solver='auto', scikit-learn may choose a randomised
# solver depending on the data shape and library version; fixing random_state
# keeps the projection reproducible in that case.
song_reducer = PCA(n_components = 2, random_state = 42)
reduced_song_data = song_reducer.fit_transform(scaled_song_data)

In [None]:
# Pair each song's two PCA coordinates with its name and K-Means cluster id
# (both columns are aligned to the new frame by index).
new_song_dataset = pd.DataFrame(reduced_song_data, columns = ['x-axis', 'y-axis']).assign(
    name = data['name'],
    clusters = data['clusters'],
)

In [None]:
# Scatter plot of the 2-D song projection, one point per song, coloured by cluster.
import plotly.express as px

# Cluster ids are labels, not quantities: casting them to strings makes plotly
# assign one discrete colour per cluster instead of a continuous colour scale.
song_plot_data = new_song_dataset.assign(clusters = new_song_dataset['clusters'].astype(str))
song_graph = px.scatter(
    song_plot_data,
    x = 'x-axis',
    y = 'y-axis',
    color = 'clusters',
    # 'name' is included so hovering a point identifies the song, mirroring the
    # genre plot, which shows 'genres' on hover.
    hover_data = ['x-axis', 'y-axis', 'clusters', 'name'],
    title = 'Song clusters (2-D PCA projection)',
)
song_graph.show()

# Recommending Songs
Recommend songs when the input is a song name.

In [None]:
from sklearn.metrics.pairwise import cosine_similarity

In [None]:
def GenreBasedRecommender1(song_name, top_n = 5):
    """Recommend songs close to the genre that best matches ``song_name``.

    The song's feature vector is compared (cosine similarity) with every row of
    ``data_genre``; the most similar genre row is taken as the song's genre.
    Every song in ``data`` is then scored against that genre's feature vector
    and the highest-scoring songs are returned.

    Parameters
    ----------
    song_name : str
        Value to look up in ``data['name']``. If several rows share the name,
        the first one is used.
    top_n : int, default 5
        Maximum number of recommendations to return.

    Returns
    -------
    dict
        Song name -> cosine similarity to the matched genre, ordered from most
        to least similar. Each name appears once, with its best score.

    Raises
    ------
    IndexError
        If ``song_name`` does not occur in ``data['name']``.
    """
    # NOTE(review): the raw, unscaled feature columns are compared here. If the
    # columns differ greatly in magnitude, the largest ones will dominate the
    # cosine similarity -- consider comparing standardised features instead.
    matching_rows = data.loc[data['name'] == song_name, features]
    if matching_rows.empty:
        raise IndexError(f"song {song_name!r} was not found in data['name']")
    song_values = matching_rows.iloc[[0]].to_numpy(dtype = float)

    # Score the song against every genre in one call; result row 0 holds one
    # similarity per genre row.
    genre_matrix = data_genre[features].to_numpy(dtype = float)
    genre_similarity = cosine_similarity(song_values, genre_matrix)[0]
    # argmax returns the first maximum, so the earliest genre row wins ties.
    genre_features = genre_matrix[[genre_similarity.argmax()]]

    # Score every song against the matched genre; column 0 holds one similarity
    # per song row.
    song_matrix = data[features].to_numpy(dtype = float)
    song_similarity = cosine_similarity(song_matrix, genre_features)[:, 0]

    ranked = pd.Series(song_similarity, index = data['name'].to_numpy())
    ranked = ranked.sort_values(ascending = False, kind = 'stable')
    # The result is keyed by song name, so keep only the best score per name.
    ranked = ranked[~ranked.index.duplicated(keep = 'first')]
    return ranked.head(top_n).to_dict()

In [None]:
# Ask for recommendations based on a single seed song.
best_songs = GenreBasedRecommender1("Clancy Lowered the Boom")
# Order from highest to lowest similarity so the slice below keeps the
# strongest matches.
sorted_best_songs = dict(sorted(best_songs.items(), key = lambda x: x[1], reverse = True))
# Print the five best-ranked song names.
best_songs_list = list(sorted_best_songs.keys())
for recommended_song in best_songs_list[:5]:
    print(recommended_song)

# Recommending Songs 
Recommend songs when the input is a genre name.

In [None]:
from sklearn.metrics.pairwise import cosine_similarity

In [None]:
def GenreBasedRecommender2(genre_name, top_n = 5):
    """Recommend the songs whose features are closest to a given genre.

    Every song in ``data`` is scored (cosine similarity) against the feature
    vector of the ``data_genre`` row named ``genre_name`` and the
    highest-scoring songs are returned.

    Parameters
    ----------
    genre_name : str
        Value to look up in ``data_genre['genres']``. If several rows share the
        name, the first one is used.
    top_n : int, default 5
        Maximum number of recommendations to return.

    Returns
    -------
    dict
        Song name -> cosine similarity to the genre, ordered from most to least
        similar. Each name appears once, with its best score.

    Raises
    ------
    IndexError
        If ``genre_name`` does not occur in ``data_genre['genres']``.
    """
    # NOTE(review): the raw, unscaled feature columns are compared here. If the
    # columns differ greatly in magnitude, the largest ones will dominate the
    # cosine similarity -- consider comparing standardised features instead.
    genre_rows = data_genre.loc[data_genre['genres'] == genre_name, features]
    if genre_rows.empty:
        raise IndexError(f"genre {genre_name!r} was not found in data_genre['genres']")
    genre_features = genre_rows.iloc[[0]].to_numpy(dtype = float)

    # Score every song against the genre in one call; column 0 holds one
    # similarity per song row.
    song_matrix = data[features].to_numpy(dtype = float)
    similarity = cosine_similarity(song_matrix, genre_features)[:, 0]

    ranked = pd.Series(similarity, index = data['name'].to_numpy())
    ranked = ranked.sort_values(ascending = False, kind = 'stable')
    # The result is keyed by song name, so keep only the best score per name.
    ranked = ranked[~ranked.index.duplicated(keep = 'first')]
    return ranked.head(top_n).to_dict()

In [None]:
# Ask for recommendations based on a genre name.
best_songs = GenreBasedRecommender2("a cappella")
# Order from highest to lowest similarity so the slice below keeps the
# strongest matches.
sorted_best_songs = dict(sorted(best_songs.items(), key = lambda x: x[1], reverse = True))
# Print the five best-ranked song names.
best_songs_list = list(sorted_best_songs.keys())
for recommended_song in best_songs_list[:5]:
    print(recommended_song)