In [1]:
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity


In [2]:
# Load the song chart dataset from the working directory.
# low_memory=False asks pandas not to parse the file in chunks, which avoids
# mixed-type inference within a column.
data = pd.read_csv(
    filepath_or_buffer="song-dataset.csv",
    low_memory=False,
)

In [3]:
# Preview the first rows to confirm the file parsed into the expected columns.
data.head()

Unnamed: 0,Position,Artist Name,Song Name,Days,Top 10 (xTimes),Peak Position,Peak Position (xTimes),Peak Streams,Total Streams
0,1,Post Malone,Sunflower SpiderMan: Into the SpiderVerse,1506,302.0,1,(x29),2118242,883369738
1,2,Juice WRLD,Lucid Dreams,1673,178.0,1,(x20),2127668,864832399
2,3,Lil Uzi Vert,XO TOUR Llif3,1853,212.0,1,(x4),1660502,781153024
3,4,J. Cole,No Role Modelz,2547,6.0,7,0,659366,734857487
4,5,Post Malone,rockstar,1223,186.0,1,(x124),2905678,718865961


In [4]:
# Summarise each column's dtype and non-null count to spot missing values.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11084 entries, 0 to 11083
Data columns (total 9 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   Position                11084 non-null  int64  
 1   Artist Name             11084 non-null  object 
 2   Song Name               11080 non-null  object 
 3   Days                    11084 non-null  int64  
 4   Top 10 (xTimes)         11084 non-null  float64
 5   Peak Position           11084 non-null  int64  
 6   Peak Position (xTimes)  11084 non-null  object 
 7   Peak Streams            11084 non-null  int64  
 8   Total Streams           11084 non-null  int64  
dtypes: float64(1), int64(5), object(3)
memory usage: 779.5+ KB


In [5]:
# Count the missing values in each column.
data.isna().sum()

Position                  0
Artist Name               0
Song Name                 4
Days                      0
Top 10 (xTimes)           0
Peak Position             0
Peak Position (xTimes)    0
Peak Streams              0
Total Streams             0
dtype: int64

In [6]:
# Discard every row that has a missing value in any column.
data = data.dropna(axis="index", how="any")

In [7]:
# De-duplicate on the song title, keeping the first row seen for each title.
data = data.drop_duplicates(subset=["Song Name"], keep="first")

In [8]:
# Remove the spaces inside each artist name ("Post Malone" -> "PostMalone"),
# presumably so a multi-word artist becomes a single token when the text is
# vectorized later.
#
# `assign` builds a new frame instead of writing through `.loc` into a frame
# that came out of `drop_duplicates`; that in-place write can raise
# SettingWithCopyWarning. `regex=False` pins the replacement to a literal
# space regardless of the pandas version's default.
data = data.assign(
    **{"Artist Name": data["Artist Name"].str.replace(" ", "", regex=False)}
)


In [9]:
# Combine all columns into one space-separated string per row and assign it
# as a new "data" column.
#
# The "data" column itself is excluded from the join so that re-running this
# cell does not fold the previous result back into the text.
source_columns = data.columns.drop("data", errors="ignore")
data = data.assign(
    data=data[source_columns].astype(str).apply(" ".join, axis=1)
)


In [10]:
# Text-similarity model
# 1. CountVectorizer turns each row's combined text into a vector of token counts.
# 2. cosine_similarity compares every row with every other row and returns a
#    dense square matrix with one row and one column per song.
vectorizer = CountVectorizer()
vectorized = vectorizer.fit_transform(raw_documents=data["data"])
similarities = cosine_similarity(X=vectorized, dense_output=True)

In [11]:
# Assign the `similarities` values to a new dataframe: one row and one column
# per song, with the titles also exposed as a leading "Song Name" column.
df_tmp = pd.DataFrame(similarities, columns=data["Song Name"], index=data["Song Name"]).reset_index()

# Titles the user is allowed to ask for. Taken from the data rather than from
# `df_tmp.columns`, because the latter also contains the literal "Song Name"
# header, which is not a similarity column.
song_titles = data["Song Name"].tolist()


def _find_song(title, known_titles):
    """Return the stored title that matches ``title``, or ``None`` if there is none.

    An exact match wins. Otherwise surrounding whitespace and letter case are
    ignored and the first stored title that matches is returned.
    """
    if title in known_titles:
        return title

    wanted = title.strip().casefold()
    for known in known_titles:
        if str(known).strip().casefold() == wanted:
            return known
    return None


def _recommend(similarity_frame, title, top_n=10):
    """Return the ``top_n`` titles most similar to ``title``, best first.

    ``similarity_frame`` must have a "Song Name" column and a score column
    named ``title``. The queried song is removed explicitly instead of
    assuming it is ranked first.
    """
    scores = pd.Series(
        similarity_frame[title].to_numpy(),
        index=similarity_frame["Song Name"],
    )
    return scores.drop(labels=title).nlargest(top_n).index.tolist()


keep_running = True
while keep_running:
    print("The Top 10 Song Recommendation System")
    print("-------------------------------------")
    print("This will generate the 10 songs from the database thoese are similar to the song you entered.")

    # Keep asking until the entered song can be matched to a title in the database.
    while True:
        input_song = input("Please enter the name of song: ")
        matched_title = _find_song(input_song, song_titles)

        if matched_title is not None:
            recommendation = _recommend(df_tmp, matched_title)
            break

        print("Sorry, there is no song name in our database. Please try another one.")

    print("You should check out these songs: \n")
    for song in recommendation:
        print(song)

    print("\n")
    # Keep asking until the user answers yes or no (case and spacing are ignored).
    while True:
        next_command = input("Do you want to generate again for the next song? [yes, no] ").strip().lower()

        if next_command == "yes":
            break

        if next_command == "no":
            # Clearing the flag ends the outer loop and stops the whole script.
            keep_running = False
            break

        print("Please type 'yes' or 'no'")

The Top 10 Song Recommendation System
-------------------------------------
This will generate the 10 songs from the database thoese are similar to the song you entered.


Please enter the name of song:  lover


Sorry, there is no song name in our database. Please try another one.


Please enter the name of song:  karma


Sorry, there is no song name in our database. Please try another one.


Please enter the name of song:  rockstar


You should check out these songs: 

Reputation
Euthanasia
Patient
Leave
ABC
Congratulations
Playinwitme
ily
Cooped Up
Over Now




Do you want to generate again for the next song? [yes, no]  no
