# Recommender System

## Importing Libraries and DataSet

In [1]:
import numpy as np
import pandas as pd

In [2]:
data = pd.read_csv('data.csv')

## PreProcessing

In [3]:
import spacy

nlp = spacy.load("en_core_web_sm")

def preprocess_text(text):
    doc = nlp(text.lower())
    lemmatized_text = " ".join([token.lemma_ for token in doc if not token.is_stop and token.is_alpha])
    print("lemmatized_text: ", lemmatized_text)
    return lemmatized_text

## Create TF-IDF matrix

In [4]:
from sklearn.feature_extraction.text import TfidfVectorizer

tfidf_vectorizer = TfidfVectorizer(preprocessor=preprocess_text)

corpus = [
    f"{title} {singer} {date} {loudness} {album}" 
    for title, singer, date, loudness, album in zip(data['Title'], data['Singer'], data['Date'], data['Loudness'], data['Album'])
]

tfidf_matrix = tfidf_vectorizer.fit_transform(corpus)

lemmatized_text:  justified ancient stand jam klf song collection
lemmatized_text:  know want calle ocho pitbull pitbull star rebelution
lemmatized_text:  broken heart britney spear baby time digital deluxe version
lemmatized_text:  apeman remastered version kink lola powerman moneygoround pt percy super deluxe
lemmatized_text:  want rolling stone let bleed
lemmatized_text:  stop remaster fleetwood mac rumour
lemmatized_text:  eastside halsey khalid benny blanco halsey khalid eastside halsey khalid
lemmatized_text:  way look tonight edit version elton john candle wind
lemmatized_text:  juke box hero foreigner expand
lemmatized_text:  mercy shawn mend illuminate deluxe
lemmatized_text:  like run jason nevins great hit
lemmatized_text:  door away sun
lemmatized_text:  listen band single version monkee good monkee
lemmatized_text:  little luck remastere wing london town
lemmatized_text:  sing ed sheeran x deluxe edition
lemmatized_text:  mississippi pussycat
lemmatized_text:  flava nathan

## Retreive Similar Songs

In [5]:
from sklearn.metrics.pairwise import cosine_similarity

def find_similar_songs(target_song, top_n=5):
    target_song = preprocess_text(target_song)
    target_song_tfidf = tfidf_vectorizer.transform([target_song])
    print("target_song_tfidf: ", target_song_tfidf)

    similarities = cosine_similarity(target_song_tfidf, tfidf_matrix)[0]
    similar_indices = similarities.argsort()[-top_n:][::-1]

    similar_songs = data.iloc[similar_indices]
    return similar_songs

## Recommending

In [12]:
target_song = "songs like Mercy by Shawn Mendes"  # Replace with your input
similar_songs = find_similar_songs(target_song)

print("Recommended songs:\n")
for idx, row in similar_songs.iterrows():
    print("'", row['Title'], "' by '", row['Singer'], "'")

lemmatized_text:  song like mercy shawn mende
lemmatized_text:  song like mercy shawn mende
  (0, 6732)	0.35163094838403786
  (0, 6485)	0.46791069713283817
  (0, 4706)	0.5300261831190879
  (0, 4698)	0.5086480006894225
  (0, 4226)	0.34316863529475966
Recommended songs:

' If I Can't Have You ' by ' Shawn Mendes '
' Nervous ' by ' Shawn Mendes '
' Mercy ' by ' Shawn Mendes '
' Lover (Remix) [feat. Shawn Mendes] ' by ' Taylor Swift, Shawn Mendes '
' Señorita ' by ' Shawn Mendes, Camila Cabello '
