<a href="https://colab.research.google.com/github/Annapoornadwivedi/Machine-Learning-30-days-of-code/blob/main/day21.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Music recommendation system

In [None]:
import numpy as np
import pandas as pd

## Load dataset

In [None]:
# Read the lyrics table from the Colab working directory and preview its first rows.
df = pd.read_csv(filepath_or_buffer='/content/songdata.csv')
df.head(n=5)

Unnamed: 0,artist,song,link,text
0,ABBA,Ahe's My Kind Of Girl,/a/abba/ahes+my+kind+of+girl_20598417.html,"Look at her face, it's a wonderful face \nAnd..."
1,ABBA,"Andante, Andante",/a/abba/andante+andante_20002708.html,"Take it easy with me, please \nTouch me gentl..."
2,ABBA,As Good As New,/a/abba/as+good+as+new_20003033.html,I'll never know why I had to go \nWhy I had t...
3,ABBA,Bang,/a/abba/bang_20598415.html,Making somebody happy is a question of give an...
4,ABBA,Bang-A-Boomerang,/a/abba/bang+a+boomerang_20002668.html,Making somebody happy is a question of give an...


In [None]:
# (rows, columns) of the frame as loaded, before any sampling or column drops.
df.shape

(57650, 4)

In [None]:
# Keep a random subset of at most 5000 songs and discard the 'link' column.
# - random_state pins the draw, so a fresh "Restart & Run All" yields the same rows
#   (and therefore the same similarity matrix and recommendations).
# - min(...) avoids a ValueError when the frame holds fewer than 5000 rows.
# - errors='ignore' keeps the cell re-runnable after 'link' has already been dropped.
# NOTE(review): the subset size is presumably chosen to keep the later pairwise
# similarity matrix manageable — confirm before raising it.
df = (
    df.sample(n=min(5000, len(df)), random_state=42)
    .drop(columns='link', errors='ignore')
    .reset_index(drop=True)
)
df.shape

(5000, 3)

In [None]:
# Normalise the lyrics: lower-case, strip punctuation, and turn newlines into spaces.
# Series.replace without regex=True only matches whole cell values, so the punctuation
# pattern never fired; the .str.replace accessor with regex=True applies it inside
# each string.
df['text'] = (
    df['text']
    .str.lower()
    .str.replace(r'[^\w\s]', '', regex=True)  # drop everything except word chars/whitespace
    .str.replace(r'\n', ' ', regex=True)      # newlines -> single spaces
)

In [None]:
# Show the cleaned lyrics of the row labelled 0 as a sanity check.
df.loc[0, 'text']

"walk away when you're angry   brace yourself, there's nothing to gain   old enough to know the outcome   more blood, it's always the same      [chorus]   aren't we cool and aren't we calm?   for facing death, we run it on   aren't we cool and aren't we calm?   the god reserved a false alarm   aren't we cool and arent we calm?   for facing death, we run it on      do what they say   or they take it away   i'd rather be dead than   carry on      wait   but you live, dont be cursing   you mustn't take gods name in vain   wait   put your knife forward the curtain   impulsives, you must refrain      [chorus]      do what they say   or they take it away   i'd rather be dead than   carry on      so we carry on   with this burden   i can't remember anything   i can't be certain      aren't we cool and aren't we calm?   for facing death, we run it on   do what they say   or they take it away   i'd rather be dead than   carry on  "

In [None]:
import nltk
# Download the tokenizer models used by nltk.word_tokenize.
# Older NLTK releases read 'punkt'; newer releases look up 'punkt_tab' instead and
# raise LookupError if it is missing, so fetch both.
for resource in ('punkt', 'punkt_tab'):
    nltk.download(resource)

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


True

In [None]:

from nltk.stem.porter import PorterStemmer
stemmer = PorterStemmer()


def tokenization(txt):
    """Tokenise `txt` with NLTK and return its Porter-stemmed tokens joined by spaces.

    Args:
        txt: The text to process (passed straight to `nltk.word_tokenize`).

    Returns:
        A single string containing the stemmed tokens separated by one space each.
    """
    return " ".join(map(stemmer.stem, nltk.word_tokenize(txt)))

In [None]:
# Replace every lyric with its tokenised + stemmed form (one call per row).
df['text'] = df['text'].map(tokenization)

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [None]:
# Vectorise the processed lyrics with word-level TF-IDF (English stop words removed),
# then compute the cosine similarity between every pair of songs.
# NOTE(review): the text was stemmed before this step, so stemmed forms of stop words
# may no longer match the unstemmed 'english' list — confirm this is acceptable.
tfidvector = TfidfVectorizer(stop_words='english', analyzer='word')
matrix = tfidvector.fit_transform(raw_documents=df['text'])
similarity = cosine_similarity(X=matrix)

In [None]:
# First row of the pairwise matrix: similarity of song 0 to every song in `df`.
similarity[0]

array([1.        , 0.01627356, 0.03352344, ..., 0.0229197 , 0.01571545,
       0.01460171])

In [None]:
# Sanity check: look for rows whose title is the empty string and report the first one.
empty_songs = df.loc[df['song'].eq('')]
if empty_songs.empty:
    print("No songs are empty strings.")
else:
    index_of_first_empty_song = empty_songs.index[0]
    print(index_of_first_empty_song)

No songs are empty strings.


## Recommendation system

In [None]:
def recommendation(song_df):
    """Return the titles of up to 20 songs most similar to the song titled `song_df`.

    Reads the notebook-level `df` (song table) and `similarity` (pairwise score
    matrix). The title match is exact and case-sensitive; if several rows share the
    title, the first one is used.

    NOTE: a later cell defines another `recommendation` with extra parameters; once
    that cell has run, it shadows this definition.

    Args:
        song_df: Title of the song to find neighbours for.

    Returns:
        A list of song titles ordered from most to least similar, never containing
        the queried row itself. Returns an empty list (after printing a message)
        when the title is not present.
    """
    # Use row *positions*, not index labels, because `similarity` is addressed by position.
    hits = np.flatnonzero((df['song'] == song_df).to_numpy())
    if hits.size == 0:
        print(f"Song '{song_df}' not found in the dataset.")
        return []

    pos = int(hits[0])
    ranked = sorted(enumerate(similarity[pos]), key=lambda pair: pair[1], reverse=True)
    # Drop the query row explicitly: with tied scores it is not guaranteed to rank first.
    return [df['song'].iloc[i] for i, _ in ranked if i != pos][:20]

In [None]:
def recommendation(song_df, df, similarity, top_n=20):
    """Return the titles of the songs most similar to the song titled `song_df`.

    The title is matched case-insensitively against `df['song']`; if several rows
    share the title, the first one is used. `df` is not modified.

    Args:
        song_df: Title of the song to find neighbours for.
        df: DataFrame with a 'song' column, one row per song.
        similarity: Square pairwise score matrix addressed by row position, i.e.
            `similarity[i][j]` is the score between rows i and j of `df`.
        top_n: Maximum number of titles to return (defaults to 20).

    Returns:
        A list of at most `top_n` song titles ordered from most to least similar,
        never containing the queried row itself. Returns an empty list (after
        printing a message) when the title is not present.
    """
    # Compare lower-cased titles on the fly instead of writing a helper column into
    # the caller's DataFrame (which would otherwise leak into anything saved later).
    title_matches = (df['song'].str.lower() == song_df.lower()).to_numpy()
    hits = np.flatnonzero(title_matches)
    if hits.size == 0:
        print(f"Song '{song_df}' not found in the dataset.")
        return []  # Return an empty list if song not found

    # Row position (not index label) so the lookup is correct for any DataFrame index.
    pos = int(hits[0])
    ranked = sorted(enumerate(similarity[pos]), key=lambda pair: pair[1], reverse=True)
    # Drop the query row explicitly: with tied scores it is not guaranteed to rank first.
    return [df['song'].iloc[i] for i, _ in ranked if i != pos][:top_n]

In [None]:
import pickle

# Persist the similarity matrix and the song table to the working directory.
# The `with` blocks close each file handle, so the data is flushed to disk even if
# a later cell fails; the bare open(...) calls left the handles open.
with open('similarity.pkl', 'wb') as similarity_file:
    pickle.dump(similarity, similarity_file)
with open('df.pkl', 'wb') as df_file:
    pickle.dump(df, df_file)