# Environment configuration

In [21]:
# Upgrading pip which will be used to install all libraries
!pip install -r requirements.txt
# !pip install pymongo



In [2]:
# to check if working on GPU
# !pip install DeepSaki
from tensorflow import keras
import DeepSaki
strategy, RUNTIME_ENVIRONMENT, hw_accelerator_handle = DeepSaki.utils.DetectHw()

Running on single GPU  /device:GPU:0
Number of accelerators:  1
____________________________________________________________________________________
Device List: 
[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 9027242169430391422
xla_global_id: -1
, name: "/device:GPU:0"
device_type: "GPU"
memory_limit: 1721342363
locality {
  bus_id: 1
  links {
  }
}
incarnation: 17701183863651686981
physical_device_desc: "device: 0, name: NVIDIA GeForce RTX 3050 Ti Laptop GPU, pci bus id: 0000:01:00.0, compute capability: 8.6"
xla_global_id: 416903419
]


# Content-based filtering using TF-IDF score

In [3]:
import pandas as pd
import matplotlib.pyplot as plt

In [5]:

from typing import List
# from ../models/msd_song import MsdSongWithLyrics
from models.msd_song import MsdSongWithLyrics
from dao.dao_msd_songs_with_lyrics import DAOMsdSongsWithLyrics

dao_songs_with_lyrics: DAOMsdSongsWithLyrics = DAOMsdSongsWithLyrics()
songs: List[MsdSongWithLyrics] = dao_songs_with_lyrics.find_many_by_query({'lyrics': {'$ne':None}})

In [6]:
headers = songs[0].dict().keys()
songs_values = [song.dict().values() for song in songs]
songs_df = pd.DataFrame(songs_values, columns=headers)
songs_df.head()

Unnamed: 0,song_id,title,release,artist_name,year,tag,features,lyrics
0,SOLJTLX12AB01890ED,El hijo del pueblo,32 Grandes Éxitos CD 2,Jorge Negrete,1997,pop,[],Es mi orgullo haber nacido en el barrio más hu...
1,SOMPVQB12A8C1379BB,Pilots,The Loyal,Tiger Lou,2005,pop,[],Raise the chandelier light the candels dear i ...
2,SOSDCFG12AB0184647,006,Lena 20 År,Lena Philipsson,1998,pop,[],I had come in the name of love\nWith a mission...
3,SOKOVRQ12A8C142811,Ethos of Coercion,Descend Into Depravity,Dying Fetus,2009,rock,[],"Castigation of the offenders, no punishment ou..."
4,SOIMMJJ12AF72AD643,Rock-N-Rule,I'm Only A Man (Bonus Track Version),Emery,2007,rock,[],[Intro]\nThis is a waking up\nThis is your fin...


In [7]:
!pip install scikit-learn
from sklearn.feature_extraction.text import TfidfVectorizer



In [8]:
songs_df['lyrics'] = songs_df['lyrics'].str.replace(r'\n', ' ')

  songs_df['lyrics'] = songs_df['lyrics'].str.replace(r'\n', ' ')


In [9]:
songs_df['lyrics']

0         Es mi orgullo haber nacido en el barrio más hu...
1         Raise the chandelier light the candels dear i ...
2         I had come in the name of love With a mission ...
3         Castigation of the offenders, no punishment ou...
4         [Intro] This is a waking up This is your final...
                                ...                        
181329    Yesterday I went outside And all my grass had ...
181330    Big girls like telling boys that shove "fact i...
181331    [Verse 1] Think of me as your soldier The man ...
181332    Interrotte speranze, eterna fede Fiamme e stra...
181333    Where do you go with all of those scars They r...
Name: lyrics, Length: 181334, dtype: object

In [11]:
!pip install --user -U nltk
import nltk
from nltk.corpus import stopwords
nltk.download('stopwords')
final_stopwords_list = stopwords.words('english') + stopwords.words('french') + stopwords.words('spanish') + stopwords.words('swedish')
tfidf = TfidfVectorizer(analyzer='word', stop_words=final_stopwords_list)



[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\Ewakae\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [12]:
songs_df = songs_df[:20000]
tfidf_matrix = tfidf.fit_transform(songs_df['lyrics'])


In [13]:
tfidf_matrix

<20000x106683 sparse matrix of type '<class 'numpy.float64'>'
	with 1170789 stored elements in Compressed Sparse Row format>

In [14]:
tfidf_matrix.shape

(20000, 106683)

## Content-based recommendation based on sigmoid kernel

In [15]:
from sklearn.metrics.pairwise import sigmoid_kernel

songs_sig = sigmoid_kernel(tfidf_matrix, tfidf_matrix)

In [16]:
songs_sig

array([[0.76159809, 0.76159416, 0.76159416, ..., 0.76159416, 0.76159416,
        0.76159432],
       [0.76159416, 0.76159809, 0.76159416, ..., 0.76159416, 0.76159416,
        0.76159416],
       [0.76159416, 0.76159416, 0.76159809, ..., 0.7615942 , 0.76159416,
        0.76159416],
       ...,
       [0.76159416, 0.76159416, 0.7615942 , ..., 0.76159809, 0.76159416,
        0.76159416],
       [0.76159416, 0.76159416, 0.76159416, ..., 0.76159416, 0.76159809,
        0.76159416],
       [0.76159432, 0.76159416, 0.76159416, ..., 0.76159416, 0.76159416,
        0.76159809]])

In [17]:
songs_sig[0]

array([0.76159809, 0.76159416, 0.76159416, ..., 0.76159416, 0.76159416,
       0.76159432])

In [18]:
indices = pd.Series(songs_df.index, index=songs_df['title'])

In [82]:
indices

title
El hijo del pueblo        0
Pilots                    1
006                       2
Ethos of Coercion         3
Rock-N-Rule               4
                      ...  
Tyttö metsässä        19995
Inis Mona             19996
I Can Tell            19997
The April Fools       19998
Daría                 19999
Length: 20000, dtype: int64

In [84]:
idx = indices['Before He Kissed Me']

In [85]:
idx

7

In [88]:
sig_scores = list(enumerate(songs_sig[idx]))
sig_scores = sorted(sig_scores, key=lambda x: x[1], reverse=True)
sig_scores = sig_scores[1:11]
sig_scores

[(17140, 0.7615980925843139),
 (6679, 0.7615946934019091),
 (16945, 0.7615946934019091),
 (18372, 0.7615946753420102),
 (5809, 0.7615946434644573),
 (12405, 0.761594615341704),
 (12947, 0.7615945966095496),
 (19732, 0.7615945928406871),
 (12072, 0.7615945801236044),
 (4867, 0.7615945403184269)]

In [91]:
for song_score in sig_scores:
    print(songs_df['title'].iloc[song_score[0]], songs_df['artist_name'].iloc[song_score[0]])

Waterlogged Broken Hope
Gone Kissin Lunachicks
Gone Kissin Lunachicks
Flowers Grow Out of My Grave Dead Man's Bones
When Pain Comes To Surface Skinlab
Present Arrived Tom Verlaine
Peace Senser
Beautiful Mind The Verve
Envelopes Another Day Ariel Pink's Haunted Graffiti
Drifting Texas Sand Webb Pierce


## Content-based recommendation based on cosine similarity

In [117]:
from sklearn.metrics.pairwise import cosine_similarity

In [118]:
cosine_song_similarity = cosine_similarity( ';')

In [119]:
cosine_song_similarity

array([[1.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.04216187],
       [0.        , 1.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 1.        , ..., 0.01048144, 0.        ,
        0.        ],
       ...,
       [0.        , 0.        , 0.01048144, ..., 1.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 1.        ,
        0.        ],
       [0.04216187, 0.        , 0.        , ..., 0.        , 0.        ,
        1.        ]])

In [120]:
len(cosine_song_similarity)

20000

In [121]:
indices = pd.Series(songs_df.index, index=songs_df['title'])

In [122]:
idx = indices['Before He Kissed Me']
idx

7

In [123]:
type(cosine_song_similarity[idx])

numpy.ndarray

In [124]:
cosine_song_similarity[idx]

array([0.        , 0.00606898, 0.00161058, ..., 0.        , 0.        ,
       0.        ])

In [126]:
import numpy as np

In [131]:
results = cosine_song_similarity[idx].argsort()[:-50:-1]

In [140]:
np.sort(cosine_song_similarity[idx])[::-1]

array([1.        , 1.        , 0.13652363, ..., 0.        , 0.        ,
       0.        ])

In [133]:
type(results)

numpy.ndarray

In [135]:
for result in results:
    print(songs_df['title'].iloc[result], songs_df['artist_name'].iloc[result])

Before He Kissed Me Lisa Brokop
Waterlogged Broken Hope
Gone Kissin Lunachicks
Gone Kissin Lunachicks
Flowers Grow Out of My Grave Dead Man's Bones
When Pain Comes To Surface Skinlab
Present Arrived Tom Verlaine
Peace Senser
Beautiful Mind The Verve
Envelopes Another Day Ariel Pink's Haunted Graffiti
Drifting Texas Sand Webb Pierce
The Abandoned Ava Inferi
Lay Some Flowers On My Grave Blind Willie McTell
Hold Fast Call To Preserve
Fish Mr. Scruff
Come Death Blood Red Throne
Home Again Beach House
Mouth Machine Gun Our Last Night
Fallen Angel Seelenkrank
Settling Down Jerry Cantrell
Gerontion A Silent Film
The Grove Chuck Ragan
Lucky Lips Gale Storm
Crushed Eighteen Visions
Upon Raging Waves Mithotyn
Deep Dark Side Cowboys
Under My Skin Paffendorf
Your Place In The World The Space Brothers
This Would Be Paradise Melissa Auf der Maur
Soft Lips Hank Thompson
Lay Your Body Down Divinyls
The Morning After Tankard
Bukkake Tsunami Cattle Decapitation
Not Now Coffin Break
Devotion Luscious Jac