Importing the required Python libraries



In [29]:
from nltk.corpus import stopwords
import nltk
nltk.download('stopwords')
from nltk.tokenize import TreebankWordTokenizer
from nltk.stem.porter import *
import numpy
from math import log,sqrt
import pandas as pd
import timeit

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


Defining methods for preprocessing

In [30]:
def tokenize_sentence(sentence):
    """Lowercase a string, tokenize it and keep only alphabetic tokens.

    Args:
        sentence: the text to tokenize (str).

    Returns:
        List of lowercase tokens, in order, for which ``str.isalpha()`` is
        true; every other token (punctuation, digits, mixed) is dropped.
    """
    lowered = sentence.lower()
    raw_tokens = TreebankWordTokenizer().tokenize(lowered)
    return [token for token in raw_tokens if token.isalpha()]

def remove_stopwords(words):
    """Remove English stop words from a list of tokens.

    Args:
        words: list of tokens (str).

    Returns:
        New list with every token that is not in NLTK's English stop-word
        list, in the original order.
    """
    # Build a set once per call so each membership test is O(1) instead of a
    # linear scan over the whole stop-word list for every token.
    stopword_set = set(stopwords.words('english'))
    return [word for word in words if word not in stopword_set]

def stem_words(words_list):
    """Stem every token of a list with the Porter stemmer.

    Args:
        words_list: list of tokens (str).

    Returns:
        New list holding the stem of each token, in the same order.
    """
    stemmer = PorterStemmer()
    return [stemmer.stem(word) for word in words_list]

def preprocess_sentence(sentence):
    """Run the whole preprocessing pipeline on one string.

    Steps, in order: tokenize_sentence -> remove_stopwords -> stem_words.

    Args:
        sentence: the text to preprocess (str).

    Returns:
        List of stemmed tokens produced by the three steps above.
    """
    tokens = tokenize_sentence(sentence)
    content_tokens = remove_stopwords(tokens)
    return stem_words(content_tokens)

Vector space model defined by the class "IR_model"

In [31]:
class IR_model():
    """Vector space model over a tokenized corpus with tf-idf weighting.

    A corpus is a list of ``(doc_id, tokens)`` tuples, where ``tokens`` is the
    list of preprocessed terms of the document.
    """

    def __init__(self):
        """Create an empty model; call build_Vector_Space to populate it."""
        self.__word_dict = {}        # maps each unique corpus term to a column index
        self.__word_dict_size = 0    # number of entries in __word_dict
        self.__doc_list_size = 0     # number of matrix rows filled so far
        self.__score_matrix = None   # 2D array (documents x terms) of tf-idf scores

    def __addDocument(self, document):
        """Add the raw term counts of one document to the next free matrix row.

        Args:
            document: a single ``(doc_id, tokens)`` tuple; only ``tokens`` is read.
        """
        row = self.__score_matrix[self.__doc_list_size]  # view into the matrix
        for word in document[1]:
            row[self.__word_dict[word]] += 1
        self.__doc_list_size += 1

    def build_Vector_Space(self, documents):
        """Build the vocabulary and the tf-idf matrix for a corpus.

        Each non-zero cell ends up as ``(1 + ln(tf)) * ln(N / df)``, where
        ``tf`` is the term count in the document, ``df`` the number of
        documents containing the term and ``N`` the number of documents.

        Args:
            documents: corpus as a list of ``(doc_id, tokens)`` tuples.

        Returns:
            None. The model state is replaced.
        """
        # Start from a clean state so the method can be called again (for
        # example when the notebook cell is re-run). Without the reset the row
        # counter keeps growing and indexes past the freshly allocated matrix.
        self.__word_dict = {}
        self.__word_dict_size = 0
        self.__doc_list_size = 0

        # Assign a column index to every unique term.
        for document in documents:
            for word in document[1]:
                if word not in self.__word_dict:
                    self.__word_dict[word] = self.__word_dict_size
                    self.__word_dict_size += 1

        self.__score_matrix = numpy.zeros((len(documents), self.__word_dict_size))

        # Fill the cells with raw term frequencies.
        for document in documents:
            self.__addDocument(document)

        matrix = self.__score_matrix
        # Document frequency per term; never zero because every vocabulary
        # term was taken from at least one of these documents.
        df = numpy.count_nonzero(matrix, axis=0)
        idf = numpy.log(len(documents) / df)

        # Turn the non-zero term frequencies into tf-idf scores in one step.
        rows, cols = numpy.nonzero(matrix)
        matrix[rows, cols] = (1.0 + numpy.log(matrix[rows, cols])) * idf[cols]

    def Search(self, query, documents):
        """Score every document against a query with tf-idf.

        Terms that are not in the model's vocabulary are ignored and repeated
        query terms count once. Term and document frequencies are computed
        from ``documents``; the corpus size used for idf is the one given to
        build_Vector_Space.

        Args:
            query: list of preprocessed query terms.
            documents: corpus as a list of ``(doc_id, tokens)`` tuples.

        Returns:
            List with one entry per document, in corpus order. Each entry is a
            single-element list ``[(score, doc_id)]``; the score is 0 when the
            document contains none of the query terms.
        """
        # Unique query terms known to the model, in first-occurrence order.
        terms = [word for word in dict.fromkeys(query) if word in self.__word_dict]

        idf = {}
        for term in terms:
            doc_freq = sum(1 for document in documents if term in document[1])
            if doc_freq:
                idf[term] = log(len(self.__score_matrix) / doc_freq)
            else:
                idf[term] = 0.0

        query_score = []
        for document in documents:
            score = 0
            for term in terms:
                term_freq = sum(1 for token in document[1] if token == term)
                if term_freq > 0:
                    # score(query, doc) = sum of tf-idf over the shared terms
                    score += (1 + log(term_freq)) * idf[term]
            query_score.append([(score, document[0])])

        return query_score


Mounting Google Drive, where the dataset is stored

In [32]:
# Colab-specific step: needs the google.colab runtime and mounts Google Drive
# at /content/drive so the dataset path used below resolves.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


Setting filepath to the dataset

In [33]:
# Hard-coded location of the lyrics CSV inside the mounted Drive; adjust it when running elsewhere.
filepath = "/content/drive/My Drive/Colab Notebooks/Song.csv"

Reading the dataset into df

In [34]:
# error_bad_lines was deprecated in pandas 1.3 and removed in 2.0, where it
# raises TypeError. on_bad_lines="warn" keeps the old behaviour: malformed
# rows are dropped and a warning is emitted for each.
try:
    df = pd.read_csv(filepath, on_bad_lines="warn")
except TypeError:
    # pandas < 1.3 does not know on_bad_lines yet.
    df = pd.read_csv(filepath, error_bad_lines=False)
print(len(df))
df

4049


Unnamed: 0.1,Unnamed: 0,Id,Artist Name,Song Name,Lyrics,Clean Lyrics
0,0,1000,Josh Groban,Brave,"Wake up, wake up, the sun cannot wait for long.\nReach out, reach out before it fades away.\nYou will find the warmth when you surrender.\nSmile into the fear and let it play.\n\nYou wanna run away, run away and you say that it can't be so.\nYou wanna look away, look away but you stay cause' it's all so close.\nWhen you stand up and hold out your hand.\nIn the face of what I don't understand.\nMy reason to be brave.\n\nHold on, hold on, so strong, time just carries on.\nAnd all that you thought was wrong is pure again.\nYou can't hide forever from the thunder.\nLook into the storm and feel the rain.\n\nYou wanna run away, run away and you say that it can't be so.\nYou wanna look away, look away but you stay cause' it's all so close.\nWhen you stand up and hold out your hand.\nIn the face of what I don't understand.\nMy reason to be brave.\n\nWhoa (x 4)\n\nGo on, go on…\n\nYou wanna run away, run away and you say that it can't be so.\nYou wanna look away, look away but you stay caus...",wake wake sun wait long reach reach fade away find warmth surrender smile fear let play wan run away run away say cant wan look away look away stay cause close stand hold hand face dont understand reason brave hold hold strong time carry think wrong pure cant hide forever thunder look storm feel rain wan run away run away say cant wan look away look away stay cause close stand hold hand face dont understand reason brave whoa wan run away run away say cant wan look away look away stay cause close stand hold hand face dont understand reason brave explain request
1,1,1001,Simple Plan,Where Is The Love?,"[Originally by Black Eyed Peas]\n\nWhat's wrong with the world mama,\nPeople livin' like ain't got no mamas.\nI think the whole world's addicted to the drama,\nOnly attracted to the things that breathe the trauma.\nOverseas yeah we tryin' to stop terrorism,\nBut we still got terrorists here livin'\nIn the USA, the big CIA, the bloods and the crips and the KKK.\n\nPeople killin' people dyin'\nChildren hurt, do you hear them cryin'?\nIf you practice what you preach?\nWill you turn the other cheek?\nFather, Father, Father help us.\nSend some guidance from above.\nPeople got me got me questioning,\nWhere is the love? (the love, the love)\nWhere is the love, the love, the love?\n\nWhatever happened to the values of humanity?\nWhatever happened to the fairness in equality?\nInstead of spreadin' love we're spreadin' animosity.\nThe lack of understanding leading us away from unity.\nThat's the reason why sometimes I'm feelin' under,\nThat's the reason why sometimes I'm feelin' down,\nIt's ...",originally black eye peas whats wrong world mama people livin like aint get mamas think whole worlds addict drama attract things breathe trauma overseas yeah tryin stop terrorism still get terrorists livin usa big cia blood crips kkk people killin people dyin children hurt hear cryin practice preach turn cheek father father father help send guidance people get get question love love love love love love whatever happen value humanity whatever happen fairness equality instead spreadin love spreadin animosity lack understand lead away unity thats reason sometimes feelin thats reason sometimes feelin wonder sometimes feelin get keep faith alive till love find people killin people dyin children hurt hear cryin practice preach turn cheek father father father help send guidance people get get question love love love love love love love love love love love love love love love love explain request
2,2,1002,Ozzy Osbourne,Paranoid,"Finished with my woman 'cause she couldn't help me with my mind\nPeople think I'm insane because I am frowning all the time\nAll day long I think of things but nothing seems to satisfy\nThink I'll lose my mind if I don't find something to pacify\nCan you help me occupy my brain?\nOh yeah\nI need someone to show me the things in life that I can't find\nI can't see the things that make true happiness, I must be blind\nMake a joke and I will sigh and you will laugh and I will cry\nHappiness I cannot feel and love to me is so unreal\nAnd so as you hear these words telling you now of my state\nI tell you to enjoy life I wish I could but it's too late \nExplain\nRequest\n \n\n\n×",finish woman cause couldnt help mind people think insane frown time day long think things nothing seem satisfy think ill lose mind dont find something pacify help occupy brain yeah need someone show things life cant find cant see things make true happiness must blind make joke sigh laugh cry happiness feel love unreal hear word tell state tell enjoy life wish could late explain request
3,3,1003,Godflesh,Baby Blue Eyes,Bury your dreams\nDrown your skin\nUseful ruins [X2]\n\nBaby Blue Eyes [X4]\n\nPerfect waist\nPerfect skin\nBreed and serve\n\nBaby Blue Eyes [X4]\n\nBury your dreams\nDrown your skin\nUseful ruins [X2]\n\nBaby Blue Eyes [X4] \nExplain\nRequest\n \n\n\n×,bury dream drown skin useful ruin baby blue eye perfect waist perfect skin breed serve baby blue eye bury dream drown skin useful ruin baby blue eye explain request
4,4,1004,Grateful Dead,Youngblood,"I saw her standing on the corner, yellow ribbon in her hair\nI knew at once I was a goner, said lookie there, lookie there\nYoungblood, youngblood, youngblood, I can't get you out of my mind\n\nI tried to talk, my tongue was fractured\nI tried to walk, but I was lame\nCould'nt stop myseld from shouting\nWhat's your name, what's your name?\n\nCrazy stuff, sure look tough, had to follow her all the way home\nThings went bad, I met her dad\nHe said ""you better leave my daughter alone""\n\nI could'nt sleep that night for trying, I saw the rising of the sun\nAll night long my heart felt like crying\nYou're the one, you're the one \nExplain\nRequest\n \n\n\n×",saw stand corner yellow ribbon hair know goner say lookie lookie youngblood youngblood youngblood cant get mind try talk tongue fracture try walk lame couldnt stop myseld shout whats name whats name crazy stuff sure look tough follow way home things bad meet dad say better leave daughter alone couldnt sleep night try saw rise sun night long heart felt like cry youre one youre one explain request
...,...,...,...,...,...,...
4044,4044,5044,Rachel Z,This Woman's Work,"Pray God you can cope\nI stand outside\nThis woman's work\nThis woman's world\nOh it's hard on the man\nNow his part is over\nNow starts the craft of the father\n\nI know you've got a little life in you left\nI know you've got a lotta strength left\nI know you've got a little life in you left\nI know you've got a lotta strength left\nI should be cryin' but I just can't let it show\nI should be hopin' but I can't stop thinkin'\nAll the things we should've said that are never said\nAll the things we should've done that we never did\nAll the things we should've given, but I didn't\nOh darlin', make it go\nMake it go away \nExplain\nRequest\n \n\n\n×",pray god cope stand outside womans work womans world hard man part start craft father know youve get little life leave know youve get lotta strength leave know youve get little life leave know youve get lotta strength leave cryin cant let show hopin cant stop thinkin things shouldve say never say things shouldve never things shouldve give didnt darlin make make away explain request
4045,4045,5045,Doris Day,I Ll Be Home For Christmas,I'll be home for Christmas\nYou can plan on me\nPlease have snow and mistletoe\nAnd presents on the tree\n\nChristmas Eve will find me\nWhere the love-light gleams\nI'll be home for Christmas\nIf only in my dreams\n\nChristmas Eve will find me\nWhere the love-light gleams\nI'll be home for Christmas\nIf only in my dreams \nExplain\nRequest\n \n\n\n×,ill home christmas plan please snow mistletoe present tree christmas eve find love light gleam ill home christmas dream christmas eve find love light gleam ill home christmas dream explain request
4046,4046,5046,Jason Mraz,Rainbow Connection,"Why are there so many,\nSongs about rainbows,\nAnd what's on the other side.\n\nRainbows are visions,\nBut only illusions,\nRainbows have nothing to hide.\n\nWhat's..\n(Spoken: D'you guys know this part?)\nWhat's so amazing,\nThat it keeps us star gazing,\nAnd what do they think we might see.\n\nOh, someday we'll find it,\nThe Rainbow Connection,\nThe lovers, the dreamers, and me.\n\nHave you been half asleep,\nAnd have you heard the voices,\nWell, I've heard them calling my name.\n\nIs this the sweet sound,\nThat calls the young sailors,\nTheir voice might be one and the same.\n\nI've.. Heard it too many, many, many times to ignore it,\nAnd I think it might be.. Oh..\n\nSomeday we'll find it,\nThe Rainbow Connection,\nThe lovers, the dreamers, and me.\n\n(Spoken: Something like that.) \nExplain\nRequest\n \n\n\n×",many songs rainbows whats side rainbows visions illusions rainbows nothing hide whats speak dyou guy know part whats amaze keep star gaze think might see someday well find rainbow connection lovers dreamers half asleep hear voice well ive hear call name sweet sound call young sailors voice might one ive hear many many many time ignore think might someday well find rainbow connection lovers dreamers speak something like explain request
4047,4047,5047,Conway Twitty,Bad Boy,"Everyday your husband goes to work from nine to five\nWakes up in the morning and puts on his coat and tie\nSits down at the table with some coffee and the news\n""Two eggs over easy,"" girl, that's all he says to you\nThe kids are all in school now and you sit home alone\nSpendin' all your time down it the basement a-washin' clothes\nSomehow this just ain't the way you dreamed love would be\nI can look into your eyes and tell just what you need.\n\nYou need a bad boy\nTo love you up and down\nA bad boy\nTo take you on the town\nIt's plain to see what you need's\nA little bit of mystery\nYou need a bad boy\nA bad boy like me.\n\nEvery night at six o'clock he walks on through the door\nLays down on the couch and throws his shoes off on the floor\nTonight was gonna be the night he took you to a show\nNow he's much too tired and he doesn't wanna go\nOnce again you spend the night in front of the tv\nIn bed you turn to touch him but he's already asleep.\n\nYou need a bad boy\nTo love you...",everyday husband work nine five wake morning put coat tie sit table coffee news two egg easy girl thats say kid school sit home alone spendin time basement washin clothe somehow aint way dream love would look eye tell need need bad boy love bad boy take town plain see need little bite mystery need bad boy bad boy like every night six oclock walk door lay couch throw shoe floor tonight gon night take show hes much tire doesnt wan spend night front bed turn touch hes already asleep need bad boy love bad boy take town plain see need little bite mystery need bad boy bad boy like bad boy explain request


Appending lyrics from the dataset to the list "songs"


In [35]:
# "songs" holds the 'Clean Lyrics' value of every row of df, in row order
songs = list(df['Clean Lyrics'])

Generating the corpus in required form.

In [36]:
# "docs" is the corpus: one (song index, preprocessed token list) tuple per song
start = timeit.default_timer()

docs = [(idx, preprocess_sentence(lyrics)) for idx, lyrics in enumerate(songs)]

stop = timeit.default_timer()
print('Time: ', stop - start)

Time:  12.86841538300007


Building the IR model.


In [37]:
start = timeit.default_timer()

# "Model" is the vector space model built over the whole corpus
Model = IR_model()
Model.build_Vector_Space(docs)

stop = timeit.default_timer()
elapsed = stop - start
print('Time: ', elapsed)

Time:  145.14310020599999


Assigning the query to q.

In [56]:
start = timeit.default_timer()

# "q" ends up as the list of preprocessed terms of the raw query text
q = preprocess_sentence("startt")

stop = timeit.default_timer()
elapsed = stop - start
print('Time: ', elapsed)

Time:  0.000857245000020157


Calculating tf_idf score for query and each song.

In [57]:
start = timeit.default_timer()

# "f" holds the Search result for the query: one scored entry per song
f = Model.Search(q, docs)

stop = timeit.default_timer()
elapsed = stop - start
print('Time: ', elapsed)

Time:  0.04730948500002796


Getting the index of top 10 similar songs in corpus.

In [58]:
# Rank the songs by score, best first. Each entry of f is a single-element
# list [(score, song index)].
f.sort(reverse=True)

# Slicing copes with a corpus of fewer than 10 songs, where range(10) would
# raise IndexError. Only songs with a positive score are real matches, so
# zero-score songs are not padded into the results.
top10 = [doc_id for entry in f[:10] for score, doc_id in entry if score > 0]

# Kept as a 0/1 flag because the following cells test `isallzeros == 0`.
isallzeros = 0 if top10 else 1
if isallzeros == 1:
    result = "Sorry, no match found"

Appending the other details of the top 10 songs to ans.

In [59]:
if isallzeros == 0:
    # One [artist, title, clean lyrics] row per selected song, in ranking order
    ans = [
        [song['Artist Name'], song['Song Name'], song['Clean Lyrics']]
        for song in (df.iloc[idx] for idx in top10)
    ]

Printing top 10 relevant results.

In [60]:
if isallzeros == 0:
    # Widen the column display limit so long lyrics are not cut off
    pd.set_option('display.max_colwidth', 1000)
    print("Top 10 search results\n")
    # A bare expression inside an `if` block is not rendered by Jupyter, so the
    # frame is only built here; the next cell evaluates `result` to show it.
    result = pd.DataFrame(ans, columns=['Artist Name', 'Song Name', 'Lyrics'])

In [61]:
# Displays the search outcome: the results DataFrame, or the no-match message string.
result

'Sorry, no match found'