# Spotify Lyric Search

## Import Libraries

In [6]:
pip install pandas scikit-learn nltk

Note: you may need to restart the kernel to use updated packages.


In [21]:
import pandas as pd
import numpy as np
import re
import random
import nltk

In [22]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [23]:
# Fetch the NLTK stop-word corpus only when it is not already installed locally,
# so re-running the notebook does not need network access every time.
try:
    nltk.data.find('corpora/stopwords')
except LookupError:
    nltk.download('stopwords')
from nltk.corpus import stopwords

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\sudip\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


## Load Dataset

In [24]:
# Read the exported lyrics CSV, keep only the three columns used by the search,
# and discard every row in which any of them is missing.
df = pd.read_csv("Spotify Million Song Dataset_exported.csv")
df = df.loc[:, ['artist', 'song', 'text']].dropna()

print("Dataset Shape:", df.shape)

Dataset Shape: (57650, 3)


### Text Preprocessing

In [25]:
# English stop words from NLTK, held in a set for fast membership tests in clean_text.
stop_words = {*stopwords.words('english')}

def clean_text(text):
    """Normalise lyrics into a space-separated string of lower-case content words.

    The text is lower-cased, every character that is not a-z or whitespace is
    deleted (not replaced, so "don't" becomes "dont"), and any remaining token
    found in the module-level ``stop_words`` set is dropped.

    Args:
        text: Raw lyrics as a string.

    Returns:
        The surviving tokens joined by single spaces.
    """
    letters_only = re.sub(r'[^a-z\s]', '', text.lower())
    kept_tokens = filter(lambda token: token not in stop_words, letters_only.split())
    return " ".join(kept_tokens)

# Store the normalised form of every song's lyrics alongside the raw text.
df['clean_lyrics'] = df['text'].map(clean_text)

### TF-IDF Vectorization

In [26]:
# Vectoriser settings:
#   min_df=2           -> drop terms that occur in fewer than two songs
#   ngram_range=(1, 2) -> use both single words and two-word phrases
#   max_features=15000 -> cap the vocabulary at the 15,000 most frequent terms
tfidf = TfidfVectorizer(min_df=2, ngram_range=(1, 2), max_features=15000)

# Learn the vocabulary from the cleaned lyrics and encode every song in one pass.
tfidf_matrix = tfidf.fit_transform(df['clean_lyrics'])


### Prediction Function

In [27]:
def predict_song(lyrics_snippet):
    """Identify the song whose lyrics are most similar to a snippet.

    The snippet is cleaned with ``clean_text``, projected into the fitted
    TF-IDF space and compared against every row of ``tfidf_matrix`` using
    cosine similarity.

    Args:
        lyrics_snippet: Free-text lyrics fragment as a string.

    Returns:
        A ``(song, artist)`` tuple for the best-scoring row of ``df``, or
        ``(None, None)`` when the snippet has no similarity to any song
        (for example, it is empty or contains only stop words).
    """
    cleaned_snippet = clean_text(lyrics_snippet)
    snippet_vector = tfidf.transform([cleaned_snippet])

    similarity_scores = cosine_similarity(snippet_vector, tfidf_matrix)

    # If every score is zero, argmax() would return index 0 and the first song
    # in the dataset would be reported as a match; signal "no match" instead.
    if similarity_scores.max() <= 0:
        return (None, None)

    # Look the winning row up once and read both fields from it.
    best_match = df.iloc[similarity_scores.argmax()]
    return (best_match['song'], best_match['artist'])


### Accuracy Evaluation

In [28]:
def evaluate_accuracy(samples=100, seed=None, snippet_words=50):
    """Estimate how often ``predict_song`` recovers a song from its opening words.

    For each trial a random row of ``df`` is drawn (with replacement), the
    first ``snippet_words`` tokens of its ``clean_lyrics`` are used as the
    query, and the trial counts as correct when both the predicted song and
    the predicted artist equal those of the sampled row.

    Note: the queries are taken from the same rows of ``df`` that the search
    runs against, so this is a retrieval sanity check rather than an estimate
    on unseen lyrics.

    Args:
        samples: Number of random trials; must be positive.
        seed: Optional seed for a private random generator, making the result
            reproducible. When ``None`` the global ``random`` state is used.
        snippet_words: Number of leading cleaned tokens used as the query.

    Returns:
        The percentage (0-100) of trials that were predicted correctly.

    Raises:
        ValueError: If ``samples`` is not positive.
    """
    if samples <= 0:
        raise ValueError("samples must be a positive integer")

    # A private generator keeps a seeded run from disturbing global random state.
    rng = random if seed is None else random.Random(seed)
    correct = 0

    for _ in range(samples):
        row = df.iloc[rng.randrange(len(df))]
        snippet = " ".join(row['clean_lyrics'].split()[:snippet_words])

        predicted_song, predicted_artist = predict_song(snippet)

        if predicted_song == row['song'] and predicted_artist == row['artist']:
            correct += 1

    return (correct / samples) * 100


# Fixed seed so the reported accuracy is the same on every Restart & Run All.
accuracy = evaluate_accuracy(samples=100, seed=42)
print(f"Model Accuracy: {accuracy:.2f}%")


Model Accuracy: 96.00%


### Lyric Input

In [29]:
# Interactive prompt: keep asking for lyric snippets and print the best match
# until the user types 'exit'.
print("Spotify Lyric Search ")
print("Type a small snippet of lyrics to identify the song.")
print("Type 'exit' to quit.\n")

while True:
    try:
        # Strip surrounding whitespace so inputs such as " exit " are still recognised.
        user_input = input("Enter lyrics snippet: ").strip()
    except (EOFError, KeyboardInterrupt):
        # Input stream closed or the cell was interrupted: leave the loop cleanly.
        print("\nThank you for using Spotify Lyric Search!")
        break

    if user_input.lower() == "exit":
        print("Thank you for using Spotify Lyric Search!")
        break

    if not user_input:
        # Nothing to search for; ask again instead of querying with blank text.
        print("Please enter some lyrics, or type 'exit' to quit.")
        continue

    song, artist = predict_song(user_input)

    print("\n🔍 Prediction Result:")
    print("Song   :", song)
    print("Artist :", artist)
    print("-" * 40)

Spotify Lyric Search 
Type a small snippet of lyrics to identify the song.
Type 'exit' to quit.



Enter lyrics snippet:  Big cat walking on the wild side   Big cat talking on the wild side   He knows he looks the way he should   He's got something making him feel good   He's got the style, he's got the sway   He's a million miles away      She cat walking on the wild side   She cat talking on the wild side   She don't smile, she don't look   She don't do laundry and she don't cook   She's not listening to what he says   She's not making any promises      He just wants to love someone   She just wants to love someone   He just wants to hold someone   She just wants to love someone      Big cat walking on the wild side   Big cat stalking on the wild side   She cat walking on the wild side   She cat talking on the wild side   He's thinking how she's not so tough   Could be a diamond sitting in the rough   She's staring out to empty space   She's wondering how long he will take      He just wants to love someone   She just wants to love someone   He just wants to hold someone   She jus


🔍 Prediction Result:
Song   : Big Cat
Artist : Air Supply
----------------------------------------


Enter lyrics snippet:  exit


Thank you for using Spotify Lyric Search!
