In [None]:
from google.colab import files

# Upload the kaggle.json file
uploaded = files.upload()

# Move the uploaded file to the required directory
!mkdir -p /root/.kaggle
!mv kaggle.json /root/.kaggle/

In [None]:
# Download and unzip dataset
!kaggle datasets download -d neisse/scrapped-lyrics-from-6-genres
!unzip /content/scrapped-lyrics-from-6-genres.zip

In [None]:
import pandas as pd

# Load dataset containing lyrics
df_lyrics = pd.read_csv('/content/lyrics-data.csv')

# Load dataset containing artist information
df_genre = pd.read_csv('/content/artists-data.csv')

# Attach each artist's row to their songs: lyrics 'ALink' matches artists 'Link'.
# The inner join drops songs whose artist link has no match.
df_merged = pd.merge(df_lyrics, df_genre, left_on='ALink', right_on='Link', how='inner')

# Keep only the columns used for modelling and drop rows missing any of them
df_merged = df_merged[['Lyric', 'language', 'Genres']].dropna()

# Keep only English and Spanish lyrics. Excluding just 'pt' would let every
# other language in the dataset through.
# NOTE(review): assumes two-letter codes 'en' / 'es', in line with the 'pt'
# code this cell previously filtered on - confirm against the data.
df_merged = df_merged[df_merged['language'].isin(['en', 'es'])]

print(df_merged.head())


In [None]:
# Write the merged table to disk, leaving out the DataFrame index column
merged_csv_path = '/content/merged_lyrics_data.csv'  # Adjust output filename as needed
df_merged.to_csv(merged_csv_path, index=False)

print("Merged data saved to merged_lyrics_data.csv")

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

# Reload the merged table from disk and shuffle the rows reproducibly
df_merged = pd.read_csv('/content/merged_lyrics_data.csv').sample(frac=1.0, random_state=42)

# The CSV round-trip can reintroduce missing values (read_csv parses empty
# fields and tokens such as "NA" as NaN), and TfidfVectorizer rejects NaN documents
df_merged = df_merged.dropna(subset=['Lyric', 'Genres'])

# Raw lyric text (features) and genre labels (target)
X = df_merged['Lyric']
y = df_merged['Genres']

# Split BEFORE vectorizing so the held-out lyrics never influence the
# vocabulary or IDF weights; otherwise the reported accuracy is optimistic
lyrics_train, lyrics_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Learn the TF-IDF vocabulary (capped at 5000 terms) from the training lyrics only,
# then apply the same fitted transform to the test lyrics
tfidf_vectorizer = TfidfVectorizer(max_features=5000)
X_train = tfidf_vectorizer.fit_transform(lyrics_train)
X_test = tfidf_vectorizer.transform(lyrics_test)

# Initialize and train Logistic Regression model
model = LogisticRegression(max_iter=5000)
model.fit(X_train, y_train)

# Evaluate accuracy on the held-out 20%
accuracy = model.score(X_test, y_test)
print("Accuracy:", accuracy)

In [None]:
def predict_genre(lyrics):
    """Return the predicted genre label for a single lyrics string.

    The text is vectorized with the notebook-level ``tfidf_vectorizer`` and
    classified by the notebook-level ``model``; both must already exist.
    """
    features = tfidf_vectorizer.transform([lyrics])
    # predict() returns one label per input row; there is exactly one row here
    return model.predict(features)[0]

# Demo: classify one sample lyric with the in-memory model
input_lyrics = 'I come from Alabama with a banjo on my knee, I’m going to Louisiana, my true love for to see It rained all night the day I left, the weather it was dry The sun so hot I froze to death; Susanna, don’t you cry. Oh, Susanna, don’t you cry for me cos’ I come from Alabama With my banjo on my knee. had a dream the other night when everything was still, I thought I saw Susanna coming up the hill, A buck wheat cake was in her mouth, a tear was in her eye, I said I’m coming from the south, Susanna don’t you cry. I soon will be in New Orleans and then I’ll look around And when I find my Susanna, I’ll fall upon the ground But if I do not find her, this man will surely die And when I’m dead and buried, Susanna don’t you cry.'
predicted_genre = predict_genre(lyrics=input_lyrics)
print("Predicted Genre:", predicted_genre)

In [None]:
def predict_genre_top_three(lyrics):
    """Print the three most probable genres for one lyrics string.

    Uses the notebook-level ``tfidf_vectorizer`` and ``model``. One line is
    printed per genre, most probable first. Nothing is returned.
    """
    features = tfidf_vectorizer.transform([lyrics])
    class_probs = model.predict_proba(features)[0]
    # argsort is ascending: take the last three positions, then reverse them
    best_first = class_probs.argsort()[-3:][::-1]
    for idx in best_first:
        print("Genre:", model.classes_[idx], "| Probability:", class_probs[idx])

# Demo: print the three most probable genres for one sample lyric
input_lyrics = 'When the music fades\nAll is stripped away\nAnd I simply come\nLongin just to bring\nSomething that is of worth\nThat will bless Your heart\nI will bring You more than a song\nFor a song in itself\nIs not what You have required\nYou search much deeper within\nThrough the ways things appear\nYou are looking into my heart\nI am comin back to the heart of worship\nAnd it is all about You\nIt is all about You, Jesus\nI am sorry, Lord, for the thing I have made it\nWhen it is all about You\nIt is all about You, Jesus\nKing of endless worth\nNo one could express\nHow much You deserve?\nThough I am weak and poor\nAll I have is Yours\nEvery single breath\nI will bring You more than a song\nFor a song in itself\nIs not what You have required\nYou search much deeper within\nThrough the way things appear\nYou are looking into my heart, yeah\nI am comin back to the heart of worship\nAnd it is all about You\nIt is all about You, Jesus\nI am sorry, Lord, for the thing I have made it\nWhen it is all about You\nIt is all about You, Jesus\nI am comin back to the heart of worship\nCause it is all about You\nIt is all about You, Jesus\nI am sorry, Lord, for the thing I have made it\nCause it is all about You\nIt is all about You, Jesus, yeah\nAll about You\nI will bring You more than a song\nI will bring You more than a song, more than a song\nI will bring You more than a song\nI will bring You more than a song (than a song)\nYou are looking into my heart\nYou are looking into my heart\nYou are looking into my heart\nInto my heart\nI will bring You more than a song\nI will bring You more than a song, yeah, yeah\nI will bring You more than a song\nI will bring You more than a song'
predict_genre_top_three(lyrics=input_lyrics)

In [None]:
import joblib

# Serialize the fitted classifier to disk. Only `model` is written by this
# call; the TF-IDF vectorizer is not part of the saved file.
joblib.dump(value=model, filename='trained_model.train')

In [None]:
# Read the serialized classifier back from 'trained_model.train'
loaded_model = joblib.load(filename='trained_model.train')

def predict_genre_loaded_model(lyrics):
    """Return the genre that ``loaded_model`` predicts for one lyrics string.

    The text is vectorized with the notebook-level ``tfidf_vectorizer`` (the
    in-memory one, not something read from disk) before classification.
    """
    features = tfidf_vectorizer.transform([lyrics])
    # predict() returns one label per input row; there is exactly one row here
    return loaded_model.predict(features)[0]

# Demo: classify one sample lyric with the model restored from disk
input_lyrics = 'When the days are cold\nAnd the cards all fold\nAnd the saints we see are all made of gold\nWhen your dreams all fail\nAnd the ones we hail\nAre the worst of all, and the bloods run stale\nI wanna hide the truth\nI wanna shelter you\nBut with the beast inside\nTheres nowhere we can hide\nNo matter what we breed\nWe still are made of greed\nThis is my kingdom come\nThis is my kingdom come\nWhen you feel my heat, look into my eyes\nIts where my demons hide\nIts where my demons hide\nDont get too close, its dark inside\nIts where my demons hide\nIts where my demons hide\nAt the curtains call\nIts the last of all\nWhen the lights fade out, all the sinners crawl\nSo they dug your grave\nAnd the masquerade\nWill come calling out at the mess youve made\nDont wanna let you down\nBut I am hell-bound\nThough this is all for you\nDont wanna hide the truth\nNo matter what we breed\nWe still are made of greed\nThis is my kingdom come\nThis is my kingdom come\nWhen you feel my heat, look into my eyes\nIts where my demons hide\nIts where my demons hide\nDont get too close, its dark inside\nIts where my demons hide\nIts where my demons hide\nThey say its what you make\nI say its up to fate\nIts woven in my soul\nI need to let you go\nYour eyes, they shine so bright\nI wanna save that light\nI cant escape this now\nUnless you show me how\nWhen you feel my heat, look into my eyes\nIts where my demons hide\nIts where my demons hide\nDont get too close, its dark inside\nIts where my demons hide\nIts where my demons hide'
predicted_genre_loaded_model = predict_genre_loaded_model(lyrics=input_lyrics)
print("Predicted Genre using loaded model:", predicted_genre_loaded_model)

In [None]:
def clean_lyrics(lyrics):
    """Flatten a lyrics string: drop straight apostrophes, turn newlines into spaces.

    Only the ASCII apostrophe (') is removed; every other character, including
    leading/trailing whitespace produced by replaced newlines, is kept as is.
    """
    # One pass over the text: "'" maps to nothing, "\n" maps to a single space
    substitutions = str.maketrans({"'": None, "\n": " "})
    return lyrics.translate(substitutions)

# Raw multi-line lyrics, exactly as pasted (one lyric line per text line)
song_lyrics = """
It's way past restoring
Lash out call it coping
I should have known
Yeah you keep me hoping
This boat we've been rowing
Is stuck on the shore
We've spent a while in this uncertain space
But I've realised that the pieces have changed
Now that we're past the fun
You don't bat an eye
All that we were, undone
You don't bat an eye
Maybe we're overrun
You don't bat an eye
If you and me are done
Why are you surprised
Driving after midnight
The rain on my headlight
Got nowhere to go
It' now that I realise
The fun in the daylight
We're footprints on snow
We've spent a while in this uncertain space
But I've realized that the pieces have changed
Now that we're past the fun
You don't bat an eye
All that we were, undone
You don't bat an eye
Maybe we're overrun
You don't bat an eye
If you and me are done
Why are you surprised
Every time we fall in love
Knowing that we'll fall out again
That's okay, that's okay, that's okay
Knowing that we'll fall back on
Something that's just filled with pain
Why won't it drive me insane
Now that we're past the fun
You don't bat an eye
All that we were, undone
You don't bat an eye
Maybe we're overrun
You don't bat an eye
If you and me are done
Why are you surprised
"""
# Collapse the pasted text into a single apostrophe-free line and show it
cleaned_lyrics = clean_lyrics(lyrics=song_lyrics)
print(cleaned_lyrics)