In [None]:
# Colab-only helper: mounts the user's Google Drive into the runtime's filesystem.
from google.colab import drive
# Mount point is /content/MyDrive; the CSV export cell later in this notebook
# writes underneath it (/content/MyDrive/MyDrive/...).
drive.mount('/content/MyDrive')

Mounted at /content/MyDrive


In [None]:
import csv
import os

import requests

# Musixmatch API key. It is read from the MUSIXMATCH_API_KEY environment
# variable so the secret never has to be saved inside the notebook; when the
# variable is unset this falls back to an empty string.
MUSIXMATCH_API_KEY = os.environ.get("MUSIXMATCH_API_KEY", "")
# Module-level accumulator: the fetch functions below append
# [title, artist, lyrics] rows to this list.
all_songs = []
# Function to search for tracks by artist
def search_songs(artist_name, page_size=50, timeout=10):
    """Search Musixmatch for tracks by an artist, ordered by track rating (descending).

    Args:
        artist_name: Artist name, sent as the ``q_artist`` query parameter.
        page_size: Maximum number of tracks requested from the API.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        A list of ``(track_name, artist_name, track_id)`` tuples. When the
        request fails or the reply holds no track list, "Failed to fetch
        tracks." is printed and an empty list is returned.
    """
    url = "https://api.musixmatch.com/ws/1.1/track.search"
    params = {
        "apikey": MUSIXMATCH_API_KEY,
        "q_artist": artist_name,
        "page_size": page_size,
        "s_track_rating": "desc"
    }
    try:
        # A timeout keeps one stalled connection from hanging the whole scrape.
        res = requests.get(url, params=params, timeout=timeout)
        final_data = res.json()
    except (requests.RequestException, ValueError):
        # Network failure or a non-JSON reply: treat it like any other failed lookup.
        print("Failed to fetch tracks.")
        return []

    message = final_data.get("message") if isinstance(final_data, dict) else None
    body = message.get("body") if isinstance(message, dict) else None
    # The "body" key being present is not enough: error replies may carry a
    # non-dict body (e.g. an empty list), which cannot be indexed by
    # "track_list" — see the Musixmatch status-code documentation.
    if not isinstance(body, dict) or "track_list" not in body:
        print("Failed to fetch tracks.")
        return []

    return [
        (tr["track"]["track_name"], tr["track"]["artist_name"], tr["track"]["track_id"])
        for tr in body["track_list"]
    ]

# Function to get lyrics for a track by track_id
def get_songs(track_id, timeout=10):
    """Fetch the lyrics text for a single Musixmatch track.

    Args:
        track_id: Musixmatch track identifier, sent as ``track_id``.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        The lyrics with everything from the first ``*******`` marker onward
        removed and surrounding whitespace stripped. When the request fails
        or the reply holds no lyrics text, "Failed to fetch lyrics." is
        printed and an empty string is returned.
    """
    url = "https://api.musixmatch.com/ws/1.1/track.lyrics.get"
    params = {
        "apikey": MUSIXMATCH_API_KEY,
        "track_id": track_id
    }
    try:
        # A timeout keeps one stalled connection from hanging the whole scrape.
        res = requests.get(url, params=params, timeout=timeout)
        final_data = res.json()
    except (requests.RequestException, ValueError):
        # Network failure or a non-JSON reply: treat it like any other failed lookup.
        print("Failed to fetch lyrics.")
        return ""

    message = final_data.get("message") if isinstance(final_data, dict) else None
    body = message.get("body") if isinstance(message, dict) else None
    # Walk the nested reply defensively: error replies may carry a non-dict
    # body (e.g. an empty list), so plain indexing would raise here.
    lyrics_entry = body.get("lyrics") if isinstance(body, dict) else None
    lyrics = lyrics_entry.get("lyrics_body") if isinstance(lyrics_entry, dict) else None
    if not isinstance(lyrics, str):
        print("Failed to fetch lyrics.")
        return ""

    # Remove metadata from the lyrics: keep only the text before the
    # "*******" separator.
    lyrics = lyrics.split("*******")[0]
    return lyrics.strip()

# Function to get lyrics for 50 songs of an artist
def get_artist_songs(artist_name):
    """Collect ``[track_name, artist, lyrics]`` rows for one artist.

    The tracks come from ``search_songs(artist_name)``; lyrics are fetched
    per track with ``get_songs``. Tracks whose lyrics come back empty are
    left out of the result.
    """
    fetched = (
        (title, performer, get_songs(tid))
        for title, performer, tid in search_songs(artist_name)
    )
    return [[title, performer, text] for title, performer, text in fetched if text]

# Function to write lyrics to CSV file
def write_to_csv(final_data, filename):
    """Write song rows to ``filename`` as a UTF-8 CSV file.

    The file is overwritten. The first row is the header
    ``Title, Artist, Lyrics``; each item of ``final_data`` follows as one row.
    """
    header = ["Title", "Artist", "Lyrics"]
    with open(filename, 'w', newline='', encoding='utf-8') as out_file:
        csv_out = csv.writer(out_file)
        csv_out.writerow(header)
        for record in final_data:
            csv_out.writerow(record)

# Main function
def main():
    """Fetch lyrics for the first batch of artists and append them to ``all_songs``.

    For each artist, ``get_artist_songs`` is called and its rows are added to
    the module-level ``all_songs`` list. Only a per-artist row count is
    printed; the lyric bodies themselves are not echoed to the output.
    """
    artists = [
        "Eminem",
        "Pop Smoke",
        "Rihanna",
        "50 Cent",
        "Drake",
        "Future",
        "Jay Z",
        "Dr. Dre",
        "Travis Scott",
        "Megan Thee Stallion",
        "Cardi B",
        "Kanye West",
        "Nicki Minaj",
        "Lil Wayne"
    ]  # Add more artists as needed

    for artist in artists:
        print(f"Fetching lyrics for {artist}...")
        songs = get_artist_songs(artist)
        # Report a count instead of dumping every full lyric into the cell output.
        print(f"  {len(songs)} songs with lyrics")
        all_songs.extend(songs)



# Run the first fetch batch when this code is executed directly (as in a
# notebook cell or script), but not when it is imported as a module.
if __name__ == "__main__":
    main()

Fetching lyrics for Eminem...
Fetching lyrics for Pop Smoke...
[['Dior', 'Pop Smoke', "(Traphouse Mob)\nHuh, roll another one\n\nSaid I'm never lackin', always pistol packing\nWith them automatics, we gon' send him to heaven\nWait, wait, wait, wait, ayy, ayy, woo (aw, shit), huh?\nOh, you feelin' sturdy, huh? (You feelin' sturdy, man)\nLet me see somethin'\nShake it, shake it, shake it\n\nShe like the way that I dance\nShe like the way that I move\nShe like the way that I rock\nShe like the way that I woo\nAnd she let it clap for a nigga\n(She let it clap for a nigga)\nAnd she throw it back for a nigga\n(Yeah, she throw it back for a nigga)\n\nMike Amiri, Mike Amiri\nBillie Jean, Billie Jean, uh\nChristian Dior, Dior\nI'm up in all the stores\nWhen it rains, it pours\nShe like the way I rrr\n\nMike Amiri, Mike Amiri\nBillie Jean, Billie Jean, uh\nChristian Dior, Dior\n..."], ['For The Night (feat. Lil Baby & DaBaby)', 'Pop Smoke feat. Lil Baby & DaBaby', 'CashMoneyAP\nGet \'em, get \'e

In [None]:
def extend_songs(artists=None):
    """Fetch lyrics for a further batch of artists and append them to ``all_songs``.

    Args:
        artists: Optional iterable of artist names. When omitted, the
            built-in second batch listed below is used, so a bare
            ``extend_songs()`` call behaves exactly as before.
    """
    if artists is None:
        artists = [
            "Snoop Dogg",
            "Lil Baby",
            "Post Malone",
            "Chance the Rapper",
            "Tyler, The Creator",
            "Juice WRLD",
            "Young Thug",
            "DaBaby",
            "Meek Mill",
            "G-Eazy",
            "Logic"
        ]

    for artist in artists:
        print(f"Fetching lyrics for {artist}...")
        songs = get_artist_songs(artist)
        all_songs.extend(songs)
# Fetch the second batch of artists; their rows are appended to all_songs.
extend_songs()

Fetching lyrics for Snoop Dogg...
Fetching lyrics for Lil Baby...
Fetching lyrics for Post Malone...
Fetching lyrics for Chance the Rapper...
Fetching lyrics for Tyler, The Creator...
Fetching lyrics for Juice WRLD...
Fetching lyrics for Young Thug...
Fetching lyrics for DaBaby...
Fetching lyrics for Meek Mill...
Fetching lyrics for G-Eazy...
Fetching lyrics for Logic...


In [None]:
# Save every collected [title, artist, lyrics] row to a CSV under the mounted Drive folder.
write_to_csv(all_songs, "/content/MyDrive/MyDrive/NLP Project/lyrics.csv")

In [None]:
#drop languages that aren't english
!pip install langdetect
from langdetect import detect
import pandas as pd

# Read the CSV file into a DataFrame
lyrics_df = pd.read_csv('lyrics.csv')

def detect_lyric(x):
    """Return the language code ``detect`` reports for ``x``, or ``None`` on failure.

    Any ordinary exception raised by ``detect`` is treated as "language
    unknown", so a single bad row cannot abort a DataFrame ``apply``.
    Interrupts such as ``KeyboardInterrupt`` are not swallowed.
    """
    try:
        return detect(x)
    except Exception:
        # Best-effort: rows that cannot be classified get no language label.
        return None

# Label each row with its detected language, then keep only the rows labelled "en".
lyrics_df["language"] = lyrics_df["Lyrics"].apply(detect_lyric)
lyrics_df = lyrics_df.loc[lyrics_df["language"] == "en"]



In [None]:
import itertools

# Collapse every "::" into ":" in each lyric so the split below yields fewer empty pieces.
lyrics = [text.replace("::", ":") for text in lyrics_df["Lyrics"].to_list()]

# Split each lyric on ":" and flatten the pieces from all songs into one list.
verses = list(itertools.chain.from_iterable(text.split(":") for text in lyrics))

def create_couplets(verses: list):
    """Join every pair of neighbouring entries of ``verses`` with a newline.

    Returns a list of ``len(verses) - 1`` strings, where item ``i`` is
    ``verses[i] + "\\n" + verses[i + 1]``. The result is empty when
    ``verses`` has fewer than two entries.
    """
    return [first + "\n" + second for first, second in zip(verses, verses[1:])]


# Pair each entry of the flattened verses list with the entry that follows it.
couplets = create_couplets(verses)
#display(couplets)

# Wrap the pairs in a one-column DataFrame (column "couplets_g").
couplets_df = pd.DataFrame(couplets, columns=["couplets_g"])
# Plain-text preview of the first rows.
print(couplets_df.head())

                                          couplets_g
0  Just gonna stand there and watch me burn?\nWel...
1  (Ugh, you're a monster)\n\nI can swallow a bot...
2  I'm friends with the monster that's under my b...
3  Yeah, It's been a ride...\nI guess I had to go...
4  I got a song filled with shit for the strong-w...
