### *Code Reference Note*
*This code snippet is mainly based on Professor **Carly Bobak**'s Spotify Project.
The set up of **Spotipy API** references the [blog](https://medium.com/@maxtingle/getting-started-with-spotifys-api-spotipy-197c3dc6353b) from Max Tingle.* 

In [4]:
#!pip install spotipy
#!pip install pandas

In [1]:
import json
import spotipy
import pandas as pd
import time
from spotipy.oauth2 import SpotifyClientCredentials
from requests.exceptions import HTTPError

client_id = '78a417244e874df79b88ebea59a57f18'  # insert your client id
client_secret = '1feb0a8cd0984f5cb54f7a3e9129af40'  # insert your client secret id here

client_credentials_manager = SpotifyClientCredentials(client_id, client_secret)
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

# playlist URL
playlist_url = 'https://open.spotify.com/playlist/6DUiUcSWM3UDEF8HrqhqWp'
# extract playlist ID from URL
playlist_id = playlist_url.split('/')[-1].split('?')[0]

# function to get all tracks from a playlist with retry logic
def get_playlist_tracks(sp, playlist_id, retries=5):
    tracks = []
    try:
        results = sp.playlist_tracks(playlist_id)
        tracks.extend(results['items'])
        while results['next']:
            results = sp.next(results)
            tracks.extend(results['items'])
    except HTTPError as e:
        if retries > 0:
            print(f"HTTPError encountered: {e}. Retrying in 5 seconds...")
            time.sleep(5)
            return get_playlist_tracks(sp, playlist_id, retries - 1)
        else:
            print(f"HTTPError encountered: {e}. Max retries exceeded.")
    return tracks

# get all tracks from the playlist
tracks = get_playlist_tracks(sp, playlist_id)

# create a list of song ids
ids = [item['track']['id'] for item in tracks if item['track']]

song_meta = {'id': [], 'album': [], 'name': [],
             'artist': [], 'popularity': [], 'genre': []}

for song_id in ids:
    try:
        # get song's meta data
        meta = sp.track(song_id)
        
        # song id
        song_meta['id'].append(song_id)

        # album name
        album = meta['album']['name']
        song_meta['album'].append(album)

        # song name
        song = meta['name']
        song_meta['name'].append(song)
        
        # artists name
        artist_ids = [artist['id'] for artist in meta['artists']]
        artist_names = [artist['name'] for artist in meta['artists']]
        artist_names_joined = ', '.join(artist_names)
        song_meta['artist'].append(artist_names_joined)

        # get genres for the first artist
        if artist_ids:
            artist_info = sp.artist(artist_ids[0])
            genre = ', '.join(artist_info['genres'])
            song_meta['genre'].append(genre)
        else:
            song_meta['genre'].append("")
        
        # song popularity
        popularity = meta['popularity']
        song_meta['popularity'].append(popularity)

    except HTTPError as e:
        print(f"HTTPError encountered while processing track {song_id}: {e}. Skipping this track.")

song_meta_df = pd.DataFrame.from_dict(song_meta)

# Function to fetch audio features in batches
def fetch_audio_features(sp, ids, batch_size=50):
    features_list = []
    for i in range(0, len(ids), batch_size):
        batch = ids[i:i + batch_size]
        try:
            features = sp.audio_features(batch)
            features_list.extend(features)
        except HTTPError as e:
            print(f"HTTPError encountered while fetching audio features: {e}. Retrying in 5 seconds...")
            time.sleep(5)
            features = sp.audio_features(batch)
            features_list.extend(features)
    return features_list

# Fetch audio features in batches
features = fetch_audio_features(sp, song_meta['id'])
# Change dictionary to dataframe
features_df = pd.DataFrame.from_dict(features)

# Convert milliseconds to minutes
features_df['duration_ms'] = features_df['duration_ms'] / 60000

# Combine two dataframes
final_df = song_meta_df.merge(features_df, on='id')

# Save the final dataframe to a CSV file
final_df.to_csv('playlist_final_info.csv', index=False)


# Save results to a JSON file
with open('playlist_results.json', 'w') as f:
    json.dump(tracks, f, indent=4)

In [7]:
final_df.to_csv('playlist_final_info.csv', index=False)

### *Code Reference Note*
*The configuration and utilization of the **Genius API** are based on the official [GitHub repository](https://github.com/johnwmillr/LyricsGenius) of the LyricsGenius developer.*

In [8]:
#!pip install lyricsgenius

In [5]:
import os
import time
import lyricsgenius
import re 
from requests.exceptions import RequestException
import spotipy
import pandas as pd
from spotipy.oauth2 import SpotifyClientCredentials
from requests.exceptions import HTTPError

# Set up Genius API credentials
genius_access_token = 'P85jb3oeFInvk5UMcLbu1DXc9Fo3QL8WfXRnc5dxFulxrXgHP8psAq34iCXXd9l0'  # Replace with your actual access token
genius = lyricsgenius.Genius(genius_access_token)

# Load your existing data
final_df = pd.read_csv('unique_playlist_info.csv')

# Create a directory to save lyrics files
lyrics_dir = 'lyrics'
os.makedirs(lyrics_dir, exist_ok=True)

# Initialize a list to store metadata about lyrics
lyrics_meta = []

# Function to fetch lyrics with retry logic and exponential backoff
def fetch_lyrics(artist, song_title):
    wait_time = 10  # Start with an initial wait time of 10 seconds
    for attempt in range(5):  # Allow up to 5 attempts
        try:
            song = genius.search_song(song_title, artist)
            if song:
                return song.lyrics
            else:
                return None
        except HTTPError as e:
            if hasattr(e, 'response') and e.response is not None:
                if e.response.status_code == 429:
                    retry_after = int(e.response.headers.get('Retry-After', wait_time))
                    print(f"Rate limit exceeded. Retrying in {retry_after} seconds...")
                    time.sleep(retry_after)
                    wait_time *= 2  # Exponential backoff
                else:
                    print(f"HTTPError for {song_title} by {artist}: {e}")
            else:
                print(f"HTTPError without response for {song_title} by {artist}: {e}")
            if attempt < 4:
                print(f"Retrying in {wait_time} seconds... ({attempt + 1}/5)")
                time.sleep(wait_time)
                wait_time *= 2  # Exponential backoff
            else:
                print("Max retries exceeded.")
                return None
        except RequestException as e:
            print(f"RequestException for {song_title} by {artist}: {e}")
            if attempt < 4:
                print(f"Retrying in {wait_time} seconds... ({attempt + 1}/5)")
                time.sleep(wait_time)
                wait_time *= 2  # Exponential backoff
            else:
                print("Max retries exceeded.")
                return None

# Function to clean lyrics
def clean_lyrics(lyrics):
    lines = lyrics.split('\n')
    cleaned_lines = []
    for line in lines[1:]:  # Skip the first line
        # Exclude lines with "Embed" and common unwanted patterns
        if not re.match(r'^[•\[].*$', line) and "Embed" not in line:
            cleaned_lines.append(line)
    return '\n'.join(cleaned_lines)


#time.sleep(65)

# Fetch lyrics for each song in the dataframe
for index, row in final_df.iterrows():
    artist = row['artist'].split(',')[0]  # Taking the first artist in case of multiple artists
    song_title = row['name']
    lyrics = fetch_lyrics(artist, song_title)
    if lyrics:
        cleaned_lyrics = clean_lyrics(lyrics)

        # Save cleaned lyrics to a text file
        filename = f"{artist.replace(' ', '_')}_{song_title.replace(' ', '_').replace('/', '_')}.txt"
        filepath = os.path.join(lyrics_dir, filename)
        with open(filepath, 'w', encoding='utf-8') as file:
            file.write(cleaned_lyrics)
        
        # Append metadata to lyrics_meta list
        lyrics_meta.append({
            'id': row['id'],
            'artist': artist,
            'name': song_title,
            'lyrics_file': filepath
        })
    else:
        lyrics_meta.append({
            'id': row['id'],
            'artist': artist,
            'name': song_title,
            'lyrics_file': None
        })


# Create a DataFrame for lyrics metadata
lyrics_meta_df = pd.DataFrame(lyrics_meta)

# Save the lyrics metadata DataFrame to a CSV file
lyrics_meta_df.to_csv('lyrics_metadata.csv', index=False)

print("Lyrics metadata saved to lyrics_metadata.csv")
print("Lyrics files saved in the 'lyrics' folder")

Searching for "Demons" by Imagine Dragons...
Done.
Searching for "yyyyyyyyyyyyyyyyyyy" by Collectively Adrift...
No results found for: 'yyyyyyyyyyyyyyyyyyy Collectively Adrift'
Searching for "Reminder" by The Weeknd...
Done.
Searching for "Shake It Off (Taylor's Version)" by Taylor Swift...
Done.
Searching for "SNAP" by Rosa Linn...
Done.
Searching for "Shape of You" by Ed Sheeran...
Done.
Searching for "Night Changes" by One Direction...
Done.
Searching for "Fin de Semana" by Oscar Maydon...
Done.
Searching for "オトノケ - Otonoke" by Creepy Nuts...
Done.
Searching for "Car's Outside" by James Arthur...
Done.
Searching for "Apocalypse" by Cigarettes After Sex...
Done.
Searching for "Cheques" by Shubh...
Done.
Searching for "Pink + White" by Frank Ocean...
Done.
Searching for "Circles" by Post Malone...
Done.
Searching for "Just the Way You Are" by Bruno Mars...
Done.
Searching for "Take Me to Church" by Hozier...
Done.
Searching for "Bebe Dame" by Fuerza Regida...
Done.
Searching for "You

OSError: [Errno 22] Invalid argument: 'lyrics\\Pritam_Kesariya_(From_"Brahmastra").txt'