## Code Document

### Working on Spotify Playlist


Importing libraries and setting up authentication with Spotify API.

In [None]:
# Authenticate against the Spotify Web API (authorization-code flow).

import os

import spotipy
from dotenv import load_dotenv
from spotipy.oauth2 import SpotifyOAuth

# Credentials must never be committed with the notebook: they are read from
# the environment (optionally populated from a local .env file).
# Set SPOTIPY_CLIENT_ID and SPOTIPY_CLIENT_SECRET before running this cell.
# NOTE(review): the client secret that used to be hardcoded here has been
# exposed and should be rotated in the Spotify developer dashboard.
load_dotenv()
sp = spotipy.Spotify(auth_manager=SpotifyOAuth(
    client_id=os.environ['SPOTIPY_CLIENT_ID'],
    client_secret=os.environ['SPOTIPY_CLIENT_SECRET'],
    redirect_uri='http://localhost:8888/callback',
    scope="user-library-read playlist-read-private"))

Fetching data from Spotify

In [None]:
# Disabled experiment: the whole cell is wrapped in a string literal, so none
# of it executes. When enabled it would print the playlist name followed by
# "<track name> - <first artist name>" for each item of the playlist.
'''playlist_id = '6JOysTE0drK9yDi1Xs4FKy'  # Replace with the actual playlist ID
playlist = sp.playlist(playlist_id)
print(playlist['name'])
for item in playlist['tracks']['items']:
    track = item['track']
    print(track['name'], '-', track['artists'][0]['name'])'''

In [None]:
import pandas as pd

# Fetch all tracks of a playlist (not the user's saved tracks) and collect
# name / first artist / album / release date into a DataFrame.
playlist_id = '6JOysTE0drK9yDi1Xs4FKy'

tracks_data = {
    'Name': [],
    'Artist': [],
    'Album': [],
    'Release Date': []
}

# playlist_tracks returns one page at a time; keep following the 'next' link
# so playlists longer than a single page are not silently truncated.
results = sp.playlist_tracks(playlist_id)
while results:
    for item in results['items']:
        track = item.get('track')
        if not track:  # Skip entries without track details
            continue
        artists = track.get('artists')
        album = track.get('album')

        tracks_data['Name'].append(track.get('name', 'No Title Available'))
        tracks_data['Artist'].append(
            artists[0].get('name', 'No Artist Available') if artists else 'No Artists')
        tracks_data['Album'].append(
            album.get('name', 'No Album Available') if album else 'No Album')
        tracks_data['Release Date'].append(
            album.get('release_date', 'No Release Date Available') if album else 'No Release Info')
    results = sp.next(results) if results.get('next') else None

# Convert to DataFrame
df_tracks = pd.DataFrame(tracks_data)
print(df_tracks)

In [None]:
# Search Spotify for tracks of one genre and collect name / artist / popularity.
#
# This cell reuses the authenticated `sp` client created in the authentication
# cell at the top of the notebook. It previously rebuilt `sp` with placeholder
# credentials ('your_client_id' / 'your_client_secret'), which replaced the
# working client and broke every later cell that calls `sp`.

# Search for tracks by genre
genre = "rock"  # Example genre
results = sp.search(q='genre:' + genre, type='track', limit=50)
tracks_data = {
    'Name': [],
    'Artist': [],
    'Genre': genre,  # single value shared by every track in this result set
    'Popularity': []
}

for track in results['tracks']['items']:
    tracks_data['Name'].append(track['name'])
    tracks_data['Artist'].append(track['artists'][0]['name'])
    tracks_data['Popularity'].append(track['popularity'])

# Print or process the results
print([tracks_data])

#### Importing Song Data with Lyrics using Genius API

In [None]:
import os

import lyricsgenius
from dotenv import load_dotenv

# Initialize Genius API; the token is read from the environment / .env file
# (variable `genius_token`) instead of being written into the notebook.
load_dotenv()
genius = lyricsgenius.Genius(os.getenv("genius_token"))

genius.remove_section_headers = True

# Spotify Global Top 50 playlist ID
playlist_id = '37i9dQZEVXbMDoHDwVN2tF'  # Example ID, update as necessary

results = sp.playlist_tracks(playlist_id)
tracks_data = []

# Fetch track details from Spotify
for item in results['items']:
    track = item.get('track')
    if not track:
        # Playlist entries can come back without track details; indexing
        # into None would abort the whole loop.
        continue
    track_info = {
        'Name': track['name'],
        'Artist': track['artists'][0]['name'] if track['artists'] else 'Unknown',
        'Album': track['album']['name'] if track['album'] else 'Unknown',
        'Release Date': track['album']['release_date'] if track['album'] else 'Unknown',
        'Popularity': track['popularity']
    }
    # Fetch lyrics using Genius
    song = genius.search_song(track_info['Name'], track_info['Artist'])
    track_info['Lyrics'] = song.lyrics if song else 'Lyrics not found'

    tracks_data.append(track_info)

In [None]:
import pandas as pd

# Convert the collected track records to a DataFrame.
# Columns are selected and renamed by key rather than assigned positionally,
# so the labels cannot drift if the record keys are reordered, and an empty
# `tracks_data` still yields a frame with the expected columns.
df_tracks = pd.DataFrame(
    tracks_data,
    columns=['Name', 'Artist', 'Album', 'Release Date', 'Popularity', 'Lyrics'],
).rename(columns={'Name': 'Track Name'})
print(df_tracks)

In [None]:
# Save DataFrame to CSV file
df_tracks.to_csv('spotify_songs_with_lyrics.csv', index=False)

In [None]:
df_tracks.columns

### Preprocessing data

Clean the lyrics by removing punctuation and converting the text to lowercase:

In [None]:
import re

def clean_text(text):
    """Return `text` lowercased with punctuation removed.

    Every character that is neither a word character (letters, digits,
    underscore) nor whitespace is dropped. Non-string input such as
    None or NaN (missing lyrics) yields an empty string instead of raising.
    """
    if not isinstance(text, str):
        return ''
    text = re.sub(r'[^\w\s]', '', text)  # Remove punctuation
    return text.lower()  # Convert to lower case

# Apply the cleaning function to the lyrics column
df_tracks['cleaned_lyrics'] = df_tracks['Lyrics'].apply(clean_text)

### Vectorization

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer

# Initialize a TF-IDF vectorizer with default settings and fit it on the
# cleaned lyrics; `X` is the resulting document-term matrix (one row per track).
# NOTE(review): `X` is not referenced by any later cell visible in this
# notebook — confirm whether it is still needed.
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(df_tracks['cleaned_lyrics'])

### Sentiment Analysis using VADER

In [None]:
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

# Initialize the VADER sentiment intensity analyzer
analyzer = SentimentIntensityAnalyzer()

# Compound sentiment score for a piece of text
def get_sentiment(text):
    """Return the 'compound' entry of the analyzer's polarity scores for `text`."""
    return analyzer.polarity_scores(text)['compound']

# Apply the sentiment analysis function to the cleaned lyrics
df_tracks['sentiment_score'] = df_tracks['cleaned_lyrics'].apply(get_sentiment)

### Assign Sentiment Labels

Classifying the sentiments as positive, neutral or negative based on the computed score

In [None]:
def assign_sentiment(score):
    """Map a sentiment score to 'positive', 'negative' or 'neutral'.

    Scores strictly above 0.05 are positive, scores strictly below -0.05 are
    negative, and everything else is neutral.
    """
    label = 'neutral'
    if score > 0.05:
        label = 'positive'
    elif score < -0.05:
        label = 'negative'
    return label

# Apply sentiment label assignment
df_tracks['sentiment_label'] = df_tracks['sentiment_score'].apply(assign_sentiment)

In [None]:
print(df_tracks[['Track Name', 'Artist', 'sentiment_score', 'sentiment_label']])

In [None]:
df_tracks[['Track Name', 'Artist', 'sentiment_score', 'sentiment_label']].to_csv('test_sentiment.csv', index=False)

test_sentiment = pd.read_csv('test_sentiment.csv')

### Importing the JSON file into a pandas-readable format

Start by importing the Spotify challenge (AIcrowd) file into Python.

In [1]:
import json
import os
from pathlib import Path

import pandas as pd

# Location of the challenge set. Override with the SENTIFY_CHALLENGE_SET
# environment variable on machines where the default path does not exist.
CHALLENGE_SET_PATH = Path(os.environ.get(
    'SENTIFY_CHALLENGE_SET',
    '/Users/rishabhhasija/Documents/neuefische/sentify_recommender/data/challenge_set.json'))

# Column order of the flattened table: playlist-level fields first, then
# the per-track fields.
TRACK_COLUMNS = [
    'playlist_name', 'playlist_pid', 'playlist_num_tracks', 'track_pos',
    'artist_name', 'track_uri', 'artist_uri', 'track_name', 'album_uri',
    'duration_ms', 'album_name',
]

# Load JSON data from file; the encoding is explicit so the result does not
# depend on the platform default.
with open(CHALLENGE_SET_PATH, 'r', encoding='utf-8') as file:
    data = json.load(file)

# Flatten playlists -> tracks: one record per (playlist, track) pair
all_tracks = []
for playlist in data['playlists']:
    # Playlist-level information repeated on each of its track records
    playlist_fields = {
        'playlist_name': playlist.get('name', 'Unknown'),
        'playlist_pid': playlist['pid'],
        'playlist_num_tracks': playlist['num_tracks'],
    }
    for track in playlist['tracks']:
        track_info = {
            **playlist_fields,
            'track_pos': track['pos'],
            'artist_name': track['artist_name'],
            'track_uri': track['track_uri'],
            'artist_uri': track['artist_uri'],
            'track_name': track['track_name'],
            'album_uri': track['album_uri'],
            'duration_ms': track['duration_ms'],
            'album_name': track['album_name']
        }
        all_tracks.append(track_info)

# Convert the list of track dictionaries to a DataFrame; passing the columns
# keeps the schema stable even when no tracks were found.
df_spotify = pd.DataFrame(all_tracks, columns=TRACK_COLUMNS)


In [2]:
print(df_spotify.shape)
#print(df_spotify.head())

(281000, 11)


In [3]:
df_spotify.columns

Index(['playlist_name', 'playlist_pid', 'playlist_num_tracks', 'track_pos',
       'artist_name', 'track_uri', 'artist_uri', 'track_name', 'album_uri',
       'duration_ms', 'album_name'],
      dtype='object')

In [None]:
df_spotify['playlist_num_tracks'].unique()

In [4]:
# One row per distinct track URI: keep the first occurrence of each URI, then
# drop every playlist/track column except `track_uri`.
non_uri_columns = [
    'playlist_name', 'playlist_pid', 'playlist_num_tracks', 'track_pos',
    'artist_name', 'artist_uri', 'track_name', 'album_uri', 'duration_ms',
    'album_name',
]
unique_tracks = df_spotify.drop_duplicates(subset=['track_uri'])
df_lyrics = unique_tracks.drop(columns=non_uri_columns).reset_index(drop=True)

In [141]:
df_lyrics.shape

(66243, 1)

In [120]:
mini_df = df_lyrics.head(47397)
mini_df.shape

(47397, 1)

In [39]:
df_lyrics.to_csv('df_lyrics.csv', index=False)

In [121]:
mini_df.to_csv('mini_df.csv', index=False)

In [122]:
import os
import time

import spotipy
from dotenv import load_dotenv
from spotipy.oauth2 import SpotifyClientCredentials

# Client-credentials flow (no user login). The client id/secret are read from
# the environment (optionally loaded from a local .env file) rather than being
# stored in the notebook.
# NOTE(review): the secret that used to be hardcoded here has been exposed and
# should be rotated in the Spotify developer dashboard.
load_dotenv()
auth_manager = SpotifyClientCredentials(
    client_id=os.environ['SPOTIPY_CLIENT_ID'],
    client_secret=os.environ['SPOTIPY_CLIENT_SECRET'])
sp = spotipy.Spotify(auth_manager=auth_manager)

In [None]:
#import os
#def file_exists_and_not_empty(file_name):
#    return os.path.isfile(file_name) and os.stat(file_name).st_size > 0

In [123]:
def process_batch(batch_data):
    """Look up artist and track name on Spotify for every URI in a batch.

    Parameters
    ----------
    batch_data : DataFrame with a 'track_uri' column.

    Returns
    -------
    DataFrame with columns 'track_uri', 'artist_name', 'track_name' holding
    one row per URI that was resolved completely. URIs whose lookup fails are
    reported on stdout and left out, so the result never contains
    half-filled rows.
    """
    columns = ['track_uri', 'artist_name', 'track_name']
    records = []

    # Iterating over the batch_data to fetch details from Spotify
    for i, uri in enumerate(batch_data['track_uri']):
        try:
            track = sp.track(uri)
            # Build the full record before storing it: a failure on any field
            # must not leave a partially populated row behind.
            record = {
                'track_uri': uri,
                'artist_name': track['artists'][0]['name'],
                'track_name': track['name'],
            }
        except Exception as e:
            print(f"Error processing {uri}: {e}")
            continue
        records.append(record)
        print(i)

    # Build the frame once instead of growing it row by row
    return pd.DataFrame(records, columns=columns)

In [124]:
# Resolve artist/track names for the URIs in mini_df.csv, ten rows at a time,
# appending the results to mini_test.csv.
chunk_size = 10
csv_file = "mini_df.csv"  # Path to input CSV file
output_file = "mini_test.csv"  # Path to save the processed data

# Resume point. skiprows below skips file lines 1 .. start_row-1 (line 0 is the
# header and is kept), so the first data row read is the one at 0-based
# position start_row-1.
start_row = 46244 # Adjust row number from which processing needs to be started

# Read the CSV file in batches and process each batch
for chunk in pd.read_csv(csv_file, chunksize=chunk_size, skiprows=range(1, start_row)):
    # Pause before every chunk — presumably to stay under the API rate limit
    time.sleep(5)
    processed_chunk = process_batch(chunk)
    # Append each processed chunk to the output file. The file is opened in
    # append mode without a header, so re-running over the same rows writes
    # them again (duplicates are removed in a later step).
    processed_chunk.to_csv(output_file, mode="a", header=False, index=False)

0
1
2
3
4
5
6
7
8
9
0
1
2
3
4
5
6
7
8
9
0
1
2
3
4
5
6
7
8
9
0
1
2
3
4
5
6
7
8
9
0
1
2
3
4
5
6
7
8
9
0
1
2
3
4
5
6
7
8
9
0
1
2
3
4
5
6
7
8
9
0
1
2
3
4
5
6
7
8
9
0
1
2
3
4
5
6
7
8
9
0
1
2
3
4
5
6
7
8
9
0
1
2
3
4
5
6
7
8
9
0
1
2
3
4
5
6
7
8
9
0
1
2
3
4
5
6
7
8
9
0
1
2
3
4
5
6
7
8
9
0
1
2
3
4
5
6
7
8
9
0
1
2
3
4
5
6
7
8
9
0
1
2
3
4
5
6
7
8
9
0
1
2
3
4
5
6
7
8
9
0
1
2
3
4
5
6
7
8
9
0
1
2
3
4
5
6
7
8
9
0
1
2
3
4
5
6
7
8
9
0
1
2
3
4
5
6
7
8
9
0
1
2
3
4
5
6
7
8
9
0
1
2
3
4
5
6
7
8
9
0
1
2
3
4
5
6
7
8
9
0
1
2
3
4
5
6
7
8
9
0
1
2
3
4
5
6
7
8
9
0
1
2
3
4
5
6
7
8
9
0
1
2
3
4
5
6
7
8
9
0
1
2
3
4
5
6
7
8
9
0
1
2
3
4
5
6
7
8
9
0
1
2
3
4
5
6
7
8
9
0
1
2
3
4
5
6
7
8
9
0
1
2
3
4
5
6
7
8
9
0
1
2
3
4
5
6
7
8
9
0
1
2
3
4
5
6
7
8
9
0
1
2
3
4
5
6
7
8
9
0
1
2
3
4
5
6
7
8
9
0
1
2
3
4
5
6
7
8
9
0
1
2
3
4
5
6
7
8
9
0
1
2
3
4
5
6
7
8
9
0
1
2
3
4
5
6
7
8
9
0
1
2
3
4
5
6
7
8
9
0
1
2
3
4
5
6
7
8
9
0
1
2
3
4
5
6
7
8
9
0
1
2
3
4
5
6
7
8
9
0
1
2
3
4
5
6
7
8
9
0
1
2
3
4
5
6
7
8
9
0
1
2
3
4
5
6
7
8
9
0
1
2
3
4
5
6
7
8
9


#### Removing Duplicates

In [126]:
# Reading dataset & assigning column names
df1 = pd.read_csv('mini_test.csv', header=None)
df1.columns = ['track_uri', 'artist_name', 'track_name']
df1.head()

Unnamed: 0,track_uri,artist_name,track_name
0,spotify:track:66U0ASk1VHZsqIkpMjKX3B,AronChupa,Little Swing
1,spotify:track:5MhsZlmKJG6X5kTHkdwC4B,AronChupa,I'm an Albatraoz
2,spotify:track:0GZoB8h0kqXn7XFm4Sj06k,Lorde,Yellow Flicker Beat - From The Hunger Games: M...
3,spotify:track:35kahykNu00FPysz3C2euR,Lorde,White Teeth Teens
4,spotify:track:3G6hD9B2ZHOsgf4WfNu7X1,Lorde,Team


In [127]:
#df1[df1.duplicated(keep=False)]
df1_unique = df1.drop_duplicates(keep='first')
df1_unique.shape

(47387, 3)

In [None]:
# NOTE(review): `data1` is only referenced by the commented-out line below.
data1 = ['df1', 'df2']

#data2 = [pd.read_csv(f) for f in data1]
# Stack df1 and df2 into one frame with a fresh 0..n-1 index.
# NOTE(review): `df2` is not assigned in any cell shown before this one, so
# this cell depends on leftover kernel state — confirm where df2 comes from.
spotify_data1 = pd.concat([df1, df2], ignore_index=True)
spotify_data1.shape

In [142]:
spotify_data = spotify_data1.drop_duplicates(keep='first')
#spotify_data.shape
spotify_data.to_csv('spotify_data.csv', index=False)


### Testing on Genius

In [15]:
import pandas as pd
spotify_data = pd.read_csv('/Users/rishabhhasija/Documents/neuefische/sentify_recommender/spotify_data.csv')
#df2 = spotify_data.head(10)
#df2.to_csv('df2.csv', index=False)

In [3]:
# Calling for Genius API credentials from dotenv file
from dotenv import load_dotenv
import os
import lyricsgenius
import pandas as pd

load_dotenv()
token = os.getenv("genius_token")
genius = lyricsgenius.Genius(token) # Providing token info to access information 
genius.remove_section_headers = True # Remove section headers (e.g. [Chorus]) from lyrics when searching
genius.timeout = 60 # Timeout call for API

In [4]:
def process_batch(batch_data):
    """Fetch Genius lyrics for every row of a batch.

    Parameters
    ----------
    batch_data : DataFrame with 'track_uri', 'artist_name' and 'track_name'
        columns. Its index may be arbitrary (e.g. a later chunk produced by
        ``pd.read_csv(..., chunksize=...)``); rows are read by position.

    Returns
    -------
    DataFrame with columns 'track_uri', 'artist_name', 'track_name', 'lyrics'.
    Rows whose search raised are reported on stdout and left out; songs that
    Genius cannot find get the text 'Lyrics not found'.
    """
    columns = ['track_uri', 'artist_name', 'track_name', 'lyrics']
    records = []

    # Iterate over every row (including the last one) by position, so chunks
    # whose index does not start at 0 are handled correctly.
    rows = zip(batch_data['track_uri'], batch_data['artist_name'], batch_data['track_name'])
    for i, (track_uri, artist_name, track_name) in enumerate(rows):
        try:
            track = genius.search_song(track_name, artist_name)
        except Exception as e:
            print(f"Error processing {track_uri}: {e}")
            continue

        records.append({
            'track_uri': track_uri,
            'artist_name': artist_name,
            'track_name': track_name,
            'lyrics': track.lyrics if track else 'Lyrics not found',
        })
        print(i)

    return pd.DataFrame(records, columns=columns)

In [None]:
# Fetch lyrics for every row of spotify_data.csv, ten rows at a time, and
# collect the results in lyrics_test.csv.
chunk_size = 10
csv_file = '/Users/rishabhhasija/Documents/neuefische/sentify_recommender/spotify_data.csv'  # Path to input CSV file
output_file = "lyrics_test.csv"  # Path to save the processed data

#start_row = 0 # Adjust row number from which processing needs to be started
# Write a header-only file first. This overwrites any existing output file, so
# each run of this cell starts from an empty result.
pd.DataFrame(columns=['track_uri', 'artist_name', 'track_name', 'lyrics']).to_csv(output_file, index=False)

# Read the CSV file in batches and process each batch
for chunk in pd.read_csv(csv_file, chunksize=chunk_size):
    processed_chunk = process_batch(chunk)
    # Append each processed chunk (without header) to the output file
    processed_chunk.to_csv(output_file, mode="a", header=False, index=False)
    print(processed_chunk)

#### Try 2

In [31]:
import os
import pandas
import lyricsgenius
import time

from dotenv import load_dotenv

# The Genius token is read from the environment / .env file (same variable as
# the other Genius cells) instead of a placeholder literal.
load_dotenv()
genius = lyricsgenius.Genius(os.getenv("genius_token"))
genius.remove_section_headers = True

# File that receives the full table after every batch
checkpoint_file = 'lyrics_rishabh.csv'

# Defining the batches at which to save progress
batch_size = 10
num_songs = len(spotify_data)

# When resuming with a freshly loaded `spotify_data`, restore the lyrics that
# an earlier run already saved. Otherwise the first save below would overwrite
# the checkpoint with empty lyrics for every row before `start_index`.
if 'lyrics' not in spotify_data.columns:
    spotify_data['lyrics'] = None
    if os.path.exists(checkpoint_file):
        checkpoint = pandas.read_csv(checkpoint_file)
        # Only trust a checkpoint that describes the same table
        if 'lyrics' in checkpoint.columns and len(checkpoint) == num_songs:
            spotify_data['lyrics'] = checkpoint['lyrics'].astype(object).values

# Specify the starting index from where to resume processing
start_index = int(input("input value here"))

for i in range(start_index, num_songs, batch_size):
    end_index = min(i + batch_size, num_songs)

    for j in range(i, end_index):
        try:
            song = genius.search_song(spotify_data.loc[j, 'track_name'], spotify_data.loc[j, 'artist_name'])
            # Newlines are flattened to spaces so each song stays on one CSV line
            spotify_data.loc[j, 'lyrics'] = song.lyrics.replace('\n', ' ') if song else 'Lyrics not found'
        except Exception as e:
            print(f"Error fetching lyrics for index {j}: {e}")
            spotify_data.loc[j, 'lyrics'] = 'Error fetching lyrics'

    # Save the DataFrame at regular intervals
    spotify_data.to_csv(checkpoint_file, index=False)
    print(f"Progress saved up to index {end_index - 1}")

    # Pause between batches before issuing further requests
    time.sleep(10)

Searching for "Halloween Spooks" by Lambert, Hendricks & Ross...
Done.
Searching for "Nightmare" by Artie Shaw...
Done.
Searching for "The Ghost of Smoky Joe" by Cab Calloway & His Orchestra...
No results found for: 'The Ghost of Smoky Joe Cab Calloway & His Orchestra'
Searching for "The Little Man Who Wasn't There (feat. Glenn Miller)" by Glenn Miller...
Done.
Searching for "Coleccionista de Canciones" by Camila...
Done.
Searching for "Mientes" by Camila...
Done.
Searching for "Yo Quiero" by Camila...
Done.
Searching for "De Mí" by Camila...
Done.
Searching for "No Veo la Hora" by Noel Schajris...
Done.
Searching for "Don't Lay Your Head" by Us The Duo...
Done.
Progress saved up to index 84
Searching for "Come Back" by Us The Duo...
Done.
Searching for "No Matter Where You Are" by Us The Duo...
Done.
Searching for "Poison & Wine" by The Civil Wars...
Done.
Searching for "Hold Back The River" by James Bay...
Error fetching lyrics for index 88: Request timed out:
HTTPSConnectionPool(hos

KeyboardInterrupt: 

In [None]:
# Fetch lyrics for every row of spotify_data. The loop covers all rows; the
# previous bound of len(spotify_data) - 1 left the last row without lyrics.
for i in range(len(spotify_data)):
    song = genius.search_song(spotify_data.loc[i, 'track_name'], spotify_data.loc[i, 'artist_name'])
    spotify_data.loc[i, 'lyrics'] = song.lyrics if song else 'Lyrics not found'

In [None]:
# Resolve (artist name, track name) pairs for a batch of track URIs
def fetch_tracks_details(track_uris):
    """Return one (artist_name, track_name) tuple per requested track.

    Tracks the API returns as empty map to ('Unknown', 'Unknown'); a track
    without artists gets 'Unknown' as its artist. If the lookup raises, the
    error is printed and every URI of the batch maps to ('Unknown', 'Unknown').
    """
    unknown = ('Unknown', 'Unknown')
    try:
        tracks_info = sp.tracks(track_uris)
        details = []
        for track in tracks_info['tracks']:
            if not track:  # No details were retrieved for this entry
                details.append(unknown)
                continue
            artists = track['artists']
            first_artist = artists[0]['name'] if artists else 'Unknown'
            details.append((first_artist, track['name']))
        return details
    except Exception as e:
        print(f"Error processing batch: {str(e)}")
        return [unknown] * len(track_uris)

# Apply batch processing: resolve artist and track names 50 URIs at a time
batch_size = 50
artist_names = []
track_names = []

for i in range(0, len(df_lyrics), batch_size):
    batch_uris = df_lyrics['track_uri'].iloc[i:i + batch_size].tolist()
    batch_results = fetch_tracks_details(batch_uris)
    artist_names.extend([res[0] for res in batch_results])
    track_names.extend([res[1] for res in batch_results])

# Store the results under the column names the lyrics-fetching cells read
# ('artist_name' / 'track_name'). The artist names were previously written to
# a column called 'alb_name', which mislabelled them and left those cells
# without the columns they look up.
df_lyrics['artist_name'] = artist_names
df_lyrics['track_name'] = track_names

# Save the updated DataFrame
df_lyrics.to_csv('df_lyrics_updated.csv', index=False)

In [6]:
df2

Unnamed: 0,track_uri,artist_name,track_name
0,spotify:track:66U0ASk1VHZsqIkpMjKX3B,AronChupa,Little Swing
1,spotify:track:5MhsZlmKJG6X5kTHkdwC4B,AronChupa,I'm an Albatraoz
2,spotify:track:0GZoB8h0kqXn7XFm4Sj06k,Lorde,Yellow Flicker Beat - From The Hunger Games: M...
3,spotify:track:35kahykNu00FPysz3C2euR,Lorde,White Teeth Teens
4,spotify:track:3G6hD9B2ZHOsgf4WfNu7X1,Lorde,Team
5,spotify:track:6WQLkih8nE0JdUCEyLaGnQ,Alesso,Heroes (we could be)
6,spotify:track:37sINbJZcFdHFAsVNsPq1i,The Script,Superheroes
7,spotify:track:0yhPEz5KxlDwckGJaMlZqM,Fall Out Boy,Centuries
8,spotify:track:5j9iuo3tMmQIfnEEQOOjxh,American Authors,Best Day Of My Life
9,spotify:track:4eLSCSELtKxZwXnFbNLXT5,Imagine Dragons,On Top Of The World


In [None]:
import os

from dotenv import load_dotenv
from lyricsgenius import Genius

# The token is read from the environment / .env file. It was previously pasted
# here as a bare (unquoted) name, which raises NameError and also exposed the
# secret.
# NOTE(review): that token should be regenerated in the Genius API client page.
load_dotenv()
genius = Genius(os.getenv("genius_token"))

# Look up each (track, artist) pair individually: the search takes one song
# title and one artist name per call, not whole DataFrame columns.
df2_songs = [
    genius.search_song(track_name, artist_name)
    for artist_name, track_name in zip(df2['artist_name'], df2['track_name'])
]
[song.lyrics if song else 'Lyrics not found' for song in df2_songs]


#### Splitting File into multiple files of fixed row size

In [None]:
rows_per_file = 3000  # Number of rows per file

# Number of files needed (ceiling division)
num_files = -(-len(df_lyrics) // rows_per_file)

# Walk over the start offset of each slice and write it to its own CSV file,
# numbered from 1
for i, start_row in enumerate(range(0, len(df_lyrics), rows_per_file)):
    end_row = start_row + rows_per_file
    chunk = df_lyrics.iloc[start_row:end_row]
    chunk.to_csv(f'output_file_{i + 1}.csv', index=False)

#### Fetching Lyrics from Genius API

In [None]:
import dotenv
from dotenv import load_dotenv
import os
import lyricsgenius

load_dotenv()
token = os.getenv("genius_token")
genius = lyricsgenius.Genius(token)
genius.remove_section_headers = True # Remove section headers from lyrics when searching
genius.timeout = 60

In [None]:
# Fetching lyrics via API for a small sample of rows.
# With the 66243-row table, the previous bound len(df_lyrics) - 66233 meant the
# first 10 rows; the sample size is now explicit.
sample_size = 10
for i in range(min(sample_size, len(df_lyrics))):
    song = genius.search_song(df_lyrics.loc[i, 'track_name'], df_lyrics.loc[i, 'artist_name'])
    # Store the lyrics text, not the song object returned by the search
    df_lyrics.loc[i, 'lyrics'] = song.lyrics if song else 'Lyrics not found'

In [None]:
# Fetch lyrics for every row of df_lyrics. The loop covers all rows; the
# previous bound of len(df_lyrics) - 1 left the last row without lyrics.
for i in range(len(df_lyrics)):
    song = genius.search_song(df_lyrics.loc[i, 'track_name'], df_lyrics.loc[i, 'artist_name'])
    df_lyrics.loc[i, 'lyrics'] = song.lyrics if song else 'Lyrics not found'

In [None]:
df_lyrics['lyrics'][7]

In [None]:
print("131 ContributorsTranslationsEspañolFrançaisItalianoPortuguêsTeam Lyrics\nWait till you're announced\nWe've not yet lost all our graces\nThe hounds will stay in chains\nLook upon Your Greatness and she'll\nSend the call out, send the call out\nSend the call out, send the call out\nSend the call out, send the call out\nSend the call out, send the call out\nSend the call out, send the call out\nSend the call out, send the call out\nSend the call out, send the call out\nSend the call out, send the call out\n\nCall all the ladies out\nThey're in their finery\nA hundred jewels on throats\nA hundred jewels between teeth\nNow bring my boys in\nTheir skin in craters like the moon\nThe moon we love like a brother\nWhile he glows through the room\n\nDancin' around the lies we tell\nDancin' around big eyes, as well\nEven the comatose\nThey don't dance and tell\nYou might also like\nWe live in cities you'll never see on-screen\nNot very pretty, but we sure know how to run things\nLivin' in ruins of a palace within my dreams\nAnd you know, we're on each other's team\n\nI'm kind of over gettin' told to throw my hands up in the air\nSo there\n\nSo all the cups got broke\nShards beneath our feet\nBut it wasn't my fault\nAnd everyone's competing\nFor a love they won't receive\n'Cause what this palace wants is release\n\nWe live in cities you'll never see on-screen\nNot very pretty, but we sure know how to run things\nLivin' in ruins of a palace within my dreams\nAnd you know, we're on each other's team\n\nI'm kind of over gettin' told to throw my hands up in the air\nSo there\nI'm kind of older than I was when I reveled without a care\nSo there\nWe live in cities you'll never see on-screen\nNot very pretty, but we sure know how to run things\nLivin' in ruins of a palace within my dreams\nAnd you know, we're on each other's team\n\nWe're on each other's team\nAnd you know, we're on each other's team\nWe're on each other's team\nAnd you know, and you know, and you know330Embed")

In [None]:
df_lyrics

In [None]:
df_lyrics['lyrics'].isna().value_counts()

In [None]:
import os

import lyricsgenius
from dotenv import load_dotenv

# Initialize Genius API; the token is read from the environment / .env file
# (variable `genius_token`) instead of being written into the notebook.
load_dotenv()
genius = lyricsgenius.Genius(os.getenv("genius_token"))

genius.remove_section_headers = True

# Spotify Global Top 50 playlist ID
playlist_id = '37i9dQZEVXbMDoHDwVN2tF'  # Example ID, update as necessary

results = sp.playlist_tracks(playlist_id)
tracks_data = []

# Fetch track details from Spotify
for item in results['items']:
    track = item.get('track')
    if not track:
        # Playlist entries can come back without track details; indexing
        # into None would abort the whole loop.
        continue
    track_info = {
        'Name': track['name'],
        'Artist': track['artists'][0]['name'] if track['artists'] else 'Unknown',
        'Album': track['album']['name'] if track['album'] else 'Unknown',
        'Release Date': track['album']['release_date'] if track['album'] else 'Unknown',
        'Popularity': track['popularity']
    }
    # Fetch lyrics using Genius
    song = genius.search_song(track_info['Name'], track_info['Artist'])
    track_info['Lyrics'] = song.lyrics if song else 'Lyrics not found'

    tracks_data.append(track_info)

In [None]:
# Fetch lyrics for every row of df_lyrics that does not have any yet, then
# write the whole table to df_final.csv.

# Initialize the lyrics column if it doesn't exist
if 'lyrics' not in df_lyrics.columns:
    df_lyrics['lyrics'] = None

# Defining batch size for progress tracking (used only for the progress
# messages below; requests are still made one song at a time)
batch_size = 100
num_batches = (len(df_lyrics) + batch_size - 1) // batch_size  # Calculate total number of batches

# Processing each song in df_lyrics
for i in range(len(df_lyrics)):
    if pd.isna(df_lyrics.at[i, 'lyrics']):  # Only rows without a value are fetched, so a re-run skips finished rows
        try:
            song = genius.search_song(df_lyrics.at[i, 'track_name'], df_lyrics.at[i, 'artist_name'])
            df_lyrics.at[i, 'lyrics'] = song.lyrics if song else 'Lyrics not found'
        except Exception as e:
            print(f"Error fetching lyrics for row {i}: {e}")
            # The error marker is a non-null value, so this row is not retried
            # on a later run of this cell
            df_lyrics.at[i, 'lyrics'] = 'Error fetching lyrics'
    # Progress message on the first row of every batch and on the final row
    if i % batch_size == 0 or i == len(df_lyrics) - 1:
        print(f"Processed {i+1}/{len(df_lyrics)} songs, Batch {i // batch_size + 1}/{num_batches}")

# Saving complete DataFrame with lyrics to a new file. This happens only once,
# after the loop has finished.
df_lyrics.to_csv('df_final.csv', index=False)


In [None]:
# Save DataFrame to CSV file
df_lyrics.to_csv('spotify_lyrics.csv', index=False)

In [None]:
df_lyrics.head()

In [None]:
# This cell relies on the `sp` Spotify client created in an earlier cell.
# (A disabled, string-quoted copy of the client setup that embedded a real
# client id and secret was removed from here; credentials belong in the
# environment, not in the notebook.)

# Resolve (artist name, track name) pairs for a batch of track URIs
def fetch_tracks_details(track_uris):
    """Return one (artist_name, track_name) tuple per requested track.

    Tracks the API returns as empty map to ('Unknown', 'Unknown'); a track
    without artists gets 'Unknown' as its artist. If the lookup raises, the
    error is printed and every URI of the batch maps to ('Unknown', 'Unknown').
    """
    unknown = ('Unknown', 'Unknown')
    try:
        tracks_info = sp.tracks(track_uris)
        details = []
        for track in tracks_info['tracks']:
            if not track:  # No details were retrieved for this entry
                details.append(unknown)
                continue
            artists = track['artists']
            first_artist = artists[0]['name'] if artists else 'Unknown'
            details.append((first_artist, track['name']))
        return details
    except Exception as e:
        print(f"Error processing batch: {str(e)}")
        return [unknown] * len(track_uris)

# Apply batch processing: resolve artist and track names 50 URIs at a time
batch_size = 50
artist_names = []
track_names = []

for i in range(0, len(df_lyrics), batch_size):
    batch_uris = df_lyrics['track_uri'].iloc[i:i + batch_size].tolist()
    batch_results = fetch_tracks_details(batch_uris)
    artist_names.extend([res[0] for res in batch_results])
    track_names.extend([res[1] for res in batch_results])

# Store the results under the column names the lyrics-fetching cells read
# ('artist_name' / 'track_name'). The artist names were previously written to
# a column called 'alb_name', which mislabelled them and left those cells
# without the columns they look up.
df_lyrics['artist_name'] = artist_names
df_lyrics['track_name'] = track_names

# Save the updated DataFrame
df_lyrics.to_csv('df_lyrics_updated.csv', index=False)