# Loading the libraries required

In [1]:
import base64
import os
import re

import pandas as pd
import requests
from bs4 import BeautifulSoup

# Spotify credentials and access token

In [None]:
# Spotify API credentials.
# They are read from the environment so that real secrets never have to be
# typed into (and later scrubbed from) a notebook that is published on GitHub.
# Both fall back to an empty string when the variable is not set.
CLIENT_ID = os.environ.get('SPOTIFY_CLIENT_ID', '')  # Spotify API client ID
CLIENT_SECRET = os.environ.get('SPOTIFY_CLIENT_SECRET', '')  # Spotify API client secret

In [None]:
# Getting an access token
def get_access_token(client_id, client_secret, timeout=10):
    """Request a Spotify access token with the client-credentials grant.

    Args:
        client_id: Spotify application client ID.
        client_secret: Spotify application client secret.
        timeout: Seconds to wait for the token endpoint. Without a timeout
            ``requests`` may block indefinitely on an unresponsive server.

    Returns:
        The ``access_token`` value from the JSON response, or ``None`` when
        the endpoint answers with a non-200 status (the status code and body
        are printed in that case).

    Raises:
        requests.exceptions.RequestException: network failures and timeouts
            are not caught here and propagate to the caller.
    """
    auth_url = 'https://accounts.spotify.com/api/token'
    # HTTP Basic credentials: base64("<client_id>:<client_secret>").
    credentials = f"{client_id}:{client_secret}".encode()
    auth_header = base64.b64encode(credentials).decode()
    response = requests.post(
        auth_url,
        headers={'Authorization': f'Basic {auth_header}'},
        data={'grant_type': 'client_credentials'},
        timeout=timeout,
    )
    if response.status_code != 200:
        print(f"Error getting access token: {response.status_code} - {response.text}")
        return None
    return response.json().get('access_token')

# Spotify API Methods

In [None]:
# Searching for tracks from a specific year
def search_tracks(year, access_token, limit=50, offset=0, market='US', timeout=10):
    """Search Spotify for tracks released in a given year.

    Args:
        year: Release year used in the ``year:<year>`` search filter.
        access_token: Spotify bearer token.
        limit: Number of results per page (the previous hard-coded value, 50,
            is the default).
        offset: Index of the first result to return; raise it to page through
            results beyond the first ``limit`` tracks.
        market: Market code sent with the search.
        timeout: Seconds to wait for the API before ``requests`` gives up.

    Returns:
        The decoded JSON response on HTTP 200, otherwise ``None`` (the status
        code and body are printed).
    """
    url = 'https://api.spotify.com/v1/search'
    headers = {'Authorization': f'Bearer {access_token}'}
    params = {
        'q': f'year:{year}',
        'limit': limit,
        'offset': offset,
        'type': 'track',
        'market': market,
    }
    response = requests.get(url, headers=headers, params=params, timeout=timeout)
    if response.status_code != 200:
        print(f"Error fetching tracks: {response.status_code} - {response.text}")
        return None
    return response.json()

In [None]:
# Searching for the track ID by its name and its artist's name
def get_track_id(track_name, artist_name, access_token, timeout=10):
    """Look up the Spotify ID of a track by its name and artist name.

    Args:
        track_name: Track title used in the ``track:`` search filter.
        artist_name: Artist name used in the ``artist:`` search filter.
        access_token: Spotify bearer token.
        timeout: Seconds to wait for the API before ``requests`` gives up.

    Returns:
        The ID of the first search result, or ``None`` when the search has no
        results or the API answers with a non-200 status (which is printed).
    """
    url = 'https://api.spotify.com/v1/search'
    headers = {'Authorization': f'Bearer {access_token}'}
    params = {
        'q': f'track:{track_name} artist:{artist_name}',
        'limit': 1,  # only the best match is used
        'type': 'track',
        'market': 'US',
    }
    response = requests.get(url, headers=headers, params=params, timeout=timeout)
    if response.status_code != 200:
        print(f"Error: {response.status_code}, {response.text}")
        return None
    search_results = response.json()
    # Tolerate a missing "tracks"/"items" key as well as an empty result list.
    items = (search_results.get('tracks') or {}).get('items')
    if not items:
        return None
    return items[0]['id']


In [None]:
# Searching for the artist ID by their name
def get_artist_id(artist_name, access_token, timeout=10):
    """Look up the Spotify ID of an artist by name.

    Args:
        artist_name: Free-text artist name used as the search query.
        access_token: Spotify bearer token.
        timeout: Seconds to wait for the API before ``requests`` gives up.

    Returns:
        The ID of the first matching artist, or ``None`` when the search has
        no results or the API answers with a non-200 status (which is
        printed).
    """
    url = 'https://api.spotify.com/v1/search'
    headers = {'Authorization': f'Bearer {access_token}'}
    params = {
        'q': artist_name,
        'limit': 1,  # only the best match is used
        'type': 'artist',
        'market': 'US',
    }
    response = requests.get(url, headers=headers, params=params, timeout=timeout)
    if response.status_code != 200:
        print(f"Error {response.status_code}: {response.text}")
        return None
    search_results = response.json()
    # Getting the artist ID of the first search result. An empty "items" list
    # (no artist matched) must yield None rather than an IndexError.
    items = (search_results.get('artists') or {}).get('items')
    if not items:
        return None
    return items[0]['id']

In [None]:
def get_track_features(track_id, access_token, timeout=10):
    """Fetch basic metadata for a Spotify track.

    Args:
        track_id: Spotify track ID.
        access_token: Spotify bearer token.
        timeout: Seconds to wait for the API before ``requests`` gives up.

    Returns:
        A dict with the keys ``track_name``, ``main_artist`` (first listed
        artist), ``all_artists`` (names of every listed artist),
        ``popularity``, ``duration_ms``, ``explicit`` and ``release_date``
        (taken from the album). On a non-200 status the error is printed and
        ``0`` is returned; the ``0`` sentinel is kept for existing callers.
    """
    url = f'https://api.spotify.com/v1/tracks/{track_id}'
    headers = {'Authorization': f'Bearer {access_token}'}
    response = requests.get(url, headers=headers, timeout=timeout)
    if response.status_code != 200:
        print(f"Error: {response.status_code}, {response.text}")
        return 0

    track_data = response.json()
    artists = track_data['artists']
    return {
        'track_name': track_data['name'],
        'main_artist': artists[0]['name'],
        # every artist credited on the song, main artist included
        'all_artists': [artist['name'] for artist in artists],
        'popularity': track_data['popularity'],
        'duration_ms': track_data['duration_ms'],
        'explicit': track_data['explicit'],
        'release_date': track_data['album']['release_date'],
    }

In [None]:
def get_artist_features(artist_id, access_token, timeout=10):
    """Fetch basic metadata for a Spotify artist.

    Args:
        artist_id: Spotify artist ID.
        access_token: Spotify bearer token.
        timeout: Seconds to wait for the API before ``requests`` gives up.

    Returns:
        A dict with the keys ``artist_name``, ``genres``, ``popularity`` and
        ``followers`` (the ``total`` follower count). On a non-200 status the
        error is printed and ``0`` is returned; the ``0`` sentinel is kept for
        existing callers.
    """
    url = f'https://api.spotify.com/v1/artists/{artist_id}'
    headers = {'Authorization': f'Bearer {access_token}'}
    response = requests.get(url, headers=headers, timeout=timeout)
    if response.status_code != 200:
        print(f"Error: {response.status_code}, {response.text}")
        return 0

    artist_data = response.json()
    return {
        'artist_name': artist_data['name'],
        'genres': artist_data['genres'],
        'popularity': artist_data['popularity'],
        'followers': artist_data['followers']['total'],
    }

### This part does not work because of Spotify's latest update

In [None]:
def get_audio_features(track_id, access_token, timeout=10):
    """Fetch the audio-features record of a Spotify track.

    The notebook marks this section as no longer working after a Spotify
    update, so expect the error branch in practice.

    Args:
        track_id: Spotify track ID.
        access_token: Spotify bearer token.
        timeout: Seconds to wait for the API before ``requests`` gives up.

    Returns:
        A dict of audio features. Two output keys are misspelled
        (``loundness`` for Spotify's ``loudness`` and ``accousticness`` for
        ``acousticness``); they are kept as-is because downstream code may
        already read them. On a non-200 status the error is printed and ``0``
        is returned.
    """
    url = f'https://api.spotify.com/v1/audio-features/{track_id}'
    headers = {'Authorization': f'Bearer {access_token}'}
    response = requests.get(url, headers=headers, timeout=timeout)
    if response.status_code != 200:
        print(f"Error: {response.status_code}, {response.text}")
        return 0

    audio_features = response.json()
    # Output key -> key in Spotify's response.
    # NOTE(review): 'loundness' / 'accousticness' are typos preserved for
    # backward compatibility; rename them together with any consumers.
    field_map = {
        'danceability': 'danceability',
        'energy': 'energy',
        'loundness': 'loudness',
        'accousticness': 'acousticness',
        'instrumentalness': 'instrumentalness',
        'liveness': 'liveness',
        'tempo': 'tempo',
        'valence': 'valence',
        'key': 'key',
        'mode': 'mode',
        'speechiness': 'speechiness',
        'time_signature': 'time_signature',
    }
    return {out_key: audio_features[src_key] for out_key, src_key in field_map.items()}

In [None]:
# Processing and displaying the results
def display_tracks(data):
    """Print one line per track of a Spotify search response.

    Args:
        data: Decoded search response; expected to hold the track list under
            ``data['tracks']['items']``.

    Prints "No tracks found." when ``data`` is falsy or has no ``'tracks'``
    key; otherwise prints the name, first artist and album of every item.
    """
    # Guard clause: nothing usable in the response.
    if not data or 'tracks' not in data:
        print("No tracks found.")
        return

    for track in data['tracks']['items']:
        name, album = track['name'], track['album']['name']
        artist = track['artists'][0]['name']  # first credited artist only
        print(f"Track: {name}, Artist: {artist}, Album: {album}")

# Genius API

In [None]:
# Genius API access token and base URL.
# The token is read from the environment instead of being pasted here, since
# this notebook is published on GitHub; it falls back to an empty string when
# the variable is not set.
access_token = os.environ.get("GENIUS_ACCESS_TOKEN", "")
base_url = "https://api.genius.com"

In [3]:
# Getting lyrics from genius API using song name and artist name
def search_track_genius(track_name, artist_name, timeout=10):
    """Search Genius for a song and return the ID of the first hit.

    Uses the module-level ``base_url`` and ``access_token``.

    Args:
        track_name: Song title.
        artist_name: Artist name; combined with the title into one query.
        timeout: Seconds to wait for the API before ``requests`` gives up.

    Returns:
        The Genius song ID of the first search hit, or ``None`` when there
        are no hits or the API answers with a non-200 status.
    """
    search_url = base_url + "/search"
    response = requests.get(
        search_url,
        params={'q': f"{track_name} {artist_name}"},
        headers={'Authorization': 'Bearer ' + access_token},
        timeout=timeout,
    )
    if response.status_code != 200:
        return None
    # Decode the body once instead of once per access.
    hits = response.json()['response']['hits']
    if not hits:
        return None
    return hits[0]['result']['id']

def get_lyrics_url(track_id, timeout=10):
    """Resolve a Genius song ID to the URL of its lyrics page.

    Uses the module-level ``base_url`` and ``access_token``.

    Args:
        track_id: Genius song ID. ``None`` and NaN (what a failed search
            turns into inside a numeric DataFrame column) are treated as
            "no song" and short-circuit without a request.
        timeout: Seconds to wait for the API before ``requests`` gives up.

    Returns:
        ``"https://genius.com" + path`` for the song, or ``None`` when there
        is no ID or the API answers with a non-200 status (which is printed).
    """
    if track_id is None:
        return None
    if isinstance(track_id, float):
        if track_id != track_id:  # NaN is the only value unequal to itself
            return None
        if track_id.is_integer():
            # An ID that was upcast to float must not become ".../songs/123.0".
            track_id = int(track_id)

    song_url = f"{base_url}/songs/{track_id}"
    headers = {'Authorization': 'Bearer ' + access_token}
    response = requests.get(song_url, headers=headers, timeout=timeout)
    if response.status_code != 200:
        print("Error fetching song details:", response.status_code)
        return None
    lyrics_path = response.json()['response']['song']['path']
    return "https://genius.com" + lyrics_path

import time

def get_lyrics(lyrics_url, timeout=10):
    """Download a Genius lyrics page and extract the raw lyrics text.

    Args:
        lyrics_url: URL of the lyrics page. Falsy or non-string values
            (``None``, ``""``, NaN) return ``None`` without a request.
        timeout: Seconds to wait for the page before ``requests`` gives up.

    Returns:
        The text of every ``div`` with ``data-lyrics-container="true"``,
        each stripped and prefixed with a single space, concatenated in page
        order (section tags such as ``[Verse 1]`` are still present).
        ``None`` when the page cannot be fetched or holds no such container;
        a message is printed in both cases.
    """
    if not lyrics_url or not isinstance(lyrics_url, str):
        return None

    # Browser-like User-Agent; previously this dict was built but never sent.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36'
    }
    response = requests.get(lyrics_url, headers=headers, timeout=timeout)
    if response.status_code != 200:
        print("Error fetching page content:", response.status_code)
        return None

    soup = BeautifulSoup(response.text, 'html.parser')
    containers = soup.find_all('div', {'data-lyrics-container': 'true'})
    if not containers:
        print("Lyrics not found on the page.")
        return None

    # Same layout as before: " <block1> <block2> ..." with a leading space.
    return "".join(
        " " + container.get_text(separator=" ").strip()
        for container in containers
    )
    
def clean_lyrics(lyrics):
    """Remove bracketed section tags (e.g. ``[Chorus]``) from scraped lyrics.

    Args:
        lyrics: Raw lyrics text, or a falsy value when nothing was scraped.

    Returns:
        The text with every ``[...]`` span removed and outer whitespace
        stripped, or the sentinel ``"No Lyrics Found"`` for falsy input.
    """
    if not lyrics:
        return "No Lyrics Found"
    # Non-greedy match so each tag is removed on its own.
    tag_pattern = re.compile(r'\[.*?\]')
    without_tags = tag_pattern.sub('', lyrics)
    return without_tags.strip()
    


In [20]:
# Trial run of the Genius pipeline on a single song:
# search for the song ID -> resolve the lyrics page URL -> scrape -> clean.

song_id=search_track_genius("Xanny", "Billie Eilish")
url=get_lyrics_url(song_id)
print(url)
lyrics=get_lyrics(url)
# clean_lyrics removes the bracketed section tags from the scraped text
lyrics_cleaned=clean_lyrics(lyrics)
print(lyrics_cleaned)

https://genius.com/Billie-eilish-xanny-lyrics
What is it about them? I must be missing something They just keep doing nothing Too intoxicated to be scared Better off without them They're nothing but unstable Bring ashtrays to the table And that's about the only thing they share  I'm in their secondhand smoke Still just drinking canned Coke I don't need a Xanny to feel better On designated drives home Only one who's not stoned Don't give me a Xanny, now or ever  Can you check your Uber rating? Oh my god ( And it's like, wait, like, when? )  Wakin' up at sundown (Ooh) They're late to every party (Ooh) Nobody's ever sorry (Ooh) Too inebriated now to dance Morning as they come down (Come down) Their pretty heads are hurting (Hurting) They're awfully bad at learning (Learning) Make the same mistakes, blame circumstance  I'm in their secondhand smoke Still just drinking canned Coke I don't need a Xanny to feel better On designated drives home Only one who's not stoned Don't give me a Xanny, 

In [None]:
def main(year=2017):
    """Fetch a Spotify token, search one year of tracks and print them.

    Args:
        year: Release year to search for. Defaults to 2017, the value that
            used to be hard-coded.

    Returns:
        None. Stops early when no access token could be obtained, and prints
        nothing further when the search itself fails.
    """
    # Getting access token
    access_token = get_access_token(CLIENT_ID, CLIENT_SECRET)
    if not access_token:
        return

    # Searching for tracks from the requested year
    data = search_tracks(year, access_token)
    if data:
        display_tracks(data)

In [None]:
# Run the Spotify demo: get a token, search one year of tracks, print them.
main()

# Extracting the lyrics for the hit Billboard songs using Genius API

In [23]:
# Load the Billboard chart data (relative path, resolved against the current
# working directory).
# NOTE(review): the preview shows an "Unnamed: 0" column, which looks like a
# previously saved index being read back as data — confirm whether
# index_col=0 is wanted here.
hit_df = pd.read_csv("Billboard_data.csv")
# Preview the first rows to check the columns.
hit_df.head()

Unnamed: 0,song,artist,peak_position,weeks_on_chart,date_of_charting
0,Empire State Of Mind (Part II) Broken Down,Alicia Keys,55,1,1/2/2010
1,On Fire,Lil Wayne,62,2,1/2/2010
2,Elevator,Eminem,67,1,1/2/2010
3,Hell Breaks Loose,Eminem Featuring Dr. Dre,29,1,1/2/2010
4,I Get It In,Omarion Featuring Gucci Mane,83,7,1/2/2010


In [None]:
# Build the lyrics columns row by row. Each column is derived from the one
# created on the previous line, so the order of these statements matters.
# The first three steps issue HTTP requests for every row (Genius search,
# song lookup, lyrics page download); only the last step is local.
hit_df["song_id"] = hit_df.apply(lambda row: search_track_genius(row["song"], row["artist"]), axis=1)
hit_df["lyrics_url"]= hit_df.apply(lambda row: get_lyrics_url(row["song_id"]), axis=1)
hit_df["lyrics_not_cleaned"]= hit_df.apply(lambda row: get_lyrics(row["lyrics_url"]), axis=1)
# Rows without lyrics end up with the "No Lyrics Found" sentinel from clean_lyrics.
hit_df["lyrics_cleaned"]= hit_df.apply(lambda row: clean_lyrics(row["lyrics_not_cleaned"]), axis=1)

In [None]:
# Save the enriched table. index=True also writes the row index as an extra
# leading column in the CSV.
hit_df.to_csv("Billboard_data_lyrics.csv", index=True)

In [25]:
# Display the enriched DataFrame to inspect the new lyrics columns.
hit_df

Unnamed: 0,song,artist,peak_position,weeks_on_chart,date_of_charting,song_id,lyrics_url,lyrics_not_cleaned,lyrics_cleaned
6500,Good Girl Gone Missin',Morgan Wallen,69,1,3/18/2023,8779730,https://genius.com/Morgan-wallen-good-girl-gon...,"[Verse 1] Bar lights, bar stool Too many, one...","Bar lights, bar stool Too many, one fool Long ..."
6501,Me To Me,Morgan Wallen,71,1,3/18/2023,8779717,https://genius.com/Morgan-wallen-me-to-me-lyrics,[Verse 1] Say you want a little wild in your ...,Say you want a little wild in your Friday Kind...
6502,Last Drive Down Main,Morgan Wallen,65,1,3/18/2023,8779716,https://genius.com/Morgan-wallen-last-drive-do...,[Chorus] Takin' one last drive down Main Migh...,Takin' one last drive down Main Might as well ...
6503,Had It,Morgan Wallen,75,1,3/18/2023,8779728,https://genius.com/Morgan-wallen-had-it-lyrics,"[Verse 1] Glass on the floor, tears in your e...","Glass on the floor, tears in your eyes Sheets ..."
6504,Outlook,Morgan Wallen,76,1,3/18/2023,8779731,https://genius.com/Morgan-wallen-outlook-lyrics,[Verse 1] I used to think that it was me agai...,I used to think that it was me against the wor...
...,...,...,...,...,...,...,...,...,...
6795,Mourning,Post Malone,36,13,8/26/2023,9123773,https://genius.com/Post-malone-mourning-lyrics,[Chorus] Don't wanna sober up The sun is kill...,Don't wanna sober up The sun is killin' my buz...
6796,Blank Space,Taylor Swift,1,38,8/26/2023,542389,https://genius.com/Taylor-swift-blank-space-ly...,"[Verse 1] Nice to meet you, where you been? I...","Nice to meet you, where you been? I could show..."
6797,Aint Gotta Dollar,Oliver Anthony Music,82,2,9/2/2023,9419738,https://genius.com/Oliver-anthony-music-aint-g...,Well I ain't gotta dollar But I don't need a ...,Well I ain't gotta dollar But I don't need a d...
6798,Next Thing You Know,Jordan Davis,23,31,9/2/2023,8285882,https://genius.com/Jordan-davis-next-thing-you...,[Verse 1] You swear that you're stayin' singl...,"You swear that you're stayin' single, next thi..."


NOTE: The genre of a track is not available; only the genres of the artist who performed the track are.

Low-level audio features:
danceability, energy, loudness, acousticness, instrumentalness, tempo, valence, speechiness