<a href="https://colab.research.google.com/github/DivamSanghvi/MoodMusic-Webscraping/blob/main/Untitled3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install spotipy


Collecting spotipy
  Downloading spotipy-2.24.0-py3-none-any.whl.metadata (4.9 kB)
Collecting redis>=3.5.3 (from spotipy)
  Downloading redis-5.1.0-py3-none-any.whl.metadata (9.1 kB)
Downloading spotipy-2.24.0-py3-none-any.whl (30 kB)
Downloading redis-5.1.0-py3-none-any.whl (261 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m261.2/261.2 kB[0m [31m6.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: redis, spotipy
Successfully installed redis-5.1.0 spotipy-2.24.0


In [2]:
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import pandas as pd
import os
import time
from spotipy.exceptions import SpotifyException

# Spotify API credentials.
# Read from the environment so real secrets are never committed with the notebook.
# When a variable is unset the value falls back to the original empty-string placeholder.
client_id = os.environ.get('SPOTIPY_CLIENT_ID', '')
client_secret = os.environ.get('SPOTIPY_CLIENT_SECRET', '')

# Authentication: build the client-credentials manager and the shared `sp` client
# that every helper below uses.
client_credentials_manager = SpotifyClientCredentials(client_id=client_id, client_secret=client_secret)
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

# Look up an artist by name and return their top tracks plus genres
def get_artist_top_tracks(artist_name):
    """Return ``(tracks, genres)`` for the first artist matching ``artist_name``.

    Args:
        artist_name: Name used to build the ``artist:<name>`` search query.

    Returns:
        A tuple of the ``'tracks'`` list from ``sp.artist_top_tracks`` and the
        matched artist's ``'genres'`` value. ``([], [])`` is returned when the
        search yields no artists or when any exception is raised (the error is
        printed, not propagated).
    """
    nothing_found = ([], [])
    try:
        search_response = sp.search(q='artist:' + artist_name, type='artist')
        matches = search_response['artists']['items']
        if len(matches) == 0:
            return nothing_found
        # Only the first search hit is considered.
        best_match = matches[0]
        top = sp.artist_top_tracks(best_match['id'])
        return top['tracks'], best_match['genres']
    except Exception as e:
        print(f"Error fetching top tracks for {artist_name}: {e}")
        return nothing_found

# Function to get audio features for multiple tracks
def get_tracks_features(track_ids, max_retries=5, retry_delay=15):
    """Fetch audio features for a batch of tracks, retrying when rate limited.

    The retry loop is bounded so a persistently rate-limited client cannot
    hang the notebook forever.

    Args:
        track_ids: Track IDs passed unchanged to ``sp.audio_features``.
        max_retries: Number of retries allowed after an HTTP 429 response
            before giving up.
        retry_delay: Seconds to sleep before each retry.

    Returns:
        The value returned by ``sp.audio_features`` on success, or ``None``
        when a non-429 ``SpotifyException`` occurs or the retries run out.
    """
    for attempt in range(max_retries + 1):
        try:
            return sp.audio_features(track_ids)
        except SpotifyException as e:
            if e.http_status != 429:
                print(f"Error fetching track features: {e}")
                return None  # Any other API error is not retried
            if attempt == max_retries:
                break  # Out of retries: do not sleep again, just give up
            print(f"Rate limit exceeded. Retrying after {retry_delay} seconds...")
            time.sleep(retry_delay)
    print(f"Rate limit still exceeded after {max_retries} retries. Giving up.")
    return None

# Substitute a default for a missing (None) feature value
def validate_feature(feature, default_value=0):
    """Return ``feature`` unless it is ``None``, in which case return ``default_value``.

    Only ``None`` counts as missing; falsy values such as ``0`` or ``''`` are
    returned unchanged.
    """
    if feature is None:
        return default_value
    return feature

# Build one flat record per top track of an artist
def scrape_artist_data(artist_name):
    """Collect track metadata and audio features for an artist's top tracks.

    Args:
        artist_name: Name handed to ``get_artist_top_tracks`` and stored in
            every row as ``'artist_name'``.

    Returns:
        A list of dicts, one per track whose audio-features entry is truthy.
        The list is empty when the artist has no tracks or the feature lookup
        returned ``None``.
    """
    tracks, genres = get_artist_top_tracks(artist_name)
    track_ids = [track['id'] for track in tracks]
    if not track_ids:
        return []

    # One batched request for every track instead of one call per track.
    features_list = get_tracks_features(track_ids)
    if features_list is None:
        return []

    audio_keys = (
        'valence',
        'energy',
        'danceability',
        'loudness',
        'tempo',
        'speechiness',
        'instrumentalness',
    )

    rows = []
    for track, features in zip(tracks, features_list):
        if not features:
            continue  # No feature entry for this track: skip it
        album = track['album']
        row = {
            'song_id': track['id'],
            'artist_name': artist_name,
            'track_name': track['name'],
            'album_name': album['name'],
            'album_release_date': album['release_date'],
            # Genres come from the artist, not the track; 'Unknown' when none are listed.
            'genres': ', '.join(genres) if genres else 'Unknown',
        }
        for key in audio_keys:
            row[key] = validate_feature(features.get(key))
        row['popularity'] = validate_feature(track.get('popularity'))
        rows.append(row)
    return rows

# Function to collect artist names from a paged genre search (used to mimic a "top N artists" list)
def get_top_artists(limit=1000, genre="pop"):
    """Collect up to ``limit`` unique artist names from a genre search.

    Names are de-duplicated (first occurrence wins, order preserved) because
    each name is later searched again individually; a repeated name would
    scrape the same artist twice and produce duplicate rows.

    Args:
        limit: Maximum number of artist names to return.
        genre: Genre used in the ``genre:<genre>`` search query.

    Returns:
        A list of at most ``limit`` artist names. It may be shorter when the
        search runs out of results or a request fails (the error is printed
        and whatever was collected so far is returned).
    """
    page_size = 50  # Artists requested per search call
    all_artists = []
    seen_names = set()
    offset = 0
    while len(all_artists) < limit:
        try:
            results = sp.search(q=f"genre:{genre}", type="artist", limit=page_size, offset=offset)
            artists = results['artists']['items']
            if not artists:
                break  # Stop if no more artists are found
            for artist in artists:
                name = artist['name']
                if name not in seen_names:
                    seen_names.add(name)
                    all_artists.append(name)
            offset += page_size  # Move to the next batch
            if len(all_artists) < limit:
                time.sleep(1)  # Pause only when another request will follow
        except Exception as e:
            print(f"Error fetching top artists: {e}")
            break
    return all_artists[:limit]  # Return only up to the requested limit

# Function to save data to a CSV file
def save_data_to_csv(data, filename='spotify_data.csv'):
    """Append ``data`` to ``filename`` as CSV, writing the header only when needed.

    Args:
        data: Records accepted by ``pd.DataFrame`` (e.g. a list of dicts).
        filename: Destination CSV path; rows are appended if it already exists.

    Nothing is written when ``data`` is empty. Otherwise an empty dataset
    would create a header-less file, and every later append would then skip
    the header as well.
    """
    df = pd.DataFrame(data)
    if df.empty:
        print(f"No data to save; {filename} left unchanged")
        return

    # A missing file or a zero-byte one both still need the header row.
    needs_header = not os.path.isfile(filename) or os.path.getsize(filename) == 0
    # Write to CSV (append if exists)
    df.to_csv(filename, mode='a', header=needs_header, index=False)
    print(f"Data saved to {filename}")

# Main function
def main():
    """Scrape every artist from ``get_top_artists`` and save the rows to CSV.

    The save runs in ``finally`` so it happens exactly once, whether the loop
    finishes, an error aborts it, or the run is interrupted (for example by
    KeyboardInterrupt, which then still propagates). Ordinary exceptions
    raised while scraping are reported and swallowed after the partial data
    has been saved.
    """
    artists = get_top_artists(limit=1000)
    all_data = []
    try:
        # Scrape data for all artists
        for artist in artists:
            print(f"Scraping data for {artist}...")
            all_data.extend(scrape_artist_data(artist))
    except Exception as e:
        # Report the failure; the partial data is saved in the finally clause.
        print(f"Error occurred: {e}. Saving the data scraped until now.")
    finally:
        # Save whatever was collected, complete or partial.
        save_data_to_csv(all_data)

# Entry point: start the scrape only when this code runs as the main module
# (as it does in a notebook cell), not when it is imported.
if __name__ == "__main__":
    main()


Scraping data for Taylor Swift...
Scraping data for Drake...
Scraping data for Sabrina Carpenter...
Scraping data for Post Malone...
Scraping data for The Weeknd...
Scraping data for Billie Eilish...
Scraping data for Chappell Roan...
Scraping data for SZA...
Scraping data for Bruno Mars...
Scraping data for Noah Kahan...
Scraping data for Lana Del Rey...
Scraping data for Rihanna...
Scraping data for Ariana Grande...
Scraping data for Lil Wayne...
Scraping data for Charli xcx...
Scraping data for Lady Gaga...
Scraping data for Hozier...
Scraping data for Olivia Rodrigo...
Scraping data for Nicki Minaj...
Scraping data for Justin Bieber...
Scraping data for Feid...
Scraping data for Coldplay...
Scraping data for Rauw Alejandro...
Scraping data for Jelly Roll...
Scraping data for Beyoncé...
Scraping data for Doja Cat...
Scraping data for Kali Uchis...
Scraping data for Benson Boone...
Scraping data for Imagine Dragons...
Scraping data for Ty Dolla $ign...
Scraping data for David Guetta.