In [None]:
import spotipy
import pandas as pd
from spotipy.oauth2 import SpotifyClientCredentials

In [29]:
df = pd.read_csv('../popular_songs_metadata.csv')

In [39]:
CLIENT_ID = ''
CLIENT_SECRET = ''
client_credentials_manager = SpotifyClientCredentials(client_id=CLIENT_ID, client_secret = CLIENT_SECRET)
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

In [40]:
import base64
import requests

def get_access_token(client_id, client_secret):
    """Obtain the access token from Spotify."""
    endpoint = "https://accounts.spotify.com/api/token"
    encoded_credentials = base64.b64encode(f"{client_id}:{client_secret}".encode()).decode()
    headers = {
        "Authorization": f"Basic {encoded_credentials}",
        "Content-Type": "application/x-www-form-urlencoded"
    }
    payload = {
        "grant_type": "client_credentials"
    }

    response = requests.post(endpoint, headers=headers, data=payload)
    response_data = response.json()
    
    if response.status_code == 200:
        return response_data["access_token"]
    else:
        raise Exception("Failed to obtain access token")


def search_track(access_token, track_name, artist_name):
    """Search for a track on Spotify and return its first result."""
    query = f"track:{track_name} artist:{artist_name}"
    endpoint = f"https://api.spotify.com/v1/search?q={query}&type=track&limit=1"
    headers = {"Authorization": f"Bearer {access_token}"}
    
    response = requests.get(endpoint, headers=headers)
    results = response.json()
    
    if results['tracks']['items']:
        return results['tracks']['items'][0]
    else:
        return None
    
def get_artist_genres(access_token, artist_id):
    """Get genres for a given artist."""
    endpoint = f"https://api.spotify.com/v1/artists/{artist_id}"
    headers = {"Authorization": f"Bearer {access_token}"}
    
    response = requests.get(endpoint, headers=headers)
    artist_info = response.json()
    
    return artist_info.get('genres', [])


def get_audio_features(access_token, track_id):
    """Get audio features for a given track."""
    endpoint = f"https://api.spotify.com/v1/audio-features/{track_id}"
    headers = {"Authorization": f"Bearer {access_token}"}
    
    response = requests.get(endpoint, headers=headers)
    audio_features = response.json()
    
    # Depending on your needs, you might want to return only specific features
    return audio_features


import pandas as pd

def update_df_with_spotify_data(df, access_token):
    """Update DataFrame rows with Spotify data, including audio features and artist genres.
    Data is saved in chunks of 1000 rows."""
    chunk_size = 1000
    number_of_chunks = len(df) // chunk_size + (1 if len(df) % chunk_size > 0 else 0)

    for chunk in range(0, len(number_of_chunks)):  # Adjusted to skip the first two chunks
        start = chunk * chunk_size
        end = start + chunk_size
        current_chunk = df.iloc[start:end].copy()  # Make a copy for updates

        for local_index, (index, row) in enumerate(current_chunk.iterrows()):
            track_name = row['title']
            artist_name = row['artist_name']

            track_info = search_track(access_token, track_name, artist_name)
            if track_info:
                artist_id = track_info['artists'][0]['id']
                track_id = track_info['id']
                genres = get_artist_genres(access_token, artist_id)
                
                # Update genres in the chunk
                current_chunk.at[index, 'genres'] = ', '.join(genres)
                
                audio_features = get_audio_features(access_token, track_id)
                print(audio_features)
                
                # Update audio features in the chunk

                for key, value in audio_features.items():
                    current_chunk.at[index, key] = value 
                
                print(f"Updated row {index} with genres and audio features for '{track_name}' by '{artist_name}'.")
                current_chunk.loc[[index]]

            else:
                print(f"Track '{track_name}' by '{artist_name}' not found. Skipping.")
        
        chunk_filename = f"spotify_data_chunk_{chunk+1}.csv"
        current_chunk.to_csv(chunk_filename, index=False)
        print(f"Chunk {chunk+1} saved to {chunk_filename}.")






In [41]:
access_token = get_access_token(CLIENT_ID, CLIENT_SECRET)   
print(access_token)

BQA-_F-zSxhQ8-2be1eup_SFpFHNNXSPU4vQHCZYePDBBzM0xaViO3ox5srmWzluAxQZVoNC0wh58Cxf9JM89KHllI2pZkiekTj7-3VJkL1spx9CKpI


In [42]:

updated_df = update_df_with_spotify_data(df, access_token)

{'danceability': 0.694, 'energy': 0.637, 'key': 2, 'loudness': -7.555, 'mode': 0, 'speechiness': 0.0321, 'acousticness': 0.636, 'instrumentalness': 0.00413, 'liveness': 0.129, 'valence': 0.397, 'tempo': 136.007, 'type': 'audio_features', 'id': '2fbTbfyhKAb3sKySsTeOy9', 'uri': 'spotify:track:2fbTbfyhKAb3sKySsTeOy9', 'track_href': 'https://api.spotify.com/v1/tracks/2fbTbfyhKAb3sKySsTeOy9', 'analysis_url': 'https://api.spotify.com/v1/audio-analysis/2fbTbfyhKAb3sKySsTeOy9', 'duration_ms': 270773, 'time_signature': 4}
Updated row 0 with genres and audio features for 'Almaz' by 'Randy Crawford'.
{'danceability': 0.613, 'energy': 0.71, 'key': 5, 'loudness': -4.689, 'mode': 1, 'speechiness': 0.0304, 'acousticness': 0.295, 'instrumentalness': 5.2e-06, 'liveness': 0.12, 'valence': 0.401, 'tempo': 141.987, 'type': 'audio_features', 'id': '1xebBdGOBa4vg3a4PvPiar', 'uri': 'spotify:track:1xebBdGOBa4vg3a4PvPiar', 'track_href': 'https://api.spotify.com/v1/tracks/1xebBdGOBa4vg3a4PvPiar', 'analysis_url'

KeyError: 'tracks'