In [40]:
import pandas as pd

In [41]:
df = pd.read_csv('../popular_songs_metadata.csv')

In [42]:
CLIENT_ID = 'c69dcc8b88574693bb9a8f67269450b3'
CLIENT_SECRET = 'a2b4ab0e224e4863aeab704bd92fdc08'
REDIRECT_URI = 'https://localhost:8080/callback/'

In [43]:
import base64
import requests

def get_access_token(client_id, client_secret):
    """Obtain the access token from Spotify."""
    endpoint = "https://accounts.spotify.com/api/token"
    encoded_credentials = base64.b64encode(f"{client_id}:{client_secret}".encode()).decode()
    headers = {
        "Authorization": f"Basic {encoded_credentials}",
        "Content-Type": "application/x-www-form-urlencoded"
    }
    payload = {
        "grant_type": "client_credentials"
    }

    response = requests.post(endpoint, headers=headers, data=payload)
    response_data = response.json()
    
    if response.status_code == 200:
        return response_data["access_token"]
    else:
        raise Exception("Failed to obtain access token")


def search_track(access_token, track_name, artist_name):
    """Search for a track on Spotify and return its first result."""
    query = f"track:{track_name} artist:{artist_name}"
    endpoint = f"https://api.spotify.com/v1/search?q={query}&type=track&limit=1"
    headers = {"Authorization": f"Bearer {access_token}"}
    
    response = requests.get(endpoint, headers=headers)
    results = response.json()
    
    if results['tracks']['items']:
        return results['tracks']['items'][0]
    else:
        return None
    
def get_artist_genres(access_token, artist_id):
    """Get genres for a given artist."""
    endpoint = f"https://api.spotify.com/v1/artists/{artist_id}"
    headers = {"Authorization": f"Bearer {access_token}"}
    
    response = requests.get(endpoint, headers=headers)
    artist_info = response.json()
    
    return artist_info.get('genres', [])


def get_audio_features(access_token, track_id):
    """Get audio features for a given track."""
    endpoint = f"https://api.spotify.com/v1/audio-features/{track_id}"
    headers = {"Authorization": f"Bearer {access_token}"}
    
    response = requests.get(endpoint, headers=headers)
    audio_features = response.json()
    
    # Depending on your needs, you might want to return only specific features
    return audio_features

def update_df_with_spotify_data(df, access_token):
    """Update DataFrame rows with Spotify data, including audio features and artist genres.
    Saves the DataFrame to a CSV file every 1000 lines."""
    for index, row in df.iterrows():
        track_name = row['title']
        artist_name = row['artist_name']

        track_info = search_track(access_token, track_name, artist_name)
        if track_info:
            artist_id = track_info['artists'][0]['id']
            track_id = track_info['id']  # Get the track's ID for audio features
            genres = get_artist_genres(access_token, artist_id)
            
            # Save genres as a comma-separated string
            df.at[index, 'genres'] = ', '.join(genres)
            
            audio_features = get_audio_features(access_token, track_id)
            # Save requested audio features
            df.at[index, 'key'] = audio_features.get('key')
            df.at[index, 'loudness'] = audio_features.get('loudness')
            df.at[index, 'mode'] = audio_features.get('mode')
            df.at[index, 'speechiness'] = audio_features.get('speechiness')
            df.at[index, 'acousticness'] = audio_features.get('acousticness')
            df.at[index, 'instrumentalness'] = audio_features.get('instrumentalness')
            df.at[index, 'liveness'] = audio_features.get('liveness')
            df.at[index, 'valence'] = audio_features.get('valence')
            df.at[index, 'tempo'] = audio_features.get('tempo')
            
            print(f"Updated row {index + 1} with genres and audio features for '{track_name}' by '{artist_name}'.")

                # Save the DataFrame to a CSV file every 1000 lines
            if (index + 1) % 1000 == 0:
                backup_filename = f"spotify_data_api_{index}.csv"
                df.to_csv(backup_filename, index=False)
                print(f"DataFrame saved to {backup_filename} after {index + 1} rows.")
        else:
            print(f"Track '{track_name}' by '{artist_name}' not found. Skipping.")

    # Final save to ensure any remaining changes are also persisted
    final_filename = "spotify_data_final_2.csv"
    df.to_csv(final_filename, index=False)
    print(f"Final DataFrame saved to {final_filename}.")

    return df


In [44]:
access_token = get_access_token(CLIENT_ID, CLIENT_SECRET)   
print(access_token)

BQBFOQKWdMiAP4qZrZFNZX7XN0bL1txUWYgtetimjbxE1RhLpRyFmsfVFPWKuboNMp91O-oIvSMzILnZ7-GgzxxzTp-r3eKlNhN0Wy3BCcWbi9pa2XE


In [45]:

updated_df = update_df_with_spotify_data(df, access_token)




Updated row 1 with genres and audio features for 'XRDS' by 'Covenant'.
Updated row 2 with genres and audio features for 'Orgelblut' by 'Bohren & Der Club Of Gore'.
Updated row 3 with genres and audio features for 'Orgelblut' by 'Bohren & Der Club Of Gore'.
Updated row 4 with genres and audio features for 'Orgelblut' by 'Bohren & Der Club Of Gore'.
Updated row 5 with genres and audio features for 'Orgelblut' by 'Bohren & Der Club Of Gore'.
Updated row 6 with genres and audio features for 'Orgelblut' by 'Bohren & Der Club Of Gore'.


KeyboardInterrupt: 