# In [None]:
import os
import re
import time

import pandas as pd
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

# Spotify API credentials are read from the environment so that secrets are
# never stored in the source file. Export SPOTIPY_CLIENT_ID and
# SPOTIPY_CLIENT_SECRET before running this script.
# NOTE(review): the credentials that used to be hard-coded here were exposed
# in source and should be rotated in the Spotify developer dashboard.
client_id = os.environ.get('SPOTIPY_CLIENT_ID')
client_secret = os.environ.get('SPOTIPY_CLIENT_SECRET')
if not client_id or not client_secret:
    raise RuntimeError(
        "Spotify credentials not found: set the SPOTIPY_CLIENT_ID and "
        "SPOTIPY_CLIENT_SECRET environment variables."
    )

# Initialize Spotify client
client_credentials_manager = SpotifyClientCredentials(client_id=client_id, client_secret=client_secret)
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

# Function to extract Spotify track ID from the URL or URI
def extract_track_id(spotify_url):
    """Return the Spotify track ID embedded in a URI or URL, or None.

    Two forms are recognised anywhere inside the input string:

    * URI: ``spotify:track:<id>``
    * URL: ``open.spotify.com/track/<id>`` (optionally with an ``intl-xx/``
      locale segment before ``track/``; any query string is ignored)

    Args:
        spotify_url: The value to inspect. Anything that is not a string
            (None, NaN, numbers, ...) is treated as "no track".

    Returns:
        The alphanumeric track ID, or None when no track reference is found.
    """
    # Missing values from pandas arrive as None/NaN (non-strings); rejecting
    # every non-string also keeps re.search from raising TypeError.
    if not isinstance(spotify_url, str):
        return None
    match = re.search(
        r'(?:spotify:track:|open\.spotify\.com/(?:intl-[A-Za-z-]+/)?track/)'
        r'([a-zA-Z0-9]+)',
        spotify_url,
    )
    return match.group(1) if match else None

# Function to get audio features for a track
def get_audio_features(track_id):
    """Look up the audio features of one track via the module-level client.

    Args:
        track_id: Spotify track ID, or None.

    Returns:
        A dict holding the selected audio-feature fields, or None when
        ``track_id`` is None, when the API reports no features for the track,
        or when the lookup fails for any reason (the failure is printed).
    """
    # Fields copied from the API response, in output order.
    # 'tempo' is beats per minute (BPM).
    feature_names = (
        'danceability',
        'energy',
        'key',
        'loudness',
        'mode',
        'speechiness',
        'acousticness',
        'instrumentalness',
        'liveness',
        'valence',
        'tempo',
        'duration_ms',
    )

    if track_id is None:
        return None

    try:
        # The client returns a list; only a single ID was requested.
        response = sp.audio_features(track_id)
        track_features = response[0]
        if track_features is None:
            return None
        return {name: track_features[name] for name in feature_names}
    except Exception as e:
        # Best effort: report the failure and let the caller skip this track.
        print(f"Error fetching features for track {track_id}: {e}")
        return None

# Load the input CSV file.
# NOTE(review): this is an absolute, machine-specific path — adjust it when
# running on another machine.
df = pd.read_csv('/Users/rajatsharma/Documents/introspectAI/introspect.ai/combining-apple-spotify/final-dataset-apple-spotify/spotifyHeartRate.csv')

# Handle the unusual format of the data: when the whole CSV was parsed into a
# single column, pull the Spotify track URI out of that column's text.
if len(df.columns) == 1:
    column_name = df.columns[0]
    df['spotify_url'] = df[column_name].str.extract(
        r'(spotify:track:[a-zA-Z0-9]+)', expand=False
    )

# Fail early with an actionable message instead of a bare KeyError below.
if 'spotify_url' not in df.columns:
    raise KeyError(
        "Expected a 'spotify_url' column in the input CSV; "
        f"found columns: {list(df.columns)}"
    )

# Extract track IDs
df['track_id'] = df['spotify_url'].apply(extract_track_id)

# Get audio features for each distinct track.
# The same track can appear on many rows; it is fetched only once so that
# (a) the API is not called repeatedly for the same ID and (b) the features
# table has exactly one row per track_id, which keeps the merge below from
# multiplying rows.
features_list = []
fetched_track_ids = set()  # IDs whose features were fetched successfully
batch_size = 50  # Rows handled between pauses, to avoid rate limiting
total_batches = (len(df) + batch_size - 1) // batch_size

for i in range(0, len(df), batch_size):
    batch = df.iloc[i:i+batch_size]
    print(f"Processing batch {i//batch_size + 1} of {total_batches}")

    api_called = False
    for idx, track_id in batch['track_id'].items():
        # Skip rows without a track and tracks already fetched.
        if pd.isna(track_id) or track_id in fetched_track_ids:
            continue
        api_called = True
        features = get_audio_features(track_id)
        if features:
            features['track_id'] = track_id
            # Index of the first row on which this track was seen.
            features['original_index'] = idx
            features_list.append(features)
            fetched_track_ids.add(track_id)

    # Sleep to avoid hitting rate limits; pointless if nothing was requested.
    if api_called and i + batch_size < len(df):
        time.sleep(1)

# Create a dataframe with the audio features
features_df = pd.DataFrame(features_list)

# Merge with the original dataframe.
# With no features at all, features_df has no 'track_id' column and the merge
# would raise, so the input is passed through unchanged in that case.
if features_list:
    result_df = pd.merge(
        df, features_df, on='track_id', how='left', validate='many_to_one'
    )
else:
    result_df = df.copy()

# Save the results
result_df.to_csv('spotify_tracks_with_features.csv', index=False)

print(f"Analysis complete. Processed {len(features_list)} tracks successfully.")