In [1]:
# %pip (rather than !pip) installs into the environment of the running kernel;
# the version is pinned so that re-running the notebook resolves the same release.
%pip install spotipy==2.24.0

Collecting spotipy
  Downloading spotipy-2.24.0-py3-none-any.whl.metadata (4.9 kB)
Collecting redis>=3.5.3 (from spotipy)
  Downloading redis-5.2.0-py3-none-any.whl.metadata (9.1 kB)
Downloading spotipy-2.24.0-py3-none-any.whl (30 kB)
Downloading redis-5.2.0-py3-none-any.whl (261 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m261.4/261.4 kB[0m [31m945.1 kB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: redis, spotipy
Successfully installed redis-5.2.0 spotipy-2.24.0


In [None]:
import os
import time

import pandas as pd
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

# Spotify API credentials are read from the environment so that secrets never
# live in the notebook. Export SPOTIPY_CLIENT_ID and SPOTIPY_CLIENT_SECRET
# before starting the kernel.
# NOTE: any key pair that was ever stored in this notebook in plain text must be
# treated as leaked and rotated in the Spotify developer dashboard.
client_id = os.environ.get('SPOTIPY_CLIENT_ID')
client_secret = os.environ.get('SPOTIPY_CLIENT_SECRET')
if not client_id or not client_secret:
    # Fail early with an actionable message instead of an opaque auth error later.
    raise RuntimeError(
        "Spotify credentials not found: set the SPOTIPY_CLIENT_ID and "
        "SPOTIPY_CLIENT_SECRET environment variables."
    )

# Authenticate with Spotify (client-credentials flow)
auth_manager = SpotifyClientCredentials(client_id=client_id, client_secret=client_secret)
sp = spotipy.Spotify(auth_manager=auth_manager)

# Function to search for tracks and collect data by genre
def search_tracks_by_genre(year, genre, country_code='IN', limit=50, offset=0):
    """Run a single Spotify track search filtered by release year and genre.

    Args:
        year: Year inserted into the ``year:`` filter of the query.
        genre: Genre name inserted (quoted) into the ``genre:`` filter.
        country_code: Value passed as the ``market`` argument of the search.
        limit: Page size passed to the search.
        offset: Index of the first result passed to the search.

    Returns:
        The value found under ``['tracks']['items']`` in the search response.
    """
    search_kwargs = {
        'q': f'year:{year} genre:"{genre}"',
        'type': 'track',
        'market': country_code,
        'limit': limit,
        'offset': offset,
    }
    response = sp.search(**search_kwargs)
    return response['tracks']['items']

# Function to collect audio features for a list of track IDs
def get_audio_features(track_ids):
    """Fetch audio features for ``track_ids``, requesting 50 IDs at a time.

    Args:
        track_ids: Sequence of track IDs (it is sliced and measured with ``len``).

    Returns:
        A single list made by concatenating the result of every
        ``sp.audio_features`` call, in request order.
    """
    chunk_size = 50
    collected = []
    start = 0
    while start < len(track_ids):
        collected += sp.audio_features(track_ids[start:start + chunk_size])
        time.sleep(0.1)  # brief pause between consecutive requests
        start += chunk_size
    return collected

# Genres to search for
genres = [
    # One entry per genre filter to query; order is preserved from the original list.
    'carnatic',
    'bhangra',
    'rap',
    'jain bhajan',
    'odia pop',
    'mantra',
    'hare krishna',
    'hindustani instrumental',
    'sandalwood',
    'tamil pop',
    'punjabi pop',
    'tamil devotional',
    'ghazal',
    'classic kollywood',
    'modern bollywood',
    'bhajan',
    'pop',
    'classic bollywood',
    'kollywood',
    'classic bhangra',
    'carnatic vocal',
    'filmi',
    'indian classical',
    'tollywood',
    'hindustani classical',
    'hip hop',
    'sufi',
    'chutney',
    'rock',
    'lata',
]

# Collect track data for every requested year and genre
def _build_track_record(track, features, year, genre):
    """Flatten one track's search metadata and its audio features into a dict."""
    record = {
        'track_name': track['name'],
        'artist': ', '.join(artist['name'] for artist in track['artists']),
        'album': track['album']['name'],
        'release_date': track['album']['release_date'],
        'popularity': track['popularity'],
        'track_id': track['id'],
        'year': year,
        'genre': genre,
    }
    # Audio features, copied in a fixed order so the resulting columns are stable.
    for key in (
        'danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness',
        'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo',
        'duration_ms',
    ):
        record[key] = features[key]
    return record


def collect_tracks_by_genre_and_year(years, genres, max_iterations=20):
    """Collect track metadata plus audio features for every (year, genre) pair.

    For each pair, search results are paged through in steps of 50 with
    ``search_tracks_by_genre``; the audio features of each page are fetched with
    ``get_audio_features`` and merged with the track metadata.

    Args:
        years: Iterable of years to query.
        genres: Iterable of genre names to query.
        max_iterations: Maximum number of pages requested per (year, genre) pair.

    Returns:
        A list of dicts, one per track for which audio features were returned.
        Each dict holds the track metadata, the queried ``year`` and ``genre``,
        and the individual audio-feature values.
    """
    page_size = 50     # offset step; matches the default ``limit`` of search_tracks_by_genre
    max_offset = 1000  # paging stops once the offset reaches the search offset cap

    tracks_data = []
    for year in years:
        for genre in genres:
            print(f"Collecting tracks for year {year}, genre '{genre}'...")
            for i in range(max_iterations):
                offset = i * page_size

                # Check the cap before announcing the batch, so the log never
                # reports a request that is not actually made.
                if offset >= max_offset:
                    print(f"Reached maximum offset for {year}, genre '{genre}'. Stopping further requests.")
                    break

                print(f"Fetching batch {i+1} for {year}, genre '{genre}'")
                page = search_tracks_by_genre(year, genre, offset=offset)

                # An empty page means there is nothing further for this pair.
                if not page:
                    break

                # Skip null entries so that one bad item cannot abort the whole run.
                tracks = [track for track in page if track]
                if tracks:
                    track_ids = [track['id'] for track in tracks]
                    audio_features = get_audio_features(track_ids)

                    # Features are paired with tracks by position.
                    for track, features in zip(tracks, audio_features):
                        if features:  # Sometimes audio features may be None
                            tracks_data.append(
                                _build_track_record(track, features, year, genre)
                            )

                time.sleep(0.5)  # Small delay to avoid hitting API rate limits
    return tracks_data

# Years whose releases are pulled from Spotify
years = [2023, 2024]

# Crawl every (year, genre) combination; up to 20 pages are requested per combination
tracks_data = collect_tracks_by_genre_and_year(years, genres, max_iterations=20)

# Tabulate the collected records
df = pd.DataFrame(data=tracks_data)

# Persist the table without the index column
df.to_csv(path_or_buf='indian_tracks_2023_2024_with_features.csv', index=False)
print("Data saved to indian_tracks_2023_2024_with_features.csv")