In [None]:
from dotenv import load_dotenv
import os
import requests
# Load environment variables from the local .env file (if one is present)
load_dotenv()

# Spotify API credentials, read from the environment so they never live in the notebook
spotify_id = os.getenv('SPOTIFY_ID')
api_secret = os.getenv('API_SECRET')

# Fail fast with a readable message: os.getenv returns None for unset variables,
# which would otherwise only surface later as an opaque error from the token request.
missing_credentials = [
    name
    for name, value in (('SPOTIFY_ID', spotify_id), ('API_SECRET', api_secret))
    if not value
]
if missing_credentials:
    raise RuntimeError(
        'Missing required environment variable(s): ' + ', '.join(missing_credentials)
    )

In [None]:
# Fetch access token using the client-credentials grant
auth_response = requests.post(
    'https://accounts.spotify.com/api/token',
    data={
        'grant_type': 'client_credentials',
        'client_id': spotify_id,
        'client_secret': api_secret,
    },
    timeout=30,  # never let a stalled connection hang the kernel indefinitely
)
# Raise on 4xx/5xx here so bad credentials show up as an HTTP error
# instead of a KeyError on the missing 'access_token' field below.
auth_response.raise_for_status()
auth_response_data = auth_response.json()
access_token = auth_response_data['access_token']



In [None]:
# Fetch playlists
SPOTIFY_USER_ID = 'jjrryyaa'
REQUEST_TIMEOUT = 30  # seconds
limit = 45  # page size requested for playlist tracks

headers = {
    'Authorization': f'Bearer {access_token}'
}


def _fetch_all_items(url, headers, params=None):
    """Collect the 'items' of every page of a paginated endpoint.

    Starts at `url` (with optional query `params` for the first request) and
    keeps following the `next` link of each response until it is missing/null.

    Raises:
        requests.HTTPError: if any page responds with a 4xx/5xx status.
    """
    items = []
    while url:
        page_response = requests.get(url, headers=headers, params=params, timeout=REQUEST_TIMEOUT)
        page_response.raise_for_status()
        page = page_response.json()
        items.extend(page.get('items', []))
        url = page.get('next')
        params = None  # the `next` URL already carries its own query string
    return items


# Follow pagination so playlists beyond the first page are not silently dropped
playlists = _fetch_all_items(
    f'https://api.spotify.com/v1/users/{SPOTIFY_USER_ID}/playlists',
    headers,
)

# Create maps of playlist name -> track names / artist names / artist ids.
# NOTE: keyed by playlist name, so two playlists sharing a name overwrite each other.
playlist_tracks = {}
playlist_artists = {}
playlist_artist_ids = {}

for playlist in playlists:
    playlist_name = playlist['name']
    playlist_id = playlist['id']
    playlist_items = _fetch_all_items(
        f'https://api.spotify.com/v1/playlists/{playlist_id}/tracks',
        headers,
        params={'limit': limit, 'offset': 0},
    )
    # An entry's 'track' can be null (e.g. a track that is no longer available);
    # skip those entries instead of crashing on None['name'].
    tracks = [item for item in playlist_items if item.get('track')]
    track_names = [track['track']['name'] for track in tracks]
    # Artist names and ids are built from the same traversal so they stay aligned
    # position by position (ids may be None; consumers must tolerate that).
    artists_names = [artist['name'] for track in tracks for artist in track['track'].get('artists', [])]
    artist_id = [artist['id'] for track in tracks for artist in track['track'].get('artists', [])]
    playlist_tracks[playlist_name] = track_names
    playlist_artists[playlist_name] = artists_names
    playlist_artist_ids[playlist_name] = artist_id


In [None]:
# Flatten the map of artists and query the genre for each artist
from itertools import chain 

# Flatten the list of artists
all_artists = list(chain.from_iterable(playlist_artists.values()))
all_ids = list(chain.from_iterable(playlist_artist_ids.values()))
# Names and ids are parallel lists, so zipping pairs each artist name with its id
artist_id_map = dict(zip(all_artists, all_ids))

# Query the genre for each artist.
# Drop missing ids (a None would make ','.join raise TypeError) and de-duplicate
# while keeping first-seen order, so each artist is requested only once.
unique_artist_ids = list(dict.fromkeys(
    artist_id for artist_id in artist_id_map.values() if artist_id
))

batch_size = 45  # number of artist ids sent per request
artist_genres = {}
for i in range(0, len(unique_artist_ids), batch_size):
    artist_ids = ','.join(unique_artist_ids[i:i + batch_size])
    artist_response = requests.get(
        f'https://api.spotify.com/v1/artists?ids={artist_ids}',
        headers=headers,
        timeout=30,
    )
    # Surface HTTP failures directly rather than as a KeyError on 'artists'
    artist_response.raise_for_status()
    artist_data = artist_response.json()['artists']
    for artist in artist_data:
        if artist is None:
            # Defensive: skip null entries instead of crashing on None['name']
            continue
        # NOTE: keyed by display name, so distinct artists sharing a name collide
        artist_genres[artist['name']] = artist.get('genres', [])



In [None]:
# Build genre_map: longer genre name -> set of genre names contained in it.
# Only genres listed together on the same artist are compared with each other.
genre_map = {}
for genre_list in artist_genres.values():
    for shorter in genre_list:
        for longer in genre_list:
            if shorter == longer:
                continue
            if shorter in longer:
                # e.g. 'pop' is a substring of 'indie pop' -> genre_map['indie pop'] gains 'pop'
                genre_map.setdefault(longer, set()).add(shorter)


In [None]:
# Calculate the percentage genre for each playlist
playlist_genre_percentage = {}
for playlist_name, artists in playlist_artists.items():
    genre_count = {}
    for artist in artists:
        # Work on a copy: appending to the list stored in artist_genres would
        # silently rewrite that dict for every other cell that reads it.
        genres = list(artist_genres.get(artist, []))
        # Include sub-genres from the genre map. The list grows while it is being
        # iterated, so sub-genres of newly added sub-genres are picked up as well.
        for genre in genres:
            for sub_genre in genre_map.get(genre, ()):
                if sub_genre not in genres:
                    genres.append(sub_genre)
        for genre in genres:
            genre_count[genre] = genre_count.get(genre, 0) + 1
    # Denominator is the number of entries in the playlist's artist list (repeats included).
    # When that list is empty, genre_count is empty too, so no division by zero occurs.
    total_artists = len(artists)
    genre_percentage = {genre: (count / total_artists) * 100 for genre, count in genre_count.items()}
    playlist_genre_percentage[playlist_name] = genre_percentage

print(playlist_genre_percentage)

In [None]:
# Add visualization for the percentage genre for each playlist
import matplotlib.pyplot as plt
import numpy as np
from wordcloud import WordCloud

# Render one word cloud per playlist, sizing each genre by its percentage
for playlist_name, genre_percentage in playlist_genre_percentage.items():
    if not genre_percentage:
        # generate_from_frequencies raises ValueError when given no words,
        # so playlists without any genre data are reported and skipped.
        print(f'Skipping "{playlist_name}": no genre data to plot')
        continue
    wordcloud = WordCloud(width=800, height=400, background_color='white').generate_from_frequencies(genre_percentage)
    fig, ax = plt.subplots(figsize=(8, 4))
    ax.imshow(wordcloud)
    ax.axis('off')
    ax.set_title(playlist_name)
    plt.show()
    # Release the figure so a long list of playlists does not accumulate open figures
    plt.close(fig)