In [1]:
from stackapi import StackAPI
from bs4 import BeautifulSoup
import requests as re

from time import sleep
from random import randint
import pandas as pd

from datetime import datetime, timedelta

In [2]:
import os
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

# Credentials are read from the environment so that no secret is stored in
# the notebook itself.
client_id = os.environ.get("SPOTIPY_CLIENT_ID")
client_secret = os.environ.get("SPOTIPY_CLIENT_SECRET")

# Pass the values read above to the credentials manager explicitly, so the two
# variables are actually used rather than relying on spotipy looking the same
# environment variables up on its own.
sp = spotipy.Spotify(
    client_credentials_manager=SpotifyClientCredentials(
        client_id=client_id,
        client_secret=client_secret,
    )
)

#### Find top song per genre

**!** Spotify doesn't expose genres for tracks; the closest you can get is the genre of the album the track is on. Genre is, however, only part of the full album object, not the simplified album object. To get an album's genre, first request the track metadata to get the album URI, then request the album metadata. You can also get an artist's genres by calling the artist metadata endpoint.

In [3]:
sp.recommendation_genre_seeds()

{'genres': ['acoustic',
  'afrobeat',
  'alt-rock',
  'alternative',
  'ambient',
  'anime',
  'black-metal',
  'bluegrass',
  'blues',
  'bossanova',
  'brazil',
  'breakbeat',
  'british',
  'cantopop',
  'chicago-house',
  'children',
  'chill',
  'classical',
  'club',
  'comedy',
  'country',
  'dance',
  'dancehall',
  'death-metal',
  'deep-house',
  'detroit-techno',
  'disco',
  'disney',
  'drum-and-bass',
  'dub',
  'dubstep',
  'edm',
  'electro',
  'electronic',
  'emo',
  'folk',
  'forro',
  'french',
  'funk',
  'garage',
  'german',
  'gospel',
  'goth',
  'grindcore',
  'groove',
  'grunge',
  'guitar',
  'happy',
  'hard-rock',
  'hardcore',
  'hardstyle',
  'heavy-metal',
  'hip-hop',
  'holidays',
  'honky-tonk',
  'house',
  'idm',
  'indian',
  'indie',
  'indie-pop',
  'industrial',
  'iranian',
  'j-dance',
  'j-idol',
  'j-pop',
  'j-rock',
  'jazz',
  'k-pop',
  'kids',
  'latin',
  'latino',
  'malay',
  'mandopop',
  'metal',
  'metal-misc',
  'metalcore',


#### Selected genres :
* blues
* classical
* country
* electro
* funk
* hip-hop
* j-pop
* latino
* metal
* pop
* punk
* reggae
* rock

In [4]:
# loop this list through sp.recommendations()
# extract album IDs from the recommendations
# extract track IDs from the albums
# reveal audio_features from the tracks
# bin them according to audio_features

In [5]:
genres = ['blues','classical','country','electro','funk','hip-hop','j-pop','latino','metal','pop','punk','reggae','rock']


def reco_loop(x):
    """Return the album ID of the first recommended track for each genre.

    For every genre in ``x`` one recommendation request is made through the
    module-level ``sp`` client, and the album ID of the first track in the
    response is collected.

    The IDs are gathered in a fresh local list on every call. Module-level
    accumulator lists are deliberately avoided: with shared lists, a second
    call (or re-running the calling cell) would return the IDs of every
    previous call again in addition to the new ones.

    Args:
        x: iterable of genre seed strings.

    Returns:
        A new list of album ID strings, one per genre, in input order.

    Raises:
        IndexError: if a recommendation response contains no tracks.
    """
    first_album_ids = []
    for genre in x:
        recommendation = sp.recommendations(seed_genres=[genre])
        # Only the first recommended track of each response is used.
        first_album_ids.append(recommendation['tracks'][0]['album']['id'])
    return first_album_ids

In [6]:
# Album IDs returned by reco_loop for the genres in `genres`: each ID is taken
# from the first recommended track of that genre's response.
album_ids = reco_loop(genres)

In [7]:
len(album_ids)

13

In [21]:
album_ids

['0cw6Sv7IwZ87aLPPvNPSd0',
 '2hvHI1YepnxxbwVqru5VTZ',
 '79uExtsVJH2aNcS2Xu4Cim',
 '6kVK2SrA41p9MngYbkMXF2',
 '0Vj6k41UiNc3GUH8kEig1i',
 '4Csoz10NhNJOrCTUoPBdUD',
 '6whaxzm5MNttT7nVl3XO8b',
 '1lOegkmEfMEMhHy0loZKXV',
 '7G2PY8yve3Db0PeGsosb4x',
 '3yWRq9Dd2UO5xyqxTjLDmp',
 '53W7O35aXGUa1AyhXYtLpf',
 '55fxaMIEdo2z7n4SW7qfKD',
 '6r7LZXAVueS5DqdrvXJJK7']

In [8]:
# Collect the track IDs of every album in a list of album IDs.

def get_track_ids_from_albums(x):
    """Return the unique track IDs found on the given albums.

    Each album is fetched through the module-level ``sp`` client and the IDs
    of the tracks listed in the album object are collected.

    Duplicates are dropped while keeping first-seen order, so the result is
    the same on every run for the same input. A plain ``set`` would return
    the IDs in an arbitrary order, which makes any later positional slice of
    the list pick a different subset from run to run.

    NOTE(review): only the items embedded in the album object are read; if
    that track listing is paginated, later pages are not fetched — confirm.

    Args:
        x: iterable of album ID strings.

    Returns:
        list of unique track ID strings in order of first appearance.
    """
    track_ids = (
        track["id"]
        for album_id in x
        for track in sp.album(album_id)["tracks"]["items"]
    )
    # dict keys keep insertion order, which gives an ordered de-duplication.
    return list(dict.fromkeys(track_ids))

In [9]:
get_track_ids_from_albums(album_ids)

['4hfPI0Wb6Vrb6bUv6BNrON',
 '1UhmANULygND830HDoa6iM',
 '0i735GMaAdxERa6k563yKp',
 '6YFewEWHZB04bJfGYgbYCC',
 '6EKrBVHDk20hwjYWGPQQlF',
 '7u17w29WWTiftzGHs8rhpw',
 '6G9sWidCZ7e1CVv37hjCj1',
 '2j4AKjlKl7iDVLWJ3rKifU',
 '4BZcmWwDcnNXQuVEaZA8a5',
 '5ucHc0xNuzMssqnr3FDnxp',
 '13adZXD5qXvZk0OtN6WUEy',
 '7iqWZNpHBAK5jE6e9gPKj9',
 '1f1vUJ1eAY1swaAnDxWd23',
 '0kBZUOxX4XrTu6spRQiUOt',
 '2nSixAWlDX7TW6Zlui1sRw',
 '5cikVJA2ExOI9TTkC06vkN',
 '236eGPBmuMNkPWBpGN2Cfl',
 '1cjRGRxMNA8bdpFMbupG0w',
 '77lGafvlU68CeHgB2pkHC9',
 '3pktmNAbxSBjCDD5u2meYs',
 '2TElexeJMX3OyXil4DWbVs',
 '4rQyDKHWokzTRo8axBDlz0',
 '0nhCLRoqki3JF0fGbuSOrV',
 '0fAnYkjbAnYz1qzOWimdjT',
 '6mOOqBUMPHovrbncBwMdQq',
 '1PSBzsahR2AKwLJgx8ehBj',
 '3vLUBXxEyJdEyzrHIeEAxY',
 '7wqWSTBpYjsFX4JDDQaU1w',
 '6B39pYK4m4tyWioWnjRq2V',
 '1wjyGxvNK5POKbIfNroB3E',
 '7uMb8RUN7S0bXdWXpI2euL',
 '1bLXh09ihiXb3S5hkdRvci',
 '5bG2wMLua5ywaXjwmSQ42r',
 '2h9QegkylwprytG4AE4Yzc',
 '3YROTILzpPbedQXDgeU9mb',
 '15Q2Tp792e30i2AfJmGwYz',
 '7w1JDEjmXYw2DwllK1EwCg',
 

### 

___________

## SONG FEATURES EXTRACT INTO DATAFRAME (prototype)

In [10]:
# Genre seeds used for the prototype below; this is the same selection as the
# list in the "Selected genres" section.
genres = ['blues','classical','country','electro','funk','hip-hop','j-pop','latino','metal','pop','punk','reggae','rock']

### Extract album ID according to genre

In [11]:
genres = ['blues','classical','country','electro','funk','hip-hop','j-pop','latino','metal','pop','punk','reggae','rock']


def reco_loop(x):
    """Return the album ID of the first recommended track for each genre.

    For every genre in ``x`` one recommendation request is made through the
    module-level ``sp`` client, and the album ID of the first track in the
    response is collected.

    The IDs are gathered in a fresh local list on every call. Module-level
    accumulator lists are deliberately avoided: with shared lists, a second
    call (or re-running the calling cell) would return the IDs of every
    previous call again in addition to the new ones.

    Args:
        x: iterable of genre seed strings.

    Returns:
        A new list of album ID strings, one per genre, in input order.

    Raises:
        IndexError: if a recommendation response contains no tracks.
    """
    first_album_ids = []
    for genre in x:
        recommendation = sp.recommendations(seed_genres=[genre])
        # Only the first recommended track of each response is used.
        first_album_ids.append(recommendation['tracks'][0]['album']['id'])
    return first_album_ids

In [12]:
# Album IDs returned by reco_loop for the genres in `genres`.
reco_albums_1 = reco_loop(genres)

In [13]:
reco_albums_1

['3Vp5uEx8koFke8NyfElrlD',
 '1NVw0NWLA3NJwQy89A2pd6',
 '5L0e8X6Mf9lfjs2miK2WUB',
 '38ojnzxpPFT09ee03aCTK7',
 '1ZTr1RtD8WP2El0vkvI1R2',
 '3arNdjotCvtiiLFfjKngMc',
 '6VxPrvnkHuHgpD1043Xlga',
 '5t3hXKG2uyhYSlMaRViaEH',
 '0VvDozXY0AHdsWhHJAWWTF',
 '2SCQAZZ5pc90l9SPBsWCIQ',
 '1gsyJWUvwjnsNgYUxpOfLR',
 '2ugVVkmRbNvW3aczv66i19',
 '2xSppFiPUTWqZ9cdF6CQnY']

### From returned album ids, extract all track Ids

In [14]:
def get_track_ids_from_albums(album_ids):
    """Return the unique track IDs found on the given albums.

    Each album is fetched through the module-level ``sp`` client and the IDs
    of the tracks listed in the album object are collected.

    Duplicates are dropped while keeping first-seen order, so the result is
    the same on every run for the same input. A plain ``set`` would return
    the IDs in an arbitrary order, which makes any later positional slice of
    the list pick a different subset from run to run.

    NOTE(review): only the items embedded in the album object are read; if
    that track listing is paginated, later pages are not fetched — confirm.

    Args:
        album_ids: iterable of album ID strings.

    Returns:
        list of unique track ID strings in order of first appearance.
    """
    track_ids = (
        track["id"]
        for album_id in album_ids
        for track in sp.album(album_id)["tracks"]["items"]
    )
    # dict keys keep insertion order, which gives an ordered de-duplication.
    return list(dict.fromkeys(track_ids))

In [15]:
# De-duplicated track IDs gathered from all albums in reco_albums_1.
tracks_id_1 = get_track_ids_from_albums(reco_albums_1)

In [16]:
# list of track IDs
tracks_id_1

['4VZ0UOLwnTnQSLZJme5lyQ',
 '5dFwDUe9DMl28LpROkhZu4',
 '1L4ca7Ti3S795YrBdTzE4T',
 '5ZdziocLu1aGzSRixpW3rr',
 '6lIyXleBCaWzOn9Ltjg6E5',
 '2N3U8uTYhjX2KwGT0pf5x0',
 '2WxzLbJfPA2vJGxQiKd7Jq',
 '7dUdMZqfGSIt0ZkmTOgRLA',
 '3twxRT6f42UpjE2xUVcdG3',
 '6O7Et1ArYoS9nOKwRjK3Oq',
 '2C4XQsZFeze4yMatFzY9M2',
 '1GOuHB20lqVyxv3GdxovPY',
 '3yju5QYFjndmGvaG2wVvPv',
 '1dbLmUsZJFTbHCsAGE4x2D',
 '6wwPtKxaMEbQYc6atQmhTE',
 '4yRMcsWVWCYGQPj5am1UCP',
 '4JMvnscEzgZNXzppm8V0Qs',
 '3nHbQNMuGuEtIVhPGVfFbF',
 '3zQN4zs4BiEnSI3sh2PZwa',
 '5EVXcYg6G8at6IDQbi9M2F',
 '7vcryRa5cl0iJ4Bif3dKWU',
 '3943Xwoj2F9TfuLgHDYM8J',
 '341LEcA134Ms2ri6WhJUwV',
 '3I8MbKRkS2IBDuslhCS3iT',
 '1Wdhwm3TtIuZvohSZNpZwP',
 '4kBbLU8WnCWknsYXHPCsv2',
 '3yXknYHbkcSca92PSSVniX',
 '5OGzWbdJVqlHGIVqqZyoPX',
 '44YNLnTujChFsBmuVts4me',
 '06M6ANp2yXdhpGssKPhT69',
 '18gj6WTWnjJA5s7FSTVQwj',
 '3EVgZBKWXY6xOtS8mrhW1C',
 '5w3rCESBlHOV2CLYUONlDO',
 '67h3AeLwglIaguXt95F0jQ',
 '4WKGfdb5aYHv4srH7zuclB',
 '2XKmD2Fy4rVWzL7CFDpd2s',
 '2546X1mmMxfa9ZtFfVk1E0',
 

### Extract audio features from returned track id list

In [17]:
# Request audio features for the first 100 track IDs only; any further IDs in
# tracks_id_1 are not queried here.
# NOTE(review): 100 presumably mirrors the per-request ID limit of the
# audio-features endpoint — confirm against the spotipy documentation.
audio_feat_1 = sp.audio_features(tracks_id_1[:100])

In [18]:
# list, with embedded dictionary as items
len(audio_feat_1)

100

In [22]:
# Inspect the raw audio-feature records (a list of dicts, one per track).
# The name must be `audio_feat_1`; `audio_feat_` is not defined anywhere in
# the notebook and raises NameError on a fresh kernel.
audio_feat_1

[{'danceability': 0.766,
  'energy': 0.805,
  'key': 8,
  'loudness': -2.771,
  'mode': 1,
  'speechiness': 0.0443,
  'acousticness': 0.705,
  'instrumentalness': 0,
  'liveness': 0.0928,
  'valence': 0.967,
  'tempo': 114.195,
  'type': 'audio_features',
  'id': '4VZ0UOLwnTnQSLZJme5lyQ',
  'uri': 'spotify:track:4VZ0UOLwnTnQSLZJme5lyQ',
  'track_href': 'https://api.spotify.com/v1/tracks/4VZ0UOLwnTnQSLZJme5lyQ',
  'analysis_url': 'https://api.spotify.com/v1/audio-analysis/4VZ0UOLwnTnQSLZJme5lyQ',
  'duration_ms': 168653,
  'time_signature': 3},
 {'danceability': 0.828,
  'energy': 0.592,
  'key': 8,
  'loudness': -7.08,
  'mode': 1,
  'speechiness': 0.0501,
  'acousticness': 0.678,
  'instrumentalness': 0.000107,
  'liveness': 0.187,
  'valence': 0.795,
  'tempo': 101.646,
  'type': 'audio_features',
  'id': '5dFwDUe9DMl28LpROkhZu4',
  'uri': 'spotify:track:5dFwDUe9DMl28LpROkhZu4',
  'track_href': 'https://api.spotify.com/v1/tracks/5dFwDUe9DMl28LpROkhZu4',
  'analysis_url': 'https://api

### Creating dataframe from returned audiofeatures

In [19]:
# Build a frame from the audio-feature records and, in the same expression,
# keep only the selected feature columns plus the track id and duration.
df1 = pd.DataFrame(audio_feat_1).loc[
    :,
    [
        "danceability",
        "energy",
        "loudness",
        "speechiness",
        "acousticness",
        "instrumentalness",
        "liveness",
        "valence",
        "tempo",
        "id",
        "duration_ms",
    ],
]

df1

Unnamed: 0,danceability,energy,loudness,speechiness,acousticness,instrumentalness,liveness,valence,tempo,id,duration_ms
0,0.766,0.8050,-2.771,0.0443,0.70500,0.000000,0.0928,0.9670,114.195,4VZ0UOLwnTnQSLZJme5lyQ,168653
1,0.828,0.5920,-7.080,0.0501,0.67800,0.000107,0.1870,0.7950,101.646,5dFwDUe9DMl28LpROkhZu4,436760
2,0.234,0.8280,-5.048,0.0412,0.00119,0.000000,0.1580,0.4040,168.073,1L4ca7Ti3S795YrBdTzE4T,265520
3,0.505,0.4630,-10.653,0.1240,0.51800,0.000016,0.7150,0.6590,105.661,5ZdziocLu1aGzSRixpW3rr,214924
4,0.167,0.0581,-21.249,0.0356,0.96900,0.363000,0.0873,0.0733,75.979,6lIyXleBCaWzOn9Ltjg6E5,294133
...,...,...,...,...,...,...,...,...,...,...,...
95,0.630,0.6280,-4.016,0.3270,0.05150,0.000000,0.0910,0.2660,121.307,7sLDnUauivS2vN6Z9EfUJT,282667
96,0.557,0.8130,-5.745,0.0340,0.00758,0.247000,0.1120,0.1740,115.452,17k0ItD5xU09ORtsUrb2dO,210427
97,0.454,0.5030,-7.610,0.0265,0.37600,0.007600,0.2420,0.4010,150.999,5v9e0mJg74D7cFfYxKDJH0,245693
98,0.158,0.1540,-17.596,0.0384,0.95300,0.658000,0.1200,0.1660,74.988,4WIyNmRe7htCNYwMOpF18p,312933


In [20]:
df1.duplicated().sum()

0