In [16]:
import spotipy
import pandas as pd
import numpy as np
from spotipy.oauth2 import SpotifyClientCredentials, SpotifyOAuth

#read cid from file
with open("ids/cid.txt", "r") as file:
    cid = file.read()

#read secret from file
with open("ids/secret.txt", "r") as file:
    secret = file.read()

#Authentication - without user
client_credentials_manager = SpotifyClientCredentials(client_id=cid, client_secret=secret)
sp = spotipy.Spotify(client_credentials_manager = client_credentials_manager)

# Generating Data

Plan: I will create a markov chain of sessions in order to predict which sessions follow which sessions.

I will also create a large list of songs, and treat each of my plalists as a certain session. This will be in a pandas dataframe. This will include the buinary representation of whether or not it belongs to a certain session.

### Step 1: Get my list of songs from spotify plalists

I will use the spotify api to get my list of songs from my playlists. I will assign each playlist a session beginning with 5 playlists. One for each session.

Playslists: 
    Programming
    Jazz
    Samba
    Happy Rap
    Modern Soul

In [17]:

session_1_uri = "spotify:playlist:4BbncuGqRY0GzR0t57PCKy"
session_2_uri = "spotify:playlist:1UMi6pQ4hqyDMbEhbshD9d"
session_3_uri = "spotify:playlist:55yqh8VajCF49DDohXO0nS"
session_4_uri = "spotify:playlist:28wpyiXjRFe8RoE65UHtWo"
session_5_uri = "spotify:playlist:1Bjl9TNi4gdi093qQzalmC"

sessions = [session_1_uri, session_2_uri, session_3_uri, session_4_uri, session_5_uri]


In [23]:
# create a pandas dataframe with the features of the songs in all the playlists
def get_playlist_features(sessions):
    playlist_features_list = []
    for i, playlist_URI in enumerate(sessions):
        playlist = sp.playlist(playlist_URI)
        for track in playlist["tracks"]["items"]:
            track_uri = track["track"]["uri"]
            playlist_features = sp.audio_features(track_uri)[0]
            playlist_features["playlist"] = playlist_URI

            # #get the artists of the track
            # playlist_features["artists"] = track["track"]["artists"]

            # #get the genres of the album for the track
            # album = sp.album(track["track"]["album"]["external_urls"]["spotify"])
            # playlist_features["genres"] = album["genres"]

            #assign a session number to each playlist one hot encoded for all sessions
            playlist_features["session"] = i
            playlist_features_list.append(playlist_features)
    #one hot encode the session number
    playlist_features_df = pd.DataFrame(playlist_features_list)
    playlist_features_df = pd.concat([playlist_features_df, pd.get_dummies(playlist_features_df["session"], prefix="session")], axis=1)
    playlist_features_df.drop(["session"], axis=1, inplace=True)
    return playlist_features_df

In [24]:
playlist_features_df = get_playlist_features(sessions)
playlist_features_df.head()

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,...,track_href,analysis_url,duration_ms,time_signature,playlist,session_0,session_1,session_2,session_3,session_4
0,0.645,0.498,7,-9.681,0,0.0376,0.811,0.529,0.101,0.331,...,https://api.spotify.com/v1/tracks/6rbbDK18oqK3...,https://api.spotify.com/v1/audio-analysis/6rbb...,193629,4,spotify:playlist:4BbncuGqRY0GzR0t57PCKy,1,0,0,0,0
1,0.675,0.327,8,-13.501,0,0.0434,0.272,0.782,0.138,0.354,...,https://api.spotify.com/v1/tracks/1TmgMwcry5pz...,https://api.spotify.com/v1/audio-analysis/1Tmg...,198826,4,spotify:playlist:4BbncuGqRY0GzR0t57PCKy,1,0,0,0,0
2,0.605,0.687,9,-13.928,0,0.045,0.479,0.949,0.104,0.12,...,https://api.spotify.com/v1/tracks/2LD2gT7gwAur...,https://api.spotify.com/v1/audio-analysis/2LD2...,345187,4,spotify:playlist:4BbncuGqRY0GzR0t57PCKy,1,0,0,0,0
3,0.76,0.401,1,-9.537,0,0.0674,0.564,0.259,0.0977,0.619,...,https://api.spotify.com/v1/tracks/2La21GqU4fKT...,https://api.spotify.com/v1/audio-analysis/2La2...,248892,4,spotify:playlist:4BbncuGqRY0GzR0t57PCKy,1,0,0,0,0
4,0.865,0.384,10,-11.953,1,0.0577,0.024,0.434,0.0682,0.852,...,https://api.spotify.com/v1/tracks/2y8PnhXwnyHT...,https://api.spotify.com/v1/audio-analysis/2y8P...,219560,4,spotify:playlist:4BbncuGqRY0GzR0t57PCKy,1,0,0,0,0


In [25]:
playlist_features_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 253 entries, 0 to 252
Data columns (total 24 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   danceability      253 non-null    float64
 1   energy            253 non-null    float64
 2   key               253 non-null    int64  
 3   loudness          253 non-null    float64
 4   mode              253 non-null    int64  
 5   speechiness       253 non-null    float64
 6   acousticness      253 non-null    float64
 7   instrumentalness  253 non-null    float64
 8   liveness          253 non-null    float64
 9   valence           253 non-null    float64
 10  tempo             253 non-null    float64
 11  type              253 non-null    object 
 12  id                253 non-null    object 
 13  uri               253 non-null    object 
 14  track_href        253 non-null    object 
 15  analysis_url      253 non-null    object 
 16  duration_ms       253 non-null    int64  
 1

In [26]:
playlist_features_df.type

0      audio_features
1      audio_features
2      audio_features
3      audio_features
4      audio_features
            ...      
248    audio_features
249    audio_features
250    audio_features
251    audio_features
252    audio_features
Name: type, Length: 253, dtype: object

### Step 2: Create a markov chain of sessions

In [9]:
import random

def simulate_markov_chain(sessions):
  # Initialize the starting state
  current_state = sessions[0]

  # Create a dictionary to hold the probabilities of transitioning
  # from one state to another
  transitions = {}

  # Loop through the sessions and populate the transition probabilities
  for i in range(len(sessions) - 1):
    # Get the current and next state
    current_session = sessions[i]
    next_session = sessions[i + 1]

    # If this is the first time seeing this transition,
    # initialize the probability to 0
    if current_session not in transitions:
      transitions[current_session] = {next_session: 0}
    elif next_session not in transitions[current_session]:
      transitions[current_session][next_session] = 0

    # Increment the probability of transitioning from the current
    # session to the next session
    transitions[current_session][next_session] += 1

  # Normalize the probabilities so they sum to 1
  for current_session in transitions:
    total = sum(transitions[current_session].values())
    for next_session in transitions[current_session]:
      transitions[current_session][next_session] /= total

  # Simulate the Markov chain for a number of steps
  num_steps = 20
  for i in range(num_steps):
    # Print the current state
    print(current_state)

    # Get the possible next states and their probabilities
    next_states = transitions[current_state]
    print(next_states)

    # Choose the next state based on the probabilities
    next_state = random.choices(list(next_states.keys()), next_states.values())[0]
    print(next_state)

    # Update the current state
    current_state = next_state

# Example usage

simulate_markov_chain(sessions)


spotify:playlist:4BbncuGqRY0GzR0t57PCKy
{'spotify:playlist:1UMi6pQ4hqyDMbEhbshD9d': 1.0}
spotify:playlist:1UMi6pQ4hqyDMbEhbshD9d
spotify:playlist:1UMi6pQ4hqyDMbEhbshD9d
{'spotify:playlist:55yqh8VajCF49DDohXO0nS': 1.0}
spotify:playlist:55yqh8VajCF49DDohXO0nS
spotify:playlist:55yqh8VajCF49DDohXO0nS
{'spotify:playlist:28wpyiXjRFe8RoE65UHtWo': 1.0}
spotify:playlist:28wpyiXjRFe8RoE65UHtWo
spotify:playlist:28wpyiXjRFe8RoE65UHtWo
{'spotify:playlist:1Bjl9TNi4gdi093qQzalmC': 1.0}
spotify:playlist:1Bjl9TNi4gdi093qQzalmC
spotify:playlist:1Bjl9TNi4gdi093qQzalmC


KeyError: 'spotify:playlist:1Bjl9TNi4gdi093qQzalmC'