In [262]:
import spotipy
from spotipy.oauth2 import SpotifyOAuth
import json
import time

# OAuth scope requested from Spotify for this session.
scope = "user-library-read"

# Shared Spotify client used by every download helper below.
sp = spotipy.Spotify(
    auth_manager=SpotifyOAuth(scope=scope),
)

def find_songs_genre(genre, n):
    """Fetch up to ``n`` recommended tracks for ``genre`` with their audio features.

    Parameters
    ----------
    genre : str
        Genre seed handed to ``sp.recommendations``.
    n : int
        Maximum number of recommendations to request.

    Returns
    -------
    dict
        Maps each track name to a dict with the keys ``name``, ``artists``
        (first listed artist only), ``length`` (``duration_ms`` converted to
        whole seconds), ``pop``, ``valence`` and ``energy`` (the last two are
        multiplied by 100 and rounded to 2 decimals).

    Notes
    -----
    Tracks are keyed by name, so two tracks with the same name overwrite each
    other. Tracks whose audio-features entry is ``None`` are skipped instead of
    raising ``TypeError``.
    """
    tracks = sp.recommendations(seed_genres=[genre], limit=n)['tracks']
    if not tracks:
        return {}

    # One batched request for all ids instead of one request per track.
    features = sp.audio_features([track["id"] for track in tracks])

    songs = {}
    for track, audio_feat in zip(tracks, features):
        if audio_feat is None:
            # No audio analysis available for this track; nothing to score it on.
            continue
        songs[track["name"]] = {
            "name": track["name"],
            "artists": track["artists"][0]["name"],
            "length": round(track["duration_ms"] / 1000),
            "pop": track["popularity"],
            "valence": round(audio_feat["valence"] * 100, 2),
            "energy": round(audio_feat["energy"] * 100, 2),
        }
    return songs


def make_a_list(genre, n, target=100, max_stalled_rounds=5):
    """Accumulate recommendations for ``genre`` until ``target`` distinct songs are found.

    Parameters
    ----------
    genre : str
        Genre seed forwarded to ``find_songs_genre``.
    n : int
        Number of recommendations requested per round.
    target : int, optional
        Minimum number of distinct song names to collect (default 100).
    max_stalled_rounds : int, optional
        Give up after this many consecutive rounds that add no new song, so a
        genre with fewer than ``target`` distinct names cannot loop forever.

    Returns
    -------
    dict
        Song name -> attribute dict. May hold more than ``target`` entries (the
        last batch is kept whole) or fewer when the search stalls.
    """
    import warnings  # local import keeps this cell self-contained

    songs = {}
    stalled_rounds = 0
    while len(songs) < target:
        count_before = len(songs)
        songs.update(find_songs_genre(genre, n))

        if len(songs) == count_before:
            stalled_rounds += 1
            if stalled_rounds >= max_stalled_rounds:
                warnings.warn(
                    f"Only {len(songs)} distinct songs found for genre {genre!r} "
                    f"(wanted {target}); giving up.",
                    RuntimeWarning,
                )
                break
        else:
            stalled_rounds = 0

        # Pause between requests, but not after the batch that reached the target.
        if len(songs) < target:
            time.sleep(1)
    return songs

def create_json(genre, n):
    """Download the songs of ``genre`` and store them in ``song_db/<genre>.json``.

    Parameters
    ----------
    genre : str
        Genre seed; also used as the output file name.
    n : int
        Batch size forwarded to ``make_a_list``.
    """
    import os  # local import: this cell does not import os itself

    # Download first, so a failed download never leaves a truncated file behind.
    songs = make_a_list(genre, n)

    os.makedirs('song_db', exist_ok=True)
    with open(f'song_db/{genre}.json', 'w') as f:
        json.dump(songs, f, indent=4, sort_keys=True)

In [None]:
def for_every_genre(file, n):
    """Create a song JSON file for every genre listed in ``file``.

    Parameters
    ----------
    file : str
        Path of a text file with one genre per line. Surrounding whitespace
        and single quotes are stripped from each line.
    n : int
        Batch size forwarded to ``create_json``.

    Blank lines (or lines holding only quotes) are skipped so that no request
    is made for an empty genre and no ``song_db/.json`` file is written.
    """
    with open(file, "r") as f:
        genres = [line.strip().strip("'") for line in f]

    for genre in genres:
        if not genre:
            continue
        create_json(genre, n)

# Text file listing the genres to download, one per line.
file_name = "genres_remain.txt"
for_every_genre(file=file_name, n=100)

In [268]:
# Locate the directory that holds the per-genre song JSON files
import os

# Song database folder, resolved against the notebook's working directory.
cwd = os.getcwd() + "/song_db"


def get_genere_dict(path):
    """Load every ``*.json`` file found directly inside ``path``.

    Parameters
    ----------
    path : str
        Directory containing one JSON file per genre.

    Returns
    -------
    dict
        Maps the file name without its ``.json`` extension to the parsed
        content of that file, in alphabetical order of file name.

    Entries that are not regular ``.json`` files (for example a
    ``.ipynb_checkpoints`` folder or a stray text file) are ignored rather
    than crashing the load.
    """
    genere_dict = {}

    # Sorted so the genre order does not depend on the file system.
    for file in sorted(os.listdir(path)):
        genere, extension = os.path.splitext(file)
        file_path = f"{path}/{file}"
        if extension.lower() != ".json" or not os.path.isfile(file_path):
            continue
        with open(file_path, encoding="utf-8") as fp:
            genere_dict[genere] = json.load(fp)

    return genere_dict

In [296]:
# This cell samples genres at random, so it imports `random` itself instead of
# relying on a later cell having been run first.
import random

# Load every genre file and list the available genre names.
genere_dict = get_genere_dict(cwd)
generes = list(genere_dict.keys())
# Number of genres to draw songs from.
num_of_gens = 3
selected_generes = random.sample(generes, num_of_gens)
# Merge the songs of the selected genres into one pool
# (songs sharing a name across genres overwrite each other).
songs = {}

for genere in selected_generes:
    songs.update(genere_dict[genere])

print(f"The selected generes are {selected_generes}")

The selected generes are ['comedy', 'forro', 'hip-hop']


In [315]:
import random
# step 1: constants and parameters of the genetic algorithm

TOTAL_NUM_SONGS = len(songs)  # size of the candidate song pool
NUM_SONGS = 20                # songs per playlist
POPULATION_SIZE = 50          # candidate playlists per generation
NUM_GENERATIONS = 500         # generations to evolve
MUTATION_RATE = 0.1           # per-position probability of mutation

# Target values the playlist should approach; the length target allows
# 240 per song, in the same unit as each song's 'length' entry.
desired_values = dict(
    length=NUM_SONGS * 240,
    energy=80,
    valence=70,
    pop=60,
)

#### I think we should change the fitness function so that it also takes the total duration of the playlist into account, but I don't think simply adding the difference "desired length" - "playlist length" is the best way to do it...
#### I also think the fitness function is too simple; we could add some more terms to it

In [299]:
# step 2 define fitness function
# in our case the goal is to minimise the sum of the differences between each song's parameters and the desired values

def calculate_fitness(solution, desired_values):
    """Score a playlist: the lower the value, the closer it is to the targets.

    Parameters
    ----------
    solution : list
        Song ids (keys of the module-level ``songs`` dict) forming the playlist.
    desired_values : dict
        Targets; the keys ``'energy'``, ``'valence'`` and ``'pop'`` are read.

    Returns
    -------
    number
        Sum over all songs of the absolute differences between the song's
        energy, valence and popularity and the corresponding targets.

    Notes
    -----
    TODO: playlist length is not part of the score yet. The previous version
    computed a length difference but discarded it, so that dead computation
    (and its requirement for a ``'length'`` key) has been removed.
    """
    total_diff = 0
    for song_id in solution:
        # songs is the dictionary {song_id: {energy: y, valence: z, pop: n, ...}, ...}
        song = songs[song_id]
        total_diff += (
            abs(song['energy'] - desired_values['energy'])
            + abs(song['valence'] - desired_values['valence'])
            + abs(song['pop'] - desired_values['pop'])
        )
    return total_diff

In [300]:
# step 3 get a initial population

def generate_individual(songs):
    """Build one random playlist of ``NUM_SONGS`` distinct songs.

    Parameters
    ----------
    songs : dict
        Pool of candidate songs keyed by song id.

    Returns
    -------
    list
        ``NUM_SONGS`` distinct keys of ``songs`` in random order.

    The sample is drawn from the ``songs`` argument itself, so the result
    cannot go out of sync with a stale ``TOTAL_NUM_SONGS``, and the key list
    is built once instead of once per selected song.
    """
    return random.sample(list(songs), NUM_SONGS)

# Starting generation: POPULATION_SIZE (50) random playlists, each holding NUM_SONGS (20) songs.
population = [
    generate_individual(songs)
    for _slot in range(POPULATION_SIZE)
]

In [301]:
# step 4 define genetic operators

# SELECTION
def selection(population, desired_values, elite_fraction=0.2):
    """Keep the fittest share of ``population``.

    Parameters
    ----------
    population : list
        Candidate playlists.
    desired_values : dict
        Targets forwarded to ``calculate_fitness``.
    elite_fraction : float, optional
        Share of the population to keep (default 0.2, i.e. the top 20%).

    Returns
    -------
    list
        The ``int(len(population) * elite_fraction)`` individuals with the
        lowest fitness, best first.

    Individuals are ranked by fitness only. Ties keep their original order
    instead of falling back to comparing the playlists themselves.
    """
    ranked = sorted(
        population,
        key=lambda individual: calculate_fitness(individual, desired_values),
    )
    return ranked[:int(len(population) * elite_fraction)]

# we end up with a population of 10 (20% POPULATION_SIZE)

In [302]:
# CROSSOVER
def crossover(parent1, parent2):
    """Single-point crossover that never introduces duplicate songs.

    A random crossover point is drawn between 1 and ``len(parent1) - 1``.
    Before that point each child takes the *other* parent's song, unless that
    song already appears somewhere in its own parent, in which case it keeps
    its own parent's song. From the crossover point onward each child is a
    copy of its own parent.

    Parameters
    ----------
    parent1, parent2 : list
        Playlists of equal length. Songs must be hashable (e.g. names).

    Returns
    -------
    tuple of list
        ``(child1, child2)``; the parents are left untouched.

    Raises
    ------
    ValueError
        If the parents have different lengths.

    The playlist length is taken from the parents rather than from the global
    ``NUM_SONGS``. Playlists shorter than two songs have no valid crossover
    point and are returned as plain copies.
    """
    size = len(parent1)
    if len(parent2) != size:
        raise ValueError("crossover requires parents of equal length")
    if size < 2:
        return list(parent1), list(parent2)

    # select random crossover point
    crossover_point = random.randint(1, size - 1)

    # Sets make the "already in the other playlist?" test O(1).
    in_parent1, in_parent2 = set(parent1), set(parent2)

    child1, child2 = [], []
    for own1, own2 in zip(parent1[:crossover_point], parent2[:crossover_point]):
        # Take the other parent's song only when it cannot create a duplicate.
        child1.append(own2 if own2 not in in_parent1 else own1)
        child2.append(own1 if own1 not in in_parent2 else own2)

    # from the crossover point onward each child matches its own parent
    child1.extend(parent1[crossover_point:])
    child2.extend(parent2[crossover_point:])

    return child1, child2

def perform_crossover(selected_population):
    """Breed a full generation of ``POPULATION_SIZE`` children.

    Two distinct parents are drawn at random from ``selected_population`` for
    every pair of children; any surplus child is dropped at the end.
    """
    # Every mating yields two children, so round the pair count up.
    pairs_needed = (POPULATION_SIZE + 1) // 2

    offspring_population = []
    for _pair in range(pairs_needed):
        mother, father = random.sample(selected_population, 2)
        first_child, second_child = crossover(mother, father)
        offspring_population.append(first_child)
        offspring_population.append(second_child)

    return offspring_population[:POPULATION_SIZE]

# go back to a population of POPULATION_SIZE (50) with its individuals crossed

In [303]:
# MUTATION
def mutation(individual, songs):
    """Randomly replace songs of a playlist, in place.

    Each position is replaced with probability ``MUTATION_RATE`` by a song
    from ``songs`` that is not currently in the playlist, so a mutation never
    introduces a duplicate.

    Parameters
    ----------
    individual : list
        Playlist to mutate; it is modified in place.
    songs : dict
        Pool of candidate songs keyed by song id.

    Returns
    -------
    list
        The same ``individual`` object.

    If every song of the pool is already in the playlist there is nothing to
    swap in and mutation stops, instead of retrying forever.
    """
    catalogue = list(songs)  # built once, not on every attempt

    for i in range(len(individual)):
        if random.random() < MUTATION_RATE:
            in_playlist = set(individual)
            candidates = [song for song in catalogue if song not in in_playlist]
            if not candidates:
                break
            individual[i] = random.choice(candidates)
    return individual

def perform_mutation(offspring_population, songs):
    """Apply ``mutation`` to every playlist and return the results in order."""
    mutated_population = []
    for individual in offspring_population:
        mutated_population.append(mutation(individual, songs))
    return mutated_population

In [304]:
# step 5 create the main loop for the genetic algorithm

def genetic_algorithm(songs, population, desired_values):
    """Evolve ``population`` for ``NUM_GENERATIONS`` rounds and return the best playlist.

    Each round keeps the fittest playlists, breeds a new generation from them
    and mutates it. The individual of the final generation with the lowest
    fitness is returned (the first one in case of a tie).
    """
    current_generation = population
    for _generation in range(NUM_GENERATIONS):
        parents = selection(current_generation, desired_values)
        children = perform_crossover(parents)
        current_generation = perform_mutation(children, songs)

    # Lowest fitness wins; min() keeps the first individual among equals.
    return min(
        current_generation,
        key=lambda individual: calculate_fitness(individual, desired_values),
    )

In [313]:
# step 6 run the GA with the desired parameters

# Targets for this run.
desired_values = dict(length=4800, energy=80, valence=70, pop=60)

best_individual = genetic_algorithm(
    songs=songs,
    population=population,
    desired_values=desired_values,
)
print("Best Individual:", best_individual)

Best Individual: ['The Gangsta, The Killa And The Dope Dealer', 'Cash Out (feat. ScHoolboy Q, PARTYNEXTDOOR & D.R.A.M.)', 'DUCKWORTH.', 'Go Off (with Lil Uzi Vert, Quavo & Travis Scott)', 'Slow Motion', 'Soundtrack 2 My Life', 'Victory (feat. The Notorious B.I.G. & Busta Rhymes)', 'Hard Knock Life (Ghetto Anthem)', 'New Freezer (feat. Kendrick Lamar)', 'Disco Inferno', 'Amanhecer', 'N.Y. State of Mind', 'Declaração de Amor', 'Backseat Freestyle', 'No Problem (feat. Lil Wayne & 2 Chainz)', 'Gimme the Loot - 2005 Remaster', 'BRACKETS', 'Strictly Business', 'Moment Of Truth', 'RGF Island']


In [314]:
# Fitness of the winning playlist: its summed distance to the targets, so lower is better.
calculate_fitness(best_individual, desired_values)

557.9999999999999