In [1]:
import pandas as pd
import numpy as np

import gurobipy as gb
from gurobipy import *

In [2]:
# Replace non-ASCII with '_'
def sanitize_name(name):
    return ''.join([c if ord(c) < 128 else '_' for c in name]) 

In [3]:
df = pd.read_csv("df2_with_labels_and_genders.csv")
df = df.iloc[:,1:]
df = df.sample(n=1000)
df

Unnamed: 0,track_id,artists,album_name,track_name,popularity,duration_ms,explicit,danceability,energy,key,...,speechiness,acousticness,instrumentalness,liveness,valence,tempo,time_signature,track_genre,album_label,artist_gender
5851,1PMyULgDs4FZNhFi7MPw8n,Siddharth Slathia,Haare Haare Hum To Dil Se Haare (Unplugged Ver...,Haare Haare Hum To Dil Se Haare - Unplugged Ve...,65,157648,False,0.480,0.369,7,...,0.0405,0.835000,0.00000,0.1000,0.5480,159.687,4,pop,bfm records,male
3623,1MfSho5QSHYxSAZ2nxZFDt,Carcass,Surgical Steel (Complete Edition),Thrasher's Abattoir,15,110426,False,0.148,0.998,6,...,0.2320,0.000003,0.18400,0.1840,0.0615,197.312,4,grindcore,Nuclear Blast,Unknown
3192,0YhWnmiXMwmSVQU9KL1cdf,DVBBS;Space Primates;GASHI,SLEEP,Say It (feat. GASHI),46,192755,False,0.698,0.846,4,...,0.0465,0.041000,0.00000,0.1660,0.1100,127.012,4,progressive-house,Unknown,Unknown
6294,2SSYLcSiT7oImgsS8cPyQu,DJ Fresh;Rita Ora,Nextlevelism,Hot Right Now (feat. RITA ORA) - Radio Edit,59,182333,False,0.524,0.972,4,...,0.0431,0.006560,0.00058,0.2240,0.4760,175.017,4,drum-and-bass,Ministry of Sound,female
1764,6f280Iw58re3P3ac9xrSNJ,Maurice Ravel;Duo Synopsis,"Ravel, Schuloff, Honegger, Schnittke: 20th Cen...","Sonata for Violin and Cello in C Major, M. 73:...",0,207040,False,0.389,0.206,5,...,0.0354,0.948000,0.34400,0.0967,0.2750,81.968,4,classical,EMI Classics,male
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6893,2awk2br4XeJn2L6Dl9stuj,Cali Y El Dandee;Reik,Halloween 2022 Perreo Vol. 5,Borracho De Amor,1,175640,False,0.620,0.754,9,...,0.1160,0.057800,0.00000,0.0993,0.8280,90.064,4,latin,Unknown,Unknown
1579,4TFYvTpA7QLnCcMlugJj6W,Wyatt Cenac,Furry Dumb Fighter,Ladies and Germs,20,488605,True,0.524,0.532,10,...,0.9480,0.884000,0.00000,0.7650,0.5100,55.704,3,comedy,Aspecialthing Records,male
5380,0LbeHtEANtIeQeUK5VZpvl,音樂磁場,音樂磁場19-台語經典名曲,無人熟識,22,264933,False,0.673,0.351,9,...,0.0282,0.253000,0.00289,0.0820,0.5440,100.002,4,mandopop,Unknown,Unknown
2527,5BI2cPWqgrtJINrbhO5tKw,Nockis,Weihnachten Playlist,Am Weihnachtsabend fehlst du mir,0,213306,False,0.653,0.779,9,...,0.0254,0.205000,0.00000,0.1360,0.8560,111.945,4,disco,Raumklang,Unknown


In [4]:
model = gb.Model("Spotify Recommendation")
# We ask Gurobi not to print too much on screen
model.Params.OutputFlag = 0

#Importing User preference
dfw = pd.read_csv("User_Music_Preferences.csv")
dfw = dfw.sample(n = 1)

#Importing data from the dataset
trackNames = df['track_name'].fillna('Unknown Track').astype(str)
popularity = df['popularity'].reset_index(drop=True)
duration = df['duration_ms'].reset_index(drop=True)
explicit = df['explicit'].reset_index(drop=True)
danceability = df['danceability'].reset_index(drop=True)
energy = df['energy'].reset_index(drop=True)
key = df['key'].reset_index(drop=True)
loudness = df['loudness'].reset_index(drop=True)
speechiness = df['speechiness'].reset_index(drop=True)
acousticness = df['acousticness'].reset_index(drop=True)
instrumentalness = df['instrumentalness'].reset_index(drop=True)
liveness = df['liveness'].reset_index(drop=True)
valence = df['valence'].reset_index(drop=True)
tempo = df['tempo'].reset_index(drop=True)
trackGenre = df['track_genre'].reset_index(drop=True)
albumLabel = df['album_label'].reset_index(drop=True)
artistGender = df['artist_gender'].reset_index(drop=True)
allGenres = trackGenre.unique()

n =len(trackNames)
ng = len(allGenres)
M = 100000
popularityCriteria = 1
proportionThreshold = 0.2
explicitValue = dfw['Explicit']
#Limiting the character names
shortenedNames = [sanitize_name(name[:150]) for name in trackNames]

#Main variables
t = model.addVars(n, vtype = GRB.BINARY, name = shortenedNames)
y = model.addVars(n, vtype = GRB.BINARY, name = "auxiliray variables for Popularity Constraint")
g = model.addVars(ng, vtype = GRB.BINARY, name = [genre for genre in allGenres])

#Weightages
w = []
w.append(int(dfw["totalDanceability"]))
w.append(int(dfw["totalEnergy"]))
w.append(int(dfw["totalSpeechiness"]))
w.append(int(dfw["totalAcousticness"]))
w.append(int(dfw["totalInstrumentalness"]))
w.append(int(dfw["totalLiveness"]))
w.append(int(dfw["totalValence"]))

#Objectives
totalPopularity = gb.quicksum(popularity[i] * t[i] for i in range(n))
totalDanceability = gb.quicksum(danceability[i] * t[i] for i in range(n))
totalEnergy = gb.quicksum(energy[i] * t[i] for i in range(n))
totalSpeechiness = gb.quicksum(speechiness[i] * t[i] for i in range(n))
totalAcousticness = gb.quicksum(acousticness[i] * t[i] for i in range(n))
totalInstrumentalness = gb.quicksum(instrumentalness[i] * t[i] for i in range(n))
totalLiveness = gb.quicksum(liveness[i] * t[i] for i in range(n))
totalValence = gb.quicksum(valence[i] * t[i] for i in range(n))
weightedTotal = w[0]*totalDanceability + w[1]*totalEnergy + w[2]*totalSpeechiness + w[3]*totalAcousticness + w[4]*totalInstrumentalness + w[5]*totalLiveness + w[6]*totalValence

#Objective Functions
model.setObjectiveN(totalPopularity,priority = 1, index = 1)
model.setObjectiveN(weightedTotal, priority = 1, index = 0)
model.ModelSense = GRB.MAXIMIZE

#Constraints

#Max Duration
model.addConstr(gb.quicksum(duration[i]*t[i] for i in range(n)) <= 1800000, name = "Max Duration")

#Popularity Threshold
explicitCriteria = 1
for i in range(n):    
    model.addConstr(popularity[i]*t[i] >=  popularityCriteria*y[i], name = f"Popularity Threshold for track {i+1}a")
    model.addConstr(t[i] <= y[i], name = f"Popularity Threshold for track {i+1}b")
#Explicit Content    
model.addConstr(gb.quicksum(explicit[i]*t[i] for i in range(n)) <= M*explicitCriteria, name = "Explicit Constraint")

#Mapping Genres
for i in range(n):
    # Get the index of the genre for the current track
    genre_index = list(allGenres).index(trackGenre[i])
    # If track `i` is selected, ensure that the corresponding genre variable `g` is set to 1
    model.addConstr(g[genre_index] == t[i], name=f"Genre Constraint for track {i+1}")

#Female Artists
female_sum = gb.quicksum(t[i] for i in range(n) if artistGender[i] == "female")
total_sum = gb.quicksum(t[i] for i in range(n))
model.addConstr(female_sum >= proportionThreshold * total_sum, name="Female Proportion Constraint")

model.optimize()

Set parameter Username
Academic license - for non-commercial use only - expires 2025-08-29


  w.append(int(dfw["totalDanceability"]))
  w.append(int(dfw["totalEnergy"]))
  w.append(int(dfw["totalSpeechiness"]))
  w.append(int(dfw["totalAcousticness"]))
  w.append(int(dfw["totalInstrumentalness"]))
  w.append(int(dfw["totalLiveness"]))
  w.append(int(dfw["totalValence"]))


In [5]:
# Collect results
selected_tracks = []
total_duration = 0
total_popularity_score = 0
weighted_score = 0
female_count = 0
total_selected_tracks = 0

print("\nSelected Tracks:")
print("---------------")
for i in range(n):
    if t[i].x > 0:  # If track is selected
        track_info = {
            "Name": shortenedNames[i],
            "Genre": trackGenre[i],
            "Popularity": popularity[i],
            "Duration (ms)": duration[i],
            "Danceability": danceability[i],
            "Energy": energy[i],
            "Speechiness": speechiness[i],
            "Acousticness": acousticness[i],
            "Instrumentalness": instrumentalness[i],
            "Liveness": liveness[i],
            "Valence": valence[i],
            "Artist Gender": artistGender[i]
        }
        selected_tracks.append(track_info)
        
        # Summing up the metrics for selected tracks
        total_duration += duration[i]
        total_popularity_score += popularity[i]
        weighted_score += (
            w[0] * danceability[i] +
            w[1] * energy[i] +
            w[2] * speechiness[i] +
            w[3] * acousticness[i] +
            w[4] * instrumentalness[i] +
            w[5] * liveness[i] +
            w[6] * valence[i]
        )

        # Count tracks by female artists
        if artistGender[i] == "female":
            female_count += 1
        
        # Count total selected tracks
        total_selected_tracks += 1

# Print selected tracks with details
for track in selected_tracks:
    print(f"Track: {track['Name']}")
    print(f"  Genre: {track['Genre']}")
    print(f"  Popularity: {track['Popularity']}")
    print(f"  Duration (ms): {track['Duration (ms)']}")
    print(f"  Attributes:")
    print(f"    Danceability: {track['Danceability']}, Energy: {track['Energy']}")
    print(f"    Speechiness: {track['Speechiness']}, Acousticness: {track['Acousticness']}")
    print(f"    Instrumentalness: {track['Instrumentalness']}, Liveness: {track['Liveness']}")
    print(f"    Valence: {track['Valence']}")
    print(f"  Artist Gender: {track['Artist Gender']}")
    print("")

# Print summary of the key metrics for the selected tracks
print("\nSummary of Selected Tracks:")
print("--------------------------")
print(f"Total Duration (ms): {total_duration}")
print(f"Total Popularity Score: {total_popularity_score}")
print(f"Weighted Score: {weighted_score:.2f}")

# Female artist constraint result
female_proportion = female_count / total_selected_tracks if total_selected_tracks > 0 else 0
print("\nFemale Artist Proportion:")
print("-------------------------")
print(f"Female Artists: {female_count} out of {total_selected_tracks} selected tracks")
print(f"Proportion of Female Artists: {female_proportion:.2%}")
print(f"Proportion Threshold Required: {proportionThreshold:.2%}")

# Optional: Printing auxiliary variable results for debugging
print("\nAuxiliary Variables:")
print("---------------------")
for var in y.values():
    if var.x > 0:
        print(f"{var.varName} = {var.x}")
print("---------------------")
for genre_var in g.values():
    if genre_var.x > 0:
        print(f"{genre_var.varName} = {genre_var.x}")


Selected Tracks:
---------------
Track: Farq hai
  Genre: indian
  Popularity: 57
  Duration (ms): 184090
  Attributes:
    Danceability: 0.8, Energy: 0.209
    Speechiness: 0.0408, Acousticness: 0.865
    Instrumentalness: 4.11e-05, Liveness: 0.252
    Valence: 0.74
  Artist Gender: Unknown

Track: Tum Jo Aaye
  Genre: indian
  Popularity: 66
  Duration (ms): 286443
  Attributes:
    Danceability: 0.574, Energy: 0.841
    Speechiness: 0.0948, Acousticness: 0.399
    Instrumentalness: 1e-06, Liveness: 0.205
    Valence: 0.927
  Artist Gender: male

Track: Vinayagane Vinai Theerpavane Revival
  Genre: indian
  Popularity: 47
  Duration (ms): 184117
  Attributes:
    Danceability: 0.42, Energy: 0.666
    Speechiness: 0.077, Acousticness: 0.818
    Instrumentalness: 0.123, Liveness: 0.377
    Valence: 0.653
  Artist Gender: female

Track: Dil Beparvah - The Dewarists, Season 5
  Genre: indian
  Popularity: 54
  Duration (ms): 250000
  Attributes:
    Danceability: 0.762, Energy: 0.391
  

In [6]:
import pandas as pd

# Define the data
data = {
    "Track Name": ["Best Thing", "Tell Me Why", "Cherry Wine", "Far Away From", "War With Her", "Waves", 
                   "Say Goodbye", "Space Makes", "Tell Me Why (Taylor Swift)"],
    "Genre": ["Chill"] * 9,
    "Popularity": [58, 58, 65, 57, 64, 66, 64, 57, 58],
    "Duration (ms)": [219065, 161016, 173286, 132800, 193373, 133747, 150415, 136419, 142267],
    "Danceability": [0.479, 0.770, 0.740, 0.663, 0.622, 0.840, 0.706, 0.557, 0.684],
    "Energy": [0.364, 0.621, 0.563, 0.236, 0.671, 0.338, 0.297, 0.429, 0.0892],
    "Speechiness": [0.0606, 0.454, 0.0400, 0.0507, 0.026, 0.0436, 0.132, 0.189, 0.0609],
    "Acousticness": [0.902, 0.447, 0.340, 0.892, 0.014, 0.135, 0.468, 0.387, 0.981],
    "Instrumentalness": [0.000081, 0, 0, 0.302, 0.304, 0.0225, 0, 0.000478, 0.951],
    "Liveness": [0.101, 0.158, 0.0824, 0.0788, 0.304, 0.0635, 0.0725, 0.192, 0.261],
    "Valence": [0.372, 0.301, 0.577, 0.236, 0.572, 0.927, 0.123, 0.596, 0.223],
    "Artist Gender": ["Female", "Unknown", "Male", "Male", "Male", "Unknown", "Male", "Unknown", "Female"]
}

# Create a DataFrame
df = pd.DataFrame(data)

# Display the DataFrame
print(df)

# If you want to display it as an HTML table (e.g., in Jupyter Notebook)
from IPython.display import display
display(df.style.set_table_styles(
    [{'selector': 'table', 'props': [('border-collapse', 'collapse')]},
     {'selector': 'th, td', 'props': [('border', '1px solid black'), ('padding', '5px')]}]
).set_caption("Detailed Summary of Selected Tracks"))


                   Track Name  Genre  Popularity  Duration (ms)  Danceability  \
0                  Best Thing  Chill          58         219065         0.479   
1                 Tell Me Why  Chill          58         161016         0.770   
2                 Cherry Wine  Chill          65         173286         0.740   
3               Far Away From  Chill          57         132800         0.663   
4                War With Her  Chill          64         193373         0.622   
5                       Waves  Chill          66         133747         0.840   
6                 Say Goodbye  Chill          64         150415         0.706   
7                 Space Makes  Chill          57         136419         0.557   
8  Tell Me Why (Taylor Swift)  Chill          58         142267         0.684   

   Energy  Speechiness  Acousticness  Instrumentalness  Liveness  Valence  \
0  0.3640       0.0606         0.902          0.000081    0.1010    0.372   
1  0.6210       0.4540         0.44

Unnamed: 0,Track Name,Genre,Popularity,Duration (ms),Danceability,Energy,Speechiness,Acousticness,Instrumentalness,Liveness,Valence,Artist Gender
0,Best Thing,Chill,58,219065,0.479,0.364,0.0606,0.902,8.1e-05,0.101,0.372,Female
1,Tell Me Why,Chill,58,161016,0.77,0.621,0.454,0.447,0.0,0.158,0.301,Unknown
2,Cherry Wine,Chill,65,173286,0.74,0.563,0.04,0.34,0.0,0.0824,0.577,Male
3,Far Away From,Chill,57,132800,0.663,0.236,0.0507,0.892,0.302,0.0788,0.236,Male
4,War With Her,Chill,64,193373,0.622,0.671,0.026,0.014,0.304,0.304,0.572,Male
5,Waves,Chill,66,133747,0.84,0.338,0.0436,0.135,0.0225,0.0635,0.927,Unknown
6,Say Goodbye,Chill,64,150415,0.706,0.297,0.132,0.468,0.0,0.0725,0.123,Male
7,Space Makes,Chill,57,136419,0.557,0.429,0.189,0.387,0.000478,0.192,0.596,Unknown
8,Tell Me Why (Taylor Swift),Chill,58,142267,0.684,0.0892,0.0609,0.981,0.951,0.261,0.223,Female
