In [1]:
from src.python_files.manual_clustering import get_clustering_genres
from sklearn.preprocessing import MultiLabelBinarizer
from src.python_files.spotify_helper import get_genres
import pandas as pd

# Read the listening history, dropping the last ten rows.
# .copy() detaches the slice from the frame returned by read_csv, so the
# column assignments below act on an independent frame instead of a slice.
data = pd.read_csv('../data.csv').iloc[:-10].copy()

# Rescale by fixed divisors (vectorized; same values as the per-element form).
data["popularity"] = data["popularity"] / 100
data["tempo"] = data["tempo"] / 100
data["loudness"] = data["loudness"] / 40

# Missing popularity values take the median of the already-rescaled column.
data["popularity"] = data["popularity"].fillna(data["popularity"].median())

# Genres arrive as one "."-separated string per row; missing entries become
# the empty string before splitting, so every row ends up holding a list.
data["genres"] = data["genres"].fillna("").str.split(".")

# Identifier columns are not used for modelling.
df = data.drop(columns=['songname', 'artist', 'id'])

# Define input features and target variables
context_cols = ['time', 'dayofweek', 'month', 'temp']
features = df[context_cols]
targets = df.drop(columns=context_cols + ['genres'])

# spotify_genre_seeds = get_genres()['genres']
spotify_genre_seeds = get_clustering_genres()


def _simplify_genres(song_genres):
    """Collapse a song's raw genre strings onto the seed genres.

    For each raw genre (in order) every seed that occurs as a substring of it
    is emitted, in seed order. A seed matched by several raw genres appears
    once per match, so the result may contain duplicates.
    """
    return [
        seed
        for song_genre in song_genres
        for seed in spotify_genre_seeds
        if seed in song_genre
    ]


# Replace every row's raw genre list with its simplified counterpart.
data['genres'] = data['genres'].map(_simplify_genres)

In [2]:
from src.python_files.num_predict import train_gradient_boosting_regressor
from src.python_files.cat_predict import train_random_forest_classifier


# Context-only predictors used by the genre classifier.
context_cols = ['time', 'dayofweek', 'month', 'temp']

# One-hot encode the per-song genre lists: one 0/1 column per genre label.
mlb = MultiLabelBinarizer()
classifier_target = pd.DataFrame(
    mlb.fit_transform(data['genres']),
    columns=mlb.classes_,
)

# Always re-select the context columns from `features`. This cell rebinds
# `features` further down to include the genre columns, so without this
# selection a second run of the cell would train the classifier on inputs
# that already contain its own labels.
context_features = features[context_cols]

# Stage 1: context -> genres.
model = train_random_forest_classifier(context_features, classifier_target)[0]

# Stage 2: context + genres -> numeric audio features.
features = pd.concat([context_features, classifier_target], axis=1)

multi_output_gbr = train_gradient_boosting_regressor(features, targets)[0]

In [3]:
from src.python_files.record import get_weather_info, get_hour_info, get_weekday_info, get_month_info
import nest_asyncio
import asyncio

# Allow asyncio.run() to be used even though the notebook kernel already
# has an event loop running.
nest_asyncio.apply()

# Snapshot of the current context, every value coerced to float.
# The weather lookup is a coroutine and is awaited first; the three
# remaining readers are plain calls evaluated in hour, weekday, month order.
temp = float(asyncio.run(get_weather_info()))
time, day, month = (
    float(read_value())
    for read_value in (get_hour_info, get_weekday_info, get_month_info)
)

In [4]:
import numpy as np

# Single-row context frame; column order: time, dow, month, temp
cat_predict_value = pd.DataFrame(
    {"time": [time], "dayofweek": [day], "month": [month], "temp": [temp]}
)

class_predict = model.predict(cat_predict_value)

probabilities = model.predict_proba(cat_predict_value)

# idx, value
top_prob = [0, 0]

# One entry of `probabilities` per genre label. A label is switched on when
# its probability row has two entries and the second one exceeds 0.2; rows
# with any other number of entries are always switched off.
# NOTE(review): this reads index 1 as "genre present" — presumably the
# classes are ordered [0, 1]; verify against the fitted model.
genre_prediction = np.array([
    1 if len(label_proba[0]) == 2 and label_proba[0][1] > .2 else 0
    for label_proba in probabilities
]).reshape(1, -1)

genre_df = pd.DataFrame(genre_prediction, columns=mlb.classes_)

# Regressor input = context columns followed by the predicted genre flags.
num_predict_value = pd.concat([cat_predict_value, genre_df], axis=1)

# Make predictions
y_pred = multi_output_gbr.predict(num_predict_value)

print(mlb.inverse_transform(genre_prediction))


[('pop',)]


In [5]:
dataset = pd.read_csv('../dataset.csv')

# Weight applied to each predicted column when scoring a song. The insertion
# order of this mapping defines the order of both lists derived from it.
# NOTE(review): positions in predicted_cols are used to index y_pred[0] —
# presumably they must match the regressor's target column order; verify.
weight_by_col = {
    'popularity': 1,
    'danceability': 1,
    'energy': 1.5,
    'loudness': 0,
    'speechiness': 1,
    'acousticness': .9,
    'instrumentalness': .9,
    'liveness': .1,
    'valence': 1.5,
    'tempo': 2.5,
    'duration_ms': 1,
}

predicted_cols = list(weight_by_col)
prediction_weighting = list(weight_by_col.values())

# Columns that are predicted but left out of the similarity score.
ignore_cols = ["duration_ms", "popularity", "loudness"]

# `data` is a second name for the same frame, not a copy.
data = dataset

# Same fixed divisors as used for the training data.
for scaled_col, divisor in (("popularity", 100), ("tempo", 100), ("loudness", 40)):
    dataset[scaled_col] = dataset[scaled_col] / divisor

# songname -> {"accuracy", "popularity", "artists", "data_idx"}; the first row
# seen for a given song name wins.
song_similarity_dict = {}


def calc_similarity_ratings(data, row_idx=None):
    """Score how far one catalogue row is from the predicted audio features.

    Parameters
    ----------
    data : DataFrame
        Catalogue frame; must provide every scored column of
        ``predicted_cols`` plus "popularity" and "artist".
    row_idx : index label, optional
        Row to score. When omitted, the notebook-global loop variable ``i``
        is used, which keeps the old single-argument call working.

    Returns
    -------
    tuple
        ``(accuracy_score, popularity_thresh, song_artists)`` where
        ``accuracy_score`` is the weighted distance accumulated over the
        scored columns, ``popularity_thresh`` is True when the row's
        popularity is above 0.7, and ``song_artists`` is the row's "artist"
        value.
    """
    if row_idx is None:
        # Legacy behaviour: fall back to the loop variable of the calling cell.
        row_idx = i

    accuracy_score = 0
    for col_idx, col_name in enumerate(predicted_cols):
        if col_name in ignore_cols:
            continue

        predicted = y_pred[0][col_idx]
        actual = data[col_name][row_idx]
        gap = predicted - actual

        # need to figure out how to do this for best results
        same_side = (predicted > .5 and actual > .5) or (predicted < .5 and actual < .5)
        # Same side of 0.5 -> absolute gap; otherwise -> squared gap.
        # NOTE(review): for gaps below 1 the squared term is smaller than the
        # absolute one, so opposite-side mismatches are penalised less than
        # same-side ones — confirm this is intended.
        penalty = abs(gap) if same_side else gap ** 2
        accuracy_score += penalty * prediction_weighting[col_idx]

    popularity_thresh = bool(data["popularity"][row_idx] > .7)
    song_artists = data["artist"][row_idx]

    return accuracy_score, popularity_thresh, song_artists


for i in range(len(data["songname"])):
    song_name = data["songname"][i]
    if song_name in song_similarity_dict:
        continue
    accuracy_score, popularity_thresh, song_artists = calc_similarity_ratings(data, i)
    song_similarity_dict[song_name] = {
        "accuracy": accuracy_score,
        "popularity": popularity_thresh,
        "artists": song_artists,
        "data_idx": i,
    }
    

In [6]:
def get_closest_song(drop_score, exclude=None, candidates=None):
    """Return the best-scoring popular song that has not been picked yet.

    Parameters
    ----------
    drop_score : number
        Cut-off. If the best eligible score is greater than or equal to this
        value (or there is no eligible song at all) nothing is selected.
    exclude : container of str, optional
        Song names to skip. Defaults to the notebook-global ``best_names``.
    candidates : dict, optional
        Mapping ``songname -> {"accuracy", "popularity", "artists", ...}``.
        Defaults to the notebook-global ``song_similarity_dict``.

    Returns
    -------
    tuple
        ``(song name, artists)`` of the eligible song with the lowest
        "accuracy" value (the first one encountered wins ties), or
        ``("DROP", "DROP")`` when nothing qualifies.
    """
    if exclude is None:
        exclude = best_names
    if candidates is None:
        candidates = song_similarity_dict

    # Infinity instead of a finite sentinel: with no eligible song the result
    # is always "DROP", never an empty name, whatever drop_score is.
    best_score = float("inf")
    best_song_name = ""
    best_artists_name = ""

    for songname, song in candidates.items():
        if song["accuracy"] < best_score and songname not in exclude and song["popularity"]:
            best_score = song["accuracy"]
            best_song_name = songname
            best_artists_name = song["artists"]

    if best_score >= drop_score:
        return "DROP", "DROP"
    return best_song_name, best_artists_name
    

In [7]:
from src.python_files.spotify_helper import get_recs
from src.python_files.record import get_spotify_audio_features

# Selection stops as soon as the best remaining score reaches drop_score,
# or once playlist_length songs have been collected.
drop_score = 100
playlist_length = 50

# Filled in parallel: best_artists[k] belongs to best_names[k].
best_names = []
best_artists = []

# Best match overall; only the commented-out recommendation lookup uses it.
top_song, top_artist = get_closest_song(drop_score)

# if top_song != "DROP":
#     recs = get_recs(top_song, top_artist)
#     rec_data = []
#     for rec in recs:
#         print(get_spotify_audio_features({"playback" : rec}))

"""
    danceability
energy
speechiness
acousticness
instrumentalness
liveness
valence
tempo
"""

# get_closest_song skips names already in best_names, so each pass yields
# the next-best song; exactly one name is appended per pass.
while len(best_names) < playlist_length:
    best_song_name, best_artists_name = get_closest_song(drop_score)

    if best_song_name == "DROP":
        break

    # print(best_song_name + " score: " + str(best_score) + " by " + song_similarity_dict[best_song_name]["artists"])

    best_names.append(best_song_name)
    best_artists.append(song_similarity_dict[best_song_name]["artists"])

print(best_names)
# print(best_artists)

['Breathe (In the Air)', 'this is what falling in love feels like', 'Tum Se Hi', 'Chamber Of Reflection', 'Mrs Magic', 'The Other Side Of Paradise', 'Lucky', 'Pal Pal Dil Ke Paas- Title Track', 'Water Fountain', 'What A Time (feat. Niall Horan)', 'Buttercup', 'Welcome Home, Son', 'Father And Son', 'Agar Tum Saath Ho', 'Agar Tum Saath Ho (From "Tamasha")', 'Let It Go - From "Frozen"/Soundtrack Version', 'Goodbye Yellow Brick Road - Remastered 2014', 'Hold My Girl', 'Mr Loverman', "Rocket Man (I Think It's Going To Be A Long, Long Time)", 'Rock and A Hard Place', 'Angie', 'Happy Together', 'Make You Say', 'I Say a Little Prayer', 'Comfortably Numb', 'Something - Remastered 2009', 'Vienna', 'Slow Dancing in a Burning Room', 'High Enough - RAC Remix', 'Take Me To Church', 'Die For You', 'we fell in love in october', 'Khairiyat', 'Pal', 'like i need u', 'Here Comes The Sun - Remastered 2009', 'Shayad', 'Serendipity - Full Length Edition', "Shouldn't Be", 'abcdefu', 'Killing Me Softly With H

In [8]:
import os
from dotenv import load_dotenv
from src.python_files.spotify_helper import update_playlist

# Pull variables from a local .env file into the process environment.
load_dotenv()

# Target playlist id comes from the environment; it is None when
# PLAYLIST_ID is not set.
playlist_id = os.environ.get('PLAYLIST_ID')

# Push the selected songs (names with their matching artists) to the playlist.
update_playlist(best_names, best_artists, playlist_id)

# update_playlist(best_names, best_artists)

In [9]:
from tabulate import tabulate

# ANSI escape sequences used to colour individual table cells.
RED = "\033[91m"
GREEN = "\033[92m"
RESET = "\033[0m"

# A song's value counts as an outlier when it is not strictly within this
# distance of the (rounded) prediction.
outlier_dist = .07

# (position in y_pred, column name) for every column that takes part in scoring.
shown_cols = [
    (col_idx, col_name)
    for col_idx, col_name in enumerate(predicted_cols)
    if col_name not in ignore_cols
]

# Row 0: headers. Row 1: the predicted values, rounded and stored as strings.
header_row = ["track", "accuracy"] + [col_name for _, col_name in shown_cols]
prediction_row = ["", "0"] + [str(round(y_pred[0][col_idx], 3)) for col_idx, _ in shown_cols]
pred_act = [header_row, prediction_row]

for name in best_names:
    entry = song_similarity_dict[name]
    r = entry["data_idx"]
    # Track names are cut to 20 characters to keep the table narrow.
    song_row = [name[:20], round(entry["accuracy"], 3)]
    # Feature cells start at position 2, right after "track" and "accuracy".
    for cell_pos, (_, c) in enumerate(shown_cols, start=2):
        cr_data = data[c][r]
        gap = float(prediction_row[cell_pos]) - cr_data
        cell_text = str(cr_data)
        if abs(gap) < outlier_dist:
            # Within tolerance: no colour.
            song_row.append(cell_text)
        elif gap > outlier_dist:
            # Prediction exceeds the song's value by more than the tolerance.
            song_row.append(RED + cell_text + RESET)
        else:
            # Every remaining case, including the song's value exceeding the
            # prediction by the tolerance or more.
            song_row.append(GREEN + cell_text + RESET)
    pred_act.append(song_row)

print(tabulate(pred_act, headers='firstrow', tablefmt='plain'))

track                   accuracy    danceability    energy    speechiness    acousticness    instrumentalness    liveness    valence    tempo
                           0               0.611     0.506         0.056           0.389             0.407         0.216       0.393  1.295
Breathe (In the Air)       0.425           [91m0.431[0m     [91m0.373[0m         0.0346          0.389             [92m0.728[0m         [91m0.143[0m       [91m0.253[0m  1.28153
this is what falling       0.577           [91m0.422[0m     0.44          0.0544          [92m0.617[0m             [91m0[0m             [91m0.0837[0m      0.332  1.28934
Tum Se Hi                  0.591           0.609     0.538         0.0273          0.328             [91m0[0m             [91m0.125[0m       [92m0.608[0m  1.30015
Chamber Of Reflectio       0.599           [91m0.538[0m     0.557         0.0914          [91m0.262[0m             [92m0.914[0m         [91m0.102[0m       [92m0.506[0m  1.310