In [1]:
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import MultiLabelBinarizer
import pandas as pd

# Reading data from a CSV file
data = pd.read_csv('../data.csv')

data["popularity"] = data["popularity"].apply(lambda pop: pop/100)
data["tempo"] = data["tempo"].apply(lambda tpo: tpo/100)
data["loudness"] = data["loudness"].apply(lambda ldn: ldn/40)
data["popularity"] = data["popularity"].fillna(data["popularity"].median())
data["genres"] = data["genres"].fillna("")
data["genres"] = data["genres"].str.split(".")

df = pd.DataFrame(data)[:-10]

df = df.drop(columns=['songname','artist', 'id'])

# Define input features and target variables
features = df[['time', 'dayofweek', 'month', 'temp']]
targets = df.drop(columns=['time', 'dayofweek', 'month', 'temp', 'genres'])

classifier_target = df['genres']

mlb = MultiLabelBinarizer()
classifier_target = mlb.fit_transform(classifier_target)

In [2]:
from sklearn.preprocessing import LabelEncoder
from src.python_files.num_predict import train_gradient_boosting_regressor
from src.python_files.cat_predict import train_random_forest_classifier

model = train_random_forest_classifier(features, classifier_target)[0]

genres_arr = []
for item in df['genres']:
    genres_arr.append(item[0])
le = LabelEncoder()
genres_encoded = le.fit_transform(genres_arr)
features.insert(4, 'genres', genres_encoded)

multi_output_gbr = train_gradient_boosting_regressor(features, targets)[0]

In [3]:
from src.python_files.record import get_weather_info, get_hour_info, get_weekday_info, get_month_info
import nest_asyncio
import asyncio

nest_asyncio.apply()

temp = float(asyncio.run(get_weather_info()))
time = float(get_hour_info())
day = float(get_weekday_info())
month = float(get_month_info())

In [4]:
from rapidfuzz import fuzz

# time, dow, month, temp
cat_predict_value = pd.DataFrame([[time, day, month, temp]], columns=["time", "dayofweek", "month", "temp"])

class_predict = model.predict(cat_predict_value)

probabilities = model.predict_proba(cat_predict_value)

top_probs_idx = []
# idx, value
top_prob = [0, 0]

for prob_idx in range(len(probabilities)):
    if len(probabilities[prob_idx][0]) == 2 and probabilities[prob_idx][0][1] > .1:
        top_probs_idx.append(prob_idx)
        if top_prob[1] < probabilities[prob_idx][0][1] and mlb.classes_[prob_idx] != "":
            top_prob[1] = probabilities[prob_idx][0][1]
            top_prob[0] = prob_idx
            
comparison_cats = []

for idx in top_probs_idx:
    if mlb.classes_[idx] == "":
        continue
    print(mlb.classes_[idx])
    print(fuzz.token_set_ratio(mlb.classes_[top_prob[0]], mlb.classes_[idx]))
    if fuzz.token_set_ratio(mlb.classes_[top_prob[0]], mlb.classes_[idx]) >= 10:
        comparison_cats.append(mlb.classes_[idx])

num_predict_cats = le.transform([comparison_cats[0]])
    
num_predict_value = pd.DataFrame([[time, day, month, temp, num_predict_cats]], columns=["time", "dayofweek", "month", "temp", "genres"])    

# Make predictions
y_pred = multi_output_gbr.predict(num_predict_value)
 
print(comparison_cats)
# print(y_pred)
# print((df.drop(columns=['time', 'dayofweek', 'month', 'temp', 'genres'])).keys())


 alt z
100.0
hip hop
16.66666666666667
indie hip hop
11.111111111111114
indie r&b
14.285714285714292
pop
0.0
rap
25.0
[' alt z', 'hip hop', 'indie hip hop', 'indie r&b', 'rap']


In [5]:
dataset = pd.read_csv('../dataset.csv')

predicted_cols=['popularity', 'danceability', 'energy', 'loudness', 'speechiness', 'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo', 'duration_ms']

ignore_cols = ["duration_ms", "popularity", "loudness"]

prediction_weighting = [
    1, #popularity
    1, #danceability
    1.5, #energy
    0, #loudness
    1, #speechiness
    .9, #acousticness
    .9, #instrumentalness
    .1, #liveness
    1.5, #valence
    2.5, #tempo
    1 #duration ms
]

# data = dataset
# 
# dataset["popularity"] = dataset["popularity"].apply(lambda pop: pop/100)
# dataset["tempo"] = dataset["tempo"].apply(lambda tpo: tpo/100)
# dataset["loudness"] = dataset["loudness"].apply(lambda ldn: ldn/40)

song_similarity_dict = {}

def calc_similarity_ratings(data):
        accuracy_score = 0
        intersection = False
        popularity_thresh = False
        for col_idx in range(len(predicted_cols)):
            if predicted_cols[col_idx] not in ignore_cols:
                # need to figure out how to do this for best results
                if (y_pred[0][col_idx] > .5 and data[predicted_cols[col_idx]][i] > .5) or (y_pred[0][col_idx] < .5 and data[predicted_cols[col_idx]][i] < .5):
                    accuracy_score += abs(y_pred[0][col_idx] - data[predicted_cols[col_idx]][i]) * prediction_weighting[col_idx]
                else:
                    accuracy_score += abs(pow(y_pred[0][col_idx] - data[predicted_cols[col_idx]][i], 2)) * prediction_weighting[col_idx]
        for genre in data["genres"][i]:
            for comparison_genre in comparison_cats: 
                if fuzz.token_set_ratio(genre, comparison_genre) >= 50:
                    intersection = True      
        if not intersection:
            accuracy_score += 0
        if data["popularity"][i] > y_pred[0][predicted_cols.index("popularity")] - 10:
            popularity_thresh = True
        song_artists = data["artist"][i]
        
        return accuracy_score, intersection, popularity_thresh, song_artists

for i in range(len(data["songname"])):
    if data["songname"][i] not in song_similarity_dict.keys():
        accuracy_score, intersection, popularity_thresh, song_artists = calc_similarity_ratings(data)
        song_similarity_dict[data["songname"][i]] = {"accuracy" : accuracy_score, "intersection" : intersection, "popularity" : popularity_thresh, "artists" : song_artists, "data_idx" : i}
    

In [6]:
def get_closest_song(drop_score):
    best_score = 10000
    best_song_name = ""
    best_artists_name = ""
    
    for songname in song_similarity_dict.keys():
        
        song = song_similarity_dict[songname]

        if song["accuracy"] < best_score and songname not in best_names and song["popularity"] - 10:
            best_score = song["accuracy"]
            best_song_name = songname
            best_artists_name = song["artists"]
        
    if best_score >= drop_score:
        return "DROP", "DROP"
    else:
        return best_song_name, best_artists_name
    

In [7]:
from src.python_files.spotify_helper import get_recs
from src.python_files.record import get_spotify_audio_features

drop_score = 1
playlist_length = 50

best_names = []
best_artists = []

top_song, top_artist = get_closest_song(drop_score)

# if top_song != "DROP":
#     recs = get_recs(top_song, top_artist)
#     rec_data = []
#     for rec in recs:
#         print(get_spotify_audio_features({"playback" : rec}))
    
"""
    danceability
energy
speechiness
acousticness
instrumentalness
liveness
valence
tempo
"""
    

for x in range(playlist_length):
    best_song_name, best_artists_name = get_closest_song(drop_score)
    
    if best_song_name == "DROP":
        break
    
    # print(best_song_name + " score: " + str(best_score) + " by " + song_similarity_dict[best_song_name]["artists"])

    best_names.append(best_song_name)
    best_artists.append(song_similarity_dict[best_song_name]["artists"])
    
print(comparison_cats)
print(best_names)
# print(best_artists)

[' alt z', 'hip hop', 'indie hip hop', 'indie r&b', 'rap']
['Cariño', 'Worth It.', 'Rollerblades', "I Love You I'm Sorry", 'Hush - Still Woozy Remix', 'us. (feat. Taylor Swift)', 'The Bird Song', 'Tu Corazón Es Mío...', "I've Been In Love", 'Passionfruit', 'C U Girl', 'Underdressed at the Symphony', 'Otro Atardecer', 'Risk', 'Musta Been a Ghost', 'Roommates', 'Dead Weight', 'In My Life - Remastered 2009', 'You (feat. Travis Scott)', 'Never Felt So Alone', 'I miss you I’m sorry', 'Amoeba', 'Leftovers', "Let's Stay Together", 'Matilda', 'Close The Door', 'Reckless & Sweet', 'safety', 'Where Are You Going', 'Pure', 'Reflections', 'People Watching', 'Linger - SiriusXM Session', 'Garden Kisses', 'Tadow', 'Everybody Wants To Rule The World', 'double take', 'FORFOURFORE', "That's Life", 'No One Noticed', 'SIDEKICK (with Joyce Wrice) - BONUS', 'Big Black Car', 'Slowly', 'lowkey', 'BIRDS OF A FEATHER', 'Beautiful Things', 'Sailor Song', 'Bruise', 'Smith & Westin', 'Jet Fuel']


In [8]:
import os
from dotenv import load_dotenv
from src.python_files.spotify_helper import update_playlist

load_dotenv()

playlist_id = os.getenv('PLAYLIST_ID')
update_playlist(best_names, best_artists, playlist_id)

# update_playlist(best_names, best_artists)

In [9]:
from tabulate import tabulate

RED = "\033[91m"
GREEN = "\033[92m"
RESET = "\033[0m"

pred_act = [["track", "accuracy"], ["", "0"]]

for col_idx in range(len(predicted_cols)):
    if predicted_cols[col_idx] not in ignore_cols:
        pred_act[0].append(predicted_cols[col_idx])
        pred_act[1].append(str(round(y_pred[0][col_idx], 3)))
    
outlier_dist = .07
    
for name in best_names:
    r = song_similarity_dict[name]["data_idx"]
    temp_arr = [round(song_similarity_dict[name]["accuracy"], 3)]
    for c in predicted_cols:
        if c not in ignore_cols:
            cr_data = data[c][r]
            if abs(float(pred_act[1][len(temp_arr) + 1]) - cr_data) < outlier_dist:
                temp_arr.append(str(data[c][r]))
            elif float(pred_act[1][len(temp_arr) + 1]) - cr_data > outlier_dist:
                temp_arr.append(RED + str(data[c][r]) + RESET)
            else:
                temp_arr.append(GREEN + str(data[c][r]) + RESET)
    temp_arr.insert(0, name[:20])
    pred_act.append(temp_arr)
        
print(tabulate(pred_act, headers='firstrow', tablefmt='plain')) 

track                   accuracy    danceability    energy    speechiness    acousticness    instrumentalness    liveness    valence    tempo
                           0               0.566     0.547         0.055            0.416            0.119         0.148       0.378  1.104
Cariño                     0.411           [92m0.646[0m     [91m0.47[0m          0.0284           0.4              [91m0.0348[0m        0.11        [92m0.651[0m  1.13995
Worth It.                  0.414           [92m0.736[0m     0.483         0.0682           [92m0.505[0m            [91m1.39e-05[0m      [92m0.351[0m       [92m0.606[0m  1.09932
Rollerblades               0.443           0.566     [91m0.448[0m         0.0799           [92m0.54[0m             [91m7.22e-05[0m      0.0956      [91m0.199[0m  1.10802
I Love You I'm Sorry       0.446           0.53      [91m0.406[0m         0.033            [92m0.656[0m            [91m0[0m             0.133       0.338  1.1594
Hush - 