In [4]:
import random

import pandas as pd
import numpy as np
from scipy import stats

# from oauth2client.service_account import ServiceAccountCredentials 
from googleapiclient.discovery import build
from google.oauth2.service_account import Credentials 

In [5]:
# --- Google Sheets access configuration ---

# Service-account key file, relative to the notebook's working directory.
CREDENTIALS_PATH_GOOGLE = "../google-credentials.json"

# OAuth scopes requested for the service account (read-only spreadsheet access).
SCOPES = [
    "https://www.googleapis.com/auth/spreadsheets.readonly",
]

# Identifier of the spreadsheet that is queried further down.
SPREADSHEET = "1b75J-QTGrujSgF9r0_JPOKkcXAwzFVwpETOAyVBw8ak"

In [6]:
# Load service account credentials from the JSON key file, restricted to SCOPES.
# Raises FileNotFoundError when the key file is missing at CREDENTIALS_PATH_GOOGLE
# (see the recorded output of this cell).
__credentials = Credentials.from_service_account_file(CREDENTIALS_PATH_GOOGLE, scopes=SCOPES)

# Creates Google Sheets API (v4/latest) service.
# `service` is the client object the following cells use to query the spreadsheet.
service = build('sheets', 'v4', credentials=__credentials)

FileNotFoundError: [Errno 2] No such file or directory: '../google-credentials.json'

In [38]:
# Fetch the "Notations" range of the Ach! Musik spreadsheet through the Sheets service.
sheet_values = service.spreadsheets().values()
response = sheet_values.get(spreadsheetId=SPREADSHEET, range='Notations').execute()

# The first returned row holds the column names; the remaining rows are the data.
values = response['values']
headers = values.pop(0)

In [39]:
# Format data as pd.DataFrame: the rows left in `values` become the records and the
# popped header row provides the column names.
data = pd.DataFrame(values, columns=headers)
# Bare expression so the notebook renders the frame.
data

Unnamed: 0,genre,sub_genre,artist,album,song,Qu,Gr,Vi,Ro,Sa,Gl,Rx,Cl,Lu
0,Rock,Alternative,4 Non Blondes,"Bigger, Better, Faster, More!",What's Up,875,85,,87,,,,,
1,Pop,Synth,A-Ha,Hunting High and Low,Take on Me,8,,,9,,,,,
2,Pop,Disco,Abba,Abba,"I do, I do, I do, I do, I do",8,85,5,8,,,,,5
3,Pop,Disco,Abba,Abba,Mamma Mia,85,875,65,83,,6,,,
4,Pop,Disco,Abba,Abba,SOS,9,9,7,84,,625,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2051,Hip-Hop,Rap,"Dr. Dre, Snoop Dogg",2001,The Next Episode,,82,,,,,,,
2052,Hip-Hop,Rap,"Dr. Dre, Eminem",2001,Forgot About Dre,,7,,,,,,,
2053,Hip-Hop,Rap,"Dr. Dre, Snoop Dogg",The Chronic,"Nuthin' But A ""G"" Thang",,775,,,,,,,
2054,Hip-Hop,Rap,2Pac,All Eyez On Me,Ambitionz Az A Ridah,,775,,,,,,,


In [40]:
# Saving as csv for later use; index=False leaves the row index out of the file.
# NOTE(review): decimal="," only changes how float values are written -- confirm whether
# the grade columns are floats or comma-decimal strings at this point.
data.to_csv("../data/achmusik.csv", index=False, decimal=",")

In [7]:
# Reload the grades from the CSV file written by the cell above
# (presumably so the rest of the notebook can run without Google Sheets access -- confirm).
data = pd.read_csv("../data/achmusik.csv")
# Bare expression so the notebook renders the frame.
data

Unnamed: 0,genre,sub_genre,artist,album,song,Qu,Gr,Vi,Ro,Sa,Gl,Rx,Cl,Lu
0,Rock,Alternative,4 Non Blondes,"Bigger, Better, Faster, More!",What's Up,875,85,,87,,,,,
1,Pop,Synth,A-Ha,Hunting High and Low,Take on Me,8,,,9,,,,,
2,Pop,Disco,Abba,Abba,"I do, I do, I do, I do, I do",8,85,5,8,,,,,5
3,Pop,Disco,Abba,Abba,Mamma Mia,85,875,65,83,,6,,,
4,Pop,Disco,Abba,Abba,SOS,9,9,7,84,,625,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2051,Hip-Hop,Rap,"Dr. Dre, Snoop Dogg",2001,The Next Episode,,82,,,,,,,
2052,Hip-Hop,Rap,"Dr. Dre, Eminem",2001,Forgot About Dre,,7,,,,,,,
2053,Hip-Hop,Rap,"Dr. Dre, Snoop Dogg",The Chronic,"Nuthin' But A ""G"" Thang",,775,,,,,,,
2054,Hip-Hop,Rap,2Pac,All Eyez On Me,Ambitionz Az A Ridah,,775,,,,,,,


In [8]:
# Getting the decimals right -- commas to points, missing grades become NaN.
# The five descriptive columns become the index so that only grade columns remain.
data = data.set_index(["genre", "sub_genre", "artist", "album", "song"])

for column in data.columns:
    # Cast to str first: a column that read_csv already parsed as numeric has no usable
    # `.str` accessor, and in a mixed object column `.str.replace` would silently turn
    # the numeric entries into NaN, losing real grades.
    as_text = data[column].astype(str).str.replace(",", ".", regex=False)
    # Anything that is not a number (empty cells, "nan", stray text) is coerced to NaN.
    data[column] = pd.to_numeric(as_text, errors="coerce")

In [49]:
kept_people = ["Qu", "Gr", "Vi", "Ro"]
default_grade = 5

# Keeping only present people at the hypothetical party!
data = data.filter(kept_people)

# Songs nobody present has graded still get a chance to play: they receive the default
# grade for everyone. Graded songs come first, ungraded ones are appended after them.
# pd.concat is used because DataFrame.append was removed in pandas 2.0.
ungraded = data.isnull().all(axis=1)
data = pd.concat([data[~ungraded], data[ungraded].fillna(default_grade)])
data

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Qu,Gr,Vi,Ro
genre,sub_genre,artist,album,song,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Rock,Alternative,4 Non Blondes,"Bigger, Better, Faster, More!",What's Up,8.75,8.50,,8.7
Pop,Synth,A-Ha,Hunting High and Low,Take on Me,8.00,,,9.0
Pop,Disco,Abba,Abba,"I do, I do, I do, I do, I do",8.00,8.50,5.0,8.0
Pop,Disco,Abba,Abba,Mamma Mia,8.50,8.75,6.5,8.3
Pop,Disco,Abba,Abba,SOS,9.00,9.00,7.0,8.4
...,...,...,...,...,...,...,...,...
Metal,Symphonique,Within Temptation,The Unforgiving,Murder,5.00,5.00,5.0,5.0
Metal,Symphonique,Within Temptation,The Unforgiving,Shot in the Dark,5.00,5.00,5.0,5.0
Metal,Symphonique,Within Temptation,The Unforgiving,Sinead,5.00,5.00,5.0,5.0
Metal,Symphonique,Within Temptation,The Unforgiving,Stairway to the Skies,5.00,5.00,5.0,5.0


## Score voting

In [69]:
# Weight applied to (number of grades - COUNT_INHIB) when computing a song's score.
COUNT_FACTOR = 0.3
# Number of grades at which the count bonus is neutral: half of the kept people, rounded down.
COUNT_INHIB = len(kept_people) // 2
# Songs must score strictly above this value to stay in the pool.
MIN_SCORE = 6
# Size of the sampled playlist (used as a fraction when below 1, as a count otherwise).
PLAYLIST_SIZE = 200
# Every grade of a song must be strictly above this value for the song to be kept.
ELIMINATING_GRADE = 4.9

# Create the derived columns up front so the next cell can rely on them being the last
# four columns of the frame, even when it is re-run.
for derived_column in ("mean", "count", "score", "rank"):
    data[derived_column] = 0

In [70]:
# Grade columns are selected by name (everything that is not a derived statistic) instead
# of by position, so the result does not depend on how many helper columns exist, on
# whether they were pre-initialised, or on this cell being re-run.
derived_columns = ["mean", "count", "score", "rank"]
grade_columns = [column for column in data.columns if column not in derived_columns]

# Mean of all grades for each track (NaN grades are ignored)
data["mean"] = data[grade_columns].mean(axis=1)

# Number of grades actually given to each track
data["count"] = data[grade_columns].count(axis=1)

# Helping songs graded by more people in the group: bonus above COUNT_INHIB grades,
# penalty below
data["score"] = data["mean"] + (COUNT_FACTOR * (data["count"] - COUNT_INHIB))

# Truncating to keep only the acceptable songs (score strictly above MIN_SCORE)
data = data[data["score"] > MIN_SCORE]

# Best songs first; "rank" grows with the score (ties share the lowest rank of the tie)
# and is used as the sampling weight when the playlist is drawn.
data = data.sort_values("score", ascending=False)
data["rank"] = data["score"].rank(method="min")
data

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Qu,Gr,Vi,Ro,mean,count,score,rank
genre,sub_genre,artist,album,song,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Rock,Psychedelic,David Bowie,David Bowie,Space Oddity,9.75,8.75,9.75,9.0,9.3125,4,9.9125,1859.0
Rock,,The Beatles,Abbey Road,Here Comes The Sun,10.00,9.25,9.50,8.4,9.2875,4,9.8875,1858.0
Rock,,Dire Straits,Dire Straits,Sultans Of Swing,10.00,9.25,9.50,8.3,9.2625,4,9.8625,1857.0
Rock,Glam,David Bowie,Hunky Dory,Life on Mars,9.50,9.50,8.50,9.2,9.1750,4,9.7750,1856.0
Rock,,The Beatles,The Beatles,While My Guitar Gently Weeps,9.75,9.50,,9.1,9.4500,3,9.7500,1855.0
...,...,...,...,...,...,...,...,...,...,...,...,...
Metal,Symphonique,Within Temptation,Mother Earth,Never Ending Story,6.50,,,,6.5000,1,6.2000,5.0
Hip-Hop,,Caballero,Laisse Nous Faire Vol.1,Discret Mais Efficace,6.25,,6.00,,6.1250,2,6.1250,1.0
Hip-Hop,,Caballero,Laisse Nous Faire Vol.1,Patinoire,5.25,,7.00,,6.1250,2,6.1250,1.0
Jazz,Hip-Hop,Kenichiro Nishihara,Jazzy Folklore,My Love My Life,6.25,,6.00,,6.1250,2,6.1250,1.0


In [71]:
# Keep only the tracks whose lowest grade is strictly above ELIMINATING_GRADE.
# The last four columns are the derived statistics, so they are left out of the minimum.
grades_only = data.iloc[:, :-4]
lowest_grade = grades_only.min(axis=1)
data = data.loc[lowest_grade > ELIMINATING_GRADE]
data

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Qu,Gr,Vi,Ro,mean,count,score,rank
genre,sub_genre,artist,album,song,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Rock,Psychedelic,David Bowie,David Bowie,Space Oddity,9.75,8.75,9.75,9.0,9.3125,4,9.9125,1859.0
Rock,,The Beatles,Abbey Road,Here Comes The Sun,10.00,9.25,9.50,8.4,9.2875,4,9.8875,1858.0
Rock,,Dire Straits,Dire Straits,Sultans Of Swing,10.00,9.25,9.50,8.3,9.2625,4,9.8625,1857.0
Rock,Glam,David Bowie,Hunky Dory,Life on Mars,9.50,9.50,8.50,9.2,9.1750,4,9.7750,1856.0
Rock,,The Beatles,The Beatles,While My Guitar Gently Weeps,9.75,9.50,,9.1,9.4500,3,9.7500,1855.0
...,...,...,...,...,...,...,...,...,...,...,...,...
Metal,Symphonique,Within Temptation,Mother Earth,Never Ending Story,6.50,,,,6.5000,1,6.2000,5.0
Hip-Hop,,Caballero,Laisse Nous Faire Vol.1,Discret Mais Efficace,6.25,,6.00,,6.1250,2,6.1250,1.0
Hip-Hop,,Caballero,Laisse Nous Faire Vol.1,Patinoire,5.25,,7.00,,6.1250,2,6.1250,1.0
Jazz,Hip-Hop,Kenichiro Nishihara,Jazzy Folklore,My Love My Life,6.25,,6.00,,6.1250,2,6.1250,1.0


In [315]:
# Draw the playlist at random, weighting each song by its rank (better songs are more
# likely to be picked). PLAYLIST_SIZE below 1 is read as a fraction of the pool,
# otherwise as a number of songs.
if PLAYLIST_SIZE < 1:
    playlist = data.sample(frac=PLAYLIST_SIZE, weights="rank")
else:
    # Sampling is without replacement, so asking for more songs than are left after
    # filtering would raise a ValueError; cap the request at the pool size.
    playlist = data.sample(n=min(PLAYLIST_SIZE, len(data)), weights="rank")

playlist

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Qu,Gr,Vi,Ro,mean,count,score,rank
genre,sub_genre,artist,album,song,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Dance,Electro,French 79,Olympic,Vertigo Valley,8.25,,7.0,,7.625000,2,7.625000,563.0
Rock,,The Beatles,Revolver,Eleanor Rigby,8.50,9.00,8.5,,8.666667,3,8.966667,1724.0
Rock,Progressive,Pink Floyd,The Dark Side Of The Moon,Breathe (In The Air),9.50,8.75,,,9.125000,2,9.125000,1786.0
Rock,Rockabilly,Elvis Presley,,Hound Dog,8.25,8.50,,,8.375000,2,8.375000,1269.0
Rock,Progressive,Supertramp,Breakfast in America,Oh Darling,10.00,8.75,8.0,,8.916667,3,9.216667,1807.0
...,...,...,...,...,...,...,...,...,...,...,...,...
Metal,Industrial,Marilyn Manson,The Pale Emperor,Killing Strangers,8.50,,,8.9,8.700000,2,8.700000,1568.0
Electro,Disco,Todd Terje,It's Album Time,Strandbar,9.00,,8.5,,8.750000,2,8.750000,1608.0
Jazz,OST,Kristofer Maddigan,Cuphead,The King's Court,8.25,,7.5,,7.875000,2,7.875000,778.0
Electro,Pop,Parcels,Live Vol. 1,Enter,8.75,,9.0,5.5,7.750000,3,8.050000,988.0


## Genre matrix and distances

In [316]:
# Value used for genre pairs with no transition grade in the file (comma decimal, like
# the rest of the file) and the threshold the ordering loop resets to after each accepted song.
DEFAULT_TRANSITION = "4,0"
DEFAULT_THRESHOLD = 7

# Genre-to-genre transition grades; the first (unnamed) CSV column holds the row labels.
transitions = pd.read_csv("../data/transitions.csv").fillna(DEFAULT_TRANSITION)
transitions = transitions.set_index("Unnamed: 0")

# Getting the decimals right -- commas to points and no more Nones
for column in transitions.columns:
    # Cast to str first: columns that read_csv parsed as numeric have no usable `.str`
    # accessor, and in a mixed object column `.str.replace` would turn the numeric
    # entries into NaN.
    as_text = transitions[column].astype(str).str.replace(",", ".", regex=False)
    transitions[column] = pd.to_numeric(as_text, errors="coerce")

# The ordering cells expect a 0..n-1 row index with genre/sub_genre/artist/album/song as
# the first five columns. Only move the index levels into columns when they are still in
# the index; otherwise (e.g. on a re-run) just renumber the rows, since a plain
# reset_index would insert a spurious "index" column and shift the column positions.
if isinstance(playlist.index, pd.MultiIndex):
    playlist.reset_index(inplace=True)
else:
    playlist.reset_index(drop=True, inplace=True)

In [317]:
# Seed the ordered playlist with the first sampled song, keeping only its first five
# (descriptive) columns, then take that song out of the pool that is still to be placed.
# (The original author notes this whole ordering step is far from optimized.)
first_song = playlist.iloc[0:1]
shuffled_playlist = first_song.drop(columns=playlist.columns[5:])
playlist.drop(0, inplace=True)

# Genre and artist of the song currently at the end of the ordered playlist.
opening_song = shuffled_playlist.iloc[0]
current_genre = opening_song["genre"]
current_artist = opening_song["artist"]

In [320]:
# Greedy ordering of the sampled playlist.
#
# The pool is scanned repeatedly. A song is accepted when the transition grade from the
# current genre to its genre, plus a bonus growing with the length of the current
# same-genre chain, is above `threshold` and its artist differs from the previous song's.
# When a full pass accepts nothing, the threshold is lowered by `desperation_factor`;
# once it drops below zero every remaining song is accepted, so the loop always ends.
threshold = 8
chain = 0
chain_factor = .6
desperation_factor = .6
remove_indices = []
# Accepted rows are collected here and concatenated once at the end: DataFrame.append was
# removed in pandas 2.0, and growing a frame row by row is quadratic anyway.
accepted_songs = []

while playlist.size > 0:
    for index, song in playlist.iterrows():
        transition_score = transitions[current_genre][song["genre"]] + (chain * chain_factor)
        fits = transition_score > threshold and song["artist"] != current_artist
        if fits or threshold < 0:
            # Song accepted -- increment or reset chain
            if current_genre == song["genre"]:
                chain += 1
            else:
                current_genre = song["genre"]
                chain = 0
            # Remember the artist just played; without this update the back-to-back
            # check would only ever compare against the very first song's artist.
            current_artist = song["artist"]

            # Keep the descriptive columns only and mark the song for removal from the pool
            accepted_songs.append(song.drop(playlist.columns[5:]))
            remove_indices.append(index)
            threshold = DEFAULT_THRESHOLD

    # Removing songs that were added during the for loop
    if remove_indices:
        playlist.drop(remove_indices, inplace=True)
        remove_indices = []
    else:
        threshold -= desperation_factor

# Each accepted row keeps its original index label, as it did with row-wise appending.
if accepted_songs:
    shuffled_playlist = pd.concat([shuffled_playlist, pd.DataFrame(accepted_songs)])

shuffled_playlist

Unnamed: 0,genre,sub_genre,artist,album,song
0,Dance,Electro,French 79,Olympic,Vertigo Valley
1,Rock,,The Beatles,Revolver,Eleanor Rigby
2,Rock,Progressive,Pink Floyd,The Dark Side Of The Moon,Breathe (In The Air)
3,Rock,Rockabilly,Elvis Presley,,Hound Dog
4,Rock,Progressive,Supertramp,Breakfast in America,Oh Darling
...,...,...,...,...,...
194,Metal,Progressive,Tool,Fear Inoculum,Pneuma
195,Metal,Industrial,Marilyn Manson,The Pale Emperor,Killing Strangers
196,Electro,Disco,Todd Terje,It's Album Time,Strandbar
152,Electro,Alternative,Gorillaz,Gorillaz,M1 A1


In [306]:
# Write the ordered playlist to ../test.csv; index=False leaves the row index out of the file.
shuffled_playlist.to_csv("../test.csv", index=False)

In [249]:
# Bare expression: render the genre transition matrix for inspection.
transitions

Unnamed: 0_level_0,Acapella,Acoustic,Blues,Chanson,Classique,Country,Dance,Disco,Electro,Folk,Funk,Hip-Hop,Jazz,Metal,Musical,OST,Pop,R&B,Reggae,Rock
Unnamed: 0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
Acapella,10,4.0,4,4.0,4.0,4.0,325,4,4.0,4.0,4,4.0,4,1,4.0,4.0,4.0,4.0,4.0,4
Acoustic,4,10.0,4,4.0,4.0,4.0,4,4,4.0,4.0,4,4.0,4,4,4.0,4.0,4.0,4.0,4.0,4
Blues,4,4.0,10,4.0,4.0,4.0,2,4,4.0,4.0,35,4.0,4,4,4.0,4.0,4.0,4.0,4.0,4
Chanson,4,4.0,4,10.0,4.0,4.0,4,4,4.0,4.0,4,4.0,4,4,4.0,4.0,4.0,4.0,4.0,4
Classique,4,4.0,4,4.0,10.0,4.0,4,4,4.0,4.0,4,4.0,4,4,4.0,4.0,4.0,4.0,4.0,4
Country,4,4.0,4,4.0,4.0,10.0,4,4,4.0,4.0,4,4.0,4,4,4.0,4.0,4.0,4.0,4.0,4
Dance,325,4.0,2,4.0,4.0,4.0,10,4,4.0,4.0,4,4.0,4,4,4.0,4.0,4.0,4.0,4.0,4
Disco,4,4.0,4,4.0,4.0,4.0,4,10,4.0,4.0,4,4.0,4,1,4.0,4.0,4.0,4.0,4.0,575
Electro,4,4.0,4,4.0,4.0,4.0,4,4,10.0,4.0,4,4.0,4,4,4.0,4.0,4.0,4.0,4.0,4
Folk,4,4.0,4,4.0,4.0,4.0,4,4,4.0,10.0,4,4.0,4,4,4.0,4.0,4.0,4.0,4.0,4


# Playground

In [42]:
# Move the index levels back into regular columns (the next cell selects a column by name;
# presumably "artist" lives in the index before this call -- confirm).
data = data.reset_index()

In [150]:
# Graders to average over, the column to group by, and the minimum number of fully
# graded songs a group needs in order to be considered.
COL = ["Qu", "Gr", "Vi"]
BY = "artist"
AMNT = 10

# Only songs graded by every person in COL take part.
fully_graded = data[[BY, *COL]].dropna(how="any")
# Discard groups with fewer than AMNT such songs.
well_covered = fully_graded.groupby(BY).filter(lambda group: len(group) >= AMNT)
# Per-group mean grade of each person.
best = well_covered.groupby(BY).mean()[COL]

# Average the people's means per group and show the ten highest.
overall_mean = best[COL].mean(axis=1)
overall_mean.sort_values(ascending=False).head(10)

artist
The Beatles         8.296474
The Doors           8.238636
Pink Floyd          8.069444
System Of A Down    7.986667
Ghost               7.800000
Daft Punk           7.732143
Gojira              7.666667
Gorillaz            7.601852
Muse                7.568182
Alice in Chains     7.282051
dtype: float64