In [159]:
import random

import pandas as pd
import numpy as np
from scipy import stats

# from oauth2client.service_account import ServiceAccountCredentials 
from googleapiclient.discovery import build
from google.oauth2.service_account import Credentials 

In [5]:
CREDENTIALS_PATH_GOOGLE = 'google-credentials.json'
SCOPES = ['https://www.googleapis.com/auth/spreadsheets.readonly']
SPREADSHEET = '1b75J-QTGrujSgF9r0_JPOKkcXAwzFVwpETOAyVBw8ak'

In [6]:
# Load service account credentials.
__credentials = Credentials.from_service_account_file(CREDENTIALS_PATH_GOOGLE, scopes=SCOPES)

# Creates Google Sheets API (v4/latest) service.
service = build('sheets', 'v4', credentials=__credentials)

In [22]:
# Gets values from Ach! Musik: Notations sheet.
values = service.spreadsheets().values().get(spreadsheetId=SPREADSHEET, range='Notations').execute()['values']
headers = values.pop(0)

In [31]:
# Format data as pd.DataFrame
data = pd.DataFrame(values, columns=headers)
data

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,ntQ,ntG,ntV,ntR,ntS,ntGl,ntRx,ntC,ntL
genre,sub_genre,artist,album,song,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
Pop,Disco,Abba,Abba,"I do, I do, I do, I do, I do",8,85,5,8,,,,,5
Pop,Disco,Abba,Abba,Mamma Mia,85,875,65,83,,6,,,
Pop,Disco,Abba,Abba,SOS,9,9,7,84,,625,,,
Pop,Disco,Abba,Arrival,"Knowing me, knowing you",75,85,55,65,,,,,
Pop,Disco,Abba,Super Trouper,Lay all your love on me,8,85,4,85,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
Rock,Hard Rock,ZZ Top,Eliminator,Sharp Dressed Man,9,,85,9,,,,,
Rock,Hard Rock,ZZ Top,Eliminator,Thug,825,,875,,,,,,
Rock,Hard Rock,ZZ Top,Eliminator,TV Dinners,775,,7,,,,,,
Rock,Surf Vocal,The Beach Boys,All Summer Long,I Get Around,85,,,,,,,,


In [9]:
# Saving as csv for later use
data.to_csv("../data/achmusik.csv", index=True, decimal=",")

In [183]:
data = pd.read_csv("../data/achmusik.csv")

In [184]:
# Getting the decimals right -- commas to points and no more Nones
data = data.set_index(["genre", "sub_genre", "artist", "album", "song"])
data.fillna(value="", inplace=True)

for i in range(data.columns.size):
    data[data.columns[i]] = data[data.columns[i]].str.replace(",", ".")
    data[data.columns[i]] = pd.to_numeric(data[data.columns[i]], errors='coerce')

In [185]:
kept_people = ["Qu", "Gr", "Vi", "Ro"]
default_grade = 5

# Keeping only present people at the hypothetical party!
data = data.filter(kept_people)

# Hard to do this shit inplace -- if no grades at all, give it a chance to play with default grade
data = data.dropna(how="all").append(data[data.isnull().all(axis=1)].fillna(default_grade))
data

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Qu,Gr,Vi,Ro
genre,sub_genre,artist,album,song,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Pop,Disco,Abba,Abba,"I do, I do, I do, I do, I do",8.0,8.50,5.0,8.0
Pop,Disco,Abba,Abba,Mamma Mia,8.5,8.75,6.5,8.3
Pop,Disco,Abba,Abba,SOS,9.0,9.00,7.0,8.4
Pop,Disco,Abba,Arrival,"Knowing me, knowing you",7.5,8.50,5.5,6.5
Pop,Disco,Abba,Super Trouper,Lay all your love on me,8.0,8.50,4.0,8.5
...,...,...,...,...,...,...,...,...
Metal,Symphonique,Within Temptation,The Unforgiving,Murder,5.0,5.00,5.0,5.0
Metal,Symphonique,Within Temptation,The Unforgiving,Shot in the Dark,5.0,5.00,5.0,5.0
Metal,Symphonique,Within Temptation,The Unforgiving,Sinead,5.0,5.00,5.0,5.0
Metal,Symphonique,Within Temptation,The Unforgiving,Stairway to the Skies,5.0,5.00,5.0,5.0


## Score voting

In [186]:
COUNT_FACTOR = .1
COUNT_INHIB = len(kept_people) // 2
MIN_SCORE = 5
PLAYLIST_SIZE = 300  # Can be an int or a fraction 0 < q <= 1

# To avoid having to hard code the amount of columns for cases where the next cell is re-ran, we initialize columns
data["mean"] = 0
data["count"] = 0
data["score"] = 0
data["rank"] = 0

In [187]:
# Mean of all notes for each track
data["mean"] = data[data.columns[:-4]].mean(axis=1)

# Amount of notes for each track
data["count"] = data.count(axis=1) - 4

# Helping songs graded by more people in the group
data["score"] = data["mean"] + (COUNT_FACTOR * (data["count"] - COUNT_INHIB))

# Truncating to keep only the acceptable songs
data = data[data["score"] > MIN_SCORE]

data = data.sort_values("score", ascending=False)
data["rank"] = data["score"].rank(method="min")
data

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Qu,Gr,Vi,Ro,mean,count,score,rank
genre,sub_genre,artist,album,song,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Rock,,Dire Straits,Dire Straits,Sultans Of Swing,10.00,,,,10.000000,1,9.900000,1685.0
Rock,Acoustic,Eric Clapton,Unplugged,Layla,10.00,,9.75,9.0,9.583333,3,9.683333,1683.0
Rock,,The Beatles,Abbey Road,Here Comes The Sun,10.00,9.25,9.50,,9.583333,3,9.683333,1683.0
Rock,Progressive,Pink Floyd,Animals,Pigs (Three Different Ones),9.75,,,,9.750000,1,9.650000,1682.0
Rock,,The Beatles,The Beatles,While My Guitar Gently Weeps,9.75,9.50,,9.1,9.450000,3,9.550000,1681.0
...,...,...,...,...,...,...,...,...,...,...,...,...
Metal,Symphonique,Within Temptation,The Unforgiving,Where is the Edge,5.00,5.00,5.00,5.0,5.000000,4,5.200000,5.0
Pop,Indie,Charlotte Cardin,Main Girl,Main Girl,5.25,,,,5.250000,1,5.150000,4.0
Hip-Hop,Punk,Stupeflip,The Hypnoflip Invasion,Dangereux !!,5.25,,5.00,,5.125000,2,5.125000,3.0
Reggae,Ska-Punk,The Skints,Swimming Lessons,Gets on Top,,,,5.2,5.200000,1,5.100000,2.0


In [135]:
if PLAYLIST_SIZE < 1:
    playlist = data.sample(frac=PLAYLIST_SIZE, weights="rank")
else:
    playlist = data.sample(n=PLAYLIST_SIZE, weights="rank")

playlist

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,ntQ,ntG,ntV,ntR,mean,count,score,rank
genre,sub_genre,artist,album,song,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Hip Hop,,MC Hammer,Please Hammer Don't Hurt 'Em,U Can't Touch This,7.75,,,,7.750000,1,7.650000,533.0
Metal,Melodic Death,Amon Amarth,Berserker,Crack the Sky,8.50,8.5,6.50,,7.833333,3,7.933333,765.0
Metal,Power Metal,Powerwolf,The Sacrament of Sin,Fist by Fist (Sacralize or Strike),,,,8.9,8.900000,1,8.800000,1309.0
Metal,Thrash Metal,Metallica,...And Justice for All,Blackened,,8.5,,,8.500000,1,8.400000,1058.0
Metal,Thrash,Megadeth,Rust in Peace,Hangar 18,9.00,9.0,,8.3,8.766667,3,8.866667,1318.0
...,...,...,...,...,...,...,...,...,...,...,...,...
Rock,Hard Rock,ZZ Top,Eliminator,Thug,8.25,,8.75,,8.500000,2,8.500000,1137.0
Metal,Doom/Gothic,Paradise Lost,Draconian Times,Shadowkings,7.00,7.5,,,7.250000,2,7.250000,307.0
Jazz,OST,Kristofer Maddigan,Cuphead,Inkwell Hell,8.50,,8.00,,8.250000,2,8.250000,946.0
Metal,Progressive,Tool,Lateralus,Parabola,,9.0,,,9.000000,1,8.900000,1338.0


In [177]:
ADJACENT_GENRES = {"Metal": ["Rock", "Metal", "Grunge"], 
                   "Rock": ["Metal", "Rock", "Punk", "Grunge", "Pop", "Blues"], 
                   "Jazz": ["Classique", "Jazz", "Acoustic", "Blues"],
                   "Classique": ["Jazz", "Classique", "Acoustic"]
                   "Electro": ["Dance", "Pop", "Electro", "House"],
                   "Dance": ["Dance", "Electro", "Pop", "Disco", "House", "Funk"],
                   "Funk": ["Funk", "Disco", "House", "Pop"],
                   "Pop": ["Funk", "Disco", "House", "Electro", "Dance", "Rock"]}

# Listing genres to prepare distance matrix
genres_labels = pd.DataFrame([genre for genre in playlist.reset_index()["genre"].unique()], columns=["genre"])
# genres_labels["index"] = np.arange(0, len(genres), 1)
genres_labels

genres_labels

Unnamed: 0,genre
0,Hip Hop
1,Metal
2,Rock
3,Funk
4,Jazz
5,Electro
6,Disco
7,Chanson
8,Classique
9,Pop


In [163]:
# Rearranging playlist to avoid sudden genre changes
genres = [playlist for _, playlist in playlist.groupby("genre")]
random.shuffle(genres)

pd.concat(genres)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,ntQ,ntG,ntV,ntR,mean,count,score,rank
genre,sub_genre,artist,album,song,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Metal,Melodic Death,Amon Amarth,Berserker,Crack the Sky,8.50,8.5,6.50,,7.833333,3,7.933333,765.0
Metal,Power Metal,Powerwolf,The Sacrament of Sin,Fist by Fist (Sacralize or Strike),,,,8.9,8.900000,1,8.800000,1309.0
Metal,Thrash Metal,Metallica,...And Justice for All,Blackened,,8.5,,,8.500000,1,8.400000,1058.0
Metal,Thrash,Megadeth,Rust in Peace,Hangar 18,9.00,9.0,,8.3,8.766667,3,8.866667,1318.0
Metal,Progressive,Leprous,The Congregation,The Flood,9.00,8.0,,,8.500000,2,8.500000,1137.0
...,...,...,...,...,...,...,...,...,...,...,...,...
Electro,Alt Rock,Gorillaz,Demon Days,Kids With Guns,9.75,,9.25,,9.500000,2,9.500000,1431.0
Electro,Pop,Parcels,Live Vol. 1,Redline,8.00,,8.75,,8.375000,2,8.375000,1027.0
Electro,Alt Rock,Gorillaz,Plastic Beach,On Melancholy Hill,8.50,,8.50,,8.500000,2,8.500000,1137.0
Electro,Pop,Parcels,Live Vol. 1,Overnight,8.25,,9.00,,8.625000,2,8.625000,1205.0


In [78]:
top_artists = data.groupby("artist").filter(lambda x: len(x) > 4).groupby("artist").mean()["mean"].sort_values(ascending=False).tail(10)
top_artists

artist
Nightwish            6.856818
Leprous              6.773551
Paradise Lost        6.763889
Stand High Patrol    6.655556
The Skints           6.631250
Herbie Hancock       6.621429
Happy End            6.543403
Within Temptation    6.177604
Neurosis             5.879167
The Pretenders       5.590909
Name: mean, dtype: float64

## Transforming notes into ranks

In [102]:
# Default pandas ranking
for i in range (5, data.columns.size):
    data[data.columns[i]] = data[data.columns[i]].rank()
data

Unnamed: 0,genre,sub_genre,artist,album,song,ntQ,ntG,ntV,ntR,ntS,ntGl,ntRx,ntC,ntL
0,Pop,Disco,Abba,Abba,"I do, I do, I do, I do, I do",701.5,347.0,43.5,107.0,,,,,3.0
1,Pop,Disco,Abba,Abba,Mamma Mia,980.5,434.0,162.0,131.0,,8.5,,,
2,Pop,Disco,Abba,Abba,SOS,1170.0,503.5,242.5,138.5,,11.0,,,
3,Pop,Disco,Abba,Arrival,"Knowing me, knowing you",377.5,347.0,58.5,26.0,,,,,
4,Pop,Disco,Abba,Super Trouper,Lay all your love on me,701.5,347.0,16.0,143.5,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1485,Rock,Hard Rock,ZZ Top,Eliminator,If I Could Only Flag Her Down,377.5,,242.5,,,,,,
1486,Rock,Hard Rock,ZZ Top,Eliminator,Legs,1219.5,,461.5,150.5,,,,10.0,
1487,Rock,Hard Rock,ZZ Top,Eliminator,Sharp Dressed Man,1170.0,,582.5,179.0,,,,,
1488,Rock,Hard Rock,ZZ Top,Eliminator,Thug,846.0,,641.5,,,,,,
