In [1]:
import pandas as pd
import seaborn as sns

import math

from data import load_from_api

In [13]:
# Load the track table (metadata columns plus one grade column per person) from disk.
data = pd.read_csv(filepath_or_buffer="../data/csv/achmusik.csv")

In [3]:
# Tunable parameters read by the playlist-building cell.
people = None            # person columns to keep; None falls back to a default list in that cell
count_factor = 0.05      # score bonus added per grade counted on a track
inhib_factor = 2         # subtracted from the grade count before the bonus is applied
min_score = 7            # a track must score strictly above this to be kept
size = 200               # number of tracks sampled into the playlist
default_grade = 5        # grade given to every kept person on tracks nobody graded
eliminating_grade = 4.6  # a track is dropped unless its lowest grade is above this

In [215]:
if people is None:
    people = ["Qu", "Vi", "Ro"]

# Keep only the grade columns of the people present at the hypothetical party,
# coercing anything non-numeric to NaN.
# NOTE(review): this rebinds `data` to the reduced frame, so metadata columns such as
# "genre" are gone afterwards; cells that read them need a freshly loaded `data`.
data = data.filter(people).apply(pd.to_numeric, errors="coerce")
grade_columns = list(data.columns)

# Tracks nobody graded get `default_grade` for everyone so they still have a chance
# to be played; they are moved after the graded tracks, as before.
# `DataFrame.append` was removed in pandas 2.0, hence `pd.concat`.
ungraded = data.isnull().all(axis=1)
data = pd.concat([data[~ungraded], data[ungraded].fillna(default_grade)])

# Normalize grades (currently disabled)
# data[people] = (data[people] - data[people].mean()) / (data[people].max() - data[people].min())

# Mean of all grades for each track
data["mean"] = data[grade_columns].mean(axis=1)
# Number of grades for each track
data["count"] = data[grade_columns].count(axis=1)
# Helping songs graded by more people in the group
data["score"] = data["mean"] + (count_factor * (data["count"] - inhib_factor))
# Truncating to keep only the acceptable songs
data = data[data["score"] > min_score]

# Using ranking of scores as weight for the playlist bootstrap:
# ranks are ascending, so the best score gets the largest weight.
print("Creating playlist...")
data = data.sort_values("score", ascending=False)
data["rank"] = data["score"].rank(method="min")

# Eliminating tracks with a grade under the required minimum
data = data[data[grade_columns].min(axis=1) > eliminating_grade]
# Never ask for more tracks than are left; `sample` raises if n exceeds the row count.
playlist = data.sample(n=min(size, len(data)), weights="rank")

data

Creating playlist...


Unnamed: 0,Qu,Vi,Ro,mean,count,score,rank
3155,9.75,9.50,,9.625,2,9.625,2378.0
628,9.75,9.75,9.0,9.500,3,9.550,2377.0
3226,9.50,9.50,,9.500,2,9.500,2375.0
3159,9.50,9.50,,9.500,2,9.500,2375.0
2517,,,9.5,9.500,1,9.450,2366.0
...,...,...,...,...,...,...,...
2511,,6.50,7.6,7.050,2,7.050,2.0
2104,7.00,7.00,7.0,7.000,3,7.050,2.0
9,8.00,5.00,8.0,7.000,3,7.050,2.0
3550,7.50,8.50,5.0,7.000,3,7.050,2.0


# GENRE DEFAULTS

In [4]:
from googleapiclient.discovery import build
from google.oauth2.service_account import Credentials

CREDENTIALS_PATH_GOOGLE = '../google-credentials.json'
SCOPES = ['https://www.googleapis.com/auth/spreadsheets.readonly']
SPREADSHEET = '1b75J-QTGrujSgF9r0_JPOKkcXAwzFVwpETOAyVBw8ak'

# Load service account credentials.
__credentials = Credentials.from_service_account_file(CREDENTIALS_PATH_GOOGLE, scopes=SCOPES)

# Creates Google Sheets API (v4/latest) service.
service = build('sheets', 'v4', credentials=__credentials)
# Gets the cell values of the "genre_default" range of the spreadsheet.
values = service.spreadsheets().values().get(spreadsheetId=SPREADSHEET, range="genre_default").execute()['values']
# The first returned row holds the column names; `values` keeps only the data rows.
headers = values.pop(0)
# The Sheets API leaves trailing empty cells out of each row, so rows can be shorter
# than the header. Pad them, otherwise DataFrame construction fails whenever no row
# happens to reach the full width.
padded_rows = [row + [None] * (len(headers) - len(row)) for row in values]
# Format data as pd.DataFrame
defaults = pd.DataFrame(padded_rows, columns=headers)

In [5]:
# Every column after the first is coerced to numbers (unparseable cells become NaN);
# the first column is left untouched.
for column_name in defaults.columns[1:]:
    defaults[column_name] = pd.to_numeric(defaults[column_name], errors="coerce")

# Index the table by genre so a default can be looked up as defaults.at[genre, person].
defaults = defaults.set_index(["genre"])
defaults

Unnamed: 0_level_0,Qu,Gr,Vi,Ro,Sa,Gl,Rx,Cl,Lu,Gë,Et,Ti
genre,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Acapella,6.0,5.0,,5.0,,,,,,,,
Acoustic,4.75,4.0,,5.0,,,,,,,,
Blues,6.25,,,6.0,,,,,,,,
Chanson,5.75,7.0,,4.0,,,,,,,,
Classique,6.0,6.0,,5.0,,,,,,,,
Country,5.0,4.5,,5.0,,,,,,,,
Dance,6.0,,,6.0,,,,,,,,
Disco,7.5,,,6.0,,,,,,,,
Electro,5.0,,,6.0,,,,,,,,
Folk,5.25,5.0,,6.0,,,,,,,,


In [14]:
# Everything from the sixth column onwards is coerced to numbers; unparseable cells
# become NaN. The five leading columns are left as loaded.
for column_name in data.columns[5:]:
    data[column_name] = pd.to_numeric(data[column_name], errors="coerce")

data

Unnamed: 0,genre,sub_genre,artist,album,song,Qu,Gr,Vi,Ro,Sa,Gl,Rx,Cl,Lu,Gë,Et,Ti,api:Spotify,alb?
0,Reggae,,10cc,Bloody Tourists,Dreadlock Holiday,7.25,7.00,7.0,,,,,,6.80,,,7.0,,
1,Rock,Soft Rock,10cc,The Original Soundtrack,I'm Not In Love,4.50,7.60,6.0,,,,,,6.50,,,6.0,,
2,Hip-Hop,Gangsta,2Pac,All Eyez On Me,All Eyez On Me,8.00,7.00,7.0,,,,,,7.75,,,7.0,,
3,Hip-Hop,Gangsta,2Pac,All Eyez On Me,Ambitionz Az A Ridah,6.75,7.75,7.0,,,,,,7.75,,,6.0,,
4,Rock,Alternative,4 Non Blondes,"Bigger, Better, Faster, More!",What's Up,8.75,8.50,7.0,8.7,,,,7.1,8.88,,,9.0,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3812,Metal,Progressive,Haken,Virus,Canary Yellow,,,,,7.0,,,,,,,,,
3813,Electro,Pop,Tame Impala,Currents,Let It Happen,,,8.5,8.2,,,,,,,,,,
3814,Rock,,Radiohead,Hail To the Thief,A Wolf At the Door,,,,9.0,,,,,,,,,,
3815,Rock,,Arctic Monkeys,Tranquility Base Hotel & Casino,Four Out Of Five,,,,8.8,,,,,,,,,,


In [15]:
# Fill each missing personal grade with that person's default for the track's genre.
# Works column by column: `Series.map` looks up every track's genre in `defaults` at
# once, and `fillna` only touches grades that are still NaN.
# A genre or person that has no default is left as NaN instead of raising.
for person in data.columns[5:-2]:
    if person not in defaults.columns:
        continue
    genre_default = data["genre"].map(defaults[person])
    data[person] = data[person].fillna(genre_default)

In [16]:
# Show the table again now that the genre defaults have been filled in.
data

Unnamed: 0,genre,sub_genre,artist,album,song,Qu,Gr,Vi,Ro,Sa,Gl,Rx,Cl,Lu,Gë,Et,Ti,api:Spotify,alb?
0,Reggae,,10cc,Bloody Tourists,Dreadlock Holiday,7.25,7.00,7.0,6.0,,,,,6.80,,,7.0,,
1,Rock,Soft Rock,10cc,The Original Soundtrack,I'm Not In Love,4.50,7.60,6.0,6.5,,,,,6.50,,,6.0,,
2,Hip-Hop,Gangsta,2Pac,All Eyez On Me,All Eyez On Me,8.00,7.00,7.0,4.0,,,,,7.75,,,7.0,,
3,Hip-Hop,Gangsta,2Pac,All Eyez On Me,Ambitionz Az A Ridah,6.75,7.75,7.0,4.0,,,,,7.75,,,6.0,,
4,Rock,Alternative,4 Non Blondes,"Bigger, Better, Faster, More!",What's Up,8.75,8.50,7.0,8.7,,,,7.1,8.88,,,9.0,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3812,Metal,Progressive,Haken,Virus,Canary Yellow,6.75,,,7.0,7.0,,,,,,,,,
3813,Electro,Pop,Tame Impala,Currents,Let It Happen,5.00,,8.5,8.2,,,,,,,,,,
3814,Rock,,Radiohead,Hail To the Thief,A Wolf At the Door,7.25,7.50,,9.0,,,,,,,,5.0,,
3815,Rock,,Arctic Monkeys,Tranquility Base Hotel & Casino,Four Out Of Five,7.25,7.50,,8.8,,,,,,,,5.0,,


In [9]:
# Show the current contents of `data`.
data

Unnamed: 0,genre,sub_genre,artist,album,song,Qu,Gr,Vi,Ro,Sa,Gl,Rx,Cl,Lu,Gë,Et,Ti,api:Spotify,alb?
0,Reggae,,10cc,Bloody Tourists,Dreadlock Holiday,7.25,7.0,7.0,6.0,,,,,6.8,,,7.0,,
1,Rock,Soft Rock,10cc,The Original Soundtrack,I'm Not In Love,4.5,7.6,6.0,6.5,,,,,6.5,,,6.0,,
2,Hip-Hop,Gangsta,2Pac,All Eyez On Me,All Eyez On Me,8.0,7.0,7.0,4.0,,,,,7.75,,,7.0,,
3,Hip-Hop,Gangsta,2Pac,All Eyez On Me,Ambitionz Az A Ridah,6.75,7.75,7.0,4.0,,,,,7.75,,,6.0,,
4,Rock,Alternative,4 Non Blondes,"Bigger, Better, Faster, More!",What's Up,8.75,8.5,7.0,8.7,,,,7.1,8.88,,,9.0,,
5,Pop,Synth,A-Ha,Hunting High and Low,Take on Me,8.0,6.5,7.0,9.0,,,,7.2,,,,8.0,,
6,Electro,Synthwave,A.L.I.S.O.N,Space Station,Golden Dust,7.25,,8.0,6.0,,,,,,,,8.5,,
7,Electro,House,Aaron Smith,,Dancin,3.5,,5.5,3.0,,,,,,,,4.0,,17.0
8,Electro,House,"Aaron Smith, KRONO",,Dancin (KRONO Remix),7.5,,6.5,8.5,,,,,,8.5,,8.0,,17.0
9,Pop,Disco,Abba,Abba,"I do, I do, I do, I do, I do",8.0,8.5,5.0,8.0,,,,4.0,5.0,,,5.0,,


# Playground

In [42]:
# Renumber the rows 0..n-1; the previous index is kept as a new leading column
# (pandas names it "index" when the index itself has no name).
# NOTE(review): the extra column shifts every positional column offset by one —
# confirm nothing positional (e.g. `data.columns[5:]`) runs after this cell, or
# whether `drop=True` was intended.
data = data.reset_index()

In [150]:
COL = ["Qu", "Gr", "Vi"]
BY = "artist"
AMNT = 10

# Rows where the grouping key and every selected person's grade are all present.
fully_graded = data[[BY, *COL]].dropna(how="any")
# Keep only the groups that have at least AMNT such rows.
well_covered = fully_graded.groupby(BY).filter(lambda group: len(group) >= AMNT)
# Average grade per group for each selected person.
best = well_covered.groupby(BY).mean()[COL]

# Average across the selected people, then show the ten highest groups.
overall = best[COL].mean(axis=1)
overall.sort_values(ascending=False).head(10)

artist
The Beatles         8.296474
The Doors           8.238636
Pink Floyd          8.069444
System Of A Down    7.986667
Ghost               7.800000
Daft Punk           7.732143
Gojira              7.666667
Gorillaz            7.601852
Muse                7.568182
Alice in Chains     7.282051
dtype: float64