In [2]:
import sys, itertools
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plt
import sklearn, time, math
from sklearn.svm import SVC, LinearSVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.cross_validation import cross_val_score
from sklearn.cross_validation import StratifiedKFold
from sklearn.cross_validation import train_test_split
from sklearn.metrics import confusion_matrix

if '../tools' not in sys.path:
    sys.path.append('../tools')
from helper_functions import * 

import warnings
warnings.filterwarnings('ignore')

D = load_all_nonmfc()
X, Y = load_all_fv(3, 3)
D['FV'] = X
svms = ['FV', 'eng', 'hcdf', 'brightness']
rf = ['zerocross', 'roughness']
def tail(it):
    next(it)
    return it
categories, arrs = zip(*sorted(list((k, v) for k, v in D.items() if k in set(svms + rf))))
ends = [0] + list(itertools.accumulate(a.shape[1] for a in arrs))
ends = zip(ends, ends[1:])
excerpts = dict(zip(categories, ends))
X = np.concatenate(arrs, axis=1)

Read in eng for all genres
Read in chroma for all genres
Read in t for all genres
Read in keystrength for all genres
Read in brightness for all genres
Read in zerocross for all genres
Read in roughness for all genres
Read in inharmonic for all genres
Read in hcdf for all genres
Using existing FVs from file ../generated-fv/FV3-3.mat
N = 1000
MFCC training feature ranges means [-0.0201, 0.0090] sds [0.0202, 0.0614]


In [3]:
class SVCSub(SVC):
    def __init__(self, name):
        super(SVCSub, self).__init__(probability=True)
        self.name = name
    def fit(self, X, Y):
        super(SVCSub, self).fit(X[:,slice(*excerpts[self.name])], Y)
        return self
    def predict_proba(self, X):
        return super(SVCSub, self).predict_proba(X[:,slice(*excerpts[self.name])])
class RFSub(RandomForestClassifier):
    def __init__(self, name, md, ne):
        if not md: md = 10
        if not ne: ne = 50
        super(RFSub, self).__init__(max_depth=md, n_estimators=ne)
        self.name = name
    def fit(self, X, Y):
        super(RFSub, self).fit(X[:,slice(*excerpts[self.name])], Y)
        return self
    def predict_proba(self, X):
        return super(RFSub, self).predict_proba(X[:,slice(*excerpts[self.name])])
def make_classifier(name):
    if name == 'zerocross': return RFSub(name, 10, 50)
    if name == 'roughness': return RFSub(name, 20, 200)
    return SVCSub(name)

In [5]:
from sklearn.ensemble import VotingClassifier

ests = [(s, make_classifier(s)) for s in svms+rf]
print([s[1].name for s in ests])
c = VotingClassifier(estimators=ests, voting='soft')
skf = StratifiedKFold(Y, n_folds=8, shuffle=True, random_state=1)
np.average(cross_val_score(c, X, Y, cv=skf, n_jobs=-1))
# todo this, then FV on chroma

['FV', 'eng', 'hcdf', 'brightness', 'zerocross', 'roughness']


0.49198717948717946