In [94]:
print(__doc__)

# Author: Ron Weiss <ronweiss@gmail.com>, Gael Varoquaux
# License: BSD 3 clause

# $Id$

import matplotlib.pyplot as plt
import matplotlib as mpl
import numpy as np

from sklearn import datasets, svm
from sklearn.cross_validation import StratifiedKFold
from sklearn.externals.six.moves import xrange
from sklearn.mixture import GaussianMixture
from sklearn.metrics import precision_score, recall_score, accuracy_score, roc_curve, auc
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, VotingClassifier

from tqdm import tqdm_notebook as tqdm
from scipy import interp
import pickle

Automatically created module for IPython interactive environment


  from numpy.core.umath_tests import inner1d


In [39]:
classes = 40

In [69]:
# sample class
class VoiceSample:
    path = ''
    personId = -1
    feature = []
    
    def __init__(self, path):
        self.path = path
        self.personId = getPerson(path)
        self.getAvgMFCC()
        
    def __str__(self):
        return self.path
    
    def getAvgMFCC(self):
        y, sr = librosa.load(self.path,sr=None)
        mfccs = librosa.feature.mfcc(y=y, sr=sr,n_mfcc=13)
        self.feature = np.append(np.average(mfccs, 1), np.std(mfccs, 1))
        
def getTrainFeature(data):
    return np.asarray([data[i].feature for i in xrange(len(data))])

In [73]:
data = pickle.load(open("data", "rb"))

[X_train, X_test, y_train, y_test] = data
X_train = getTrainFeature(X_train)
X_test = getTrainFeature(X_test)
y_train = np.asarray(y_train)
y_test = np.asarray(y_test)

In [75]:
classifier = GaussianMixture(n_components=classes, init_params='random')

In [76]:
classifier.means_init  = np.array([X_train[y_train == i].mean(axis=0)
                                  for i in xrange(classes)])

In [77]:
classifier.fit(X_train)
y_pred = classifier.predict(X_test)
accuracy = accuracy_score(y_test,y_pred)
print accuracy

0.6979490366687383


In [92]:
clf =svm.SVC(kernel = "rbf", gamma=0.0008)
clf.fit(X_train, y_train) 
y_pred = clf.predict(X_test)
accuracy = accuracy_score(y_test,y_pred)
print accuracy

0.7967681789931634


In [119]:
# new gmm
clf_gmm = GaussianMixture(n_components=classes, init_params='random')
clf_gmm.means_init  = np.array([X_train[y_train == i].mean(axis=0)
                                  for i in xrange(classes)])
clf_svm = svm.SVC(kernel = "rbf", gamma=0.001, probability = True)

eclf = VotingClassifier(estimators=[('gmm', clf), ('svm', clf_svm)], voting='soft')
# eclf = VotingClassifier(estimators=[('gmm', classifier), ('svm', clf)], voting='hard')
eclf.fit(X_train, y_train)

VotingClassifier(estimators=[('gmm', SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma=0.0008, kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)), ('svm', SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma=0.001, kernel='rbf',
  max_iter=-1, probability=True, random_state=None, shrinking=True,
  tol=0.001, verbose=False))],
         flatten_transform=None, n_jobs=1, voting='soft', weights=None)

In [120]:
y_pred = eclf.predict(X_test)
accuracy = accuracy_score(y_test,y_pred)
print accuracy

AttributeError: predict_proba is not available when  probability=False

In [64]:
# Break up the dataset into non-overlapping training (75%) and testing
# (25%) sets.
skf = StratifiedKFold(iris.target, n_folds=4)
# Only take the first fold.
train_index, test_index = next(iter(skf))


X_train = iris.data[train_index]
y_train = iris.target[train_index]
X_test = iris.data[test_index]
y_test = iris.target[test_index]

In [None]:
n_classes = len(np.unique(y_train))

# Try GMMs using different types of covariances.
classifiers = dict((covar_type, GMM(n_components=n_classes,
                    covariance_type=covar_type, init_params='wc', n_iter=20))
                   for covar_type in ['spherical', 'diag', 'tied', 'full'])

n_classifiers = len(classifiers)

plt.figure(figsize=(3 * n_classifiers / 2, 6))
plt.subplots_adjust(bottom=.01, top=0.95, hspace=.15, wspace=.05,
                    left=.01, right=.99)


for index, (name, classifier) in enumerate(classifiers.items()):
    # Since we have class labels for the training data, we can
    # initialize the GMM parameters in a supervised manner.
    classifier.means_ = np.array([X_train[y_train == i].mean(axis=0)
                                  for i in xrange(n_classes)])

    # Train the other parameters using the EM algorithm.
    classifier.fit(X_train)

    h = plt.subplot(2, n_classifiers / 2, index + 1)
    make_ellipses(classifier, h)

    for n, color in enumerate('rgb'):
        data = iris.data[iris.target == n]
        plt.scatter(data[:, 0], data[:, 1], 0.8, color=color,
                    label=iris.target_names[n])
    # Plot the test data with crosses
    for n, color in enumerate('rgb'):
        data = X_test[y_test == n]
        plt.plot(data[:, 0], data[:, 1], 'x', color=color)

    y_train_pred = classifier.predict(X_train)
    train_accuracy = np.mean(y_train_pred.ravel() == y_train.ravel()) * 100
    plt.text(0.05, 0.9, 'Train accuracy: %.1f' % train_accuracy,
             transform=h.transAxes)

    y_test_pred = classifier.predict(X_test)
    test_accuracy = np.mean(y_test_pred.ravel() == y_test.ravel()) * 100
    plt.text(0.05, 0.8, 'Test accuracy: %.1f' % test_accuracy,
             transform=h.transAxes)

    plt.xticks(())
    plt.yticks(())
    plt.title(name)

plt.legend(loc='lower right', prop=dict(size=12))


plt.show()