In [7]:
import scipy.io as scipy
import numpy as np
from sklearn.model_selection import KFold
from sklearn.model_selection import train_test_split
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from time import time
from sklearn.decomposition import PCA
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix

def TrainMyClassifier(XEstimate, YEstimate, XValidate, TrainMyClassifierParameters):
    """Fit one of the supported classifiers and predict the validation set.

    Parameters
    ----------
    XEstimate, YEstimate :
        Training features and labels passed to the estimator's ``fit``.
    XValidate :
        Features whose labels are predicted with the fitted estimator.
    TrainMyClassifierParameters :
        Two-element sequence ``[hyper_parameters, algorithm]``.
        ``algorithm`` is one of ``'SVM'``, ``'RVM'`` or ``'GP'``.
        ``hyper_parameters`` is a dict of *scalar* settings: for SVM and RVM
        it is forwarded as keyword arguments to the estimator constructor
        (e.g. ``{'C': 1e5, 'gamma': 0.3}``); for GP only its
        ``'length_scale'`` entry is used.

    Returns
    -------
    (y_pred, scores, params)
        ``y_pred`` are the predictions for ``XValidate``, ``scores`` is the
        value of ``clf.score`` on the *training* data, and ``params`` is the
        fitted estimator itself.

    Raises
    ------
    ValueError
        If ``algorithm`` is not one of the supported names.
    """
    from time import time

    hyper_parameters = TrainMyClassifierParameters[0]
    algorithm = TrainMyClassifierParameters[1]

    # Each back-end is imported only when requested, so e.g. an SVM run does
    # not require the optional ``skrvm`` package to be installed.
    if algorithm == 'SVM':
        from sklearn.svm import SVC
        clf = SVC(kernel='rbf', class_weight='balanced',
                  decision_function_shape='ovo', **hyper_parameters)
    elif algorithm == 'RVM':
        from skrvm import RVC
        clf = RVC(kernel='rbf', n_iter=1, **hyper_parameters)
    elif algorithm == 'GP':
        from sklearn.gaussian_process import GaussianProcessClassifier
        from sklearn.gaussian_process.kernels import RBF
        k_rbf = 1 * RBF(length_scale=hyper_parameters['length_scale'])
        clf = GaussianProcessClassifier(kernel=k_rbf, multi_class='one_vs_one')
    else:
        # Fail loudly: callers unpack three return values, so silently
        # returning None would only surface later as an unrelated TypeError.
        raise ValueError("Incorrect type of algorithm, please use only one of "
                         "the supported classifiers SVM, RVM, GP")

    print("Fitting the classifier to the training set")
    t0 = time()
    clf.fit(XEstimate, YEstimate)
    y_pred = clf.predict(XValidate)
    scores = clf.score(XEstimate, YEstimate)
    params = clf
    print("done in %0.3fs" % (time() - t0))
    return y_pred, scores, params
        
def MyCrossValidate(XTrain, YTrain2, Nf, Algorithm):
    """Run Nf-fold cross-validation of one classifier on (XTrain, YTrain2).

    For every fold the estimation part is sub-sampled, a classifier is
    trained with ``TrainMyClassifier`` using fixed hyper-parameters chosen
    by ``Algorithm``, and the validation part is scored with
    ``MyConfusionMatrix``.

    Parameters
    ----------
    XTrain, YTrain2 :
        Feature and label arrays; they are indexed with the integer index
        arrays produced by ``KFold.split``.
    Nf :
        Number of folds.
    Algorithm :
        ``'SVM'``, ``'RVM'`` or ``'GP'``.

    Returns
    -------
    (YTrain, EstParameters, EstConfMatrices, ConfMatrix)
        Per-fold lists of validation predictions, fitted estimators and
        labelled confusion-matrix DataFrames, followed by the average of the
        per-fold normalised confusion matrices.
    """
    class_names = ['One', 'Two', 'Three', 'Four', 'Five']
    # Fixed hyper-parameters used for every fold, keyed by algorithm name.
    hyper_parameters = {
        'SVM': {'C': 1e5, 'gamma': 0.3},
        'RVM': {'alpha': 1e-04, 'beta': 1e-04},
        'GP': {'length_scale': 10},
    }
    # Row windows kept from each estimation fold: four runs of 2000 rows
    # (at most 8000 samples).  Plain slices are used so that a fold shorter
    # than a window is truncated instead of raising.
    keep_windows = [(0, 2000), (4000, 6000), (8000, 10000), (12000, 14000)]

    # NOTE(review): a 9-component PCA used to be fitted here, but its
    # projection was never fed to the classifier, so it has been dropped.
    kf = KFold(n_splits=Nf)

    EstParameters = []
    EstConfMatrices = []
    YTrain = []
    ConfMatrix = np.zeros((len(class_names), len(class_names)))

    for train_index, test_index in kf.split(XTrain):
        XEst1 = XTrain[train_index]
        YEst1 = YTrain2[train_index]
        XValid = XTrain[test_index]
        YValid = YTrain2[test_index]

        XEst = np.concatenate([XEst1[lo:hi] for lo, hi in keep_windows])
        YEst = np.concatenate([YEst1[lo:hi] for lo, hi in keep_windows])

        TrainMyClassifierParameters = []
        if Algorithm in hyper_parameters:
            TrainMyClassifierParameters.append(dict(hyper_parameters[Algorithm]))
        TrainMyClassifierParameters.append(Algorithm)

        y_pred, scores, params = TrainMyClassifier(
            XEst, YEst, XValid, TrainMyClassifierParameters)

        # Only SVC exposes per-class support-vector counts; the RVM and GP
        # estimators have no such attribute.
        if hasattr(params, 'n_support_'):
            print('Support vectors')
            print(params.n_support_)

        c_m, df, a_s = MyConfusionMatrix(y_pred, YValid, class_names)

        YTrain.append(y_pred)
        EstParameters.append(params)
        EstConfMatrices.append(df)
        ConfMatrix = ConfMatrix + c_m

    # Average over the number of folds actually run, not a hard-coded 5.
    ConfMatrix = ConfMatrix / float(Nf)
    return YTrain, EstParameters, EstConfMatrices, ConfMatrix

def MyConfusionMatrix(Y, YValidate, ClassNames):
    """Build a normalised confusion matrix and accuracy for predictions Y.

    Parameters
    ----------
    Y :
        Predicted labels.
    YValidate :
        True labels.
    ClassNames :
        Display names used as both row and column labels of the returned
        DataFrame; its length must match the number of distinct labels that
        ``confusion_matrix`` finds in the data.

    Returns
    -------
    (c_m, df, a_s)
        ``c_m`` is the confusion matrix (rows: true class, columns:
        predicted class) with every column divided by its own sum and
        rounded to 3 decimals, ``df`` is the same matrix as a string-typed
        DataFrame labelled with ``ClassNames``, and ``a_s`` is the accuracy
        score.
    """
    from sklearn.metrics import confusion_matrix
    from sklearn.metrics import accuracy_score
    import pandas as pd

    counts = confusion_matrix(YValidate, Y)
    # Column-wise normalisation: summing over axis 0 gives one total per
    # predicted class, and broadcasting divides each column by its total.
    # The builtin ``float`` replaces ``np.float``, removed in NumPy 1.24.
    # NOTE(review): a class that is never predicted yields a zero column
    # total and therefore NaN entries - confirm whether that is acceptable.
    column_totals = counts.astype(float).sum(axis=0)
    c_m = np.round(counts / column_totals, 3)
    a_s = accuracy_score(YValidate, Y)

    # labels = ['One','Two','Three','Four','Five'] - This is the format of the labels
    df = pd.DataFrame(c_m, dtype='str', index=ClassNames)
    df.columns = ClassNames
    return c_m, df, a_s
def TestMyClassifier(XTest, Parameters, EstParameters):
    """Predict class labels for XTest with an already fitted classifier.

    Parameters
    ----------
    XTest :
        Samples passed to ``EstParameters.predict``.
    Parameters :
        Not used by this function; kept so the call signature is unchanged.
    EstParameters :
        Fitted estimator exposing a ``predict`` method.

    Returns
    -------
    Whatever ``EstParameters.predict(XTest)`` returns.
    """
    print("Predicting class on the test set")
    t0 = time()
    Ytest = EstParameters.predict(XTest)
    print("done in %0.3fs" % (time() - t0))
    return Ytest


##########################################################################################
# Load the feature vectors and their target codes from the MATLAB files.
mat1 = scipy.loadmat('Proj2FeatVecsSet1.mat')
mat2 = scipy.loadmat('Proj2TargetOutputsSet1.mat')
data_set = mat1['Proj2FeatVecsSet1']
data_target = mat2['Proj2TargetOutputsSet1']

# Every target row must hold a single +1 (marking the class) with -1
# everywhere else.  Decode it to the 1-based class number in one vectorised
# step, and refuse malformed rows: skipping them would leave the labels
# shorter than, and misaligned with, the feature matrix.
is_hot = (data_target == 1)
is_cold = (data_target == -1)
well_formed = (is_hot.sum(axis=1) == 1) & (is_cold.sum(axis=1) == data_target.shape[1] - 1)
if not np.all(well_formed):
    raise ValueError("Target rows must contain exactly one +1 and -1 elsewhere; "
                     "%d malformed row(s) found" % int((~well_formed).sum()))
number_labels = np.argmax(is_hot, axis=1) + 1

# Hold out 20% of the data, keeping class proportions equal in both parts.
X_train, X_test, y_train, y_test = train_test_split(
    data_set, number_labels, test_size=0.20, stratify=number_labels)#, random_state=39)
YTrain, EstParameters, EstConfMatrices, ConfMatrix = MyCrossValidate(X_train, y_train, 5, 'SVM')
print(EstParameters)
print(EstConfMatrices)
print(ConfMatrix)

Fitting the classifier to the training set
done in 1.412s
Support vectors
[ 90 108  99 125 101]
Fitting the classifier to the training set
done in 1.438s
Support vectors
[ 88 116 105 132 100]
Fitting the classifier to the training set
done in 1.441s
Support vectors
[ 88 121  98 125 103]
Fitting the classifier to the training set
done in 1.427s
Support vectors
[ 80 114 101 131  97]
Fitting the classifier to the training set
done in 1.391s
Support vectors
[ 85 121 101 135 100]
[SVC(C=100000.0, cache_size=200, class_weight='balanced', coef0=0.0,
  decision_function_shape='ovo', degree=3, gamma=0.3, kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False), SVC(C=100000.0, cache_size=200, class_weight='balanced', coef0=0.0,
  decision_function_shape='ovo', degree=3, gamma=0.3, kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False), SVC(C=100000.0, cache_size=200, class_weight='balan