In [1]:
from scipy.io import loadmat

dData = loadmat('TIMIT_Train.mat')
mX    = dData['mX']
vY    = dData['vY'].squeeze()
mX.shape, vY.shape

((10000, 41), (10000,))

In [2]:
import numpy as np

In [3]:
from scipy.io import loadmat

In [4]:
d = loadmat('data.mat')
d.keys()

dict_keys(['__header__', '__version__', '__globals__', 'trainSet', 'testSet'])

In [5]:
mX = d['trainSet'][0][0][0].T
vY = d['trainSet'][0][0][1].squeeze()
mX.shape, vY.shape

((10000, 41), (10000,))

In [6]:
from sklearn.linear_model    import LogisticRegression
from sklearn.svm             import SVC
from sklearn.ensemble        import RandomForestClassifier, AdaBoostClassifier
from sklearn.model_selection import cross_val_predict, KFold

In [7]:
mX -= mX.mean(0)
mX /= mX.std(0)

In [8]:
l = []
l.append(('SVM',      SVC()))
l.append(('LR',       LogisticRegression()))
l.append(('RF',       RandomForestClassifier()))
l.append(('Adaboost', AdaBoostClassifier()))

oKFold = KFold(20, shuffle=True, random_state=0)
for sName, oClf in l:
    vHatY    = cross_val_predict(oClf, mX, vY, cv=oKFold)
    accuracy = np.mean(vY == vHatY)
    print(f'{sName} = {accuracy}')

SVM = 0.8895
LR = 0.8605
RF = 0.8756
Adaboost = 0.8606


In [9]:
mTestX = d['testSet'][0][0][0].T
vTestY = d['testSet'][0][0][1].squeeze()
mTestX.shape, vTestY.shape

((5000, 41), (5000,))

In [10]:
mTestX -= mTestX.mean(0)
mTestX /= mTestX.std(0)

In [11]:
mX -= mX.mean(0)
mX /= mX.std(0)

In [12]:
oKFold2 = KFold(5, shuffle=True, random_state=0)
vHatY   = cross_val_predict(SVC(C=2, kernel='linear'), mX, vY, cv=oKFold2)
np.mean(vY == vHatY)

0.8631

In [13]:
oSVM = SVC(C=0.001, kernel='linear').fit(mX, vY)
oSVM.score(mTestX, vTestY)

0.8708

In [14]:
mTestX.std(0)

array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1.])

In [15]:
from metric_learn import LFDA

mZ = LFDA(n_components=10).fit_transform(mX, vY)

In [16]:
l = []
l.append(('SVM',      SVC()))
l.append(('LR',       LogisticRegression()))
l.append(('RF',       RandomForestClassifier()))
l.append(('Adaboost', AdaBoostClassifier()))

oKFold = KFold(20, shuffle=True, random_state=0)
for sName, oClf in l:
    vHatY    = cross_val_predict(oClf, mZ, vY, cv=oKFold)
    accuracy = np.mean(vY == vHatY)
    print(f'{sName} = {accuracy}')

SVM = 0.8662
LR = 0.8605
RF = 0.8633
Adaboost = 0.8591


In [25]:
from sklearn.tree import DecisionTreeClassifier

oTree    = DecisionTreeClassifier(max_leaf_nodes=2)
vHatY    = cross_val_predict(AdaBoostClassifier(n_estimators=50, learning_rate=0.5), mX, vY, cv=KFold(50, shuffle=True))
np.mean(vY == vHatY)

0.8602