In [1]:
import numpy as np
from scipy import sparse
from sklearn.linear_model import PassiveAggressiveClassifier
from sklearn.datasets import load_svmlight_file, load_svmlight_files, make_classification
from sklearn.preprocessing import scale
from sklearn.model_selection import GridSearchCV, train_test_split

In [2]:
# Experiment: Passive-Aggressive classifier (hinge loss) on the `splice` dataset.
# C is tuned with a 5-fold grid search on the training split, and the whole
# procedure is repeated with several shuffling seeds to see how much the
# held-out accuracy varies from run to run.
X, y = load_svmlight_file('datasets/splice')
# Densify to a plain ndarray before standardizing. `.todense()` would give an
# np.matrix, which recent scikit-learn releases refuse as estimator input.
X = scale(X.toarray())

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

n_runs = 5   # number of repetitions; named so the builtin `iter` is not shadowed
scores = []  # one held-out accuracy per repetition

for seed in range(n_runs):
    estimator = GridSearchCV(
        # max_iter=5 with tol=None runs exactly 5 epochs (no early stopping),
        # which is what the removed `n_iter=5` argument used to mean.
        estimator=PassiveAggressiveClassifier(
            loss='hinge', max_iter=5, tol=None, shuffle=True, random_state=seed
        ),
        param_grid={'C': [2**-4, 2**-3, 2**-2, 2**-1, 1, 2, 4, 8, 16]},
        cv=5,
        verbose=1
    )

    estimator.fit(X_train, y_train)
    # Scores the refitted best estimator on the test split.
    score = estimator.score(X_test, y_test)

    print('the best parameter: ', estimator.best_params_)
    print('score: {0:.4f}'.format(score))
    scores.append(score)

# Build the array once instead of re-allocating it with np.append on every pass.
result = np.array(scores)
print('average_score: {0:.4f}'.format(result.mean()))
print('std_score: {0:.4f}'.format(result.std()))

Fitting 5 folds for each of 9 candidates, totalling 45 fits
the best parameter:  {'C': 0.0625}
score: 0.7900
Fitting 5 folds for each of 9 candidates, totalling 45 fits


[Parallel(n_jobs=1)]: Done  45 out of  45 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done  45 out of  45 | elapsed:    0.0s finished


the best parameter:  {'C': 0.0625}
score: 0.7667
Fitting 5 folds for each of 9 candidates, totalling 45 fits
the best parameter:  {'C': 0.0625}
score: 0.7767
Fitting 5 folds for each of 9 candidates, totalling 45 fits


[Parallel(n_jobs=1)]: Done  45 out of  45 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done  45 out of  45 | elapsed:    0.0s finished


the best parameter:  {'C': 0.0625}
score: 0.7667
Fitting 5 folds for each of 9 candidates, totalling 45 fits
the best parameter:  {'C': 0.0625}
score: 0.7700
average_score: 0.7740
std_score: 0.0088


[Parallel(n_jobs=1)]: Done  45 out of  45 | elapsed:    0.0s finished


In [3]:
# Experiment: Passive-Aggressive classifier (hinge loss) on the `svmguide3` dataset.
# C is tuned with a 5-fold grid search on the training split, and the whole
# procedure is repeated with several shuffling seeds to see how much the
# held-out accuracy varies from run to run.
X, y = load_svmlight_file('datasets/svmguide3')
# Densify to a plain ndarray before standardizing. `.todense()` would give an
# np.matrix, which recent scikit-learn releases refuse as estimator input.
X = scale(X.toarray())

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

n_runs = 5   # number of repetitions; named so the builtin `iter` is not shadowed
scores = []  # one held-out accuracy per repetition

for seed in range(n_runs):
    estimator = GridSearchCV(
        # max_iter=5 with tol=None runs exactly 5 epochs (no early stopping),
        # which is what the removed `n_iter=5` argument used to mean.
        estimator=PassiveAggressiveClassifier(
            loss='hinge', max_iter=5, tol=None, shuffle=True, random_state=seed
        ),
        param_grid={'C': [2**-4, 2**-3, 2**-2, 2**-1, 1, 2, 4, 8, 16]},
        cv=5,
        verbose=1
    )

    estimator.fit(X_train, y_train)
    # Scores the refitted best estimator on the test split.
    score = estimator.score(X_test, y_test)

    print('the best parameter: ', estimator.best_params_)
    print('score: {0:.4f}'.format(score))
    scores.append(score)

# Build the array once instead of re-allocating it with np.append on every pass.
result = np.array(scores)
print('average_score: {0:.4f}'.format(result.mean()))
print('std_score: {0:.4f}'.format(result.std()))

Fitting 5 folds for each of 9 candidates, totalling 45 fits


[Parallel(n_jobs=1)]: Done  45 out of  45 | elapsed:    0.0s finished


the best parameter:  {'C': 0.0625}
score: 0.7962
Fitting 5 folds for each of 9 candidates, totalling 45 fits
the best parameter:  {'C': 0.5}
score: 0.7962
Fitting 5 folds for each of 9 candidates, totalling 45 fits


[Parallel(n_jobs=1)]: Done  45 out of  45 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done  45 out of  45 | elapsed:    0.0s finished


the best parameter:  {'C': 0.0625}
score: 0.7453
Fitting 5 folds for each of 9 candidates, totalling 45 fits


[Parallel(n_jobs=1)]: Done  45 out of  45 | elapsed:    0.0s finished


the best parameter:  {'C': 0.0625}
score: 0.7668
Fitting 5 folds for each of 9 candidates, totalling 45 fits
the best parameter:  {'C': 0.0625}
score: 0.7855
average_score: 0.7780
std_score: 0.0196


[Parallel(n_jobs=1)]: Done  45 out of  45 | elapsed:    0.0s finished


In [4]:
# Experiment: Passive-Aggressive classifier (hinge loss) on the `mushrooms` dataset.
# C is tuned with a 5-fold grid search on the training split, and the whole
# procedure is repeated with several shuffling seeds to see how much the
# held-out accuracy varies from run to run.
X, y = load_svmlight_file('datasets/mushrooms')
# Densify to a plain ndarray before standardizing. `.todense()` would give an
# np.matrix, which recent scikit-learn releases refuse as estimator input.
X = scale(X.toarray())
# Re-encode the targets as +1 / -1: label 2 becomes +1, every other label -1.
y = np.where(y == 2, 1, -1)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

n_runs = 5   # number of repetitions; named so the builtin `iter` is not shadowed
scores = []  # one held-out accuracy per repetition

for seed in range(n_runs):
    estimator = GridSearchCV(
        # max_iter=5 with tol=None runs exactly 5 epochs (no early stopping),
        # which is what the removed `n_iter=5` argument used to mean.
        estimator=PassiveAggressiveClassifier(
            loss='hinge', max_iter=5, tol=None, shuffle=True, random_state=seed
        ),
        param_grid={'C': [2**-4, 2**-3, 2**-2, 2**-1, 1, 2, 4, 8, 16]},
        cv=5,
        verbose=1
    )

    estimator.fit(X_train, y_train)
    # Scores the refitted best estimator on the test split.
    score = estimator.score(X_test, y_test)

    print('the best parameter: ', estimator.best_params_)
    print('score: {0:.4f}'.format(score))
    scores.append(score)

# Build the array once instead of re-allocating it with np.append on every pass.
result = np.array(scores)
print('average_score: {0:.4f}'.format(result.mean()))
print('std_score: {0:.4f}'.format(result.std()))

Fitting 5 folds for each of 9 candidates, totalling 45 fits


[Parallel(n_jobs=1)]: Done  45 out of  45 | elapsed:    0.6s finished


the best parameter:  {'C': 0.0625}
score: 1.0000
Fitting 5 folds for each of 9 candidates, totalling 45 fits


[Parallel(n_jobs=1)]: Done  45 out of  45 | elapsed:    0.6s finished


the best parameter:  {'C': 0.0625}
score: 1.0000
Fitting 5 folds for each of 9 candidates, totalling 45 fits


[Parallel(n_jobs=1)]: Done  45 out of  45 | elapsed:    0.3s finished


the best parameter:  {'C': 0.0625}
score: 1.0000
Fitting 5 folds for each of 9 candidates, totalling 45 fits


[Parallel(n_jobs=1)]: Done  45 out of  45 | elapsed:    0.3s finished


the best parameter:  {'C': 0.0625}
score: 1.0000
Fitting 5 folds for each of 9 candidates, totalling 45 fits
the best parameter:  {'C': 0.0625}
score: 1.0000
average_score: 1.0000
std_score: 0.0000


[Parallel(n_jobs=1)]: Done  45 out of  45 | elapsed:    0.3s finished


In [5]:
# Experiment: Passive-Aggressive classifier (hinge loss) on a synthetic binary
# problem (4000 samples, 65 features, all informative, no label noise).
# C is tuned with a 5-fold grid search on the training split, and the whole
# procedure is repeated with several shuffling seeds to see how much the
# held-out accuracy varies from run to run.
X, y = make_classification(
    n_samples=4000,
    n_features=65,
    n_informative=65,
    n_redundant=0,
    n_classes=2,
    n_clusters_per_class=1,
    weights=None,
    flip_y=0,
    shuffle=True,
    random_state=0,
)
X = scale(X)
# Re-encode the targets as +1 / -1: label 1 stays +1, every other label becomes -1.
# (The original line had a stray duplicated `y = y = ...` assignment.)
y = np.where(y == 1, 1, -1)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

n_runs = 5   # number of repetitions; named so the builtin `iter` is not shadowed
scores = []  # one held-out accuracy per repetition

for seed in range(n_runs):
    estimator = GridSearchCV(
        # max_iter=5 with tol=None runs exactly 5 epochs (no early stopping),
        # which is what the removed `n_iter=5` argument used to mean.
        estimator=PassiveAggressiveClassifier(
            loss='hinge', max_iter=5, tol=None, shuffle=True, random_state=seed
        ),
        param_grid={'C': [2**-4, 2**-3, 2**-2, 2**-1, 1, 2, 4, 8, 16]},
        cv=5,
        verbose=1
    )

    estimator.fit(X_train, y_train)
    # Scores the refitted best estimator on the test split.
    score = estimator.score(X_test, y_test)

    print('the best parameter: ', estimator.best_params_)
    print('score: {0:.4f}'.format(score))
    scores.append(score)

# Build the array once instead of re-allocating it with np.append on every pass.
result = np.array(scores)
print('average_score: {0:.4f}'.format(result.mean()))
print('std_score: {0:.4f}'.format(result.std()))

Fitting 5 folds for each of 9 candidates, totalling 45 fits
the best parameter:  {'C': 0.125}
score: 0.9550
Fitting 5 folds for each of 9 candidates, totalling 45 fits


[Parallel(n_jobs=1)]: Done  45 out of  45 | elapsed:    0.1s finished
[Parallel(n_jobs=1)]: Done  45 out of  45 | elapsed:    0.1s finished


the best parameter:  {'C': 0.0625}
score: 0.9442
Fitting 5 folds for each of 9 candidates, totalling 45 fits
the best parameter:  {'C': 0.0625}
score: 0.9592
Fitting 5 folds for each of 9 candidates, totalling 45 fits


[Parallel(n_jobs=1)]: Done  45 out of  45 | elapsed:    0.1s finished
[Parallel(n_jobs=1)]: Done  45 out of  45 | elapsed:    0.1s finished


the best parameter:  {'C': 0.0625}
score: 0.9525
Fitting 5 folds for each of 9 candidates, totalling 45 fits
the best parameter:  {'C': 0.0625}
score: 0.9408
average_score: 0.9503
std_score: 0.0068


[Parallel(n_jobs=1)]: Done  45 out of  45 | elapsed:    0.1s finished
