In [1]:
import numpy as np
from scipy import sparse
from sklearn.linear_model import PassiveAggressiveClassifier
from sklearn.datasets import load_svmlight_file, load_svmlight_files, make_classification
from sklearn.preprocessing import scale
from sklearn.model_selection import train_test_split

# PA-I

In [2]:
def GridSearchPA1(X, Y, n_repeats=10,
                  C_grid=(2**-4, 2**-3, 2**-2, 2**-1, 1, 2, 4, 8, 16),
                  test_size=0.3):
    """Print the best held-out accuracy of PA-I (hinge loss) over a grid of C.

    The data is split once into train/test parts with a fixed
    ``random_state=0``.  For every classifier seed in ``range(n_repeats)`` one
    ``PassiveAggressiveClassifier`` is fitted per value of ``C_grid`` and the
    highest test-set accuracy for that seed is printed.  Finally the mean and
    standard deviation (``ndarray.std``, i.e. ddof=0) of the per-seed best
    scores are printed.

    Caveat: C is picked by looking at the test split itself, so the printed
    scores are an optimistic estimate of generalisation accuracy.

    Parameters
    ----------
    X : array-like
        Feature matrix, passed straight to ``train_test_split``.
    Y : array-like
        Class labels, passed straight to ``train_test_split``.
    n_repeats : int, default 10
        Number of classifier seeds (0 .. n_repeats-1) to evaluate.
    C_grid : iterable of float, default 2**-4 .. 2**4
        Candidate values for the aggressiveness parameter C.
    test_size : float, default 0.3
        Fraction of the data held out for scoring.

    Returns
    -------
    None
        Results are only printed.

    Raises
    ------
    ValueError
        If ``n_repeats`` is smaller than 1 or ``C_grid`` is empty.
    """
    # Materialise the grid so a one-shot iterator can be reused for every seed.
    C_grid = tuple(C_grid)
    if n_repeats < 1:
        raise ValueError('n_repeats must be at least 1')
    if not C_grid:
        raise ValueError('C_grid must contain at least one value')

    X_train, X_test, Y_train, Y_test = train_test_split(
        X, Y, test_size=test_size, random_state=0)

    best_scores = []
    for seed in range(n_repeats):
        # NOTE(review): `n_iter` was deprecated in scikit-learn 0.19 and removed
        # in 0.21 in favour of `max_iter` (use `tol=None` to keep a fixed number
        # of epochs) -- confirm the target version before upgrading.
        bestscore = max(
            PassiveAggressiveClassifier(C=C, loss='hinge', n_iter=5,
                                        shuffle=True, random_state=seed)
            .fit(X_train, Y_train)
            .score(X_test, Y_test)
            for C in C_grid
        )

        best_scores.append(bestscore)
        print('bestscore: {0:.4f}'.format(bestscore))
        print()

    result = np.asarray(best_scores, dtype=float)
    print('average_bestscore: {0:.4f}'.format(result.mean()))
    print('std_bestscore: {0:.4f}'.format(result.std()))

In [3]:
# Splice dataset, PA-I.
# load_svmlight_file returns a SciPy sparse matrix; toarray() densifies it to a
# plain ndarray (todense() would yield np.matrix, which newer scikit-learn
# releases refuse as estimator/preprocessing input).
X_sparse, Y = load_svmlight_file('datasets/splice')
X = scale(X_sparse.toarray())

GridSearchPA1(X, Y)

bestscore: 0.7900

bestscore: 0.7700

bestscore: 0.7767

bestscore: 0.7700

bestscore: 0.7700

bestscore: 0.7267

bestscore: 0.7333

bestscore: 0.7467

bestscore: 0.7533

bestscore: 0.7533

average_bestscore: 0.7590
std_bestscore: 0.0189


In [4]:
# svmguide3 dataset, PA-I.
# Densify with toarray() (plain ndarray) rather than todense() (np.matrix,
# which newer scikit-learn releases refuse) before scaling.
X_sparse, Y = load_svmlight_file('datasets/svmguide3')
X = scale(X_sparse.toarray())

GridSearchPA1(X, Y)

bestscore: 0.7962

bestscore: 0.8070

bestscore: 0.7453

bestscore: 0.7775

bestscore: 0.7855

bestscore: 0.8097

bestscore: 0.7641

bestscore: 0.8070

bestscore: 0.7721

bestscore: 0.7936

average_bestscore: 0.7858
std_bestscore: 0.0200


In [5]:
# Mushrooms dataset, PA-I.
# Densify with toarray() (plain ndarray) rather than todense() (np.matrix,
# which newer scikit-learn releases refuse) before scaling.
X_sparse, Y_raw = load_svmlight_file('datasets/mushrooms')
X = scale(X_sparse.toarray())
# Remap the labels to +1 / -1: label 2 becomes +1, every other label -1.
Y = np.where(Y_raw == 2, 1, -1)

GridSearchPA1(X, Y)

bestscore: 1.0000

bestscore: 1.0000

bestscore: 1.0000

bestscore: 1.0000

bestscore: 1.0000

bestscore: 1.0000

bestscore: 1.0000

bestscore: 1.0000

bestscore: 1.0000

bestscore: 1.0000

average_bestscore: 1.0000
std_bestscore: 0.0000


In [6]:
# Synthetic two-class problem (fixed seed), evaluated with PA-I.
X, Y = make_classification(
    n_samples=4000,
    n_features=65,
    n_informative=65,
    n_redundant=0,
    n_classes=2,
    n_clusters_per_class=1,
    weights=None,
    flip_y=0,
    shuffle=True,
    random_state=0,
)
# Scale the features and remap the labels: class 1 -> +1, anything else -> -1.
X, Y = scale(X), np.where(Y == 1, 1, -1)

GridSearchPA1(X, Y)

bestscore: 0.9558

bestscore: 0.9450

bestscore: 0.9592

bestscore: 0.9525

bestscore: 0.9425

bestscore: 0.9467

bestscore: 0.9633

bestscore: 0.9483

bestscore: 0.9500

bestscore: 0.9417

average_bestscore: 0.9505
std_bestscore: 0.0068


# PA-II

In [7]:
def GridSearchPA2(X, Y, n_repeats=10,
                  C_grid=(2**-4, 2**-3, 2**-2, 2**-1, 1, 2, 4, 8, 16),
                  test_size=0.3):
    """Print the best held-out accuracy of PA-II (squared hinge) over a grid of C.

    The data is split once into train/test parts with a fixed
    ``random_state=0``.  For every classifier seed in ``range(n_repeats)`` one
    ``PassiveAggressiveClassifier`` with ``loss='squared_hinge'`` is fitted per
    value of ``C_grid`` and the highest test-set accuracy for that seed is
    printed.  Finally the mean and standard deviation (``ndarray.std``, i.e.
    ddof=0) of the per-seed best scores are printed.

    Caveat: C is picked by looking at the test split itself, so the printed
    scores are an optimistic estimate of generalisation accuracy.

    Parameters
    ----------
    X : array-like
        Feature matrix, passed straight to ``train_test_split``.
    Y : array-like
        Class labels, passed straight to ``train_test_split``.
    n_repeats : int, default 10
        Number of classifier seeds (0 .. n_repeats-1) to evaluate.
    C_grid : iterable of float, default 2**-4 .. 2**4
        Candidate values for the aggressiveness parameter C.
    test_size : float, default 0.3
        Fraction of the data held out for scoring.

    Returns
    -------
    None
        Results are only printed.

    Raises
    ------
    ValueError
        If ``n_repeats`` is smaller than 1 or ``C_grid`` is empty.
    """
    # Materialise the grid so a one-shot iterator can be reused for every seed.
    C_grid = tuple(C_grid)
    if n_repeats < 1:
        raise ValueError('n_repeats must be at least 1')
    if not C_grid:
        raise ValueError('C_grid must contain at least one value')

    X_train, X_test, Y_train, Y_test = train_test_split(
        X, Y, test_size=test_size, random_state=0)

    best_scores = []
    for seed in range(n_repeats):
        # NOTE(review): `n_iter` was deprecated in scikit-learn 0.19 and removed
        # in 0.21 in favour of `max_iter` (use `tol=None` to keep a fixed number
        # of epochs) -- confirm the target version before upgrading.
        bestscore = max(
            PassiveAggressiveClassifier(C=C, loss='squared_hinge', n_iter=5,
                                        shuffle=True, random_state=seed)
            .fit(X_train, Y_train)
            .score(X_test, Y_test)
            for C in C_grid
        )

        best_scores.append(bestscore)
        print('bestscore: {0:.4f}'.format(bestscore))
        print()

    result = np.asarray(best_scores, dtype=float)
    print('average_bestscore: {0:.4f}'.format(result.mean()))
    print('std_bestscore: {0:.4f}'.format(result.std()))

In [8]:
# Splice dataset, PA-II.
# Densify with toarray() (plain ndarray) rather than todense() (np.matrix,
# which newer scikit-learn releases refuse) before scaling.
X_sparse, Y = load_svmlight_file('datasets/splice')
X = scale(X_sparse.toarray())

GridSearchPA2(X, Y)

bestscore: 0.7833

bestscore: 0.7700

bestscore: 0.7833

bestscore: 0.7700

bestscore: 0.7767

bestscore: 0.7233

bestscore: 0.7400

bestscore: 0.7600

bestscore: 0.7667

bestscore: 0.7733

average_bestscore: 0.7647
std_bestscore: 0.0182


In [9]:
# svmguide3 dataset, PA-II.
# Densify with toarray() (plain ndarray) rather than todense() (np.matrix,
# which newer scikit-learn releases refuse) before scaling.
X_sparse, Y = load_svmlight_file('datasets/svmguide3')
X = scale(X_sparse.toarray())

GridSearchPA2(X, Y)

bestscore: 0.7721

bestscore: 0.8016

bestscore: 0.6917

bestscore: 0.7721

bestscore: 0.7239

bestscore: 0.7962

bestscore: 0.7614

bestscore: 0.8070

bestscore: 0.7721

bestscore: 0.7721

average_bestscore: 0.7670
std_bestscore: 0.0336


In [10]:
# Mushrooms dataset, PA-II.
# Densify with toarray() (plain ndarray) rather than todense() (np.matrix,
# which newer scikit-learn releases refuse) before scaling.
X_sparse, Y_raw = load_svmlight_file('datasets/mushrooms')
X = scale(X_sparse.toarray())
# Remap the labels to +1 / -1: label 2 becomes +1, every other label -1.
Y = np.where(Y_raw == 2, 1, -1)

GridSearchPA2(X, Y)

bestscore: 1.0000

bestscore: 1.0000

bestscore: 1.0000

bestscore: 1.0000

bestscore: 1.0000

bestscore: 1.0000

bestscore: 1.0000

bestscore: 1.0000

bestscore: 1.0000

bestscore: 1.0000

average_bestscore: 1.0000
std_bestscore: 0.0000


In [11]:
# Synthetic two-class problem (fixed seed), evaluated with PA-II.
X, Y = make_classification(
    n_samples=4000,
    n_features=65,
    n_informative=65,
    n_redundant=0,
    n_classes=2,
    n_clusters_per_class=1,
    weights=None,
    flip_y=0,
    shuffle=True,
    random_state=0,
)
# Scale the features and remap the labels: class 1 -> +1, anything else -> -1.
X, Y = scale(X), np.where(Y == 1, 1, -1)

GridSearchPA2(X, Y)

bestscore: 0.9558

bestscore: 0.9483

bestscore: 0.9600

bestscore: 0.9525

bestscore: 0.9450

bestscore: 0.9483

bestscore: 0.9633

bestscore: 0.9475

bestscore: 0.9517

bestscore: 0.9400

average_bestscore: 0.9512
std_bestscore: 0.0066


In [12]:
# ijcnn1 validation set, PA-II.
# NOTE(review): the output saved with this cell is a NameError mentioning
# 'GridSearchPA21', a name this cell does not use -- the recorded output is
# stale and the cell needs to be re-run.
# Densify with toarray() (plain ndarray) rather than todense() (np.matrix,
# which newer scikit-learn releases refuse) before scaling.
X_sparse, Y = load_svmlight_file('datasets/ijcnn1.val')
X = scale(X_sparse.toarray())

GridSearchPA2(X, Y)

NameError: name 'GridSearchPA21' is not defined