# Gaussian Process Classification

This notebook applies Gaussian process (GP) classification to several binary classification tasks: two synthetic toy datasets and the UCI banknote authentication dataset.

In [34]:
from GaussianProcesses import *
from Kernels import *


import warnings
# Escalate every warning to an exception by default.
warnings.simplefilter("error")
# simplefilter() inserts each new rule at the front of the filter list, so the
# "ignore" rules below take precedence over the blanket "error" rule above:
# these three categories are silenced, everything else raises.
# The order of these calls therefore matters.
warnings.simplefilter("ignore", DeprecationWarning)
warnings.simplefilter("ignore", FutureWarning)
warnings.simplefilter("ignore", RuntimeWarning)
from tqdm import tqdm_notebook

%matplotlib inline

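The two helpers below generate simple synthetic binary classification datasets with labels in {-1, +1}: `generate` produces a one-dimensional problem and `gen2` a two-dimensional one.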

In [35]:

def generate(size=150):
    """Draw a one-dimensional toy binary classification dataset.

    Each input is the sum of two independent Gaussian draws, one with
    mean 0 and standard deviation 2 and one with mean 5 and standard
    deviation 10. The label is -1 where the input is negative and +1
    otherwise.

    Parameters
    ----------
    size : int, optional
        Number of samples to draw. Defaults to 150, the value that was
        previously hard-coded.

    Returns
    -------
    X : numpy.ndarray
        Inputs, shape ``(size,)``.
    y : numpy.ndarray
        Integer labels in {-1, +1}, shape ``(size,)``.
    """
    X = np.random.normal(0, 2, size) + np.random.normal(5, 10, size)
    # Vectorised sign labelling: strictly negative inputs get -1, the rest +1.
    y = np.where(X < 0, -1, 1)
    return X, y

def gen2(size):
    """Draw a two-dimensional toy binary classification dataset.

    The first feature is uniform on [-1, 1); the second is Gaussian with
    mean -1 and standard deviation 1. A sample is labelled +1 when the
    mean of its two features is strictly positive and -1 otherwise.

    Parameters
    ----------
    size : int
        Number of samples to draw.

    Returns
    -------
    X : numpy.ndarray
        Inputs, shape ``(size, 2)``.
    y : numpy.ndarray
        Integer labels in {-1, +1}, shape ``(size,)``.
    """
    # Stack the two feature vectors as rows, then transpose to (size, 2).
    X = np.array([
        np.random.uniform(-1, 1, size),
        np.random.normal(-1, 1, size),
    ]).T
    # Label directly in {-1, +1}; this also keeps an integer dtype for size == 0.
    y = np.where(np.mean(X, axis=1) > 0, 1, -1)
    return X, y

In [36]:
# Seed NumPy's global RNG so the randomly generated train/validation sets
# (and therefore the accuracies printed below) are identical on every run
# of this cell.
np.random.seed(0)

# generate() returns 1-D inputs; reshape them to single-feature columns (n, 1).
# NOTE(review): the model presumably expects 2-D inputs — confirm in GaussianProcesses.
X_train, y_train = generate()
X_val, y_val = generate()
X_train = X_train.reshape(-1, 1)
X_val = X_val.reshape(-1, 1)

kern = SquareExponential()
class_prob = Sigmoid()
model = GaussianClassification(kern, class_prob)

model.fit(X_train, y_train)

# Training accuracy. Labels are in {-1, +1}; `y > 0` maps them to {0, 1}
# for comparison with the output of classify()
# (presumably 0/1 class indices — confirm in GaussianProcesses).
y_mean, var = model.predict(X_train)
y_pred = model.classify(y_mean)
print(np.mean(np.array(y_pred) == np.array(y_train > 0, int)))

# Validation accuracy on the independently generated set.
y_mean, var = model.predict(X_val)
y_pred = model.classify(y_mean)
print(np.mean(np.array(y_pred) == np.array(y_val > 0, int)))

1.0
1.0


In [37]:
# Seed NumPy's global RNG so the randomly generated train/validation sets
# (and therefore the accuracies printed below) are identical on every run
# of this cell.
np.random.seed(0)

# gen2() already returns 2-D inputs of shape (size, 2), so no reshape is needed.
X_train, y_train = gen2(150)
X_val, y_val = gen2(50)

kern = SquareExponential()
class_prob = Sigmoid()
model = GaussianClassification(kern, class_prob)

model.fit(X_train, y_train)

# Training accuracy. Labels are in {-1, +1}; `y > 0` maps them to {0, 1}
# for comparison with the output of classify()
# (presumably 0/1 class indices — confirm in GaussianProcesses).
y_mean, var = model.predict(X_train)
y_pred = model.classify(y_mean)
print(np.mean(np.array(y_pred) == np.array(y_train > 0, int)))

# Validation accuracy on the independently generated set.
y_mean, var = model.predict(X_val)
y_pred = model.classify(y_mean)
print(np.mean(np.array(y_pred) == np.array(y_val > 0, int)))

0.98
0.98


## UCI Banknote dataset

In [38]:
import pandas as pd

# Banknote authentication data from the UCI Machine Learning Repository.
# The file is read with header=None, so the columns get the integer labels
# 0, 1, 2, ... that later cells index by.
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/00267/data_banknote_authentication.txt'
data = pd.read_csv(url, header=None)

In [39]:
from sklearn.model_selection import train_test_split
# Columns 0-3 are used as the input features and column 4 as the class label.
X = data[[0, 1, 2, 3]].values
y = data[4].values

# Remap label 0 to -1 and leave every other label unchanged, so the targets
# follow the same {-1, +1} convention as the synthetic datasets
# (assumes the raw labels are 0/1 — TODO confirm against the dataset description).
y = np.where(y == 0, -1, y)

# Fixed random_state keeps the 67/33 train/test split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

In [40]:
# Fit a GP classifier with a squared-exponential kernel and a sigmoid
# class-probability function on the banknote training split.
kern = SquareExponential()
class_prob = Sigmoid()
model = GaussianClassification(kern, class_prob)
model.fit(X_train, y_train)

# Training accuracy: labels are in {-1, +1}, so `y > 0` converts them to
# {0, 1} before comparing with the predicted classes.
y_mean, var = model.predict(X_train)
y_pred = model.classify(y_mean)
print(np.mean(np.asarray(y_pred) == (y_train > 0).astype(int)))

# Held-out test accuracy.
y_mean, var = model.predict(X_test)
y_pred = model.classify(y_mean)
print(np.mean(np.asarray(y_pred) == (y_test > 0).astype(int)))

1.0
1.0
