In [1]:
import torch
import numpy as np
import pandas as pd
import torch.nn as nn
import torch.functional as F
from torch.autograd  import Variable
import torch.optim as optim
from sklearn.utils import shuffle
import copy 
from torch.utils.tensorboard import SummaryWriter
torch.manual_seed(1)
np.random.seed(1)



In [2]:
class error_module(nn.Module):
    def __init__(self,size):
        super(error_module,self).__init__()
        self.error_linear = nn.Linear(size,1)
        self.Var_e = Variable(torch.ones(1, 1), requires_grad=True)
    def forward(self,x,prev_error):
        x = self.error_linear(x) + self.Var_e * prev_error
        
        return x



class classifier_module(nn.Module):
    def __init__(self,size):
        super(classifier_module,self).__init__()
        self.classifier_linear = nn.Linear(size,1)
        self.Var_w = Variable(torch.ones(1, 1), requires_grad=True)
    def forward(self,x, prev_error):
        x = self.classifier_linear(x) +  self.Var_w * prev_error 
        
        return x
    
    
class MyHingeLoss(torch.nn.Module):

    def __init__(self):
        super(MyHingeLoss, self).__init__()

    def forward(self, output, target):

        hinge_loss = 1 - torch.mul(output, target)
        hinge_loss[hinge_loss < 0] = 0
        return hinge_loss
    
    


class oco_classifier:
    def __init__(self, size, C):
        self.size = size
        self.w = np.zeros(size)
        self.C = C

    def predict(self, x):
        return np.dot(self.w, x)

    def update(self, x, y):
        loss = np.maximum(0, 1.0 - y * np.dot(self.w, x))
        if loss > 0:
            self.w += np.minimum(self.C, loss/(np.square(np.linalg.norm(x))+ 1e-6)) * x * y
        return loss

    def reset(self):
        self.w = np.zeros(self.size)
        


In [3]:
dataset_names = ["german", "ionosphere", "spambase", "magic04", "a8a"]
root_path, extension = "./datasets/", "_numeric"


def get_path(name):
    '''returns a path pair to the preprocessed datasets
    X and y csv files.'''
    path = root_path + name + extension
    return path + "_X.csv", path + "_y.csv"


def read_dataset(X_path, y_path):
    '''reads and returns numpy arrays in a given pair of paths for 
    X and y.'''
    X = pd.read_csv(X_path).values
    y = pd.read_csv(y_path)['0'].values
    return X, y


def simulate_varying(X):  # multivariate normal distribution
    '''Get the data and generate a varying feature space pattern.
    Possible concerns: thresholding messing up the distribution?'''
    
    # create a covariance matrix
    cov = np.random.rand(num_features, num_features)
    cov = np.dot(cov, cov.transpose())  # to have a positive semi-definite matrix
    
    # create a mean vector
    mean = np.random.rand(len(X[0]))
    
    # sample from multivariate gaussian w/ given mean and cov
    spaces = np.random.multivariate_normal(mean, cov, len(X))
    
    # threshold samples for 1-hot encoding
    spaces[spaces < 0] = 0
    spaces[spaces != 0] = 1

    return spaces

def simulate_random_varying(X): # discrete uniform distribution
    matrix = np.random.randint(2, size=(len(X), len(X[0])))  
    return matrix


def squash(x):
    norm_x = np.linalg.norm(x)
    squared_x = np.square(norm_x)
    s = squared_x/(1 + squared_x)
    return s * x/(norm_x + 1e-5)

def quant(x, l):  # l: num_layers, x:input
    one_hot = []
    for i in x:
        if i != 0:
            one_hot.append(1)
        else:
            one_hot.append(0)
    one_hot = np.array(one_hot)
    
    qt = (one_hot - x) / (l-1)
    qts = []
    qts.append(one_hot)
    #qts.append(squash(x))
    for i in range(l-2):
        qts.append(squash(x + qt * (l-2-i)))
        #qts.append(x)
        
        
    qts.append(x)    
    
    return np.array(qts)

In [4]:
X_path, y_path = get_path("spambase")
X, y = read_dataset(X_path, y_path)
print(np.unique(y,return_counts=True))
num_features = len(X[0])
folds = 20
learning_rate = 0.001

(array([-1.,  1.]), array([2788, 1812]))


In [5]:
class OPNet(nn.Module):
    def __init__(self,number_layers,size):
        super(OPNet,self).__init__()
        self.classifier_module = classifier_module(size)
        self.number_layers = number_layers
        self.error_modules = nn.ModuleList([error_module(size) for i in range(number_layers-1)])
            
    def forward(self,x):
        predict= torch.zeros(1, 1).double()
        errors = []
        errors.append(torch.zeros(1, 1).double())
        for i in range (self.number_layers - 1):
            predict = self.error_modules[i](x[i], predict) 
            errors.append(torch.norm(predict - errors[-1]))
        
            
        pred = self.classifier_module(x[-1], predict) 
        errors.append(torch.norm(pred - errors[-1]))
        
        return pred, errors
        

In [6]:
def linear_model(w,lr,x,y,error_lin):
    pred = np.dot(w,x)
    #print(pred)
    y_ = np.sign(pred)
    loss = np.maximum(0, 1.0 - y * pred)
    if loss > 0 :
        w += np.minimum(lr , loss/np.square(np.linalg.norm(x) + 1e-5)) * y * x
        
    if y_ != y:
        error_lin += 1
    return error_lin, w
    
    

In [7]:

layers = 3
error_val = []
error_linear_val = []
cum_loss = torch.zeros([1,1])
cum_error_classifier = torch.zeros([1,1])
cum_error_error1 = torch.zeros([1,1])
cum_error_error2 = torch.zeros([1,1])
#cum_error_error3 = torch.zeros([1,1])
#cum_error_error4 = torch.zeros([1,1])
for j in range(20): 
    error = 0
    error_lin = 0
    X , y = shuffle(X,y,random_state=0)
    mask = simulate_random_varying(X)
    X_ = copy.deepcopy(X)
    X_ = X_ * mask
    w = np.zeros(X.shape[1])
    net = OPNet(layers,X.shape[1])
    net = net.to(torch.double)
    parameter = list(net.parameters())
    criterion = MyHingeLoss()
    optimizer = optim.SGD(net.parameters(), lr=0.01)
    writer = SummaryWriter()
    for i in range(len(X)):
        x = quant(X_[i],layers) 
        if i == 100 and j == 0:
            print(x)
        error_lin, w = linear_model(w,0.01,X_[i],y[i],error_lin)
        #print(w)
        x = torch.from_numpy(x).detach()
        y_ = torch.from_numpy(y[i].reshape(1,1)).detach()
        pred = net(x)
        loss = criterion(pred[0], y_) + 0.001 * np.sum(pred[1])
        cum_loss += loss
        cum_error_classifier += pred[1][3]
        cum_error_error1 += pred[1][1]
        cum_error_error2 += pred[1][2]
        
        if torch.sign(pred[0]).detach().numpy()[0][0]!= y_:
            error +=1
        loss.backward()
        optimizer.step()
        #if i%20 == 0:
            #writer.add_scalar("loss ",cum_loss/(i+1), i)
            #writer.add_scalar("error_error_module_1",cum_error_error1/(i+1),i)
            #writer.add_scalar("error_error_module_2",cum_error_error2/(i+1),i)
            #writer.add_scalar("error_error_module_3",cum_error_error3/(i+1),i)
            #writer.add_scalar("error_error_module_4",cum_error_error4/(i+1),i)
            #writer.add_scalar("error_classifier_module",cum_error_classifier/(i+1),i)
            #writer.add_histogram("error_module_1",list(net.error_modules[0].parameters())[0],i)
            #writer.add_histogram("error_module_2",list(net.error_modules[1].parameters())[0],i)
            #writer.add_histogram("error_module_3",list(net.error_modules[2].parameters())[0],i)
            #writer.add_histogram("error_module_4",list(net.error_modules[3].parameters())[0],i)
            #writer.add_histogram("classifier_module",list(net.classifier_module.parameters())[0],i)
    error_val.append(error/len(X))
    error_linear_val.append(error_lin/len(X))
writer.close()    
print('average_error_rate ',np.average(error_val))
print('average_error_rate_linear',np.average(error_linear_val))
    
    
    
    
    

[[0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
  0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
  0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
  0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
  0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
  0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
  0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
  0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
  0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
  0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
  0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
  0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
  0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
  0.00000000e+00 0.00000000e+00 0.00000000e+00 1.00000000e+00
  0.00000000e+00]
 [0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
  0.00000000e+00 0.00000000e+00 0.00000000e+00 0.000

In [8]:
# weights check
#weight of norm each layers


In [9]:
error_val

[0.18217391304347827,
 0.19130434782608696,
 0.1973913043478261,
 0.18608695652173912,
 0.18478260869565216,
 0.18456521739130435,
 0.19108695652173913,
 0.19391304347826088,
 0.2126086956521739,
 0.19804347826086957,
 0.17891304347826087,
 0.19695652173913045,
 0.19847826086956522,
 0.19282608695652173,
 0.19456521739130433,
 0.19782608695652174,
 0.19804347826086957,
 0.1743478260869565,
 0.19782608695652174,
 0.18869565217391304]

In [10]:
error_linear_val

[0.31782608695652176,
 0.3080434782608696,
 0.2995652173913044,
 0.29478260869565215,
 0.2995652173913044,
 0.305,
 0.29586956521739133,
 0.2876086956521739,
 0.2956521739130435,
 0.3026086956521739,
 0.30478260869565216,
 0.3141304347826087,
 0.30521739130434783,
 0.3017391304347826,
 0.29478260869565215,
 0.3002173913043478,
 0.3006521739130435,
 0.29456521739130437,
 0.31021739130434783,
 0.2910869565217391]

In [11]:
net

OPNet(
  (classifier_module): classifier_module(
    (classifier_linear): Linear(in_features=57, out_features=1, bias=True)
  )
  (error_modules): ModuleList(
    (0): error_module(
      (error_linear): Linear(in_features=57, out_features=1, bias=True)
    )
    (1): error_module(
      (error_linear): Linear(in_features=57, out_features=1, bias=True)
    )
  )
)

In [12]:
np.mean(error_val)

0.1920217391304348

In [13]:
np.mean(error_linear_val)

0.30119565217391303