In [1]:
import torch
import numpy as np
import pandas as pd
import torch.nn as nn
import torch.functional as F
from torch.autograd  import Variable
import torch.optim as optim
from sklearn.utils import shuffle
from torch.utils.tensorboard import SummaryWriter
import copy
torch.manual_seed(1)
np.random.seed(1)



In [2]:
class error_module(nn.Module):
    def __init__(self,size):
        super(error_module,self).__init__()
        self.error_linear = nn.Linear(size,1)
        self.Var_e = Variable(torch.ones(1, 1), requires_grad=True)
    def forward(self,x,prev_error):
        x = self.error_linear(x) + self.Var_e * prev_error
        
        return x



class classifier_module(nn.Module):
    def __init__(self,size):
        super(classifier_module,self).__init__()
        self.classifier_linear = nn.Linear(size,1)
        self.Var_w = Variable(torch.ones(1, 1), requires_grad=True)
    def forward(self,x, prev_error):
        x = self.classifier_linear(x) +  self.Var_w * prev_error 
        
        return x
    
    
class MyHingeLoss(torch.nn.Module):

    def __init__(self):
        super(MyHingeLoss, self).__init__()

    def forward(self, output, target):

        hinge_loss = 1 - torch.mul(output, target)
        hinge_loss[hinge_loss < 0] = 0
        return hinge_loss
        


In [3]:
dataset_names = ["german", "ionosphere", "spambase", "magic", "a8a"]
root_path, extension = "./datasets/", "_numeric"


def get_path(name):
    '''returns a path pair to the preprocessed datasets
    X and y csv files.'''
    path = root_path + name + extension
    return path + "_X.csv", path + "_y.csv"


def read_dataset(X_path, y_path):
    '''reads and returns numpy arrays in a given pair of paths for 
    X and y.'''
    X = pd.read_csv(X_path).values
    y = pd.read_csv(y_path)['0'].values
    return X, y


def simulate_varying(X):  # multivariate normal distribution
    '''Get the data and generate a varying feature space pattern.
    Possible concerns: thresholding messing up the distribution?'''
    
    # create a covariance matrix
    cov = np.random.rand(num_features, num_features) + 0.9
    cov = np.dot(cov, cov.transpose())  # to have a positive semi-definite matrix
    
    # create a mean vector
    mean = np.random.rand(len(X[0]))
    
    # sample from multivariate gaussian w/ given mean and cov
    spaces = np.random.multivariate_normal(mean, cov, len(X))
    
    # threshold samples for 1-hot encoding
    spaces[spaces < 0] = 0
    spaces[spaces != 0] = 1

    return spaces

def simulate_random_varying(X): # discrete uniform distribution
    matrix = np.random.randint(2, size=(len(X), len(X[0])))  
    return matrix


def quant(x, l):  # l: num_layers, x:input
    one_hot = []
    for i in x:
        if i != 0:
            one_hot.append(1)
        else:
            one_hot.append(0)
    one_hot = np.array(one_hot)
    
    qt = (one_hot - x) / (l-1)
    qts = []
    qts.append(one_hot)
    
    for i in range(l-2):
        qts.append(x + qt * (l-2-i))
        
    qts.append(x)    
    
    return np.array(qts)

In [4]:
X_path, y_path = get_path("german")
X, y = read_dataset(X_path, y_path)
print(np.unique(y,return_counts=True))
num_features = len(X[0])
folds = 20
learning_rate = 0.001

(array([-1.,  1.]), array([300, 700]))


In [5]:
class OPNet(nn.Module):
    def __init__(self,number_layers,size):
        super(OPNet,self).__init__()
        self.classifier_module = classifier_module(size)
        self.number_layers = number_layers
        self.error_modules = nn.ModuleList([error_module(size) for i in range(number_layers-1)])
            
    def forward(self,x):
        predict= torch.zeros(1, 1).double()
        errors = []
        errors.append(torch.zeros(1, 1).double())
        for i in range (self.number_layers - 1):
            predict = self.error_modules[i](x[i], predict) 
            errors.append(torch.norm(predict - errors[-1]))
        
            
        pred = self.classifier_module(x[-1], predict) 
        errors.append(torch.norm(pred - errors[-1]))
        
        return pred, errors

    

In [15]:
class opcbackprop:
    def __init__(self,number_layers,size,lr):
        self.in_size = size
        self.number_layers = number_layers
        self.lr = lr
        self.model = OPNet(number_layers,size)
        self.criterion = MyHingeLoss()
        self.optim = optim.SGD(self.model.parameters(), lr=lr)
        
    def predict(self,x,return_sum = True):
        yhat = self.model(x)
        return yhat[0]
             
    def update(self,x,y):
        pred = self.predict(x)
        loss = self.criterion(pred,y)
        if torch.sign(pred).detach().numpy()[0][0]!= y:
            loss.backward()
            self.optim.step()
            
        return loss
            
    def reset(self):
        self.model = OPNet(number_layers,size)
        self.optim = optim.SGD(self.model.parameters(), lr=lr)

        

In [6]:
def linear_model(w,lr,x,y,error_lin):
    pred = np.dot(x,w)
    y_ = np.sign(pred)
    loss  = np.maximum(0, 1 - y * pred)
    if loss > 0:
        w += np.minimum(lr, loss/np.square(np.linalg.norm(x))) * y * x
    if y_ != y:
        error_lin += 1
    return error_lin, w 
    
    

In [7]:

# layers = 3
# error_val = []
# error_linear_val = []

# #cum_error_error4 = torch.zeros([1,1])
# for j in range(20): 
#     error = 0
#     error_lin = 0
#     X , y = shuffle(X,y,random_state=0)
#     X_ = copy.deepcopy(X)
    
#     w = np.zeros(X.shape[1])
#     net = OPNet(layers,X.shape[1])
#     net = net.to(torch.double)
#     parameter = list(net.parameters())
#     criterion = MyHingeLoss()
#     optimizer = optim.SGD(net.parameters(), lr=0.01)
#     writer = SummaryWriter()
#     for i in range(len(X)):
#         x= quant(X_[i],layers) 
#         #if i == 0 and j== 0:
#         #    print(x)
#         #    break
#         error_lin, w = linear_model(w,0.01,X_[i],y[i],error_lin)
#         x = torch.from_numpy(x).detach()
#         y_ = torch.from_numpy(y[i].reshape(1,1)).detach()
#         #rand_var = torch.tensor(np.zeros_like(x))
#         #print(rand_var)
#         #writer.add_graph(net,rand_var)
#         pred = net(x)
#         loss = criterion(pred[0], y_) #+ 0.1 * np.sum(pred[1])
#         cum_loss += loss
#         cum_error_classifier += pred[1][3]
#         cum_error_error1 += pred[1][1]
#         cum_error_error2 += pred[1][2]
#         #cum_error_error3 += pred[1][3]
#         #cum_error_error4 += pred[1][4]
#         if torch.sign(pred[0]).detach().numpy()[0][0]!= y_:
#             error +=1
#             loss.backward()
#             optimizer.step()
#         if i%20 == 0:
#             writer.add_scalar("loss ",cum_loss/(i+1), i)
#             writer.add_scalar("error_error_module_1",cum_error_error1/(i+1),i)
#             writer.add_scalar("error_error_module_2",cum_error_error2/(i+1),i)
#             #writer.add_scalar("error_error_module_3",cum_error_error3/(i+1),i)
#             #writer.add_scalar("error_error_module_4",cum_error_error4/(i+1),i)
#             writer.add_scalar("error_classifier_module",cum_error_classifier/(i+1),i)
#             writer.add_histogram("error_module_1",list(net.error_modules[0].parameters())[0],i)
#             writer.add_histogram("error_module_2",list(net.error_modules[1].parameters())[0],i)
#             #writer.add_histogram("error_module_3",list(net.error_modules[2].parameters())[0],i)
#             #writer.add_histogram("error_module_4",list(net.error_modules[3].parameters())[0],i)
#             #writer.add_histogram("classifier_module",list(net.classifier_module.parameters())[0],i)
#     error_val.append(error/len(X))
#     error_linear_val.append(error_lin/len(X))
# writer.close()    
# print('average_error_rate ',np.average(error_val))
# print('average_error_rate_linear',np.average(error_linear_val))
    
    
    
    
    

average_error_rate  0.33925
average_error_rate_linear 0.30150000000000005


In [8]:
# weights check
#weight of norm each layers


In [9]:
error_val

[0.326,
 0.349,
 0.339,
 0.341,
 0.33,
 0.346,
 0.353,
 0.362,
 0.328,
 0.349,
 0.314,
 0.32,
 0.33,
 0.334,
 0.335,
 0.347,
 0.353,
 0.352,
 0.357,
 0.32]

In [10]:
error_linear_val

[0.301,
 0.307,
 0.301,
 0.302,
 0.301,
 0.302,
 0.301,
 0.301,
 0.301,
 0.302,
 0.3,
 0.301,
 0.301,
 0.301,
 0.301,
 0.3,
 0.304,
 0.301,
 0.301,
 0.301]

In [11]:
net

OPNet(
  (classifier_module): classifier_module(
    (classifier_linear): Linear(in_features=24, out_features=1, bias=True)
  )
  (error_modules): ModuleList(
    (0): error_module(
      (error_linear): Linear(in_features=24, out_features=1, bias=True)
    )
    (1): error_module(
      (error_linear): Linear(in_features=24, out_features=1, bias=True)
    )
  )
)