In [0]:
import numpy as np 
import time 
import scipy.io as sio

Model for algorithm created in: Radu Dogaru, Ioana Dogaru, "Optimized Super Fast Support Vector Classifiers Using Python and Acceleration of RBF Computations", (2018) ; https://ieeexplore.ieee.org/document/8484742

Code taken from: https://github.com/radu-dogaru/Super_Fast_Vector_Classifier

and used to test dataset

# Loading Data

In [2]:
from google.colab import drive
drive.mount('/content/drive/')

import os
os.chdir("/content/drive/My Drive/Colab Notebooks/sfsvc")

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=email%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdocs.test%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive.photos.readonly%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fpeopleapi.readonly&response_type=code

Enter your authorization code:
··········
Mounted at /content/drive/


In [3]:
ls

banet11c_test.mat   botnet2_8_test.mat   botnet2_train.mat  last_model.mat
banet11c_train.mat  botnet2_8_train.mat  botnet_test.mat    usps_test.mat
banet3c_train.mat   botnet2_test.mat     botnet_train.mat   usps_train.mat


In [0]:
train = sio.loadmat('usps_train.mat')
test = sio.loadmat('usps_test.mat')

In [0]:
#sio.whosmat('usps_train.mat')
sio.whosmat('botnet_train.mat')

[('Samples', (116, 49438), 'double'), ('Labels', (1, 49438), 'double')]

In [0]:
sio.whosmat('banet11c_train.mat')

[('Samples', (116, 49438), 'double'), ('Labels', (1, 49438), 'double')]

# Algorithm Parameters

In [0]:
#nume='optd64'  #  Dataset files _train.mat _test.mat 
#nume='usps'
#nume='botnet'
#nume='botnet2'
#nume='banet3c'
#nume='banet11c'
nume = 'botnet2_8'

# several such pairs are provided here  
# http://atm.neuro.pub.ro/radu_d/html/09_10/ici2009/k-lab/date-ici.html


raza=25.4; raza=float(raza) # Radius (start with bigger values and decreas until optimal performance)

prag=1; prag=float(prag)  # Activation (overlap) threshold - start with 1  
typ=1  # RBF function: 2 - Gaussian , # 1- triangular (best suited for HW)
alea=0 # if 1 - input samples are radnomized -> will slightly influence performance (like in ELM)
first_samples = 0 # 0  all samples are considered  ; n>0  - first n samples are considered (useful for faster radius tuning)

# Algorithm Functions

In [0]:
# Optimized (Intel MKL based) computation of RBF layers 
# Best speed obtained for float32 variables 
def rbf_layer_mkl(Samples,inW,raza,typ):
    N=np.size(Samples,0)
    n=np.size(Samples,1)
    m=np.size(inW,1)
    Ocol = np.ones((n,1), dtype = np.float32)
    Qlin = Ocol.T
    # (a-b)^2 equival with a^2+b^2-2a*b (main computation a*b)
    d=np.repeat(np.dot(Samples*Samples,Ocol),m,axis=1)+np.repeat(np.dot(Qlin,inW*inW),N,axis=0)-2*np.dot(Samples,inW)
    if typ==1:
        d=1-d/(raza*2.5066)
        Hrbf=(d+np.abs(d))/2.0
    elif typ==2:
        kgauss=-1/(2*raza*raza)
        Hrbf=np.exp(kgauss*d*d) #  
        
    return Hrbf

In [0]:
def novelty_layer_compute(Sa,r,pra,ty):
# With given parameters (radius (r), threshold (pra) and RBF type (ty))
# Computes the "tix" (indexes) of the "support vectors" selected from 
# the Samples batch (Sa) 
# Returns the selected support vectors 
    
    N=np.size(Sa,0)
    tix=np.array([]).astype(np.int32)
    # First support vector is associated with first index 
    tix=np.append(tix,0)
    # Support vector selection loop 
    for i in range(1,N):
        # Compute the RBF layer activity for the actual inW
        # when the current sample (only one) is applied 
        # - using rbf_layer_mkl 
        Hid=rbf_layer_mkl(Sa[i:(i+1),:],Sa[tix,:].T, r, ty)
        Activity=np.sum(Hid)
        if Activity<pra:
            # A new Support vector is added as the current sample
            tix=np.append(tix,i)
    return Sa[tix,:].T

In [0]:
def sfsvc_train(Sa,La,ra,pra,ty,alea):
# Implements the "supervized" (class based) training a.k.a SFSVC 
# For each class a selection of support vectors is found using novelty_layer 
# and the number of neurons for each class is computed 
# Returns the input layer matrix (inW) and the list of neurons for each class 
    
    M=np.max(Labels)
    N=np.size(Samples,0)  
    
    if alea==1:
        ixpe=np.random.permutation(N)
        Sa=Sa[ixpe,:]
        La=La[:,ixpe]        
        
    k=0 # first class - find the support vectors 
    ixk=np.where(La==(k+1))
    Sk=Sa[ixk[1],:] 
    inW=novelty_layer_compute(Sk,ra,pra,ty)
    nk=np.size(inW,1)  # number of neurons in class k 
    neuroni=np.array([]).astype(np.int32)   
    neuroni=np.append(neuroni,nk)
    for k in range(1,M):      # next 2-M classes - find support vectors
        ixk=np.where(La==(k+1)) # select class k indices
        Sk=Sa[ixk[1],:]  # Sk is Samples of class k 
        rbfk=novelty_layer_compute(Sk,ra,pra,ty) 
        nk=np.size(rbfk,1)  # number of neurons in class k 
        neuroni=np.append(neuroni,nk)
        inW=np.append(inW,rbfk,axis=1)
    return (inW, neuroni)

In [0]:
def create_outw(inW,neur):
# Creates the output layer (outW) composed of bunary weights 0 or 1 
# Returns outW 
    
    M=np.size(neur)
    m=np.size(inW,1)
    outW=np.zeros((m,M)).astype('float32')
    l1=0; l2=0; nk=0
    for k in range(M):
        l1=l1+nk; 
        nk=neur[k]; 
        l2=l2+nk; 
        outW[range(l1,l2),k]=1    
    return outW

# Main

In [9]:
# Resulting model is given by:  tip , radius, inW, outW 
#--------------------------------------------------------
#  reads the training set 
timer = time.time()
db=sio.loadmat(nume+'_train.mat')
Samples=db['Samples'].astype('float32')
Samples=Samples.T

Labels=db['Labels'].astype('int8')
N=np.size(Samples,0)
n=np.size(Samples,1)
M=np.max(Labels)
runtime = time.time() - timer
print( " load train data time: %f s" % runtime)

if first_samples>0:
    if first_samples>N: 
        first_samples=N
    N=first_samples
    Samples=Samples[0:N,:]
Labels=Labels[:,0:N]

 load train data time: 0.524085 s


# Training and Testing

In [10]:
# ================ TRAIN SFSV =======================
# Implements the Super Fast Support Vector Classifiers train 
# In fact a selection of support vectors form training set 

timer = time.time()
(inW,neur)=sfsvc_train(Samples,Labels,raza,prag,typ,alea)
Wad=create_outw(inW,neur)
runtime = time.time() - timer
print( " TRAINING time: %f s" % runtime)

#=========LOAD Test Set ================================

timer = time.time()
db=sio.loadmat(nume+'_test.mat')
Samples=db['Samples'].astype('float32')
Samples=Samples.T
Labels=db['Labels'].astype('int8')
N=np.size(Samples,0)
n=np.size(Samples,1)
M=np.max(Labels)

runtime = time.time() - timer
print( " load test data time: %f s" % runtime)

#===================TEST phase (retrieval) ==========================
timer = time.time()
Hrbfk=rbf_layer_mkl(Samples,inW,1*raza,typ)
# Here one can try altered RBF layers (e.g. with typ=1)
# to improve implementations 
# Also radius may change from what was used in training phase 
# 1.2 coefficnet may help .. 

Scores=np.dot(Hrbfk,Wad)
runtime = time.time() - timer
print( " PREDICTION (test data) time: %f s" % runtime)

#================= Evaluate Accuracy for the Test set  =============================

Conf=np.zeros((M,M),dtype='int16')
for i in range(N):
    # gasire pozitie clasa prezisa 
    ix=np.where(Scores[i,:]==np.max(Scores[i,:]))
    ixx=np.array(ix)
    pred=int(ixx[0,0])
    actual=Labels[0,i]-1
    Conf[actual,pred]+=1
accuracy=100.0*np.sum(np.diag(Conf))/np.sum(np.sum(Conf))
print("Confusion matrix: ")
print(Conf)
print("Accuracy: %f" %accuracy)
print( "Neurons for each class layer: ", neur)
nr_neuroni=np.sum(neur)
print( "Total number of hidden layer: %d" %nr_neuroni)
print( "RBF type:  (1=Triangular; 2=Gaussian): %d" %typ)
print( "radius: %f" %raza)
print( "overlapping factor: %f" %prag)
print( "alea: %d" %alea)
# ========================================  SAVE THE MODEL ========================
# Salvare LAST_MODEL 
last_model={'inW':inW, 'Wad':Wad, 'tip':typ, 'raza':raza, 'accuracy':accuracy, 'nume':nume}
sio.savemat('last_model.mat',last_model)

 TRAINING time: 1.593929 s
 load test data time: 0.257109 s
 PREDICTION (test data) time: 0.529371 s
Confusion matrix: 
[[ 382   30    0    0    0    0    0    0]
 [  82   22    0    0    0    0    0    0]
 [   3    0 5755    0    0   41    0    0]
 [   0    0    0 5732    0    0    0    0]
 [   3    0    0    0    4    0    0    1]
 [   2    0  370    0    0 3345    0    0]
 [   0    0    0    0    0    0 6227    0]
 [  10    0    0    0    0    0    0    2]]
Accuracy: 97.537595
Neurons for each class layer:  [437 200 145  26  12  93  54  19   7   1]
Total number of hidden layer: 994
RBF type:  (1=Triangular; 2=Gaussian): 1
radius: 25.400000
overlapping factor: 1.000000
alea: 0
