In [1]:
%matplotlib widget 
# interactive widget backend, needed for the interactive 3D plot

import pandas as pd
import numpy as np
import sklearn
import matplotlib.pyplot as plt
import seaborn as sns
import pickle
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
from mpl_toolkits.mplot3d import Axes3D
from sklearn import metrics

# import ipyvolume as ipv
# %matplotlib inline

In [2]:

def drawPlane(ara, xlim = (-5,20), ylim= (-5,20), ax=None):
    """Draw the plane ``a*x + b*y + c*z + d = 0`` as a surface.

    Parameters
    ----------
    ara : indexable of at least 4 numbers
        Plane coefficients, read as ``a, b, c, d = ara[0..3]``.
        ``c`` is used as a divisor, so it must be non-zero for a finite surface.
    xlim, ylim : (low, high) pairs
        Extent of the x / y grid the plane is evaluated on
        (``np.linspace`` default sampling along each axis).
    ax : object with a ``plot_surface(X, Y, Z)`` method, optional
        Axes to draw on. When omitted, the module-level variable ``ax`` is
        used, which is how the function behaved before this parameter existed.

    Returns
    -------
    Whatever ``ax.plot_surface`` returns, so callers can restyle or remove it.

    Raises
    ------
    NameError
        If no ``ax`` is passed and no module-level ``ax`` exists yet.
    """
    if ax is None:
        # Backward-compatible fallback: older callers rely on a global `ax`.
        try:
            ax = globals()['ax']
        except KeyError:
            raise NameError(
                "drawPlane needs an `ax` argument or a module-level `ax` variable"
            ) from None

    a, b, c, d = ara[0], ara[1], ara[2], ara[3]

    x = np.linspace(xlim[0], xlim[1])
    y = np.linspace(ylim[0], ylim[1])
    X, Y = np.meshgrid(x, y)

    # Solve a*x + b*y + c*z + d = 0 for z on the grid.
    Z = (a * X + b * Y + d) / -c

    return ax.plot_surface(X, Y, Z)

# Multi-Class Perceptron (Kesler)

In [3]:
class Perceptron:
    """Multi-class perceptron with one weight vector per class.

    Each weight vector has one entry per feature plus a trailing bias entry.
    A sample is assigned to the class whose weight vector yields the largest
    dot product with the (bias-augmented) sample.
    """

    def __init__(self,learning_rate):
        """Store the step size applied to every weight update."""
        self.eta = learning_rate

    def fit(self, x_train, y_train, numEpoch= 500):
        """Train on ``x_train`` / ``y_train`` and keep the best weights found.

        Parameters
        ----------
        x_train : 2-D array-like of numbers, one row per sample.
        y_train : 1-D array-like of hashable, sortable class labels.
        numEpoch : int
            Maximum number of passes over the training set.

        Side effects: prints the sample count and per-epoch progress, and
        (via ``updateWeight``) plots decision planes when there are exactly
        three features.
        """
        print(len(y_train))

        features = np.array(x_train, dtype=float)
        # Append a constant 1 column so the last weight acts as the bias.
        self.x_train = np.hstack((features, np.ones([len(y_train), 1], dtype=float)))
        self.y_train = np.array(y_train, dtype='O')

        # Sorted distinct labels; position in this array is the class index.
        self.labels = np.unique(self.y_train)
        # One independent zero vector per class (no shared array objects).
        self.weight = [np.zeros(self.x_train.shape[1], dtype=float) for _ in self.labels]
        self.dataset = np.column_stack((self.x_train, self.y_train))
        # costFunc returns an integer count, so this threshold means "zero errors".
        self.thresh = 0.05
        self.classes = {label: index for index, label in enumerate(self.labels)}
        self.numEpoch = numEpoch
        self.updateWeight()

    def getActivation(self, weight, x):
        """Return the dot product of a class weight vector with a sample."""
        return np.dot(weight, x)

    def predict(self, x_test):
        """Return the label of the class with the highest activation.

        ``x_test`` may be any 1-D sequence (list, tuple, ndarray). If its
        length differs from the weight length it is treated as a raw feature
        vector and a bias term of 1.0 is appended; otherwise it is assumed to
        already carry the bias entry (as the stored training rows do).
        Ties go to the lowest class index.
        """
        sample = np.asarray(x_test, dtype=float)
        if len(sample) != len(self.weight[0]):
            sample = np.append(sample, 1.0)  # add 1 to match the weight dimension

        activations = [self.getActivation(weight, sample) for weight in self.weight]
        best = int(np.argmax(activations))

        labels = getattr(self, 'labels', None)
        if labels is None:
            # Weights were set without fit(): keep the historical 1-based class id.
            return best + 1
        return labels[best]

    def updateWeight(self):
        """Run the training epochs and keep the lowest-cost weights seen.

        For every sample, each rival class whose activation is at least the
        true class's activation is pushed away from the sample and the true
        class is pulled toward it. The true-class activation is evaluated once
        per sample, before that sample's updates. The training cost is tracked
        after every weight change and the most recent weights with the lowest
        cost are restored at the end. Training stops early once an epoch ends
        with cost <= ``self.thresh``.
        """
        it = 0
        # Start from the current weights so there is always a usable result.
        bestCost, bestWeight = float('inf'), list(self.weight)
        cost = None

        while it < self.numEpoch:

            for row in range(len(self.y_train)):
                x = self.x_train[row, :]

                actual_cls = self.classes[self.y_train[row]]
                actual_cls_activation = self.getActivation(self.weight[actual_cls], x)

                changed = False
                for rival in range(len(self.weight)):
                    if rival == actual_cls:
                        continue
                    if self.getActivation(self.weight[rival], x) >= actual_cls_activation:
                        self.weight[rival] = self.weight[rival] - self.eta * x
                        self.weight[actual_cls] = self.weight[actual_cls] + self.eta * x
                        changed = True

                # The cost can only differ from the last evaluation if a weight
                # moved, so skip the full pass over the data otherwise.
                if changed or cost is None:
                    cost = self.costFunc(self.x_train, self.y_train)
                    if cost <= bestCost:
                        bestCost, bestWeight = cost, self.weight.copy()

            epochCost = self.costFunc(self.x_train, self.y_train)
            print('iteration ', it , ' ->', epochCost)

            if epochCost <= self.thresh:
                print(epochCost)
                break

            it += 1

        self.weight = bestWeight
        print(self.weight)
        self._drawPlanes()

    def _drawPlanes(self):
        """Plot the planes of the first two classes via the global ``drawPlane``.

        Only weight vectors with exactly four entries (three features plus
        bias) are drawn, since ``drawPlane`` reads four plane coefficients;
        models of other sizes are trained without plotting.
        """
        for plane in self.weight[:2]:
            if len(plane) == 4:
                drawPlane(plane)

    def costFunc(self, x_list, y_actual):
        """Return how many samples in ``x_list`` are predicted differently from ``y_actual``."""
        return sum(
            int(self.predict(sample) != actual)
            for sample, actual in zip(x_list, y_actual)
        )

# 3D plotting

In [4]:
# Create the figure and the single 3-D axes that later cells draw onto
# (`ax` is read as a global by the plotting code further down).
fig, ax = plt.subplots(figsize=(6, 6), subplot_kw={'projection': '3d'})

ax.grid()
ax.set(xlabel='X Label', ylabel='Y Label')
# Kept as the last expression so the cell still echoes the z-label Text object.
ax.set_zlabel('Z Label')


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Text(0.5, 0, 'Z Label')

In [5]:
# Tokens that pandas should read as missing values in the CSV files.
missing_values = ["n/a", "na", "--", "NA", "N/A", "?"]
df = pd.read_csv('multi-class.csv', na_values=missing_values)

# All columns except the last are features; the last column is the class label.
x_train = np.array(df.iloc[:, :-1].values.tolist())
y_train = np.array(df.iloc[:, -1].values.tolist())

# Train with a learning rate of 0.5; fit() prints its progress as it runs.
p = Perceptron(learning_rate=0.5)
p.fit(x_train, y_train)

# Show the training points (first three feature columns) on the shared 3-D axes.
ax.scatter(x_train[:, 0], x_train[:, 1], x_train[:, 2], c='r')



# plt.show()


300
iteration  0  -> 100
iteration  1  -> 100
iteration  2  -> 21
iteration  3  -> 100
iteration  4  -> 65
iteration  5  -> 0
0
[array([ 1.0327,  9.7551, -1.9892, -3.5   ]), array([ 10.53225, -30.0144 ,   7.00205,  -1.5    ]), array([-11.56495,  20.2593 ,  -5.01285,   5.     ])]


<mpl_toolkits.mplot3d.art3d.Path3DCollection at 0x7f0204382550>

In [15]:
# Score the trained perceptron `p` on the test file, which is read without a
# header row; the last column is the label, the rest are features.
df = pd.read_csv('multi-test.csv', na_values=missing_values, header=None)
y_test = df.iloc[:, -1].tolist()

# predict() is called with one plain Python list of feature values per row.
y_pred = [p.predict(df.iloc[row, :-1].tolist()) for row in range(len(df))]

print('Accurcy on test set')
print(metrics.accuracy_score(y_test, y_pred) * 100)

Accurcy on test set
98.33333333333333
