In [1]:
import random
from random import randint
import numpy as np

class Winnow:
    numInput = 0
    weights = []
    threshold = 0.0
    alpha = 0.0
    
    def __init__(self, numInput, rndSeed):
        self.numInput = numInput
        self.weights = [0] *numInput
        for i in range(len(self.weights)):
            self.weights[i] = numInput / 2.0
        self.threshold = 1.0 * numInput
        self.alpha = 2.0
        random.seed( rndSeed )
    
    # returns double, int[][] trainData
    def Accuracy(self, trainData):
        numCorrect = 0
        numWrong = 0
        xValues = [0] *numInput
        
        for i in range(len(trainData)):
            xValues = np.copy(trainData[i])
            target = trainData[i][numInput] #last value is target
            computed = self.ComputeY(xValues)

            if computed == target:
                numCorrect += 1
            else:
                numWrong += 1
                
        return (numCorrect * 1.0) / (numCorrect + numWrong)

    # Fisher-Yates shuffle algorithm int[][] trainData
    def ShuffleObservations(self, trainData):
        for i in range(len(trainData)):
            r = randint(i, len(trainData) - 1)
            tmp = []
            tmp = trainData[r]
            trainData[r] = trainData[i]
            trainData[i] = tmp
            
    # returns double[], int[][] trainData
    def TrainWeights(self, trainData):
        xValues = [] * numInput
        self.ShuffleObservations(trainData)
        for i in range(len(trainData)):
            #  get the inputs
            xValues = np.copy(trainData[i])
            
            #  last value is target
            target = trainData[i][numInput] 
            
            computed = self.ComputeY(xValues)

            if (computed == 1 and target == 0):
                # need to decrease weight:
                for j in range(numInput):
                    if (xValues[j] == 0): continue
                    self.weights[j] = self.weights[j] / self.alpha #demotion
            elif (computed == 0 and target == 1):
                # need to increase weight:
                for j in range(numInput):
                    if (xValues[j] == 0): continue
                    self.weights[j] = self.weights[j] * self.alpha #promotion

        result = [0.0] *numInput # = number weights
        result = self.weights
        return result
    
    # returns double[], int[][] trainData
    def TrainMoreLayerWeights(self, trainData, layerNumber):
        xValues = [] * numInput
        self.ShuffleObservations(trainData)
        for layer in range(layerNumber):
            for i in range(len(trainData)):
                #  get the inputs
                xValues = np.copy(trainData[i])

                #  last value is target
                target = trainData[i][numInput] 

                computed = self.ComputeY(xValues)

                if (computed == 1 and target == 0):
                    # need to decrease weight:
                    for j in range(numInput):
                        if (xValues[j] == 0): continue
                        self.weights[j] = self.weights[j] / self.alpha #demotion
                elif (computed == 0 and target == 1):
                    # need to increase weight:
                    for j in range(numInput):
                        if (xValues[j] == 0): continue
                        self.weights[j] = self.weights[j] * self.alpha #promotion

        result = [0.0] *numInput # = number weights
        result = self.weights
        return result
    
    # int[] xValues
    def ComputeY(self, xValues):
        sum = 0.0
        for i in range(numInput):
            sum += self.weights[i] * xValues[i]
        if sum > self.threshold:
            return 1
        else:
            return 0

In [2]:
import numpy as np

# int[][] data, seed, out int[][] trainData, out int[][] testData
def MakeTrainTest(data, pct, seed):
    totRows = data.shape[0] #compute number of rows in each result
    numTrainRows = int(totRows * pct)
    numTestRows = totRows - numTrainRows
    #trainData = new int[numTrainRows][]
    trainData = np.empty(data.shape)
    #testData = new int[numTestRows][]
    testData = np.empty(data.shape)
    copy = np.empty(data.shape)

    # int[][] copy = new int[data.Length][] #  make a copy of data
    for i in range(copy.shape[0]):
        # by reference to save space
        copy[i] = data[i]
    for i in range(copy.shape[0]):
        # scramble row order of copy
        r = randint(i, copy.shape[0] - 1)
        tmp = copy[r]
        copy[r] = copy[i]
        copy[i] = tmp
    for i in range(numTrainRows):
        # create training
        trainData[i] = copy[i]
    for i in range(numTestRows):
        # create test
        testData[i] = copy[i + numTrainRows]
        
    return trainData, testData

In [3]:
# int[][] matrix
def ShowMatrix(matrix, decimals, numRows, indices):
    frmt = '%.' + str(decimals) + 'f'
    for i in range(numRows):
        if (indices):
            print("[" + '%02d' % i + "]   ", end='')
        for j in range(len(matrix[i])):
            print(frmt % matrix[i][j] + " ", end='')
        print("")
    lastIndex = len(matrix) - 1
    if (indices):
        print("[" + '%02d' % lastIndex + "]   ", end='')
    for j in range(len(matrix[lastIndex])):
        print(frmt % matrix[lastIndex][j] + " ", end='')
    print("")

In [4]:
# double[] vector
def ShowVector(vector, decimals, valsPerRow, newLine):
    frmt = '%.' + str(decimals) + 'f'
    for i in range(len(vector)):
        if (i % valsPerRow == 0): print("", end='')
        print(frmt % vector[i] + " ", end='')
    if (newLine): print("")

In [5]:
import pandas as pd
data = pd.read_csv("data/SPECT.test")
# print(data.values.tolist())
data.head(10)


Unnamed: 0,1,1.1,0,0.1,1.2,1.3,0.2,0.3,0.4,1.4,...,0.7,1.6,1.7,1.8,0.8,0.9,1.9,1.10,0.10,0.11
0,1,1,0,0,1,1,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
1,1,0,0,0,1,0,1,0,0,1,...,0,1,1,0,0,0,0,0,0,1
2,1,0,1,1,1,0,0,1,0,1,...,1,1,0,1,0,0,0,0,1,0
3,1,0,0,1,0,0,0,0,1,0,...,1,1,0,1,0,0,0,0,0,1
4,1,0,0,1,1,0,1,0,0,1,...,1,0,0,1,0,0,0,0,1,1
5,1,1,0,0,1,0,0,1,1,1,...,1,1,0,1,0,0,0,1,0,1
6,1,1,0,0,1,0,0,0,0,1,...,0,1,1,0,0,0,0,0,0,0
7,1,0,0,0,0,0,0,0,1,0,...,1,0,0,0,0,0,0,0,0,0
8,1,1,0,0,1,1,1,0,0,1,...,0,1,0,1,1,0,1,0,0,0
9,1,1,0,0,0,1,0,0,0,1,...,1,1,1,0,0,0,1,0,0,0


In [6]:
print("First few lines of all data are:")
ShowMatrix(data.values.tolist(), 0, 4, True)

First few lines of all data are:
[00]   1 1 0 0 1 1 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 
[01]   1 0 0 0 1 0 1 0 0 1 0 1 0 0 1 1 0 0 0 0 0 0 1 
[02]   1 0 1 1 1 0 0 1 0 1 0 0 1 1 1 0 1 0 0 0 0 1 0 
[03]   1 0 0 1 0 0 0 0 1 0 0 1 0 1 1 0 1 0 0 0 0 0 1 
[185]   0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 


In [7]:
diagnosis = data['1']
diagnosis.head(10)

0    1
1    1
2    1
3    1
4    1
5    1
6    1
7    1
8    1
9    1
Name: 1, dtype: int64

In [8]:
# drop first column from training data: It's our label
data2 = data.drop(labels='1', axis=1)
data2.head(10)

Unnamed: 0,1.1,0,0.1,1.2,1.3,0.2,0.3,0.4,1.4,1.5,...,0.7,1.6,1.7,1.8,0.8,0.9,1.9,1.10,0.10,0.11
0,1,0,0,1,1,0,0,0,0,1,...,1,0,0,0,0,0,0,0,0,0
1,0,0,0,1,0,1,0,0,1,0,...,0,1,1,0,0,0,0,0,0,1
2,0,1,1,1,0,0,1,0,1,0,...,1,1,0,1,0,0,0,0,1,0
3,0,0,1,0,0,0,0,1,0,0,...,1,1,0,1,0,0,0,0,0,1
4,0,0,1,1,0,1,0,0,1,0,...,1,0,0,1,0,0,0,0,1,1
5,1,0,0,1,0,0,1,1,1,1,...,1,1,0,1,0,0,0,1,0,1
6,1,0,0,1,0,0,0,0,1,1,...,0,1,1,0,0,0,0,0,0,0
7,0,0,0,0,0,0,0,1,0,0,...,1,0,0,0,0,0,0,0,0,0
8,1,0,0,1,1,1,0,0,1,1,...,0,1,0,1,1,0,1,0,0,0
9,1,0,0,0,1,0,0,0,1,1,...,1,1,1,0,0,0,1,0,0,0


In [9]:
print("Splitting data into 80% train and 20% test matrices")
trainData, testData = MakeTrainTest(data2.values, 0.8, 17)

Splitting data into 80% train and 20% test matrices


In [10]:
print("First few rows of testing data are:")
ShowMatrix(testData, 0, 3, True)

First few rows of testing data are:
[00]   0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 1 0 0 0 0 0 1 
[01]   1 0 1 0 1 0 0 1 0 0 0 0 1 1 0 1 0 0 0 1 1 1 
[02]   1 0 0 0 0 0 0 1 0 0 0 0 1 1 0 0 0 0 0 0 0 1 
[185]   0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 


In [11]:
print("First few rows of training data are:")
ShowMatrix(trainData, 0, 3, True)

First few rows of training data are:
[00]   1 0 0 1 1 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 
[01]   0 0 0 1 0 1 0 0 1 0 1 0 0 1 1 0 0 0 0 0 0 1 
[02]   0 1 1 1 0 0 1 0 1 0 0 1 1 1 0 1 0 0 0 0 1 0 
[185]   0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 


In [12]:
%%time

print("Begin training using Winnow algorithm")
numInput = 21
w = Winnow(numInput, 23) #rndSeed = 0
weights = w.TrainWeights(trainData)
print("Training complete")

Begin training using Winnow algorithm
Training complete
Wall time: 3.5 ms


In [13]:
print("One layer model weights are:")
ShowVector(weights, 4, 8, True)

One layer model weights are:
0.0001 0.0002 0.1641 0.1641 0.0000 2.6250 0.0026 21.0000 0.1641 0.0000 2.6250 0.0410 2.6250 0.1641 0.0205 0.0103 1.3125 0.0820 0.0205 0.0026 10.5000 


In [14]:
trainAcc = w.Accuracy(trainData)
testAcc = w.Accuracy(testData)

print("Prediction accuracy on training data = " + str(trainAcc))
print("Prediction accuracy on test data = " + str(testAcc))

Prediction accuracy on training data = 0.7634408602150538
Prediction accuracy on test data = 0.9086021505376344


In [15]:
%%time

print("Begin training using Winnow algorithm in more layer")
numInput = 21
w = Winnow(numInput, 0) #rndSeed = 0
weights = w.TrainMoreLayerWeights(trainData,1000)
print("Training complete")

Begin training using Winnow algorithm in more layer
Training complete
Wall time: 2.09 s


In [16]:
trainAcc = w.Accuracy(trainData)
testAcc = w.Accuracy(testData)

print("Prediction accuracy on training data = " + str(trainAcc))
print("Prediction accuracy on test data = " + str(testAcc))

Prediction accuracy on training data = 0.6290322580645161
Prediction accuracy on test data = 0.8602150537634409


In [17]:
print("Predicting diagnosis of patient with all abnormal readings: ", end='')
yays = [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ]
predicted = w.ComputeY(yays)
if predicted == 0:
    print("normal")
else:
    print("abnormal")

print("Predicting diagnosis of patient with all normal readings: ", end='')
nays = [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ]
predicted2 = w.ComputeY(nays)
if predicted2 == 0:
    print("normal")
else:
    print("abnormal")

Predicting diagnosis of patient with all abnormal readings: abnormal
Predicting diagnosis of patient with all normal readings: normal
