In [30]:
import random
from random import randint
import numpy as np

class Winnow:
    numInput = 0
    weights = []
    threshold = 0.0
    alpha = 0.0
    
    def __init__(self, numInput, rndSeed):
        self.numInput = numInput
        self.weights = [0] *numInput
        for i in range(len(self.weights)):
            self.weights[i] = numInput / 2.0
        self.threshold = 1.0 * numInput
        self.alpha = 2.0
        random.seed( rndSeed )
    
    # returns double, int[][] trainData
    def Accuracy(self, trainData):
        numCorrect = 0
        numWrong = 0
        xValues = [0] *numInput
        
        for i in range(len(trainData)):
            xValues = np.copy(trainData[i])
            target = trainData[i][numInput] #last value is target
            computed = self.ComputeY(xValues)

            if computed == target:
                numCorrect += 1
            else:
                numWrong += 1
                
        return (numCorrect * 1.0) / (numCorrect + numWrong)

    # Fisher-Yates shuffle algorithm int[][] trainData
    def ShuffleObservations(self, trainData):
        for i in range(len(trainData)):
            r = randint(i, len(trainData) - 1)
            tmp = []
            tmp = trainData[r]
            trainData[r] = trainData[i]
            trainData[i] = tmp
            
    # returns double[], int[][] trainData
    def TrainWeights(self, trainData):
        xValues = [] * numInput
        self.ShuffleObservations(trainData)
        for i in range(len(trainData)):
            #  get the inputs
            xValues = np.copy(trainData[i])
            
            #  last value is target
            target = trainData[i][numInput] 
            
            computed = self.ComputeY(xValues)

            if (computed == 1 and target == 0):
                # need to decrease weight:
                for j in range(numInput):
                    if (xValues[j] == 0): continue
                    self.weights[j] = self.weights[j] / self.alpha #demotion
            elif (computed == 0 and target == 1):
                # need to increase weight:
                for j in range(numInput):
                    if (xValues[j] == 0): continue
                    self.weights[j] = self.weights[j] * self.alpha #promotion

        result = [0.0] *numInput # = number weights
        result = self.weights
        return result
    
    # int[] xValues
    def ComputeY(self, xValues):
        sum = 0.0
        for i in range(numInput):
            sum += self.weights[i] * xValues[i]
        if sum > self.threshold:
            return 1
        else:
            return 0

In [31]:
import numpy as np

# int[][] data, seed, out int[][] trainData, out int[][] testData
def MakeTrainTest(data, pct, seed):
    totRows = data.shape[0] #compute number of rows in each result
    numTrainRows = int(totRows * pct)
    numTestRows = totRows - numTrainRows
    #trainData = new int[numTrainRows][]
    trainData = np.empty(data.shape)
    #testData = new int[numTestRows][]
    testData = np.empty(data.shape)
    copy = np.empty(data.shape)

    # int[][] copy = new int[data.Length][] #  make a copy of data
    for i in range(copy.shape[0]):
        # by reference to save space
        copy[i] = data[i]
    for i in range(copy.shape[0]):
        # scramble row order of copy
        r = randint(i, copy.shape[0] - 1)
        tmp = copy[r]
        copy[r] = copy[i]
        copy[i] = tmp
    for i in range(numTrainRows):
        # create training
        trainData[i] = copy[i]
    for i in range(numTestRows):
        # create test
        testData[i] = copy[i + numTrainRows]
        
    return trainData, testData

In [32]:
# int[][] matrix
def ShowMatrix(matrix, decimals, numRows, indices):
    frmt = '%.' + str(decimals) + 'f'
    for i in range(numRows):
        if (indices):
            print("[" + '%02d' % i + "]   ", end='')
        for j in range(len(matrix[i])):
            print(frmt % matrix[i][j] + " ", end='')
        print("")
    lastIndex = len(matrix) - 1
    if (indices):
        print("[" + '%02d' % lastIndex + "]   ", end='')
    for j in range(len(matrix[lastIndex])):
        print(frmt % matrix[lastIndex][j] + " ", end='')
    print("")

In [33]:
# double[] vector
def ShowVector(vector, decimals, valsPerRow, newLine):
    frmt = '%.' + str(decimals) + 'f'
    for i in range(len(vector)):
        if (i % valsPerRow == 0): print("", end='')
        print(frmt % vector[i] + " ", end='')
    if (newLine): print("")

In [34]:
import pandas as pd
data = pd.read_csv("data/SPECT.train")
data.head(10)

Unnamed: 0,1,0,0.1,0.2,1.1,0.3,0.4,0.5,1.2,1.3,...,1.4,1.5,0.9,0.10,0.11,0.12,0.13,0.14,0.15,0.16
0,1,0,0,1,1,0,0,0,1,1,...,1,1,0,0,0,0,0,0,0,1
1,1,1,0,1,0,1,0,0,1,0,...,1,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,1,1
3,1,0,0,0,0,0,0,0,1,0,...,1,0,1,1,0,0,0,0,0,0
4,1,0,0,0,1,0,0,0,0,1,...,1,1,0,1,0,0,0,1,0,1
5,1,1,0,1,1,0,0,0,1,0,...,1,0,0,0,0,0,0,0,1,1
6,1,0,0,1,0,0,0,0,1,0,...,1,0,0,0,0,0,0,0,0,1
7,1,0,0,1,0,0,0,1,1,0,...,1,0,1,0,0,0,0,0,1,1
8,1,0,1,0,0,0,0,1,1,0,...,0,0,0,0,0,0,0,0,0,0
9,1,1,1,0,0,1,0,1,0,0,...,1,0,0,1,1,1,1,1,0,1


In [35]:
print("First few lines of all data are:")
ShowMatrix(data, 0, 4, True)

First few lines of all data are:
[00]   

KeyError: 0

In [23]:
diagnosis = data['1']
diagnosis.head(10)

0    1
1    1
2    1
3    1
4    1
5    1
6    1
7    1
8    1
9    1
Name: 1, dtype: int64

In [24]:
# drop first column from training data: It's our label
data2 = data.drop(labels='1', axis=1)
data2.head(10)

Unnamed: 0,0,0.1,0.2,1.1,0.3,0.4,0.5,1.2,1.3,0.6,...,1.4,1.5,0.9,0.10,0.11,0.12,0.13,0.14,0.15,0.16
0,0,0,1,1,0,0,0,1,1,0,...,1,1,0,0,0,0,0,0,0,1
1,1,0,1,0,1,0,0,1,0,1,...,1,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,1,1
3,0,0,0,0,0,0,0,1,0,0,...,1,0,1,1,0,0,0,0,0,0
4,0,0,0,1,0,0,0,0,1,0,...,1,1,0,1,0,0,0,1,0,1
5,1,0,1,1,0,0,0,1,0,1,...,1,0,0,0,0,0,0,0,1,1
6,0,0,1,0,0,0,0,1,0,0,...,1,0,0,0,0,0,0,0,0,1
7,0,0,1,0,0,0,1,1,0,0,...,1,0,1,0,0,0,0,0,1,1
8,0,1,0,0,0,0,1,1,0,0,...,0,0,0,0,0,0,0,0,0,0
9,1,1,0,0,1,0,1,0,0,1,...,1,0,0,1,1,1,1,1,0,1


In [25]:
print("Splitting data into 80% train and 20% test matrices")
trainData, testData = MakeTrainTest(data2.values, 0.8, 17)

Splitting data into 80% train and 20% test matrices


In [26]:
print("First few rows of testing data are:")
ShowMatrix(testData, 0, 3, True)

First few rows of testing data are:
[00]   1 0 0 0 1 1 0 1 0 0 1 0 0 0 0 0 0 0 1 1 0 0 
[01]   0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
[02]   0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
[78]   0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 


In [28]:
print("First few rows of training data are:")
ShowMatrix(trainData, 0, 3, True)

First few rows of training data are:
[00]   0 0 1 1 0 0 0 1 1 0 0 0 1 1 0 0 0 0 0 0 0 1 
[01]   0 0 1 1 0 0 0 1 1 0 0 0 1 1 0 0 0 0 0 0 0 1 
[02]   0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 1 1 
[78]   1 0 1 0 1 0 1 1 0 1 0 1 1 0 0 0 1 0 0 1 1 0 


In [27]:
%%time

print("Begin training using Winnow algorithm")
numInput = 22
w = Winnow(numInput, 0) #rndSeed = 0
weights = w.TrainWeights(trainData)
print("Training complete")

Begin training using Winnow algorithm


IndexError: index 22 is out of bounds for axis 0 with size 22

In [29]:
print("Final model weights are:")
ShowVector(weights, 4, 8, True)

Final model weights are:


NameError: name 'weights' is not defined

In [None]:
trainAcc = w.Accuracy(trainData)
testAcc = w.Accuracy(testData)

print("Prediction accuracy on training data = " + str(trainAcc))
print("Prediction accuracy on test data = " + str(testAcc))

In [None]:
print("Predicting diagnosis of patient with all abnormal readings: ", end='')
yays = [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ]
predicted = w.ComputeY(yays)
if predicted == 0:
    print("normal")
else:
    print("abnormal")

print("Predicting diagnosis of patient with all normal readings: ", end='')
nays = [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ]
predicted2 = w.ComputeY(nays)
if predicted2 == 0:
    print("normal")
else:
    print("abnormal")