In [1]:
import numpy as np
import warnings

In [2]:
# Function to adjust weights
def adjust_weights(weights, att, tar, hyp, eta):
    """
    Returns adjusted weight(s) based on the learning rate, corresponding
    attribute value(s), and the difference between the known and hypothesized
    class.
    
    Inputs:
        weights: weight(s) to be adjusted
        att: corresponding attribute(s)
        tar: known class (from data; i.e., the 'target')
        hyp: hypothesized class (from classifier)
        eta: learning rate
    Outputs:
        aw: adjusted weight(s)
    """
    return weights + (eta * (tar - hyp) * att)

In [3]:
def run_model(weights, examples):
    """
    Run linear classifier that returns 1 if the weighted sum is greater than
    zero or 0 otherwise.
    
    Inputs:
        weights: array of weight(s) making up the linear classifier
        examples: 2D array of examples to classify, one example per row
    """
    ws = np.sum(weights * examples, axis=1)
    h = ws > 0
    return h.astype(int)

In [4]:
def accuracy(weights, examples):
    """
    Return the fraction of examples whose class was correctly identified.
    
    Inputs:
        weights: array of weight(s) making up the linear classifier
        examples: 2D array of examples to classify, one example per row
    """
    h = run_model(weights=weights, examples=examples[:,0:-1])
    num_correct = np.sum(examples[:,-1] == h)
    return num_correct / len(examples)

In [33]:
# Load data
data = np.loadtxt(fname='banknote.csv', delimiter=',')

# Normalize
MAX = data.max(axis=0)
MIN = data.min(axis=0)
norm = (data - MIN) / (MAX - MIN)

# Append artificial "zeroth" attribute x0=1
# (i.e., "bias", used for weight-training)
norm = np.append(arr=np.ones([len(norm),1]), values=norm, axis=1)

# Shuffle examples
np.random.shuffle(norm)

In [6]:
# Training fraction 
train = 0.75

# Training/testing subsets
trainInd = round(train * len(norm))
trainSS = norm[:trainInd]
testSS = norm[trainInd:]

In [7]:
# Learning Rate
eta = 0.3

# Training Threshold (stop when obtained)
threshold = 0.9

In [8]:
# Initialize weights
weights = np.random.random(5)
weights_archive = weights[:]

# Epoch counter
num_epoch = 0

# Record percent error as each training example is presented
pct_error = []

In [10]:
# Initial model
trainAcc = accuracy(weights=weights, examples=trainSS) * 100
pct_error.append(100 - np.round(trainAcc, 4))

# Train
while trainAcc < threshold*100:
    print(f'Epoch {num_epoch}: Percent Error {np.round(100-trainAcc, 4)}%')

    # Update epoch counter
    num_epoch += 1
    
    # Loop through each training example
    for ex in trainSS:
        # Attributes and class label
        attributes = ex[0:-1].reshape(1,-1)
        classLabel = ex[-1]

        # Model hypothesis
        hypothesis = run_model(weights=weights,
                               examples=attributes)

        # Adjust and record weights
        weights = adjust_weights(weights=weights,
                                 att=attributes,
                                 tar=classLabel,
                                 hyp=hypothesis,
                                 eta=eta)
        weights_archive = np.row_stack((weights_archive, weights))

    # Accuracy
    trainAcc = accuracy(weights=weights, examples=trainSS) * 100
    pct_error.append(100-np.round(trainAcc, 4))

print(f'Epoch {num_epoch}: Percent Error {np.round(100-trainAcc, 4)}%\n')
    
# Final status
testAcc = accuracy(weights=weights, examples=testSS)*100
print(f'Final accuracy on train data: {np.round(trainAcc, 2)}%')
print(f'Accuracy on test data: {np.round(testAcc, 2)}%')

Epoch 1: Percent Error 55.3936%
Epoch 2: Percent Error 8.1633%

Final accuracy on train data: 91.84%
Accuracy on test data: 90.67%


***

In [2]:
class Linear:
    """
    Embarrassingly simple linear classifier using perceptron learning.
    
    Initialization inputs:
        data: str, fileneame of CSV data to be trained on, organized with row-wise 
            examples and column-wise attributes. Known classes are expected to be in 
            the last column.
        train: float, 0 < train < 1, specifies the fraction of data to be used for
            training. Defaults to 0.75.
        threshold: float, 0 < threshold < 1, the accuracy threshold above which the 
            model is considered adequate and training stops. Defaults to 0.9.
        lr: float, learning rate used to adjust weights during training. Usually <1. 
            Defaults to 0.1
        seed: int, used to set random seed prior to weight initialization for 
            reproducibility. If not supplied, weights will be initialized 
            differently for every instantiation (default).
        verbose: Boolean controlling whether model performance status should be 
            printed during training.
    """
    def __init__(self, data, train=0.75, threshold=0.9, lr=0.1, seed=None,
                 verbose=True):
        self.dfile = data
        self.training = train
        self.threshold = threshold * 100
        self.eta = lr
        self.seed = seed
        self.verbose = verbose
    
        # Load data
        self.data = np.loadtxt(fname=self.dfile, delimiter=',')

        # Normalize
        MAX = self.data.max(axis=0)
        MIN = self.data.min(axis=0)
        self.norm = (self.data - MIN) / (MAX - MIN)
        
        # Append artificial "zeroth" bias attribute x0=1
        # (used for weight-training)
        self.norm = np.append(arr=np.ones([len(self.norm),1]),
                              values=self.norm, axis=1)
        
        # Initialize
        self.initialize(shuffle=True)
        
    def __str__(self):
        return 'Embarrassingly simple linear classifier using perceptron learning.'
    
    def __repr__(self):
        return f'Embarrassingly simple linear classifier trained on {self.dfile}'

    def initialize(self, shuffle=True):
        """
        Randomly initialize weights.
        
        Input:
            shuffle: Boolean whether normalized data should be shuffled first. If 
                True (default), the newly shuffled data are also subset according to 
                'train' argument passed at instance initialization or as set by
                set_train_subset method.
        """
        # Shuffle examples
        if shuffle:
            self.shuffled = self.norm[:]
            np.random.shuffle(self.shuffled)

            # Training/testing subsets
            trainInd = round(self.training * len(self.shuffled))
            self.trainSS = self.shuffled[:trainInd]
            self.testSS = self.shuffled[trainInd:]        
    
        # Initialize weights
        wgts_rng = np.random.default_rng(seed=self.seed)
        self.weights = wgts_rng.random(5)
        
    def reset(self, shuffle=True, seed=None):
        """
        Re-initialize model to random weights for retraining.
        
        Input:
            shuffle: Boolean whether normalized data should be shuffled first. If 
                True (default), the newly shuffled data are also subset according to 
                'train' argument passed at instance initialization or as set by
                set_train_subset method.
            seed: int, optionally set random seed prior to weight initialization. 
                Set this for reproducibility. If not supplied, weights will be 
                initialized differently every time.
        """
        if seed:
            if isinstance(seed, int):
                self.seed = seed
            elif isinstance(seed, str) and seed.lower() == 'none':
                self.seed = None
            else:
                raise ValueError("'seed' must be either an int or string "\
                                 "string 'None' to set to NoneValue")
            warnings.warn('Random seed has been set and may be different than what was used to initiate this Linear instance.')
        self.initialize(shuffle=shuffle)
    
    def adjust_weights(self, att, tar, hyp):
        """
        Adjust weight(s) based on the learning rate, attribute values, and the
        difference between the known and hypothesized classes.

        Inputs:
            att: example attribute(s)
            tar: known class (from data; i.e., the 'target')
            hyp: hypothesized class (from classifier)
        """
        self.weights = self.weights + (self.eta * (tar - hyp) * att)
    
    def set_train_subset(self, fraction):
        """
        Set the fraction of data to be used for training.
        
        Input:
            fraction: float, 0 < fraction < 1
        """
        self.training = fraction
    
    def set_threshold(self, threshold):
        """
        Set the accuracy threshold above which the model is considered trained.
        
        Input:
            threshold: float, 0 < threshold < 1
        """
        self.thresh = threshold * 100
    
    def set_lr(self, lr):
        """
        Set learning rate used for training.
        
        Input:
            lr: float, usually <1
        """
        self.eta = lr

    def set_verbose(self, verbose):
        """
        Set whether model performance status should be printed during training.
        
        Input:
            verbose: Boolean
        """
        self.verbose = verbose
    
    def run_model(self, weights, examples):
        """
        Run linear classifier that returns 1 if the weighted sum is greater than
        zero or 0 otherwise.

        Inputs:
            weights: array of weight(s) making up the linear classifier
            examples: 2D array of examples to classify, one example per row
        """
        ws = np.sum(weights * examples, axis=1)
        h = ws > 0
        return h.astype(int)

    def get_weights(self):
        """Return existing model weights (parameters)."""
        return self.weights
    
    def accuracy(self, weights, examples):
        """
        Return the fraction of examples whose class was correctly identified.

        Inputs:
            weights: array of weight(s) making up the linear classifier
            examples: 2D array of examples to classify, one example per row
        """
        h = self.run_model(weights=weights, examples=examples[:,0:-1])
        num_correct = np.sum(examples[:,-1] == h)
        return np.round(((num_correct / len(examples)) * 100), 3)

    def error(self, weights, examples):
        """
        Return the fraction of examples whose class was incorrectly identified.

        Inputs:
            weights: array of weight(s) making up the linear classifier
            examples: 2D array of examples to classify, one example per row
        """
        h = self.run_model(weights=weights, examples=examples[:,0:-1])
        num_incorrect = np.sum(examples[:,-1] != h)
        return np.round(((num_incorrect / len(examples)) * 100), 3)

    def test(self, traindata=False):
        """
        Test the current model. Returns tuple (accuracy, error) as percentages of
        examples classified correctly and incorrectly, respectively.
        
        Input:
            traindata: Boolean indicating whether accuracy and error should be
                calculated on training subset. Defaults to False (testing subset)
        """
        ds = self.trainSS if traindata else self.testSS
        acc = self.accuracy(weights=self.weights, examples=ds)
        err = self.error(weights=self.weights, examples=ds)
        return (acc, err)
    
    def train(self):
        """Train the model"""
        self.num_epoch = 0
        
        # Test initial model
        self.trainAcc, self.trainErr = self.test(traindata=True)
        
        # Train
        while self.trainAcc < self.threshold:
            if self.verbose:
                print(f'Epoch {self.num_epoch}: Percent Error {self.trainErr}%')

            # Update epoch counter
            self.num_epoch += 1

            # Loop through each training example
            for ex in self.trainSS:
                attributes = ex[0:-1].reshape(1,-1)
                classLabel = ex[-1]
                hypothesis = self.run_model(weights=self.weights, 
                                            examples=attributes)
                self.adjust_weights(att=attributes, tar=classLabel, hyp=hypothesis)
                
            # Test current model
            self.trainAcc, self.trainErr = self.test(traindata=True)

        if self.verbose:
            print(f'Epoch {self.num_epoch}: Percent Error {self.trainErr}%\n')

        # Final status
        self.testAcc, self.testErr = self.test(traindata=False)
        print(f'Final accuracy on training data: {self.trainAcc}%')
        print(f'Accuracy on testing data: {self.testAcc}%')

In [3]:
model = Linear(data='banknote.csv', train=0.75, threshold=0.9, lr=0.01,
               verbose=True, seed=1)
model.get_weights()

array([0.51182162, 0.9504637 , 0.14415961, 0.94864945, 0.31183145])

In [4]:
model.train()
model.get_weights()

Epoch 0: Percent Error 55.102%
Epoch 1: Percent Error 48.785%
Epoch 2: Percent Error 17.298%
Epoch 3: Percent Error 12.245%
Epoch 4: Percent Error 1.749%

Final accuracy on training data: 98.251%
Accuracy on testing data: 97.376%


array([[ 0.13182162, -0.107347  , -0.10065039, -0.09462214,  0.00733456]])

In [5]:
model.reset(shuffle=False)
model.get_weights()

array([0.51182162, 0.9504637 , 0.14415961, 0.94864945, 0.31183145])

In [6]:
model.train()
model.get_weights()

Epoch 0: Percent Error 55.102%
Epoch 1: Percent Error 48.785%
Epoch 2: Percent Error 17.298%
Epoch 3: Percent Error 12.245%
Epoch 4: Percent Error 1.749%

Final accuracy on training data: 98.251%
Accuracy on testing data: 97.376%


array([[ 0.13182162, -0.107347  , -0.10065039, -0.09462214,  0.00733456]])

In [7]:
model.reset(shuffle=True)
model.get_weights()

array([0.51182162, 0.9504637 , 0.14415961, 0.94864945, 0.31183145])

In [8]:
model.train()
model.get_weights()

Epoch 0: Percent Error 55.588%
Epoch 1: Percent Error 45.773%
Epoch 2: Percent Error 18.562%
Epoch 3: Percent Error 3.207%

Final accuracy on training data: 96.793%
Accuracy on testing data: 97.668%


array([[ 0.13182162, -0.1113166 , -0.084855  , -0.08318149, -0.00177857]])

In [9]:
model.reset(shuffle=False, seed='None')
model.get_weights()



array([0.80901901, 0.77788219, 0.57288856, 0.59766846, 0.91894066])

In [10]:
model.train()

Epoch 0: Percent Error 55.588%
Epoch 1: Percent Error 44.218%
Epoch 2: Percent Error 16.618%
Epoch 3: Percent Error 5.053%

Final accuracy on training data: 94.947%
Accuracy on testing data: 93.294%


In [11]:
model.seed

In [12]:
model.reset(shuffle=False)
model.get_weights()

array([0.13586051, 0.48824849, 0.8291498 , 0.91344898, 0.02719304])