In [2]:
import pandas as pd
import numpy as np
from joblib import Parallel, delayed

# Loading The Dataset

In [6]:
data = pd.read_csv('your_dataset.csv')

# Preparing data:
1. Seperating features and labels
2. Normalizing the dataset

In [9]:
def DataNormalize(inputData):
    mean = np.mean(inputData, axis = 0)
    std = np.std(inputData, axis = 0)
    normalizedData = (inputData - mean) / std
    
    return normalizedData

In [11]:
def DataShuffle(dataSize):
    indices = np.arange(dataSize)
    np.random.shuffle(indices)
    
    return indices

In [13]:
def DataSplit(inputData, outputData):
    # Define the split ratio
    trainRatio = 0.8
    trainSize = int(len(inputData) * trainRatio)
    
    # Split the data
    trainData = inputData[:trainSize]
    trainLabel = outputData[:trainSize]
    testData = inputData[trainSize:]
    testLabel = outputData[trainSize:]
    
    return trainData, trainLabel, testData, testLabel


In [15]:
X = data.iloc[:, :-1].values
Y = data.iloc[:, -1].values

X_normalized = DataNormalize(X)

# Shuffle the data
indices = DataShuffle(X.shape[0])
X_normalized = X_normalized[indices]
Y = Y[indices]
    
    
# Split the data
trainData, trainLabel, testData, testLabel = DataSplit(X_normalized, Y)

# Perceptron Implementation 
### "Improving the performance using Multiprocessing"

In [18]:
class Perceptron:
    def __init__(self, n_iters = 1000):
        self.n_iters = n_iters
        self.weights = None
        
    def fit(self, inputData, labelData):
        numSamples, numFeatures = inputData.shape
        self.weights = np.zeros(numFeatures)

        for _ in range(self.n_iters):
            linearOutput = np.dot(inputData, self.weights) * labelData
            misclassified = (labelData * linearOutput) <= 0

            if not np.any(misclassified):
                break

            # Update the weights by summing over all misclassified examples
            self.weights += np.dot(labelData[misclassified], inputData[misclassified])
                
    def predict(self, inputData):
        linearOutput = np.dot(inputData, self.weights)
        pred = self._activation_function(linearOutput)
        return pred
    
    def _activation_function(self, inputData):
        return np.where(inputData >= 0, 1, -1)

In [20]:
def zeroOneLoss(trueLabels, predLabels):
    return np.sum(trueLabels != predLabels) / len(trueLabels)

# Cross Validation Implementation

In [23]:
# Grid Search using Cross Validateion
def crossValScore(X, y, iters, k = 5):
    fold_size = len(X) // k
    accuracies = []
    
    for i in range(k):
        X_test = X[i * fold_size:(i + 1) * fold_size]
        y_test = y[i * fold_size:(i + 1) * fold_size]
        X_train = np.concatenate((X[:i * fold_size], X[(i + 1) * fold_size:]), axis=0)
        y_train = np.concatenate((y[:i * fold_size], y[(i + 1) * fold_size:]), axis=0)
        
        perceptron = Perceptron(iters)
        
        perceptron.fit(X_train, y_train)
        
        predictions = perceptron.predict(X_test)
        accuracy = zeroOneLoss(predictions, y_test)
        accuracies.append(accuracy)
        
    return np.mean(accuracies)

# Tuning Hyperparameters

In [27]:
# Tuning Hyperparameters
n_iters_list = [100, 1000, 1500, 2000, 2500]

best_loss = 2
best_params = {}

for n_iters in n_iters_list:
    params = {'n_iters': n_iters}
    mean_loss = crossValScore(trainData, trainLabel,params['n_iters'], k = 5)
    print(f"n_iters: {n_iters}, Cross Validation Loss:{mean_loss}")
        
    if mean_loss < best_loss:
        best_loss = mean_loss
        best_params = params
            
print("\nBest Hyperparameters:")
print(f"n_iters: {best_params['n_iters']}")
print(f"Best Cross-Validation loss: {best_loss}")

n_iters: 100, Cross Validation Loss:0.312
n_iters: 1000, Cross Validation Loss:0.31275
n_iters: 1500, Cross Validation Loss:0.312625
n_iters: 2000, Cross Validation Loss:0.312625
n_iters: 2500, Cross Validation Loss:0.312625

Best Hyperparameters:
n_iters: 100
Best Cross-Validation loss: 0.312


# Running the model using the tuned hyperparameters

In [30]:
# Initialize the Perceptron
perceptron = Perceptron(n_iters = best_params['n_iters'])

# Train the Perceptron
perceptron.fit(trainData, trainLabel)

# Predict on the test set
predLabels = perceptron.predict(testData)

# Calculate zero-one loss
loss = zeroOneLoss(testLabel, predLabels)

print(f"Zero-One Loss: {loss}")
    

Zero-One Loss: 0.299
