# Loading The Dataset

In [32]:
# Path of the CSV file that is read into `data`.
DATASET_PATH = 'your_dataset.csv'
data = pd.read_csv(DATASET_PATH)

# Preparing data:
1. Separating features and labels
2. Normalizing the dataset

In [33]:
def DataNormalize(inputData):
    """Standardize ``inputData`` column-wise to zero mean and unit variance.

    Parameters
    ----------
    inputData : array-like
        Numeric data; mean and standard deviation are taken along axis 0.

    Returns
    -------
    The value of ``(inputData - mean) / std`` computed along axis 0.
    Columns whose standard deviation is 0 (constant features) are only
    centred, so they come out as zeros instead of NaN.
    """
    mean = np.mean(inputData, axis = 0)
    std = np.std(inputData, axis = 0)
    # A constant column has std == 0; dividing by it would produce 0/0 = NaN
    # and poison every dot product computed downstream.
    safeStd = np.where(std == 0, 1.0, std)

    return (inputData - mean) / safeStd

In [34]:
def DataShuffle(dataSize, seed=None):
    """Return the indices ``0 .. dataSize - 1`` in random order.

    Parameters
    ----------
    dataSize : int
        Number of indices to generate.
    seed : int, optional
        When given, the permutation is drawn from a dedicated generator
        seeded with this value, so the same ``seed`` always yields the same
        order. When omitted, NumPy's global random state is used.

    Returns
    -------
    numpy.ndarray
        1-D array holding a permutation of ``np.arange(dataSize)``.
    """
    indices = np.arange(dataSize)
    if seed is None:
        np.random.shuffle(indices)
    else:
        # A private generator keeps seeded calls independent of global state.
        np.random.default_rng(seed).shuffle(indices)

    return indices

In [35]:
def DataSplit(inputData, outputData, trainRatio=0.8):
    """Split features and labels into a leading train part and a trailing test part.

    The split is purely positional (nothing is shuffled here): the first
    ``int(len(inputData) * trainRatio)`` items form the training set and the
    remaining items form the test set.

    Parameters
    ----------
    inputData : sequence or numpy.ndarray
        Feature rows; must support ``len`` and slicing.
    outputData : sequence or numpy.ndarray
        Labels, one per feature row.
    trainRatio : float, optional
        Fraction of the data assigned to the training set (default 0.8).

    Returns
    -------
    tuple
        ``(trainData, trainLabel, testData, testLabel)``.

    Raises
    ------
    ValueError
        If the inputs differ in length or ``trainRatio`` is outside [0, 1].
    """
    if len(inputData) != len(outputData):
        raise ValueError(
            f"inputData and outputData must have the same length "
            f"(got {len(inputData)} and {len(outputData)})"
        )
    if not 0 <= trainRatio <= 1:
        raise ValueError(f"trainRatio must be within [0, 1], got {trainRatio}")

    trainSize = int(len(inputData) * trainRatio)

    # Split the data
    trainData, testData = inputData[:trainSize], inputData[trainSize:]
    trainLabel, testLabel = outputData[:trainSize], outputData[trainSize:]

    return trainData, trainLabel, testData, testLabel


In [36]:
# Features are every column except the last; the last column holds the labels.
X = data.iloc[:, :-1].values
Y = data.iloc[:, -1].values
# NOTE(review): the SVM update multiplies each label into the margin, so Y is
# presumably encoded as -1/+1 — confirm against the dataset.

# Shuffle features and labels with the same permutation so pairs stay aligned.
indices = DataShuffle(X.shape[0])
X_shuffled = X[indices]
Y = Y[indices]

# Estimate the normalization statistics on the training portion only, so no
# information from the held-out test rows leaks into preprocessing.
trainRaw, _, _, _ = DataSplit(X_shuffled, Y)
trainMean = np.mean(trainRaw, axis=0)
trainStd = np.std(trainRaw, axis=0)
trainStd = np.where(trainStd == 0, 1.0, trainStd)  # constant columns: avoid 0/0
X_normalized = (X_shuffled - trainMean) / trainStd

# Split the data (same boundary as the split used for the statistics above)
trainData, trainLabel, testData, testLabel = DataSplit(X_normalized, Y)

# Support Vector Machines Implementation

In [37]:
class PegasosSVM:
    """Linear SVM trained with the Pegasos stochastic sub-gradient update.

    The model has no bias term: the decision function is ``X @ weights``.
    Labels are multiplied into the margin ``y * (x . w)``, so they are
    presumably expected to be -1/+1 — TODO confirm against the dataset in use.

    Parameters
    ----------
    lambda_param : float
        Regularization strength; also sets the step size ``1 / (lambda * t)``.
    max_iter : int
        Number of stochastic updates performed by ``fit``.
    random_state : int, optional
        Seed of a private generator used to pick training samples. When
        omitted, NumPy's global random state is used.
    verbose : bool, optional
        When True, ``fit`` prints the regularized hinge loss every 100
        iterations. Off by default because each evaluation scans all of X.
    """

    def __init__(self, lambda_param = 0.01, max_iter = 1000, random_state = None, verbose = False):
        self.lambda_param = lambda_param
        self.max_iter = max_iter
        self.random_state = random_state
        self.verbose = verbose
        self.weights = None

    def _hinge_loss(self, X, y, w, lambda_param):
        """Return ``0.5 * lambda * ||w||^2`` plus the mean hinge loss of ``w`` on (X, y)."""
        hinge_loss = np.maximum(0, 1 - y * (X @ w)).mean()
        return 0.5 * lambda_param * np.dot(w, w) + hinge_loss

    def fit(self, X, y):
        """Learn ``self.weights`` from the rows of ``X`` and their labels ``y``.

        Raises
        ------
        ValueError
            If ``X`` has no rows or ``y`` does not have one label per row.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        n_samples, n_features = X.shape
        if n_samples == 0:
            raise ValueError("Cannot fit PegasosSVM on an empty dataset")
        if y.shape[0] != n_samples:
            raise ValueError(
                f"X and y must have the same number of samples "
                f"(got {n_samples} and {y.shape[0]})"
            )

        self.weights = np.zeros(n_features)
        # Both the np.random module and RandomState expose randint(), so the
        # unseeded path draws from exactly the same global stream as before.
        if self.random_state is None:
            rng = np.random
        else:
            rng = np.random.RandomState(self.random_state)

        for t in range(1, self.max_iter + 1):
            eta = 1 / (self.lambda_param * t)
            i = rng.randint(n_samples)
            xi, yi = X[i], y[i]

            # The margin test must use the weights from before this step.
            violates_margin = yi * np.dot(xi, self.weights) < 1

            # Regularization shrink is applied on every step ...
            self.weights = (1 - eta * self.lambda_param) * self.weights
            # ... and the sample only contributes when it violates the margin.
            if violates_margin:
                self.weights = self.weights + eta * yi * xi

            # Evaluating the objective costs a full pass over X, so it is only
            # done when progress output was requested.
            if self.verbose and t % 100 == 0:
                loss = self._hinge_loss(X, y, self.weights, self.lambda_param)
                print(f"Iteration {t}/{self.max_iter}: Loss = {loss:.4f}")

    def predict(self, X):
        """Return -1.0 / +1.0 for each row of ``X`` from the sign of ``X @ weights``.

        A score of exactly 0 is assigned to +1.0; ``np.sign`` would return 0
        there, which is not a class label and would always count as an error.
        """
        scores = np.asarray(X) @ self.weights
        return np.where(scores >= 0, 1.0, -1.0)

# Cross Validation Implementation

In [38]:
# K-fold cross-validation, used by the grid search below
def crossValScore(X, y, params, k = 5):
    """Estimate the mean zero-one loss of a PegasosSVM by k-fold cross-validation.

    Despite the name, the returned value is a loss: lower is better.

    Parameters
    ----------
    X : array-like
        Feature rows.
    y : array-like
        Labels, one per row of ``X``.
    params : dict
        Must contain ``'lambda'`` (regularization strength) and ``'iters'``
        (number of training iterations); both are passed to ``PegasosSVM``.
    k : int, optional
        Number of folds (default 5).

    Returns
    -------
    float
        Mean of the per-fold zero-one losses.

    Raises
    ------
    ValueError
        If ``X`` and ``y`` differ in length, or ``k`` is not in
        ``[2, len(X)]`` (which would leave an empty training or test fold).
    """
    X = np.asarray(X)
    y = np.asarray(y)
    n_samples = len(X)
    if len(y) != n_samples:
        raise ValueError(
            f"X and y must have the same length (got {n_samples} and {len(y)})"
        )
    if not 2 <= k <= n_samples:
        raise ValueError(f"k must be between 2 and len(X)={n_samples}, got {k}")

    # array_split spreads the remainder over the first folds, so every sample
    # is used for validation exactly once even when len(X) % k != 0.
    folds = np.array_split(np.arange(n_samples), k)
    fold_losses = []

    for i, test_idx in enumerate(folds):
        train_idx = np.concatenate(folds[:i] + folds[i + 1:])

        model = PegasosSVM(lambda_param = params['lambda'], max_iter = params['iters'])
        model.fit(X[train_idx], y[train_idx])

        predictions = model.predict(X[test_idx])
        fold_losses.append(zeroOneLoss(y[test_idx], predictions))

    return np.mean(fold_losses)

In [39]:
def zeroOneLoss(trueLabels, predLabels):
    """Return the fraction of positions at which the two label sequences differ.

    Parameters
    ----------
    trueLabels : array-like
        Reference labels.
    predLabels : array-like
        Predicted labels, same shape as ``trueLabels``.

    Returns
    -------
    float
        Number of mismatching positions divided by ``len(trueLabels)``.

    Raises
    ------
    ValueError
        If the inputs have different shapes or are empty.
    """
    # Convert first: on plain lists `!=` compares the whole objects and yields
    # a single bool instead of an element-wise result.
    trueLabels = np.asarray(trueLabels)
    predLabels = np.asarray(predLabels)

    if trueLabels.shape != predLabels.shape:
        raise ValueError(
            f"Label shapes differ: {trueLabels.shape} vs {predLabels.shape}"
        )
    if trueLabels.size == 0:
        raise ValueError("Cannot compute the zero-one loss of empty label arrays")

    return np.sum(trueLabels != predLabels) / len(trueLabels)

# Tuning Hyperparameters

In [43]:
# Tuning Hyperparameters: exhaustive grid search over lambda and the iteration
# count, scored by the 5-fold cross-validation loss on the training split.
lambda_list = [0.001, 0.01, 0.1]
n_iters_list = [1000, 1500, 2000]

# Start from +inf instead of a magic sentinel so any finite loss is accepted.
best_loss = float("inf")
best_params = {}
for lambdas in lambda_list:
    for n_iters in n_iters_list:
        params = {'lambda': lambdas,'iters': n_iters}
        mean_loss = crossValScore(trainData, trainLabel,params, k = 5)
        print(f"lambda: {lambdas}, n_iters: {n_iters}, Cross Validation Loss:{mean_loss:.4f}")

        if mean_loss < best_loss:
            best_loss = mean_loss
            best_params = params

# Every `<` comparison above is False when a loss is NaN, so best_params can
# stay empty; fail with a clear message instead of a KeyError in the prints.
if not best_params:
    raise RuntimeError("Grid search produced no valid cross-validation loss")

print("\nBest Hyperparameters:")
print(f"lambda: {best_params['lambda']}")
print(f"n_iters: {best_params['iters']}")
print(f"Best Cross-Validation loss: {best_loss:.4f}")

lambda: 0.001, n_iters: 1000, Cross Validation Loss:0.4026
lambda: 0.001, n_iters: 1500, Cross Validation Loss:0.3862
lambda: 0.001, n_iters: 2000, Cross Validation Loss:0.3580
lambda: 0.01, n_iters: 1000, Cross Validation Loss:0.3134
lambda: 0.01, n_iters: 1500, Cross Validation Loss:0.3256
lambda: 0.01, n_iters: 2000, Cross Validation Loss:0.3031
lambda: 0.1, n_iters: 1000, Cross Validation Loss:0.2854
lambda: 0.1, n_iters: 1500, Cross Validation Loss:0.2861
lambda: 0.1, n_iters: 2000, Cross Validation Loss:0.2812

Best Hyperparameters:
lambda: 0.1
n_iters: 2000
Best Cross-Validation loss: 0.2812


In [44]:
# Retrain the best configuration several times and average the test loss,
# since each training run samples examples at random.
numberOfRuns = 10
losses = []

for i in range(numberOfRuns):
    # Shuffle into fresh names so trainData/trainLabel are not overwritten and
    # re-running this cell starts from the same training split every time.
    indices = DataShuffle(trainData.shape[0])
    shuffledTrainData = trainData[indices]
    shuffledTrainLabel = trainLabel[indices]

    # Initialize the SVM Model
    model = PegasosSVM(lambda_param = best_params['lambda'], max_iter = best_params['iters'])

    # Train the SVM
    model.fit(shuffledTrainData, shuffledTrainLabel)

    # Predict on the test set
    predLabels = model.predict(testData)

    # Calculate zero-one loss
    loss = zeroOneLoss(testLabel, predLabels)
    print(f"Iteration {i + 1} - Zero-One Loss: {loss}")
    losses.append(loss)

averageLoss = np.mean(losses)
print(f"Average Zero-One Loss: {averageLoss:.4f}")
    

Iteration 1 - Zero-One Loss: 0.275
Iteration 2 - Zero-One Loss: 0.2975
Iteration 3 - Zero-One Loss: 0.2845
Iteration 4 - Zero-One Loss: 0.2895
Iteration 5 - Zero-One Loss: 0.29
Iteration 6 - Zero-One Loss: 0.2725
Iteration 7 - Zero-One Loss: 0.2735
Iteration 8 - Zero-One Loss: 0.28
Iteration 9 - Zero-One Loss: 0.2885
Iteration 10 - Zero-One Loss: 0.278
Average Zero-One Loss: 0.2829
