In [1]:
import numpy as np
from data import Data
from GD import (g,
                l2_g,
                init_w,
                predict)

In [2]:
data_loader = Data("Rice_Cammeo_Osmancik.arff")
X = data_loader.get_X()
y = data_loader.get_y()
X_scaled = data_loader.normalize_data(X)
X_with_bias = np.hstack((np.ones((X_scaled.shape[0], 1)), X_scaled))

In [3]:
lambda_values = [0, 0.0001, 0.001, 0.01, 0.1, 1, 10]

n_folds = 5

X_folds = np.array_split(X_with_bias, n_folds)
y_folds = np.array_split(y, n_folds)

In [4]:
best_lambda = None
best_accuracy = 0

learning_rate = 0.01
epochs = 10000

for _lambda in lambda_values:
    accuracies = []
    
    for i in range(n_folds):
        X_train = np.concatenate([X_folds[j] for j in range(n_folds) if j != i])
        y_train = np.concatenate([y_folds[j] for j in range(n_folds) if j != i])
        X_test = X_folds[i]
        y_test = y_folds[i]
        
        n_features = X_train.shape[1]
        w = init_w(n_features)
        
        for epoch in range(epochs):
            gradient = g(X_train, y_train, w) + l2_g(w, _lambda)
            w -= learning_rate * gradient
            
        predictions = predict(X_test, w)
        accuracy = np.mean(predictions == y_test)
        accuracies.append(accuracy)
    
    avg_accuracy = np.mean(accuracies)
    print(f"_lambda: {_lambda}, Average Accuracy: {avg_accuracy * 100:.2f}%")
    if avg_accuracy > best_accuracy:
        best_accuracy = avg_accuracy
        best_lambda = _lambda

print(f"Best _lambda: {best_lambda} with Average Accuracy: {best_accuracy * 100:.2f}%")

_lambda: 0, Average Accuracy: 89.87%
_lambda: 0.0001, Average Accuracy: 89.87%
_lambda: 0.001, Average Accuracy: 89.95%
_lambda: 0.01, Average Accuracy: 90.13%
_lambda: 0.1, Average Accuracy: 90.16%
_lambda: 1, Average Accuracy: 89.87%
_lambda: 10, Average Accuracy: 89.50%
Best _lambda: 0.1 with Average Accuracy: 90.16%
