In [1]:
import csv
import math
import random
import numpy as np
from functools import partial
from operator import itemgetter

In [2]:
# Path of the Iris CSV file, relative to the notebook's working directory.
iris_file_path = "data/iris.data"

# Class labels; read_class_data compares them with the fifth CSV column.
iris_classes = [
    "Iris-setosa",
    "Iris-versicolor",
    "Iris-virginica",
]

In [3]:
def read_class_data(file_path, iris_class):
    """Load the feature rows of one class from a comma-separated data file.

    Every row whose fifth column equals ``iris_class`` is converted to
    ``[1, f1, f2, ...]``: a constant ``1`` (presumably the intercept feature
    of the linear model) followed by all columns except the last one,
    parsed as floats.

    Args:
        file_path: Path of the CSV file to read.
        iris_class: Class label to select (compared with ``row[4]``).

    Returns:
        A list of rows (lists of numbers); empty when no row matches.

    Raises:
        OSError: If the file cannot be opened.
        IndexError: If a non-blank row has fewer than five columns.
        ValueError: If a feature of a matching row is not a valid float.
    """
    matrix = []
    with open(file_path, newline='') as file:
        reader = csv.reader(file, delimiter=',', quotechar='|')
        for row in reader:
            # csv.reader yields [] for blank lines (for example a trailing
            # empty line at the end of the file); indexing such a row would
            # raise IndexError, so skip it.
            if not row:
                continue
            if row[4] == iris_class:
                matrix.append([1] + [float(value) for value in row[:-1]])
    return matrix

In [4]:
def get_random_sublist_indeces(full_list, sublist_size):
    """Pick ``sublist_size`` distinct random positions of ``full_list``.

    Args:
        full_list: Any sized container; only its length is used.
        sublist_size: Number of distinct indices to draw.

    Returns:
        A list of ``sublist_size`` unique integers from
        ``range(len(full_list))`` in random order.

    Raises:
        AssertionError: If ``sublist_size`` exceeds ``len(full_list)``.
        ValueError: If ``sublist_size`` is negative (the previous
            rejection-sampling loop never terminated in that case).
    """
    full_list_size = len(full_list)
    assert full_list_size >= sublist_size

    if sublist_size < 0:
        raise ValueError("sublist_size must be non-negative")

    # random.sample draws without replacement, so no retry loop or
    # repeated membership checks are needed.
    return random.sample(range(full_list_size), sublist_size)

def split_to_learn_and_test(sample, test_sample_size):
    """Randomly split ``sample`` into a learning part and a test part.

    ``test_sample_size`` positions are drawn with
    ``get_random_sublist_indeces``; the elements at those positions form the
    test sample and all other elements form the learning sample. Both parts
    keep the relative order the elements had in ``sample``.

    Args:
        sample: Indexable sequence of items to split.
        test_sample_size: Number of items to move into the test sample.

    Returns:
        A tuple ``(learn_sample, test_sample)`` of two new lists.
    """
    # A set gives constant-time membership checks; with the plain list the
    # loop below did a linear scan for every element of the sample.
    test_positions = set(get_random_sublist_indeces(sample, test_sample_size))

    learn_sample = []
    test_sample = []
    for position, item in enumerate(sample):
        if position in test_positions:
            test_sample.append(item)
        else:
            learn_sample.append(item)
    return (learn_sample, test_sample)

In [5]:
# Load the rows of every class from the data file.
iris_setosa, iris_versicolor, iris_virginica = [
    read_class_data(iris_file_path, iris_classes[class_position])
    for class_position in range(3)
]

# Hold out len // 10 randomly chosen rows of each class for testing; the
# remaining rows of that class form its learning sample.
iris_setosa_learn, iris_setosa_test = split_to_learn_and_test(
    iris_setosa, len(iris_setosa) // 10
)
iris_versicolor_learn, iris_versicolor_test = split_to_learn_and_test(
    iris_versicolor, len(iris_versicolor) // 10
)
iris_virginica_learn, iris_virginica_test = split_to_learn_and_test(
    iris_virginica, len(iris_virginica) // 10
)

In [6]:
def calculate_scalar_product(x, y):
    """Return the dot product of two equally long sequences.

    Args:
        x: First sequence of numbers.
        y: Second sequence of numbers, same length as ``x``.

    Returns:
        The sum of ``x[i] * y[i]`` accumulated left to right, or ``0`` for
        empty inputs.

    Raises:
        AssertionError: If the sequences differ in length.
    """
    assert len(x) == len(y)

    total = 0
    for left, right in zip(x, y):
        total += left * right
    return total

In [7]:
def classify(x, w):
    """Classify one sample with a logistic model.

    The score is the scalar product of ``x`` and ``w``; the sample is
    assigned to class ``1`` when the sigmoid of the score is at least 0.5
    and to class ``-1`` otherwise.

    Args:
        x: Feature vector of the sample.
        w: Weight vector of the same length.

    Returns:
        ``1`` or ``-1``.
    """
    value = calculate_scalar_product(x, w)
    try:
        point = 1 / (1 + math.exp(-value))
    except OverflowError:
        # math.exp raises for arguments above roughly 709, i.e. for very
        # negative scores; the sigmoid is then indistinguishable from 0.
        point = 0.0

    return -1 if point < 0.5 else 1
    

def calculate_error(w, X, Y):
    """Return the share of samples that ``classify`` labels incorrectly.

    Args:
        w: Weight vector passed to ``classify``.
        X: Sequence of feature vectors.
        Y: Sequence of expected labels, one per element of ``X``.

    Returns:
        Number of samples whose predicted label differs from the expected
        one, divided by the number of samples.

    Raises:
        AssertionError: If ``X`` and ``Y`` differ in length or ``X`` is empty.
    """
    assert len(X) == len(Y)
    assert len(X) > 0

    mistakes = 0
    for features, expected in zip(X, Y):
        if classify(features, w) != expected:
            mistakes += 1

    return mistakes / len(X)


def calculate_loss(w, X, Y, tau):
    """Compute the regularized logistic loss of ``w`` on a sample.

    The result is the mean over all samples of
    ``log(1 + exp(-Y[i] * <X[i], w>))`` plus ``tau * <w, w>``.

    Args:
        w: Weight vector.
        X: Sequence of feature vectors.
        Y: Sequence of labels, one per element of ``X``.
        tau: Regularization coefficient multiplying ``<w, w>``.

    Returns:
        The loss value as a number.

    Raises:
        AssertionError: If ``X`` and ``Y`` differ in length or ``X`` is empty.
    """
    assert len(X) == len(Y)
    assert len(X) > 0

    count = len(X)

    loss = 0
    for features, label in zip(X, Y):
        margin = label * calculate_scalar_product(features, w)
        try:
            loss += math.log(1 + math.exp(-margin))
        except OverflowError:
            # math.exp raises for arguments above roughly 709. For such a
            # large z, log(1 + exp(z)) equals z to double precision.
            loss += -margin

    return loss / count + tau * calculate_scalar_product(w, w)

In [8]:
def calculate_gradient_for_points(w, X, Y, tau):
    """Compute the gradient of the regularized logistic loss over a sample.

    Coordinate ``j`` of the result is the mean over all samples of
    ``-Y[i] * X[i][j] / (1 + exp(Y[i] * <w, X[i]>))`` plus
    ``2 * tau * w[j]``.

    Args:
        w: Current weight vector.
        X: Non-empty sequence of feature vectors, each as long as ``w``.
        Y: Sequence of labels, one per element of ``X``.
        tau: Regularization coefficient.

    Returns:
        A list with one gradient component per weight.

    Raises:
        AssertionError: If ``X`` and ``Y`` differ in length, or ``w`` and
            ``X[0]`` differ in length.
    """
    assert len(X) == len(Y)
    assert len(w) == len(X[0])

    grad = [0] * len(w)

    for features, label in zip(X, Y):
        # The margin depends only on the sample, so it is computed once per
        # sample instead of once per coordinate.
        margin = label * calculate_scalar_product(w, features)
        try:
            denominator = 1 + math.exp(margin)
        except OverflowError:
            # math.exp raises for arguments above roughly 709; an infinite
            # denominator makes this sample's contribution zero.
            denominator = math.inf
        for j in range(len(grad)):
            grad[j] += -label * features[j] / denominator

    # normalize and regularize
    for j in range(len(grad)):
        grad[j] = grad[j] / len(X) + tau * 2 * w[j]
    return grad

def bgd_learn(tau, etta, T, X, Y):
    """Fit weights with batch gradient descent.

    Starting from an all-zero vector with one weight per feature of
    ``X[0]``, performs ``T`` steps; each step subtracts ``etta`` times the
    gradient returned by ``calculate_gradient_for_points`` for the whole
    sample.

    Args:
        tau: Regularization coefficient forwarded to the gradient.
        etta: Step size.
        T: Number of iterations.
        X: Non-empty sequence of feature vectors.
        Y: Sequence of labels, one per element of ``X``.

    Returns:
        The list of learned weights.
    """
    w = [0] * len(X[0])

    for _ in range(T):
        grad = calculate_gradient_for_points(w, X, Y, tau)
        for position in range(len(w)):
            w[position] = w[position] - etta * grad[position]

    return w

In [9]:
def calculate_gradient_for_point(w, x, y, tau):
    """Compute the gradient of the regularized logistic loss at one sample.

    Coordinate ``i`` of the result is
    ``-y * x[i] / (1 + exp(y * <w, x>)) + 2 * tau * w[i]``.

    Args:
        w: Current weight vector.
        x: Feature vector of the sample, as long as ``w``.
        y: Label of the sample.
        tau: Regularization coefficient.

    Returns:
        A list with one gradient component per weight.

    Raises:
        AssertionError: If ``w`` and ``x`` differ in length.
    """
    assert len(w) == len(x)

    grad = [0] * len(w)

    margin = y * calculate_scalar_product(w, x)
    try:
        denominator = 1 + math.exp(margin)
    except OverflowError:
        # math.exp raises for arguments above roughly 709; an infinite
        # denominator makes the data term zero and leaves only the
        # regularization term.
        denominator = math.inf

    for i in range(len(grad)):
        grad[i] += -y * x[i] / denominator + tau * 2 * w[i]

    return grad

def sgd_learn(tau, etta, T, X, Y):
    """Fit weights with stochastic gradient descent.

    Starting from an all-zero vector with one weight per feature of
    ``X[0]``, performs ``T`` steps. Each step picks one sample position with
    ``random.choice`` and subtracts ``etta`` times the gradient returned by
    ``calculate_gradient_for_point`` for that sample.

    Args:
        tau: Regularization coefficient forwarded to the gradient.
        etta: Step size.
        T: Number of iterations.
        X: Non-empty sequence of feature vectors.
        Y: Sequence of labels, one per element of ``X``.

    Returns:
        The list of learned weights.
    """
    w = [0] * len(X[0])
    positions = range(len(X))

    for _ in range(T):
        picked = random.choice(positions)
        grad = calculate_gradient_for_point(w, X[picked], Y[picked], tau)

        for position in range(len(w)):
            w[position] = w[position] - etta * grad[position]

    return w

In [10]:
def k_fold_learn(folds_count, target_class_sample, other_classes_sample, algorithm):
    """Estimate the classification error of ``algorithm`` with k-fold validation.

    In every fold a new validation chunk of ``len(sample) // folds_count``
    items is split off each class with ``split_to_learn_and_test``; the
    model is trained on everything else (items not yet used for validation
    plus the validation chunks of earlier folds). Target-class items are
    labelled ``1`` and the other items ``-1``.

    Args:
        folds_count: Number of folds.
        target_class_sample: Items of the positive class (not modified).
        other_classes_sample: Items of the negative class (not modified).
        algorithm: Callable ``algorithm(learn_sample, learn_markers)``
            returning a weight vector.

    Returns:
        The mean over all folds of ``calculate_error`` on the validation part.
    """
    target_chunk_size = len(target_class_sample) // folds_count
    other_chunk_size = len(other_classes_sample) // folds_count

    # Pools of items that have not served as validation data yet (copies, so
    # the caller's lists stay untouched) and the items that already have.
    target_pool = list(target_class_sample)
    other_pool = list(other_classes_sample)
    target_used = []
    other_used = []

    error_total = 0
    for _ in range(folds_count):
        # The target class is split before the other classes.
        target_pool, target_held_out = split_to_learn_and_test(target_pool, target_chunk_size)
        other_pool, other_held_out = split_to_learn_and_test(other_pool, other_chunk_size)

        target_train = target_pool + target_used
        other_train = other_pool + other_used

        train_rows = target_train + other_train
        train_labels = [1] * len(target_train) + [-1] * (len(train_rows) - len(target_train))

        held_out_rows = target_held_out + other_held_out
        held_out_labels = [1] * len(target_held_out) + [-1] * (len(held_out_rows) - len(target_held_out))

        w = algorithm(train_rows, train_labels)
        error_total += calculate_error(w, held_out_rows, held_out_labels)

        target_used.extend(target_held_out)
        other_used.extend(other_held_out)

    return error_total / folds_count


def k_fold_learn_new(folds_count, target_class_sample, other_classes_sample, tau_values, etta_values, T_values, algorithm):
    """Choose hyperparameters by k-fold cross-validation over a grid.

    The two samples are first cut into ``folds_count`` folds with
    ``split_to_learn_and_test``: every fold validates on a fresh chunk of
    each class and trains on all remaining items, with target-class items
    labelled ``1`` and the others ``-1``. The same folds are then reused for
    every ``(tau, etta, T)`` combination, and the combination with the
    smallest mean ``calculate_loss`` on the validation parts wins.

    Args:
        folds_count: Number of folds; must divide both sample sizes.
        target_class_sample: Items of the positive class (not modified).
        other_classes_sample: Items of the negative class (not modified).
        tau_values: Candidate regularization coefficients.
        etta_values: Candidate step sizes.
        T_values: Candidate iteration counts.
        algorithm: Callable ``algorithm(tau, etta, T, X, Y)`` returning a
            weight vector.

    Returns:
        The tuple ``(tau, etta, T, average_loss)`` with the lowest average
        loss; on ties the combination evaluated first is returned.

    Raises:
        AssertionError: If a sample size is not a multiple of ``folds_count``.
    """
    assert len(target_class_sample) % folds_count == 0
    assert len(other_classes_sample) % folds_count == 0

    target_chunk_size = len(target_class_sample) // folds_count
    other_chunk_size = len(other_classes_sample) // folds_count

    # Pools of items not yet used for validation (copies of the inputs) and
    # the items already used in earlier folds.
    target_pool = list(target_class_sample)
    other_pool = list(other_classes_sample)
    target_used = []
    other_used = []

    # Build the k folds once so every parameter combination sees the same data.
    folds_data = []
    for _ in range(folds_count):
        # The target class is split before the other classes.
        target_pool, target_held_out = split_to_learn_and_test(target_pool, target_chunk_size)
        other_pool, other_held_out = split_to_learn_and_test(other_pool, other_chunk_size)

        target_train = target_pool + target_used
        other_train = other_pool + other_used

        train_rows = target_train + other_train
        train_labels = [1] * len(target_train) + [-1] * (len(train_rows) - len(target_train))

        held_out_rows = target_held_out + other_held_out
        held_out_labels = [1] * len(target_held_out) + [-1] * (len(held_out_rows) - len(target_held_out))

        folds_data.append((train_rows, train_labels, held_out_rows, held_out_labels))

        target_used.extend(target_held_out)
        other_used.extend(other_held_out)

    assert len(folds_data) == folds_count

    # Evaluate every parameter combination on all folds.
    k_fold_results = []
    for tau in tau_values:
        for etta in etta_values:
            for T in T_values:
                loss_total = 0
                for train_rows, train_labels, held_out_rows, held_out_labels in folds_data:
                    w = algorithm(tau, etta, T, train_rows, train_labels)
                    loss_total += calculate_loss(w, held_out_rows, held_out_labels, tau)

                k_fold_results.append((tau, etta, T, loss_total / folds_count))

    # min() keeps the first of several equally good combinations.
    return min(k_fold_results, key=lambda result: result[3])

In [11]:
def calcualte_model(target_class_name, alg_name, target_learn, other_learn, target_test, other_test, algorithm):
    """Tune, train and evaluate one binary classifier, printing the results.

    Hyperparameters are picked with ``k_fold_learn_new`` (9 folds) from
    fixed grids, the model is retrained on the whole learning data, and its
    ``calculate_error`` on the test data is printed. Target items are
    labelled ``1`` and the other items ``-1``.

    Args:
        target_class_name: Label used in the printed error line.
        alg_name: Label used in the printed parameters line.
        target_learn: Learning items of the positive class.
        other_learn: Learning items of the negative class.
        target_test: Test items of the positive class.
        other_test: Test items of the negative class.
        algorithm: Callable ``algorithm(tau, etta, T, X, Y)`` returning a
            weight vector.

    Returns:
        None; the chosen parameters, the weights and the test error are
        written to standard output.
    """
    # Candidate values searched by the cross-validation.
    tau_grid = [0.1, 0.2, 0.3, 0.4, 0.5]
    etta_grid = [0.01, 0.02, 0.05, 0.1, 0.2, 0.5]
    T_grid = [20, 50, 100, 200, 300]

    # The fourth element (average validation loss) is not used here.
    tau, etta, T, _average_loss = k_fold_learn_new(
        9, target_learn, other_learn, tau_grid, etta_grid, T_grid, algorithm
    )

    learn_set = target_learn + other_learn
    test_set = target_test + other_test
    Y_learn = [1] * len(target_learn) + [-1] * (len(learn_set) - len(target_learn))
    Y_test = [1] * len(target_test) + [-1] * (len(test_set) - len(target_test))

    # Retrain on the full learning data with the selected parameters.
    w = algorithm(tau, etta, T, learn_set, Y_learn)
    print("Best params for ", alg_name, " = ", (tau, etta, T))
    print("Best weights = ", w)

    test_error = calculate_error(w, test_set, Y_test)
    print("Classification error for ", target_class_name, " = ", test_error)

# Every ordered class pair: (label, target learn, other learn, target test, other test).
class_pairs = [
    ("Iris-Setonza vs Versicolor", iris_setosa_learn, iris_versicolor_learn, iris_setosa_test, iris_versicolor_test),
    ("Iris-Setonza vs Virginica", iris_setosa_learn, iris_virginica_learn, iris_setosa_test, iris_virginica_test),
    ("Iris-Versicolor vs Virginica", iris_versicolor_learn, iris_virginica_learn, iris_versicolor_test, iris_virginica_test),
    ("Iris-Versicolor vs Setonza", iris_versicolor_learn, iris_setosa_learn, iris_versicolor_test, iris_setosa_test),
    ("Iris-Virginica vs Iris-Versicolor", iris_virginica_learn, iris_versicolor_learn, iris_virginica_test, iris_versicolor_test),
    ("Iris-Virginica vs Iris-Setonza", iris_virginica_learn, iris_setosa_learn, iris_virginica_test, iris_setosa_test),
]

# For each pair, run batch gradient descent first and stochastic second.
for pair_label, pair_target_learn, pair_other_learn, pair_target_test, pair_other_test in class_pairs:
    for learner_label, learner in (("BGD", bgd_learn), ("SGD", sgd_learn)):
        calcualte_model(pair_label, learner_label, pair_target_learn, pair_other_learn, pair_target_test, pair_other_test, learner)

Best params for  BGD  =  (0.1, 0.05, 300)
Best weights =  [0.10145171572954542, 0.15077808242564367, 0.578433643974476, -0.891183227950756, -0.38260881831657784]
Classification error for  Iris-Setonza vs Versicolor  =  0.0
Best params for  SGD  =  (0.1, 0.02, 200)
Best weights =  [0.07827409498993394, 0.1113815152272994, 0.5342955931868008, -0.7986355188409722, -0.3435533564816997]
Classification error for  Iris-Setonza vs Versicolor  =  0.0
Best params for  BGD  =  (0.1, 0.05, 300)
Best weights =  [0.1259670168873629, 0.2300184162825966, 0.5183008343079073, -0.8200130809365441, -0.4168158545529107]
Classification error for  Iris-Setonza vs Virginica  =  0.0
Best params for  SGD  =  (0.1, 0.05, 200)
Best weights =  [0.1355288644935789, 0.2663778741450885, 0.548108692372022, -0.8402827962702926, -0.41861143490382735]
Classification error for  Iris-Setonza vs Virginica  =  0.0
Best params for  BGD  =  (0.1, 0.1, 200)
Best weights =  [0.1619290110667123, 0.33823807190510186, 0.23487903198