In [1]:
from scipy.special import expit
import numpy as np
from sklearn.metrics import accuracy_score
from concurrent.futures import ThreadPoolExecutor

from labs.helpers import read_dataset

In [2]:
# Directory with the training images, relative to this notebook.
data_dir = "../../data/train"
# The two labels the classifier distinguishes.
classes = ["field", "water"]
# Glob template with one placeholder; presumably filled with each class name
# by read_dataset -- TODO confirm in labs.helpers.
file_pattern = "*{}*.jpg"
# Target image shape handed to read_dataset; 50 * 50 * 3 = 7500 matches the
# 7500 non-label columns shown in the frame shapes below.
standard_shape = (50, 50, 3)

In [3]:
# read_dataset is a project helper; judging by how its results are used below it
# returns a DataFrame and the list of feature column names -- TODO confirm in labs.helpers.
dataset, features = read_dataset(data_dir, classes, file_pattern, standard_shape)
# 80/20 split: draw 80% of the rows with a fixed seed, the remaining rows form the test set.
train_df = dataset.sample(frac=0.8, random_state=18)
test_df = dataset.drop(train_df.index)

In [4]:
# (rows, columns) of the training split.
train_df.shape

(18, 7501)

In [5]:
# (rows, columns) of the held-out test split.
test_df.shape

(4, 7501)

In [6]:
class LinearPerceptron:
    """Binary classifier built from a single sigmoid neuron, trained online.

    Every sample is extended with a constant bias input of 1 and pushed through
    ``expit(sum(weights * inputs))``. After each sample the weights are moved
    along the delta-rule gradient, with a momentum term that re-applies a
    fraction of the previous update.

    Note: the sigmoid derivative ``out * (1 - out)`` vanishes when the weighted
    sum is large in magnitude, so features with a large range should be scaled
    by the caller before training, otherwise the weights barely move.
    """

    # Feature column names selected from the frames; set by ``fit``.
    __features = None
    # label -> 0/1 and 0/1 -> label lookups; set by ``fit``.
    __class_to_number = None
    __number_to_class = None
    # Weight vector (bias weight first) and the previous update (for momentum).
    __weights = None
    __old_weights_delta = None

    def __init__(self, learning_rate=1, moment=0.8, max_epoch=10000, tolerance=0.000000001):
        """Store the training hyper-parameters.

        Args:
            learning_rate: step size applied to the gradient.
            moment: fraction of the previous weight update added to the new one.
            max_epoch: maximum number of passes over the training data.
            tolerance: training stops early once the mean squared error of an
                epoch drops below this value.
        """
        self.__learning_rate = learning_rate
        self.__moment = moment
        self.__max_epoch = max_epoch
        self.__tolerance = tolerance
        self.__activation = expit

    def fit(self, df, train_features, target):
        """Train the neuron on ``df``.

        Args:
            df: DataFrame holding the feature columns and the label column.
            train_features: names of the columns used as inputs.
            target: name of the label column; it must hold exactly two classes.

        Raises:
            ValueError: if the label column does not hold exactly two classes.
        """
        self.__features = train_features
        self.__build_classes_binarizers(df, target)
        train_values = self.__build_values(df)
        binarized_classes = df[target].apply(lambda class_: self.__class_to_number[class_]).values.astype("float32")
        self.__old_weights_delta = np.zeros(train_values.shape[1])
        self.__weights = np.full(train_values.shape[1], 0.001)
        # At least two rows exist here because two distinct classes were found.
        measures_count = train_values.shape[0]
        for _ in range(self.__max_epoch):
            cum_error = 0.0
            for measure, real_value in zip(train_values, binarized_classes):
                cum_error += self.__learn(measure, real_value)
            # Convergence is judged once per epoch on the mean squared error,
            # not after every single sample.
            if cum_error / measures_count < self.__tolerance:
                print("End of learn")
                return

    def __build_values(self, df):
        """Return the selected feature values with a leading column of ones (bias input)."""
        measures = df[self.__features].values
        bias_neuron_vals = np.ones((measures.shape[0], 1))
        return np.hstack((bias_neuron_vals, measures))

    def __learn(self, measure, real_value):
        """Run one forward/backward step and return the sample's squared error.

        The error is measured on the output produced before the weight update.
        """
        neuron_output = self.__go_forward(measure)
        self.__go_backward(measure, neuron_output, real_value)
        return (real_value - neuron_output) ** 2

    def __go_forward(self, measure):
        """Return the sigmoid of the weighted sum of one sample's inputs."""
        neuron_input = np.sum(np.multiply(measure, self.__weights))
        return self.__activation(neuron_input)

    def __go_backward(self, measure, neuron_output, real_value):
        """Update the weights from one sample using the delta rule with momentum."""
        out_neuron_error = (real_value - neuron_output) * self.__activation_derivative(neuron_output)
        # Gradient of a weight = error signal of the neuron * the input it connects.
        # The inputs are raw data, not activations, so they have no derivative term.
        gradients = out_neuron_error * measure
        new_weights_delta = self.__learning_rate * gradients + self.__old_weights_delta * self.__moment
        self.__weights += new_weights_delta
        self.__old_weights_delta = new_weights_delta

    def __build_classes_binarizers(self, df, target):
        """Build the label <-> 0/1 lookups from the distinct values of ``df[target]``.

        Raises:
            ValueError: if the column does not hold exactly two distinct classes.
        """
        classes_list = df[target].unique()
        if len(classes_list) != 2:
            raise ValueError(
                f"LinearPerceptron is a binary classifier: expected exactly 2 classes "
                f"in column {target!r}, got {len(classes_list)}"
            )
        self.__class_to_number = dict()
        self.__number_to_class = dict()
        for k, v in enumerate(classes_list):
            self.__class_to_number[v] = k
            self.__number_to_class[k] = v

    @staticmethod
    def __activation_derivative(neuron_out):
        """Derivative of the sigmoid expressed through its own output."""
        return (1 - neuron_out) * neuron_out

    def predict(self, df):
        """Return an array with the predicted class label for every row of ``df``.

        Outputs above 0.5 map to the second class seen during ``fit``, all other
        outputs to the first one.

        Raises:
            RuntimeError: if called before ``fit``.
        """
        if self.__weights is None:
            raise RuntimeError("LinearPerceptron.predict() called before fit()")
        test_values = self.__build_values(df)
        outputs = np.array([self.__go_forward(measure) for measure in test_values], dtype=float)
        # Index a label array instead of using np.apply_along_axis: the latter
        # sizes its string dtype from the first result and truncates longer labels.
        labels = np.array([self.__number_to_class[0], self.__number_to_class[1]])
        return labels[(outputs > 0.5).astype(int)]






In [7]:
# Baseline run: default hyper-parameters, only the first 100 feature columns,
# with "class" as the label column.
model = LinearPerceptron()
model.fit(train_df, features[:100], "class")

In [8]:
# Preview the first rows of the held-out split.
test_df.head()

Unnamed: 0,1:1:1,1:1:2,1:1:3,1:2:1,1:2:2,1:2:3,1:3:1,1:3:2,1:3:3,1:4:1,...,50:48:1,50:48:2,50:48:3,50:49:1,50:49:2,50:49:3,50:50:1,50:50:2,50:50:3,class
5,196.0,207.0,131.0,194.0,205.0,129.0,190.0,200.0,127.0,186.0,...,174.0,186.0,114.0,174.0,192.0,116.0,174.0,192.0,116.0,field
10,212.0,217.0,161.0,214.0,219.0,163.0,212.0,220.0,163.0,204.0,...,146.0,190.0,103.0,150.0,194.0,105.0,150.0,194.0,105.0,field
17,66.0,105.0,86.0,64.0,103.0,84.0,62.0,101.0,82.0,59.0,...,153.0,173.0,88.0,120.0,143.0,99.0,87.0,110.0,66.0,water
19,55.0,87.0,82.0,54.0,86.0,81.0,53.0,85.0,80.0,53.0,...,178.0,196.0,114.0,173.0,192.0,113.0,158.0,177.0,98.0,water


In [9]:
# Ground-truth labels of the test split; also read as a global by calc_accuracy below.
real_classes = test_df["class"].values
predicted = model.predict(test_df)

# Share of test rows whose predicted label equals the true label.
print(f"Accuracy is {accuracy_score(real_classes, predicted)}")

Accuracy is 0.5


In [10]:
def calc_accuracy(local_features_to_use):
    """Train a fresh perceptron on a feature subset and score it on the test split.

    Reads the notebook globals ``train_df``, ``test_df`` and ``real_classes``.

    Args:
        local_features_to_use: names of the feature columns to train on.

    Returns:
        tuple: ``(number of features used, accuracy rounded to 3 decimals)``.
    """
    # Send the text and its newline as one write: with the default ``end`` the
    # newline is written separately, which lets lines from concurrent workers
    # run together in the output.
    print(f"Thread for {len(local_features_to_use)} features works\n", end="")
    local_model = LinearPerceptron()
    local_model.fit(train_df, local_features_to_use, "class")
    local_predicted_classes = local_model.predict(test_df)
    local_acc = round(accuracy_score(real_classes, local_predicted_classes), 3)
    return len(local_features_to_use), local_acc



In [11]:
%%time
# Lazily produce feature prefixes of 7500, 7400, ..., 200 columns
# (the range stops before reaching 100).
features_variants = (features[:i] for i in range(7500, 100, -100))

# Train up to 5 models concurrently; leaving the `with` block waits for every
# submitted task to finish.
with ThreadPoolExecutor(5) as executor:
    results = executor.map(calc_accuracy, features_variants)
# executor.map yields results in submission order, so the report goes from the
# largest feature count to the smallest.
for result in results:
    f_n, l_c = result
    print(f"{l_c * 100}% accuracy score with {f_n} features")



Thread for 7500 features works
Thread for 7400 features works
Thread for 7300 features worksThread for 7200 features works
Thread for 7100 features works

Thread for 7000 features works
Thread for 6900 features works
Thread for 6800 features works
Thread for 6700 features works
Thread for 6600 features works
Thread for 6500 features works
Thread for 6400 features works
Thread for 6300 features works
Thread for 6200 features works
Thread for 6100 features works
Thread for 6000 features works
Thread for 5900 features works
Thread for 5800 features works
Thread for 5700 features works
Thread for 5600 features works
Thread for 5500 features works
Thread for 5400 features works
Thread for 5300 features works
Thread for 5200 features works
Thread for 5100 features works
Thread for 5000 features works
Thread for 4900 features works
Thread for 4800 features works
Thread for 4700 features works
Thread for 4600 features works
Thread for 4500 features works
Thread for 4400 features works
Thread f