In [58]:
from math import sqrt, pow
import numpy as np

from labs.helpers import read_dataset

In [59]:
# Notebook configuration; all four values are passed to read_dataset in the next cell.
# Location of the input images, relative to the notebook's working directory.
data_dir = "../../data/train"
# Class labels; the same strings appear in the "class" column of the loaded frame.
classes = ["field", "water"]
# Glob-style template; presumably "{}" is filled with each class name to find
# that class's files -- TODO confirm in labs.helpers.read_dataset.
file_pattern = "*{}*.jpg"
# Presumably the (height, width, channels) every image is brought to before
# flattening -- TODO confirm. 50 * 50 * 3 = 7500, which matches the 7500
# feature columns (plus "class") in the frame shapes displayed below.
standard_shape = (50, 50, 3)

In [60]:
# Load the data set. `features` is later handed to Classifier.fit as the
# feature columns (presumably the list of pixel column labels -- TODO confirm
# against labs.helpers.read_dataset).
dataset, features = read_dataset(data_dir, classes, file_pattern, standard_shape)
# Hold-out split: a random 80% of the rows for training; the fixed
# random_state makes the split reproducible across runs.
train_df = dataset.sample(frac=0.8, random_state=18)
# The remaining rows form the test set. They are selected by index label, so
# this assumes the index of `dataset` is unique -- verify if the loader changes.
test_df = dataset.drop(train_df.index)

In [61]:
# Sanity check: (rows, columns) of the training split.
train_df.shape

(18, 7501)

In [62]:
# Sanity check: (rows, columns) of the held-out test split.
test_df.shape

(4, 7501)

In [63]:
class Classifier:
    """Minimum-distance classifier scored with a potential function.

    ``fit`` stores one centre per class: the per-class mean of the selected
    feature columns.  ``predict`` assigns every row to the class whose centre
    has the highest potential ``1000000 / (1 + R**2)``, where ``R`` is the
    Euclidean distance between the row and that centre.
    """

    # DataFrame of class centres (index: class label, columns: features).
    # Stays None until fit() has been called.
    __classes_centers = None
    # Feature column labels given to fit(); predict() selects the same columns.
    __features = None

    def fit(self, df, train_features, target):
        """Compute and store the centre of every class.

        Args:
            df: training DataFrame containing the feature columns and the
                ``target`` column.
            train_features: iterable of column labels to use as features.
            target: label of the column holding the class of each row.
        """
        # Use the argument, not a notebook-level global, so the model does not
        # depend on whatever happens to be defined in the kernel.
        feature_columns = list(train_features)
        self.__features = feature_columns
        # Select the features before averaging so that unrelated (possibly
        # non-numeric) columns of `df` are never passed to mean().
        self.__classes_centers = df.groupby(by=target)[feature_columns].mean()

    def predict(self, df):
        """Classify every row of ``df``.

        Args:
            df: DataFrame containing (at least) the feature columns used in
                ``fit``.

        Returns:
            1-D numpy array with one class label per row of ``df``, in row
            order.  Empty when ``df`` has no rows.

        Raises:
            RuntimeError: if called before ``fit``.
        """
        if self.__classes_centers is None:
            raise RuntimeError("Classifier.predict() called before fit()")

        class_labels = self.__classes_centers.index
        # Loop-invariant: extract the centres once instead of once per row.
        center_rows = self.__classes_centers.to_numpy()
        samples = df[self.__features].to_numpy()

        winners = np.empty(len(samples), dtype=np.intp)
        for row_number, measure in enumerate(samples):
            potentials = [
                self.__calc_potential(self.__calc_evclid_distance(measure, center))
                for center in center_rows
            ]
            # On ties argmax keeps the first class in index order.
            winners[row_number] = np.argmax(potentials)
        return class_labels[winners].values

    @staticmethod
    def __calc_potential(R):
        """Return the potential at distance ``R``; it decreases as ``R`` grows."""
        # The numerator only scales the values and has no effect on which
        # class gets the highest potential.
        return 1000000 / (1 + pow(R, 2))

    @staticmethod
    def __calc_evclid_distance(measure1, measure2):
        """Return the Euclidean distance between two equally long vectors."""
        difference = measure1 - measure2
        squared_diff = np.square(difference)
        sum_of_squared_diff = np.sum(squared_diff)
        return sqrt(sum_of_squared_diff)


In [64]:
# Peek at the held-out rows: the pixel feature columns plus the true "class" label.
test_df.head()

Unnamed: 0,1:1:1,1:1:2,1:1:3,1:2:1,1:2:2,1:2:3,1:3:1,1:3:2,1:3:3,1:4:1,...,50:48:1,50:48:2,50:48:3,50:49:1,50:49:2,50:49:3,50:50:1,50:50:2,50:50:3,class
5,196.0,207.0,131.0,194.0,205.0,129.0,190.0,200.0,127.0,186.0,...,174.0,186.0,114.0,174.0,192.0,116.0,174.0,192.0,116.0,field
10,212.0,217.0,161.0,214.0,219.0,163.0,212.0,220.0,163.0,204.0,...,146.0,190.0,103.0,150.0,194.0,105.0,150.0,194.0,105.0,field
17,66.0,105.0,86.0,64.0,103.0,84.0,62.0,101.0,82.0,59.0,...,153.0,173.0,88.0,120.0,143.0,99.0,87.0,110.0,66.0,water
19,55.0,87.0,82.0,54.0,86.0,81.0,53.0,85.0,80.0,53.0,...,178.0,196.0,114.0,173.0,192.0,113.0,158.0,177.0,98.0,water


In [65]:
# Fit the class centres on the training split, using "class" as the target column.
model = Classifier()
model.fit(train_df, features, "class")
# Predicted labels for the held-out rows, in row order; compare them with the
# "class" column of test_df displayed above.
model.predict(test_df)

array(['field', 'field', 'water', 'water'], dtype=object)