In [1]:
import os
import numpy as np
from sklearn.neighbors import KNeighborsClassifier

In [2]:
class KnnSplitted:
    """
    Classifier that uses one KNN per feature.

    One `KNeighborsClassifier` is fitted on each feature file; the label
    predicted for a sample is the majority vote of the per-feature models.
    """

    def __init__(self, n_neighbors):
        """
        Argument:
        ---------
        - `n_neighbors`: number of neighbors used in the KNN models.
        """
        self.n_neighbors = n_neighbors

    def load_data(self, data_path, features=range(2, 33)):
        """
        Load the data for the classifier.
        Modified from the method given with the assignment.

        Arguments:
        ----------
        - `data_path`: directory containing the `LS` (training) and `TS`
          (testing) sub-directories.
        - `features`: iterable of sensor ids to load. For each id `f` the
          files `LS/LS_sensor_<f>.txt` and `TS/TS_sensor_<f>.txt` are read.
          Defaults to the ids 2..32.

        Side effects:
        -------------
        Sets `self.X_train` and `self.X_test` (lists with one array per
        feature, in the order given by `features`) and `self.y_train`
        (labels read from `LS/activity_Id.txt`), then prints their lengths.
        """

        # Materialize once so that a one-shot iterable can be walked twice.
        features = list(features)

        LS_path = os.path.join(data_path, 'LS')
        TS_path = os.path.join(data_path, 'TS')

        # One array per feature; the list index is the position in `features`.
        X_train = [
            np.loadtxt(os.path.join(LS_path, 'LS_sensor_{}.txt'.format(f)))
            for f in features
        ]
        X_test = [
            np.loadtxt(os.path.join(TS_path, 'TS_sensor_{}.txt'.format(f)))
            for f in features
        ]

        y_train = np.loadtxt(os.path.join(LS_path, 'activity_Id.txt'))

        print('X_train len: {}.'.format(len(X_train)))
        print('y_train len: {}.'.format(len(y_train)))
        print('X_test len: {}.'.format(len(X_test)))

        self.X_train = X_train
        self.y_train = y_train
        self.X_test = X_test

    def fit(self):
        """
        Fit the classifier.

        Trains one KNN model per entry of `self.X_train`, all against the
        same `self.y_train`, and stores them in `self.models` in the same
        order. `load_data` must have been called first.
        """

        self.models = []

        for X_feature in self.X_train:
            model = KNeighborsClassifier(n_neighbors=self.n_neighbors)
            model.fit(X_feature, self.y_train)
            self.models.append(model)

    def predict(self):
        """
        Predict the class labels.

        Every model predicts on its own feature array from `self.X_test`;
        the per-sample result is the most frequent label among the models.
        Ties are resolved in favour of the smallest label.

        Return:
        -------
        1-D integer array with one label per test sample.
        """

        # Row k holds the votes of model k; labels are truncated to int,
        # as they were when stored in the original integer vote matrix.
        votes = np.stack([
            np.asarray(model.predict(X_feature))
            for model, X_feature in zip(self.models, self.X_test)
        ]).astype(int)

        n_samples = votes.shape[1]
        predictedClasses = np.empty(n_samples, dtype=int)
        for j in range(n_samples):
            # np.unique returns sorted labels, so argmax (first maximum)
            # picks the smallest label on a tie and also accepts negative
            # labels, which np.bincount would reject.
            labels, counts = np.unique(votes[:, j], return_counts=True)
            predictedClasses[j] = labels[np.argmax(counts)]

        return predictedClasses

In [3]:
# Build the per-feature KNN ensemble; every underlying model uses 1 neighbor.
clf = KnnSplitted(n_neighbors=1)
# Read the LS (training) and TS (testing) sensor files from the relative
# 'data' directory; load_data prints the sizes of what it loaded.
clf.load_data('data')

X_train len: 31.
y_train len: 3500.
X_test len: 31.


In [4]:
# Fit one KNN model per loaded feature array against the shared training labels.
clf.fit()

In [5]:
# Majority-vote label for every test sample. No pre-allocation is needed:
# predict() builds and returns its own result array.
predictions = clf.predict()
print(predictions)

[ 4 11  6 ...  5  1  5]
