In [None]:
import os
import numpy as np
import matplotlib.pyplot as plt

from sktime.classification.distance_based import KNeighborsTimeSeriesClassifier
from sktime.datasets import load_basic_motions
from sktime.datatypes._panel._convert import from_3d_numpy_to_nested

In [None]:
def load_data(data_path):
    """
    Based on method given with the assignment.
    Load the learning set (LS) and the test set (TS) in 3D-arrays and convert
    them with ``from_3d_numpy_to_nested``.

    For every sensor index ``f`` in ``range(2, 33)`` the files
    ``<data_path>/LS/LS_sensor_<f>.txt`` and ``<data_path>/TS/TS_sensor_<f>.txt``
    are read with ``np.loadtxt``; the first 3500 rows of each file are stored
    at feature position ``f - 2`` of the corresponding array.

    Parameters
    ----------
    data_path : str
        Directory containing the ``LS`` and ``TS`` sub-directories.

    Returns
    -------
    X_train
        Result of ``from_3d_numpy_to_nested`` applied to the
        (3500, 31, 512) learning-set array.
    y_train : numpy.ndarray
        Content of ``LS/activity_Id.txt`` as loaded by ``np.loadtxt``.
    X_test
        Result of ``from_3d_numpy_to_nested`` applied to the
        (3500, 31, 512) test-set array.

    Raises
    ------
    ValueError
        If a sensor file does not contain at least 3500 rows of exactly
        512 values each.
    """

    FEATURES = range(2, 33)
    N_TIME_SERIES = 3500
    N_TIMESTEPS = 512

    def _load_sensor(folder, prefix, feature):
        """Read one sensor file and return its first N_TIME_SERIES rows."""
        file_path = os.path.join(folder, '{}_sensor_{}.txt'.format(prefix, feature))
        data = np.loadtxt(file_path)
        # Fail loudly on malformed files rather than letting numpy broadcast
        # a wrongly-shaped array into the target or raising an opaque IndexError.
        if (data.ndim != 2
                or data.shape[0] < N_TIME_SERIES
                or data.shape[1] != N_TIMESTEPS):
            raise ValueError(
                'Unexpected shape {} in {}: expected at least {} rows of {} values.'.format(
                    data.shape, file_path, N_TIME_SERIES, N_TIMESTEPS))
        return data[:N_TIME_SERIES]

    # Create the training and testing samples
    LS_path = os.path.join(data_path, 'LS')
    TS_path = os.path.join(data_path, 'TS')
    X_train = np.zeros((N_TIME_SERIES, len(FEATURES), N_TIMESTEPS))
    X_test = np.zeros((N_TIME_SERIES, len(FEATURES), N_TIMESTEPS))

    # `col` is the position of sensor `f` along the feature axis (f - 2).
    for col, f in enumerate(FEATURES):
        print("Loading feature {}...".format(f))
        # One slice assignment per file replaces 3500 row-by-row copies.
        X_train[:, col, :] = _load_sensor(LS_path, 'LS', f)
        X_test[:, col, :] = _load_sensor(TS_path, 'TS', f)

    y_train = np.loadtxt(os.path.join(LS_path, 'activity_Id.txt'))

    print('X_train size: {}.'.format(X_train.shape))
    print('y_train size: {}.'.format(y_train.shape))
    print('X_test size: {}.'.format(X_test.shape))

    X_train = from_3d_numpy_to_nested(X_train)
    X_test = from_3d_numpy_to_nested(X_test)

    return X_train, y_train, X_test

In [None]:
def write_submission(y, where, submission_name='toy_submission.csv'):
    """
    Method given with the assignment.
    Write the predictions to a CSV submission file.

    The file has the header ``Id,Prediction`` followed by one line per
    prediction, with ids starting at 1. All checks run before the file system
    is touched, so a rejected call neither removes an existing submission nor
    leaves an empty file behind.

    Parameters
    ----------
    y : array-like
        The 3500 predicted class labels; values are cast to ``int`` and must
        lie in the range 1..14.
    where : str
        Output directory; created if it does not exist.
    submission_name : str
        Name of the CSV file written inside ``where``. An existing file with
        that name is overwritten.

    Raises
    ------
    ValueError
        If ``y`` does not hold exactly 3500 values, or if a label is
        outside 1..14.
    """
    # np.asarray lets plain sequences through as well as numpy arrays.
    y = np.asarray(y).astype(int)

    # Verify conditions on the predictions
    n_samples = len(y)
    if n_samples != 3500:
        raise ValueError('Check the number of predicted values.')

    outputs = np.unique(y)
    if np.max(outputs) > 14:
        raise ValueError('Class {} does not exist.'.format(np.max(outputs)))
    if np.min(outputs) < 1:
        raise ValueError('Class {} does not exist.'.format(np.min(outputs)))

    os.makedirs(where, exist_ok=True)
    SUBMISSION_PATH = os.path.join(where, submission_name)

    # Write submission file ('w' truncates any previous submission of that name)
    with open(SUBMISSION_PATH, 'w') as file:
        file.write('Id,Prediction\n')
        file.writelines(
            '{},{}\n'.format(n, int(label)) for n, label in enumerate(y, start=1)
        )

    print('Submission {} saved in {}.'.format(submission_name, SUBMISSION_PATH))

In [None]:
# Load the learning-set samples, their labels, and the test-set samples
# from the 'data' directory (relative to the notebook's working directory).
X_train, y_train, X_test = load_data('data')

In [None]:
# 1-nearest-neighbour time-series classifier; every other setting
# (including the distance measure) is left at the sktime default.
clf = KNeighborsTimeSeriesClassifier(n_neighbors=1)
clf.fit(X_train, y_train)

In [None]:
# Predict one label per test-set time series with the fitted classifier.
predictions = clf.predict(X_test)

In [None]:
# Save the labels predicted for the test set as the submission file.
write_submission(predictions, 'submissions', submission_name='knn_time_series_1.csv')