In [2]:
import pandas as pd
import numpy as np
import torch
import onlinehd
from time import time
import sklearn.preprocessing

In [3]:
# Read the feature (column) names: the second whitespace-separated token of
# every non-blank line in features.txt.
with open('UCI HAR Dataset/features.txt') as names_file:
    raw_names = [line.split()[1] for line in names_file if line.strip()]

# Make every name unique by appending underscores to later repeats, so the
# list can be handed to read_csv(names=...) as column labels.
# A set gives O(1) membership checks instead of rescanning the growing list.
seen_names = set()
features = []
for name in raw_names:
    while name in seen_names:
        name += '_'
    seen_names.add(name)
    features.append(name)

In [4]:
# Training split: whitespace-separated feature matrix (one column per entry in
# `features`) and a single-column label file read as a Series.
# sep=r'\s+' and .squeeze('columns') replace the delim_whitespace= / squeeze=
# keywords, which pandas has deprecated / removed (squeeze= is gone in 2.0).
X_train = pd.read_csv('UCI HAR Dataset/train/X_train.txt', sep=r'\s+', header=None, names=features)
Y_train = pd.read_csv('UCI HAR Dataset/train/Y_train.txt', header=None).squeeze('columns')

In [5]:
# Test split: whitespace-separated feature matrix (one column per entry in
# `features`) and a single-column label file read as a Series.
# sep=r'\s+' and .squeeze('columns') replace the delim_whitespace= / squeeze=
# keywords, which pandas has deprecated / removed (squeeze= is gone in 2.0).
X_test = pd.read_csv('UCI HAR Dataset/test/X_test.txt', sep=r'\s+', header=None, names=features)
Y_test = pd.read_csv('UCI HAR Dataset/test/Y_test.txt', header=None).squeeze('columns')

In [None]:
# Rescale every sample (row) with sklearn's Normalizer.
# transform() hands back a bare NumPy array by default, so wrap the result in a
# DataFrame again: the tensor-conversion cell that follows calls .to_numpy()
# on X_train / X_test and would otherwise fail once this cell has been run.
scaler = sklearn.preprocessing.Normalizer().fit(X_train)
X_train = pd.DataFrame(scaler.transform(X_train), index=X_train.index, columns=X_train.columns)
X_test = pd.DataFrame(scaler.transform(X_test), index=X_test.index, columns=X_test.columns)

In [6]:
# Turn the pandas objects into torch tensors: float32 feature matrices and
# int64 label vectors.  Labels are shifted down by one (the code subtracts 1
# from every label) before being used as class indices.
# NOTE: this rebinds the same names, so the cell cannot be re-run on its own.
X_train, X_test = (
    torch.from_numpy(frame.to_numpy()).to(torch.float32)
    for frame in (X_train, X_test)
)
Y_train, Y_test = (
    torch.from_numpy(labels.to_numpy()).to(torch.int64).sub(1)
    for labels in (Y_train, Y_test)
)

In [7]:
# Problem dimensions handed to the model constructor later on.
# Number of distinct label values present in the training labels.
classes = Y_train.unique().shape[0]
# Number of columns in the training matrix.
# NOTE: this rebinds `features` from the list of column names to an int.
features = X_train.shape[1]

In [8]:
# Report the dataset dimensions and the class count, one line each.
for caption, value in (
    ('Training Dataset Shape =', tuple(X_train.shape)),
    ('Testing Dataset Shape =', tuple(X_test.shape)),
    ('Number of Classes =', classes),
):
    print(caption, value)

Training Dataset Shape = (7352, 561)
Testing Dataset Shape = (2947, 561)
Number of Classes = 6


In [9]:
# Empty results table; learn_and_test adds one row per hyper-parameter run.
out = pd.DataFrame(
    columns=[
        'learning rate',
        'epochs',
        'dimension',
        'bootstrap',
        'testing accuracy',
        'training time',
    ]
)

In [10]:
def learn_and_test(X_train, Y_train, X_test, Y_test, dim, lr, epoch, bs, dropout=0.0):
    """Train one OnlineHD model, score it, and record the run in the global ``out``.

    Relies on notebook-level globals: ``classes`` and ``features`` are passed to
    the ``onlinehd.OnlineHD`` constructor, and ``out`` (the results DataFrame) is
    rebound with one additional row per call.

    Parameters
    ----------
    X_train, Y_train : torch.Tensor
        Training samples and their labels.
    X_test, Y_test : torch.Tensor
        Held-out samples and their labels.
    dim : int
        Forwarded as ``dim=`` to ``onlinehd.OnlineHD``.
    lr : float
        Forwarded as ``lr=`` to ``model.fit``.
    epoch : int
        Forwarded as ``epochs=`` to ``model.fit``.
    bs : float
        Forwarded as ``bootstrap=`` to ``model.fit`` (this is not the batch
        size, which is fixed at 1024).
    dropout : float, optional
        Probability of zeroing each element of the encoded training data
        before fitting; surviving elements are scaled by ``1 / (1 - dropout)``.
        The default ``0.0`` leaves the encodings untouched, matching the
        previously hard-coded ``probs = 0.0``.

    Returns
    -------
    None
        Accuracies and the elapsed time are printed; the test accuracy and
        time are stored in ``out``.
    """
    global out
    model = onlinehd.OnlineHD(classes, features, dim=dim)

    if torch.cuda.is_available():
        X_train = X_train.cuda()
        Y_train = Y_train.cuda()
        X_test = X_test.cuda()
        Y_test = Y_test.cuda()
        model = model.to('cuda')

    print('Training...')
    t = time()
    encoded = model.encode(X_train)
    if dropout > 0:
        # Inverted dropout applied on whatever device `encoded` lives on.
        # The earlier code sampled a mask on the CPU and then called .cuda()
        # unconditionally, which failed on machines without a GPU.
        encoded = torch.nn.functional.dropout(encoded, p=dropout, training=True)
    model = model.fit(encoded, Y_train, encoded=True, bootstrap=bs, lr=lr, epochs=epoch, batch_size=1024)
    t = time() - t

    print('Validating...')
    yhat = model(X_train)
    yhat_test = model(X_test)
    acc = (Y_train == yhat).float().mean()
    acc_test = (Y_test == yhat_test).float().mean()

    # DataFrame.append was removed in pandas 2.0, so build a one-row frame and
    # concatenate.  The first row replaces the empty table outright to avoid
    # concatenating with an empty frame.
    row = pd.Series(
        [lr, epoch, dim, bs, acc_test.cpu().item(), t],
        index=['learning rate', 'epochs', 'dimension', 'bootstrap', 'testing accuracy', 'training time'],
    ).to_frame().T
    out = row if out.empty else pd.concat([out, row], ignore_index=True)

    print(f'{acc = :6f}')
    print(f'{acc_test = :6f}')
    print(f'{t = :6f}')


In [11]:
# Exhaustive sweep over learning rate, epoch count, dimension and bootstrap
# fraction; the learning rate varies slowest and the bootstrap value fastest.
grid = [
    (lr, epoch, dim, bs)
    for lr in [0.2, 0.3, 0.4, 0.5]
    for epoch in [20, 40, 60]
    for dim in [5000, 7500, 10000]
    for bs in [0.25, 0.5]
]

for lr, epoch, dim, bs in grid:
    print('===============================')
    print(lr, epoch, dim, bs)
    learn_and_test(X_train, Y_train, X_test, Y_test, dim, lr, epoch, bs)

0.2 20 5000 0.25
Training...
Validating...
acc = 0.996600
acc_test = 0.171021
t = 4.345680
0.2 20 5000 0.5
Training...
Validating...
acc = 0.997416
acc_test = 0.175772
t = 3.148262
0.2 20 7500 0.25
Training...
Validating...
acc = 0.999184
acc_test = 0.159484
t = 4.273858
0.2 20 7500 0.5
Training...
Validating...
acc = 0.999728
acc_test = 0.168985
t = 3.358243
0.2 20 10000 0.25
Training...
Validating...
acc = 1.000000
acc_test = 0.174075
t = 4.391912
0.2 20 10000 0.5
Training...
Validating...
acc = 1.000000
acc_test = 0.171700
t = 3.589156
0.2 40 5000 0.25
Training...
Validating...
acc = 0.999184
acc_test = 0.169325
t = 5.015320
0.2 40 5000 0.5
Training...
Validating...
acc = 1.000000
acc_test = 0.177808
t = 4.242045
0.2 40 7500 0.25
Training...
Validating...
acc = 1.000000
acc_test = 0.178487
t = 5.335205
0.2 40 7500 0.5
Training...
Validating...
acc = 1.000000
acc_test = 0.170682
t = 4.350244
0.2 40 10000 0.25
Training...
Validating...
acc = 1.000000
acc_test = 0.173397
t = 5.495125
0

In [12]:
# Highest test accuracy recorded across all runs in the results table.
# NOTE(review): the captured output is ~0.18 for a 6-class problem while the
# printed training accuracies are ~1.0 -- confirm the train/test preprocessing
# matches before trusting these numbers.
out.loc[:, 'testing accuracy'].max()

0.1845945119857788

In [13]:
# Persist the results table without the row index.
# `index` is a boolean option; pass False explicitly rather than None.
out.to_csv('UCI_HAR_result.csv', index=False)