In [None]:
import torch
from torch import nn
from torch import optim

import numpy as np
import pandas as pd
from umap import UMAP

import matplotlib.pyplot as plt
# Apply both style sheets in order: ggplot first, then the colour-blind palette on top.
plt.style.use(['ggplot', 'seaborn-v0_8-colorblind'])


In [None]:
# Keep only the first n_points rows of the gait table (10*101*6 = 6060 rows).
# NOTE(review): the meaning of the factors 10, 101 and 6 is not stated here — confirm.
n_points = 10*101*6

# Truncate the frame once, then derive the numeric matrix from the truncated frame.
df = pd.read_csv('./dataset/gait.csv')[:n_points]
x0 = df.to_numpy()
df

In [None]:
# %%time  (uncomment as the first line of the cell to time the embedding)
# Later cells read `xy`, so it must be defined on a fresh kernel.
# Reuse the cached embedding when it matches the current data; otherwise
# recompute it with UMAP and refresh the cache.
xy = None
try:
    xy = np.load('xy.npy')
except FileNotFoundError:
    # No cache yet: fall through to the computation below.
    pass

if xy is None or xy.shape[0] != x0.shape[0]:
    # Missing cache, or a cache built for a different number of rows.
    xy = UMAP().fit_transform(x0)
    np.save('xy.npy', xy)

plt.scatter(xy[:,0], xy[:,1], s=2, c=df['replication']);

## Predicate deduction

In [None]:
# Boolean mask of the points whose embedding lies strictly inside the
# open square (0, 5) x (0, 5).
subset = (
    (xy[:,0] > 0) & (xy[:,0] < 5)
    & (xy[:,1] > 0) & (xy[:,1] < 5)
)

# Grey backdrop of every point, then the selection on top coloured by replication.
plt.scatter(x=xy[:,0], y=xy[:,1], s=1, c='#666')
plt.scatter(x=xy[subset,0], y=xy[subset,1], s=2, c=df['replication'][subset])

In [None]:
# Exponent applied to the scaled offsets inside `predict` (read at call time).
b = 4
def predict(x, a, mu):
    '''Score each row of `x` with a smooth bump centred at `mu`.

    For every row the per-dimension offsets ``a * (x - mu)`` are raised to the
    power ``b`` and summed over dimension 1; the score is ``1 / (1 + sum)``.

    x  : tensor with at least two dimensions (the sum runs over dim 1)
    a  : per-dimension scale, broadcast against `x`
    mu : per-dimension centre, broadcast against `x`
    '''
    scaled_offset = a * (x - mu)
    penalty = torch.pow(scaled_offset, b).sum(dim=1)
    return 1 / (1 + penalty)



def predicate(x0, subset, n_epochs=3000, lr=1e-2, seed=None):
    '''Learn per-dimension intervals that describe the selected rows.

    The data are standardised, then ``predict(x, a, mu)`` is fitted with
    full-batch SGD and binary cross-entropy against ``subset``. Every
    dimension whose fitted half-width ``1/|a_k|`` is smaller than half of that
    dimension's (standardised) data range is reported as an interval
    ``(mu_k - r_k, mu_k + r_k)``, mapped back to the original units.

    Parameters
    ----------
    x0 : array of shape (n_rows, n_dims), convertible to float32
    subset : boolean array of selection, one entry per row of `x0`
    n_epochs : number of SGD steps (default 3000)
    lr : SGD learning rate (default 1e-2)
    seed : optional int. When given, `a` and `mu` are drawn from a private
        generator so the initialisation is repeatable; ``None`` keeps using
        torch's global RNG.

    Returns
    -------
    dict with a single key ``'predicates'``: a list of
    ``dict(dim=k, interval=(low, high))``.

    Raises
    ------
    ValueError
        If `subset` and `x0` disagree on the number of rows.
    '''
    subset = np.asarray(subset, dtype=bool)
    x0 = np.asarray(x0)
    if subset.shape[0] != x0.shape[0]:
        raise ValueError(
            'subset has %d entries but x0 has %d rows'
            % (subset.shape[0], x0.shape[0])
        )

    ## prepare training data
    x = torch.from_numpy(x0.astype(np.float32))
    x_mean = x.mean(0)
    # The +1 keeps the divisor at least 1, so a constant column cannot divide by zero.
    x_std = x.std(0)+1
    x = (x-x_mean)/(x_std)
    label = torch.from_numpy(subset).float()

    ## fit the scale `a` and centre `mu` of the bump
    gen = None if seed is None else torch.Generator().manual_seed(seed)
    n_dims = x.shape[1]
    a = torch.randn(n_dims, generator=gen).requires_grad_(True)
    mu = torch.randn(n_dims, generator=gen).requires_grad_(True)

    bce = nn.BCELoss()
    optimizer = optim.SGD([mu, a,], lr=lr, momentum=0.9, weight_decay=0.01)
    for e in range(n_epochs):
        pred = predict(x, a, mu)
        l = bce(pred, label)
        optimizer.zero_grad()
        l.backward()
        optimizer.step()
        if e % 500 == 0:
            print('loss', l.item())

    a = a.detach()
    mu = mu.detach()

    # Score the final parameters: the last `pred` from the loop was computed
    # before the final optimizer step.
    with torch.no_grad():
        pred = predict(x, a, mu)
    print(
        'accuracy',
        ((pred>0.5).float() == label).float().sum().item(),
    '/', subset.shape[0])

    ## turn tight dimensions into intervals in the original units
    half_width = 1/a.abs()
    data_range = x.max(0).values - x.min(0).values
    predicates = []
    for k in range(n_dims):
        # Keep a dimension only if the bump is narrower than half the data range.
        if half_width[k] < 0.5 * data_range[k]:
            r_k = (half_width[k] * x_std[k]).item()
            mu_k = (mu[k] * x_std[k] + x_mean[k]).item()
            predicates.append(dict(
                dim=k, interval=((mu_k-r_k), (mu_k+r_k))
            ))
    return dict(
        predicates=predicates
    )

            
# Fit the predicate for the current selection; the returned dict is the cell output.
predicate(x0=x0, subset=subset)

In [None]:
# Colour every embedded point by its value in `pred`.
# NOTE(review): in the cells shown here `pred` is only assigned inside
# `predicate`, so this cell relies on a notebook-level `pred` from elsewhere —
# confirm it survives Restart & Run All.
plt.scatter(x=xy[:,0], y=xy[:,1], s=0.1, c=pred.detach().numpy())
plt.colorbar()


In [None]:
# Distribution of feature column 5: selected rows (opaque) over the remaining rows (faint).
plt.hist(x=x0[subset, 5], bins=60);
plt.hist(x=x0[~subset, 5], bins=60, alpha=0.1);

In [None]:
# Per-column (min, max) of feature columns 4..6 over the selected rows.
# The standardised tensor `x` only exists inside `predicate`, so read the raw
# matrix `x0` instead; its units match the intervals `predicate` reports.
x0[subset,4:7].min(0), x0[subset,4:7].max(0)

In [None]:
# Per-column (min, max) of feature columns 4..6 over the rows outside the selection.
# The standardised tensor `x` only exists inside `predicate`, so read the raw
# matrix `x0` instead; its units match the intervals `predicate` reports.
x0[~subset,4:7].min(0), x0[~subset,4:7].max(0)