In [6]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
import numpy as np
from torch.utils import data
import pandas as pd

In [5]:
sensor = np.loadtxt('./data/processed_data/sensor_data.txt')
activity = np.loadtxt('./data/processed_data/activity_data.txt')

In [7]:
sensor_dataset = data.TensorDataset(data_tensor=torch.FloatTensor(sensor), target_tensor=torch.FloatTensor(activity))
data_loader = data.DataLoader(
    dataset=sensor_dataset,
    shuffle=False
)

In [45]:
class VAE(nn.Module):
    def __init__(self, n_input, n_hidden):
        super(VAE, self).__init__()
        self.encoder = nn.Sequential(
            nn.Linear(n_input, n_hidden),
            nn.LeakyReLU(0.2),
            nn.Linear(n_hidden, n_input*2))  # 2 for mean and variance.
        
        self.decoder = nn.Sequential(
            nn.Linear(n_input, n_hidden),
            nn.ReLU(),
            nn.Linear(n_hidden, n_input),
            nn.Sigmoid())
        
    def reparameterize(self, mu, log_var):
        ## z = mean + eps * sigma where eps is sampled from N(0, 1).
        eps = Variable(torch.randn(mu.size(0), mu.size(1)))
        z = mu + eps * torch.exp(log_var / 2) ## /2 是为了转化为std
        return z
    
    def forward(self, x):
        h = self.encoder(x)
        mu, log_var = torch.chunk(h, 2, dim=1)   ## mean and log variance
        z = self.reparameterize(mu, log_var)
        out = self.decoder(z)
        return out, mu, log_var
    
    def sample(self, z):
        return self.decoder(z)

In [46]:
vae = VAE(14, 30)
optimizer = torch.optim.Adam(vae.parameters(), lr=0.001)
iter_per_epoch = len(data_loader)
data_iter = iter(data_loader)

In [50]:
Loss = []
for epoch in range(3):
    for i, (x, _) in enumerate(data_loader):
        
        x_train = Variable(x.view(x.size(0), -1))
        out, mu, log_var = vae(x_train)
        
        # Compute reconstruction loss and kl divergence
        # For kl_divergence, see Appendix B in the paper or http://yunjey47.tistory.com/43
        reconst_loss = F.binary_cross_entropy(out, x_train, size_average=False)
        kl_divergence = torch.sum(0.5 * (mu**2 + torch.exp(log_var) - log_var - 1))
        
        # Backprop + Optimize
        total_loss = reconst_loss + kl_divergence
        optimizer.zero_grad()
        total_loss.backward()
        optimizer.step()
        
        if i % 100 == 0:
            print ("\rEpoch[%d/%d], Step [%d/%d], Total Loss: %.4f, "
                   "Reconst Loss: %.4f, KL Div: %.7f" 
                   %(epoch+1, 3, i+1, iter_per_epoch, total_loss.data[0], 
                     reconst_loss.data[0], kl_divergence.data[0]), end='')
            Loss.append(reconst_loss)
    print()
    

Epoch[1/3], Step [80001/80010], Total Loss: 0.0087, Reconst Loss: 0.0087, KL Div: -0.0000000
Epoch[2/3], Step [80001/80010], Total Loss: 0.0005, Reconst Loss: 0.0005, KL Div: 0.0000003
Epoch[3/3], Step [80001/80010], Total Loss: 0.0010, Reconst Loss: 0.0010, KL Div: 0.0000006


In [51]:
recons_sensor = vae(Variable(torch.FloatTensor(sensor)))

In [52]:
np.savetxt('./data/recons_sensor.txt', recons_sensor[0].data.numpy())

In [53]:
del recons_sensor
recons_sensor = np.loadtxt('./data/recons_sensor.txt')

In [54]:
recons_sensor.shape, activity.shape

((80010, 14), (80010, 16))

In [55]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(recons_sensor, activity, test_size=0.3, shuffle=False)

In [56]:
X_train.shape, X_test.shape, y_train.shape, y_test.shape

((56007, 14), (24003, 14), (56007, 16), (24003, 16))

In [57]:
from sklearn.ensemble import RandomForestClassifier
rf = RandomForestClassifier(n_estimators=80, max_depth=3, max_features=5)

In [58]:
rf.fit(X_train, y_train)

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=3, max_features=5, max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=80, n_jobs=1,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False)

In [59]:
from sklearn.metrics import classification_report
y_pred = rf.predict(X_test)

In [60]:
print(classification_report(y_test, y_pred, digits=4))

  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)


             precision    recall  f1-score   support

          0     0.0000    0.0000    0.0000         7
          1     0.0000    0.0000    0.0000      6392
          2     0.0000    0.0000    0.0000        77
          3     0.0000    0.0000    0.0000        24
          4     0.0000    0.0000    0.0000         8
          5     0.0000    0.0000    0.0000       132
          6     0.3408    0.9943    0.5077      8133
          7     0.0000    0.0000    0.0000         0
          8     0.0000    0.0000    0.0000         0
          9     0.0000    0.0000    0.0000         0
         10     0.0000    0.0000    0.0000       110
         11     0.0000    0.0000    0.0000         0
         12     0.0000    0.0000    0.0000        10
         13     0.0000    0.0000    0.0000         4
         14     0.0000    0.0000    0.0000         2
         15     0.0000    0.0000    0.0000         9

avg / total     0.1859    0.5425    0.2770     14908

