In [1]:
import torch
from torch import nn
import numpy as np
import pandas as pd

In [2]:
data = pd.read_csv('DSL-StrongPasswordData.csv')

In [3]:
class LSTM(nn.Module):
    def __init__(self, num_channels, hidden_dim, num_layers=5, target_size=512):
        super(LSTM, self).__init__()
        self.num_channels = num_channels
        self.hidden_dim = hidden_dim
        self.lstm = nn.LSTM(num_channels, hidden_dim, num_layers=num_layers, batch_first=False)

        # The linear layer that maps from hidden state space to tag space
        self.hidden2target = nn.Linear(hidden_dim, target_size)

    def forward(self, time_stamp):
        """
        Args:
            time_stamp: (num_feature, batch_size, num_channels)
        Output:
            metric: (target_size, batch_size)
        """
        # h.shape = c.shape = (num_layers, num_feature, hidden_dim)
        #                                     num_feature, batch_size, dim_vector
        _, (h, c) = self.lstm(time_stamp.view(-1, len(time_stamp), self.num_channels).double())

        # input the last h and c, whose shape = (num_feature, hidden_dim)
        # shape = (2 * num_feature, hidden_dim) after concatenation
        state_vector = torch.cat((h[-1, :, :], c[-1, :, :]), 0)
        state_metric = self.hidden2target(state_vector)
        metric = torch.sum(state_metric, dim=0)
        return metric

In [4]:
# initialize with num_channels per t and model depth
lstm = LSTM(1, 128).double()

In [5]:
from itertools import chain, repeat
label = pd.Series(list(chain.from_iterable(zip(*repeat(list(range(51)), 400)))), dtype=np.int8, name='typist')
training = data.iloc[:, 3:]

In [22]:
stroke1 = torch.tensor(training.iloc[0].values).view(-1, 1, 1)
stroke2 = torch.tensor(training.iloc[1].values).view(-1, 1, 1)

## Metric Learning

In [6]:
# input shape should be (num_feature, batch_size, num_channels)
embeddings = lstm(stroke1)
embeddings2 = lstm(stroke2)

In [20]:
embeddings.shape, embeddings2.shape, embeddings[:10]

(torch.Size([512]),
 torch.Size([512]),
 tensor([-3.6684,  3.5893,  1.7412,  4.2320, -6.1728, -3.9881,  3.4944,  5.7145,
          4.1667, -1.9314], dtype=torch.float64, grad_fn=<SliceBackward>))

In [None]:
# L1 distance
sum(embeddings - embeddings2)

In [11]:
# L1 loss
l1_loss = torch.nn.L1Loss(reduction='sum')
loss = l1_loss(embeddings, embeddings2)
loss

## Classification

In [39]:
num_classes=51
embeddings = lstm(stroke1)
gt_label = torch.tensor(0)
one_hot_label = torch.nn.functional.one_hot(gt_label, num_classes=num_classes)

cls_head = torch.nn.Linear(len(embeddings), num_classes).double()
softmax = torch.nn.Softmax()
pred_cls = softmax(cls_head(embeddings).double())

  


In [38]:
pred_cls.shape, pred_cls, one_hot_label.shape, one_hot_label

(torch.Size([51]),
 tensor([2.8560e-03, 2.7840e-03, 9.9352e-03, 3.6519e-03, 4.9495e-04, 8.2038e-04,
         6.5720e-04, 7.3496e-02, 2.6416e-02, 4.5150e-03, 2.1842e-03, 1.6426e-03,
         2.9907e-04, 7.4200e-04, 3.2409e-02, 6.0014e-04, 5.2635e-02, 6.0338e-02,
         3.7767e-04, 5.9697e-04, 1.3123e-03, 6.8210e-02, 1.7910e-02, 3.5233e-03,
         4.8046e-03, 2.9458e-03, 9.1984e-03, 1.7743e-02, 7.8691e-04, 8.6096e-04,
         7.2602e-05, 6.3483e-03, 1.4384e-02, 6.0182e-02, 3.1188e-01, 7.4939e-04,
         6.4176e-03, 1.1698e-02, 9.8340e-04, 9.0341e-02, 3.6891e-02, 6.8418e-04,
         6.5804e-03, 1.7511e-02, 1.8775e-03, 4.3068e-03, 4.4076e-03, 1.8286e-02,
         1.0695e-03, 2.4924e-04, 3.3405e-04], dtype=torch.float64,
        grad_fn=<SoftmaxBackward>),
 torch.Size([51]),
 tensor([1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0]))

In [43]:
# cross entropy loss
cross_entropy_loss = torch.nn.CrossEntropyLoss()
loss = cross_entropy_loss(pred_cls.view(-1, num_classes), gt_label.view(-1))

In [44]:
loss

tensor(3.9522, dtype=torch.float64, grad_fn=<NllLossBackward>)