In [1]:
from model.rnn import GRUDecoder
from model.autoencoder import AutoEncoder
import torch        

In [2]:
def get_device():
    """Pick the torch device to run on, preferring CUDA, then MPS, then CPU.

    A short message naming the chosen device is printed on every call.

    Returns:
        torch.device: ``cuda:0`` when CUDA is available, ``mps`` when the
        Metal Performance Shaders backend is available, otherwise ``cpu``.
    """
    if torch.cuda.is_available():
        # Use the first CUDA device.
        device = torch.device("cuda:0")
        print("Using CUDA device:", torch.cuda.get_device_name(0))
        return device

    # `torch.backends.mps` only exists on PyTorch 1.12 or newer. Look it up
    # defensively so older builds fall through to the CPU instead of raising
    # AttributeError.
    mps_backend = getattr(torch.backends, "mps", None)
    if mps_backend is not None and mps_backend.is_available():
        device = torch.device("mps")
        print("Using MPS (Metal Performance Shaders) device")
        return device

    # Neither CUDA nor MPS is usable.
    device = torch.device("cpu")
    print("Using CPU")
    return device


## Autoencoder

In [12]:
### Build the autoencoder
SEQ_LEN = 3000      # passed to the model as sequence_len
HIDDEN_DIM = 512    # passed to the model as embedding_size
ENCODING_SIZE = 64  # passed to the model as encoding_size
model = AutoEncoder(
    vocab_size=100,
    embedding_size=HIDDEN_DIM,
    encoding_size=ENCODING_SIZE,
    sequence_len=SEQ_LEN,
)


In [14]:
x

tensor([[19, 13,  7,  ..., 88, 94, 60],
        [63, 68, 82,  ..., 25, 66, 91]])

In [13]:
#let's assume we have a batch of 2 people
x = torch.randint(1,99, size=(2,SEQ_LEN))
y = model(x) 
## returns the original shape


In [5]:
### Use only the encoder part of the autoencoder.
# NOTE(review): `model` must still be the AutoEncoder here. The RNN section
# further down rebinds `model` to a GRUDecoder, so re-running this cell after
# that section would call `encode` on the wrong object.
y = model.encode(x) # y holds the embedding of one survey per row

In [6]:
# Display the encodings: one row per person in the batch
# (each row should have ENCODING_SIZE values).
y

tensor([[1.0181, 1.2055, 1.1069, 1.0573, 1.2489, 1.1327, 1.1476, 0.9988, 1.1303,
         1.1413, 1.2062, 1.1750, 1.1332, 1.1358, 1.2791, 1.2234, 0.9566, 1.2678,
         1.0647, 1.0055, 1.0363, 1.1205, 0.9869, 1.1123, 1.0870, 1.1040, 0.9812,
         1.0783, 1.0984, 1.1495, 1.0597, 1.0109, 1.0734, 1.0487, 1.2017, 0.9716,
         1.0674, 1.0515, 1.1291, 1.1136, 1.1051, 1.0814, 1.1689, 1.1321, 1.1260,
         1.0802, 1.1869, 1.0064, 1.0912, 1.1110, 1.2514, 1.1012, 1.0839, 1.1723,
         1.1688, 1.1562, 1.1463, 1.1324, 1.1117, 1.2091, 1.0689, 0.9981, 1.1680,
         1.2049],
        [0.9981, 1.1551, 0.9326, 1.1216, 1.1961, 1.0981, 1.0040, 1.0683, 1.1093,
         0.9739, 1.0427, 0.9508, 1.1087, 1.0366, 1.0619, 1.1540, 1.1945, 1.0816,
         1.2376, 1.0524, 1.2920, 1.0100, 1.0800, 1.0678, 1.0836, 1.0298, 1.0424,
         1.1119, 1.1505, 1.0366, 1.1331, 0.9942, 1.0213, 1.1918, 1.0771, 1.0476,
         1.1519, 1.0659, 1.0433, 1.0261, 1.0559, 1.0329, 1.1963, 1.0541, 0.9515,
         0

## RNN

In [7]:
## Build the decoder.
# input_size  -> size of the embedding produced by the autoencoder model
# hidden_size -> size of the RNN used in the decoder (may differ from input_size)
# NOTE(review): this rebinds `model`, which referred to the AutoEncoder in the
# section above.
model = GRUDecoder(
    input_size=6,
    hidden_size=10,
    max_seq_len=4,
).to(get_device())

Using MPS (Metal Performance Shaders) device


In [8]:
# This is just an example

MAX_SEQ_LEN = 4 # max number of surveyas a person (in our dataset can have)
INPUT_SIZE = 6 # hidden dimmensions of autoencodder.

# let's say we have a person who only have 2 surveys
x0 = torch.rand(INPUT_SIZE) # embedding for the 1st survey 
x1 = torch.rand(INPUT_SIZE) # embedding for the 2nd survey

# the tensor for the person should be on the shape [MAX_SEQ_LEN, INPUT_SIZE]

e = torch.zeros(MAX_SEQ_LEN, INPUT_SIZE)
e[0] = x0
e[1] = x1
e = e.to(get_device()) # so this is a tensor for the person
#we also need to specify that the sequence has 'empty' embeddings
mask = torch.BoolTensor([True, True, False, False]).to(get_device()) # the last two dimensions are empty
## it is important that you append existing survey embeddings right next to each other (even if the year is missign between them, they should be still appended one after another)

## let assume we have a batch of people, I am reusing the same person, but in the pipeline is should be different people
# the batch size is 3 here 

x = torch.stack([e,e,e])
mask = torch.stack([mask, mask, mask])

Using MPS (Metal Performance Shaders) device
Using MPS (Metal Performance Shaders) device


In [9]:
# Run the decoder on the batch. `mask` marks which sequence positions hold real
# survey embeddings. The next cell applies a sigmoid to this result and shows
# one value per person.
xx = model(x, mask)

  lengths, sorted_idx = lengths.sort(0, descending=True)
  lengths, sorted_idx = lengths.sort(0, descending=True)


In [10]:
# Squash the decoder outputs into (0, 1) — presumably they are logits; confirm
# against GRUDecoder.
torch.sigmoid(xx)

  nonzero_finite_vals = torch.masked_select(


tensor([[0.5298],
        [0.5026],
        [0.4558]], device='mps:0', grad_fn=<SigmoidBackward0>)