In [13]:
from model.rnn import GRUDecoder
from model.autoencoder import AutoEncoder
import torch        

In [14]:
def get_device():
    """Select the best available torch device: CUDA first, then MPS, then CPU.

    A one-line message naming the chosen backend is printed on every call.

    Returns:
        torch.device: ``cuda:0`` when CUDA is available; otherwise ``mps`` when
        the MPS backend exists and reports itself available; otherwise ``cpu``.
    """
    if torch.cuda.is_available():
        # Use the first CUDA device.
        device = torch.device("cuda:0")
        print("Using CUDA device:", torch.cuda.get_device_name(0))
        return device

    # `torch.backends.mps` only exists on PyTorch 1.12 or newer. Look it up
    # defensively so older installations fall through to the CPU branch
    # instead of raising AttributeError.
    mps_backend = getattr(torch.backends, "mps", None)
    if mps_backend is not None and mps_backend.is_available():
        device = torch.device("mps")
        print("Using MPS (Metal Performance Shaders) device")
        return device

    # Neither CUDA nor MPS is usable: fall back to the CPU.
    device = torch.device("cpu")
    print("Using CPU")
    return device


## Autoencoder

In [15]:
### Build the autoencoder used in the examples below
SEQ_LEN = 3000       # forwarded to AutoEncoder as `sequence_len`
HIDDEN_DIM = 512     # forwarded to AutoEncoder as `embedding_size`
ENCODING_SIZE = 64   # forwarded to AutoEncoder as `encoding_size`
model = AutoEncoder(
    vocab_size=100,
    embedding_size=HIDDEN_DIM,
    encoding_size=ENCODING_SIZE,
    sequence_len=SEQ_LEN,
)


In [16]:
# Toy batch of 2 people: each row holds SEQ_LEN integer token ids drawn from [1, 99).
x = torch.randint(
    low=1,
    high=99,
    size=(2, SEQ_LEN),
)
# Full autoencoder pass; per the original note the result comes back in the
# input's shape (TODO confirm against AutoEncoder.forward).
y = model(x)


In [17]:
### Use only the encoder part of the autoencoder.
### The printed result in the next cell shows 64 values per row, matching ENCODING_SIZE.
y = model.encode(x) # y holds one survey embedding per row of the batch

In [18]:
# Display the encoded batch returned by model.encode.
# NOTE(review): every printed value is on the order of 1e-8 (effectively zero) —
# confirm this is expected for a freshly initialised encoder.
y

tensor([[-9.5367e-08, -3.7087e-08,  3.1789e-08, -5.4306e-08, -6.8876e-08,
         -5.2982e-09, -5.6956e-08, -5.8280e-08, -4.2386e-08, -4.3710e-08,
          2.6491e-09, -6.8876e-08,  2.6491e-08,  7.6824e-08,  1.3245e-08,
         -2.7816e-08,  3.1789e-08, -5.2982e-08,  5.2982e-08, -2.7816e-08,
          6.6227e-09, -3.1789e-08, -1.8544e-08, -1.0596e-08,  0.0000e+00,
         -1.0596e-08,  2.2517e-08, -2.1193e-08,  1.0596e-08, -1.7219e-08,
         -4.5035e-08,  2.1193e-08, -2.1193e-08,  4.2386e-08, -2.7816e-08,
          3.1789e-08,  4.7684e-08, -3.7087e-08,  9.2718e-09,  1.0596e-08,
          1.0596e-08, -3.1789e-08,  5.0333e-08, -3.4438e-08, -5.2982e-08,
          4.2386e-08, -3.7087e-08,  0.0000e+00,  1.0596e-08, -1.3245e-08,
          9.5367e-08, -1.3245e-08, -3.8412e-08,  1.1126e-07,  5.0333e-08,
         -7.1526e-08,  1.8544e-08,  1.5895e-08,  3.3114e-08, -4.5035e-08,
         -3.1789e-08,  1.0596e-08,  8.4771e-08,  7.4175e-08],
        [-2.6491e-09,  3.1789e-08, -1.5895e-08, -1

## RNN

In [19]:
## GRUDecoder arguments
#   input_size  -> size of the autoencoder embedding fed into the decoder
#   hidden_size -> size of the RNN used in the decoder (it may differ from input_size)
model = GRUDecoder(
    input_size=6,
    hidden_size=10,
    max_seq_len=4,
)
# Move the decoder onto whichever device get_device() selects.
model = model.to(get_device())

Using MPS (Metal Performance Shaders) device


In [20]:
# Toy example: build the decoder input for one person, then batch it.

MAX_SEQ_LEN = 4  # max number of surveys a person can have in our dataset
INPUT_SIZE = 6   # size of one survey embedding coming from the autoencoder
NUM_SURVEYS = 2  # the example person has answered only 2 surveys
BATCH_SIZE = 3   # number of people in the example batch

# Resolve the device once: every get_device() call prints its message again.
device = get_device()

x0 = torch.rand(INPUT_SIZE)  # embedding of the 1st survey
x1 = torch.rand(INPUT_SIZE)  # embedding of the 2nd survey

# A person is a tensor of shape [MAX_SEQ_LEN, INPUT_SIZE]. Existing survey
# embeddings must be packed at the front, right next to each other (even when a
# year is missing between them); the remaining rows stay zero.
e = torch.zeros(MAX_SEQ_LEN, INPUT_SIZE)
e[0] = x0
e[1] = x1
e = e.to(device)

# Boolean mask flagging which positions hold a real survey embedding. Deriving
# it from NUM_SURVEYS keeps it consistent with MAX_SEQ_LEN; with the values
# above it equals [True, True, False, False].
person_mask = (torch.arange(MAX_SEQ_LEN) < NUM_SURVEYS).to(device)

# Batch of BATCH_SIZE people. The same person is reused here for brevity; in the
# real pipeline every entry should be a different person.
x = torch.stack([e] * BATCH_SIZE)
mask = torch.stack([person_mask] * BATCH_SIZE)

Using MPS (Metal Performance Shaders) device
Using MPS (Metal Performance Shaders) device


In [21]:
# Run the GRU decoder on the padded batch `x` together with its validity `mask`.
# The sigmoid of this result, displayed in the next cell, has shape [3, 1] (one
# value per person), so `xx` presumably holds raw logits — TODO confirm against GRUDecoder.
xx = model(x, mask)

In [22]:
# Squash the decoder output into the (0, 1) range, element-wise, and display it.
torch.sigmoid(xx)

tensor([[0.4641],
        [0.3718],
        [0.4972]], device='mps:0', grad_fn=<SigmoidBackward0>)