In [1]:
from model.rnn import GRUDecoder
from model.autoencoder import AutoEncoder
import torch        

In [2]:
def get_device():
    """Pick the torch device to run on, preferring CUDA, then MPS, then CPU.

    A short message naming the selected backend is printed on every call.

    Returns:
        torch.device: ``cuda:0`` if CUDA is available, ``mps`` if the Metal
        Performance Shaders backend is available, otherwise ``cpu``.
    """
    if torch.cuda.is_available():
        # Always select the first CUDA device.
        device = torch.device("cuda:0")
        print("Using CUDA device:", torch.cuda.get_device_name(0))
        return device

    # `torch.backends.mps` only exists in PyTorch 1.12+. Look it up defensively
    # so older builds fall through to the CPU instead of raising AttributeError.
    mps_backend = getattr(torch.backends, "mps", None)
    if mps_backend is not None and mps_backend.is_available():
        device = torch.device("mps")
        print("Using MPS (Metal Performance Shaders) device")
        return device

    # Neither accelerator is usable: fall back to the CPU.
    device = torch.device("cpu")
    print("Using CPU")
    return device


## Autoencoder

In [3]:
### Build the AutoEncoder used in the cells below
SEQ_LEN = 3000       # forwarded as sequence_len
HIDDEN_DIM = 512     # forwarded as embedding_size
ENCODING_SIZE = 64   # forwarded as encoding_size
model = AutoEncoder(
    sequence_len=SEQ_LEN,
    embedding_size=HIDDEN_DIM,
    encoding_size=ENCODING_SIZE,
    vocab_size=100,
)




In [4]:
# Toy batch standing in for 2 people: each row holds SEQ_LEN integer ids drawn
# from [1, 99) -- the upper bound of torch.randint is exclusive.
x = torch.randint(low=1, high=99, size=(2, SEQ_LEN))
# Full forward pass through the model.
# NOTE(review): the original note says the output has the original shape -- confirm against AutoEncoder.forward.
y = model(x)


In [5]:
### Use only the encoder part of the autoencoder
# Each row of y is the embedding of one survey (one row per row of x). The output displayed
# below has 2 rows with 64 values in the first row, consistent with (batch, ENCODING_SIZE).
y = model.encode(x) # y: one survey embedding per row

In [7]:
# Display the encoded batch produced by model.encode(x).
y

tensor([[1.0803, 1.0387, 1.1260, 1.0745, 1.2651, 1.1385, 1.0452, 1.1095, 0.9616,
         1.0648, 0.8458, 1.0853, 1.1853, 1.2785, 0.9475, 0.9321, 1.1195, 1.1187,
         1.0438, 1.0787, 1.0481, 0.8518, 1.1889, 1.1000, 1.0081, 1.1126, 0.9642,
         1.0241, 1.2207, 1.0761, 0.8835, 0.9294, 1.1203, 1.1392, 1.1904, 1.1657,
         1.0177, 1.1087, 0.8757, 0.9787, 1.1058, 1.1208, 1.0508, 1.1631, 1.1322,
         1.0496, 1.2236, 1.1289, 1.0255, 1.0527, 1.2046, 1.1109, 1.1518, 1.1727,
         0.9947, 1.0273, 1.1130, 1.0833, 1.2079, 1.1044, 1.0035, 1.0782, 1.1120,
         1.1882],
        [1.0464, 1.0646, 1.2293, 1.0145, 1.0354, 1.1231, 1.1160, 1.1148, 1.0513,
         1.1728, 1.0108, 0.8998, 0.9953, 0.9843, 1.0526, 0.9437, 1.0459, 1.1369,
         1.0498, 1.2516, 1.1153, 0.7772, 0.9862, 1.1819, 1.0512, 1.1149, 1.0043,
         1.1191, 1.1650, 1.0315, 1.0229, 0.9551, 1.2055, 0.9158, 1.1290, 1.1661,
         0.9671, 1.1594, 0.9312, 1.0963, 1.2674, 1.2074, 1.0428, 1.0242, 1.0268,
         0

## RNN

In [8]:
## GRUDecoder arguments
# input_size  -> size of the embedding produced by the autoencoder model
# hidden_size -> size of the RNN used in the decoder (it may differ from input_size)
# The toy values 6 and 4 match INPUT_SIZE and MAX_SEQ_LEN in the example cell further down.
model = GRUDecoder(
    input_size=6,
    hidden_size=10,
    max_seq_len=4,
).to(get_device())

Using MPS (Metal Performance Shaders) device


In [9]:
# This is just an example: build the decoder input for one person, then batch it.

MAX_SEQ_LEN = 4  # max number of surveys a person can have (in our dataset)
INPUT_SIZE = 6   # hidden dimensions of the autoencoder (size of one survey embedding)
NUM_SURVEYS = 2  # let's say this person only has 2 surveys

# Resolve the device once; get_device() prints a message on every call.
device = get_device()

x0 = torch.rand(INPUT_SIZE)  # embedding for the 1st survey
x1 = torch.rand(INPUT_SIZE)  # embedding for the 2nd survey

# The tensor for a person must have shape [MAX_SEQ_LEN, INPUT_SIZE]; unused slots stay zero.
# It is important that existing survey embeddings are appended right next to each other,
# starting at index 0 (even if a year is missing between them, they are still appended
# one after another).
e = torch.zeros(MAX_SEQ_LEN, INPUT_SIZE)
e[0] = x0
e[1] = x1
e = e.to(device)  # so this is the tensor for the person

# We also need to flag which positions hold real embeddings: True for the first
# NUM_SURVEYS slots, False for the trailing 'empty' ones. Deriving the mask from
# MAX_SEQ_LEN keeps it the right length if the constants above change.
person_mask = (torch.arange(MAX_SEQ_LEN) < NUM_SURVEYS).to(device)

# Let's assume we have a batch of people. The same person is reused here, but in the
# real pipeline these should be different people. The batch size is 3.
x = torch.stack([e, e, e])
mask = torch.stack([person_mask, person_mask, person_mask])

Using MPS (Metal Performance Shaders) device
Using MPS (Metal Performance Shaders) device


In [10]:
# Run the batched person tensors and their validity mask through the GRU decoder.
# The sigmoid of xx displayed further down has one value per person in the batch (3 rows, 1 column).
xx = model(x, mask)

In [11]:
# Squash the decoder outputs into the (0, 1) range with an element-wise sigmoid.
torch.sigmoid(xx)

tensor([[0.3430],
        [0.3379],
        [0.4362]], device='mps:0', grad_fn=<SigmoidBackward0>)