In [2]:
#step 1 loading the data
import torch
from torch.utils.data import Dataset, DataLoader


data = torch.load('student_training_data.pt')

print(f"Label Map:{data['meta']['label_map']}")
print(data.keys())
print(f"Training sequences:{len(data['train_sequences'])}")

Label Map:{'PAD': 0, 'human face': 1, 'person': 2, 'vehicle': 3, 'animal': 4, 'text logo': 5, 'sports ball': 6, 'fireworks': 7, 'waterfall': 8, 'toy gun': 9, 'mountain': 10, 'building': 11}
dict_keys(['meta', 'train_sequences', 'test_sequences'])
Training sequences:114


In [3]:
class VRDataset(Dataset):
    def __init__ (self,sequences,seq_len =30) :
        self.sequences = sequences
        self.seq_len = seq_len
    def __len__(self):
        return len(self.sequences)
    def __getitem__(self,idx):
        motion,semantics = self.sequences[idx]
        if len(motion) > self.seq_len:
            start = torch.randint(0,len(motion)-self.seq_len, (1,)).item()
            m_clip = motion[start:start+self.seq_len]
            s_clip = semantics[start:start+self.seq_len]
            return m_clip, s_clip
        return motion, semantics

In [53]:
train_ds= VRDataset(data['train_sequences'])
train_loader = DataLoader(train_ds,batch_size=32, shuffle=True)

In [54]:
#Step 2: building the personalized predictor

import torch.nn as nn

class PersonalizedFoVPredictor(nn.Module):
    def __init__(self, num_classes=12, d_model=64):
        super().__init__()
        self.motion_emb = nn.Linear(2, d_model) 
        self.sem_emb = nn.Linear(3, d_model) # Yaw, Pitch, and Class ID
        self.user_preference = nn.Parameter(torch.ones(num_classes)) # how much user cares about each of the object types

        encoder_layer = nn.TransformerEncoderLayer(d_model=d_model, nhead=4)
        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers=2)
        
        self.head = nn.Linear(d_model, 2)

    def get_user_weights(self, semantics):
        label_ids = semantics[...,-1].long() #[Yaw, Pitch, Label_ID] is the semantics data structure so -1 allows us to get the data in the last index which is Label_ID
        #print(f"label ids: {label_ids}")
        return self.user_preference[label_ids]

    def calculate_weights(self, semantics, user_weights):
        user_weights = user_weights[..., None] #making semantics and user_weight to match in demensiom for tensor multiplications
        weighted_semantics = semantics * user_weights
        return weighted_semantics.mean(dim=2)
        
    def forward(self, motion,semantics):
        
        #motion embed
        #print("before motion embedded")
        #print(motion.shape)
        #batch = motion.shape[0]
        #print(f"batch size is: {batch}")
        #print(motion)
        motion_embedded = self.motion_emb(motion) #converting to the d_model
        #print("after motion embedded")
        #print(motion_embedded.shape)
        #print(motion_embedded)

        #semantics embedded: 1.embed semantics 2.calculate the weights 3.multiply user preference weigths
        #print("before semantics embedded")
        #print(semantics.shape)
        #print(semantics)
        
        #step 1
        semantics_embedded = self.sem_emb(semantics)
        #print("after semantics embedded")
        #print(semantics_embedded.shape)
        #print(semantics_embedded)

        #step 2
        user_weights = self.get_user_weights(semantics)
        
        #step 3
        semantics_weighted = self.calculate_weights(semantics_embedded, user_weights)
        #print(f"weighted semantics shape: {semantics_weighted.shape}")
    
        #fusing the past motion and semantics (with the wieghts from user preference) together for creating predictions
        fusion = motion_embedded + semantics_weighted
        
        fusion = fusion.transpose(0, 1)
    
        transformed = self.transformer(fusion) # Shape: (T, B, d_model)
        transformed = transformed.transpose(0, 1) 
        prediction = self.head(transformed)
        return prediction 
        

In [61]:
import torch.optim as optim

# Instantiating the model
model = PersonalizedFoVPredictor()
train_loss = []

# Define a loss function and optimizer
criterion = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=0.0005)
# Training loop
for epoch in range(50):
    model.train()
    running_loss = 0.0

    for motion, semantics in train_loader:
        optimizer.zero_grad()
        predictions = model(motion, semantics)
        target = motion[:, 1:, :]
        predictions = predictions[:, :-1, :]

        loss = criterion(predictions, target)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    avg_loss = running_loss / len(train_loader)
    train_loss.append(avg_loss)
    with torch.no_grad():
        print(f"Epoch {epoch+1}, Loss: {avg_loss:.4f}")

Epoch 1, Loss: 4565.0895
Epoch 2, Loss: 3607.2663
Epoch 3, Loss: 2978.5646
Epoch 4, Loss: 2246.1040
Epoch 5, Loss: 1870.6053
Epoch 6, Loss: 1238.5815
Epoch 7, Loss: 1238.8024
Epoch 8, Loss: 1434.2566
Epoch 9, Loss: 1389.2691
Epoch 10, Loss: 1417.6824
Epoch 11, Loss: 1319.1059
Epoch 12, Loss: 1090.1384
Epoch 13, Loss: 1325.1307
Epoch 14, Loss: 916.1893
Epoch 15, Loss: 1260.3786
Epoch 16, Loss: 1678.7374
Epoch 17, Loss: 1614.3824
Epoch 18, Loss: 1499.0601
Epoch 19, Loss: 1442.1800
Epoch 20, Loss: 1113.5312
Epoch 21, Loss: 1529.9718
Epoch 22, Loss: 1254.8557
Epoch 23, Loss: 1257.2999
Epoch 24, Loss: 1184.2636
Epoch 25, Loss: 1056.3013
Epoch 26, Loss: 976.1826
Epoch 27, Loss: 831.5900
Epoch 28, Loss: 944.1305
Epoch 29, Loss: 1224.7373
Epoch 30, Loss: 1118.8760
Epoch 31, Loss: 1038.0956
Epoch 32, Loss: 980.5094
Epoch 33, Loss: 909.8669
Epoch 34, Loss: 1032.0386
Epoch 35, Loss: 831.0867
Epoch 36, Loss: 1349.4132
Epoch 37, Loss: 1222.3969
Epoch 38, Loss: 1105.9781
Epoch 39, Loss: 885.5689
Epo

In [62]:
label_map = data["meta"]["label_map"]

print("\nEach object's Weight:")
for name, label_id in label_map.items():
    print(f"  ({name:12s} label_ID = {label_id:2d}): {float(model.user_preference[label_id]):.4f}")

if model.user_preference[1] > model.user_preference[11]:
    print(f"sanity check: hypothesis is correct, faces object with weight: {model.user_preference[1]:4f} and building with weight {model.user_preference[11]:4f}")
else: print(f"hypothesis is false")


Each object's Weight:
  (PAD          label_ID =  0): 1.5854
  (human face   label_ID =  1): 1.2572
  (person       label_ID =  2): 1.1146
  (vehicle      label_ID =  3): 0.9762
  (animal       label_ID =  4): 1.0000
  (text logo    label_ID =  5): 1.0000
  (sports ball  label_ID =  6): 1.0000
  (fireworks    label_ID =  7): 1.0000
  (waterfall    label_ID =  8): 1.0000
  (toy gun      label_ID =  9): 1.0000
  (mountain     label_ID = 10): 1.0000
  (building     label_ID = 11): 1.0000
sanity check: hypothesis is correct, faces object with weight: 1.257192 and building with weight 1.000000


In [63]:
test_ds = VRDataset(data['test_sequences'])
test_loader = DataLoader(test_ds, batch_size=32, shuffle=False)

In [78]:
model.eval()
running_val = 0.0
val_losses   = []
with torch.no_grad():
        for motion, semantics in test_loader:
            preds  = model(motion, semantics)
            target = motion[:, 1:, :]
            preds  = preds[:, :-1, :]
            v_loss = criterion(preds, target)
            running_val += v_loss.item()

        avg_val = running_val / len(test_loader)
        val_losses.append(avg_val)
    
        print(f"Epoch {epoch+1:2d} | train: {avg_loss:.4f} | val: {avg_val:.4f}")

Epoch 50 | train: 810.3376 | val: 1224.0212
