In [1]:
import torch

In [2]:
input_tensors = torch.load('input_seq_tensors.pt', weights_only=True)
act_dev_tensors = torch.load('act_dev_tensors.pt', weights_only=True)
act_state_tensors = torch.load('act_state_tensors.pt', weights_only=True)
act_timing_tensors = torch.load('act_timing_tensors.pt', weights_only=True)

In [3]:
from torch import nn
from torch.nn import functional as F
import torch.optim as optim
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence


In [4]:
class MultiTaskLSTM(nn.Module):
    def __init__(self, input_size, num_classes, num_devices, hidden_size=128, num_lstm_layers=4):
        super(MultiTaskLSTM, self).__init__()
        # LSTM shared layer
        self.lstm = nn.LSTM(
            input_size=input_size, 
            hidden_size=hidden_size, 
            num_layers=num_lstm_layers, 
            batch_first=True, 
            dropout=0.2)
        
        # Timing prediction head (regression)
        self.timing_pred_head = nn.Linear(hidden_size, 1)  # 1 output for time prediction
        
        # Action classification head (classification)
        self.act_state_head = nn.Linear(hidden_size, num_classes)  # num_classes output for classification
        
        # Device classification head (classification)
        self.act_device_head = nn.Linear(hidden_size, num_devices)  # num_devices output for device classification
    
    def forward(self, x, lengths):
        # Process the packed sequence with LSTM
        lstm_out, (h_n, c_n) = self.lstm(x)
        lstm_out = lstm_out[range(len(lstm_out)), lengths - 1]
        
        # Timing and action predictions
        timing_pred = self.timing_pred_head(lstm_out)
        action_pred = self.act_state_head(lstm_out)
        device_pred = self.act_device_head(lstm_out)
        
        return timing_pred, action_pred, device_pred

In [5]:
from tqdm import tqdm

def train(model, train_data_loader, num_epochs=3):
    for epoch in range(num_epochs):
        model.train()
    
        for batch_idx, (inputs, y_act_device, y_act_state, y_timing, lengths) in tqdm(enumerate(train_data_loader)):
            inputs, y_device, y_action, y_timing, lengths = (
                inputs.float().to(device), 
                y_act_device.long().to(device), 
                y_act_state.long().to(device), 
                y_timing.float().to(device), 
                lengths # this guy stays in CPU
            )
            
            # Forward pass
            timing_pred, action_pred, device_pred = model(inputs, lengths)

            # # Squeeze the output if necessary to match target shape
            # timing_pred = timing_pred.squeeze(1)
            # action_pred = action_pred.squeeze(1)
            # device_pred = device_pred.squeeze(1)
        
            # Calculate losses
            action_loss = criterion_action(action_pred, y_action)
            timing_loss = criterion_timing(timing_pred, y_timing)
            device_loss = criterion_device(device_pred, y_device)

            device_weight = 1.0  # Higher weight for more importance
            action_weight = 1.0  # Higher weight for more importance
            timing_weight = 0.5  # Lower weight for less importance
            
            # Calculate the weighted loss
            weighted_loss = (device_weight * device_loss) + (action_weight * action_loss) + (timing_weight * timing_loss)

            # Backpropagation and optimization
            optimizer.zero_grad()
            weighted_loss.backward()
            optimizer.step()
            
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {weighted_loss.item():.4f}')

In [6]:
input_size = 9  # Number of input features (sensor data + action history)
hidden_size = 128  # Hidden units for LSTM
num_lstm_layers = 2;
num_classes = 2  # Number of action classes (on/off)
num_devices = 213  # Number of different devices

# Initialize model, loss functions, and optimizer
model = MultiTaskLSTM(input_size=input_size, num_lstm_layers=num_lstm_layers, hidden_size=hidden_size, num_classes=num_classes, num_devices=num_devices)
criterion_timing = nn.MSELoss()  # Loss for timing prediction (regression)
criterion_action = nn.CrossEntropyLoss()  # Loss for action type prediction (classification)
criterion_device = nn.CrossEntropyLoss()  # Loss for device prediction (classification)
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Move model to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f'device: {device}')
model.to(device)

device: cuda


MultiTaskLSTM(
  (lstm): LSTM(9, 128, num_layers=2, batch_first=True, dropout=0.2)
  (timing_pred_head): Linear(in_features=128, out_features=1, bias=True)
  (act_state_head): Linear(in_features=128, out_features=2, bias=True)
  (act_device_head): Linear(in_features=128, out_features=213, bias=True)
)

In [7]:
# print(f'number of sequences: {len(input_tensors)}, number of device/state/timing: {len(act_dev_tensors)}/{len(act_state_tensors)}/{len(act_timing_tensors)}')

In [8]:
from torch.utils.data import random_split

# Determine the sizes of each split
total_size = len(input_tensors)
train_size = int(0.7 * total_size)
val_size = int(0.15 * total_size)
test_size = total_size - train_size - val_size

In [9]:
from torch.utils.data import DataLoader, Dataset

class SmartHomeDataset(Dataset):
    def __init__(self, input_tensors, act_dev_tensors, act_state_tensors, act_timing_tensors):
        self.input_tensors = input_tensors
        self.act_dev_tensors = act_dev_tensors
        self.act_state_tensors = act_state_tensors
        self.act_timing_tensors = act_timing_tensors

    def __len__(self):
        return len(self.input_tensors)

    def __getitem__(self, idx):
        return (self.input_tensors[idx], 
                self.act_dev_tensors[idx], 
                self.act_state_tensors[idx], 
                self.act_timing_tensors[idx])

In [10]:
import torch
from torch.nn.utils.rnn import pad_sequence

def collate_fn(batch):
    # Separate inputs and targets
    inputs, y_device, y_action, y_timing = zip(*batch)
    
    # Pad sequences for inputs (batch_first=True makes it [batch_size, seq_len, features])
    inputs_padded = pad_sequence(inputs, batch_first=True, padding_value=0)
    
    # Convert targets to tensors (they should all have the same length as they're scalar values)
    y_device = torch.stack(y_device)
    y_action = torch.stack(y_action)
    y_timing = torch.stack(y_timing)
    
    # Compute lengths for each sequence (before padding)
    lengths = torch.tensor([len(seq) for seq in inputs])
    
    return inputs_padded, y_device, y_action, y_timing, lengths

In [11]:
dataset = SmartHomeDataset(input_tensors, act_dev_tensors, act_state_tensors, act_timing_tensors)
train_dataset, val_dataset, test_dataset = random_split(dataset, [train_size, val_size, test_size])

In [12]:
from torch.utils.data import DataLoader

batch_size = 32  # Adjust as needed

# Create DataLoaders
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, collate_fn=collate_fn)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, collate_fn=collate_fn)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, collate_fn=collate_fn)

In [13]:
train(model, train_loader, num_epochs=32)

  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
513it [00:08, 58.57it/s]


Epoch [1/32], Loss: 101.0339


513it [00:07, 72.48it/s]


Epoch [2/32], Loss: 99.3525


513it [00:07, 72.46it/s]


Epoch [3/32], Loss: 139.3276


513it [00:07, 72.19it/s]


Epoch [4/32], Loss: 69.8689


513it [00:07, 72.56it/s]


Epoch [5/32], Loss: 539.8226


513it [00:07, 71.05it/s]


Epoch [6/32], Loss: 443.4532


513it [00:07, 71.20it/s]


Epoch [7/32], Loss: 43.0811


513it [00:07, 70.01it/s]


Epoch [8/32], Loss: 102.8109


513it [00:07, 69.20it/s]


Epoch [9/32], Loss: 107.0309


513it [00:07, 68.85it/s]


Epoch [10/32], Loss: 92.0493


513it [00:08, 61.11it/s]


Epoch [11/32], Loss: 402.4441


513it [00:07, 67.27it/s]


Epoch [12/32], Loss: 499.7713


513it [00:07, 69.01it/s]


Epoch [13/32], Loss: 112.0775


513it [00:07, 68.71it/s]


Epoch [14/32], Loss: 254.4729


513it [00:07, 69.21it/s]


Epoch [15/32], Loss: 158.2561


513it [00:07, 69.27it/s]


Epoch [16/32], Loss: 79.6195


513it [00:07, 69.35it/s]


Epoch [17/32], Loss: 462.1703


513it [00:07, 69.17it/s]


Epoch [18/32], Loss: 99.1219


513it [00:07, 68.78it/s]


Epoch [19/32], Loss: 211.7559


513it [00:07, 68.98it/s]


Epoch [20/32], Loss: 175.4177


513it [00:07, 68.64it/s]


Epoch [21/32], Loss: 112.1959


513it [00:07, 69.10it/s]


Epoch [22/32], Loss: 112.8950


513it [00:07, 69.42it/s]


Epoch [23/32], Loss: 86.1992


513it [00:07, 68.97it/s]


Epoch [24/32], Loss: 241.4002


513it [00:07, 69.13it/s]


Epoch [25/32], Loss: 70.7848


513it [00:07, 69.55it/s]


Epoch [26/32], Loss: 133.3966


513it [00:07, 69.03it/s]


Epoch [27/32], Loss: 256.0267


513it [00:07, 69.16it/s]


Epoch [28/32], Loss: 69.3757


513it [00:07, 69.32it/s]


Epoch [29/32], Loss: 70.1937


513it [00:07, 69.06it/s]


Epoch [30/32], Loss: 124.1703


513it [00:07, 70.01it/s]


Epoch [31/32], Loss: 365.4951


513it [00:07, 67.76it/s]

Epoch [32/32], Loss: 88.4071





In [14]:
model.eval()
val_loss = 0.0

with torch.no_grad():
    for inputs, y_device, y_action, y_timing, lengths in val_loader:
        # Convert input tensors to float32
        inputs = inputs.float().to(device)
        
        # Convert target tensors to appropriate types
        y_device = y_device.long().to(device)    # Device classification targets should be long
        y_action = y_action.long().to(device)    # Action classification targets should be long
        y_timing = y_timing.float().to(device)   # Timing targets should remain float32
        
        lengths = lengths  # lengths can stay as integers and needs to be in CPU

        # Forward pass through the model
        timing_pred, action_pred, device_pred = model(inputs, lengths)
        
        # Calculate losses
        loss_timing = criterion_timing(timing_pred, y_timing)
        loss_action = criterion_action(action_pred, y_action)
        loss_device = criterion_device(device_pred, y_device)
        
        val_loss += (loss_timing + loss_action + loss_device).item()

val_loss /= len(val_loader)
print(f'Validation Loss: {val_loss:.4f}')

Validation Loss: 493.5678


  return F.mse_loss(input, target, reduction=self.reduction)


In [15]:
model.eval()  # Set the model to evaluation mode
test_loss = 0.0
correct_device = 0
correct_action = 0
total_samples = 0

with torch.no_grad():  # No need to calculate gradients during testing
    for inputs, y_device, y_action, y_timing, lengths in test_loader:
        # Move data to the same device as the model
        inputs = inputs.float().to(device)
        y_device = y_device.long().to(device)
        y_action = y_action.long().to(device)
        y_timing = y_timing.float().to(device)
        lengths = lengths.to(device)

        # Forward pass
        timing_pred, action_pred, device_pred = model(inputs, lengths)

        # Squeeze the predictions if necessary
        timing_pred = timing_pred.squeeze(1)
        action_pred = action_pred.squeeze(1)
        device_pred = device_pred.squeeze(1)
        
        # Calculate loss
        loss_timing = criterion_timing(timing_pred, y_timing)
        loss_action = criterion_action(action_pred, y_action)
        loss_device = criterion_device(device_pred, y_device)

        # Accumulate total loss
        test_loss += (loss_timing + loss_action + loss_device).item()

        # Calculate accuracy for device and action predictions
        _, predicted_device = torch.max(device_pred, 1)
        _, predicted_action = torch.max(action_pred, 1)
        
        correct_device += (predicted_device == y_device).sum().item()
        correct_action += (predicted_action == y_action).sum().item()
        total_samples += y_device.size(0)

# Calculate average loss
test_loss /= len(test_loader)

# Calculate accuracy
device_accuracy = correct_device / total_samples
action_accuracy = correct_action / total_samples

print(f'Test Loss: {test_loss:.4f}')
print(f'Device Prediction Accuracy: {device_accuracy:.4f}')
print(f'Action Prediction Accuracy: {action_accuracy:.4f}')

Test Loss: 477.1247
Device Prediction Accuracy: 0.9180
Action Prediction Accuracy: 0.9732


In [None]:
torch.save(model.state_dict(), f'model_')