In [7]:
import torch
from torch.utils.data import Dataset
import json
import os
import torch.nn as nn
import torch.nn.functional as func
from torch.utils.data import DataLoader
import torch.optim as optim
from os.path import expanduser
import splitfolders
import shutil
import glob
import numpy as np
from sklearn.model_selection import train_test_split

In [8]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
# torch.cuda.set_per_process_memory_fraction(0.9, 0)
print(device)

cuda


In [9]:
class KpVelDataset(Dataset):
    def __init__(self, json_folder):
        super(KpVelDataset, self).__init__()
        self.data = []
        for json_file in sorted(os.listdir(json_folder)):
            if json_file.endswith('_combined.json'):
                with open(os.path.join(json_folder, json_file), 'r') as file:
                    data = json.load(file)
                    start_kp = data['start_kp']
                    next_kp = data['next_kp']
                    velocity = data['velocity']
                    self.data.append((start_kp, next_kp, velocity))

    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, idx):
        start_kp, next_kp, velocity = self.data[idx]
        # Extract and flatten the first two elements of each keypoint in start_kp
        start_kp_flat = torch.tensor([kp for sublist in start_kp for kp in sublist[0][:2]], dtype=torch.float)
        # Extract and flatten the first two elements of each keypoint in next_kp
        next_kp_flat = torch.tensor([kp for sublist in next_kp for kp in sublist[0][:2]], dtype=torch.float)
        velocity = torch.tensor(velocity)
        return start_kp_flat, next_kp_flat, velocity

In [10]:
def train_test_split(src_dir):
#     dst_dir_img = src_dir + "images"
    dst_dir_anno = src_dir + "annotations"
    
    if os.path.exists(dst_dir_anno):
        print("folders exist")
    else:
        os.mkdir(dst_dir_anno)
        
#     for jpgfile in glob.iglob(os.path.join(src_dir, "*.jpg")):
#         shutil.copy(jpgfile, dst_dir_img)

    for jsonfile in glob.iglob(os.path.join(src_dir, "*_combined.json")):
        shutil.copy(jsonfile, dst_dir_anno)
        
    output = src_dir + "split_folder_reg"
    
    splitfolders.ratio(src_dir, # The location of dataset
                   output=output, # The output location
                   seed=42, # The number of seed
                   ratio=(0.8, 0.1, 0.1), # The ratio of split dataset
                   group_prefix=None, # If your dataset contains more than one file like ".jpg", ".pdf", etc
                   move=False # If you choose to move, turn this into True
                   )
    
#     shutil.rmtree(dst_dir_img)
    shutil.rmtree(dst_dir_anno)
    
    return output  

In [11]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class KeypointRegressionNet(nn.Module):
    def __init__(self):
        super(KeypointRegressionNet, self).__init__()
        # Define the architecture
        self.fc1 = nn.Linear(15, 128)  # 18 keypoints + 3 velocity values
        self.fc2 = nn.Linear(128, 256)
        self.fc3 = nn.Linear(256, 256)
        self.fc4 = nn.Linear(256, 128)
        self.fc5 = nn.Linear(128, 12)  # Output size is 18 (6 keypoints * 3 values each)

    def forward(self, start_kp, velocity):
        # Flatten start keypoints and concatenate with velocity
        x = torch.cat((start_kp, velocity), dim=1)
        
        # Forward pass through the network
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        x = F.relu(self.fc4(x))
        x = self.fc5(x)     # No activation function in the last layer
        return x

In [12]:
# Initialize dataset and data loader
# to generalize home directory. User can change their parent path without entering their home directory
num_epochs = 100
batch_size = 64
v = 1
layers = 5
root_dir = '/home/jc-merlab/Pictures/panda_data/panda_sim_vel/regression_dataset_panda/'
# print(root_dir)
split_folder_path = train_test_split(root_dir)
dataset = KpVelDataset(root_dir)
data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

# Initialize model
model = KeypointRegressionNet()  # Adjust input_size as necessary
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001)

# Training loop
for epoch in range(num_epochs):
    for start_kp, next_kp, velocity in data_loader:
        optimizer.zero_grad()
        velocity = velocity.squeeze(1)
        # print(start_kp.shape)
        # print(velocity.shape)
        output = model(start_kp, velocity)
        loss = criterion(output, next_kp)
        loss.backward()
        optimizer.step()
        # print("output", output)
    print(f'Epoch {epoch+1}, Loss: {loss.item()}')
    
# # Save the trained model
model_save_path = f'/home/jc-merlab/Pictures/Data/trained_models/reg_nkp_panda_b{batch_size}_e{num_epochs}_v{v}_l{layers}.pth'
torch.save(model.state_dict(), model_save_path)

# model_save_path = f'/home/jc-merlab/Pictures/Data/trained_models/reg_nkp_b{batch_size}_e{num_epochs}_v{v}.pth'
# torch.save({
#     'model_state_dict': model.state_dict(),
#     'model_structure': KeypointRegressionNet()
# }, model_save_path)
# print(f"Model saved to {model_save_path}")


Copying files: 4837 files [00:00, 6630.36 files/s]


Epoch 1, Loss: 15808.021484375
Epoch 2, Loss: 1453.130126953125
Epoch 3, Loss: 1141.284912109375
Epoch 4, Loss: 1439.5057373046875
Epoch 5, Loss: 1144.176513671875
Epoch 6, Loss: 1265.48095703125
Epoch 7, Loss: 930.892333984375
Epoch 8, Loss: 544.7493286132812
Epoch 9, Loss: 551.5944213867188
Epoch 10, Loss: 585.6015625
Epoch 11, Loss: 262.4015197753906
Epoch 12, Loss: 362.2667236328125
Epoch 13, Loss: 339.8898010253906
Epoch 14, Loss: 363.4721984863281
Epoch 15, Loss: 194.13722229003906
Epoch 16, Loss: 123.76951599121094
Epoch 17, Loss: 96.5335922241211
Epoch 18, Loss: 114.36405181884766
Epoch 19, Loss: 81.66212463378906
Epoch 20, Loss: 37.65974426269531
Epoch 21, Loss: 62.843387603759766
Epoch 22, Loss: 58.49724578857422
Epoch 23, Loss: 60.55731964111328
Epoch 24, Loss: 45.577457427978516
Epoch 25, Loss: 41.16115188598633
Epoch 26, Loss: 32.75562286376953
Epoch 27, Loss: 26.03606414794922
Epoch 28, Loss: 23.474149703979492
Epoch 29, Loss: 20.386442184448242
Epoch 30, Loss: 22.6199283

In [14]:
def test_model(model_path, test_data_dir):
    # Load the test dataset
    test_dataset = KpVelDataset(test_data_dir)
    test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

    # Initialize the model and load the saved state
    model = KeypointRegressionNet()
    model.load_state_dict(torch.load(model_path))
    model.eval()

    # Criterion for evaluation
    criterion = nn.MSELoss()
    total_loss = 0

    # No gradient needed for evaluation
    with torch.no_grad():
        for start_kp, next_kp, velocity in test_loader:
            output = model(start_kp, velocity)
            for i in range(start_kp.size(0)):
                individual_start_kp = start_kp[i]
                individual_next_kp = next_kp[i]
                individual_velocity = velocity[i]
                predicted_next_kp = output[i]

                print("Start KP:", individual_start_kp)
                print("Actual Next KP:", individual_next_kp)
                print("Velocity:", individual_velocity)
                print("Predicted Next_kp:", predicted_next_kp)
                print("-----------------------------------------")
            loss = criterion(output, next_kp)
            total_loss += loss.item()
            
    # Calculate the average loss
    avg_loss = total_loss / len(test_loader)
    print(f'Average Test Loss: {avg_loss}')

# Usage
model_path = '/home/jc-merlab/Pictures/Data/trained_models/reg_nkp_panda_b64_e100_v1_l5.pth'  # Update with your model path
test_data_dir = '/home/jc-merlab/Pictures/panda_data/panda_sim_vel/path_planning_combined/split_folder_reg/test/annotations/'  # Update with your test data path
test_model(model_path, test_data_dir)

Start KP: tensor([257.9522, 366.9199, 257.9597, 283.0131, 179.7756, 298.1862, 175.8351,
        277.7733, 229.1696, 194.4117, 246.0231, 208.2677])
Actual Next KP: tensor([257.9522, 366.9199, 257.9597, 283.0131, 179.7759, 298.1881, 175.8348,
        277.7754, 229.1673, 194.4123, 246.0235, 208.2640])
Velocity: tensor([0.0000, 0.0000, 0.3435])
Predicted Next_kp: tensor([250.6365, 370.9055, 256.1215, 263.3322, 167.8363, 286.9798, 161.1773,
        264.5270, 246.7390, 206.1593, 248.0871, 216.5385])
-----------------------------------------
Start KP: tensor([257.9522, 366.9199, 257.9597, 283.0131, 179.7759, 298.1881, 175.8348,
        277.7754, 229.1673, 194.4123, 246.0235, 208.2640])
Actual Next KP: tensor([257.9522, 366.9199, 257.9597, 283.0131, 179.7762, 298.1904, 175.8345,
        277.7778, 229.1646, 194.4130, 246.0238, 208.2600])
Velocity: tensor([0.0000, 0.0000, 0.3435])
Predicted Next_kp: tensor([250.6364, 370.9054, 256.1213, 263.3321, 167.8367, 286.9826, 161.1768,
        264.5304, 2

Predicted Next_kp: tensor([245.8795, 365.7229, 252.8082, 259.6649, 165.0580, 278.2623, 159.4408,
        255.1259, 238.0135, 200.4634, 239.1683, 209.8760])
-----------------------------------------
Start KP: tensor([257.9522, 366.9199, 257.9597, 283.0131, 179.4267, 296.9852, 175.8227,
        276.4914, 206.8771, 182.4921, 226.6347, 192.4121])
Actual Next KP: tensor([257.9522, 366.9199, 257.9597, 283.0131, 179.4137, 296.9368, 175.8234,
        276.4400, 207.0023, 182.4801, 226.7421, 192.4249])
Velocity: tensor([ 0.0000,  0.0000, -0.3435])
Predicted Next_kp: tensor([245.9298, 365.8210, 252.8793, 259.6856, 165.1124, 278.1171, 159.5486,
        255.0157, 238.3385, 200.4642, 239.4594, 209.8364])
-----------------------------------------
Start KP: tensor([257.9522, 366.9199, 257.9597, 283.0131, 179.3802, 296.8125, 175.8251,
        276.3077, 207.3241, 182.4501, 227.0180, 192.4585])
Actual Next KP: tensor([257.9522, 366.9199, 257.9597, 283.0131, 179.3653, 296.7573, 175.8259,
        276.2490,

Start KP: tensor([257.9522, 366.9199, 257.9597, 283.0131, 183.5341, 254.1116, 191.1436,
        234.7128, 265.3678, 169.0797, 287.2070, 170.6659])
Actual Next KP: tensor([257.9522, 366.9199, 257.9597, 283.0131, 183.5341, 254.1115, 191.1437,
        234.7127, 265.3690, 169.0809, 287.2353, 170.3711])
Velocity: tensor([0.0000, 0.0000, 0.3435])
Predicted Next_kp: tensor([245.6038, 371.1699, 256.4962, 260.3792, 166.5268, 252.3367, 167.2141,
        230.2226, 258.1102, 198.0535, 256.0172, 205.6987])
-----------------------------------------
Start KP: tensor([257.9522, 366.9199, 257.9597, 283.0131, 183.5360, 254.1059, 191.1469,
        234.7076, 265.4127, 169.1208, 285.9398, 160.9210])
Actual Next KP: tensor([257.9522, 366.9199, 257.9597, 283.0131, 183.5363, 254.1051, 191.1473,
        234.7069, 265.4179, 169.1253, 285.5484, 159.9605])
Velocity: tensor([0.0000, 0.0000, 0.3435])
Predicted Next_kp: tensor([245.0708, 370.5315, 256.0738, 259.8295, 166.3053, 253.1050, 166.4782,
        231.1563, 2

Velocity: tensor([0.0741, 0.0000, 0.0000])
Predicted Next_kp: tensor([233.5483, 351.9582, 243.6916, 248.4193, 165.7879, 206.5251, 176.9420,
        185.4009, 234.3244, 189.3444, 227.5526, 193.4554])
-----------------------------------------
Start KP: tensor([257.9522, 366.9199, 257.9597, 283.0131, 214.5367, 216.1889, 232.0376,
        204.8872, 172.3392, 125.5558, 192.0593, 117.3639])
Actual Next KP: tensor([257.9522, 366.9199, 257.9597, 283.0131, 215.2311, 215.7455, 232.8474,
        204.6250, 173.9751, 124.6880, 193.7711, 116.6917])
Velocity: tensor([0.0741, 0.0000, 0.0000])
Predicted Next_kp: tensor([233.6034, 352.1414, 243.7973, 248.4056, 165.7500, 206.5458, 176.8929,
        185.3965, 235.4648, 189.2656, 228.5951, 193.3242])
-----------------------------------------
Start KP: tensor([257.9522, 366.9199, 257.9597, 283.0131, 215.9301, 215.3092, 233.6599,
        204.3712, 175.6200, 123.8371, 195.4899, 116.0370])
Actual Next KP: tensor([257.9522, 366.9199, 257.9597, 283.0131, 216.432

Start KP: tensor([257.9522, 366.9199, 257.9597, 283.0131, 302.5574, 217.0770, 319.7650,
        228.7120, 408.5034, 185.3252, 416.1628, 204.1512])
Actual Next KP: tensor([257.9522, 366.9199, 257.9597, 283.0131, 302.5655, 217.0825, 319.7716,
        228.7195, 407.8911, 184.0889, 415.8111, 202.8050])
Velocity: tensor([0.0000, 0.0893, 0.0000])
Predicted Next_kp: tensor([284.5021, 426.5734, 293.8257, 298.5013, 198.3014, 247.5432, 210.9492,
        225.9387, 335.9851, 241.2117, 321.7114, 250.8012])
-----------------------------------------
Start KP: tensor([257.9522, 366.9199, 257.9597, 283.0131, 302.8202, 217.2554, 319.9811,
        228.9590, 401.6414, 173.3816, 422.9600, 169.9390])
Actual Next KP: tensor([257.9522, 366.9199, 257.9597, 283.0131, 302.8281, 217.2608, 319.9876,
        228.9664, 401.6601, 173.4071, 422.9034, 168.9500])
Velocity: tensor([0.0000, 0.0000, 0.3435])
Predicted Next_kp: tensor([280.8933, 423.0223, 291.2752, 295.2643, 196.4579, 247.4548, 207.5398,
        226.4076, 3

Velocity: tensor([ 0.0000,  0.0000, -0.5000])
Predicted Next_kp: tensor([245.8585, 362.2097, 250.2847, 259.7519, 162.8119, 261.5781, 163.2702,
        236.0793, 129.6132, 297.0353, 114.8594, 293.4751])
-----------------------------------------
Start KP: tensor([257.9522, 366.9199, 257.9597, 283.0131, 179.6109, 298.2830, 175.6537,
        277.8474,  76.4198, 285.2616,  80.8289, 264.3447])
Actual Next KP: tensor([257.9522, 366.9199, 257.9597, 283.0131, 179.4932, 297.6669, 175.6973,
        277.2007,  76.4087, 283.8439,  81.2844, 262.9754])
Velocity: tensor([0.1481, 0.0000, 0.0000])
Predicted Next_kp: tensor([245.1407, 361.3382, 249.9300, 259.1546, 162.6809, 261.9002, 162.9433,
        236.3940, 132.5025, 295.4644, 117.9979, 292.7022])
-----------------------------------------
Start KP: tensor([257.9522, 366.9199, 257.9597, 283.0131, 178.4874, 290.4614, 176.5775,
        269.7325,  77.1043, 267.3397,  83.8071, 246.9197])
Actual Next KP: tensor([257.9522, 366.9199, 257.9597, 283.0131, 178.

Start KP: tensor([257.9522, 366.9199, 257.9597, 283.0131, 259.9297, 203.4346, 280.7185,
        203.9608, 373.8917, 171.2354, 383.6053, 151.1438])
Actual Next KP: tensor([257.9522, 366.9199, 257.9597, 283.0131, 259.9366, 203.4348, 280.7252,
        203.9628, 373.9043, 171.2540, 382.4072, 150.6829])
Velocity: tensor([0.0000, 0.0000, 0.5000])
Predicted Next_kp: tensor([266.8051, 400.5672, 275.2769, 279.5211, 185.8739, 228.0868, 198.5453,
        207.5019, 312.1148, 221.3468, 298.4059, 227.3758])
-----------------------------------------
Start KP: tensor([257.9522, 366.9199, 257.9597, 283.0131, 259.9809, 203.4359, 280.7692,
        203.9755, 375.7910, 177.0734, 381.7446, 155.8703])
Actual Next KP: tensor([257.9522, 366.9199, 257.9597, 283.0131, 259.9866, 203.4361, 280.7749,
        203.9771, 376.2666, 178.7899, 382.5914, 157.7072])
Velocity: tensor([ 0.0000, -0.1786,  0.0000])
Predicted Next_kp: tensor([267.6602, 401.4406, 275.8621, 280.2863, 186.4115, 227.8560, 199.4397,
        207.1472

Start KP: tensor([257.9522, 366.9199, 257.9597, 283.0131, 323.6071, 237.9794, 335.3747,
        255.1279, 388.2385, 339.4374, 368.7219, 346.6602])
Actual Next KP: tensor([257.9522, 366.9199, 257.9597, 283.0131, 323.3258, 237.5722, 335.1996,
        254.6468, 386.8670, 339.6920, 367.2552, 346.6446])
Velocity: tensor([ 0.0000, -0.1786,  0.0000])
Predicted Next_kp: tensor([301.2219, 439.8595, 301.7286, 313.0110, 205.5973, 278.5231, 218.6309,
        253.0744, 355.9246, 313.7361, 338.6560, 339.3659])
-----------------------------------------
Start KP: tensor([257.9522, 366.9199, 257.9597, 283.0131, 322.9605, 237.0510, 334.9701,
        254.0299, 384.9929, 340.0485, 365.2575, 346.6306])
Actual Next KP: tensor([257.9522, 366.9199, 257.9597, 283.0131, 322.6953, 236.6782, 334.8021,
        253.5875, 383.5551, 340.3289, 363.7299, 346.6255])
Velocity: tensor([ 0.0000, -0.1786,  0.0000])
Predicted Next_kp: tensor([300.6772, 439.0003, 301.2112, 312.4909, 205.6024, 277.1078, 218.7229,
        251.8