In [21]:
import torch 
from torch import nn 
import torch; torch.manual_seed(42)
import torch.nn as nn
import torch.nn.functional as F
import torch.utils
import torch.distributions
import torchvision
import numpy as np
import matplotlib.pyplot as plt; plt.rcParams['figure.dpi'] = 200
import pandas as pd
from os import listdir, path
import glob
from sklearn.model_selection import train_test_split
import torch.optim as optim
import torch.distributions as dist
import os 

In [60]:
data_dir = 'bvh_to_csv'
data_files = glob.glob(data_dir + '/*.csv')
tmps = listdir(data_dir)
ems = []
for i in range(len(tmps)):
    tmp = tmps[i].split('.')
    if tmp[0][4].isalpha():
        emotion = tmp[0][3:5]
    else:
        emotion = tmp[0][3]
    ems.append(emotion)
    
mapping = {
    'A': 0,
    'D': 1,
    'F': 2,
    'H': 3,
    'N': 4,
    'SA': 5,
    'SU': 6
}

# Use list comprehension to convert elements to numbers
labels = [mapping[item] for item in ems]

In [61]:
len(labels)

1401

In [62]:


# Define the path to the directory containing the mocap data files
data_dir = 'bvh_to_csv'

# List all the mocap data files
data_files = glob.glob(data_dir + '/*.csv')

# Initialize empty lists to store the data
mocap_data = []
emotion_labels = []
length = []
# Load the data from each file
for file in data_files:
    # Load the file and extract the coordinates
    file_data = np.genfromtxt(file, delimiter=',')


    # print(file_data.shape)
   
    # print(max_length)

    # padded_mocap_data = []
    # for sequence in mocap_data:
    #     num_frames_to_pad = max_length - sequence.shape[0]
    #     last_row = sequence[-1]  # Get the last row
    #     padded_sequence = np.concatenate((sequence, np.tile(last_row, (num_frames_to_pad, 1))), axis=0)
    #     padded_mocap_data.append(padded_sequence)

    # file_data = pd.read_csv(file, sep=',', skiprows=1)
    # file_data = file_data.iloc[:,1:]
# print(data)
    
    min_val = np.min(file_data)
    normalized_data = file_data - min_val
    range_val = np.max(file_data) - min_val
    normalized_data /= range_val
    file_data = normalized_data
    
#     # Append the coordinates to the mocap_data list
    mocap_data.append(file_data)


max_length = max([sequence.shape[0] for sequence in mocap_data])

padded_mocap_data = []
for sequence in mocap_data:
    num_frames_to_pad = max_length - sequence.shape[0]
    last_row = sequence[-1]  # Get the last row
    padded_sequence = np.concatenate((sequence, np.tile(last_row, (num_frames_to_pad, 1))), axis=0)
    padded_mocap_data.append(padded_sequence)
# Convert the emotion labels to a NumPy array
emotion_label = np.array(labels)

# Perform any necessary preprocessing steps on the mocap data and emotion labels
# For example, you can normalize the data or encode the emotion labels

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(padded_mocap_data, emotion_label, test_size=0.2, random_state=42)
print(max_length)

1989


In [64]:
def kl_divergence_loss(mu, logvar):
    kl_loss = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
    return kl_loss

def reconstruction_loss(input,target):
    recon_loss = nn.CrossEntropyLoss()(input,target)
    return recon_loss

def total_loss( mu, logvar,input,target):
    kl_loss = kl_divergence_loss(mu, logvar)
    recon_loss = reconstruction_loss(input,target)

    total_loss = 0.005 * kl_loss + 0.95* recon_loss
    return total_loss




In [65]:
class BatchFlatten(nn.Module):
    def __init__(self):
        super(BatchFlatten,self).__init__()
        self._name = 'batch_flatten'
    def forward(self, x):
        return x.view(x.shape[0],-1)

In [67]:
class NormalDistDecoder(nn.Module):
    def __init__(self, num_feat_in, latentD):
        super(NormalDistDecoder, self).__init__()

        self.mu = nn.Linear(num_feat_in, latentD)
        self.logvar = nn.Linear(num_feat_in, latentD)

    def forward(self, Xout):
        mu_logvar =torch.distributions.normal.Normal(self.mu(Xout), F.softplus(self.logvar(Xout)))
        return mu_logvar
        


In [69]:
class VPoser(nn.Module):
    def __init__(self):
        super(VPoser, self).__init__()

        num_neurons, self.latentD = 512,512
        nb_frames = 1989

        self.num_joints = 58
        n_features = self.num_joints * 3 * nb_frames

        self.encoder_net = nn.Sequential(
            BatchFlatten(),
            nn.BatchNorm1d(n_features),
            nn.Linear(n_features, num_neurons),
            nn.LeakyReLU(),
            nn.BatchNorm1d(num_neurons),
            nn.Dropout(0.1),
            nn.Linear(num_neurons, num_neurons),
            nn.LeakyReLU(),
            nn.Linear(num_neurons, self.latentD),
            # NormalDistDecoder(num_neurons, self.latentD)
            
            
            
        )

        self.decoder_net = nn.Sequential(
            nn.Linear(self.latentD, num_neurons),
            nn.LeakyReLU(),
            nn.Dropout(0.1),
            nn.Linear(num_neurons, num_neurons),
            nn.LeakyReLU(),
            nn.Linear(num_neurons, n_features),
            # ContinousRotReprDecoder(),
        )
        self.mu_net = nn.Linear(512, 512)
        self.logvar_net = nn.Linear(512,512)
        # self.decoder_net = NormalDistDecoder(self.latentD, 512)




    def encode(self, pose_body):
        '''
        :param Pin: Nx(numjoints*3)
        :param rep_type: 'matrot'/'aa' for matrix rotations or axis-angle
        :return:
        
        '''
        # print('posebody', pose_body.shape)
        # self.mu_net = nn.Linear(32, 512)
        # self.logvar_net = nn.Linear(32,512)
        q_z = self.encoder_net(pose_body)
        # print("qz",q_z.shape)
        mu = self.mu_net(q_z)
        logvar = self.logvar_net(q_z)
        return q_z, mu, logvar

    def decode(self, Zin):
        # print('zin' ,Zin.shape)
        bs = Zin.shape[0]
        # print(bs)

        prec = self.decoder_net(Zin)
        prec1 = prec.view(bs,-1,174)
        # print('prec' ,prec1.shape)

        return {
            # 'pose_body': matrot2aa(prec.view(-1, 3, 3)).view(bs, -1, 3),
            'pose_body_matrot': prec.view(bs,-1,174)
        }


    def forward(self, pose_body):    

        q_z, mu, logvar = self.encode(pose_body)
        # print(q_z.shape)
        # q_z_sample = q_z.rsample()
        q_z_sample = self.reparameterize(mu, logvar)
        # print('qz sample ', q_z_sample.shape)

        decode_results = self.decode(q_z_sample)
        pose_body_decoded = decode_results['pose_body_matrot']  # Extract the decoded pose tensor

        # decode_results.update({'poZ_body_mean': q_z.mean, 'poZ_body_std': q_z.scale, 'q_z': q_z})
        return pose_body_decoded, mu , logvar

    def reparameterize(self, mu, logvar):
        std = torch.exp(0.5 * logvar)
        eps = torch.randn_like(std)
        return mu + eps * std
    
    
    
    def sample_poses(self, num_poses, seed=None):
        np.random.seed(seed)

        some_weight = [a for a in self.parameters()][0]
        dtype = some_weight.dtype
        device = some_weight.device
        self.eval()
        with torch.no_grad():
            Zgen = torch.tensor(np.random.normal(0., 1., size=(num_poses, self.latentD)), dtype=dtype, device=device)

        return self.decode(Zgen)
    

In [70]:
import torch
import numpy as np
from torch.utils.data import DataLoader, Dataset

# Convert the mocap data and emotion labels to NumPy arrays
X_train_array = np.array(X_train, dtype=np.float32)
y_train_array = np.array(y_train, dtype=np.compat.long)
X_test_array = np.array(X_test, dtype=np.float32)
y_test_array = np.array(y_test, dtype=np.compat.long)

# Convert NumPy arrays to PyTorch tensors
X_train_tensor = torch.tensor(X_train_array)
y_train_tensor = torch.tensor(y_train_array)
X_test_tensor = torch.tensor(X_test_array)
y_test_tensor = torch.tensor(y_test_array)

# Create a custom dataset class to load the data
class MocapDataset(Dataset):
    def __init__(self, data, labels):
        self.data = data
        self.labels = labels

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx], self.labels[idx]

# Create DataLoader for training and testing sets
train_dataset = MocapDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=128, shuffle=False)

test_dataset = MocapDataset(X_test_tensor, y_test_tensor)
test_loader = DataLoader(test_dataset, batch_size=128, shuffle=False)


In [79]:


# Define the VPoser model
model = VPoser()
optimizer = optim.Adam(model.parameters(), lr=0.000001)
criterion = nn.CrossEntropyLoss()
# Training loop
num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    for batch_data, batch_labels in train_loader:
        # print(batch_data[0][0])
        optimizer.zero_grad()
        outputs, mu, logvar = model(batch_data)
        # print(outputs[0][0])
       
        
        labels = batch_labels.repeat_interleave(174)
        # print(labels.shape)

# Reshape the tensor to size (128, 3)
        labels = labels.reshape(-1,174)
# ENCODE THE LABELS AND DO A CLUSTERING  SEE THE DISTROBUTION OF LATENT 
        # print(labels.shape)
        labels = labels.long()    
        # loss = criterion(outputs, labels)
        loss = total_loss(mu,logvar,outputs,labels) 
        optimizer.step()



        mse_criterion = torch.nn.MSELoss()
        mse = mse_criterion(outputs, batch_data)
    print("Mean Squared Error:", mse.item())

      
       

        
# LATENT PROBABILITY  
    # Print the loss at the end of each epoch
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {loss}")


Mean Squared Error: 0.2621057331562042
Epoch 1/10, Loss: 10.963930130004883
Mean Squared Error: 0.2623075842857361
Epoch 2/10, Loss: 10.983806610107422
Mean Squared Error: 0.2622583210468292
Epoch 3/10, Loss: 10.903276443481445
Mean Squared Error: 0.2621668577194214
Epoch 4/10, Loss: 11.008668899536133
Mean Squared Error: 0.2619835138320923
Epoch 5/10, Loss: 10.939143180847168
Mean Squared Error: 0.2619412839412689
Epoch 6/10, Loss: 10.955436706542969
Mean Squared Error: 0.2619992792606354
Epoch 7/10, Loss: 10.964600563049316
Mean Squared Error: 0.2619350552558899
Epoch 8/10, Loss: 10.923131942749023
Mean Squared Error: 0.2621893882751465
Epoch 9/10, Loss: 10.911256790161133
Mean Squared Error: 0.2622235119342804
Epoch 10/10, Loss: 11.017494201660156


In [74]:
model.eval()
test_loss = 0.0
num_samples = 0
for batch_data, batch_labels in test_loader:
    outputs, mu, logvar = model(batch_data)
    labels = batch_labels.repeat_interleave(174)
    labels = labels.reshape(-1,174)
    labels = labels.long()    
    # loss = criterion(outputs,labels)
    loss = total_loss(mu,logvar,outputs,labels)
    
print(f" Loss: {loss} ")




    


 Loss: 8.05859661102295 
