In [None]:
# Mount Google Drive (Colab only) so the paths under /content/drive used below are readable.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
!pip install -q torch transformers tqdm matplotlib numpy pandas torchmetrics sentence-transformers

# **Find bad data**

In [None]:
import os
import json

def list_files(root_dir):
    """Lazily yield the full path of every file found beneath ``root_dir``.

    The tree is traversed recursively with ``os.walk``; directories themselves
    are never yielded, only the files they contain.
    """
    for current_dir, _subdirs, names in os.walk(root_dir):
        yield from (os.path.join(current_dir, name) for name in names)

folder_path = '/content/drive/MyDrive/txt2openpose-Data - Copy'

# Report every file that is not a 900x300 canvas holding exactly 5 people.
# Unreadable or malformed files are reported too, instead of aborting the scan
# at the first one.
for file_path in list_files(folder_path):
  try:
    with open(file_path) as f:
      data = json.load(f)
    is_valid = (
      data["canvas_width"] == 900
      and data["canvas_height"] == 300
      and len(data["people"]) == 5
    )
  except (ValueError, KeyError, TypeError) as err:
    # ValueError covers json.JSONDecodeError and text-decoding failures;
    # KeyError/TypeError cover files that parse but lack the expected layout.
    print(file_path, "->", repr(err))
    continue
  if not is_valid:
    print(file_path)

# **Count Data**

In [None]:
import os

def count_files(directory):
    """Return the number of files under ``directory``, including all nested subdirectories."""
    return sum(len(filenames) for _root, _dirs, filenames in os.walk(directory))

def display_tree(directory, indent=0):
    """Print an indented tree of ``directory`` and all of its subdirectories.

    Each printed line holds the directory name (padded to 15 characters) and the
    recursive file count returned by ``count_files``. Paths that are not
    directories are silently ignored.

    Args:
        directory: Path of the directory to print.
        indent: Current nesting depth; controls the ``"|   "`` prefix repetition.
    """
    if not os.path.isdir(directory):
        return

    name = os.path.basename(directory)
    prefix = "|   " * indent + "|---"
    # ljust pads short names to 15 columns and leaves longer names untouched
    print(prefix + name.ljust(15), end="")
    print(" ({0} Sequences)".format(count_files(directory)))

    # Recurse into every child that is itself a directory
    for entry in os.listdir(directory):
        child = os.path.join(directory, entry)
        if os.path.isdir(child):
            display_tree(child, indent + 1)

# Print the dataset's directory tree with the recursive file count of each folder.
display_tree('/content/drive/MyDrive/txt2openpose-Data - Copy')

# **Plot to see data**

In [None]:
# Root folder of the dataset; the cells below walk it to collect the JSON files.
folder_path = '/content/drive/MyDrive/txt2openpose-Data - Copy'

In [None]:
import json
import matplotlib.pyplot as plt
import numpy as np

def plot_fromPerson(person, person_idx):
    """Draw one person's skeleton on the current matplotlib axes.

    ``person['pose_keypoints_2d']`` is reshaped into rows of three values and
    only the first two columns are plotted, as x and y.
    (The third column is presumably a confidence score - TODO confirm.)

    Args:
        person: Dict carrying a flat ``pose_keypoints_2d`` list.
        person_idx: Number shown in the text label placed at keypoint 0.
    """
    points = np.array(person['pose_keypoints_2d']).reshape(-1, 3)
    xs, ys = points[:, 0], points[:, 1]

    # Keypoints as red dots
    plt.scatter(xs, ys, s=10, c='r')

    # Limbs as red segments between pairs of keypoint indices
    limbs = [(0, 1), (1, 2), (2, 3), (3, 4), (1, 5), (5, 6), (6, 7), (1, 8),
             (8, 9), (9, 10), (1, 11), (11, 12), (12, 13)]
    for start, end in limbs:
        plt.plot([xs[start], xs[end]], [ys[start], ys[end]], 'r')

    # Label the person next to keypoint 0
    plt.text(xs[0], ys[0], f'Person {person_idx}', fontsize=10, color='blue')

def plot_openpose(json_file):
    """Load an OpenPose JSON file and draw every person in it on a black canvas.

    Args:
        json_file: Path to a JSON file with a ``people`` list; each entry is
            handed to ``plot_fromPerson``. The path is also used as the title.
    """
    with open(json_file) as f:
        data = json.load(f)

    plt.figure(figsize=(8, 8))
    # Black 300x900 background. With matplotlib's default origin='upper', imshow
    # already puts y=0 at the top, so the axis follows image coordinates and no
    # manual inversion is needed (the former bare plt.gca() call did nothing).
    plt.imshow(np.zeros((300, 900, 3)))

    for idx, person in enumerate(data['people']):
        plot_fromPerson(person, idx)

    plt.title(json_file)
    plt.show()

# Example usage: plot one hand-picked sample from the dataset.
json_file = '/content/drive/MyDrive/txt2openpose-Data - Copy/Walk/Forward/White-queen_walk_girl_woman.json'
plot_openpose(json_file)

In [None]:
import random
import os

def get_random_file_paths(folder_path, num_files=5):
    """Return up to ``num_files`` distinct file paths picked at random.

    Args:
        folder_path: Directory that is searched recursively for files.
        num_files: Maximum number of paths to return; capped at the number of
            files actually found.

    Returns:
        A list of file paths in random order (empty when no file exists).
    """
    file_paths = [
        os.path.join(root, name)
        for root, _dirs, names in os.walk(folder_path)
        for name in names
    ]
    # random.sample already draws without replacement in random order, so the
    # list does not need to be shuffled first.
    return random.sample(file_paths, min(num_files, len(file_paths)))

# Example usage: plot five randomly chosen samples from the dataset folder.
random_file_paths = get_random_file_paths(folder_path, 5)
for path in random_file_paths:
    plot_openpose(path)

# **Grouped Keypoints**

In [None]:
def groupedKeypointsForPerson(person):
  """Convert a person's flat keypoint list into a list of ``[x, y]`` pairs.

  ``person["pose_keypoints_2d"]`` is read in consecutive triples; the first two
  values of every complete triple are kept and the third is dropped.
  """
  flat = person["pose_keypoints_2d"]
  usable = len(flat) - len(flat) % 3  # a trailing partial triple is ignored
  return [[flat[start], flat[start + 1]] for start in range(0, usable, 3)]

def groupedKeypointForJSON(json_path):
  """Load an OpenPose JSON file and return the grouped keypoints of each person.

  The result has one entry per element of the file's ``people`` list, each
  produced by ``groupedKeypointsForPerson``.
  """
  with open(json_path) as handle:
    people = json.load(handle)['people']
  return [groupedKeypointsForPerson(person) for person in people]

In [None]:
import os

# One entry per dataset file: the grouped keypoints of all people in that file.
newPosesData = []

def list_files(root_dir):
    """Yield the path of every file below ``root_dir`` (recursive ``os.walk``)."""
    for folder, _subfolders, file_names in os.walk(root_dir):
        yield from (os.path.join(folder, file_name) for file_name in file_names)

# Parse every dataset file into grouped keypoints, in directory-walk order.
newPosesData.extend(groupedKeypointForJSON(path) for path in list_files(folder_path))

# Dump each parsed sample with its index and number of people for inspection.
for index, poses in enumerate(newPosesData):
  print("index: {}, length: {}, data: {}".format(index, len(poses), poses))

# **Create text description from file path**

In [None]:
import re

# One lower-cased description string per dataset file, derived from its path.
text_description_list = []

def list_files(root_dir):
    """Walk ``root_dir`` recursively and yield the full path of each file found."""
    for parent, _children, entries in os.walk(root_dir):
        yield from (os.path.join(parent, entry) for entry in entries)

for file_path in list_files(folder_path):
  # Derive the description from the path *relative to the dataset root*, so the
  # result no longer depends on how deep folder_path sits in the filesystem
  # (the old fixed indices 5/6/7 only worked for one absolute path layout).
  relative_parts = os.path.relpath(file_path, folder_path).split(os.sep)
  # Drop the file extension from the last component (".json" for this dataset).
  relative_parts[-1] = os.path.splitext(relative_parts[-1])[0]
  words = " ".join(relative_parts).lower().split(" ")
  # dict.fromkeys removes repeated words while keeping first-seen order
  text_description_list.append(" ".join(dict.fromkeys(words)))

for i, data in enumerate(text_description_list):
    print("index: {}, length: {}, data: {}".format(i, len(data), data))

# **Create text(x) points(y) data**

In [None]:
import numpy as np

# Flatten every sample's grouped keypoints into a single 180-value vector.
keypoint_poses_data = np.array(newPosesData).reshape(len(newPosesData), 180).tolist()

# Pair each description (model input) with the pose vector at the same index (target).
train_test_data = [
  [description, keypoint_poses_data[index]]
  for index, description in enumerate(text_description_list)
]

for sample in train_test_data:
  print(sample)

# **Split train test data**

In [None]:
import torch

In [None]:
import math

dataset_size = len(train_test_data)

# 80% of the samples go to training, the remainder to testing.
train_size = math.floor(0.8 * dataset_size)
test_size = dataset_size - train_size

# Seed the split so the same samples land in train/test on every run; without a
# fixed generator the reported test metrics change from run to run.
split_generator = torch.Generator().manual_seed(42)
train_data, test_data = torch.utils.data.random_split(
    train_test_data, [train_size, test_size], generator=split_generator
)

print(len(train_data))
print(len(test_data))

# **Dataloader**

In [None]:
from sentence_transformers import SentenceTransformer
# Pretrained sentence encoder; collate_batch and the inference cells call its encode() on the text.
sentence_model = SentenceTransformer("all-MiniLM-L6-v2")

In [None]:
#Dataloader
def collate_batch(batch):
    """Collate ``(text, poses)`` samples into two stacked float tensors.

    Every text is embedded with the module-level ``sentence_model`` and every
    pose list is converted to a float tensor.

    Returns:
        ``(text_embeddings, pose_targets)``, each stacked along a new dim 0.
    """
    text_tensors = [
        torch.tensor(sentence_model.encode(text), dtype=torch.float)
        for text, _poses in batch
    ]
    pose_tensors = [torch.tensor(poses, dtype=torch.float) for _text, poses in batch]
    return torch.stack(text_tensors), torch.stack(pose_tensors)

from torch.utils.data import DataLoader
# One sample per batch; only the training loader shuffles. Both embed text via collate_batch.
train_dataloader = DataLoader(
    train_data, batch_size=1, shuffle=True, collate_fn=collate_batch
)
test_dataloader = DataLoader(
    test_data, batch_size=1, shuffle=False, collate_fn=collate_batch
)

In [None]:
import matplotlib.pyplot as plt
import numpy as np

def plot_fromPerson(person, person_idx):
    """Draw one person's skeleton from a flat sequence of x, y values.

    Unlike the JSON-based variant defined earlier in the notebook, ``person`` is
    the coordinate data itself; it is reshaped into rows of two values (x, y).

    Args:
        person: Flat array-like of alternating x and y coordinates.
        person_idx: Number shown in the text label placed at keypoint 0.
    """
    points = np.array(person).reshape(-1, 2)
    xs, ys = points[:, 0], points[:, 1]

    # Keypoints as red dots
    plt.scatter(xs, ys, s=10, c='r')

    # Limbs as red segments between pairs of keypoint indices
    limbs = [(0, 1), (1, 2), (2, 3), (3, 4), (1, 5), (5, 6), (6, 7), (1, 8),
             (8, 9), (9, 10), (1, 11), (11, 12), (12, 13)]
    for start, end in limbs:
        plt.plot([xs[start], xs[end]], [ys[start], ys[end]], 'r')

    # Label the person next to keypoint 0
    plt.text(xs[0], ys[0], f'Person {person_idx}', fontsize=10, color='blue')

def plot_openpose(people):
    """Draw every person of an in-memory pose set on a black canvas.

    Args:
        people: Iterable with one flat x/y coordinate sequence per person; each
            element is handed to ``plot_fromPerson``.
    """
    plt.figure(figsize=(8, 8))
    # Black 300x900 background. With matplotlib's default origin='upper', imshow
    # already puts y=0 at the top, so the axis follows image coordinates and no
    # manual inversion is needed (the former bare plt.gca() call did nothing).
    plt.imshow(np.zeros((300, 900, 3)))

    for idx, person in enumerate(people):
        plot_fromPerson(person, idx)

    plt.show()

In [None]:
import numpy as np

# Sanity check: print the tensor shapes of every training batch and plot its poses.
for batch_number, (text_batch, pose_batch) in enumerate(train_dataloader, start=1):
  print(text_batch.shape, pose_batch.shape)
  print("Batch: ", batch_number)
  for sample_poses in pose_batch:
    # each flat pose vector is viewed as 5 rows of 36 values for plotting
    plot_openpose(np.array(sample_poses).reshape(5, 36))

In [None]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

# **Dense Model**

In [None]:
import torch
import torch.nn as nn

class Dense(nn.Module):
    """Fully connected network mapping a text embedding to a flat pose vector.

    Five hidden ``Linear`` layers, each followed by ReLU, and a linear output
    layer. Without the activations the six layers are mathematically equal to
    one affine map, so the extra depth added no capacity. Layer names (and
    therefore ``state_dict`` keys) are unchanged, but checkpoints trained
    without activations should be retrained.

    Args:
        embedding_dim: Size of the input embedding.
        hidden_dim: Width of every hidden layer.
        output_dim: Size of the predicted pose vector.
    """

    def __init__(self, embedding_dim, hidden_dim, output_dim):
        super(Dense, self).__init__()
        self.fc1 = nn.Linear(embedding_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, hidden_dim)
        self.fc3 = nn.Linear(hidden_dim, hidden_dim)
        self.fc4 = nn.Linear(hidden_dim, hidden_dim)
        self.fc5 = nn.Linear(hidden_dim, hidden_dim)
        self.o = nn.Linear(hidden_dim, output_dim)

    def forward(self, embeddings):
        """Return pose predictions shaped like ``embeddings`` with the last dim replaced by ``output_dim``."""
        x = embeddings
        for layer in (self.fc1, self.fc2, self.fc3, self.fc4, self.fc5):
            x = torch.relu(layer(x))
        # no activation on the output: the targets are unbounded regression values
        return self.o(x)

In [None]:
# Hyperparameters for the Dense model and its training run.
embedding_dim = 384    # input size of the first layer (must equal the sentence-embedding length)
hidden_dim = 512       # width of every hidden layer
output_dim = 180       # length of the flat pose vector used as the regression target
num_epochs = 100       # full passes over train_dataloader
learning_rate = 0.001  # step size handed to the Adam optimizer

In [None]:
# Build the Dense network and move its parameters to the selected device.
dense_model = Dense(embedding_dim, hidden_dim, output_dim).to(device)

In [None]:
# Mean-squared-error loss on the predicted pose vector, optimised with Adam.
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(dense_model.parameters(), lr=learning_rate)

In [None]:
def get_gradient_norms(model):
    """Return the global L2 norm over all gradients currently stored on ``model``.

    Parameters that have no gradient yet are skipped; the result is a Python
    float (``0.0`` when no parameter has a gradient).
    """
    squared_sum = sum(
        (
            param.grad.data.norm(2).item() ** 2
            for param in model.parameters()
            if param.grad is not None
        ),
        0.0,
    )
    return squared_sum ** 0.5

In [None]:
# Per-epoch mean losses and the per-step gradient norms collected during training.
trainingEpoch_loss = []
validationEpoch_loss = []
gradient_norms = []

for epoch in range(num_epochs):
    dense_model.train()
    step_loss = []
    for idx, (texts, poses) in enumerate(train_dataloader):
        optimizer.zero_grad()
        outputs = dense_model(texts.to(device))
        training_loss = criterion(outputs, poses.to(device))
        training_loss.backward()

        # record the gradient magnitude after backward, before the weights move
        gradient_norms.append(get_gradient_norms(dense_model))

        optimizer.step()
        step_loss.append(training_loss.item())

        # progress is logged on every step
        print (f'Epoch [{epoch+1}/{num_epochs}], Step [{idx+1}/{len(train_dataloader)}], Loss: {training_loss.item():.4f}')
    trainingEpoch_loss.append(np.array(step_loss).mean())

    #dense_model.eval()
    #for idx, val_inputs in enumerate(val_dataloader):
    #  validationStep_loss = []
    #  outputs = dense_model(val_inputs[0].to(device))
    #  val_loss = criterion(outputs, val_inputs[1].to(device))
    #  validationStep_loss.append(val_loss.item())
    #validationEpoch_loss.append(np.array(validationStep_loss).mean())

In [None]:
from matplotlib import pyplot as plt
# Plot the gradient norm recorded at every training step of the run above.
plt.plot(gradient_norms)
plt.xlabel('Batch number')
plt.ylabel('Gradient norm')
plt.title('Gradient Norms during Training')
plt.show()

In [None]:
from matplotlib import pyplot as plt
# Per-epoch loss curves. validationEpoch_loss stays empty while the validation
# pass in the training cell is commented out, so only train_loss has data.
plt.plot(trainingEpoch_loss, label='train_loss')
plt.plot(validationEpoch_loss,label='val_loss')
plt.legend()
# plt.show must be *called*; the bare attribute only echoed the function object
plt.show()

In [None]:
# Save the trained Dense weights (state_dict only) to the "Save Weight" folder on Drive.
model_save_name = 'dense_5layers_1batch_noval.pt'
path = F"/content/drive/MyDrive/Save Weight/{model_save_name}"
torch.save(dense_model.state_dict(), path)

In [None]:
# Restore the saved weights into the existing dense_model instance.
dense_model.load_state_dict(torch.load("/content/drive/MyDrive/Save Weight/dense_5layers_1batch_noval.pt"))

In [None]:
import numpy as np

# Mean absolute error of the Dense model over the held-out test set.
dense_model.eval()
mae = []
MAELoss = nn.L1Loss()
with torch.no_grad():
  for batch_texts, batch_poses in test_dataloader:
    outputs = dense_model(batch_texts.to(device))
    test_loss = MAELoss(outputs, batch_poses.to(device))
    # keep a plain float: np.array cannot convert a list of CUDA tensors
    mae.append(test_loss.item())

print("MAE Mean: ", np.array(mae).mean())

In [None]:
# Generate poses for a free-text prompt with the Dense model.
text = "jump up"
processed_text = torch.tensor(sentence_model.encode(text), dtype=torch.float)
# inference only: no autograd graph is needed for the prediction
with torch.no_grad():
  output_poses = dense_model(processed_text.to(device))
print(output_poses.shape)

In [None]:
keypoints_data = output_poses.tolist()

# Cut the flat prediction into consecutive 36-value chunks, one per person;
# a trailing incomplete chunk is not included.
people = [
  keypoints_data[start:start + 36]
  for start in range(0, len(keypoints_data) - 35, 36)
]

people = np.array(people).reshape(5 ,36)
print(people.shape)
for person in people.tolist():
  print(person)

In [None]:
# Visualise the five predicted skeletons from the Dense model.
plot_openpose(people)

# **RNN**

In [None]:
import torch
import torch.nn as nn

class RNN(nn.Module):
    """Five chained ``nn.RNN`` blocks followed by a linear output layer.

    Each block receives the previous block's output sequence and its final
    hidden state as the initial hidden state.

    Args:
        embedding_dim: Size of the input embedding.
        hidden_dim: Hidden size of every RNN block.
        num_layers: Number of stacked layers inside each RNN block.
        output_dim: Size of the predicted pose vector.
    """

    def __init__(self, embedding_dim, hidden_dim, num_layers, output_dim):
        super(RNN, self).__init__()
        self.rnn1 = nn.RNN(embedding_dim, hidden_dim, num_layers, batch_first=True)
        self.rnn2 = nn.RNN(hidden_dim, hidden_dim, num_layers, batch_first=True)
        self.rnn3 = nn.RNN(hidden_dim, hidden_dim, num_layers, batch_first=True)
        self.rnn4 = nn.RNN(hidden_dim, hidden_dim, num_layers, batch_first=True)
        self.rnn5 = nn.RNN(hidden_dim, hidden_dim, num_layers, batch_first=True)
        self.o = nn.Linear(hidden_dim, output_dim)

    def forward(self, embedding):
        """Map embeddings to pose vectors.

        Args:
            embedding: ``(batch, embedding_dim)`` batch of embeddings, or an
                explicit ``(batch, seq, embedding_dim)`` sequence batch.

        Returns:
            ``(batch, output_dim)`` for 2-D input, ``(batch, seq, output_dim)``
            for 3-D input.
        """
        # nn.RNN reads a 2-D tensor as ONE unbatched sequence, which would chain
        # the samples of a batch through a shared hidden state. Give every
        # sample its own length-1 sequence instead (identical for batch size 1).
        batch_of_embeddings = embedding.dim() == 2
        if batch_of_embeddings:
            embedding = embedding.unsqueeze(1)

        o_n1, h_n1 = self.rnn1(embedding)
        o_n2, h_n2 = self.rnn2(o_n1, h_n1)
        o_n3, h_n3 = self.rnn3(o_n2, h_n2)
        o_n4, h_n4 = self.rnn4(o_n3, h_n3)
        o_n5, h_n5 = self.rnn5(o_n4, h_n4)
        output = self.o(o_n5)
        # drop the artificial sequence axis again for 2-D callers
        return output.squeeze(1) if batch_of_embeddings else output

In [None]:
# Hyperparameters for the RNN model and its training run.
embedding_dim = 384    # input size of the first RNN block (must equal the sentence-embedding length)
hidden_dim = 512       # hidden size of every RNN block
num_layers = 1         # stacked layers inside each nn.RNN block
output_dim = 180       # length of the flat pose vector used as the regression target
num_epochs = 100       # full passes over train_dataloader
learning_rate = 0.001  # step size handed to the Adam optimizer

In [None]:
# Build the RNN network and move its parameters to the selected device.
rnn_model = RNN(embedding_dim, hidden_dim, num_layers, output_dim).to(device)

In [None]:
# Mean-squared-error loss with a fresh Adam optimizer bound to the RNN's parameters.
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(rnn_model.parameters(), lr=learning_rate)

In [None]:
def get_gradient_norms(model):
    """Compute the overall L2 norm of the gradients held by ``model``'s parameters.

    Parameters whose ``grad`` is ``None`` contribute nothing. Returns a float.
    """
    per_param_squares = [
        p.grad.data.norm(2).item() ** 2
        for p in model.parameters()
        if p.grad is not None
    ]
    return sum(per_param_squares, 0.0) ** 0.5

In [None]:
# Per-epoch mean losses and the per-step gradient norms collected during training.
trainingEpoch_loss = []
validationEpoch_loss = []
gradient_norms = []

for epoch in range(num_epochs):
    rnn_model.train()
    step_loss = []
    for idx, (train_text, train_poses) in enumerate(train_dataloader):
        train_text, train_poses = train_text.to(device), train_poses.to(device)

        optimizer.zero_grad()
        outputs = rnn_model(train_text)
        training_loss = criterion(outputs, train_poses)
        training_loss.backward()

        # record the gradient magnitude after backward, before the weights move
        gradient_norms.append(get_gradient_norms(rnn_model))

        optimizer.step()
        step_loss.append(training_loss.item())

        # Progress is logged on every step. To inspect predictions visually:
        #for poses in outputs:
        #  plot_openpose(poses.cpu().detach().numpy().reshape(5, 36))
        print (f'Epoch [{epoch+1}/{num_epochs}], Step [{idx+1}/{len(train_dataloader)}], Loss: {training_loss.item():.4f}')
    trainingEpoch_loss.append(np.array(step_loss).mean())

    #rnn_model.eval()
    #for idx, val_inputs in enumerate(val_dataloader):
    #  validationStep_loss = []
    #  val_text, val_poses = val_inputs
    #  val_text = val_text.to(device)
    #  val_poses = val_poses.to(device)
    #  outputs = rnn_model(val_text)
    #  val_loss = criterion(outputs, val_poses)
    #  validationStep_loss.append(val_loss.item())
    #validationEpoch_loss.append(np.array(validationStep_loss).mean())

In [None]:
from matplotlib import pyplot as plt
# Plot the gradient norm recorded at every RNN training step.
plt.plot(gradient_norms)
plt.xlabel('Batch number')
plt.ylabel('Gradient norm')
plt.title('Gradient Norms during Training')
plt.show()

In [None]:
from matplotlib import pyplot as plt
# Per-epoch loss curves. validationEpoch_loss stays empty while the validation
# pass in the training cell is commented out, so only train_loss has data.
plt.plot(trainingEpoch_loss, label='train_loss')
plt.plot(validationEpoch_loss,label='val_loss')
plt.legend()
# plt.show must be *called*; the bare attribute only echoed the function object
plt.show()

In [None]:
# Save the trained RNN weights (state_dict only) to the "Save Weight" folder on Drive.
model_save_name = 'rnn_numlayers=1_stack5_noval.pt'
path = F"/content/drive/MyDrive/Save Weight/{model_save_name}"
torch.save(rnn_model.state_dict(), path)

In [None]:
# Load the checkpoint written by the save cell for this num_layers=1 model.
# The previous filename pointed at a "numlayers=3" checkpoint, whose extra
# per-layer weights cannot be loaded into a model built with num_layers = 1.
# map_location keeps the load working when the runtime has no GPU.
rnn_model.load_state_dict(
    torch.load("/content/drive/MyDrive/Save Weight/rnn_numlayers=1_stack5_noval.pt", map_location=device)
)

In [None]:
import numpy as np

# Mean absolute error of the RNN model over the held-out test set.
rnn_model.eval()
mae = []
MAELoss = nn.L1Loss()
with torch.no_grad():
  for batch_texts, batch_poses in test_dataloader:
    outputs = rnn_model(batch_texts.to(device))
    test_loss = MAELoss(outputs, batch_poses.to(device))
    # keep a plain float: np.array cannot convert a list of CUDA tensors
    mae.append(test_loss.item())

print("MAE Mean: ", np.array(mae).mean())

In [None]:
# Two different prompts, used to check whether the model's output depends on the text.
text = "walk forward"
text2 = "run sprint"

# Embed both prompts and place them on the model's device.
processed_text = torch.tensor(sentence_model.encode(text), dtype=torch.float).to(device)
processed_text2 = torch.tensor(sentence_model.encode(text2), dtype=torch.float).to(device)

# unsqueeze(0) adds the leading batch axis before the forward pass
output_poses, output_poses2 = (
  rnn_model(embedded.unsqueeze(0)) for embedded in (processed_text, processed_text2)
)
#print(output_poses.shape)
# element-wise comparison: all True would mean the prompt is being ignored
print(output_poses[0] == output_poses2[0])

In [None]:
keypoints_data = output_poses[0].tolist()

# Cut the flat prediction into consecutive 36-value chunks, one per person;
# a trailing incomplete chunk is not included.
people = [
  keypoints_data[start:start + 36]
  for start in range(0, len(keypoints_data) - 35, 36)
]

print(len(people))
for person in people:
  print(person)

In [None]:
# Visualise the skeletons predicted by the RNN model.
plot_openpose(people)

# **LSTM**

In [None]:
import torch
import torch.nn as nn

class LSTM(nn.Module):
    """Five chained ``nn.LSTM`` blocks followed by a linear output layer.

    Each block receives the previous block's output sequence and its final
    ``(hidden, cell)`` state as the initial state.

    Args:
        embedding_dim: Size of the input embedding.
        hidden_dim: Hidden size of every LSTM block.
        num_layers: Number of stacked layers inside each LSTM block.
        output_dim: Size of the predicted pose vector.
    """

    def __init__(self, embedding_dim, hidden_dim, num_layers, output_dim):
        super(LSTM, self).__init__()
        self.lstm1 = nn.LSTM(embedding_dim, hidden_dim, num_layers, batch_first=True)
        self.lstm2 = nn.LSTM(hidden_dim, hidden_dim, num_layers, batch_first=True)
        self.lstm3 = nn.LSTM(hidden_dim, hidden_dim, num_layers, batch_first=True)
        self.lstm4 = nn.LSTM(hidden_dim, hidden_dim, num_layers, batch_first=True)
        self.lstm5 = nn.LSTM(hidden_dim, hidden_dim, num_layers, batch_first=True)
        self.o = nn.Linear(hidden_dim, output_dim)

    def forward(self, embedding):
        """Map embeddings to pose vectors.

        Args:
            embedding: ``(batch, embedding_dim)`` batch of embeddings, or an
                explicit ``(batch, seq, embedding_dim)`` sequence batch.

        Returns:
            ``(batch, output_dim)`` for 2-D input, ``(batch, seq, output_dim)``
            for 3-D input.
        """
        # nn.LSTM reads a 2-D tensor as ONE unbatched sequence, which would chain
        # the samples of a batch through a shared state. Give every sample its
        # own length-1 sequence instead (identical for batch size 1).
        batch_of_embeddings = embedding.dim() == 2
        if batch_of_embeddings:
            embedding = embedding.unsqueeze(1)

        o_n1, (h_n1, c_n1) = self.lstm1(embedding)
        o_n2, (h_n2, c_n2) = self.lstm2(o_n1, (h_n1, c_n1))
        o_n3, (h_n3, c_n3) = self.lstm3(o_n2, (h_n2, c_n2))
        o_n4, (h_n4, c_n4) = self.lstm4(o_n3, (h_n3, c_n3))
        o_n5, (h_n5, c_n5) = self.lstm5(o_n4, (h_n4, c_n4))
        output = self.o(o_n5)
        # drop the artificial sequence axis again for 2-D callers
        return output.squeeze(1) if batch_of_embeddings else output

In [None]:
# Hyperparameters for the LSTM model and its training run.
embedding_dim = 384    # input size of the first LSTM block (must equal the sentence-embedding length)
hidden_dim = 512       # hidden size of every LSTM block
num_layers = 1         # stacked layers inside each nn.LSTM block
output_dim = 180       # length of the flat pose vector used as the regression target
num_epochs = 100       # full passes over train_dataloader
learning_rate = 0.001  # step size handed to the Adam optimizer

In [None]:
# Build the LSTM network and move its parameters to the selected device.
LSTM_model = LSTM(embedding_dim, hidden_dim, num_layers, output_dim).to(device)

In [None]:
# Mean-squared-error loss with a fresh Adam optimizer bound to the LSTM's parameters.
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(LSTM_model.parameters(), lr=learning_rate)

In [None]:
def get_gradient_norms(model):
    """Return the L2 norm of all parameter gradients of ``model`` taken together.

    Only parameters that currently hold a gradient are included; the value is
    returned as a Python float.
    """
    squared_total = 0.0
    grads = (p.grad for p in model.parameters() if p.grad is not None)
    for grad in grads:
        squared_total += grad.data.norm(2).item() ** 2
    return squared_total ** 0.5

In [None]:
# Per-epoch mean losses and the per-step gradient norms collected during training.
trainingEpoch_loss = []
validationEpoch_loss = []
gradient_norms = []

for epoch in range(num_epochs):
    LSTM_model.train()
    step_loss = []
    for idx, (train_text, train_poses) in enumerate(train_dataloader):
        train_text, train_poses = train_text.to(device), train_poses.to(device)

        optimizer.zero_grad()
        outputs = LSTM_model(train_text)
        training_loss = criterion(outputs, train_poses)
        training_loss.backward()

        # record the gradient magnitude after backward, before the weights move
        gradient_norms.append(get_gradient_norms(LSTM_model))

        optimizer.step()
        step_loss.append(training_loss.item())

        # progress is logged on every step
        print (f'Epoch [{epoch+1}/{num_epochs}], Step [{idx+1}/{len(train_dataloader)}], Loss: {training_loss.item():.4f}')
    trainingEpoch_loss.append(np.array(step_loss).mean())

    #LSTM_model.eval()
    #for idx, val_inputs in enumerate(val_dataloader):
    #  validationStep_loss = []
    #  outputs = LSTM_model(val_inputs[0])
    #  val_loss = criterion(outputs, val_inputs[1])
    #  validationStep_loss.append(val_loss.item())
    #validationEpoch_loss.append(np.array(validationStep_loss).mean())

In [None]:
from matplotlib import pyplot as plt
# Plot the gradient norm recorded at every LSTM training step.
plt.plot(gradient_norms)
plt.xlabel('Batch number')
plt.ylabel('Gradient norm')
plt.title('Gradient Norms during Training')
plt.show()

In [None]:
from matplotlib import pyplot as plt
# Per-epoch loss curves. validationEpoch_loss stays empty while the validation
# pass in the training cell is commented out, so only train_loss has data.
plt.plot(trainingEpoch_loss, label='train_loss')
plt.plot(validationEpoch_loss,label='val_loss')
plt.legend()
# plt.show must be *called*; the bare attribute only echoed the function object
plt.show()

In [None]:
# Save the trained LSTM weights (state_dict only) to the "Save Weight" folder on Drive.
model_save_name = 'lstm_numlayers=1_stack5_noval.pt'
path = F"/content/drive/MyDrive/Save Weight/{model_save_name}"
torch.save(LSTM_model.state_dict(), path)

In [None]:
# Restore the saved weights into the existing LSTM_model instance.
LSTM_model.load_state_dict(torch.load("/content/drive/MyDrive/Save Weight/lstm_numlayers=1_stack5_noval.pt"))

In [None]:
import numpy as np

# Mean absolute error of the LSTM model over the held-out test set.
LSTM_model.eval()
mae = []
MAELoss = nn.L1Loss()
with torch.no_grad():
  for batch_texts, batch_poses in test_dataloader:
    outputs = LSTM_model(batch_texts.to(device))
    test_loss = MAELoss(outputs, batch_poses.to(device))
    # keep a plain float: np.array cannot convert a list of CUDA tensors
    mae.append(test_loss.item())

print("MAE Mean: ", np.array(mae).mean())

In [None]:
# Generate poses for a free-text prompt with the LSTM model.
text = "a man running sprint"
# The embedding must live on the same device as the model's weights; without
# .to(device) the forward pass fails whenever the model sits on the GPU.
processed_text = torch.tensor(sentence_model.encode(text), dtype=torch.float).to(device)
output_poses = LSTM_model(processed_text.unsqueeze(0))
print(output_poses)

In [None]:
# Move the prediction to the CPU, detach it from autograd and view it as 5 rows of 36 values for plotting.
plot_openpose(output_poses.cpu().detach().numpy().reshape(5, 36))

In [None]:
# View the prediction as 5 people x 18 keypoints x (x, y).
people = output_poses.cpu().detach().numpy().reshape(5, 18, 2).tolist()

# Append a constant third value of 1 to every (x, y) pair, giving the
# three-values-per-keypoint layout that the export cell below flattens.
newPeople = [[[x, y, 1] for x, y in person] for person in people]

print(np.array(newPeople).shape)

In [None]:
def write_openpose_json(data, file_path):
    """Write ``data`` as the ``people`` list of an OpenPose-style JSON document.

    The document also carries an empty ``animals`` list and a fixed 900x300
    canvas size, and is written to ``file_path`` with 4-space indentation.
    """
    document = {
        'people': data,
        'animals': [],
        'canvas_width': 900,
        'canvas_height': 300,
    }
    with open(file_path, 'w') as out_file:
        json.dump(document, out_file, indent=4)

# Flatten each person's 18 x 3 keypoint values into one 54-value list.
data = np.array(newPeople).reshape(5, 54).tolist()

# Wrap every person in the dict layout used by the dataset's JSON files.
formatted_data = [{ "pose_keypoints_2d": person } for person in data]

# The output file is named after the prompt text.
file_path = 'lstm_generated_poses_' + text + '.json'
write_openpose_json(formatted_data, file_path)

# **CNN**

In [None]:
import torch
import torch.nn as nn

class CNNModel(nn.Module):
    """1-D CNN that regresses a flat pose vector from a text embedding.

    Three conv + ReLU + max-pool stages (16, 32, 64 channels) followed by two
    fully connected layers.

    Args:
        output_dim: Size of the predicted pose vector.
        embedding_dim: Length of the input embedding (default 384, the value
            that was previously hard-coded as ``64 * 48`` in the first
            fully connected layer).
    """

    def __init__(self, output_dim, embedding_dim=384):
        super(CNNModel, self).__init__()
        self.conv1 = nn.Conv1d(in_channels=1, out_channels=16, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv1d(in_channels=16, out_channels=32, kernel_size=3, stride=1, padding=1)
        self.conv3 = nn.Conv1d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1)

        self.pool = nn.MaxPool1d(kernel_size=2, stride=2, padding=0)

        # The convolutions keep the length (kernel 3, padding 1); each of the
        # three pools halves it, so 64 channels x (embedding_dim // 8) remain.
        self.flat_features = 64 * (embedding_dim // 8)
        self.fc1 = nn.Linear(self.flat_features, 256)
        self.fc2 = nn.Linear(256, output_dim)

        self.relu = nn.ReLU()

    def forward(self, x):
        """Map embeddings to pose vectors.

        Args:
            x: ``(batch, embedding_dim)`` embeddings, or ``(batch, 1,
                embedding_dim)`` with the channel axis already present.

        Returns:
            Tensor of shape ``(batch, output_dim)``.
        """
        # Add the single input channel for plain (batch, length) input. For a
        # batch of one this is identical to the previous behaviour, where the
        # 2-D tensor was read as one unbatched (channel, length) sample.
        if x.dim() == 2:
            x = x.unsqueeze(1)

        x = self.pool(self.relu(self.conv1(x)))
        x = self.pool(self.relu(self.conv2(x)))
        x = self.pool(self.relu(self.conv3(x)))

        # Flatten per sample; unlike view(-1, N) this cannot silently mix
        # samples when the input length does not match embedding_dim.
        x = x.flatten(start_dim=1)

        x = self.relu(self.fc1(x))
        x = self.fc2(x)
        return x

In [None]:
# Hyperparameters for the CNN model and its training run.
embedding_dim = 384    # sentence-embedding length (not passed to CNNModel in the cell below)
output_dim = 180       # length of the flat pose vector used as the regression target
num_epochs = 100       # full passes over train_dataloader
learning_rate = 0.001  # step size handed to the Adam optimizer

In [None]:
# Build the CNN network and move its parameters to the selected device.
CNN_model = CNNModel(output_dim).to(device)

In [None]:
import torch.optim as optim

# Mean-squared-error loss with a fresh Adam optimizer bound to the CNN's parameters.
criterion = nn.MSELoss()
optimizer = optim.Adam(CNN_model.parameters(), lr=learning_rate)

In [None]:
def get_gradient_norms(model):
    """Return the combined L2 norm of every gradient stored on ``model``.

    Parameters without a gradient are ignored. The result is a Python float,
    ``0.0`` if no gradients exist.
    """
    squares = (
        param.grad.data.norm(2).item() ** 2
        for param in model.parameters()
        if param.grad is not None
    )
    return sum(squares, 0.0) ** 0.5

In [None]:
# Per-epoch mean losses and the per-step gradient norms collected during training.
trainingEpoch_loss = []
validationEpoch_loss = []
gradient_norms = []

for epoch in range(num_epochs):
    step_loss = []
    CNN_model.train()
    for idx, train_inputs in enumerate(train_dataloader):
        optimizer.zero_grad()
        # Conv1d expects (batch, channels, length): insert the single channel at
        # dim 1. unsqueeze(0) produced the same shape only for batch_size=1 and
        # would turn a larger batch into channels.
        outputs = CNN_model(train_inputs[0].unsqueeze(1).to(device))
        training_loss = criterion(outputs, train_inputs[1].to(device))
        training_loss.backward()

        # record the gradient magnitude after backward, before the weights move
        grad_norm = get_gradient_norms(CNN_model)
        gradient_norms.append(grad_norm)

        optimizer.step()
        step_loss.append(training_loss.item())

        # progress is logged on every step
        print (f'Epoch [{epoch+1}/{num_epochs}], Step [{idx+1}/{len(train_dataloader)}], Loss: {training_loss.item():.4f}')
    trainingEpoch_loss.append(np.array(step_loss).mean())

    #CNN_model.eval()
    #for idx, val_inputs in enumerate(val_dataloader):
    #  validationStep_loss = []
    #  outputs = CNN_model(val_inputs[0].unsqueeze(0).to(device))
    #  val_loss = criterion(outputs, val_inputs[1].to(device))
    #  validationStep_loss.append(val_loss.item())
    #validationEpoch_loss.append(np.array(validationStep_loss).mean())

In [None]:
from matplotlib import pyplot as plt
# Plot the gradient norm recorded at every CNN training step.
plt.plot(gradient_norms)
plt.xlabel('Batch number')
plt.ylabel('Gradient norm')
plt.title('Gradient Norms during Training')
plt.show()

In [None]:
from matplotlib import pyplot as plt
# Per-epoch loss curves. validationEpoch_loss stays empty while the validation
# pass in the training cell is commented out, so only train_loss has data.
plt.plot(trainingEpoch_loss, label='train_loss')
plt.plot(validationEpoch_loss,label='val_loss')
plt.legend()
# plt.show must be *called*; the bare attribute only echoed the function object
plt.show()

In [None]:
# Save the trained CNN weights (state_dict only) to the "Save Weight" folder on Drive.
model_save_name = 'cnn_3layers_noval.pt'
path = F"/content/drive/MyDrive/Save Weight/{model_save_name}"
torch.save(CNN_model.state_dict(), path)

In [None]:
# Restore the saved weights into the existing CNN_model instance.
CNN_model.load_state_dict(torch.load("/content/drive/MyDrive/Save Weight/cnn_3layers_noval.pt"))

In [None]:
import numpy as np

# Mean absolute error of the CNN model over the held-out test set.
CNN_model.eval()
mae = []
MAELoss = nn.L1Loss()
with torch.no_grad():
  for batch_texts, batch_poses in test_dataloader:
    # Insert the channel axis explicitly -> (batch, 1, length). Passing the 2-D
    # batch directly only worked because a batch of one happens to look like a
    # single unbatched (channel, length) sample.
    outputs = CNN_model(batch_texts.unsqueeze(1).to(device))
    test_loss = MAELoss(outputs, batch_poses.to(device))
    # keep a plain float: np.array cannot convert a list of CUDA tensors
    mae.append(test_loss.item())

print("MAE Mean: ", np.array(mae).mean())

In [None]:
# Generate poses for a free-text prompt with the CNN model.
text = "dead front"
processed_text = torch.tensor(sentence_model.encode(text), dtype=torch.float)
# unsqueeze(0) adds a leading axis of size 1 before the tensor is moved to the model's device
output_poses = CNN_model(processed_text.unsqueeze(0).to(device))
print(output_poses.shape)

In [None]:
keypoints_data = output_poses[0].tolist()

# Cut the flat prediction into consecutive 36-value chunks, one per person;
# a trailing incomplete chunk is not included.
people = [
  keypoints_data[start:start + 36]
  for start in range(0, len(keypoints_data) - 35, 36)
]

print(len(people))
for person in people:
  print(person)

In [None]:
# Visualise the skeletons predicted by the CNN model.
plot_openpose(people)