In [None]:
import torch
from torchvision import transforms
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
import numpy as np
import os
import cv2

In [None]:
def load_model(weights='yolov7-w6-pose.pt', device=None):
    """Load a pose-estimation checkpoint and return its model in inference mode.

    Args:
        weights: Path to the checkpoint file. The file must unpickle to a
            mapping that has a 'model' entry.
        device: Device the checkpoint tensors are mapped onto. When None, the
            first CUDA device is used if one is available, otherwise the CPU.

    Returns:
        The checkpoint's 'model' object, cast to float32 and set to eval mode.
    """
    if device is None:
        device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # SECURITY: this checkpoint stores a pickled model object, so loading it
    # executes arbitrary pickle code. Only load files from a trusted source.
    # weights_only=False is passed explicitly because PyTorch >= 2.6 defaults
    # to weights_only=True, which refuses full-object checkpoints.
    try:
        checkpoint = torch.load(weights, map_location=device, weights_only=False)
    except TypeError:
        # Older PyTorch releases do not accept the weights_only keyword.
        checkpoint = torch.load(weights, map_location=device)

    model = checkpoint['model']
    # Put in inference mode
    model.float().eval()
    return model

# Module-level model instance; video_output() reads this global for inference
# and for the class / keypoint counts in model.yaml.
model = load_model()

In [None]:
@torch.no_grad()
def video_output(frame):
    """Run pose estimation on one frame and draw the detected skeletons.

    Uses the module-level ``model`` and expects ``non_max_suppression_kpt``,
    ``output_to_keypoint`` and ``plot_skeleton_kpts`` to be in scope.
    NOTE(review): those three helpers are not imported in the visible cells;
    presumably they come from the YOLOv7 repository's ``utils`` package —
    confirm and import them in the first cell.

    Args:
        frame: Image accepted by ``transforms.ToTensor``. The RGB->BGR
            conversion below implies it is expected in RGB channel order
            (presumably an HxWx3 uint8 array — TODO confirm).

    Returns:
        Tuple ``(output, nimg)``: ``output`` is the result of
        ``output_to_keypoint`` on the NMS-filtered predictions (indexed row by
        row below, one skeleton per row); ``nimg`` is a uint8 image, channel-
        swapped with ``COLOR_RGB2BGR``, with the skeletons drawn on it.
    """
    # Follow the model's own device instead of hard-coding CUDA, so the CPU
    # fallback chosen in load_model() keeps working on machines without a GPU.
    model_device = next(model.parameters()).device

    # Apply transforms
    image = transforms.ToTensor()(frame)
    image = image.to(device=model_device, dtype=torch.float32)
    # Turn image into batch
    image = image.unsqueeze(0)

    output, _ = model(image)
    output = non_max_suppression_kpt(output,
                                     0.25, # Confidence Threshold
                                     0.65, # IoU Threshold
                                     nc=model.yaml['nc'], # Number of Classes
                                     nkpt=model.yaml['nkpt'], # Number of Keypoints
                                     kpt_label=True)

    # Gradients are already disabled for the whole function by the decorator.
    output = output_to_keypoint(output)

    # Rebuild a drawable uint8 image from the normalised tensor.
    nimg = image[0].permute(1, 2, 0) * 255
    nimg = nimg.cpu().numpy().astype(np.uint8)
    nimg = cv2.cvtColor(nimg, cv2.COLOR_RGB2BGR)
    for idx in range(output.shape[0]):
        # Columns from index 7 onwards are handed to the skeleton plotter.
        plot_skeleton_kpts(nimg, output[idx, 7:].T, 3)

    return output, nimg

In [None]:
def listout(vid, num_frames=30):
    """Extract a fixed-length sequence of pose features from one video.

    ``num_frames`` equally spaced frames are sampled from the video. Each frame
    is letterboxed, passed through ``video_output``, and reduced to the first
    returned detection row with its two leading columns dropped.

    NOTE(review): ``letterbox`` is not imported in the visible cells;
    presumably it comes from the YOLOv7 repository — confirm.

    Args:
        vid: Path of the video file to read.
        num_frames: Number of frames to sample (defaults to 30, the sequence
            length the dataset cells expect).

    Returns:
        Array of shape ``(num_frames, 56)``. Rows for frames that could not be
        read, or in which nothing was detected, stay all-zero so that every
        video yields the same shape.

    Raises:
        IOError: If the video cannot be opened or reports no frames.
    """
    cap = cv2.VideoCapture(vid)
    try:
        # Get the total number of frames in the video
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        if not cap.isOpened() or total_frames <= 0:
            raise IOError(f"Cannot read any frames from video: {vid}")

        # Generate equally spaced indices so every video has the same length
        frame_indices = np.linspace(0, total_frames - 1, num_frames, dtype=np.int32)

        # Pre-allocate instead of re-stacking the array for every frame.
        test = np.zeros((num_frames, 56))
        for row, idx in enumerate(frame_indices):
            cap.set(cv2.CAP_PROP_POS_FRAMES, int(idx))  # Set the frame index
            ret, frame = cap.read()  # Read the frame
            if not ret:
                continue  # unreadable frame: keep the zero row
            # OpenCV decodes frames as BGR, while video_output() treats its
            # input as RGB (it converts RGB->BGR before drawing).
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frame = letterbox(frame, 640, stride=64, auto=True)[0]
            output, _ = video_output(frame)
            if output.shape[0] == 0:
                continue  # nothing detected: keep the zero row
            test[row] = output[0, 2:]
        return test
    finally:
        # Always free the decoder handle, even when a frame fails.
        cap.release()

In [None]:
# Class name -> integer label.
label_map = {
    "unfit": 0,
    "fit": 1,
}

# Empty accumulators that the dataset cells below append to:
# x collects per-video arrays of shape (30, 56), y collects one label per video.
x = np.empty(shape=(0, 30, 56))
y = np.empty(shape=(0,))

In [None]:
#Unfit Dataset

avi_dir = '/content/yolov7/GEMEP_Vids_folders/irritation'

# Get a list of all .avi files in the directory.
# Sorted because os.listdir returns entries in arbitrary order, which would
# make the sample order (and the seeded train/val split) differ between runs.
avi_files = sorted(f for f in os.listdir(avi_dir) if f.endswith('.avi'))

# Extract the keypoints of every video first, then append to x / y once,
# instead of re-copying the growing arrays on every iteration.
unfit_kps = []
for avi_file in avi_files:
    video_name = os.path.join(avi_dir, avi_file)
    kp = listout(video_name)
    unfit_kps.append(kp[np.newaxis, ...])

x = np.vstack([x] + unfit_kps)
y = np.hstack((y, np.full(len(unfit_kps), label_map["unfit"])))

In [None]:
#Fit DataSet

avi_dir = '/content/yolov7/GEMEP_Vids_folders/relief'

# Get a list of all .avi files in the directory.
# Sorted because os.listdir returns entries in arbitrary order, which would
# make the sample order (and the seeded train/val split) differ between runs.
avi_files = sorted(f for f in os.listdir(avi_dir) if f.endswith('.avi'))

# Extract the keypoints of every video first, then append to x / y once,
# instead of re-copying the growing arrays on every iteration.
fit_kps = []
for avi_file in avi_files:
    video_name = os.path.join(avi_dir, avi_file)
    kp = listout(video_name)
    fit_kps.append(kp[np.newaxis, ...])

x = np.vstack([x] + fit_kps)
y = np.hstack((y, np.full(len(fit_kps), label_map["fit"])))

In [None]:
# Use the GPU when one is present but fall back to the CPU, matching the
# device selection in load_model().
data_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

Y = torch.tensor(y, device=data_device).long()
X = torch.tensor(x, device=data_device, dtype=torch.float)
# One-hot encode the two classes. Accessed through the torch namespace because
# the bare name `nn` is never imported in this notebook.
Y = torch.nn.functional.one_hot(Y, num_classes=2).float()

In [None]:
# Mini-batch size shared by both loaders.
# NOTE(review): batch_size was not defined in any earlier cell, so this cell
# raised NameError on a fresh top-to-bottom run; 32 is a placeholder default.
batch_size = 32

# Split the data into training and validation sets
X_train, X_val, Y_train, Y_val = train_test_split(X, Y, test_size=0.25, random_state=42)
# Create PyTorch DataLoader objects for batch training
train_dataset = TensorDataset(X_train, Y_train)
val_dataset = TensorDataset(X_val, Y_val)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)


In [None]:
# Persist both dataset splits to the working directory.
torch.save(obj=train_dataset, f="train_dataset.pt")
torch.save(obj=val_dataset, f="val_dataset.pt")