In [1]:
import json
import sys
import os
from   scipy.io   import  loadmat
import numpy as np
import pandas as pd
import logging.config
import os
import re
from pathlib import Path
from typing import Any
import random
from collections import defaultdict

# model
from sklearn.model_selection import train_test_split

# Deep Learning
# import torch
# import torch.nn            as nn
# import torch.nn.functional as F
# from torch.optim.optimizer import Optimizer
# from torch.optim.lr_scheduler import LRScheduler
# from torch.utils.data import DataLoader
# from torch.utils.tensorboard import SummaryWriter
# import torchinfo
# from torchmetrics.classification import MulticlassAccuracy
# import torchvision
# from torchvision.transforms import v2 as TorchVisionTrns

sys.path.insert(0, '..')

from src.conf import settings
from src.common import cli


### Data Exploration

In [35]:
# Extract joints names:
def extract_joint_names(readme_path):
    with open(readme_path, 'r') as file:
        readme_content = file.read()
    # Use regular expression to find the section with body parts
    match = re.search(r"The ordering of joints is as follows:(.*?)If you use this dataset", readme_content, re.DOTALL)
    if match:
        labels_section = match.group(1).strip()
        joint_names = [line.strip() for line in labels_section.splitlines()]
    else:
        raise Exception(f"No labels found in the given path: {readme_path}")
    return joint_names

# Example usage:
readme_path = settings.DATA_DIR.joinpath('README.txt')
joint_names = extract_joint_names(readme_path)
print(joint_names)

['Right ankle', 'Right knee', 'Right hip', 'Left hip', 'Left knee', 'Left ankle', 'Right wrist', 'Right elbow', 'Right shoulder', 'Left shoulder', 'Left elbow', 'Left wrist', 'Neck', 'Head top']


In [2]:
# Load .mat file:
def load_joint_annotations(mat_path):
    try:
        mat = loadmat(mat_path)
    except Exception as e:
        print(f"Error loading .mat file: {e}")
        raise
    key = [k for k in mat.keys() if k[0] != '_'][0] #joints
    joint_annotations = mat[key]
    return joint_annotations

# Example usage:
mat_path = settings.LABELS_PATH
joint_annotations = load_joint_annotations(mat_path)
print(joint_annotations.shape)

(14, 3, 9428)


In [5]:
# Get Images filenames:
def get_image_filenames(img_dir):
    return sorted([f for f in os.listdir(img_dir) if f.endswith('.png')])

# Example usage:
img_dir = settings.RAW_DATA_DIR
image_filenames = get_image_filenames(img_dir)
print(len(image_filenames))

9428


##### For HRNet

In [69]:
num_images = len(image_filenames)
num_joints = len(joint_names)

labels = {}
for i in range(num_images):
        img_name = image_filenames[i]
        labels[img_name] = {}
        for j in range(num_joints):
                joint_data = joint_annotations[j,:, i].tolist()
                labels[img_name][joint_names[j]] = joint_data


# create label.json
with open(settings.CONF_DIR.joinpath("labels.json"), 'w') as f:
        json.dump(labels, f, indent=4)

##### For YOLOv5

In [7]:
def yolo_gt_txt_files(folder_path) -> dict[str, list[int]]:
    data_dict = {}
    
    for filename in os.listdir(folder_path):
        if filename.endswith('.txt'):
            file_path = os.path.join(folder_path, filename)
            
            with open(file_path, 'r') as file:
                first_line = file.readline().strip().split()
                
                int_list = []
                for item in first_line:
                    if item.lower() == "human":
                        break
                    try:
                        int_list.append(int(item))
                    except ValueError:
                        print(f"Non-integer value encountered in {filename}: {item}")
                
                # Extract the relevant part of the filename before ".png"
                key = filename.split('_png')[0] + '.png'
                data_dict[key] = int_list

    return data_dict

# Example usage
folder_path = settings.DATA_DIR.joinpath('gt_bbox_yolo')
data = yolo_gt_txt_files(folder_path)
print(data)
# create bbox_label.json
with open(settings.CONF_DIR.joinpath("bbox_labels.json"), 'w') as f:
        json.dump(data, f, indent=4)

Non-integer value encountered in im01655_png.rf.dbf475a2de336d312a7b933c579d879c.txt: 191.00000000000006
Non-integer value encountered in im01655_png.rf.dbf475a2de336d312a7b933c579d879c.txt: 191.00000000000006
Non-integer value encountered in im01655_png.rf.dbf475a2de336d312a7b933c579d879c.txt: Humans
Non-integer value encountered in im00117_png.rf.1e349bf80016d27fd70f991ac6813549.txt: 495.00000000000006
Non-integer value encountered in im00117_png.rf.1e349bf80016d27fd70f991ac6813549.txt: 182.00000000000006
Non-integer value encountered in im00117_png.rf.1e349bf80016d27fd70f991ac6813549.txt: 182.00000000000006
Non-integer value encountered in im00117_png.rf.1e349bf80016d27fd70f991ac6813549.txt: 245.00000000000003
Non-integer value encountered in im00117_png.rf.1e349bf80016d27fd70f991ac6813549.txt: 495.0000000000001
Non-integer value encountered in im00117_png.rf.1e349bf80016d27fd70f991ac6813549.txt: Humans
Non-integer value encountered in im04481_png.rf.39ccfac2e9605928d56c125f09fb4657

### Data Split: Train-Val-Test

### YOLOV5

#### YOLO Data : Train-Val

In [1]:
from ultralytics import YOLO

# Load a model
model = YOLO("yolov8n.yaml")  # build a new model from scratch
model = YOLO("yolov8n.pt")  # load a pretrained model (recommended for training)

# Use the model
# model.train(data="coco128.yaml", epochs=3)  # train the model
# metrics = model.val()  # evaluate model performance on the validation set
results = model(datetaset)  # predict on an image
path = model.export(format="onnx") 

ModuleNotFoundError: No module named 'torch'