In [2]:
import os.path as osp
import os
import numpy as np
import pickle
import logging
from tqdm import tqdm

In [3]:
# Mount Google Drive
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:

def get_raw_bodies_data(skes_path, ske_name, frames_drop_skes, frames_drop_logger):
    """
    This function reads raw skeleton data from .skeleton files.

    Args:
        skes_path (str): Path to the directory containing skeleton files.
        ske_name (str): Name of the skeleton file.
        frames_drop_skes (dict): Dictionary to store dropped frames.
        frames_drop_logger (Logger): Logger for frames dropped.

    Returns:
        dict: A dictionary containing raw skeleton data with the following keys:
            - num_bodies: the number of detected bodies.
            - bodies: a list of body data, each element is a dictionary containing:
                - num_joints: the number of detected joints for this body.
                - joints: a list of joint coordinates for this body.
            - num_frames: the number of valid frames.
    """
    ske_file = osp.join(skes_path, str(ske_name[1]) + '.skeleton')  # Corrected file path construction
    assert osp.exists(ske_file), 'Error: Skeleton file %s not found' % ske_file
    # Read all data from .skeleton file into a list (in string format)
    with open(ske_file, 'r') as f:
        str_data = f.readlines()  # Read lines from the file


    # Rest of the function code...
    num_frames = int(str_data[0].strip('\r\n'))
    frames_drop = []
    bodies_data = dict()
    valid_frames = -1  # 0-based index
    current_line = 1

    for f in range(num_frames):
        num_bodies = int(str_data[current_line].strip('\r\n'))
        current_line += 1

        if num_bodies == 0:  # no data in this frame, drop it
            frames_drop.append(f)  # 0-based index
            continue

        valid_frames += 1
        joints = np.zeros((num_bodies, 25, 3), dtype=np.float32)
        colors = np.zeros((num_bodies, 25, 2), dtype=np.float32)

        for b in range(num_bodies):
            bodyID = str_data[current_line].strip('\r\n').split()[0]
            current_line += 1
            num_joints = int(str_data[current_line].strip('\r\n'))  # 25 joints
            current_line += 1

            for j in range(num_joints):
                temp_str = str_data[current_line].strip('\r\n').split()
                joints[b, j, :] = np.array(temp_str[:3], dtype=np.float32)
                colors[b, j, :] = np.array(temp_str[5:7], dtype=np.float32)
                current_line += 1

            if bodyID not in bodies_data:  # Add a new body's data
                body_data = dict()
                body_data['joints'] = joints[b]  # ndarray: (25, 3)
                body_data['colors'] = colors[b, np.newaxis]  # ndarray: (1, 25, 2)
                body_data['interval'] = [valid_frames]  # the index of the first frame
            else:  # Update an already existed body's data
                body_data = bodies_data[bodyID]
                # Stack each body's data of each frame along the frame order
                body_data['joints'] = np.vstack((body_data['joints'], joints[b]))
                body_data['colors'] = np.vstack((body_data['colors'], colors[b, np.newaxis]))
                pre_frame_idx = body_data['interval'][-1]
                body_data['interval'].append(pre_frame_idx + 1)  # add a new frame index

            bodies_data[bodyID] = body_data  # Update bodies_data

    num_frames_drop = len(frames_drop)
    assert num_frames_drop < num_frames, \
        'Error: All frames data (%d) of %s is missing or lost' % (num_frames, ske_name)
    if num_frames_drop > 0:
        frames_drop_skes[tuple(ske_name)] = np.array(frames_drop, dtype=int)
        frames_drop_logger.info('{}: {} frames missed: {}\n'.format(ske_name, num_frames_drop,
                                                                    frames_drop))

    # Calculate motion (only for the sequence with 2 or more bodyIDs)
    if len(bodies_data) > 1:
        for body_data in bodies_data.values():
            body_data['motion'] = np.sum(np.var(body_data['joints'], axis=0))

    return {'name': ske_name, 'data': bodies_data, 'num_frames': num_frames - num_frames_drop}



In [5]:

np.int = np.int32
def get_raw_skes_data():
    skes_path = "/content/drive/My Drive/dataset/ske_train/"  # Change this path to your directory containing skeleton files
    stat_path = "/content/drive/My Drive/dataset/statistics/"
    save_path = "/content/drive/My Drive/dataset/"

    if not osp.exists(stat_path):
        os.makedirs(stat_path)
    if not osp.exists(save_path):
        os.makedirs(save_path)

    skes_name_file = osp.join(stat_path, 'skes_available_name_60.txt')
    save_data_pkl = osp.join(save_path, 'raw_skes_data_60.pkl')
    frames_drop_pkl = osp.join(save_path, 'frames_drop_skes_60.pkl')

    frames_drop_logger = logging.getLogger('frames_drop')
    frames_drop_logger.setLevel(logging.INFO)
    frames_drop_logger.addHandler(logging.FileHandler(osp.join(save_path, 'frames_drop.log')))
    frames_drop_skes = dict()

    skes_name = np.loadtxt(skes_name_file, dtype=str)

    num_files = skes_name.size
    print('Found %d available skeleton files.' % num_files)

    raw_skes_data = []
    frames_cnt = np.zeros(num_files, dtype=int)  # Replaced np.int with int

    for (idx, ske_name) in enumerate(tqdm(skes_name)):
        bodies_data = get_raw_bodies_data(skes_path, ske_name, frames_drop_skes, frames_drop_logger)
        raw_skes_data.append(bodies_data)
        frames_cnt[idx] = bodies_data['num_frames']
        if (idx + 1) % 1000 == 0:
            print('Processed: %.2f%% (%d / %d)' % \
                  (100.0 * (idx + 1) / num_files, idx + 1, num_files))

    with open(save_data_pkl, 'wb') as fw:
        pickle.dump(raw_skes_data, fw, pickle.HIGHEST_PROTOCOL)
    np.savetxt(osp.join(save_path, 'frames_cnt.txt'), frames_cnt, fmt='%d')

    print('Saved raw bodies data into %s' % save_data_pkl)
    print('Total frames: %d' % np.sum(frames_cnt))

    with open(frames_drop_pkl, 'wb') as fw:
        pickle.dump(frames_drop_skes, fw, pickle.HIGHEST_PROTOCOL)

if __name__ == '__main__':
    get_raw_skes_data()


Found 5744 available skeleton files.


 35%|███▍      | 1000/2872 [06:51<14:19,  2.18it/s]

Processed: 17.41% (1000 / 5744)


 38%|███▊      | 1078/2872 [07:20<11:52,  2.52it/s]INFO:frames_drop:['1079.' 'S001C002P001R002A059']: 32 frames missed: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31]

 38%|███▊      | 1079/2872 [07:20<13:51,  2.16it/s]INFO:frames_drop:['1080.' 'S001C002P001R002A060']: 11 frames missed: [87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97]

 48%|████▊     | 1374/2872 [09:14<09:34,  2.61it/s]INFO:frames_drop:['1375.' 'S001C002P004R001A055']: 42 frames missed: [33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74]

 50%|████▉     | 1434/2872 [09:37<09:45,  2.46it/s]INFO:frames_drop:['1435.' 'S001C002P004R002A055']: 32 frames missed: [32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63]

 67%|██████▋   | 1917/2872 [12:34<05:39,  2.81it/s]INF

Processed: 34.82% (2000 / 5744)


 78%|███████▊  | 2226/2872 [14:37<04:11,  2.57it/s]INFO:frames_drop:['2227.' 'S001C003P003R002A011']: 34 frames missed: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33]

 82%|████████▏ | 2344/2872 [15:21<03:09,  2.79it/s]INFO:frames_drop:['2345.' 'S001C003P004R002A009']: 25 frames missed: [35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59]

100%|██████████| 2872/2872 [18:37<00:00,  2.57it/s]


Saved raw bodies data into /content/drive/My Drive/dataset/raw_skes_data_60.pkl
Total frames: 248025
