In [9]:
import os
import time
import pickle
import numpy as np
import glm

# Ordered joint names. The position of a name in this list is the index used
# on the joint axis of the `joints` arrays (see partNameToIndex), so the order
# of the entries must not be changed.
# NOTE(review): the 22 names look like the SMPL body-joint ordering - confirm
# against the code that produced the `new_joints` files.
bodyParts = [
    'pelvis',
    'left_hip',
    'right_hip',
    'spine1',
    'left_knee',
    'right_knee',
    'spine2',
    'left_ankle',
    'right_ankle',
    'spine3',
    'left_foot',
    'right_foot',
    'neck',
    'left_collar',
    'right_collar',
    'head',
    'left_shoulder',
    'right_shoulder',
    'left_elbow',
    'right_elbow',
    'left_wrist',
    'right_wrist',
]

def partNameToIndex(partName):
    """Return the position of ``partName`` in the module-level ``bodyParts`` list.

    Raises:
        ValueError: if ``partName`` is not one of the names in ``bodyParts``.
    """
    joint_index = bodyParts.index(partName)
    return joint_index

In [10]:
def calcAvgSpeed(joints,pose_no_begin,pose_no_end,joint):
    """Average per-frame displacement of ``joint`` inside a frame window.

    ``speed`` is evaluated for every pair of consecutive frames ``(k, k + 1)``
    with ``pose_no_begin <= k < pose_no_end - 1`` and the results are averaged.

    Returns:
        The mean of those values, or ``0`` when the window holds fewer than
        two frames (no consecutive pair exists).
    """
    pair_starts = range(pose_no_begin, pose_no_end - 1)
    pair_count = len(pair_starts)
    if pair_count == 0:
        return 0
    summed = sum(speed(joints, k, k + 1, joint) for k in pair_starts)
    return summed / pair_count
    
def speed(joints,pose_no_start,pose_no_end,joint_a):
    """Euclidean distance travelled by ``joint_a`` between two frames.

    ``joints`` is indexed as ``[frame, joint, :]``. The value is a plain
    displacement between the two frames; it is not divided by elapsed time.
    """
    joint_index = partNameToIndex(joint_a)
    displacement = joints[pose_no_start, joint_index, :] - joints[pose_no_end, joint_index, :]
    return np.linalg.norm(displacement)


def calcAvgDistance(joints,pose_no_begin,pose_no_end,joint_a,joint_b):
    """Average distance between two joints over a frame window.

    ``distance`` is evaluated for every frame ``k`` with
    ``pose_no_begin <= k < pose_no_end`` and the results are averaged.

    Returns:
        The mean distance, or ``0`` when the window contains no frames.
    """
    frames = range(pose_no_begin, pose_no_end)
    if not frames:
        return 0
    accumulated = sum(distance(joints, frame, joint_a, joint_b) for frame in frames)
    return accumulated / len(frames)

def distance(joints,pose_no,joint_a,joint_b):
    """Euclidean distance between ``joint_a`` and ``joint_b`` in frame ``pose_no``.

    ``joints`` is indexed as ``[frame, joint, :]``; joint names are resolved
    to indices with ``partNameToIndex``.
    """
    index_a = partNameToIndex(joint_a)
    index_b = partNameToIndex(joint_b)
    offset = joints[pose_no, index_a, :] - joints[pose_no, index_b, :]
    return np.linalg.norm(offset)

def calcAvgAngle(joints,pose_no_begin,pose_no_end,joint_a,joint_cent,joint_b):
    """Average angle at ``joint_cent`` over a frame window.

    ``angle`` is evaluated for every frame ``k`` with
    ``pose_no_begin <= k < pose_no_end`` and the results are averaged.

    Returns:
        The mean angle, or ``0`` when the window contains no frames.
    """
    frames = range(pose_no_begin, pose_no_end)
    if not frames:
        return 0
    accumulated = sum(
        angle(joints, frame, joint_a, joint_cent, joint_b) for frame in frames
    )
    return accumulated / len(frames)


def angle(joints,pose_no,joint_a,joint_cent,joint_b):
    """Angle (radians) at ``joint_cent`` between the directions to two joints.

    The two vectors run from ``joint_a`` / ``joint_b`` to ``joint_cent`` in
    frame ``pose_no``; the angle between them is returned in ``[0, pi]``.

    Returns:
        The angle in radians. When either joint coincides with ``joint_cent``
        the direction is undefined; ``0.0`` is returned instead of dividing by
        zero, so that a NaN cannot leak into the averaged features and the
        later min/max normalisation.
    """
    posCent = joints[pose_no,partNameToIndex(joint_cent),:]
    v1 = posCent - joints[pose_no,partNameToIndex(joint_a),:]
    v2 = posCent - joints[pose_no,partNameToIndex(joint_b),:]

    len1 = np.linalg.norm(v1)
    len2 = np.linalg.norm(v2)
    if len1 == 0 or len2 == 0:
        # Degenerate limb: no direction to measure an angle against.
        return 0.0

    # Clip guards against |cos| creeping past 1 through rounding.
    cosine = np.dot(v1 / len1, v2 / len2)
    return np.arccos(np.clip(cosine, -1.0, 1.0))

In [11]:
def makeVectors(animindex):
    """Build one feature matrix per element count.

    Calls ``makeVectorOfSize`` for every size in
    ``range(min_element_size, max_element_size)`` (both read from module
    scope) and returns the results as a list in that order.
    """
    return [
        makeVectorOfSize(animindex, element_count)
        for element_count in range(min_element_size, max_element_size)
    ]

def makeVectorOfSize(animindex,size):
    """Summarise the current ``joints`` sequence as a ``(feature_size, size)`` matrix.

    The sequence held in the module-level ``joints`` array is cut into ``size``
    consecutive windows of ``int(frame_count / size)`` frames each; frames left
    over at the end are not used. For every window the rows are filled, in
    order, with 4 average joint distances, 2 average foot speeds and 9 average
    joint angles.

    ``animindex`` is accepted for the caller's benefit but is not read here.
    """
    distance_pairs = (
        ('left_wrist', 'right_wrist'),
        ('left_ankle', 'right_ankle'),
        ('head', 'left_wrist'),
        ('head', 'right_wrist'),
    )
    speed_joints = ('left_foot', 'right_foot')
    angle_triples = (
        ('pelvis', 'spine3', 'head'),
        ('left_shoulder', 'left_elbow', 'left_wrist'),
        ('right_shoulder', 'right_elbow', 'right_wrist'),
        ('left_hip', 'left_knee', 'left_ankle'),
        ('right_hip', 'right_knee', 'right_ankle'),
        ('left_wrist', 'pelvis', 'right_wrist'),
        ('left_elbow', 'pelvis', 'right_elbow'),
        ('left_knee', 'pelvis', 'right_knee'),
        ('left_ankle', 'pelvis', 'right_ankle'),
    )

    features = np.zeros((feature_size, size))
    frames_per_element = int(joints.shape[0] / size)

    for element in range(size):
        begin = element * frames_per_element
        end = begin + frames_per_element

        row = 0
        for first, second in distance_pairs:
            features[row, element] = calcAvgDistance(joints, begin, end, first, second)
            row += 1
        for joint in speed_joints:
            features[row, element] = calcAvgSpeed(joints, begin, end, joint)
            row += 1
        for first, centre, second in angle_triples:
            features[row, element] = calcAvgAngle(joints, begin, end, first, centre, second)
            row += 1

    return features

In [12]:
# Number of feature rows written by makeVectorOfSize (rows 0..14).
feature_size = 15
# Element counts used per sequence: range(min_element_size, max_element_size),
# i.e. the upper bound is exclusive.
min_element_size = 1
max_element_size = 16
counti = 0
directory = 'OCEAN/new_joints'

# Make sure the output folder exists before the first dump.
os.makedirs('OCEAN/Pickles', exist_ok=True)

# os.listdir returns entries in arbitrary order. Sorting keeps the running
# index (which later cells use as the row/column of the distance matrix)
# reproducible from one run to the next.
for file in sorted(os.listdir(directory)):
    # makeVectorOfSize reads this module-level `joints` array.
    joints = np.load(os.path.join(directory,file)) #[seqlen,22,3]
    vecs = makeVectors(counti)

    # Output name is "<index>_<input file stem>.pickle"; `with` closes the
    # handle even if pickling fails.
    with open('OCEAN/Pickles/'+ str(counti) + '_' + file.split('.')[0] + '.pickle','wb') as f:
        pickle.dump(vecs,f)
    counti += 1


In [13]:
# Read the per-sequence feature pickles back into memory.
import pickle

# Maps "<index>_<name>" (file name up to the first '.') to the unpickled object.
pickleDict = {}

directory = 'OCEAN/Pickles'

# Walk the files of the Pickles directory.
# NOTE: pickle.load can execute arbitrary code; only point this at files
# produced by this notebook.
for filename in os.listdir(directory):
    f = os.path.join(directory, filename)
    if os.path.isfile(f):
        with open(f, 'rb') as file:
            pickleDict[filename.split('.')[0]] = pickle.load(file)

# Report how many files were read.
count = len(pickleDict)
print("Read file count " + str(count))

# 每个元素大小和特征进行归一化
import numpy as np

# Smallest value of one feature, for one element size, within one file.
def getMinVal(fname, element_size, feat):
    """Return the minimum of the first ``element_size`` values of a feature row.

    The row is ``pickleDict[fname][element_size - 1][feat]``; the
    ``element_size - 1`` lookup assumes the per-file list starts at element
    size 1.
    """
    feature_row = pickleDict[fname][element_size - 1][feat]
    candidates = [feature_row[0]]
    candidates.extend(feature_row[k] for k in range(element_size))
    return min(candidates)

# Largest value of one feature, for one element size, within one file.
def getMaxVal(fname, element_size, feat):
    """Return the maximum of the first ``element_size`` values of a feature row.

    The row is ``pickleDict[fname][element_size - 1][feat]``; the
    ``element_size - 1`` lookup assumes the per-file list starts at element
    size 1.
    """
    feature_row = pickleDict[fname][element_size - 1][feat]
    candidates = [feature_row[0]]
    candidates.extend(feature_row[k] for k in range(element_size))
    return max(candidates)

# Largest value of one feature, for one element size, across all files.
def findMaxOf(element_size, feat):
    """Return the maximum of ``getMaxVal`` over every file in ``pickleDict``.

    Raises:
        StopIteration: if ``pickleDict`` is empty.
    """
    fnames = iter(pickleDict)
    highest = getMaxVal(next(fnames), element_size, feat)
    for fname in fnames:
        candidate = getMaxVal(fname, element_size, feat)
        if candidate > highest:
            highest = candidate
    return highest

# Smallest value of one feature, for one element size, across all files.
def findMinOf(element_size, feat):
    """Return the minimum of ``getMinVal`` over every file in ``pickleDict``.

    Raises:
        StopIteration: if ``pickleDict`` is empty.
    """
    fnames = iter(pickleDict)
    lowest = getMinVal(next(fnames), element_size, feat)
    for fname in fnames:
        candidate = getMinVal(fname, element_size, feat)
        if candidate < lowest:
            lowest = candidate
    return lowest

# Min-max normalise one feature row of one file, in place.
def normalizeVec(max_value, min_value, fname, element_size, feat):
    """Rescale a feature row to ``(value - min_value) / (max_value - min_value)``.

    The first ``element_size`` entries of
    ``pickleDict[fname][element_size - 1][feat]`` are overwritten in place.

    When ``max_value == min_value`` the feature is constant and the usual
    formula would divide by zero (yielding NaN/inf that then spreads into the
    distance matrix); such rows are set to ``0.0`` instead.
    """
    vals = pickleDict[fname][element_size - 1][feat]
    span = max_value - min_value
    for i in range(element_size):
        if span == 0:
            vals[i] = 0.0
        else:
            vals[i] = (vals[i] - min_value) / span

# Min-max normalise one feature row, for one element size, in every file.
def normalize(max_value, min_value, element_size, feat):
    """Apply ``normalizeVec`` with the given bounds to each file in ``pickleDict``."""
    for fname in pickleDict.keys():
        normalizeVec(max_value, min_value, fname, element_size, feat)

# Per (element size, feature) bounds, one row per element size.
size_count = max_element_size - min_element_size
min_per_feature_and_size = np.zeros((size_count, feature_size))
max_per_feature_and_size = np.zeros((size_count, feature_size))

# Record the bounds of every (element size, feature) pair, then normalise the
# corresponding rows of every file with them. This rewrites pickleDict in place.
for element_size in range(min_element_size, max_element_size):
    row = element_size - min_element_size
    for feat in range(feature_size):
        min_per_feature_and_size[row, feat] = findMinOf(element_size, feat)
        max_per_feature_and_size[row, feat] = findMaxOf(element_size, feat)
        normalize(
            max_per_feature_and_size[row, feat],
            min_per_feature_and_size[row, feat],
            element_size,
            feat,
        )

# Sanity check: after normalisation the bounds should average to 0 and 1.
size_feature_pairs = [
    (size, feature)
    for size in range(min_element_size, max_element_size)
    for feature in range(feature_size)
]
count = len(size_feature_pairs)
mins = sum(findMinOf(size, feature) for size, feature in size_feature_pairs)
maxs = sum(findMaxOf(size, feature) for size, feature in size_feature_pairs)
print(mins / count)
print(maxs / count)


Read file count 14099
0.0
1.0


In [15]:
# Pairwise-similarity helpers used to build the sub-sample distance matrix.
def distanceBtwFeat(fname1, fname2, element_size, feat):
    """Mean absolute difference of one feature row between two files.

    Both rows are ``pickleDict[<file>][element_size - 1][feat]``; their first
    ``element_size`` entries are compared position by position.

    Raises:
        ZeroDivisionError: if ``element_size`` is 0.
    """
    row_a = pickleDict[fname1][element_size - 1][feat]
    row_b = pickleDict[fname2][element_size - 1][feat]

    gap_total = sum(abs(row_a[k] - row_b[k]) for k in range(element_size))
    return (gap_total / element_size)

# Total distance between two files.
def distanceBtw(fname1, fname2):
    """Sum ``distanceBtwFeat`` over every element size and every feature.

    Element sizes run over ``range(min_element_size, max_element_size)`` and
    features over ``range(feature_size)``, all read from module scope.
    """
    return sum(
        distanceBtwFeat(fname1, fname2, element_size, feat)
        for element_size in range(min_element_size, max_element_size)
        for feat in range(feature_size)
    )

# Recover the numeric index from a "<index>_<name>" key.
def indexFromFname(fname):
    """Return the integer that precedes the first underscore of ``fname``.

    Raises:
        ValueError: if that leading part is not an integer.
    """
    leading, _, _ = fname.partition('_')
    return int(leading)

# Build the pairwise distance matrix. Rows/columns are addressed by the
# numeric prefix of each key, so the prefixes are expected to cover
# 0..len(pickleDict)-1 (stale files in the Pickles folder would break this).
names = list(pickleDict)
indices = [indexFromFname(name) for name in names]
total = len(names)

distanceMatrix = np.zeros((total, total))
fcount = 0

# distanceBtw is symmetric (it sums |a - b| terms), so every unordered pair is
# evaluated once and mirrored; the diagonal is inf so that a sample is never
# reported as its own nearest neighbour.
for a, p1 in enumerate(names):
    fcount += 1
    print('calculating for ' + p1 + ' (' + str(fcount) + '/' + str(total) + ')', end='\r')

    row = indices[a]
    distanceMatrix[row, row] = float('inf')
    for b in range(a + 1, total):
        col = indices[b]
        pair_distance = distanceBtw(p1, names[b])
        distanceMatrix[row, col] = pair_distance
        distanceMatrix[col, row] = pair_distance

# Persist the matrix; `with` closes the handle even if pickling fails.
with open('distanceMatrix.pickle', 'wb') as file:
    pickle.dump(distanceMatrix, file)


calculating for 12092_G012T000A001R007P1 (14099/14099)

In [16]:
# Load the distance matrix used to eliminate near-duplicate samples.
# `with` closes the handle even if unpickling fails. pickle.load must only be
# used on files produced by this notebook.
with open('distanceMatrix.pickle', 'rb') as file:
    distM = pickle.load(file)

totalRemaining = len(pickleDict)  # number of samples still kept
targetCount = 10000  # number of samples to keep
eliminateds = []  # indices of the samples that were eliminated

# Locate the smallest entry of a matrix.
def find_min_idx(x):
    """Return ``(row, column)`` of the minimum of the 2-D array ``x``.

    ``x.argmin()`` gives the position in the flattened array; it is split into
    row and column with integer ``divmod`` rather than float division, so the
    result stays exact for any array size. On ties the first minimum in
    row-major order is returned.
    """
    row, col = divmod(int(x.argmin()), x.shape[1])
    return row, col

# Repeatedly drop one member of the closest remaining pair until only
# targetCount samples are left.
INF = float('inf')
sample_count = len(pickleDict)

while totalRemaining > targetCount:
    # Closest pair among the samples that are still alive.
    i, j = find_min_idx(distM)
    if distM[i, j] == INF:
        # No finite pair is left, so there is nothing meaningful to compare.
        print('No comparable pair left; stopping with ' + str(totalRemaining) + ' samples')
        break

    # Total distance from each candidate to all other samples. Entries equal
    # to inf (the diagonal and already eliminated samples) are skipped. The
    # built-in sum adds left to right, like an explicit loop would.
    row_i = distM[i, :sample_count]
    row_j = distM[j, :sample_count]
    sumi = sum(row_i[row_i != INF].tolist())
    sumj = sum(row_j[row_j != INF].tolist())

    # Drop the candidate with the smaller distance sum, i.e. the one that sits
    # closer to everything else; on a tie the first index is dropped.
    to_eliminate = j if sumi > sumj else i

    # Mask the eliminated sample so it can never be selected again.
    distM[to_eliminate, :] = INF
    distM[:, to_eliminate] = INF
    eliminateds.append(to_eliminate)  # remember the eliminated index
    totalRemaining -= 1
    print('Eliminated ' + str(to_eliminate) + ' in comparison btw ' + str(i) + ' - ' + str(j))

# Indices of the samples that survived; a set makes each membership test O(1).
eliminated_lookup = set(eliminateds)
remains = [idx for idx in range(sample_count) if idx not in eliminated_lookup]

print(remains)

Eliminated 1077 in comparison btw 1077 - 10956
Eliminated 1682 in comparison btw 1682 - 13016
Eliminated 1691 in comparison btw 1691 - 5618
Eliminated 1937 in comparison btw 1937 - 13653
Eliminated 1947 in comparison btw 1947 - 10044
Eliminated 2114 in comparison btw 2114 - 10711
Eliminated 2293 in comparison btw 2293 - 2606
Eliminated 2370 in comparison btw 2370 - 3103
Eliminated 2726 in comparison btw 2726 - 6263
Eliminated 2727 in comparison btw 2727 - 12358
Eliminated 3582 in comparison btw 3582 - 4411
Eliminated 4982 in comparison btw 4982 - 11243
Eliminated 5338 in comparison btw 5338 - 13821
Eliminated 5750 in comparison btw 5750 - 9649
Eliminated 8371 in comparison btw 8371 - 11991
Eliminated 11111 in comparison btw 11111 - 11961
Eliminated 11243 in comparison btw 11243 - 11430
Eliminated 13581 in comparison btw 11794 - 13581
Eliminated 6974 in comparison btw 6974 - 9336
Eliminated 466 in comparison btw 466 - 3037
Eliminated 15 in comparison btw 15 - 2082
Eliminated 6914 in com

In [18]:
# copy remaining ones to different folder
import shutil

# !mkdir remaining_subsamples

def getFnameFromIndex(index):
    """Return the original file stem stored under ``index`` in ``pickleDict``.

    Keys have the form ``"<index>_<stem>"``. The key must *start* with
    ``"<index>_"``: a substring test would let index 1 match ``"11_..."`` or
    ``"21_..."`` and return the wrong file. Everything after the first
    underscore is returned, so stems that contain underscores stay intact.

    Raises:
        IndexError: if no key starts with ``"<index>_"``.
    """
    prefix = str(index) + '_'
    for key in pickleDict:
        if key.startswith(prefix):
            return key.split('_', 1)[1]
    raise IndexError('no pickle entry found for index ' + str(index))

# Append the name of every remaining sample to process.txt, one per line.
# The file is opened once instead of once per sample. Append mode is kept, so
# running this cell again adds the names a second time.
# (An earlier variant copied the matching .bvh files with shutil.copy2 instead
# of listing the names.)
with open('process.txt','a') as f:
    for r in remains:
        f.write(getFnameFromIndex(r)+'\n')


In [22]:
# Drop the A020-labelled entries and sort the rest.

def process_file(input_file, output_file):
    """Filter, de-duplicate and sort a list of ``GxxxTxxxAxxxRxxxPx`` names.

    Reads ``input_file`` (UTF-8, one name per line), discards blank lines and
    every name containing ``'A020'``, removes duplicates, sorts by the numeric
    G, T, A, R and P fields, and writes the result to ``output_file`` with
    exactly one name per line.

    Line endings are stripped before comparing, so a last line without a
    trailing newline is neither treated as a distinct entry nor glued to its
    neighbour after sorting.

    Raises:
        ValueError / IndexError: if a non-blank line does not follow the
            fixed-width ``GxxxTxxxAxxxRxxxPx`` layout.
    """
    with open(input_file, 'r', encoding='utf-8') as file:
        names = {line.strip() for line in file}

    # Blank lines carry no name and would break the sort key.
    names.discard('')

    # Remove the entries that contain the A020 label.
    kept = [name for name in names if 'A020' not in name]

    def gtarp_key(name):
        # Fixed-width fields; the name itself breaks ties so the output order
        # does not depend on set iteration order.
        return (
            int(name[1:4]),    # Gxxx
            int(name[5:8]),    # Txxx
            int(name[9:12]),   # Axxx
            int(name[13:16]),  # Rxxx
            int(name[17]),     # Px
            name,
        )

    kept.sort(key=gtarp_key)

    # Write the processed names to the output file.
    with open(output_file, 'w', encoding='utf-8') as file:
        file.writelines(name + '\n' for name in kept)

# Input and output file paths.
# NOTE(review): the input path is absolute and machine-specific, and it is not
# the process.txt written by the previous cell - confirm this is intended.
output_file = 'processedAll.txt'
input_file = '/sata/public/yyqi/Dataset/OCEAN/all.txt'

# Run the filter-and-sort pass.
process_file(input_file=input_file, output_file=output_file)
