# SeAct.txt

In [None]:
import os

# Directory tree to traverse
root_dir = '/root/autodl-tmp/SeAct'

# Text file that will receive the collected file paths
output_file = '/root/autodl-tmp/SeAct.txt'

# Collect the path of every file beneath a directory tree
def get_leaf_files(dir_path):
    """Return the paths of all files found under ``dir_path``, recursively.

    Each path is built as ``os.path.join(root, name)``, so the results are
    absolute only when ``dir_path`` itself is absolute.

    Directories are visited in sorted order and the files of each directory
    are sorted too, which makes the listing reproducible across runs and
    file systems (``os.walk`` otherwise reports names in arbitrary order).

    Args:
        dir_path: Directory to scan. A missing or unreadable directory
            produces an empty list, because ``os.walk`` ignores scan errors
            by default.

    Returns:
        list: One path string per file reported by ``os.walk``.
    """
    leaf_files = []
    for root, dirs, files in os.walk(dir_path):
        # Sorting ``dirs`` in place steers the order of the top-down descent.
        dirs.sort()
        leaf_files.extend(os.path.join(root, name) for name in sorted(files))
    return leaf_files

# Write a collection of paths to a text file, one per line
def write_paths_to_file(file_paths, output_file):
    """Write every entry of ``file_paths`` to ``output_file``, one per line.

    The file is created or truncated. It is always encoded as UTF-8 so that
    non-ASCII path names are written identically regardless of the locale
    the script happens to run under.

    Args:
        file_paths: Iterable of paths; each item is formatted with ``str()``
            semantics and followed by a newline.
        output_file: Destination path of the text file.
    """
    with open(output_file, 'w', encoding='utf-8') as f:
        f.writelines(f"{path}\n" for path in file_paths)


In [None]:
# Collect the paths of all files under root_dir
leaf_files = get_leaf_files(root_dir)

# Write those paths to the output text file
write_paths_to_file(leaf_files, output_file)

print(f"文件路径已写入 {output_file}")

# train.txt & val.txt

In [None]:
import os
import json

# Input and output locations
base_dir = '/root/autodl-tmp/SeAct_Sampled_EZCLIP'
label_mapping_file = '/root/wj/ExACT/Dataloader/SeAct/SeAct_idx_to_label.json'
seact_train_file = '/root/wj/ExACT/Dataloader/SeAct/SeAct_train.txt'
seact_val_file = '/root/wj/ExACT/Dataloader/SeAct/SeAct_val.txt'
train_output_file = '/root/wj/EZ_CLIP/dataset_splits/SeAct/Zero-shot/train.txt'
val_output_file = '/root/wj/EZ_CLIP/dataset_splits/SeAct/Zero-shot/val.txt'


def _read_split_ids(split_file):
    """Return the sample identifiers listed in a split file.

    Every non-blank line is treated as a '/'-separated path and reduced to
    ``"<parent directory>/<file name up to the first '-'>"``. Blank lines
    (for example a trailing newline at the end of the file) are skipped
    instead of raising ``IndexError``.
    """
    ids = []
    with open(split_file, 'r') as f:
        for raw_line in f:
            line = raw_line.strip()
            if not line:
                continue
            parts = line.split('/')
            ids.append('/'.join((parts[-2], parts[-1].split('-')[0])))
    return ids


def _write_lines(path, lines):
    """Write ``lines`` to ``path``, each terminated by a newline."""
    with open(path, 'w') as f:
        f.writelines(line + '\n' for line in lines)


# Load the label mapping
with open(label_mapping_file, 'r') as f:
    label_mapping = json.load(f)

# Load the train and val sample identifiers
train_paths = _read_split_ids(seact_train_file)
val_paths = _read_split_ids(seact_val_file)

# Sets give constant-time membership tests inside the directory loop
train_ids = set(train_paths)
val_ids = set(val_paths)

# Lines destined for the two output files
train_output = []
val_output = []

# Only the immediate sub-directories of base_dir are needed, so take just the
# first os.walk() result instead of descending through every frame directory.
# A missing base_dir yields no directories, as with a full os.walk().
_, top_level_dirs, _ = next(os.walk(base_dir), (base_dir, [], []))

# Sorted traversal keeps the generated split files reproducible across runs.
for sub_dir in sorted(top_level_dirs):
    sub_dir_path = os.path.join(base_dir, sub_dir)
    for sub_sub_dir in sorted(os.listdir(sub_dir_path)):
        full_path = os.path.join(sub_dir_path, sub_sub_dir)
        if not os.path.isdir(full_path):
            continue
        num_files = sum(1 for name in os.listdir(full_path) if name.endswith('.jpg'))
        # NOTE(review): the label is looked up by the innermost directory
        # name and falls back to -1 when absent — confirm this is the
        # intended key for label_mapping.
        label_index = label_mapping.get(sub_sub_dir, -1)
        output_line = f"{full_path} {num_files} {label_index}"
        # Identifier = last two path components, matching the split-file ids
        path_identifier = '/'.join(full_path.split('/')[-2:])
        if path_identifier in train_ids:
            train_output.append(output_line)
        elif path_identifier in val_ids:
            val_output.append(output_line)

# Write train_output_file and val_output_file
_write_lines(train_output_file, train_output)
_write_lines(val_output_file, val_output)

print(f"train.txt and val.txt files have been created at {train_output_file} and {val_output_file}")


# label.csv

In [None]:
import json
import csv

# File locations
idx_to_label_file = '/root/wj/ExACT/Dataloader/SeAct/SeAct_idx_to_label.json'
idx_mapping_file = '/root/wj/ExACT/Dataloader/SeAct/SeAct.json'
output_csv_file = '/root/wj/EZ_CLIP/lists/seact_labels.csv'

# Load both JSON mappings
with open(idx_to_label_file, 'r') as f:
    idx_to_label = json.load(f)

with open(idx_mapping_file, 'r') as f:
    idx_mapping = json.load(f)

# Reverse lookup (value -> key of idx_mapping) used to find the class name
idx_to_class_name = dict(zip(idx_mapping.values(), idx_mapping.keys()))

# One [label, class name] row per entry of idx_to_label; entries whose
# integer index has no class name are reported as "Unknown"
csv_data = [
    [label, idx_to_class_name.get(int(idx), "Unknown")]
    for idx, label in idx_to_label.items()
]

# Emit the header followed by all rows
with open(output_csv_file, 'w', newline='') as csvfile:
    csvwriter = csv.writer(csvfile)
    csvwriter.writerow(['id', 'name'])
    csvwriter.writerows(csv_data)

print(f"CSV file has been created at {output_csv_file}")


# Description

In [None]:

import json
import csv

# File locations
label_mapping_file = '/root/wj/ExACT/Dataloader/SeAct/SeAct.json'
idx_to_label_file = '/root/wj/ExACT/Dataloader/SeAct/SeAct_idx_to_label.json'
description_file = '/root/wj/ExACT/Dataloader/SeAct/SeAct_ds_cls.json'
output_csv_file = '/root/wj/EZ_CLIP/GPT_discription/SeAct_gpt_Class_discription_new.csv'


def _load_json(path):
    """Parse the JSON file at ``path``.

    The file is decoded explicitly as UTF-8 so that non-ASCII text is read
    the same way regardless of the locale's default encoding.
    """
    with open(path, 'r', encoding='utf-8') as f:
        return json.load(f)


# Load the three JSON inputs
idx_mapping = _load_json(label_mapping_file)
idx_to_label = _load_json(idx_to_label_file)
descriptions = _load_json(description_file)

# Build one CSV row per class whose index appears in idx_to_label
csv_data = []

for class_name, idx in idx_mapping.items():
    idx_key = str(idx)  # idx_to_label is keyed by the index as a string
    if idx_key not in idx_to_label:
        continue
    label = idx_to_label[idx_key]
    # First description whose value equals this class index; "" when none
    description = next(
        (desc for desc, desc_label in descriptions.items() if desc_label == idx),
        "",
    )
    # Turn a leading "<class>: ..." into "<class> is ..." (first match only)
    reworded = description.replace(f"{class_name}: ", f"{class_name} is ", 1)
    csv_data.append([label, class_name, '\n\n' + reworded])

# Write the CSV as UTF-8 so free-text descriptions containing non-ASCII
# characters cannot fail or be mis-encoded under a non-UTF-8 locale
with open(output_csv_file, 'w', newline='', encoding='utf-8') as csvfile:
    csvwriter = csv.writer(csvfile)
    csvwriter.writerow(['SNo', 'Class Name', 'GPT3 discription'])
    csvwriter.writerows(csv_data)

print(f"CSV file has been created at {output_csv_file}")
