In [1]:
import os
import shutil
from PIL import Image
from collections import Counter, OrderedDict

## Copy and transform

In [2]:
# Sub-folder names (one per "j" variant) expected under every source folder.
jtypes = ['1j', '2j', '3j', '4j']

# Each entry pairs a source folder with the destination folder its files are
# copied into. Keeping them side by side guarantees both lists stay aligned.
_FOLDER_PAIRS = (
    ('../dataset-new/real/gray', '../dataset/real/160x160/gray'),
    ('../dataset-new/real/rgb', '../dataset/real/160x160/rgb'),
    ('../dataset-new/render/gray', '../dataset/rendering'),
    ('../dataset-new/front_lab', '../dataset/instruction-front'),
    ('../dataset-new/complete_lab', '../dataset/instruction-complete'),
    # Disabled pair, kept for reference:
    # ('../dataset-new/color_complete_lab', '../dataset/instruction-complete-color'),
)

# Parallel lists consumed by the copy step (zipped together there).
source_folders = [src for src, _ in _FOLDER_PAIRS]
destination_folders = [dst for _, dst in _FOLDER_PAIRS]

In [3]:
# Flatten every <source>/<jtype>/ folder into its destination folder.
#   * Files from '2j'/'3j'/'4j' get the j-type as a file-name prefix; files
#     from '1j' keep their original name.
#   * PNG files whose source path contains 'render' are converted to
#     grayscale and saved as JPEG; everything else is copied unchanged.
# NOTE: the whole step is skipped as soon as '../dataset' exists, so an
# interrupted earlier run is NOT resumed -- delete '../dataset' to redo it.
if not os.path.exists('../dataset'):
    for source_folder_tmp, destination_folder in zip(source_folders, destination_folders):
        # The destination is shared by all j-types of this pair: create it once.
        os.makedirs(destination_folder, exist_ok=True)

        for jtype in jtypes:
            source_folder = os.path.join(source_folder_tmp, jtype)

            # Last path component (the j-type) is used as the file-name prefix.
            folder_name_prefix = os.path.basename(source_folder)
            # Loop-invariant: rendered images are the ones that get converted.
            is_render_folder = 'render' in source_folder

            for filename in os.listdir(source_folder):
                source_file_path = os.path.join(source_folder, filename)
                # Only plain files are handled; sub-folders are ignored.
                if not os.path.isfile(source_file_path):
                    continue

                name, ext = os.path.splitext(filename)
                if folder_name_prefix != '1j':
                    new_filename = f"{folder_name_prefix}_{name}{ext}"
                else:
                    new_filename = f"{name}{ext}"

                if is_render_folder and ext == '.png':
                    # Rendered PNG -> grayscale ('L' mode) JPEG with a .jpg name.
                    new_filename = os.path.splitext(new_filename)[0] + '.jpg'
                    destination_file_path = os.path.join(destination_folder, new_filename)
                    # The context manager closes the underlying file handle;
                    # without it one handle per image stays open.
                    with Image.open(source_file_path) as png_image:
                        gray_image = png_image.convert('L')
                    gray_image.save(destination_file_path, 'JPEG')
                else:
                    destination_file_path = os.path.join(destination_folder, new_filename)
                    shutil.copy(source_file_path, destination_file_path)

            print(f"{source_folder}，文件复制完成！")
    print("文件复制完成！")
else:
    print("目标文件夹已存在，跳过复制")

../dataset-new/real/gray/1j，文件复制完成！
../dataset-new/real/gray/2j，文件复制完成！
../dataset-new/real/gray/3j，文件复制完成！
../dataset-new/real/gray/4j，文件复制完成！
../dataset-new/real/rgb/1j，文件复制完成！
../dataset-new/real/rgb/2j，文件复制完成！
../dataset-new/real/rgb/3j，文件复制完成！
../dataset-new/real/rgb/4j，文件复制完成！
../dataset-new/render/gray/1j，文件复制完成！
../dataset-new/render/gray/2j，文件复制完成！
../dataset-new/render/gray/3j，文件复制完成！
../dataset-new/render/gray/4j，文件复制完成！
../dataset-new/front_lab/1j，文件复制完成！
../dataset-new/front_lab/2j，文件复制完成！
../dataset-new/front_lab/3j，文件复制完成！
../dataset-new/front_lab/4j，文件复制完成！
../dataset-new/complete_lab/1j，文件复制完成！
../dataset-new/complete_lab/2j，文件复制完成！
../dataset-new/complete_lab/3j，文件复制完成！
../dataset-new/complete_lab/4j，文件复制完成！
文件复制完成！


In [4]:
# Mirror the whole 'transfer' folder into the dataset tree.
source_folder = '../dataset-mit/transfer'
destination_folder = '../dataset/transfer'

# An already-present destination is left untouched; otherwise the full tree is copied.
if os.path.exists(destination_folder):
    print(f"目标文件夹 {destination_folder} 已存在，跳过复制")
else:
    shutil.copytree(source_folder, destination_folder)
    print(f"已复制文件夹：{source_folder} 到 {destination_folder}")

已复制文件夹：../dataset-mit/transfer 到 ../dataset/transfer


In [5]:
# Copy sj3000.txt into the dataset folder with every '.png' occurrence removed.
txt_source_path = '../dataset-new/sj3000.txt'
txt_destination_path = '../dataset/sj3000.txt'

# Load the whole listing, then drop the '.png' text from it.
with open(txt_source_path, 'r', encoding='utf-8') as src:
    raw_text = src.read()
content = raw_text.replace('.png', '')

# Write the cleaned listing to its new location.
with open(txt_destination_path, 'w', encoding='utf-8') as dst:
    dst.write(content)
    print(f"已处理并保存文件：{txt_destination_path}")

已处理并保存文件：../dataset/sj3000.txt


## Train / validation split

### sj

In [6]:
# Split the sj3000 listing into train / validation: the first
# VAL_PER_CATEGORY entries of each category (in file order) go to validation,
# everything else to training.
VAL_PER_CATEGORY = 30

# The file is written as UTF-8 by the preprocessing step, so read it the same
# way. Blank lines are skipped: they carry no sample and have no category.
with open('../dataset/sj3000.txt', 'r', encoding='utf-8') as f:
    synt_final = [line.strip() for line in f if line.strip()]

val_synt = []
# Per-category counter of entries already assigned to validation.
val_synt_cnt = {
    'Cable1': 0,
    'Hem': 0,
    'Miss': 0,
    'Move2': 0,
    'Links2': 0,
    'Move1': 0,
    'Mesh': 0,
    'Cable2': 0,
    'Links1': 0,
    'Tuck': 0
}

for line in synt_final:
    # The category is the text before the first '_'. A category that is not
    # listed above raises KeyError, which surfaces unexpected entries early.
    cat = line.split('_')[0]
    if val_synt_cnt[cat] < VAL_PER_CATEGORY:
        val_synt.append(line)
        val_synt_cnt[cat] += 1

# Set lookup keeps the filter linear instead of scanning the list per entry.
val_synt_lookup = set(val_synt)
train_synt = [line for line in synt_final if line not in val_synt_lookup]
print(len(train_synt))
print(len(val_synt))

2700
300


### 2j3j4j

In [7]:
# Collect the names (without the '.jpg' suffix) of all 2j/3j/4j rendered
# images and store them, one per line, in 2j3j4j.txt.
js = []
# os.listdir returns entries in arbitrary order; sorting makes the listing --
# and the order-dependent train/val split built from it -- reproducible.
for filename in sorted(os.listdir('../dataset/rendering')):
    # The j-type is the text before the first '_' ('1j' files carry no prefix).
    jtype = filename.split('_')[0]
    if jtype in ('2j', '3j', '4j'):
        # Strip only a trailing '.jpg', never an occurrence inside the name.
        fname = filename[:-len('.jpg')] if filename.endswith('.jpg') else filename
        js.append(fname + '\n')

with open('../dataset/2j3j4j.txt', 'w') as f:
    # Entries already end with a newline.
    f.writelines(js)

In [8]:
# Read the 2j/3j/4j listing back, one whitespace-stripped entry per line.
with open('../dataset/2j3j4j.txt', 'r') as listing:
    js_data = [raw_line.strip() for raw_line in listing]

In [9]:
# Stratified validation split of the 2j/3j/4j entries.
# Each entry is keyed by (jstyle, stype):
#   jstyle -- text before the first '_'
#   stype  -- third-last and second-last '-'-separated fields joined by '-'
# For every group, the first ceil(count / 5) entries (in js_data order) go to
# validation; all remaining entries go to training.
VAL_ONE_IN = 5

jsynt = []
for line in js_data:
    dash_parts = line.split('-')
    # An IndexError here means the entry has fewer than three '-' fields.
    jsynt.append((line.split('_')[0], dash_parts[-3] + '-' + dash_parts[-2], line))

# Group sizes, ordered by (jstyle, stype) so groups are processed deterministically.
jsynt_cnt = OrderedDict(sorted(Counter(entry[:2] for entry in jsynt).items()))

# Bucket the entries once instead of rescanning the full list for every group.
lines_by_group = {}
for jstyle, stype, line in jsynt:
    lines_by_group.setdefault((jstyle, stype), []).append(line)

val_jsynt = []
for group, cnt in jsynt_cnt.items():
    # Integer ceil(cnt / VAL_ONE_IN): same count the float comparison produced.
    n_val = -(-cnt // VAL_ONE_IN)
    val_jsynt.extend(lines_by_group[group][:n_val])

# Set lookup keeps the train filter linear.
val_jsynt_lookup = set(val_jsynt)
train_jsynt = [line for line in js_data if line not in val_jsynt_lookup]
print(len(train_jsynt))
print(len(val_jsynt))

1560
390


### combine

In [10]:
# Append the 2j/3j/4j splits to the sj splits.
# The lists are extended in place, so re-running this cell would append the
# same entries again and silently duplicate samples. The guards make the cell
# idempotent: nothing is added when the list already ends with those entries.
if train_jsynt and train_synt[-len(train_jsynt):] != train_jsynt:
    train_synt += train_jsynt
if val_jsynt and val_synt[-len(val_jsynt):] != val_jsynt:
    val_synt += val_jsynt
print(len(train_synt))
print(len(val_synt))

4260
690


In [11]:
def _write_lines(path, lines):
    """Write every entry of `lines` to `path`, one per line.

    The file is created or truncated; an empty `lines` yields an empty file.
    """
    with open(path, 'w') as f:
        f.writelines(line + '\n' for line in lines)


# Test listings contain every sample: training entries first, then validation.
all_synt = train_synt + val_synt

# (output file, entries) for every split listing.
# NOTE(review): the *_real listings reuse the synthetic splits -- presumably
# real and rendered images share file names; confirm against the consumers.
split_files = [
    ('../dataset/train_synt.txt', train_synt),
    ('../dataset/val_synt.txt', val_synt),
    ('../dataset/train_unsup.txt', []),  # intentionally left empty
    ('../dataset/train_real.txt', train_synt),
    ('../dataset/val_real.txt', val_synt),
    ('../dataset/test_real.txt', all_synt),
    ('../dataset/test_synt.txt', all_synt),
]

for split_path, split_lines in split_files:
    _write_lines(split_path, split_lines)