In [3]:
import os
import json

def check_json_encoding(folder_path):
    """Return the names of ``.json`` files in ``folder_path`` that are not valid UTF-8.

    Only files whose bytes cannot be decoded as UTF-8 are returned. Files
    that decode fine but do not parse as JSON (including empty files) are a
    different problem; they are reported on stdout separately instead of
    being mislabelled as an encoding failure.

    Args:
        folder_path: Directory to scan (non-recursive). Only entries whose
            name ends with ``.json`` are inspected.

    Returns:
        List of file names (not full paths), in sorted order, that raised
        ``UnicodeDecodeError`` when read as UTF-8.

    Side effects:
        Prints the number of non-UTF-8 files, and, if any were found, one
        extra line listing files that are valid UTF-8 but not valid JSON.
    """
    non_utf8_files = []
    malformed_files = []

    # Sorted so the returned list is deterministic; os.listdir order is arbitrary.
    for filename in sorted(os.listdir(folder_path)):
        if not filename.endswith('.json'):
            continue
        file_path = os.path.join(folder_path, filename)
        try:
            with open(file_path, encoding='utf-8') as f:
                # json.load reads (and therefore decodes) the whole file before
                # parsing, so a bad byte sequence surfaces as UnicodeDecodeError.
                json.load(f)
        except UnicodeDecodeError:
            non_utf8_files.append(filename)
        except json.JSONDecodeError:
            # Decoded successfully, so the encoding is fine; the content is not JSON.
            malformed_files.append(filename)

    print(len(non_utf8_files))
    if malformed_files:
        print(f"Valid UTF-8 but not valid JSON: {malformed_files}")
    return non_utf8_files

# Example usage: scan the dataset folder and print a one-line verdict.
folder_path = '/home/jc-merlab/Pictures/Data/occ_panda_physical_dataset/'
non_utf8_files = check_json_encoding(folder_path)

# A non-empty list means at least one file was flagged by the check.
print(
    f"These JSON files are not encoded in UTF-8: {non_utf8_files}"
    if non_utf8_files
    else "All JSON files are encoded in UTF-8."
)

0
All JSON files are encoded in UTF-8.


In [2]:
import os

def count_files_in_folder(folder_path):
    """Count the ``.jpg`` and ``.json`` entries directly inside ``folder_path``.

    Matching is by file-name suffix only and is case-sensitive; the listing
    is not recursive.

    Args:
        folder_path: Directory whose entries are counted.

    Returns:
        Tuple ``(jpg_count, json_count)``.
    """
    entries = os.listdir(folder_path)
    # A name cannot end with both suffixes, so two independent tallies
    # are equivalent to a single if/elif pass.
    n_jpg = sum(1 for entry in entries if entry.endswith('.jpg'))
    n_json = sum(1 for entry in entries if entry.endswith('.json'))
    return n_jpg, n_json

# Example usage: tally images and annotation files in the dataset folder.
folder_path = '/home/jc-merlab/Pictures/Data/occ_panda_physical_dataset/'
jpg_count, json_count = count_files_in_folder(folder_path)

# One print call, one line per count.
print(
    f"Number of .jpg files: {jpg_count}",
    f"Number of .json files: {json_count}",
    sep="\n",
)

Number of .jpg files: 24835
Number of .json files: 24835


In [5]:
import os
import chardet

def check_file_encoding(file_path):
    """Run chardet's detector over the raw bytes of ``file_path``.

    The whole file is read into memory in binary mode.

    Args:
        file_path: Path of the file to inspect.

    Returns:
        Whatever ``chardet.detect`` returns for the file's bytes; callers in
        this notebook read its ``'encoding'`` and ``'confidence'`` entries.
    """
    with open(file_path, 'rb') as handle:
        detection = chardet.detect(handle.read())
    return detection

def check_json_files_encoding(folder_path):
    """Detect the encoding of every ``.json`` file directly inside ``folder_path``.

    Args:
        folder_path: Directory to scan (non-recursive).

    Returns:
        Dict mapping each ``.json`` file name to the result of
        ``check_file_encoding`` for that file, in ``os.listdir`` order.
    """
    return {
        entry: check_file_encoding(os.path.join(folder_path, entry))
        for entry in os.listdir(folder_path)
        if entry.endswith('.json')
    }

# Example usage: detect encodings for the copy of the dataset on removable media.
folder_path = '/media/jc-merlab/588F-64E2/Data/occ_panda_physical_dataset/'
file_encodings = check_json_files_encoding(folder_path)

# Emit a three-line record per file, followed by a blank separator line.
for filename, encoding_info in file_encodings.items():
    print(
        f"File: {filename}",
        f"Encoding: {encoding_info['encoding']}",
        f"Confidence: {encoding_info['confidence']}",
        sep="\n",
        end="\n\n",
    )

File: 000000.json
Encoding: ascii
Confidence: 1.0

File: 000001.json
Encoding: ascii
Confidence: 1.0

File: 000002.json
Encoding: ascii
Confidence: 1.0

File: 000003.json
Encoding: None
Confidence: 0.0

File: 000004.json
Encoding: None
Confidence: 0.0

File: 000005.json
Encoding: None
Confidence: 0.0

File: 000006.json
Encoding: None
Confidence: 0.0

File: 000007.json
Encoding: None
Confidence: 0.0

File: 000008.json
Encoding: None
Confidence: 0.0

File: 000009.json
Encoding: ascii
Confidence: 1.0

File: 000010.json
Encoding: ascii
Confidence: 1.0

File: 000011.json
Encoding: ascii
Confidence: 1.0

File: 000012.json
Encoding: ascii
Confidence: 1.0

File: 000013.json
Encoding: ascii
Confidence: 1.0

File: 000014.json
Encoding: ascii
Confidence: 1.0

File: 000015.json
Encoding: ascii
Confidence: 1.0

File: 000016.json
Encoding: ascii
Confidence: 1.0

File: 000017.json
Encoding: ascii
Confidence: 1.0

File: 000018.json
Encoding: ascii
Confidence: 1.0

File: 000019.json
Encoding: ascii
Con

In [1]:
import os
import shutil

def copy_files(src_folder, dst_folder, last_idx):
    """Copy zero-padded ``NNNNNN.jpg`` / ``NNNNNN.json`` pairs between folders.

    For every index from 0 to ``last_idx`` inclusive, the six-digit ``.jpg``
    and ``.json`` files are copied from ``src_folder`` to ``dst_folder`` if
    they exist in the source. Missing files are skipped, and the log line for
    each index names only the files that were really copied.

    Args:
        src_folder: Directory to copy from.
        dst_folder: Directory to copy into; created (with parents) if absent.
        last_idx: Highest index to process (inclusive). A negative value
            processes nothing.

    Returns:
        None. Progress is printed to stdout: a ``Copied: ...`` line for files
        that were copied and a ``Skipped (not found in source): ...`` line for
        files that were not present.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(dst_folder, exist_ok=True)

    for i in range(last_idx + 1):
        copied = []
        missing = []

        for filename in (f"{i:06d}.jpg", f"{i:06d}.json"):
            src_path = os.path.join(src_folder, filename)
            if os.path.exists(src_path):
                shutil.copy(src_path, os.path.join(dst_folder, filename))
                copied.append(filename)
            else:
                missing.append(filename)

        # Report only what actually happened, so gaps in the source numbering
        # are visible instead of being logged as successful copies.
        if copied:
            print(f"Copied: {' and '.join(copied)}")
        if missing:
            print(f"Skipped (not found in source): {' and '.join(missing)}")

# Source and destination folders for the copy; edit these for your machine.
src_folder = "/home/jc-merlab/Pictures/Data/occ_panda_physical_dataset/"
dst_folder = "/home/jc-merlab/Pictures/Data/source_physical_occlusion/"
# Inclusive upper index: copy_files iterates range(last_idx + 1).
last_idx = 12417

copy_files(src_folder=src_folder, dst_folder=dst_folder, last_idx=last_idx)

Copied: 000000.jpg and 000000.json
Copied: 000001.jpg and 000001.json
Copied: 000002.jpg and 000002.json
Copied: 000003.jpg and 000003.json
Copied: 000004.jpg and 000004.json
Copied: 000005.jpg and 000005.json
Copied: 000006.jpg and 000006.json
Copied: 000007.jpg and 000007.json
Copied: 000008.jpg and 000008.json
Copied: 000009.jpg and 000009.json
Copied: 000010.jpg and 000010.json
Copied: 000011.jpg and 000011.json
Copied: 000012.jpg and 000012.json
Copied: 000013.jpg and 000013.json
Copied: 000014.jpg and 000014.json
Copied: 000015.jpg and 000015.json
Copied: 000016.jpg and 000016.json
Copied: 000017.jpg and 000017.json
Copied: 000018.jpg and 000018.json
Copied: 000019.jpg and 000019.json
Copied: 000020.jpg and 000020.json
Copied: 000021.jpg and 000021.json
Copied: 000022.jpg and 000022.json
Copied: 000023.jpg and 000023.json
Copied: 000024.jpg and 000024.json
Copied: 000025.jpg and 000025.json
Copied: 000026.jpg and 000026.json
Copied: 000027.jpg and 000027.json
Copied: 000028.jpg a

Copied: 001965.jpg and 001965.json
Copied: 001966.jpg and 001966.json
Copied: 001967.jpg and 001967.json
Copied: 001968.jpg and 001968.json
Copied: 001969.jpg and 001969.json
Copied: 001970.jpg and 001970.json
Copied: 001971.jpg and 001971.json
Copied: 001972.jpg and 001972.json
Copied: 001973.jpg and 001973.json
Copied: 001974.jpg and 001974.json
Copied: 001975.jpg and 001975.json
Copied: 001976.jpg and 001976.json
Copied: 001977.jpg and 001977.json
Copied: 001978.jpg and 001978.json
Copied: 001979.jpg and 001979.json
Copied: 001980.jpg and 001980.json
Copied: 001981.jpg and 001981.json
Copied: 001982.jpg and 001982.json
Copied: 001983.jpg and 001983.json
Copied: 001984.jpg and 001984.json
Copied: 001985.jpg and 001985.json
Copied: 001986.jpg and 001986.json
Copied: 001987.jpg and 001987.json
Copied: 001988.jpg and 001988.json
Copied: 001989.jpg and 001989.json
Copied: 001990.jpg and 001990.json
Copied: 001991.jpg and 001991.json
Copied: 001992.jpg and 001992.json
Copied: 001993.jpg a

Copied: 003846.jpg and 003846.json
Copied: 003847.jpg and 003847.json
Copied: 003848.jpg and 003848.json
Copied: 003849.jpg and 003849.json
Copied: 003850.jpg and 003850.json
Copied: 003851.jpg and 003851.json
Copied: 003852.jpg and 003852.json
Copied: 003853.jpg and 003853.json
Copied: 003854.jpg and 003854.json
Copied: 003855.jpg and 003855.json
Copied: 003856.jpg and 003856.json
Copied: 003857.jpg and 003857.json
Copied: 003858.jpg and 003858.json
Copied: 003859.jpg and 003859.json
Copied: 003860.jpg and 003860.json
Copied: 003861.jpg and 003861.json
Copied: 003862.jpg and 003862.json
Copied: 003863.jpg and 003863.json
Copied: 003864.jpg and 003864.json
Copied: 003865.jpg and 003865.json
Copied: 003866.jpg and 003866.json
Copied: 003867.jpg and 003867.json
Copied: 003868.jpg and 003868.json
Copied: 003869.jpg and 003869.json
Copied: 003870.jpg and 003870.json
Copied: 003871.jpg and 003871.json
Copied: 003872.jpg and 003872.json
Copied: 003873.jpg and 003873.json
Copied: 003874.jpg a

Copied: 005744.jpg and 005744.json
Copied: 005745.jpg and 005745.json
Copied: 005746.jpg and 005746.json
Copied: 005747.jpg and 005747.json
Copied: 005748.jpg and 005748.json
Copied: 005749.jpg and 005749.json
Copied: 005750.jpg and 005750.json
Copied: 005751.jpg and 005751.json
Copied: 005752.jpg and 005752.json
Copied: 005753.jpg and 005753.json
Copied: 005754.jpg and 005754.json
Copied: 005755.jpg and 005755.json
Copied: 005756.jpg and 005756.json
Copied: 005757.jpg and 005757.json
Copied: 005758.jpg and 005758.json
Copied: 005759.jpg and 005759.json
Copied: 005760.jpg and 005760.json
Copied: 005761.jpg and 005761.json
Copied: 005762.jpg and 005762.json
Copied: 005763.jpg and 005763.json
Copied: 005764.jpg and 005764.json
Copied: 005765.jpg and 005765.json
Copied: 005766.jpg and 005766.json
Copied: 005767.jpg and 005767.json
Copied: 005768.jpg and 005768.json
Copied: 005769.jpg and 005769.json
Copied: 005770.jpg and 005770.json
Copied: 005771.jpg and 005771.json
Copied: 005772.jpg a

Copied: 007643.jpg and 007643.json
Copied: 007644.jpg and 007644.json
Copied: 007645.jpg and 007645.json
Copied: 007646.jpg and 007646.json
Copied: 007647.jpg and 007647.json
Copied: 007648.jpg and 007648.json
Copied: 007649.jpg and 007649.json
Copied: 007650.jpg and 007650.json
Copied: 007651.jpg and 007651.json
Copied: 007652.jpg and 007652.json
Copied: 007653.jpg and 007653.json
Copied: 007654.jpg and 007654.json
Copied: 007655.jpg and 007655.json
Copied: 007656.jpg and 007656.json
Copied: 007657.jpg and 007657.json
Copied: 007658.jpg and 007658.json
Copied: 007659.jpg and 007659.json
Copied: 007660.jpg and 007660.json
Copied: 007661.jpg and 007661.json
Copied: 007662.jpg and 007662.json
Copied: 007663.jpg and 007663.json
Copied: 007664.jpg and 007664.json
Copied: 007665.jpg and 007665.json
Copied: 007666.jpg and 007666.json
Copied: 007667.jpg and 007667.json
Copied: 007668.jpg and 007668.json
Copied: 007669.jpg and 007669.json
Copied: 007670.jpg and 007670.json
Copied: 007671.jpg a

Copied: 009596.jpg and 009596.json
Copied: 009597.jpg and 009597.json
Copied: 009598.jpg and 009598.json
Copied: 009599.jpg and 009599.json
Copied: 009600.jpg and 009600.json
Copied: 009601.jpg and 009601.json
Copied: 009602.jpg and 009602.json
Copied: 009603.jpg and 009603.json
Copied: 009604.jpg and 009604.json
Copied: 009605.jpg and 009605.json
Copied: 009606.jpg and 009606.json
Copied: 009607.jpg and 009607.json
Copied: 009608.jpg and 009608.json
Copied: 009609.jpg and 009609.json
Copied: 009610.jpg and 009610.json
Copied: 009611.jpg and 009611.json
Copied: 009612.jpg and 009612.json
Copied: 009613.jpg and 009613.json
Copied: 009614.jpg and 009614.json
Copied: 009615.jpg and 009615.json
Copied: 009616.jpg and 009616.json
Copied: 009617.jpg and 009617.json
Copied: 009618.jpg and 009618.json
Copied: 009619.jpg and 009619.json
Copied: 009620.jpg and 009620.json
Copied: 009621.jpg and 009621.json
Copied: 009622.jpg and 009622.json
Copied: 009623.jpg and 009623.json
Copied: 009624.jpg a

Copied: 011441.jpg and 011441.json
Copied: 011442.jpg and 011442.json
Copied: 011443.jpg and 011443.json
Copied: 011444.jpg and 011444.json
Copied: 011445.jpg and 011445.json
Copied: 011446.jpg and 011446.json
Copied: 011447.jpg and 011447.json
Copied: 011448.jpg and 011448.json
Copied: 011449.jpg and 011449.json
Copied: 011450.jpg and 011450.json
Copied: 011451.jpg and 011451.json
Copied: 011452.jpg and 011452.json
Copied: 011453.jpg and 011453.json
Copied: 011454.jpg and 011454.json
Copied: 011455.jpg and 011455.json
Copied: 011456.jpg and 011456.json
Copied: 011457.jpg and 011457.json
Copied: 011458.jpg and 011458.json
Copied: 011459.jpg and 011459.json
Copied: 011460.jpg and 011460.json
Copied: 011461.jpg and 011461.json
Copied: 011462.jpg and 011462.json
Copied: 011463.jpg and 011463.json
Copied: 011464.jpg and 011464.json
Copied: 011465.jpg and 011465.json
Copied: 011466.jpg and 011466.json
Copied: 011467.jpg and 011467.json
Copied: 011468.jpg and 011468.json
Copied: 011469.jpg a