In [None]:
import os
import shutil

def copy_csv_files(source_dir, target_dir, suffix='CONCH.csv'):
    """
    Copy files whose name ends with ``suffix`` from the tree under source_dir
    into the single flat directory target_dir.

    Name clashes in target_dir are never overwritten: the incoming file is
    stored as ``<base>_<n><ext>`` using the first free ``n`` starting at 1.
    target_dir itself is excluded from the search, so a target that lives
    inside source_dir does not get its own contents copied again.

    Parameters:
        source_dir (str): The root directory to search recursively.
        target_dir (str): The directory to copy matching files to; created
            if it does not exist.
        suffix (str): Filename ending that selects the files to copy.
            Defaults to 'CONCH.csv'.

    Returns:
        None. One "Copied: ..." line is printed per copied file.
    """
    # Ensure target directory exists
    os.makedirs(target_dir, exist_ok=True)
    target_real = os.path.realpath(target_dir)

    # Walk through the source directory (top-down, so `dirs` can be pruned in place)
    for root, dirs, files in os.walk(source_dir):
        # Never descend into the target directory: files copied there earlier
        # in this run (or in a previous run) would be picked up as sources.
        dirs[:] = [
            d for d in dirs
            if os.path.realpath(os.path.join(root, d)) != target_real
        ]
        # Sorted traversal makes the `_<n>` numbering reproducible between runs.
        dirs.sort()

        # source_dir == target_dir: skip the target's own files, keep walking subdirs.
        if os.path.realpath(root) == target_real:
            continue

        for file in sorted(files):
            if not file.endswith(suffix):
                continue

            source_file = os.path.join(root, file)
            target_file = os.path.join(target_dir, file)

            # Avoid overwriting existing files with the same name
            base, ext = os.path.splitext(file)
            counter = 1
            while os.path.exists(target_file):
                target_file = os.path.join(target_dir, f"{base}_{counter}{ext}")
                counter += 1

            # copy2 also carries over file metadata such as timestamps
            shutil.copy2(source_file, target_file)
            print(f"Copied: {source_file} -> {target_file}")

# Example usage: gather the CONCH feature CSVs of one run into the dataset folder.
source_directory = "/remote-home/share/lisj/Workspace/SOTA_NAS/runs/cmz/2024-12-04_16-33/feature"
target_directory = "/remote-home/share/lisj/Workspace/SOTA_NAS/datasets/CONCH-feature"
copy_csv_files(source_dir=source_directory, target_dir=target_directory)


In [2]:
import os
import shutil

def safe_copyfile(src, dst, buffer_size=1024*1024):
    """
    Stream the bytes of src into dst, buffer_size bytes at a time.

    The copy goes through shutil.copyfileobj on two ordinary file objects
    (plain read/write calls) instead of a sendfile-based copy. dst is
    created, or truncated if it already exists.
    """
    with open(src, 'rb') as reader, open(dst, 'wb') as writer:
        shutil.copyfileobj(reader, writer, buffer_size)

def _copy_renamed_features(feature_dir, suffix, target_dir):
    """Copy each ``<stem><suffix>`` file in feature_dir to ``target_dir/<stem>.csv``, skipping existing targets."""
    if not os.path.isdir(feature_dir):
        return

    # Sorted so that, when two sources map to the same target name, the one
    # that gets copied is the same on every run.
    for file_name in sorted(os.listdir(feature_dir)):
        if not file_name.endswith(suffix):
            continue

        source_file = os.path.join(feature_dir, file_name)
        # Strip only the trailing suffix; str.replace would also remove the
        # marker if it happened to occur earlier in the file name.
        base_name = file_name[:-len(suffix)] + '.csv'
        target_file = os.path.join(target_dir, base_name)

        # Existing targets are left untouched (silently skipped).
        if os.path.exists(target_file):
            continue

        safe_copyfile(source_file, target_file)
        print(f"Copied {source_file} -> {target_file}")


def copy_and_rename_skip_duplicates(source_dir, conch_target_dir, prov_target_dir):
    """
    Collect per-image feature CSVs into two flat directories.

    For every sub-directory ``<source_dir>/<image_id>/``:
      * ``CONCH/<stem>_CONCH.csv`` is copied to ``<conch_target_dir>/<stem>.csv``
      * ``PROV/<stem>_PROV.csv``   is copied to ``<prov_target_dir>/<stem>.csv``

    A file whose target already exists is skipped, so re-running only copies
    what is still missing. Missing CONCH/PROV sub-directories are ignored.

    Parameters:
        source_dir (str): Directory whose immediate sub-directories are scanned.
        conch_target_dir (str): Destination for CONCH files; created if absent.
        prov_target_dir (str): Destination for PROV files; created if absent.

    Returns:
        None. One "Copied ..." line is printed per copied file.
    """
    os.makedirs(conch_target_dir, exist_ok=True)
    os.makedirs(prov_target_dir, exist_ok=True)

    for image_id_dir in sorted(os.listdir(source_dir)):
        image_id_path = os.path.join(source_dir, image_id_dir)
        if not os.path.isdir(image_id_path):
            continue

        # Handle the CONCH directory
        _copy_renamed_features(
            os.path.join(image_id_path, 'CONCH'), '_CONCH.csv', conch_target_dir
        )

        # Handle the PROV directory
        _copy_renamed_features(
            os.path.join(image_id_path, 'PROV'), '_PROV.csv', prov_target_dir
        )

# Adjust these paths before running.
source_dir = "/remote-home/share/lisj/Workspace/SOTA_NAS/runs/cmz/2024-12-04_16-33/feature"
conch_target_dir = "/remote-home/share/lisj/Workspace/SOTA_NAS/datasets/CONCH-feature"
prov_target_dir = "/remote-home/share/lisj/Workspace/SOTA_NAS/datasets/PROV-feature"

copy_and_rename_skip_duplicates(
    source_dir=source_dir,
    conch_target_dir=conch_target_dir,
    prov_target_dir=prov_target_dir,
)


File /remote-home/share/lisj/Workspace/SOTA_NAS/datasets/CONCH-feature/1909791.csv already exists. Skipping.
File /remote-home/share/lisj/Workspace/SOTA_NAS/datasets/CONCH-feature/1917457.csv already exists. Skipping.
File /remote-home/share/lisj/Workspace/SOTA_NAS/datasets/CONCH-feature/1913647.csv already exists. Skipping.
File /remote-home/share/lisj/Workspace/SOTA_NAS/datasets/CONCH-feature/1906548.csv already exists. Skipping.
File /remote-home/share/lisj/Workspace/SOTA_NAS/datasets/CONCH-feature/1912577.csv already exists. Skipping.
File /remote-home/share/lisj/Workspace/SOTA_NAS/datasets/CONCH-feature/1917208.csv already exists. Skipping.
File /remote-home/share/lisj/Workspace/SOTA_NAS/datasets/CONCH-feature/1913632.csv already exists. Skipping.
File /remote-home/share/lisj/Workspace/SOTA_NAS/datasets/CONCH-feature/1907343.csv already exists. Skipping.
File /remote-home/share/lisj/Workspace/SOTA_NAS/datasets/CONCH-feature/1913052.csv already exists. Skipping.
File /remote-home/s

In [12]:
import os
import csv
import numpy as np
import torch
from torchvision import transforms
from PIL import Image
from openslide import open_slide
from openslide.deepzoom import DeepZoomGenerator
import sys

# Step-by-step diagnostic: open one slide, pull a single tile, push it through
# the transform, and write one row of placeholder features to a CSV file.
# A progress line is printed after every step so that, if the run stops, the
# last printed line identifies the step that was reached.
print("Start of script")

slide_path = "/remote-home/share/lisj/Workspace/SOTA_NAS/datasets/WSI/1819360.mrxs"
print("Check slide path:", slide_path, "exists:", os.path.exists(slide_path))

if not os.path.exists(slide_path):
    print("Slide not found, exiting...")
    sys.exit(1)

print("Slide found")

output_dir = "/remote-home/share/lisj/test"
print("Creating output dir:", output_dir)
os.makedirs(output_dir, exist_ok=True)
output_csv_path = os.path.join(output_dir, "test_features.csv")
print("Output CSV path:", output_csv_path)

# Per-channel normalization; the constants look like the commonly used
# ImageNet mean/std values — TODO confirm they match the intended model.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])
print("Transform defined")

try:
    print("About to open slide")
    # NOTE(review): `slide` is never closed in this cell — confirm whether a
    # later cell still needs it, otherwise close it when done.
    slide = open_slide(slide_path)
    print("Slide opened")

    dz = DeepZoomGenerator(slide, tile_size=224, overlap=0, limit_bounds=True)
    print("DeepZoomGenerator created")

    # Last Deep Zoom level (presumably the full-resolution one — see OpenSlide docs).
    level = dz.level_count - 1
    print("Level chosen:", level)
    tiles_x, tiles_y = dz.level_tiles[level]
    print("Tiles x,y:", tiles_x, tiles_y)
    if tiles_x == 0 or tiles_y == 0:
        print("No tiles available, sys.exit(1)")
        # SystemExit does not derive from Exception, so the handler below
        # does not intercept this exit.
        sys.exit(1)

    print("Getting tile (0,0)")
    img = dz.get_tile(level, (0, 0))
    print("Tile obtained")

    # Round-trip tile -> uint8 array -> PIL image before applying the transform.
    patch_img = np.asarray(img, dtype=np.uint8)
    print("Converted tile to numpy array")

    patch_img_pil = Image.fromarray(patch_img, mode="RGB")
    print("Converted to PIL image")

    patch_tensor = transform(patch_img_pil).unsqueeze(0)  # adds a leading batch axis; not used again in this cell
    print("Transformed and unsqueezed tensor")

    # Random (1, 512) placeholder standing in for real model features;
    # unseeded, so the values differ on every run.
    fake_feats = np.random.rand(1, 512)
    print("Fake feats generated")

    # One CSV row: the coordinate string followed by the 512 feature values.
    conch_buffer = []
    coord = "0,0"
    row_data = [coord] + fake_feats[0].tolist()
    print("Row data prepared, type(row_data):", type(row_data))

    conch_buffer.append(row_data)
    print("conch_buffer prepared")

    print("About to open CSV file")
    # newline='' lets the csv module control line endings itself.
    with open(output_csv_path, mode='w', newline='') as f:
        print("CSV file opened")

        writer = csv.writer(f)
        print("Writer created, type(writer):", type(writer))

        # Header: 'coord' followed by the column names '0' .. '511'.
        header = ['coord'] + [f'{i}' for i in range(512)]
        print("Header prepared")

        writer.writerow(header)
        print("Header written")

        writer.writerows(conch_buffer)
        print("Rows written")

    print(f"Features saved to {output_csv_path}")

except Exception as e:
    print("Error during processing:", e)
    # Print the full stack trace of the exception to make diagnosis easier
    import traceback
    traceback.print_exc()
    sys.exit(1)


Start of script
Check slide path: /remote-home/share/lisj/Workspace/SOTA_NAS/datasets/WSI/1819360.mrxs exists: True
Slide found
Creating output dir: /remote-home/share/lisj/test
Output CSV path: /remote-home/share/lisj/test/test_features.csv
Transform defined
About to open slide
Slide opened
DeepZoomGenerator created
Level chosen: 19
Tiles x,y: 753 1566
Getting tile (0,0)
Tile obtained
Converted tile to numpy array
Converted to PIL image
Transformed and unsqueezed tensor
Fake feats generated
Row data prepared, type(row_data): <class 'list'>
conch_buffer prepared
About to open CSV file
CSV file opened
Writer created, type(writer): <class '_csv.writer'>
Header prepared
Header written
Rows written
Features saved to /remote-home/share/lisj/test/test_features.csv


In [5]:
import csv
# Minimal csv.writer smoke test: one header row followed by two data rows.
table = [
    ['a','b','c'],
    ['1','2','3'],
    ['4','5','6'],
]
with open('simple_test.csv', 'w', newline='') as handle:
    csv.writer(handle).writerows(table)
