In [2]:
import torch
import numpy as np
from torch.utils.data import DataLoader
import pandas as pd

# 1. Test the BBBC022 dataset in the non-end-to-end pipeline

In [3]:
import os
import pandas as pd
import torch
import numpy as np
from torchvision import transforms
from PIL import Image
from skimage.transform import resize
from sklearn.preprocessing import LabelEncoder
from PIL import Image
from huggingface_mae import MAEModel
from torch.utils.data import DataLoader
from tqdm import tqdm
from utils import *

# PNG in /data/boom/cpg0019/broad/training_images/BBBC022/20585/A01/1/ .png   : 960 x 160, 160 x 5 = 800
# save in /data/boom/cpg0019/broad/workspace_dl/embeddings/105281_zenodo7114558/BBBC022/20585/A01/1/OpenPhenom_embedding.npy

class NewnoendDataset(torch.utils.data.Dataset):
    '''
    Map-style dataset that pairs a 5-channel image with the directory holding
    its precomputed embeddings.

    Example:
        NewnoendDataset(image_path="/data/boom/cpg0019/broad/",
                        embedding_path="/data/boom/cpg0019/broad/workspace_dl/embeddings/105281_zenodo7114558/",
                        CSV_path="/data/boom/cpg0019/broad/workspace_dl/metadata/sc-metadata-fil.csv")

    Each item is a dict with:
        'image'     -- float tensor produced by resizing the stacked channels
                       to (5, 256, 256) (for single-band source images)
        'embedding' -- str, path of the embedding directory (nothing is loaded)
    '''
    def __init__(self, image_path, embedding_path, CSV_path):
        """
        Args:
            image_path: root directory that is prepended to each image name.
            embedding_path: root directory of the embedding tree.
            CSV_path: metadata CSV; must contain 'Image_Name' and either a
                'Treatment' or a 'pert_name' column.
        """
        self.image_path = image_path
        self.embedding_path = embedding_path
        self.CSV_file = pd.read_csv(CSV_path)

        self.label_encoder = LabelEncoder()
        self.encode_labels()

    def encode_labels(self):
        """Add an 'encoded_labels' column with integer-encoded treatments.

        Raises:
            ValueError: if neither 'Treatment' nor 'pert_name' is a column.
        """
        if 'Treatment' in self.CSV_file.columns:
            column_name = 'Treatment'
        elif 'pert_name' in self.CSV_file.columns:
            column_name = 'pert_name'
        else:
            raise ValueError("CSV file must contain either 'Treatment' or 'pert_name' column")

        self.CSV_file['encoded_labels'] = self.label_encoder.fit_transform(self.CSV_file[column_name])

    def __getitem__(self, idx):
        """Return the image tensor and embedding directory for row `idx`."""
        item = {}
        image_name = str(self.CSV_file.loc[idx, 'Image_Name'])

        # Drop the first 6 characters of the name (the leading '../../' in the
        # metadata preview) and append the rest to the image root.
        img_path = self.image_path + image_name[6:]

        # The file is a horizontal strip of 160x160 tiles; only the first five
        # are used as channels. The context manager closes the file handle,
        # which matters when many items are read (e.g. by DataLoader workers).
        with Image.open(img_path) as strip:
            channels = [
                np.asarray(strip.crop((c * 160, 0, (c + 1) * 160, 160)))
                for c in range(5)
            ]

        # Stack the tiles along a new leading (channel) axis, then resize.
        combined_image = np.stack(channels, axis=0)
        resized_image = resize(combined_image, (5, 256, 256), anti_aliasing=True)

        # Drop the first 22 characters ('../../training_images/') and keep the
        # directory part, e.g. BBBC022/20585/A01/1, under the embedding root.
        path = os.path.dirname(image_name[22:])
        embedding_path = os.path.join(self.embedding_path, str(path))

        item['image'] = torch.tensor(resized_image).float()
        # Only the directory path is returned; the embedding is not loaded here.
        item['embedding'] = embedding_path

        return item

    def __len__(self):
        """Number of rows in the metadata CSV."""
        return self.CSV_file.shape[0]

In [13]:
# Load the BBBC022 single-cell metadata and preview the first rows.
meta_data = pd.read_csv('/data/boom/cpg0019/broad/workspace_dl/metadata/sc-bbbc022.csv')
meta_data.head()

Unnamed: 0,Collection,Metadata_Plate,Metadata_Well,Metadata_Site,Nuclei_Location_Center_X,Nuclei_Location_Center_Y,Image_Name,Treatment,Treatment_Type,Control,Cell_line,LeaveReplicatesOut,LeaveCellsOut
0,BBBC022,20585,A01,1,128,28,../../training_images/BBBC022/20585/A01/1/1583...,BRD-K98763141,Compound,Treatment,U2OS,Training,Training
1,BBBC022,20585,A01,1,444,33,../../training_images/BBBC022/20585/A01/1/1583...,BRD-K98763141,Compound,Treatment,U2OS,Training,Training
2,BBBC022,20585,A01,1,573,52,../../training_images/BBBC022/20585/A01/1/1583...,BRD-K98763141,Compound,Treatment,U2OS,Training,Training
3,BBBC022,20585,A01,1,304,62,../../training_images/BBBC022/20585/A01/1/1583...,BRD-K98763141,Compound,Treatment,U2OS,Training,Training
4,BBBC022,20585,A01,1,143,66,../../training_images/BBBC022/20585/A01/1/1583...,BRD-K98763141,Compound,Treatment,U2OS,Training,Training


In [None]:
import warnings

# Silence all warnings for the rest of the session.
warnings.filterwarnings("ignore")

# Hyperparameters
save_path = "../bbbc022/"

# Read the metadata file
meta_data = pd.read_csv('/data/boom/cpg0019/broad/workspace_dl/metadata/sc-bbbc022.csv')

# 针对全部图像，逐个读取，逐个处理单个图像, 返回读取好的图像
def image_process(img_paths, tile_size=160, num_channels=5, out_size=224):
    """Load channel-strip images and return them as one float tensor.

    Each file is treated as a horizontal strip of `tile_size` x `tile_size`
    tiles. The first `num_channels` tiles are stacked along a new leading axis
    and resized to (num_channels, out_size, out_size).

    Args:
        img_paths: iterable of image file paths.
        tile_size: side length in pixels of one tile in the strip.
        num_channels: number of leading tiles to keep as channels.
        out_size: spatial size of the resized output.

    Returns:
        A float tensor stacking every image that could be opened, i.e.
        (B, num_channels, out_size, out_size) for single-band sources, or
        `torch.empty(0)` when no image was loaded. Missing files are reported
        on stdout and skipped.
    """
    all_img = []
    for img_path in img_paths:
        try:
            # The context manager closes the file handle; the original left one
            # open per image. Tiles are materialised as arrays while it is open.
            with Image.open(img_path) as strip:
                tiles = [
                    np.asarray(strip.crop((c * tile_size, 0, (c + 1) * tile_size, tile_size)))
                    for c in range(num_channels)
                ]
        except FileNotFoundError:
            print(f"File not found: {img_path}")
            continue

        # Stack the tiles channel-first, then resize.
        combined_image = np.stack(tiles, axis=0)
        resized_image = resize(combined_image, (num_channels, out_size, out_size), anti_aliasing=True)
        all_img.append(resized_image)

    if not all_img:
        return torch.empty(0)
    return torch.tensor(np.array(all_img)).float()

# Load the pretrained OpenPhenom model onto the GPU and switch it to eval mode.
MODEL_PATH = "recursionpharma/OpenPhenom"
model = MAEModel.from_pretrained(MODEL_PATH).cuda().eval()
print("Finished loading model")

def model_eval(batch_imgs, encoder=None):
    """Embed a batch of images with the already-loaded model.

    The original re-loaded the checkpoint from `MODEL_PATH` on every call,
    which repeats disk I/O and a GPU transfer for each site and ignores the
    module-level `model` that was already put in eval mode.

    Args:
        batch_imgs: image batch; moved to the GPU with `.cuda()` before use.
        encoder: optional model to use instead of the module-level `model`.

    Returns:
        Whatever `predict` returns for the batch (one embedding per image).
    """
    net = model if encoder is None else encoder
    # Request a single pooled embedding rather than per-channel embeddings.
    net.return_channelwise_embeddings = False
    return net.predict(batch_imgs.cuda())

def get_png_filenames(directory):
    """Return the paths of the PNG entries directly inside `directory`, sorted.

    `os.listdir` yields entries in arbitrary order; sorting makes the order of
    the returned paths (and of anything computed from them row by row)
    reproducible across runs and machines.

    Args:
        directory: directory to scan (not recursive).

    Returns:
        Sorted list of `directory/<name>` for every entry whose name ends in
        '.png' (case-insensitive). Empty list if the directory does not exist.
    """
    image_extension = '.png'
    try:
        entries = os.listdir(directory)
    except FileNotFoundError:
        # Best effort: a missing directory simply has no images.
        return []
    return sorted(
        os.path.join(directory, entry)
        for entry in entries
        if entry.lower().endswith(image_extension)
    )

# Placeholder for collected per-site results (embeddings are written to disk instead).
features = []

# Reset so the summary below never reports a path left over from an earlier run.
output_path = None

# Smoke test: walk plates -> wells -> sites and stop after the first well.
for plate in tqdm(meta_data["Metadata_Plate"].unique()):
    print(plate)
    m1 = meta_data["Metadata_Plate"] == plate
    wells = meta_data[m1]["Metadata_Well"].unique()

    # Iterate over the wells of this plate
    for well in wells:
        # A boolean mask avoids building a query string, so it also works when
        # plate IDs are strings or well names contain quotes.
        result = meta_data[m1 & (meta_data["Metadata_Well"] == well)]

        # Iterate over the sites of this well
        for site in result["Metadata_Site"].unique():
            # Read every image of the site and turn them into embeddings
            img_path = f'/data/boom/cpg0019/broad/training_images/BBBC022/{plate}/{well}/{site}'
            img_paths = get_png_filenames(img_path)
            if not img_paths:  # no images for this site
                continue
            input_img = image_process(img_paths)
            if input_img.numel() == 0:  # nothing could be loaded
                continue
            with torch.no_grad():
                image_embeddings = model_eval(input_img)

            # Mirror the image directory under the embeddings tree, e.g.
            # /data/boom/cpg0019/broad/workspace_dl/embeddings/105281_zenodo7114558/BBBC022/20585/A01/1/
            output_path = os.path.join(img_path.replace('training_images','workspace_dl/embeddings/105281_zenodo7114558'), "Phenom_embeddings.npy")
            # np.save does not create missing parent directories.
            os.makedirs(os.path.dirname(output_path), exist_ok=True)
            np.save(output_path, image_embeddings.cpu().numpy())

            # Release cached GPU memory
            torch.cuda.empty_cache()
            break
        break
    break

if output_path is None:
    print("No site produced embeddings")
else:
    print(output_path, image_embeddings.shape)

In [16]:
# Shape of the embeddings left over from the last site processed by the extraction loop.
image_embeddings.shape

torch.Size([114, 384])

# 3. Test the BBBC036 dataset in the end-to-end pipeline

In [6]:
# Load the BBBC036 profiling metadata and preview the first rows.
meta_data = pd.read_csv('/data/boom/cpg0019/broad/workspace_dl/metadata/BBBC036_profiling.csv')
meta_data.head()

Unnamed: 0,Metadata_Plate,Metadata_Well,Metadata_Site,Plate_Map_Name,DNA,ER,RNA,AGP,Mito,broad_sample_Replicate,Treatment,Compound,Concentration
0,24277,a01,2,H-BIOA-004-3,24277/cdp2bioactives_a01_s2_w15e4541e6-dfcb-40...,24277/cdp2bioactives_a01_s2_w36b0ca5a6-63c8-44...,24277/cdp2bioactives_a01_s2_w26bec6edf-cec2-45...,24277/cdp2bioactives_a01_s2_w44bf0ab1d-a7a1-42...,24277/cdp2bioactives_a01_s2_w5d2a5a2a2-8548-40...,1,BRD-K18250272@3.02251611288227,BRD-K18250272,3.022516
1,24277,a01,3,H-BIOA-004-3,24277/cdp2bioactives_a01_s3_w148f1c410-2d16-4c...,24277/cdp2bioactives_a01_s3_w32dc91611-633b-4a...,24277/cdp2bioactives_a01_s3_w29a7efc66-a795-4e...,24277/cdp2bioactives_a01_s3_w49abe4e30-4eed-41...,24277/cdp2bioactives_a01_s3_w5bf626b46-cc60-41...,1,BRD-K18250272@3.02251611288227,BRD-K18250272,3.022516
2,24277,a01,4,H-BIOA-004-3,24277/cdp2bioactives_a01_s4_w1e9b39acd-2c91-4d...,24277/cdp2bioactives_a01_s4_w394c0f934-4bc1-47...,24277/cdp2bioactives_a01_s4_w2bc33bb34-f4a1-4d...,24277/cdp2bioactives_a01_s4_w41acd7309-157d-41...,24277/cdp2bioactives_a01_s4_w5d76fb6c3-8bf5-43...,1,BRD-K18250272@3.02251611288227,BRD-K18250272,3.022516
3,24277,a01,5,H-BIOA-004-3,24277/cdp2bioactives_a01_s5_w196a63107-8e9d-41...,24277/cdp2bioactives_a01_s5_w3aa91cc5a-8c46-4a...,24277/cdp2bioactives_a01_s5_w23869d8be-1e44-42...,24277/cdp2bioactives_a01_s5_w4b254a0b2-f029-4b...,24277/cdp2bioactives_a01_s5_w5ae645261-7db0-40...,1,BRD-K18250272@3.02251611288227,BRD-K18250272,3.022516
4,24277,a01,6,H-BIOA-004-3,24277/cdp2bioactives_a01_s6_w1dcad1406-ee25-41...,24277/cdp2bioactives_a01_s6_w3e008f0a8-1642-44...,24277/cdp2bioactives_a01_s6_w2fa2c1b40-ec9b-4d...,24277/cdp2bioactives_a01_s6_w4b328e14f-5559-4b...,24277/cdp2bioactives_a01_s6_w503cdcfe7-bb4d-4c...,1,BRD-K18250272@3.02251611288227,BRD-K18250272,3.022516


In [11]:
import warnings

# Silence all warnings for the rest of the session.
warnings.filterwarnings("ignore")

# Hyperparameters
save_path = "../bbbc036/"

# Read the metadata file
meta_data = pd.read_csv('/data/boom/cpg0019/broad/workspace_dl/metadata/BBBC036_profiling.csv')

# 针对全部图像，逐个读取，逐个处理单个图像, 返回读取好的图像
def image_process(img_paths, tile_size=160, num_channels=5, out_size=224):
    """Load channel-strip images and return them as one float tensor.

    Each file is treated as a horizontal strip of `tile_size` x `tile_size`
    tiles. The first `num_channels` tiles are stacked along a new leading axis
    and resized to (num_channels, out_size, out_size).

    Args:
        img_paths: iterable of image file paths.
        tile_size: side length in pixels of one tile in the strip.
        num_channels: number of leading tiles to keep as channels.
        out_size: spatial size of the resized output.

    Returns:
        A float tensor stacking every image that could be opened, i.e.
        (B, num_channels, out_size, out_size) for single-band sources, or
        `torch.empty(0)` when no image was loaded. Missing files are reported
        on stdout and skipped.
    """
    all_img = []
    for img_path in img_paths:
        try:
            # The context manager closes the file handle; the original left one
            # open per image. Tiles are materialised as arrays while it is open.
            with Image.open(img_path) as strip:
                tiles = [
                    np.asarray(strip.crop((c * tile_size, 0, (c + 1) * tile_size, tile_size)))
                    for c in range(num_channels)
                ]
        except FileNotFoundError:
            print(f"File not found: {img_path}")
            continue

        # Stack the tiles channel-first, then resize.
        combined_image = np.stack(tiles, axis=0)
        resized_image = resize(combined_image, (num_channels, out_size, out_size), anti_aliasing=True)
        all_img.append(resized_image)

    if not all_img:
        return torch.empty(0)
    return torch.tensor(np.array(all_img)).float()

# Load the pretrained OpenPhenom model onto the GPU and switch it to eval mode.
MODEL_PATH = "recursionpharma/OpenPhenom"
model = MAEModel.from_pretrained(MODEL_PATH).cuda().eval()
print("Finished loading model")

def model_eval(batch_imgs, encoder=None):
    """Embed a batch of images with the already-loaded model.

    The original re-loaded the checkpoint from `MODEL_PATH` on every call,
    which repeats disk I/O and a GPU transfer for each site and ignores the
    module-level `model` that was already put in eval mode.

    Args:
        batch_imgs: image batch; moved to the GPU with `.cuda()` before use.
        encoder: optional model to use instead of the module-level `model`.

    Returns:
        Whatever `predict` returns for the batch (one embedding per image).
    """
    net = model if encoder is None else encoder
    # Request a single pooled embedding rather than per-channel embeddings.
    net.return_channelwise_embeddings = False
    return net.predict(batch_imgs.cuda())

def get_png_filenames(directory):
    """Return the paths of the PNG entries directly inside `directory`, sorted.

    `os.listdir` yields entries in arbitrary order; sorting makes the order of
    the returned paths (and of anything computed from them row by row)
    reproducible across runs and machines.

    Args:
        directory: directory to scan (not recursive).

    Returns:
        Sorted list of `directory/<name>` for every entry whose name ends in
        '.png' (case-insensitive). Empty list if the directory does not exist.
    """
    image_extension = '.png'
    try:
        entries = os.listdir(directory)
    except FileNotFoundError:
        # Best effort: a missing directory simply has no images.
        return []
    return sorted(
        os.path.join(directory, entry)
        for entry in entries
        if entry.lower().endswith(image_extension)
    )

# Placeholder for collected per-site results (embeddings are written to disk instead).
features = []

# Walk plates -> wells -> sites and write one embedding file per site.
for plate in tqdm(meta_data["Metadata_Plate"].unique()):
    print(plate)
    m1 = meta_data["Metadata_Plate"] == plate
    wells = meta_data[m1]["Metadata_Well"].unique()

    # Iterate over the wells of this plate
    for well in wells:
        # A boolean mask avoids building a query string, so it also works when
        # plate IDs are strings or well names contain quotes.
        result = meta_data[m1 & (meta_data["Metadata_Well"] == well)]

        # Iterate over the sites of this well
        for site in result["Metadata_Site"].unique():
            # Read every image of the site and turn them into embeddings
            img_path = f'/data/boom/cpg0019/broad/training_images/BBBC036/{plate}/{well}/{site}'
            img_paths = get_png_filenames(img_path)
            if not img_paths:  # no images for this site
                continue
            input_img = image_process(img_paths)
            if input_img.numel() == 0:  # nothing could be loaded
                continue
            with torch.no_grad():
                image_embeddings = model_eval(input_img)

            # Mirror the image directory under the embeddings tree.
            output_path = os.path.join(img_path.replace('training_images','workspace_dl/embeddings/105281_zenodo7114558'), "Phenom_embeddings.npy")
            # np.save does not create missing parent directories.
            os.makedirs(os.path.dirname(output_path), exist_ok=True)
            np.save(output_path, image_embeddings.cpu().numpy())

            # Release cached GPU memory
            torch.cuda.empty_cache()

Finished loading model


  0%|          | 0/55 [00:00<?, ?it/s]

24277


  2%|▏         | 1/55 [12:31<11:16:01, 751.13s/it]

24279


  4%|▎         | 2/55 [25:58<11:32:39, 784.14s/it]

24293


  5%|▌         | 3/55 [40:04<11:44:00, 812.31s/it]

24295


  7%|▋         | 4/55 [53:31<11:28:40, 810.20s/it]

24297


  9%|▉         | 5/55 [1:07:44<11:28:02, 825.64s/it]

24301


 11%|█         | 6/55 [1:19:17<10:37:25, 780.53s/it]

24303


 13%|█▎        | 7/55 [1:30:14<9:52:13, 740.29s/it] 

24305


 15%|█▍        | 8/55 [1:42:24<9:37:22, 737.08s/it]

24307


 16%|█▋        | 9/55 [1:55:00<9:29:34, 742.92s/it]

24309


 18%|█▊        | 10/55 [2:07:15<9:15:23, 740.52s/it]

24311


 20%|██        | 11/55 [2:20:37<9:16:48, 759.28s/it]

24313


 22%|██▏       | 12/55 [2:29:20<8:12:35, 687.33s/it]

24320


 24%|██▎       | 13/55 [2:38:10<7:27:53, 639.85s/it]

24352


 25%|██▌       | 14/55 [2:47:18<6:58:07, 611.88s/it]

25937


 27%|██▋       | 15/55 [2:54:09<6:07:40, 551.52s/it]

25939


 29%|██▉       | 16/55 [3:03:13<5:57:02, 549.28s/it]

25944


 31%|███       | 17/55 [3:09:43<5:17:29, 501.31s/it]

25949


 33%|███▎      | 18/55 [3:16:31<4:51:54, 473.36s/it]

25962


 35%|███▍      | 19/55 [3:23:25<4:33:15, 455.43s/it]

25966


 36%|███▋      | 20/55 [3:31:02<4:25:53, 455.83s/it]

25968


 38%|███▊      | 21/55 [3:39:22<4:25:45, 469.00s/it]

25984


 40%|████      | 22/55 [3:47:15<4:18:38, 470.26s/it]

25986


 42%|████▏     | 23/55 [3:56:18<4:22:26, 492.09s/it]

25988


 44%|████▎     | 24/55 [4:04:16<4:12:03, 487.85s/it]

25990


 45%|████▌     | 25/55 [4:11:00<3:51:26, 462.89s/it]

25992


 47%|████▋     | 26/55 [4:18:41<3:43:22, 462.14s/it]

26232


 49%|████▉     | 27/55 [4:26:48<3:39:09, 469.62s/it]

26247


 51%|█████     | 28/55 [4:34:42<3:32:00, 471.13s/it]

24278


 53%|█████▎    | 29/55 [4:48:49<4:12:53, 583.61s/it]

24280


 55%|█████▍    | 30/55 [5:04:46<4:49:53, 695.74s/it]

24294


 56%|█████▋    | 31/55 [5:21:27<5:14:59, 787.46s/it]

24296


 58%|█████▊    | 32/55 [5:42:23<5:55:40, 927.83s/it]

24300


 60%|██████    | 33/55 [6:10:46<7:05:33, 1160.61s/it]

24302


 62%|██████▏   | 34/55 [6:23:28<6:04:19, 1040.95s/it]

24304


 64%|██████▎   | 35/55 [6:34:54<5:11:25, 934.28s/it] 

24306


 65%|██████▌   | 36/55 [6:46:30<4:33:12, 862.77s/it]

24308


 67%|██████▋   | 37/55 [6:58:58<4:08:34, 828.58s/it]

24310


 69%|██████▉   | 38/55 [7:12:24<3:52:49, 821.74s/it]

24312


 71%|███████   | 39/55 [7:25:28<3:36:04, 810.26s/it]

24319


 73%|███████▎  | 40/55 [7:33:12<2:56:37, 706.52s/it]

24321


 75%|███████▍  | 41/55 [7:42:19<2:33:39, 658.56s/it]

24357


 76%|███████▋  | 42/55 [7:51:10<2:14:25, 620.41s/it]

25938


 78%|███████▊  | 43/55 [7:59:28<1:56:45, 583.80s/it]

25943


 80%|████████  | 44/55 [8:07:46<1:42:18, 558.07s/it]

25945


 82%|████████▏ | 45/55 [8:13:49<1:23:12, 499.27s/it]

25955


 84%|████████▎ | 46/55 [8:20:00<1:09:07, 460.83s/it]

25965


 85%|████████▌ | 47/55 [8:26:32<58:42, 440.37s/it]  

25967


 87%|████████▋ | 48/55 [8:33:45<51:05, 437.98s/it]

25983


 89%|████████▉ | 49/55 [8:40:25<42:40, 426.71s/it]

25985


 91%|█████████ | 50/55 [8:48:40<37:15, 447.06s/it]

25987


 93%|█████████▎| 51/55 [8:56:26<30:11, 452.75s/it]

25989


 95%|█████████▍| 52/55 [9:02:53<21:39, 433.02s/it]

25991


 96%|█████████▋| 53/55 [9:09:06<13:50, 415.06s/it]

26224


 98%|█████████▊| 54/55 [9:16:27<07:02, 422.87s/it]

26239


100%|██████████| 55/55 [9:23:35<00:00, 614.82s/it]


In [9]:
# NOTE(review): the visible cells only ever bind `features` to an empty list,
# which has no `.shape`; the recorded output must come from kernel state that
# is not shown here — confirm how `features` was built before relying on it.
features.shape

(119736, 384)

# 4. Test the BBBC037 dataset in the end-to-end pipeline

In [None]:
import warnings

# Silence all warnings for the rest of the session.
warnings.filterwarnings("ignore")

# Hyperparameters
save_path = "../bbbc037/"

# Read the metadata file
meta_data = pd.read_csv('/data/boom/cpg0019/broad/workspace_dl/metadata/BBBC037_profiling.csv')

# 针对全部图像，逐个读取，逐个处理单个图像, 返回读取好的图像
def image_process(img_paths, tile_size=160, num_channels=5, out_size=224):
    """Load channel-strip images and return them as one float tensor.

    Each file is treated as a horizontal strip of `tile_size` x `tile_size`
    tiles. The first `num_channels` tiles are stacked along a new leading axis
    and resized to (num_channels, out_size, out_size).

    Args:
        img_paths: iterable of image file paths.
        tile_size: side length in pixels of one tile in the strip.
        num_channels: number of leading tiles to keep as channels.
        out_size: spatial size of the resized output.

    Returns:
        A float tensor stacking every image that could be opened, i.e.
        (B, num_channels, out_size, out_size) for single-band sources, or
        `torch.empty(0)` when no image was loaded. Missing files are reported
        on stdout and skipped.
    """
    all_img = []
    for img_path in img_paths:
        try:
            # The context manager closes the file handle; the original left one
            # open per image. Tiles are materialised as arrays while it is open.
            with Image.open(img_path) as strip:
                tiles = [
                    np.asarray(strip.crop((c * tile_size, 0, (c + 1) * tile_size, tile_size)))
                    for c in range(num_channels)
                ]
        except FileNotFoundError:
            print(f"File not found: {img_path}")
            continue

        # Stack the tiles channel-first, then resize.
        combined_image = np.stack(tiles, axis=0)
        resized_image = resize(combined_image, (num_channels, out_size, out_size), anti_aliasing=True)
        all_img.append(resized_image)

    if not all_img:
        return torch.empty(0)
    return torch.tensor(np.array(all_img)).float()

# Load the pretrained OpenPhenom model once, move it to the GPU and switch it
# to eval mode. The original loaded the checkpoint twice in a row and
# immediately discarded the first copy.
MODEL_PATH = "recursionpharma/OpenPhenom"
model = MAEModel.from_pretrained(MODEL_PATH).cuda().eval()
print("Finished loading model")
    
def model_eval(batch_imgs):
    """Embed a batch of images with the module-level `model`.

    Channel-wise embeddings are switched off on the model, and the batch is
    moved to the GPU before `predict` is called.
    """
    model.return_channelwise_embeddings = False
    gpu_batch = batch_imgs.cuda()
    return model.predict(gpu_batch)

def get_png_filenames(directory):
    """Return the paths of the PNG entries directly inside `directory`, sorted.

    `os.listdir` yields entries in arbitrary order; sorting makes the order of
    the returned paths (and of anything computed from them row by row)
    reproducible across runs and machines.

    Args:
        directory: directory to scan (not recursive).

    Returns:
        Sorted list of `directory/<name>` for every entry whose name ends in
        '.png' (case-insensitive). Empty list if the directory does not exist.
    """
    image_extension = '.png'
    try:
        entries = os.listdir(directory)
    except FileNotFoundError:
        # Best effort: a missing directory simply has no images.
        return []
    return sorted(
        os.path.join(directory, entry)
        for entry in entries
        if entry.lower().endswith(image_extension)
    )

# Placeholder for collected per-site results (embeddings are written to disk instead).
features = []

# Walk plates -> wells -> sites and write one embedding file per site.
for plate in tqdm(meta_data["Metadata_Plate"].unique()):
    m1 = meta_data["Metadata_Plate"] == plate
    wells = meta_data[m1]["Metadata_Well"].unique()

    # Iterate over the wells of this plate
    for well in wells:
        # A boolean mask avoids building a query string, so it also works when
        # plate IDs are strings or well names contain quotes.
        result = meta_data[m1 & (meta_data["Metadata_Well"] == well)]

        # Iterate over the sites of this well
        for site in result["Metadata_Site"].unique():
            # Read every image of the site and turn them into embeddings
            img_path = f'/data/boom/cpg0019/broad/training_images/BBBC037/{plate}/{well}/{site}'
            img_paths = get_png_filenames(img_path)
            if not img_paths:  # no images for this site
                continue
            input_img = image_process(img_paths)
            if input_img.numel() == 0:  # nothing could be loaded
                continue
            with torch.no_grad():
                image_embeddings = model_eval(input_img)

            # Mirror the image directory under the embeddings tree.
            output_path = os.path.join(img_path.replace('training_images','workspace_dl/embeddings/105281_zenodo7114558'), "Phenom_embeddings.npy")
            # np.save does not create missing parent directories.
            os.makedirs(os.path.dirname(output_path), exist_ok=True)
            np.save(output_path, image_embeddings.cpu().numpy())

            # Release cached GPU memory
            torch.cuda.empty_cache()