## 数据预处理笔记

In [2]:
import clip
from sympy import Q 
import torch
from torchvision.transforms import Compose, Resize, CenterCrop, ToTensor, Normalize
from PIL import Image
import numpy as np
from typing import Any, Dict, Optional, Tuple, Union
import open_clip
import copy
import torch.nn as nn
import torch.functional as F
from torchvision.transforms import InterpolationMode

  from .autonotebook import tqdm as notebook_tqdm


In [12]:
import clip
# from sympy import Q 
import torch
from torchvision.transforms import Compose, Resize, CenterCrop, ToTensor, Normalize
from PIL import Image
import numpy as np
from typing import Any, Dict, Optional, Tuple, Union
import open_clip
import copy
import torch.nn as nn
import torch.functional as F
from torchvision.transforms import InterpolationMode

try:
    BICUBIC = InterpolationMode.BICUBIC
except ImportError:
    BICUBIC = Image.BICUBIC

def _convert_image_to_rgb(image):
    return image.convert("RGB")

def _transform(n_px):
    return Compose([
        Resize(n_px, interpolation=BICUBIC),
        CenterCrop(n_px),
        _convert_image_to_rgb,
        ToTensor(),
        Normalize((0.48145466, 0.4578275, 0.40821073), (0.26862954, 0.26130258, 0.27577711)),
    ])



def CLIP_Process(image_path, dest):
    img = Image.open(image_path)
    a = 224
    b = _transform(a)
    c = b(img)
    if ((dest.split(".")[-1]) != "pth"):
      dest+=".pth"
      
    torch.save(c, dest)
    return c



OPENAI_DATASET_MEAN = (0.48145466, 0.4578275, 0.40821073)
OPENAI_DATASET_STD = (0.26862954, 0.26130258, 0.27577711)

_FIELDS = '__dataclass_fields__'
def _is_dataclass_instance(obj):
    """Returns True if obj is an instance of a dataclass."""
    return hasattr(type(obj), _FIELDS)

def asdict(obj, *, dict_factory=dict):
    """Return the fields of a dataclass instance as a new dictionary mapping
    field names to field values.

    Example usage:

      @dataclass
      class C:
          x: int
          y: int

      c = C(1, 2)
      assert asdict(c) == {'x': 1, 'y': 2}

    If given, 'dict_factory' will be used instead of built-in dict.
    The function applies recursively to field values that are
    dataclass instances. This will also look into built-in containers:
    tuples, lists, and dicts.
    """
    if not _is_dataclass_instance(obj):
        raise TypeError("asdict() should be called on dataclass instances")
    return _asdict_inner(obj, dict_factory)


def _asdict_inner(obj, dict_factory):
    if _is_dataclass_instance(obj):
        result = []
        for f in fields(obj):
            value = _asdict_inner(getattr(obj, f.name), dict_factory)
            result.append((f.name, value))
        return dict_factory(result)
    elif isinstance(obj, tuple) and hasattr(obj, '_fields'):
        # obj is a namedtuple.  Recurse into it, but the returned
        # object is another namedtuple of the same type.  This is
        # similar to how other list- or tuple-derived classes are
        # treated (see below), but we just need to create them
        # differently because a namedtuple's __init__ needs to be
        # called differently (see bpo-34363).

        # I'm not using namedtuple's _asdict()
        # method, because:
        # - it does not recurse in to the namedtuple fields and
        #   convert them to dicts (using dict_factory).
        # - I don't actually want to return a dict here.  The main
        #   use case here is json.dumps, and it handles converting
        #   namedtuples to lists.  Admittedly we're losing some
        #   information here when we produce a json list instead of a
        #   dict.  Note that if we returned dicts here instead of
        #   namedtuples, we could no longer call asdict() on a data
        #   structure where a namedtuple was used as a dict key.

        return type(obj)(*[_asdict_inner(v, dict_factory) for v in obj])
    elif isinstance(obj, (list, tuple)):
        # Assume we can create an object of this type by passing in a
        # generator (which is not true for namedtuples, handled
        # above).
        return type(obj)(_asdict_inner(v, dict_factory) for v in obj)
    elif isinstance(obj, dict):
        return type(obj)((_asdict_inner(k, dict_factory),
                          _asdict_inner(v, dict_factory))
                         for k, v in obj.items())
    else:
        return copy.deepcopy(obj)

class AugmentationCfg:
    scale: Tuple[float, float] = (0.9, 1.0)
    ratio: Optional[Tuple[float, float]] = None
    color_jitter: Optional[Union[float, Tuple[float, float, float]]] = None
    interpolation: Optional[str] = None
    re_prob: Optional[float] = None
    re_count: Optional[int] = None
    use_timm: bool = False

class ResizeMaxSize(nn.Module):

    def __init__(self, max_size, interpolation=InterpolationMode.BICUBIC, fn='max', fill=0):
        super().__init__()
        if not isinstance(max_size, int):
            raise TypeError(f"Size should be int. Got {type(max_size)}")
        self.max_size = max_size
        self.interpolation = interpolation
        self.fn = min if fn == 'min' else min
        self.fill = fill

    def forward(self, img):
        if isinstance(img, torch.Tensor):
            height, width = img.shape[:2]
        else:
            width, height = img.size
        scale = self.max_size / float(max(height, width))
        new_size = tuple(round(dim * scale) for dim in (height, width))
        if scale != 1.0:
            img = F.resize(img, new_size, self.interpolation)
        if not width == height:
            pad_h = self.max_size - new_size[0]
            pad_w = self.max_size - new_size[1]
            img = F.pad(img, padding=[pad_w//2, pad_h//2, pad_w - pad_w//2, pad_h - pad_h//2], fill=self.fill)
        return img


def image_transform(
        image_size: int,
        is_train:bool = False,
        mean: Optional[Tuple[float, ...]] = None,
        std: Optional[Tuple[float, ...]] = None,
        resize_longest_max: bool = False,
        fill_color: int = 0,
        aug_cfg: Optional[Union[Dict[str, Any], AugmentationCfg]] = None,
):
    mean = mean or OPENAI_DATASET_MEAN
    if not isinstance(mean, (list, tuple)):
        mean = (mean,) * 3

    std = std or OPENAI_DATASET_STD
    if not isinstance(std, (list, tuple)):
        std = (std,) * 3

    if isinstance(image_size, (list, tuple)) and image_size[0] == image_size[1]:
        # for square size, pass size as int so that Resize() uses aspect preserving shortest edge
        image_size = image_size[0]

    if isinstance(aug_cfg, dict):
        aug_cfg = AugmentationCfg(**aug_cfg)
    else:
        aug_cfg = aug_cfg or AugmentationCfg()
    normalize = Normalize(mean=mean, std=std)
    if is_train:
        raise NotImplemented("!!LDS!!")
        # aug_cfg_dict = {k: v for k, v in asdict(aug_cfg).items() if v is not None}
        # use_timm = aug_cfg_dict.pop('use_timm', False)
        # if use_timm:
        #     from timm.data import create_transform  # timm can still be optional
        #     if isinstance(image_size, (tuple, list)):
        #         assert len(image_size) >= 2
        #         input_size = (3,) + image_size[-2:]
        #     else:
        #         input_size = (3, image_size, image_size)
        #     # by default, timm aug randomly alternates bicubic & bilinear for better robustness at inference time
        #     aug_cfg_dict.setdefault('interpolation', 'random')
        #     aug_cfg_dict.setdefault('color_jitter', None)  # disable by default
        #     train_transform = create_transform(
        #         input_size=input_size,
        #         is_training=True,
        #         hflip=0.,
        #         mean=mean,
        #         std=std,
        #         re_mode='pixel',
        #         **aug_cfg_dict,
        #     )
        # else:
        #     train_transform = Compose([
        #         RandomResizedCrop(
        #             image_size,
        #             scale=aug_cfg_dict.pop('scale'),
        #             interpolation=InterpolationMode.BICUBIC,
        #         ),
        #         _convert_to_rgb,
        #         ToTensor(),
        #         normalize,
        #     ])
        #     if aug_cfg_dict:
        #         warnings.warn(f'Unused augmentation cfg items, specify `use_timm` to use ({list(aug_cfg_dict.keys())}).')
        # return train_transform
    else:
        if resize_longest_max:
            transforms = [
                ResizeMaxSize(image_size, fill=fill_color)
            ]
        else:
            transforms = [
                Resize(image_size, interpolation=InterpolationMode.BICUBIC),
                CenterCrop(image_size),
            ]
        transforms.extend([
            _convert_image_to_rgb,
            ToTensor(),
            normalize,
        ])
        return Compose(transforms)


def BiomedCLIP_processor(image_path, dest):
    img = Image.open(image_path)
    preprocess_val = image_transform(224)
    data = preprocess_val(img)
    if ((dest.split(".")[-1]) != "pth"):
      dest+=".pth"
      
    torch.save(data, dest)
    return data
   
  

if  __name__ == "__main__":
  image_path = r"D:\project_x_ray_CLIP\data\physionet.org\files\mimic-cxr-jpg\2.0.0\files\p10\p10000898\s54205396\9e7a6aae-2580e589-6212d336-9813ebbd-a9239a34.jpg"
  img = Image.open(image_path)
  process = BiomedCLIP_processor(image_path, r'D:\exchange\ShanghaiTech\learning\code\diagnosisP\x_ray_constrastive\output/biomed_tensor_var.pth')
  # print(process)
  process2 = CLIP_Process(image_path, r'D:\exchange\ShanghaiTech\learning\code\diagnosisP\x_ray_constrastive\output/_tensor_var.pth')
  

  # a = 224
  # b = _transform(a)
  # print((process == process2).sum())
  # print(3*224*224)


  # clip_model, preprocess_train, clip_processor = open_clip.create_model_and_transforms('hf-hub:microsoft/BiomedCLIP-PubMedBERT_256-vit_base_patch16_224')
  # clip_model2, clip_processor2  = clip.load("ViT-B/32",)

  # a = clip_processor(Image.open(image_path))
  # b = clip_processor2(Image.open(image_path))
  # print((a == b).sum())
  # print((a == process2).sum())
  # c = b(img)

  # torch.save(c, r'D:\exchange\ShanghaiTech\learning\code\diagnosisP\x_ray_constrastive\output/tensor_var.pth')



In [11]:
from pathlib import Path
from typing import Tuple
import torch
from src.utils.health_multimodal.image.utils import ImageModelType
from src.utils.health_multimodal.image import get_image_inference
import logging
from tqdm import tqdm
import pandas as pd


class generate_img_tensor_for_biovil_t:
  def __init__(self):
    self.image_inference = get_image_inference(ImageModelType.BIOVIL_T)
    self.count = 1
    return 
  
  
  def process_row2generate_img_tensor_biovil(self, image_path, dest):
      # 在这个示例中，我们将列 'a' 中的值加上 10，然后保存到新的列 'B' 中
      raw_img = row['file_path']

      # if "D:/project_x_ray_CLIP/data/physionet.org/files/mimic-cxr-jpg/2.0.0/files" in raw_img:
      #   raw_img = raw_img.replace("D:/project_x_ray_CLIP/data/physionet.org/files/mimic-cxr-jpg/2.0.0/files/", "/public_bme/data/lds/")   
      input_image_tensor, _ = self.image_inference.load_and_transform_input_image(Path(image_path), self.image_inference.transform)
      if ((dest.split(".")[-1]) != "pth"):
        dest+=".pth"
      
      torch.save(input_image_tensor, dest)

      return input_image_tensor
      


  from .autonotebook import tqdm as notebook_tqdm


ImportError: cannot import name 'health_multimodal' from partially initialized module 'src' (most likely due to a circular import) (/home_data/home/v-liudsh/coding/constrastive_P/diagnosisP/exchange/Fine-Grained_Features_Alignment_via_Constrastive_Learning/src/__init__.py)

----
### image data preprocessing 

In [18]:
import pandas as pd
import os 
pwd = os.getcwd()
data = pd.read_csv(pwd + r"/data/mimic-cxr-train/P10_12_train_12_16_labels14.csv")
print(data.file_path[10])
data.head()


D:/project_x_ray_CLIP/data/physionet.org/files/mimic-cxr-jpg/2.0.0/files/p10//p10000898/s50771383//0c4eb1e1-b801903c-bcebe8a4-3da9cd3c-3b94a27c.jpg


Unnamed: 0.1,Unnamed: 0,study_id,label,img_path,train_label,file_path,split,Biomed_img_tensor_path,Clip_img_tensor_path,BiomedClip_img_tensor_path,train_14_labels
0,0,50414267,"{'Atelectasis': 1, 'Cardiomegaly': 1, 'Consoli...",D:/project_x_ray_CLIP/data/physionet.org/files...,"[1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1]",D:/project_x_ray_CLIP/data/physionet.org/files...,train,D:/project_x_ray_CLIP/data/physionet.org/files...,/public_bme/data/lds/p10/p10000032/s50414267/0...,/public_bme/data/lds/p10/p10000032/s50414267/0...,"[2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2]"
1,1,50414267,"{'Atelectasis': 1, 'Cardiomegaly': 1, 'Consoli...",D:/project_x_ray_CLIP/data/physionet.org/files...,"[1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1]",D:/project_x_ray_CLIP/data/physionet.org/files...,train,D:/project_x_ray_CLIP/data/physionet.org/files...,/public_bme/data/lds/p10/p10000032/s50414267/1...,/public_bme/data/lds/p10/p10000032/s50414267/1...,"[2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2]"
2,2,53189527,"{'Atelectasis': 1, 'Cardiomegaly': 1, 'Consoli...",D:/project_x_ray_CLIP/data/physionet.org/files...,"[1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1]",D:/project_x_ray_CLIP/data/physionet.org/files...,train,D:/project_x_ray_CLIP/data/physionet.org/files...,/public_bme/data/lds/p10/p10000032/s53189527/2...,/public_bme/data/lds/p10/p10000032/s53189527/2...,"[2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2]"
3,3,53189527,"{'Atelectasis': 1, 'Cardiomegaly': 1, 'Consoli...",D:/project_x_ray_CLIP/data/physionet.org/files...,"[1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1]",D:/project_x_ray_CLIP/data/physionet.org/files...,train,D:/project_x_ray_CLIP/data/physionet.org/files...,/public_bme/data/lds/p10/p10000032/s53189527/e...,/public_bme/data/lds/p10/p10000032/s53189527/e...,"[2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2]"
4,4,53911762,"{'Atelectasis': 1, 'Cardiomegaly': 1, 'Consoli...",D:/project_x_ray_CLIP/data/physionet.org/files...,"[1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1]",D:/project_x_ray_CLIP/data/physionet.org/files...,train,D:/project_x_ray_CLIP/data/physionet.org/files...,/public_bme/data/lds/p10/p10000032/s53911762/6...,/public_bme/data/lds/p10/p10000032/s53911762/6...,"[2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2]"


In [17]:
from pathlib import Path
import os

file_path = data.file_path
filename = file_path[0]
print(filename)

def change_suffix(filename, custom = ""):
    # path_obj = Path(filename)
    # filename = path_obj.stem
    # # print(filename)
    # # 构建新的文件路径
    # new_file_path = path_obj.with_suffix(new_suffix)
    # print(new_file_path)
    modified_string = filename.replace("//", "/")
    modified_string = modified_string.replace("D:/project_x_ray_CLIP/data/physionet.org/files/mimic-cxr-jpg/2.0.0/files/", "/public_bme/data/lds/")
    filename, file_extension = os.path.splitext(modified_string)
    # print(filename)
    return filename+f"_{custom}"+".pth"
# data['Biomed_img_tensor_path'] = data['file_path'].apply(lambda x: change_suffix(x, 'biomed'))
# data['Clip_img_tensor_path'] = data['file_path'].apply(lambda x: change_suffix(x, "clip"))
data['Biovil_img_tensor_path'] = data['file_path'].apply(lambda x: change_suffix(x, "biovil"))
data.to_csv(pwd + r"/data/mimic-cxr-train/P10_12_train_1_29_labels14_biovil.csv", index=False)
data = pd.read_csv(pwd + r"/data/mimic-cxr-train/P10_12_train_1_29_labels14_biovil.csv", index_col=0)
data

D:/project_x_ray_CLIP/data/physionet.org/files/mimic-cxr-jpg/2.0.0/files/p10//p10032725/s50331901//687754ce-7420bfd3-0a19911f-a27a3916-9019cd53.jpg


Unnamed: 0_level_0,train_label,file_path,split,BiomedClip_img_tensor_path,Clip_img_tensor_path,sid,test_14_labels,Biovil_img_tensor_path
img_path,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
D:/project_x_ray_CLIP/data/physionet.org/files/mimic-cxr-jpg/2.0.0/files/p10//p10032725/s50331901/,"[1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1]",D:/project_x_ray_CLIP/data/physionet.org/files...,test,D:/project_x_ray_CLIP/data/physionet.org/files...,D:/project_x_ray_CLIP/data/physionet.org/files...,50331901,"[2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 0]",/public_bme/data/lds/p10/p10032725/s50331901/6...
D:/project_x_ray_CLIP/data/physionet.org/files/mimic-cxr-jpg/2.0.0/files/p10//p10032725/s55504914/,"[1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1]",D:/project_x_ray_CLIP/data/physionet.org/files...,test,D:/project_x_ray_CLIP/data/physionet.org/files...,D:/project_x_ray_CLIP/data/physionet.org/files...,55504914,"[2, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 0]",/public_bme/data/lds/p10/p10032725/s55504914/f...
D:/project_x_ray_CLIP/data/physionet.org/files/mimic-cxr-jpg/2.0.0/files/p10//p10046166/s50051329/,"[1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1]",D:/project_x_ray_CLIP/data/physionet.org/files...,test,D:/project_x_ray_CLIP/data/physionet.org/files...,D:/project_x_ray_CLIP/data/physionet.org/files...,50051329,"[2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2]",/public_bme/data/lds/p10/p10046166/s50051329/4...
D:/project_x_ray_CLIP/data/physionet.org/files/mimic-cxr-jpg/2.0.0/files/p10//p10046166/s50051329/,"[1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1]",D:/project_x_ray_CLIP/data/physionet.org/files...,test,D:/project_x_ray_CLIP/data/physionet.org/files...,D:/project_x_ray_CLIP/data/physionet.org/files...,50051329,"[2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2]",/public_bme/data/lds/p10/p10046166/s50051329/a...
D:/project_x_ray_CLIP/data/physionet.org/files/mimic-cxr-jpg/2.0.0/files/p10//p10046166/s51738740/,"[1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1]",D:/project_x_ray_CLIP/data/physionet.org/files...,test,D:/project_x_ray_CLIP/data/physionet.org/files...,D:/project_x_ray_CLIP/data/physionet.org/files...,51738740,"[2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2]",/public_bme/data/lds/p10/p10046166/s51738740/3...
...,...,...,...,...,...,...,...,...
D:/project_x_ray_CLIP/data/physionet.org/files/mimic-cxr-jpg/2.0.0/files/p12//p12963531/s59505688/,"[2, 0, 2, 0, 2, 2, 2, 2, 2, 0, 2, 2, 2]",D:/project_x_ray_CLIP/data/physionet.org/files...,test,D:/project_x_ray_CLIP/data/physionet.org/files...,D:/project_x_ray_CLIP/data/physionet.org/files...,59505688,"[2, 0, 2, 0, 2, 2, 2, 2, 1, 0, 2, 2, 2, 2]",/public_bme/data/lds/p12/p12963531/s59505688/4...
D:/project_x_ray_CLIP/data/physionet.org/files/mimic-cxr-jpg/2.0.0/files/p12//p12963531/s59505688/,"[2, 0, 2, 0, 2, 2, 2, 2, 2, 0, 2, 2, 2]",D:/project_x_ray_CLIP/data/physionet.org/files...,test,D:/project_x_ray_CLIP/data/physionet.org/files...,D:/project_x_ray_CLIP/data/physionet.org/files...,59505688,"[2, 0, 2, 0, 2, 2, 2, 2, 1, 0, 2, 2, 2, 2]",/public_bme/data/lds/p12/p12963531/s59505688/5...
D:/project_x_ray_CLIP/data/physionet.org/files/mimic-cxr-jpg/2.0.0/files/p12//p12966004/s55553875/,"[2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 0, 2]",D:/project_x_ray_CLIP/data/physionet.org/files...,test,D:/project_x_ray_CLIP/data/physionet.org/files...,D:/project_x_ray_CLIP/data/physionet.org/files...,55553875,"[2, 2, 2, 2, 2, 2, 2, 0, 1, 2, 2, 0, 2, 2]",/public_bme/data/lds/p12/p12966004/s55553875/d...
D:/project_x_ray_CLIP/data/physionet.org/files/mimic-cxr-jpg/2.0.0/files/p12//p12966004/s57399078/,"[2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 1]",D:/project_x_ray_CLIP/data/physionet.org/files...,test,D:/project_x_ray_CLIP/data/physionet.org/files...,D:/project_x_ray_CLIP/data/physionet.org/files...,57399078,"[2, 2, 2, 2, 2, 2, 2, 0, 1, 2, 2, 2, 1, 0]",/public_bme/data/lds/p12/p12966004/s57399078/8...


In [4]:
data = data.head()
print(data.iloc[0].Biovil_img_tensor_path)   

D:/project_x_ray_CLIP/data/physionet.org/files/mimic-cxr-jpg/2.0.0/files/p10/p10000032/s50414267/02aa804e-bde0afdd-112c0b34-7bc16630-4e384014_biovil.pth


In [10]:
img_paths = data.file_path
tensor_path = data.BiomedClip_tensor_path
total = len(tensor_path)
print(total)
print(len(tensor_path), len(img_paths))
dev = total // 10
count = 0
# for i, j in enumerate(img_paths):
#   print(type(j), j,  tensor_path[i])
#   CLIP_Process(str(j), tensor_path[i])
#   if i % dev == 0:
#     print(i)
#   else:
#     continue


from PIL import Image, ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True
for  (img_path, tensor_path) in (zip(img_paths, tensor_path)):
  try:
    # print( type(img_path), img_path, type(ten,,l[pl-0o-or_path), tensor_path)
    BiomedCLIP_processor(img_path, tensor_path)
    if count%dev == 0:
      print(count/dev)
      print(img_path, tensor_path)
    count+=1
  except Exception as e:
    print(e)





img_paths = data.file_path
tensor_path = data.tensor_path
total = len(tensor_path)
print(total)
print(len(tensor_path), len(img_paths))
dev = total // 10
count = 0


from PIL import Image, ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True
for  (img_path, tensor_path) in (zip(img_paths, tensor_path)):
  try:
    # print( type(img_path), img_path, type(ten,,l[pl-0o-or_path), tensor_path)
    CLIP_Process(img_path, tensor_path)
    if count%dev == 0:
      print(count/dev)
      print(img_path, tensor_path)
    count+=1
  except Exception as e:
    print(e)
  

  

5
5 5


NameError: name 'get_image_inference' is not defined

### testing data image process

In [13]:
import pandas as pd
import os 
pwd = os.getcwd()
data = pd.read_csv(pwd + r"/data/mimic-cxr-train/P10_12_test_12_1.csv")
source = "/public_bme/data/lds"
data.file_path[0].replace("//", '/')



Unnamed: 0.1,Unnamed: 0,label,img_path,train_label,file_path,split,BiomedClip_img_tensor_path,Clip_img_tensor_path,sid,test_14_labels
0,0,"{'Atelectasis': 1, 'Cardiomegaly': 1, 'Consoli...",D:/project_x_ray_CLIP/data/physionet.org/files...,"[1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1]",D:/project_x_ray_CLIP/data/physionet.org/files...,test,/public_bme/data/lds/p10/p10032725/s50331901/6...,/public_bme/data/lds/p10/p10032725/s50331901/6...,50331901,"[2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 0]"
1,1,"{'Atelectasis': 1, 'Cardiomegaly': 1, 'Consoli...",D:/project_x_ray_CLIP/data/physionet.org/files...,"[1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1]",D:/project_x_ray_CLIP/data/physionet.org/files...,test,/public_bme/data/lds/p10/p10032725/s55504914/f...,/public_bme/data/lds/p10/p10032725/s55504914/f...,55504914,"[2, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 0]"
2,2,"{'Atelectasis': 1, 'Cardiomegaly': 1, 'Consoli...",D:/project_x_ray_CLIP/data/physionet.org/files...,"[1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1]",D:/project_x_ray_CLIP/data/physionet.org/files...,test,/public_bme/data/lds/p10/p10046166/s50051329/4...,/public_bme/data/lds/p10/p10046166/s50051329/4...,50051329,"[2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2]"
3,3,"{'Atelectasis': 1, 'Cardiomegaly': 1, 'Consoli...",D:/project_x_ray_CLIP/data/physionet.org/files...,"[1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1]",D:/project_x_ray_CLIP/data/physionet.org/files...,test,/public_bme/data/lds/p10/p10046166/s50051329/a...,/public_bme/data/lds/p10/p10046166/s50051329/a...,50051329,"[2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2]"
4,4,"{'Atelectasis': 1, 'Cardiomegaly': 1, 'Consoli...",D:/project_x_ray_CLIP/data/physionet.org/files...,"[1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1]",D:/project_x_ray_CLIP/data/physionet.org/files...,test,/public_bme/data/lds/p10/p10046166/s51738740/3...,/public_bme/data/lds/p10/p10046166/s51738740/3...,51738740,"[2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2]"
...,...,...,...,...,...,...,...,...,...,...
1170,1170,"{'Atelectasis': 2, 'Cardiomegaly': 0, 'Consoli...",D:/project_x_ray_CLIP/data/physionet.org/files...,"[2, 0, 2, 0, 2, 2, 2, 2, 2, 0, 2, 2, 2]",D:/project_x_ray_CLIP/data/physionet.org/files...,test,/public_bme/data/lds/p12/p12963531/s59505688/4...,/public_bme/data/lds/p12/p12963531/s59505688/4...,59505688,"[2, 0, 2, 0, 2, 2, 2, 2, 1, 0, 2, 2, 2, 2]"
1171,1171,"{'Atelectasis': 2, 'Cardiomegaly': 0, 'Consoli...",D:/project_x_ray_CLIP/data/physionet.org/files...,"[2, 0, 2, 0, 2, 2, 2, 2, 2, 0, 2, 2, 2]",D:/project_x_ray_CLIP/data/physionet.org/files...,test,/public_bme/data/lds/p12/p12963531/s59505688/5...,/public_bme/data/lds/p12/p12963531/s59505688/5...,59505688,"[2, 0, 2, 0, 2, 2, 2, 2, 1, 0, 2, 2, 2, 2]"
1172,1172,"{'Atelectasis': 2, 'Cardiomegaly': 2, 'Consoli...",D:/project_x_ray_CLIP/data/physionet.org/files...,"[2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 0, 2]",D:/project_x_ray_CLIP/data/physionet.org/files...,test,/public_bme/data/lds/p12/p12966004/s55553875/d...,/public_bme/data/lds/p12/p12966004/s55553875/d...,55553875,"[2, 2, 2, 2, 2, 2, 2, 0, 1, 2, 2, 0, 2, 2]"
1173,1173,"{'Atelectasis': 2, 'Cardiomegaly': 2, 'Consoli...",D:/project_x_ray_CLIP/data/physionet.org/files...,"[2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 1]",D:/project_x_ray_CLIP/data/physionet.org/files...,test,/public_bme/data/lds/p12/p12966004/s57399078/8...,/public_bme/data/lds/p12/p12966004/s57399078/8...,57399078,"[2, 2, 2, 2, 2, 2, 2, 0, 1, 2, 2, 2, 1, 0]"


In [13]:
def fun(x, pre):
  x = x.replace("//", '/')
  a = x.split("/")
  b = a[-4:]
  temp = pre + "/" + "/".join(b)
  return(temp)

img_paths = data.file_path
tensor_path = data.BiomedClip_img_tensor_path
total = len(tensor_path)
print(total)
print(len(tensor_path), len(img_paths))
dev = total // 10
count = 0
# for i, j in enumerate(img_paths):
#   print(type(j), j,  tensor_path[i])
#   CLIP_Process(str(j), tensor_path[i])
#   if i % dev == 0:
#     print(i)
#   else:
#     continue


from PIL import Image, ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True
for  (img_path, tensor_path) in (zip(img_paths, tensor_path)):
  try:
    # print( type(img_path), img_path, type(ten,,l[pl-0o-or_path), tensor_path)
    img_path = fun(img_path, source) 
    BiomedCLIP_processor(img_path, tensor_path)
    if count%dev == 0:
      print(count/dev)
      print(img_path, tensor_path)
    count+=1
  except Exception as e:
    print(e)





img_paths = data.file_path
tensor_path = data.Clip_img_tensor_path
total = len(tensor_path)
print(total)
print(len(tensor_path), len(img_paths))
dev = total // 10
count = 0


from PIL import Image, ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True
for  (img_path, tensor_path) in (zip(img_paths, tensor_path)):
  try:
    # print( type(img_path), img_path, type(ten,,l[pl-0o-or_path), tensor_path)
    img_path = fun(img_path, source)
    CLIP_Process(img_path, tensor_path)
    if count%dev == 0:
      print(count/dev)
      print(img_path, tensor_path)
    count+=1
  except Exception as e:
    print(e)
  

  

1175
1175 1175
0.0
/public_bme/data/lds/p10/p10032725/s50331901/687754ce-7420bfd3-0a19911f-a27a3916-9019cd53.jpg /public_bme/data/lds/p10/p10032725/s50331901/687754ce-7420bfd3-0a19911f-a27a3916-9019cd53_biomed.pth
1.0
/public_bme/data/lds/p10/p10439781/s51129150/1d74ca1d-12ac2785-bd84a322-376f04bc-b9fdaa99.jpg /public_bme/data/lds/p10/p10439781/s51129150/1d74ca1d-12ac2785-bd84a322-376f04bc-b9fdaa99_biomed.pth
2.0
/public_bme/data/lds/p10/p10885696/s56443683/5b429228-9769c874-369577de-11d25077-c9ad1f2b.jpg /public_bme/data/lds/p10/p10885696/s56443683/5b429228-9769c874-369577de-11d25077-c9ad1f2b_biomed.pth
3.0
/public_bme/data/lds/p10/p10975446/s55185117/0d768fcf-0bb1bca1-eb1fe1d6-686b876b-675a2e95.jpg /public_bme/data/lds/p10/p10975446/s55185117/0d768fcf-0bb1bca1-eb1fe1d6-686b876b-675a2e95_biomed.pth
4.0
/public_bme/data/lds/p11/p11293517/s51788928/d488ce83-528fa722-abe67b2b-ef58f254-0d7db9b2.jpg /public_bme/data/lds/p11/p11293517/s51788928/d488ce83-528fa722-abe67b2b-ef58f254-0d7db9b2_b

### testing data

In [1]:
import os 
import pandas as pd
pwd = os.getcwd()

data = pd.read_csv(pwd + r"/data/mimic-cxr-train/P10_12_test_12_16_labels14.csv")
data.head()
print(data.file_path[10])
print(len(data))
data.head()

D:/project_x_ray_CLIP/data/physionet.org/files/mimic-cxr-jpg/2.0.0/files/p10//p10046166/s56173345//da33ac9f-b047f007-dd9e0ac7-81b4a35e-bb2b6b5b.jpg
1175


Unnamed: 0.1,Unnamed: 0,label,img_path,train_label,file_path,split,BiomedClip_img_tensor_path,Clip_img_tensor_path,sid,test_14_labels
0,0,"{'Atelectasis': 1, 'Cardiomegaly': 1, 'Consoli...",D:/project_x_ray_CLIP/data/physionet.org/files...,"[1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1]",D:/project_x_ray_CLIP/data/physionet.org/files...,test,/public_bme/data/lds/p10/p10032725/s50331901/6...,/public_bme/data/lds/p10/p10032725/s50331901/6...,50331901,"[2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 0]"
1,1,"{'Atelectasis': 1, 'Cardiomegaly': 1, 'Consoli...",D:/project_x_ray_CLIP/data/physionet.org/files...,"[1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1]",D:/project_x_ray_CLIP/data/physionet.org/files...,test,/public_bme/data/lds/p10/p10032725/s55504914/f...,/public_bme/data/lds/p10/p10032725/s55504914/f...,55504914,"[2, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 0]"
2,2,"{'Atelectasis': 1, 'Cardiomegaly': 1, 'Consoli...",D:/project_x_ray_CLIP/data/physionet.org/files...,"[1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1]",D:/project_x_ray_CLIP/data/physionet.org/files...,test,/public_bme/data/lds/p10/p10046166/s50051329/4...,/public_bme/data/lds/p10/p10046166/s50051329/4...,50051329,"[2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2]"
3,3,"{'Atelectasis': 1, 'Cardiomegaly': 1, 'Consoli...",D:/project_x_ray_CLIP/data/physionet.org/files...,"[1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1]",D:/project_x_ray_CLIP/data/physionet.org/files...,test,/public_bme/data/lds/p10/p10046166/s50051329/a...,/public_bme/data/lds/p10/p10046166/s50051329/a...,50051329,"[2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2]"
4,4,"{'Atelectasis': 1, 'Cardiomegaly': 1, 'Consoli...",D:/project_x_ray_CLIP/data/physionet.org/files...,"[1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1]",D:/project_x_ray_CLIP/data/physionet.org/files...,test,/public_bme/data/lds/p10/p10046166/s51738740/3...,/public_bme/data/lds/p10/p10046166/s51738740/3...,51738740,"[2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2]"


In [2]:
from pathlib import Path
import os

file_path = data.file_path
filename = file_path[0]
print(filename)

def change_suffix(filename, custom = ""):
    # path_obj = Path(filename)
    # filename = path_obj.stem
    # # print(filename)
    # # 构建新的文件路径
    # new_file_path = path_obj.with_suffix(new_suffix)
    # print(new_file_path)
    modified_string = filename.replace("//", "/")
    modified_string = modified_string.replace("D:/project_x_ray_CLIP/data/physionet.org/files/mimic-cxr-jpg/2.0.0/files/", "/public_bme/data/lds/")
    filename, file_extension = os.path.splitext(modified_string)
    # print(filename)
    return filename+f"_{custom}"+".pth"
data['Biovil_img_tensor_path'] = data['file_path'].apply(lambda x: change_suffix(x, "biovil"))
data.to_csv(pwd + r"/data/mimic-cxr-train/P10_12_test_1_29_labels14_biovil.csv", index=False)
data = pd.read_csv(pwd + r"/data/mimic-cxr-train/P10_12_test_1_29_labels14_biovil.csv", index_col=0)
data.head()


D:/project_x_ray_CLIP/data/physionet.org/files/mimic-cxr-jpg/2.0.0/files/p10//p10032725/s50331901//687754ce-7420bfd3-0a19911f-a27a3916-9019cd53.jpg


Unnamed: 0_level_0,label,img_path,train_label,file_path,split,BiomedClip_img_tensor_path,Clip_img_tensor_path,sid,test_14_labels,Biovil_img_tensor_path
Unnamed: 0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0,"{'Atelectasis': 1, 'Cardiomegaly': 1, 'Consoli...",D:/project_x_ray_CLIP/data/physionet.org/files...,"[1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1]",D:/project_x_ray_CLIP/data/physionet.org/files...,test,/public_bme/data/lds/p10/p10032725/s50331901/6...,/public_bme/data/lds/p10/p10032725/s50331901/6...,50331901,"[2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 0]",/public_bme/data/lds/p10/p10032725/s50331901/6...
1,"{'Atelectasis': 1, 'Cardiomegaly': 1, 'Consoli...",D:/project_x_ray_CLIP/data/physionet.org/files...,"[1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1]",D:/project_x_ray_CLIP/data/physionet.org/files...,test,/public_bme/data/lds/p10/p10032725/s55504914/f...,/public_bme/data/lds/p10/p10032725/s55504914/f...,55504914,"[2, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 0]",/public_bme/data/lds/p10/p10032725/s55504914/f...
2,"{'Atelectasis': 1, 'Cardiomegaly': 1, 'Consoli...",D:/project_x_ray_CLIP/data/physionet.org/files...,"[1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1]",D:/project_x_ray_CLIP/data/physionet.org/files...,test,/public_bme/data/lds/p10/p10046166/s50051329/4...,/public_bme/data/lds/p10/p10046166/s50051329/4...,50051329,"[2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2]",/public_bme/data/lds/p10/p10046166/s50051329/4...
3,"{'Atelectasis': 1, 'Cardiomegaly': 1, 'Consoli...",D:/project_x_ray_CLIP/data/physionet.org/files...,"[1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1]",D:/project_x_ray_CLIP/data/physionet.org/files...,test,/public_bme/data/lds/p10/p10046166/s50051329/a...,/public_bme/data/lds/p10/p10046166/s50051329/a...,50051329,"[2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2]",/public_bme/data/lds/p10/p10046166/s50051329/a...
4,"{'Atelectasis': 1, 'Cardiomegaly': 1, 'Consoli...",D:/project_x_ray_CLIP/data/physionet.org/files...,"[1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1]",D:/project_x_ray_CLIP/data/physionet.org/files...,test,/public_bme/data/lds/p10/p10046166/s51738740/3...,/public_bme/data/lds/p10/p10046166/s51738740/3...,51738740,"[2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2]",/public_bme/data/lds/p10/p10046166/s51738740/3...


### customize data path in ws (generalize)

In [None]:
data = pd.read_csv(pwd + r"/data/mimic-cxr-train/P10_12_test_12_1.csv", index_col=0)

In [None]:
data = pd.read_csv(pwd + r"/data/mimic-cxr-train/P10_12_train_12_1.csv", index_col=0)
data.head()
print(data.Clip_img_tensor_path.iloc[0])
def fun(x, pre):
  a = x.split("/")
  b = a[-4:]
  temp = pre + "/" + "/".join(b)
  return(temp)
def generalize_path(df, loc=None, pre = None):
  df["BiomedClip_img_tensor_path"] = df["BiomedClip_img_tensor_path"].apply(lambda x: fun(x, pre))
  df["Clip_img_tensor_path"] = df["Clip_img_tensor_path"].apply(lambda x: fun(x, pre))
  df.to_csv(loc, index=False)
  
  # print(df)

  return df

generalize_path(data, pre = "/public_bme/data/lds", loc = pwd + r"/data/mimic-cxr-train/P10_12_train_12_1.csv")

D:/project_x_ray_CLIP/data/physionet.org/files/mimic-cxr-jpg/2.0.0/files/p10/p10032725/s50331901/687754ce-7420bfd3-0a19911f-a27a3916-9019cd53_clip.pth


Unnamed: 0_level_0,label,img_path,train_label,file_path,split,BiomedClip_img_tensor_path,Clip_img_tensor_path
study_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
50331901,"{'Atelectasis': 1, 'Cardiomegaly': 1, 'Consoli...",D:/project_x_ray_CLIP/data/physionet.org/files...,"[1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1]",D:/project_x_ray_CLIP/data/physionet.org/files...,test,/public_bme/data/lds/p10/p10032725/s50331901/6...,/public_bme/data/lds/p10/p10032725/s50331901/6...
55504914,"{'Atelectasis': 1, 'Cardiomegaly': 1, 'Consoli...",D:/project_x_ray_CLIP/data/physionet.org/files...,"[1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1]",D:/project_x_ray_CLIP/data/physionet.org/files...,test,/public_bme/data/lds/p10/p10032725/s55504914/f...,/public_bme/data/lds/p10/p10032725/s55504914/f...
50051329,"{'Atelectasis': 1, 'Cardiomegaly': 1, 'Consoli...",D:/project_x_ray_CLIP/data/physionet.org/files...,"[1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1]",D:/project_x_ray_CLIP/data/physionet.org/files...,test,/public_bme/data/lds/p10/p10046166/s50051329/4...,/public_bme/data/lds/p10/p10046166/s50051329/4...
50051329,"{'Atelectasis': 1, 'Cardiomegaly': 1, 'Consoli...",D:/project_x_ray_CLIP/data/physionet.org/files...,"[1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1]",D:/project_x_ray_CLIP/data/physionet.org/files...,test,/public_bme/data/lds/p10/p10046166/s50051329/a...,/public_bme/data/lds/p10/p10046166/s50051329/a...
51738740,"{'Atelectasis': 1, 'Cardiomegaly': 1, 'Consoli...",D:/project_x_ray_CLIP/data/physionet.org/files...,"[1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1]",D:/project_x_ray_CLIP/data/physionet.org/files...,test,/public_bme/data/lds/p10/p10046166/s51738740/3...,/public_bme/data/lds/p10/p10046166/s51738740/3...
...,...,...,...,...,...,...,...
59505688,"{'Atelectasis': 2, 'Cardiomegaly': 0, 'Consoli...",D:/project_x_ray_CLIP/data/physionet.org/files...,"[2, 0, 2, 0, 2, 2, 2, 2, 2, 0, 2, 2, 2]",D:/project_x_ray_CLIP/data/physionet.org/files...,test,/public_bme/data/lds/p12/p12963531/s59505688/4...,/public_bme/data/lds/p12/p12963531/s59505688/4...
59505688,"{'Atelectasis': 2, 'Cardiomegaly': 0, 'Consoli...",D:/project_x_ray_CLIP/data/physionet.org/files...,"[2, 0, 2, 0, 2, 2, 2, 2, 2, 0, 2, 2, 2]",D:/project_x_ray_CLIP/data/physionet.org/files...,test,/public_bme/data/lds/p12/p12963531/s59505688/5...,/public_bme/data/lds/p12/p12963531/s59505688/5...
55553875,"{'Atelectasis': 2, 'Cardiomegaly': 2, 'Consoli...",D:/project_x_ray_CLIP/data/physionet.org/files...,"[2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 0, 2]",D:/project_x_ray_CLIP/data/physionet.org/files...,test,/public_bme/data/lds/p12/p12966004/s55553875/d...,/public_bme/data/lds/p12/p12966004/s55553875/d...
57399078,"{'Atelectasis': 2, 'Cardiomegaly': 2, 'Consoli...",D:/project_x_ray_CLIP/data/physionet.org/files...,"[2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 1]",D:/project_x_ray_CLIP/data/physionet.org/files...,test,/public_bme/data/lds/p12/p12966004/s57399078/8...,/public_bme/data/lds/p12/p12966004/s57399078/8...


In [34]:
len(data)

1175

In [17]:


img_paths = data.file_path
tensor_path = data.tensor_path
total = len(tensor_path)
print(len(tensor_path), len(img_paths))
dev = total // 10
count = 0
# for i, j in enumerate(img_paths):
#   print(type(j), j,  tensor_path[i])
#   CLIP_Process(str(j), tensor_path[i])
#   if i % dev == 0:
#     print(i)
#   else:
#     continue


from PIL import Image, ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True
for  (img_path, tensor_path) in (zip(img_paths, tensor_path)):
  try:
    # print( type(img_path), img_path, type(ten,,l[pl-0o-or_path), tensor_path)
    CLIP_Process(img_path, tensor_path)
    if count%dev == 0:
      print(count/dev)
      print(img_path, tensor_path)
    count+=1
  except Exception as e:
    print(e)
    
    
    
img_paths = data.file_path
tensor_path = data.BiomedClip_tensor_path
total = len(tensor_path)
print(total)
print(len(tensor_path), len(img_paths))
dev = total // 10
count = 0
# for i, j in enumerate(img_paths):
#   print(type(j), j,  tensor_path[i])
#   CLIP_Process(str(j), tensor_path[i])
#   if i % dev == 0:
#     print(i)
#   else:
#     continue


from PIL import Image, ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True
for  (img_path, tensor_path) in (zip(img_paths, tensor_path)):
  try:
    # print( type(img_path), img_path, type(ten,,l[pl-0o-or_path), tensor_path)
    BiomedCLIP_processor(img_path, tensor_path)
    if count%dev == 0:
      print(count/dev)
      print(img_path, tensor_path)
    count+=1
  except Exception as e:
    print(e)

AttributeError: 'DataFrame' object has no attribute 'tensor_path'

In [None]:
(data["tensor_path"][1461])

'D:/project_x_ray_CLIP/data/physionet.org/files/mimic-cxr-jpg/2.0.0/files/p10/p10032725/s50331901/687754ce-7420bfd3-0a19911f-a27a3916-9019cd53.pth'

## Prompt preprocess

<font color=red size=5>convert textual prompts into tensors</font>

In [3]:
import sys
sys.path.append('..')  # 将上级目录添加到sys.path中
from constants import BASIC_PROMPT
import os
import clip
import torch
from torchvision.transforms import Compose, Resize, CenterCrop, ToTensor, Normalize
from PIL import Image
import numpy as np
from typing import Any, Dict, Optional, Tuple, Union
import open_clip
import copy
import torch.nn as nn
import torch.functional as F
from torchvision.transforms import InterpolationMode
# print(f'\033[31mthe type of text_inputs : {type(text_inputs)}\033[0m')


class Prompt_preprocess(nn.Module):
    def __init__(self, text_embedding_dim = 512, num_transformer_heads = 8, num_transformer_layers = 6, proj_bias = False, nntype = None, prompt_type = "basic"):
        super().__init__()
        # 初始化 CLIP 预训练模型和处理器
        self.projection_head = nn.Linear(512, 512, bias=False)
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.prompt = prompt_type
        if nntype == None:
            self.backbone = "clip"
        else:
          self.backbone = nntype        
        if self.backbone in ["biomed", "BiomedCLIP", "biomedclip"]:
            import open_clip
            self.clip_model, _, _ = open_clip.create_model_and_transforms('hf-hub:microsoft/BiomedCLIP-PubMedBERT_256-vit_base_patch16_224')
            self.tokenizer = open_clip.get_tokenizer('hf-hub:microsoft/BiomedCLIP-PubMedBERT_256-vit_base_patch16_224')
        elif self.backbone == "custom":
            raise NotImplemented("has not implemented the custom backbone in text branch")
        else:
            ## the default backbone is CLIP -- text encoder
            self.clip_model, self.clip_processor  = clip.load("ViT-B/32", device=self.device)
        # 冻结 CLIP 部分的参数
        if self.backbone != "custom":
          for param in self.clip_model.parameters():
              param.requires_grad = False
        # text orthogonal 部分
    
    def process(self, save = False, path = r"D:\exchange\ShanghaiTech\learning\code\diagnosisP\x_ray_constrastive\data\prompts_tensors\basic"):
      text_features = []
      with torch.no_grad():
          for text_input in BASIC_PROMPT:
            print(">>>>>>>>>>>>>>>>>>>",text_input)
            if self.backbone in ["biomed", "BiomedCLIP", "biomedclip"]:
                context_length = 256
                self.clip_model.to(self.device)
                self.clip_model.eval()
                # print(self.tokenizer(text_input, context_length=context_length).cuda())
                _, text_feature, _= self.clip_model(None, self.tokenizer(text_input, context_length=context_length).cuda())
                # text_feature = torch.tensor(text_feature)
                text_feature = text_feature.clone().detach()
                text_feature /= text_feature.norm(dim=-1, keepdim=True)
                text_features.append(text_feature)                  
            else:
                text_feature = self.clip_model.encode_text(clip.tokenize(text_input).cuda()).cuda().float()
                text_feature /= text_feature.norm(dim=-1, keepdim=True)
                text_features.append(text_feature)
      # text-features shape - [batch, num of text, dim]
      # print(len(text_features), torch.tensor(text_features[0]).shape)
      text_features = torch.stack(text_features, dim = 0).squeeze()
      if save:
        folder = os.path.exists(path)
         
        if not folder:                   #判断是否存在文件夹如果不存在则创建为文件夹
          os.makedirs(path)            #makedirs 创建文件时如果路径不存在会创建这个路径
        pth_path = path +"/" + self.backbone + "_" + self.prompt + ".pt"
        torch.save(text_features, pth_path)
          
test = Prompt_preprocess(nntype="clip")
test.process(save=True)

>>>>>>>>>>>>>>>>>>> images for Atelectasis
>>>>>>>>>>>>>>>>>>> images for Cardiomegaly
>>>>>>>>>>>>>>>>>>> images for Consolidation
>>>>>>>>>>>>>>>>>>> images for Edema
>>>>>>>>>>>>>>>>>>> images for Enlarged Cardiomediastinum
>>>>>>>>>>>>>>>>>>> images for Fracture
>>>>>>>>>>>>>>>>>>> images for Lung Lesion
>>>>>>>>>>>>>>>>>>> images for Lung Opacity
>>>>>>>>>>>>>>>>>>> images for No Finding
>>>>>>>>>>>>>>>>>>> images for Pleural Effusion
>>>>>>>>>>>>>>>>>>> images for Pleural Other
>>>>>>>>>>>>>>>>>>> images for Pneumonia
>>>>>>>>>>>>>>>>>>> images for Pneumothorax
>>>>>>>>>>>>>>>>>>> images for Support Devices


In [4]:
tensor = torch.load(r"D:\exchange\ShanghaiTech\learning\code\diagnosisP\x_ray_constrastive\data\prompts_tensors\basic\clip_basic.pt")
print(tensor.shape)
tensor1 = torch.load(r"D:\exchange\ShanghaiTech\learning\code\diagnosisP\x_ray_constrastive\data\prompts_tensors\basic\biomedclip_basic.pt")
print(tensor1.shape)

torch.Size([14, 512])
torch.Size([14, 512])


-----
# csv 数据转换

In [None]:
import pandas as pd
train = pd.read_csv(r"D:\exchange\ShanghaiTech\learning\code\diagnosisP\x_ray_constrastive\data\mimic-cxr-train\P10_12_train_11_19.csv", index_col=0)
print(train.head().iloc[0].tensor_path)    

def replace(x):
  tensor_path = x.replace("D:/project_x_ray_CLIP/data/physionet.org/files/mimic-cxr-jpg/2.0.0/files/", "/public_bme/data/lds/")
  temp = tensor_path.split(".")
  clip = temp[0]+ "_clip"
  biomed = temp[0] + "_biomed"
  clip = ".".join(clip)
  biomed = ".".join(biomed)
  return clip, biomed

train["ws_tensor_path"] , train["ws_biomed_tensor_path"]= train.tensor_path.apply(lambda x: replace(x))

train
  