In [1]:
import pycocotools.mask as mask_util
from datasets.severstal_coco import DatasetCOCO
from torchvision import transforms

from datasets.utils import blend_image_segmentation
from models.clipseg import CLIPDensePredT
import torch
from PIL import Image
import numpy as np
import pandas as pd
import os
from tqdm import tqdm

In [2]:
# --- Paths -------------------------------------------------------------------
# NOTE(review): both paths are absolute and machine-specific; point them at your
# own checkout before running this notebook on another machine.
data_path = '/home/eas/Enol/pycharm_projects/clipseg_steel_defect/Severstal/train_subimages'
weights = '/home/eas/Enol/pycharm_projects/clipseg_steel_defect/logs/rd64-7K-vit16-cbh-coco-enol-5classes_no_neg/weights.pth'

# --- Preprocessing -----------------------------------------------------------
# Normalisation statistics handed to transforms.Normalize; the same value is
# used for all three channels.
mean = [0.34388125, 0.34388125, 0.34388125]
std = [0.13965334, 0.13965334, 0.13965334]
image_size = 256

# Text prompt associated with each class id. Defined exactly once so the prompts
# cannot drift apart between two copies of the same dictionary.
COCO_CLASSES = {1: 'network of fine, hairline cracks or fissures on the surface of the steel',
                2: 'scale—oxides embedded into the steel plate',
                3: 'shallow, narrow grooves or lines on the surface of the steel',
                4: 'impurity or foreign material embedded within the steel matrix',
                5: 'defects on a steel plate'}

# Resize to a square image_size x image_size input, convert to a tensor, normalise.
transform = transforms.Compose([
    transforms.Resize((image_size, image_size)),
    transforms.ToTensor(),
    transforms.Normalize(mean, std)
])

# --- Model -------------------------------------------------------------------
model = CLIPDensePredT(version='ViT-B/16', reduce_dim=64)
# The checkpoint is deserialised onto the CPU and the model is moved to the GPU
# afterwards. strict=False means checkpoint/model key mismatches do not raise.
model.load_state_dict(torch.load(weights, weights_only=True, map_location=torch.device('cpu')), strict=False)
model.cuda()
model.eval()

# Dataset split handed to DatasetCOCO in a later cell.
split = 'train'

In [3]:
# Three independent, empty result tables sharing the same two-column layout:
# one for query-image embeddings, one for text-prompt embeddings and one for
# masked visual-support embeddings.
df_queries, df_text_support, df_visual_support = (
    pd.DataFrame(columns=['class_id', 'embedding']) for _ in range(3)
)

In [4]:
# Build the dataset over `data_path` for the configured `split`, passing in the
# preprocessing pipeline `transform`.
# NOTE(review): the meaning of the trailing positional `True` is not visible in
# this notebook — confirm against DatasetCOCO's signature.
coco_dataset = DatasetCOCO(data_path, transform, split, True)

loading annotations into memory...
Done (t=0.54s)
creating index...
index created!


In [5]:
# Pull the two dataset attributes the extraction loop works with:
#   class_ids - iterated per class id and indexed by it to get annotation ids
#   metadata  - annotation store queried through loadAnns(ids=...)
class_ids, metadata = coco_dataset.ids_by_class, coco_dataset.img_metadata

In [6]:
# Extract three kinds of embeddings for every class:
#   * one text embedding per class prompt            -> df_text_support
#   * one visual embedding per annotated image       -> df_queries
#   * one masked visual embedding per annotation of
#     every class except class 5                     -> df_visual_support
#
# Rows are accumulated in plain Python lists and each DataFrame is built once at
# the end; enlarging a DataFrame row by row with `.loc` re-allocates on every
# insert and becomes quadratic over tens of thousands of rows.
query_rows, query_index = [], []
text_rows, text_index = [], []
visual_rows, visual_index = [], []

# Running row label shared by df_queries and df_visual_support, so a visual
# support row can be matched to its query row through the index.
idx = 0

# Inference only: nothing in this cell back-propagates, so autograd tracking is
# switched off for the whole loop.
with torch.no_grad():
    for class_id in class_ids:
        # Text conditioning vector for this class's prompt.
        text_cond = model.compute_conditional([COCO_CLASSES[class_id]])
        text_index.append(class_id)
        text_rows.append({'class_id': class_id,
                          'embedding': text_cond.squeeze(0).cpu().numpy().tolist()})

        for ann_id in tqdm(class_ids[class_id]):
            ann = metadata.loadAnns(ids=ann_id)[0]

            # The context manager closes the file handle as soon as the image
            # has been converted to a tensor. The transform is deterministic, so
            # one preprocessed tensor serves both the query and the support path.
            with Image.open(os.path.join(data_path, ann['image_id'])) as image:
                image_tensor = transform(image)

            # Query embedding: only the first element of the returned triple is kept.
            visual_q, _, _ = model.visual_forward(image_tensor.unsqueeze(0).cuda())
            query_index.append(idx)
            query_rows.append({'class_id': class_id,
                               'embedding': visual_q.squeeze(0).cpu().numpy().tolist()})

            # Class 5 is the generic 'defects on a steel plate' prompt; no masked
            # visual support embedding is produced for it.
            if class_id != 5:
                mask = mask_util.decode(ann['segmentation'])
                s_blend = blend_image_segmentation(image_tensor, mask, mode='crop_blur_highlight256', image_size=256)
                s_tensor = torch.Tensor(s_blend[0]).unsqueeze(0).cuda()
                visual_s_cond, _, _ = model.visual_forward(s_tensor)
                visual_index.append(idx)
                visual_rows.append({'class_id': class_id,
                                    'embedding': visual_s_cond.squeeze(0).cpu().numpy().tolist()})

            idx += 1

# Materialise the tables in one shot, keeping the same row labels that the
# row-by-row version produced (class id for text, running idx for the others).
df_text_support = pd.DataFrame(text_rows, columns=['class_id', 'embedding'], index=text_index)
df_queries = pd.DataFrame(query_rows, columns=['class_id', 'embedding'], index=query_index)
df_visual_support = pd.DataFrame(visual_rows, columns=['class_id', 'embedding'], index=visual_index)


100%|██████████| 1296/1296 [00:40<00:00, 32.06it/s]
100%|██████████| 251/251 [00:07<00:00, 33.39it/s]
100%|██████████| 9846/9846 [05:12<00:00, 31.49it/s]
100%|██████████| 1596/1596 [00:52<00:00, 30.18it/s]
100%|██████████| 48985/48985 [14:02<00:00, 58.14it/s]


In [7]:
# Persist each embedding table as CSV (row index included, pandas default).
for frame, csv_path in (
    (df_queries, '../Severstal/train_embeddings.csv'),
    (df_text_support, '../Severstal/text_embeddings.csv'),
    (df_visual_support, '../Severstal/visual_support_embeddings.csv'),
):
    frame.to_csv(csv_path)