In [1]:
import warnings

import numpy as np
import pandas as pd
import torch
from PIL import Image
from tqdm import tqdm
from transformers import OneFormerProcessor, OneFormerForUniversalSegmentation

warnings.filterwarnings('ignore')

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# The processor and the model are loaded from the same Hugging Face checkpoint,
# so the identifier is spelled once and shared.
# This checkpoint uses a DiNAT backbone; loading it raises ImportError unless
# the `natten` package is installed in the kernel's environment.
checkpoint = "shi-labs/oneformer_ade20k_dinat_large"
processor = OneFormerProcessor.from_pretrained(checkpoint)
model = OneFormerForUniversalSegmentation.from_pretrained(checkpoint)

ImportError: 
DinatBackbone requires the natten library but it was not found in your environment. You can install it by referring to:
shi-labs.com/natten . You can also install it with pip (may take longer to build):
`pip install natten`. Please note that you may need to restart your runtime after installation.


In [None]:
# Load the test manifest; the inference loop later in the notebook reads the
# image paths from its 'img_path' column. The trailing expression previews the
# first rows as the cell output.
df = pd.read_csv('test.csv')
df.head()

In [None]:
def rle_encode(mask):
    """Run-length encode a binary mask into a space-separated string.

    The mask is flattened in row-major order and encoded as alternating
    ``start length`` pairs, where ``start`` is the 1-based position of the
    first pixel of each foreground run.

    Args:
        mask: NumPy array holding a binary mask (zero = background).

    Returns:
        The encoded string, e.g. ``'2 2 6 1'``; an empty string when the mask
        contains no foreground pixel.
    """
    # Zero-pad both ends so runs touching the borders still produce a
    # background->foreground and a foreground->background transition.
    padded = np.concatenate([[0], mask.flatten(), [0]])
    # 1-based positions where the value changes: even slots are run starts,
    # odd slots are the positions just past each run's end.
    boundaries = np.flatnonzero(padded[1:] != padded[:-1]) + 1
    # Turn every end position into a run length (end - start), in place.
    boundaries[1::2] -= boundaries[::2]
    return ' '.join(map(str, boundaries))

In [None]:
# Maps each of the 12 target class ids (keys) to the list of predicted label
# ids (values) that are merged into that class; the predictions come from an
# ADE20K-trained checkpoint.
# An empty list means no predicted label feeds that class, so the inference
# loop always records -1 for it.
# NOTE(review): label 32 is listed under both class 2 and class 3, so the same
# pixels end up in two output masks — confirm this overlap is intended.
ade20k_to_12 = {
    0: [6],
    1: [11, 53],
    2: [0, 1, 32],
    3: [32],
    4: [87],
    5: [136],
    6: [43],
    7: [4, 17],
    8: [2],
    9: [12],
    10: [],
    11: [20, 80, 116]
}

In [None]:
# Load the submission template.
# NOTE(review): the same file is read into `submit` again in a later cell,
# right before `mask_rle` is assigned, so this load appears redundant.
submit = pd.read_csv('./sample_submission.csv')

In [None]:
# Run semantic segmentation on every test image and collect, in row order,
# one entry per (image, target class) pair: an RLE string when the class is
# present in the prediction, otherwise -1.

# The inputs are moved to `device` below, so the model has to live on the same
# device; otherwise the forward pass fails with a device-mismatch error.
# Fall back to CPU when no GPU is available instead of crashing.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)
model.eval()

result = []
# Iterate over the column values directly rather than looking them up by
# integer label, which would break on a non-default DataFrame index.
for img_path in tqdm(df['img_path']):
    # The context manager releases the file handle; resize() returns an
    # independent, fully loaded copy that stays valid afterwards.
    with Image.open(img_path) as raw_image:
        # image resize
        image = raw_image.resize((960, 540))

    semantic_inputs = processor(images=image, task_inputs=["semantic"], return_tensors="pt")

    for key in list(semantic_inputs.keys()):
        semantic_inputs[key] = semantic_inputs[key].to(device)

    # Inference only: skip autograd bookkeeping so no graph is kept per image.
    with torch.no_grad():
        semantic_outputs = model(**semantic_inputs)

    # pass through image_processor for postprocessing;
    # PIL's size is (width, height), target_sizes expects (height, width)
    predicted_semantic_map = processor.post_process_semantic_segmentation(
        semantic_outputs, target_sizes=[image.size[::-1]]
    )[0]

    # Drop the references early so the tensors can be freed before the next image.
    del semantic_inputs, semantic_outputs
    predicted_semantic_map_np = predicted_semantic_map.cpu().numpy()

    # convert to rle: one entry per target class, in dict order
    for key, value in ade20k_to_12.items():
        key_mask = np.isin(predicted_semantic_map_np, value)
        if np.sum(key_mask) > 0:
            mask_rle = rle_encode(key_mask)
            result.append(mask_rle)
        else:
            # Class absent from this image.
            result.append(-1)

In [None]:
# Start from a fresh copy of the submission template and attach the collected
# predictions as the 'mask_rle' column (one entry per template row; pandas
# raises if the lengths differ). The trailing expression displays the frame.
submission_template = pd.read_csv('./sample_submission.csv')
submit = submission_template.assign(mask_rle=result)
submit

In [None]:
# Write the submission to disk without the DataFrame index column.
# NOTE(review): the file name says "segformer", but the predictions in this
# notebook come from a OneFormer checkpoint — consider renaming once nothing
# else depends on this path.
submit.to_csv('segformer_pretrain_submit.csv', index=False)