In [2]:
import os
import sys
from glob import glob

import numpy as np
import pandas as pd

import cv2 as cv
from PIL import Image
import matplotlib.pyplot as plt

# Report the version of every package this notebook relies on.
basic_packages = [os, sys, glob, np, pd, cv, Image, plt]
print("Packages Version Infos :")
for pname in basic_packages:
    # Not every entry exposes `__version__` (stdlib modules, the `glob`
    # function, `matplotlib.pyplot`). That is the only failure expected here,
    # so catch exactly that instead of hiding every possible error.
    try:
        print(f"\t{pname.__name__}\t:\t{pname.__version__}")
    except AttributeError:
        print(f"\tmodule '{pname.__name__}' has no attribute '__version__'")

Packages Version Infos :
	module 'os' has no attribute '__version__'
	module 'sys' has no attribute '__version__'
	module 'glob' has no attribute '__version__'
	numpy	:	1.21.4
	pandas	:	1.3.4
	cv2	:	4.5.4
	PIL.Image	:	8.4.0
	module 'matplotlib.pyplot' has no attribute '__version__'


In [3]:
from os.path import split as spt
from os.path import join as jn

# Project layout, resolved from the notebook's working directory:
# BASE is the grandparent of the current working directory.
BASE = os.path.dirname(os.path.dirname(os.getcwd()))
CONF = jn(BASE, "config")
DATA = jn(BASE, "assets/data/plastic-segmentation")

# comp. Train and Test

In [5]:
# Collect the image and annotation file paths of both dataset splits.
TRAIN = jn(BASE, "assets/data/plastic-segmentation/Train")
TEST = jn(BASE, "assets/data/plastic-segmentation/Test")

PCLS = []   # class folder names seen so far (accumulated over both splits)
paths = {"image_path" : [], "annot_path": []}

for fpath in [TRAIN, TEST]:
    for bdir in sorted(os.listdir(fpath)):
        # An explicit test replaces the former assert / bare-except pair:
        # asserts vanish under `python -O`, and the bare except also routed
        # genuine errors (e.g. permission problems) into the annotation branch.
        if bdir.lower().startswith("p"):
            # Class folder (name starts with "p"/"P"): gather its images.
            img_paths = sorted(glob(jn(fpath, bdir) + "/*.jpg"))
            paths['image_path'].extend(img_paths)
            PCLS.append(bdir)
        else:
            # Every other entry is treated as an annotation root holding one
            # sub-folder per class. PCLS must already be filled at this point,
            # which relies on the class folders sorting before this entry.
            for pdir in sorted(set(PCLS)):
                annot_paths = sorted(glob(jn(fpath, bdir, pdir) + "/*.json"))
                paths['annot_path'].extend(annot_paths)

In [6]:
# Image paths look like ".../Train/PE/PE_074_10.jpg": the last three path
# pieces give the split, the label and the file name, and the file name itself
# carries the metainfo id and the feature number separated by "_".
_segments = [image_file.split('/') for image_file in paths['image_path']]
_name_tokens = [segment[-1].split('_') for segment in _segments]

kind = pd.Series([segment[-3] for segment in _segments], name="kind")
label = pd.Series([segment[-2] for segment in _segments], name="label")
metainfo_id = pd.Series([tokens[1] for tokens in _name_tokens], name="metainfo_id").astype(int)
# [:-4] drops the 4-character file extension before the integer conversion.
feats = pd.Series([tokens[2][:-4] for tokens in _name_tokens], name="feature").astype(int)

# Metadata columns first, followed by the raw image / annotation path columns.
baseDF = pd.concat([kind, label, metainfo_id, feats, pd.DataFrame(paths)], axis=1)

In [7]:
# Number of distinct metainfo ids in the combined table.
print(baseDF['metainfo_id'].unique().size)

# Image count per metainfo id, largest first (top 20 rows).
baseDF['metainfo_id'].value_counts().reset_index().head(20)

44


Unnamed: 0,index,metainfo_id
0,58,295
1,76,294
2,56,277
3,54,270
4,50,258
5,34,183
6,35,160
7,74,152
8,88,152
9,37,150


In [8]:
# List the column names of the combined table.
baseDF.columns

Index(['kind', 'label', 'metainfo_id', 'feature', 'image_path', 'annot_path'], dtype='object')

In [9]:
# Count how often each feature value occurs within every
# (kind, label, metainfo_id) group; wrapped in a DataFrame for tabular display.
pd.DataFrame(baseDF.groupby(["kind", "label", "metainfo_id"])["feature"].value_counts())

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,feature
kind,label,metainfo_id,feature,Unnamed: 4_level_1
Test,PE,74,1,1
Test,PE,74,3,1
Test,PE,74,5,1
Test,PE,74,7,1
Test,PE,74,9,1
...,...,...,...,...
Train,PS,42,7294,1
Train,PS,42,7482,1
Train,PS,42,7596,1
Train,PS,42,7605,1


In [10]:
# Sanity check: print the label set of every metainfo id that is attached to
# more than one label, then print how many distinct ids were inspected.
unique_mids = baseDF['metainfo_id'].unique()
for mid in unique_mids:
    mid_cls = baseDF.loc[baseDF['metainfo_id'] == mid, 'label'].unique()
    if len(mid_cls) > 1:
        print(mid_cls)
cnt = len(unique_mids)
print(cnt)

44


# Comp. Images and Annots

## way-1 : 위에서 생성한 변수 그대로 사용하는 방법

In [11]:
# Split the combined table by its `kind` column; the original row index is kept.
trnDF = baseDF.loc[baseDF['kind'].eq('Train')]
tstDF = baseDF.loc[baseDF['kind'].eq('Test')]

In [12]:
# example: first image / annotation pair of the 'PE' class in the training split
pe_train = trnDF[trnDF['label']=='PE']
# `.iloc[0]` selects the first row by position. A plain `[0]` is a label lookup
# on the integer index and raises KeyError as soon as the row labelled 0 is not
# part of the filtered frame (any other label, or the test split).
print(pe_train['image_path'].iloc[0])
print(pe_train['annot_path'].iloc[0])

/Volumes/WORK/Google/Colab-Server-VM/Waste-Recycling-Image-Segmentation/assets/data/plastic-segmentation/Train/PE/PE_074_10.jpg
/Volumes/WORK/Google/Colab-Server-VM/Waste-Recycling-Image-Segmentation/assets/data/plastic-segmentation/Train/annotation/PE/PE_074_10.json


## way-2 : ./config 에 생성한 함수 사용

```bash
%load "{CONF}/annots_cv.py"
%load "{CONF}/LoadDataset.py"
```

In [15]:
# %load "/Volumes/WORK/Google/Colab-Server-VM/Waste-Recycling-Image-Segmentation/config/annots_cv.py"
import numpy as np

import cv2 as cv
from PIL import Image
import matplotlib.pyplot as plt

class annotCats:
    """Draw point and rectangle annotations on one image with OpenCV.

    The image is stored as an RGB ``numpy`` array so the RGB tuples returned by
    :meth:`cvColor` can be handed directly to the OpenCV drawing calls.

    Args:
        image_path: Path of the image file to load.
        label: Value stored unchanged in ``self.label``.

    Attributes:
        colors: Colour-name -> RGB tuple mapping from :meth:`cvColor`.
        image: Loaded RGB array, or ``None`` when loading failed.
        updated_img: Canvas holding the most recent drawing, or ``None`` before
            anything has been drawn.
    """

    def __init__(self, image_path=None, label=None):
        self.colors = self.cvColor()
        self.label = label
        # Always define both attributes so a failed load cannot leave the
        # instance half-initialised.
        self.image = None
        self.updated_img = None
        try:
            # PIL decodes to RGB already and OpenCV drawing needs an ndarray,
            # so convert once here; no BGR->RGB swap is required.
            with Image.open(image_path) as pil_image:
                self.image = np.array(pil_image.convert("RGB"))
        except Exception:
            # Best effort as before: report the problem, keep the object alive.
            print("이미지 경로를 제대로 입력하세요.")

    def _canvas(self, check):
        """Return the array to draw on.

        ``check=True`` restarts from a clean copy of the loaded image;
        ``check=False`` keeps drawing on the previous canvas (a clean copy is
        created when no canvas exists yet).

        Raises:
            ValueError: If no image was loaded.
        """
        if self.image is None:
            raise ValueError("No image is loaded; create annotCats with a valid image_path.")
        if check or self.updated_img is None:
            self.updated_img = self.image.copy()
        return self.updated_img

    def _grid_color(self):
        """Return the WHITE palette entry scaled to matplotlib's 0-1 RGB range."""
        return tuple(channel / 255 for channel in self.colors["WHITE"])

    def img2points(self, x, y, pcolor="RED", size=5, option=-1, check=True):
        """Draw a circle centred on ``(x, y)`` and show the result.

        Args:
            x, y: Pixel coordinates of the circle centre.
            pcolor: Key of :attr:`colors` naming the drawing colour.
            size: Circle radius in pixels.
            option: Passed to ``cv.circle`` as its thickness argument
                (``-1`` draws a filled circle).
            check: ``True`` draws on a fresh copy of the image and returns
                nothing; ``False`` draws on the existing canvas and returns it.

        Returns:
            The canvas array when ``check`` is false, otherwise ``None``.
        """
        canvas = self._canvas(check)
        plt.grid(color=self._grid_color(), linestyle="dashdot", linewidth=.5)
        cv.circle(canvas, (x, y), size, self.colors[pcolor], option)
        plt.imshow(canvas)
        if not check:
            return canvas

    def img2rectangle(self, x1, y1, x2, y2, pcolor="RED", check=True, rect_lwidth=2):
        """Draw the rectangle spanned by ``(x1, y1)`` and ``(x2, y2)`` and show it.

        Args:
            x1, y1: Pixel coordinates of one corner.
            x2, y2: Pixel coordinates of the opposite corner.
            pcolor: Key of :attr:`colors` naming the drawing colour.
            check: ``True`` draws on a fresh copy of the image and returns
                nothing; ``False`` draws on the existing canvas and returns it.
            rect_lwidth: Outline thickness in pixels.

        Returns:
            The canvas array when ``check`` is false, otherwise ``None``.
        """
        canvas = self._canvas(check)
        plt.grid(color=self._grid_color(), linestyle="dashdot", linewidth=.5)
        # OpenCV names this keyword `thickness`; `linewidth` is not accepted.
        cv.rectangle(canvas, (x1, y1), (x2, y2), self.colors[pcolor], thickness=rect_lwidth)
        plt.imshow(canvas)
        if not check:
            return canvas

    def cvColor(self):
        """Return the palette as a dict of colour name -> ``(R, G, B)`` tuple."""
        # R, G, B
        BLACK = (0, 0, 0)
        WHITE = (255, 255, 255)
        GRAY = (125, 125, 125)
        RED = (255, 0, 0)
        GREEN = (0, 255, 0)
        BLUE = (0, 0, 255)
        CYAN = (0, 255, 255)
        MAGENTA = (255, 0, 255)
        YELLOW = (255, 255, 0)
        PINK = (238, 130, 238)
        ORANGE = (255, 165, 0)
        MINT = (60, 179, 113)
        LAVENDER = (106, 90, 205)
        IVORY = (240, 240, 240)
        SALMON = (240, 150, 120)

        colors = {"RED":RED, "GREEN":GREEN, "BLUE":BLUE,
                  "MAGENTA":MAGENTA, "CYAN":CYAN, "YELLOW":YELLOW,
                  "WHITE":WHITE, "GRAY":GRAY, "BLACK":BLACK,
                  "PINK":PINK, "ORANGE":ORANGE, "MINT":MINT,
                  "LAVENDER":LAVENDER, "IVORY":IVORY, "SALMON":SALMON}
        return colors
    
# class annot

In [16]:
# %load "/Volumes/WORK/Google/Colab-Server-VM/Waste-Recycling-Image-Segmentation/config/LoadDataset.py"
# assets/LoadDataset.py
import os
import cv2 as cv
import json
import torch
import pandas as pd

class CustomPath:
    """Resolve a dataset directory and list the image / annotation files in it.

    Args:
        path: Root directory. Defaults to the current working directory.
        ddir: Optional path fragment appended to ``path`` first.
        data_opt: Optional path fragment appended second (e.g. ``"Train"``).
        fdir: Optional path fragment appended last.
        image_ext: Extension (without dot) of the image files to list.
        annotation_ext: Extension (without dot) of the annotation files to list.
        annot_path: ``True`` when the annotation files sit directly in the
            resolved directory; ``False`` to be prompted for a sub-directory.

    Attributes:
        path: Root directory.
        data_opt: Lower-cased ``data_opt`` argument, or ``None``.
        bpath: Directory built by :meth:`basepath`.
        images: Sorted image file paths found in ``bpath``.
        annots: Sorted annotation file paths.
    """

    def __init__(self, path=None, ddir=None, data_opt=None, fdir=None, image_ext="jpg", annotation_ext="json", annot_path=True):
        try:
            self.path = os.getcwd() if path is None else path
        except OSError:
            # os.getcwd() fails when the working directory no longer exists;
            # nothing below can work without a root, so report and re-raise.
            print("본인이 설치한 폴더 구조에 맞게 파일 경로를 재설정해야 합니다.")
            raise
        self.bpath = self.basepath(ddir=ddir, data_opt=data_opt, fdir=fdir)
        self.images = self.imagepath(extension=image_ext)
        self.annots = self.annotpath(extension=annotation_ext, annot_path=annot_path)

    def basepath(self, ddir=None, data_opt="Train", fdir=None):
        """Join ``self.path`` with the given fragments, in argument order.

        Each fragment may be ``None`` (skipped) or contain leading, trailing or
        repeated ``/`` characters; empty pieces are dropped. With no fragments
        at all the result is ``self.path`` itself.

        Also stores the lower-cased ``data_opt`` in ``self.data_opt``; the
        original note says this is meant to map ``"Train"`` onto
        ``model.train()`` later.
        """
        self.data_opt = data_opt.lower() if data_opt is not None else None
        pieces = []
        for fragment in (ddir, data_opt, fdir):
            if fragment:
                pieces.extend(part for part in fragment.split('/') if part != "")
        return os.path.join(self.path, *pieces)

    def imagepath(self, extension="jpg"):
        """Return the sorted paths in ``self.bpath`` that end in ``.<extension>``."""
        from glob import glob
        return sorted(glob(self.bpath + "/*." + extension))

    def annotpath(self, extension="json", annot_path=True):
        """Return the sorted annotation file paths.

        Args:
            extension: File extension (without dot) to look for.
            annot_path: ``True`` searches ``self.bpath`` directly. ``False``
                asks on standard input for a sub-directory of ``self.bpath``
                and searches there.
        """
        from glob import glob
        if annot_path:
            return sorted(glob(self.bpath + "/*." + extension))
        annot_dir = input(f"{self.bpath} 하위 경로 중 annotation 할 파일이 위치한 경로를 작성하세요. : ")
        # Surrounding slashes would make os.path.join discard self.bpath
        # (leading) or are simply redundant (trailing).
        annot_dir = os.path.join(self.bpath, annot_dir.strip('/'))
        return sorted(glob(annot_dir + "/*." + extension))

class PreTrainDataset(CustomPath):
    """Dataset that loads every image and one JSON annotation file from a folder.

    Args:
        path: Root directory, forwarded to ``CustomPath`` (``None`` selects the
            current working directory there).
        transforms: Optional callable ``(img, target) -> (img, target)``
            applied in :meth:`__getitem__`.
        ddir: Name of the data sub-directory below ``path``; ``None`` selects
            ``"assets"``.
            NOTE(review): the original only handled ``ddir=None``; using the
            value as the sub-directory name is an assumption -- confirm.
        bbox_opt: One of ``'minmax'``, ``'minwh'``, ``'centerwh'`` (case
            insensitive); selects which box list of the annotation file is used.

    Attributes:
        train_path: Sorted file names found in the data directory.
        img_path: Paths of the files ending in ``.JPG``.
        wgt_path: Paths of the files ending in ``.pth``.
        imgs: Stacked RGB image tensor built by :meth:`cvtImageAnnots`.
        targets: Dict of tensors built by :meth:`cvtImageAnnots`.
    """

    def __init__(self, path=None, transforms=None, ddir=None, bbox_opt="minmax"):
        # Forward the root directory (not the instance itself) to the base
        # class, which also resolves ``None`` into a usable ``self.path``.
        super().__init__(path=path)
        self.bbox_opt = bbox_opt.lower()
        self.transforms = transforms
        self.data_dir = os.path.join(self.path, 'assets' if ddir is None else ddir)
        # List the directory entries; sorting the path string itself would
        # only sort its characters.
        self.train_path = sorted(os.listdir(self.data_dir))
        self.img_path = [os.path.join(self.data_dir, imgs) for imgs in self.train_path if imgs.endswith(".JPG")]
        self.wgt_path = [os.path.join(self.data_dir, wgts) for wgts in self.train_path if wgts.endswith(".pth")]
        self.imgs, self.targets = self.cvtImageAnnots()

    def __len__(self):
        """Return the number of loaded images."""
        return len(self.imgs)

    def __getitem__(self, idx):
        """Return ``(img, target)`` for sample ``idx``.

        ``target`` is a dict with the ``idx``-th entry of every tensor in
        ``self.targets``. ``transforms`` is applied to the pair when set.
        """
        img = self.imgs[idx]
        # self.targets maps field name -> tensor over all samples, so one
        # sample is the idx-th slice of each field.
        target = {key: value[idx] for key, value in self.targets.items()}

        if self.transforms is not None:
            img, target = self.transforms(img, target)
        return img, target

    def cvtImageAnnots(self):
        """Load all images and the annotation file into tensors.

        Returns:
            ``(imgs, targets)``: ``imgs`` is the stack of all RGB images (they
            must therefore share one shape); ``targets`` is a dict with the
            keys ``image_id``, ``boxes``, ``labels`` and ``area``.

        Raises:
            FileNotFoundError: If an image cannot be read or no file ending in
                ``json`` exists in the data directory.
            ValueError: If ``bbox_opt`` is not a supported option.
        """
        imgs = []
        for path in self.img_path:
            img = cv.imread(path)
            if img is None:     # cv.imread signals failure by returning None
                raise FileNotFoundError(f"Could not read image: {path}")
            img = cv.cvtColor(img, cv.COLOR_BGR2RGB)
            # torch.stack only accepts tensors, not ndarrays.
            imgs.append(torch.from_numpy(img))
        imgs = torch.stack(imgs)

        ann_files = [os.path.join(self.data_dir, ann) for ann in self.train_path if ann.endswith("json")]
        if not ann_files:
            raise FileNotFoundError(f"No annotation file ending in 'json' found in {self.data_dir}")
        # NOTE(review): a single annotation file is assumed; the first match is used.
        with open(ann_files[0], 'r') as jsf:
            # Keys per the original note: 'fname', 'backgroud', 'label',
            # 'bbox_minmax', 'bbox_minWH', 'bbox_centerWH' ('background' is to
            # be used later).
            ann_data = json.load(jsf)

        box_key = self.bboxtype()
        if box_key is None:
            raise ValueError(f"Unsupported bbox_opt: {self.bbox_opt!r}")

        targets = {}
        targets['image_id'] = torch.stack([torch.tensor([idx]) for idx in range(len(ann_data['fname']))])
        targets['boxes'] = torch.as_tensor(ann_data[box_key], dtype=torch.float32)
        targets['labels'] = torch.as_tensor(self.label2code(ann_data['label']), dtype=torch.int64)
        # Width/height formats multiply columns 2 and 3 directly; the min/max
        # format first turns the corner coordinates into side lengths.
        targets['area'] = (targets['boxes'][:, 3]) * (targets['boxes'][:, 2]) if self.bbox_opt[-2:]=='wh' \
            else (targets['boxes'][:, 3] - targets['boxes'][:, 1]) * (targets['boxes'][:, 2] - targets['boxes'][:, 0])
        # mask..., dtype=torch.uint8
        return imgs, targets

    def bboxtype(self):
        """Return the annotation key that matches ``self.bbox_opt``.

        Returns:
            ``'bbox_minmax'``, ``'bbox_minWH'`` or ``'bbox_centerWH'``; for any
            other option a hint is printed and ``None`` is returned.
        """
        # Explicit check instead of `assert`, which is removed under `python -O`.
        if self.bbox_opt not in ['minmax', 'minwh', 'centerwh']:
            print("Put right BBox option which is one of 'minmax', 'minwh', and 'centerwh'")
            return None
        if self.bbox_opt=='minmax':
            return 'bbox_'+self.bbox_opt
        return 'bbox_'+self.bbox_opt[:-2]+self.bbox_opt[-2:].upper()

    @staticmethod
    def label2code(label_list):
        """Translate label names into their integer codes.

        Args:
            label_list: Iterable whose items are either a label name or an
                iterable of label names.

        Returns:
            A list with one code per name; nested items yield nested lists.

        Raises:
            KeyError: If a name is not one of the known labels.
        """
        cvtDict = {1:'paper', 2:'carton', 3:'can', 4:'glass', 5:'pet', 6:'plastic', 7:'plastic bag'}
        revDict = {v:k for k,v in cvtDict.items()}
        codes = []
        for cont in label_list:
            if isinstance(cont, str):
                codes.append(revDict[cont])
            else:
                codes.append([revDict[sub] for sub in cont])
        # Kept deliberately simple for now; the author did not yet know what
        # else would be needed.
        return codes

    
def parse_one_annot(path_to_data_file, filename):
    """Return the bounding boxes recorded for one file in a CSV annotation table.

    Args:
        path_to_data_file: CSV source readable by ``pd.read_csv``; it must
            provide the columns ``filename``, ``xmin``, ``ymin``, ``xmax``
            and ``ymax``.
        filename: Value of the ``filename`` column to select.

    Returns:
        Array with one ``[xmin, ymin, xmax, ymax]`` row per matching CSV row.
        The file name is also printed.
    """
    annotations = pd.read_csv(path_to_data_file)
    print(filename)
    is_match = annotations["filename"] == filename
    return annotations.loc[is_match, ["xmin", "ymin", "xmax", "ymax"]].values

def getListOfFiles(dirName, data_folder=None):
    """Recursively list every file below ``dirName``.

    Args:
        dirName: Directory to walk.
        data_folder: Text removed from every returned path (typically the
            dataset root, to obtain relative paths). When omitted, a
            module-level ``data_folder`` is used if one exists; otherwise the
            paths are returned unchanged.

    Returns:
        List of file paths in ``os.listdir`` order, sub-directories expanded
        in place.
    """
    if data_folder is None:
        # Earlier versions read a global `data_folder` that this module never
        # defines (NameError on the first file); keep honouring it if present.
        data_folder = globals().get("data_folder", "")

    allFiles = []
    for entry in os.listdir(dirName):
        fullPath = os.path.join(dirName, entry)
        if os.path.isdir(fullPath):
            allFiles.extend(getListOfFiles(fullPath, data_folder))
        else:
            if data_folder:
                fullPath = fullPath.replace(data_folder, '')
            allFiles.append(fullPath)
    return allFiles

In [13]:
# jn(BASE, NONE, "assets")
# Scratch check: replacing None with "" before "/".join still leaves empty
# segments, which is why the result has a leading "//" and a trailing "/".
"/".join(list(map(lambda x : "".join(x) if x != None else "" ,[None, "/assets", "Train",None])))

'//assets/Train/'

In [14]:
# Scratch check: splitting on '/' and dropping the empty pieces removes the
# leading slash from the fragment.
[p for p in "/assets".split('/') if p != ""]

['assets']